megadetector 5.0.10__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (226)
  1. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.10.dist-info/RECORD +0 -224
  214. megadetector-5.0.10.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py
@@ -1,1029 +0,0 @@
- ########
- #
- # run_detector_batch.py
- #
- # Module to run MegaDetector on lots of images, writing the results
- # to a file in the MegaDetector output format:
- #
- # https://github.com/agentmorris/MegaDetector/tree/main/api/batch_processing#megadetector-batch-output-format
- #
- # This enables the results to be used in our post-processing pipeline; see
- # api/batch_processing/postprocessing/postprocess_batch_results.py .
- #
- # This script can save results to checkpoints intermittently, in case disaster
- # strikes. To enable this, set --checkpoint_frequency to n > 0, and results
- # will be saved as a checkpoint every n images. Checkpoints will be written
- # to a file in the same directory as the output_file, and after all images
- # are processed and the final results file has been written to output_file,
- # the temporary checkpoint file will be deleted. If you want to resume from a
- # checkpoint, set the checkpoint file's path using --resume_from_checkpoint.
- #
- # The `threshold` you can provide as an argument is the confidence threshold above
- # which detections will be included in the output file.
- #
- # Has preliminary multiprocessing support for CPUs only; if a GPU is available, it will
- # use the GPU instead of CPUs, and the --ncores option will be ignored. Checkpointing
- # is not supported when using a GPU.
- #
- # Does not have a command-line option to bind the process to a particular GPU, but you can
- # prepend with "CUDA_VISIBLE_DEVICES=0 ", for example, to bind to GPU 0, e.g.:
- #
- # CUDA_VISIBLE_DEVICES=0 python detection/run_detector_batch.py md_v4.1.0.pb ~/data ~/mdv4test.json
- #
- # You can disable GPU processing entirely by setting CUDA_VISIBLE_DEVICES=''.
- #
- ########
-
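For concreteness, a hypothetical checkpointing workflow (paths and frequency are illustrative, not from the package) might look like this: start a run that checkpoints every 500 images, then resume after an interruption by pointing --resume_from_checkpoint at the file the first run left behind.

python detection/run_detector_batch.py MDV5A ~/data ~/results.json --checkpoint_frequency 500
python detection/run_detector_batch.py MDV5A ~/data ~/results.json --checkpoint_frequency 500 --resume_from_checkpoint ~/checkpoint_20240101120000.json

(By default the script writes checkpoints as checkpoint_&lt;timestamp&gt;.json in the output file's directory; the timestamp above is made up.)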
- #%% Constants, imports, environment
-
- import argparse
- import json
- import os
- import sys
- import time
- import copy
- import shutil
- import warnings
- import itertools
- import humanfriendly
-
- from datetime import datetime
- from functools import partial
- from tqdm import tqdm
-
- import multiprocessing
- from threading import Thread
- from multiprocessing import Process, Manager
-
- # Multiprocessing uses processes, not threads... leaving this here (and commented out)
- # to make sure I don't change this casually at some point; it changes a number of
- # assumptions about interaction with PyTorch and TF.
- # from multiprocessing.pool import ThreadPool as workerpool
- from multiprocessing.pool import Pool as workerpool
-
- import detection.run_detector as run_detector
- from detection.run_detector import is_gpu_available,\
-     load_detector,\
-     try_download_known_detector,\
-     get_detector_version_from_filename,\
-     get_detector_metadata_from_version_string
-
- from md_utils import path_utils
- import md_visualization.visualization_utils as vis_utils
- from data_management import read_exif
-
- # Suppress numpy FutureWarnings triggered by the tensorflow import
- warnings.filterwarnings('ignore', category=FutureWarning)
-
- # Number of images to pre-fetch
- max_queue_size = 10
-
- # How often should we print progress when using the image queue?
- n_queue_print = 1000
-
- use_threads_for_queue = False
- verbose = False
-
- exif_options = read_exif.ReadExifOptions()
- exif_options.processing_library = 'pil'
- exif_options.byte_handling = 'convert_to_string'
-
-
- #%% Support functions for multiprocessing
-
- def producer_func(q,image_files):
-     """
-     Producer function; only used when using the (optional) image queue.
-
-     Reads up to N images from disk and puts them on the blocking queue for processing.
-     """
-
-     if verbose:
-         print('Producer starting'); sys.stdout.flush()
-
-     for im_file in image_files:
-
-         try:
-             if verbose:
-                 print('Loading image {}'.format(im_file)); sys.stdout.flush()
-             image = vis_utils.load_image(im_file)
-         except Exception:
-             print('Producer process: image {} cannot be loaded.'.format(im_file))
-             image = run_detector.FAILURE_IMAGE_OPEN
-
-         if verbose:
-             print('Queueing image {}'.format(im_file)); sys.stdout.flush()
-         q.put([im_file,image])
-
-     q.put(None)
-
-     print('Finished image loading'); sys.stdout.flush()
-
-
- def consumer_func(q,return_queue,model_file,confidence_threshold,image_size=None):
-     """
-     Consumer function; only used when using the (optional) image queue.
-
-     Pulls images from a blocking queue and processes them.
-     """
-
-     if verbose:
-         print('Consumer starting'); sys.stdout.flush()
-
-     start_time = time.time()
-     detector = load_detector(model_file)
-     elapsed = time.time() - start_time
-     print('Loaded model (before queueing) in {}, printing updates every {} images'.format(
-         humanfriendly.format_timespan(elapsed),n_queue_print))
-     sys.stdout.flush()
-
-     results = []
-
-     n_images_processed = 0
-
-     while True:
-         r = q.get()
-         if r is None:
-             q.task_done()
-             return_queue.put(results)
-             return
-         n_images_processed += 1
-         im_file = r[0]
-         image = r[1]
-         if verbose or ((n_images_processed % n_queue_print) == 1):
-             elapsed = time.time() - start_time
-             images_per_second = n_images_processed / elapsed
-             print('De-queued image {} ({:.2f}/s) ({})'.format(n_images_processed,
-                                                               images_per_second,
-                                                               im_file))
-             sys.stdout.flush()
-         if isinstance(image,str):
-             # This is how the producer function communicates read errors
-             results.append({'file': im_file,
-                             'failure': image})
-         else:
-             results.append(process_image(im_file=im_file,detector=detector,
-                                          confidence_threshold=confidence_threshold,
-                                          image=image,quiet=True,image_size=image_size))
-         if verbose:
-             print('Processed image {}'.format(im_file)); sys.stdout.flush()
-         q.task_done()
-
-
- def run_detector_with_image_queue(image_files,model_file,confidence_threshold,
-                                   quiet=False,image_size=None):
-     """
-     Driver function for the (optional) multiprocessing-based image queue; only used
-     when --use_image_queue is specified. Starts a reader process to read images from disk, but
-     processes images in the process from which this function is called (i.e., does not currently
-     spawn a separate consumer process).
-     """
-
-     q = multiprocessing.JoinableQueue(max_queue_size)
-     return_queue = multiprocessing.Queue(1)
-
-     if use_threads_for_queue:
-         producer = Thread(target=producer_func,args=(q,image_files,))
-     else:
-         producer = Process(target=producer_func,args=(q,image_files,))
-     producer.daemon = False
-     producer.start()
-
-     # The queue system is a little more elegant if we start one thread for reading and one
-     # for processing, and this works fine on Windows, but because we import TF at module load,
-     # CUDA will only work in the main process, so currently the consumer function runs here.
-     #
-     # To enable proper multi-GPU support, we may need to move the TF import to a separate module
-     # that isn't loaded until very close to where inference actually happens.
-     run_separate_consumer_process = False
-
-     if run_separate_consumer_process:
-         if use_threads_for_queue:
-             consumer = Thread(target=consumer_func,args=(q,return_queue,model_file,
-                                                          confidence_threshold,image_size,))
-         else:
-             consumer = Process(target=consumer_func,args=(q,return_queue,model_file,
-                                                           confidence_threshold,image_size,))
-         consumer.daemon = True
-         consumer.start()
-     else:
-         consumer_func(q,return_queue,model_file,confidence_threshold,image_size)
-
-     producer.join()
-     print('Producer finished')
-
-     if run_separate_consumer_process:
-         consumer.join()
-         print('Consumer finished')
-
-     q.join()
-     print('Queue joined')
-
-     results = return_queue.get()
-
-     return results
-
-
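The queue mechanics above are standard multiprocessing fare. As a minimal, self-contained sketch of the same bounded producer/consumer pattern (generic Python, not part of this package; the doubling step is a stand-in for inference):

import multiprocessing
from multiprocessing import Process

def producer(q, items):
    # put() blocks when the bounded queue is full; that's what limits pre-fetching
    for item in items:
        q.put(item)
    q.put(None)  # Sentinel: no more work

def consumer(q, return_queue):
    results = []
    while True:
        item = q.get()
        if item is None:
            q.task_done()
            return_queue.put(results)
            return
        results.append(item * 2)  # Stand-in for per-image inference
        q.task_done()

if __name__ == '__main__':
    q = multiprocessing.JoinableQueue(10)
    return_queue = multiprocessing.Queue(1)
    p = Process(target=producer, args=(q, list(range(100))))
    p.start()
    consumer(q, return_queue)  # As above, the consumer runs in the main process
    p.join()
    q.join()
    assert len(return_queue.get()) == 100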
- #%% Other support functions
-
- def chunks_by_number_of_chunks(ls, n):
-     """
-     Splits a list into n chunks of approximately equal size, assigned round-robin.
-
-     Args
-     - ls: list
-     - n: int, # of chunks
-     """
-
-     for i in range(0, n):
-         yield ls[i::n]
-
-
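Note that the chunks interleave the input (slice ls[i::n]) rather than splitting it into contiguous runs; for example:

# Round-robin chunking: chunk i gets every n-th item starting at offset i
list(chunks_by_number_of_chunks(list(range(10)), 3))
# -> [[0, 3, 6, 9], [1, 4, 7], [2, 5, 8]]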
- #%% Image processing functions
-
- def process_images(im_files, detector, confidence_threshold, use_image_queue=False,
-                    quiet=False, image_size=None, checkpoint_queue=None, include_image_size=False,
-                    include_image_timestamp=False, include_exif_data=False):
-     """
-     Runs MegaDetector over a list of image files.
-
-     Args
-     - im_files: list of str, paths to image files
-     - detector: loaded model or str (path to .pb/.pt model file)
-     - confidence_threshold: float, only detections above this threshold are returned
-
-     Returns
-     - results: list of dict, each dict represents detections on one image
-       see the 'images' key in https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
-     """
-
-     if isinstance(detector, str):
-         start_time = time.time()
-         detector = load_detector(detector)
-         elapsed = time.time() - start_time
-         print('Loaded model (batch level) in {}'.format(humanfriendly.format_timespan(elapsed)))
-
-     if use_image_queue:
-         # The image queue path does not currently support the include_* options
-         results = run_detector_with_image_queue(im_files, detector, confidence_threshold,
-                                                 quiet=quiet, image_size=image_size)
-     else:
-         results = []
-         for im_file in im_files:
-             result = process_image(im_file, detector, confidence_threshold,
-                                    quiet=quiet, image_size=image_size,
-                                    include_image_size=include_image_size,
-                                    include_image_timestamp=include_image_timestamp,
-                                    include_exif_data=include_exif_data)
-
-             if checkpoint_queue is not None:
-                 checkpoint_queue.put(result)
-             results.append(result)
-
-     return results
-
- # ...def process_images(...)
-
-
290
- def process_image(im_file, detector, confidence_threshold, image=None,
291
- quiet=False, image_size=None, include_image_size=False,
292
- include_image_timestamp=False, include_exif_data=False,
293
- skip_image_resizing=False):
294
- """
295
- Runs MegaDetector on a single image file.
296
-
297
- Args
298
- - im_file: str, path to image file
299
- - detector: loaded model
300
- - confidence_threshold: float, only detections above this threshold are returned
301
- - image: previously-loaded image, if available
302
- - skip_image_resizing: whether to skip internal image resizing and rely on external resizing
303
-
304
- Returns:
305
- - result: dict representing detections on one image
306
- see the 'images' key in
307
- https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
308
- """
309
-
310
- if not quiet:
311
- print('Processing image {}'.format(im_file))
312
-
313
- if image is None:
314
- try:
315
- image = vis_utils.load_image(im_file)
316
- except Exception as e:
317
- if not quiet:
318
- print('Image {} cannot be loaded. Exception: {}'.format(im_file, e))
319
- result = {
320
- 'file': im_file,
321
- 'failure': run_detector.FAILURE_IMAGE_OPEN
322
- }
323
- return result
324
-
325
- try:
326
- result = detector.generate_detections_one_image(
327
- image, im_file, detection_threshold=confidence_threshold, image_size=image_size,
328
- skip_image_resizing=skip_image_resizing)
329
- except Exception as e:
330
- if not quiet:
331
- print('Image {} cannot be processed. Exception: {}'.format(im_file, e))
332
- result = {
333
- 'file': im_file,
334
- 'failure': run_detector.FAILURE_INFER
335
- }
336
- return result
337
-
338
- if include_image_size:
339
- result['width'] = image.width
340
- result['height'] = image.height
341
-
342
- if include_image_timestamp:
343
- result['datetime'] = get_image_datetime(image)
344
-
345
- if include_exif_data:
346
- result['exif_metadata'] = read_exif.read_pil_exif(image,exif_options)
347
-
348
- return result
349
-
350
- # ...def process_image(...)
351
-
352
-
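For reference, a successful per-image result in the MegaDetector batch output format looks roughly like the following (values are illustrative and the path is hypothetical; see the format link above for the authoritative spec):

example_result = {
    'file': 'camera01/IMG_0001.JPG',
    'detections': [
        {
            'category': '1',                  # '1' = animal in the default label map
            'conf': 0.92,
            'bbox': [0.41, 0.33, 0.18, 0.22]  # Normalized [x_min, y_min, width, height]
        }
    ]
}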
- #%% Main function
-
- def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=None,
-                                 confidence_threshold=run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD,
-                                 checkpoint_frequency=-1, results=None, n_cores=1,
-                                 use_image_queue=False, quiet=False, image_size=None, class_mapping_filename=None,
-                                 include_image_size=False, include_image_timestamp=False,
-                                 include_exif_data=False):
-     """
-     Args
-     - model_file: str, path to .pb or .pt model file
-     - image_file_names: list of strings (image filenames), a single image filename,
-       a folder to recursively search for images in, or a .json file containing
-       a list of images.
-     - checkpoint_path: str, path to JSON checkpoint file
-     - confidence_threshold: float, only detections above this threshold are returned
-     - checkpoint_frequency: int, write results to JSON checkpoint file every N images
-     - results: list of dict, existing results loaded from checkpoint
-     - n_cores: int, # of CPU cores to use
-     - class_mapping_filename: str, use a non-default class mapping supplied in a .json file
-
-     Returns
-     - results: list of dicts; each dict represents detections on one image
-     """
-
-     if n_cores is None:
-         n_cores = 1
-
-     if confidence_threshold is None:
-         confidence_threshold = run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD
-
-     if checkpoint_frequency is None:
-         checkpoint_frequency = -1
-
-     # This is an experimental hack to allow the use of non-MD YOLOv5 models through
-     # the same infrastructure; it disables the code that enforces MDv5-like class lists.
-     if class_mapping_filename is not None:
-         run_detector.USE_MODEL_NATIVE_CLASSES = True
-         with open(class_mapping_filename,'r') as f:
-             class_mapping = json.load(f)
-         print('Loaded custom class mapping:')
-         print(class_mapping)
-         run_detector.DEFAULT_DETECTOR_LABEL_MAP = class_mapping
-
-     # Handle the case where image_file_names is not yet actually a list
-     if isinstance(image_file_names,str):
-
-         # Find the images to score; images can be a directory, may need to recurse
-         if os.path.isdir(image_file_names):
-             image_dir = image_file_names
-             image_file_names = path_utils.find_images(image_dir, True)
-             print('{} image files found in folder {}'.format(len(image_file_names),image_dir))
-
-         # A json list of image paths
-         elif os.path.isfile(image_file_names) and image_file_names.endswith('.json'):
-             list_file = image_file_names
-             with open(list_file) as f:
-                 image_file_names = json.load(f)
-             print('Loaded {} image filenames from list file {}'.format(len(image_file_names),list_file))
-
-         # A single image file
-         elif os.path.isfile(image_file_names) and path_utils.is_image_file(image_file_names):
-             image_file_names = [image_file_names]
-             print('Processing image {}'.format(image_file_names[0]))
-
-         else:
-             raise ValueError('image_file_names is a string, but is not a directory, a json ' + \
-                              'list (.json), or an image file')
-
-     if results is None:
-         results = []
-
-     already_processed = set([i['file'] for i in results])
-
-     model_file = try_download_known_detector(model_file)
-
-     print('GPU available: {}'.format(is_gpu_available(model_file)))
-
-     if n_cores > 1 and is_gpu_available(model_file):
-
-         print('Warning: multiple cores requested, but a GPU is available; parallelization across ' + \
-               'GPUs is not currently supported, defaulting to one GPU')
-         n_cores = 1
-
-     if n_cores > 1 and use_image_queue:
-
-         print('Warning: multiple cores requested, but the image queue is enabled; parallelization ' + \
-               'with the image queue is not currently supported, defaulting to one worker')
-         n_cores = 1
-
-     if use_image_queue:
-
-         assert checkpoint_frequency < 0, \
-             'Using an image queue is not currently supported when checkpointing is enabled'
-         assert len(results) == 0, \
-             'Using an image queue with results loaded from a checkpoint is not currently supported'
-         assert n_cores <= 1
-         results = run_detector_with_image_queue(image_file_names, model_file,
-                                                 confidence_threshold, quiet,
-                                                 image_size=image_size)
-
-     elif n_cores <= 1:
-
-         # Load the detector
-         start_time = time.time()
-         detector = load_detector(model_file)
-         elapsed = time.time() - start_time
-         print('Loaded model in {}'.format(humanfriendly.format_timespan(elapsed)))
-
-         # This is only used for console reporting, so it's OK that it doesn't
-         # include images we might have loaded from a previous checkpoint
-         count = 0
-
-         for im_file in tqdm(image_file_names):
-
-             # Will not add additional entries not in the starter checkpoint
-             if im_file in already_processed:
-                 if not quiet:
-                     print('Bypassing image {}'.format(im_file))
-                 continue
-
-             count += 1
-
-             result = process_image(im_file, detector,
-                                    confidence_threshold, quiet=quiet,
-                                    image_size=image_size, include_image_size=include_image_size,
-                                    include_image_timestamp=include_image_timestamp,
-                                    include_exif_data=include_exif_data)
-             results.append(result)
-
-             # Write a checkpoint if necessary
-             if checkpoint_frequency != -1 and count % checkpoint_frequency == 0:
-
-                 print('Writing a new checkpoint after having processed {} images since '
-                       'last restart'.format(count))
-
-                 write_checkpoint(checkpoint_path, results)
-
-     else:
-
-         # Multiprocessing is enabled at this point
-
-         # When using multiprocessing, tell the workers to load the model on each
-         # process, by passing the model_file string as the "model" argument to
-         # process_images.
-         detector = model_file
-
-         print('Creating pool with {} cores'.format(n_cores))
-
-         if len(already_processed) > 0:
-             n_images_all = len(image_file_names)
-             image_file_names = [fn for fn in image_file_names if fn not in already_processed]
-             print('Loaded {} of {} images from checkpoint'.format(
-                 len(already_processed),n_images_all))
-
-         # Divide images into chunks; we'll send one chunk to each worker process
-         image_batches = list(chunks_by_number_of_chunks(image_file_names, n_cores))
-
-         pool = workerpool(n_cores)
-
-         if checkpoint_path is not None:
-
-             # Multiprocessing and checkpointing are both enabled at this point
-
-             checkpoint_queue = Manager().Queue()
-
-             # Pass the "results" array (which may already contain images loaded from an existing
-             # checkpoint) to the checkpoint queue handler function, which will append results to
-             # the list as they become available.
-             checkpoint_thread = Thread(target=checkpoint_queue_handler,
-                                        args=(checkpoint_path, checkpoint_frequency,
-                                              checkpoint_queue, results), daemon=True)
-             checkpoint_thread.start()
-
-             pool.map(partial(process_images, detector=detector,
-                              confidence_threshold=confidence_threshold,
-                              image_size=image_size,
-                              include_image_size=include_image_size,
-                              include_image_timestamp=include_image_timestamp,
-                              include_exif_data=include_exif_data,
-                              checkpoint_queue=checkpoint_queue),
-                      image_batches)
-
-             checkpoint_queue.put(None)
-
-         else:
-
-             # Multiprocessing is enabled, but checkpointing is not
-
-             new_results = pool.map(partial(process_images, detector=detector,
-                                            confidence_threshold=confidence_threshold,image_size=image_size,
-                                            include_image_size=include_image_size,
-                                            include_image_timestamp=include_image_timestamp,
-                                            include_exif_data=include_exif_data),
-                                    image_batches)
-
-             new_results = list(itertools.chain.from_iterable(new_results))
-
-             # Append the results we just computed to "results", which is *usually* empty, but will
-             # be non-empty if we resumed from a checkpoint
-             results += new_results
-
-         # ...if checkpointing is/isn't enabled
-
-     # ...if we're running (1) with image queue, (2) on one core, (3) on multiple cores
-
-     # 'results' may have been modified in place, but we also return it for
-     # backwards-compatibility.
-     return results
-
- # ...def load_and_run_detector_batch(...)
-
-
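Regarding the class-mapping hack above: per the --class_mapping_filename help text later in this file, the .json file should map zero-based int-strings to class names. A hypothetical mapping file (e.g. my_classes.json) might contain:

{
    "0": "animal",
    "1": "person",
    "2": "vehicle"
}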
- def checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_queue, results):
-     """
-     Thread function to accumulate results and write checkpoints when checkpointing and
-     multiprocessing are both enabled.
-     """
-
-     result_count = 0
-     while True:
-         result = checkpoint_queue.get()
-         if result is None:
-             break
-
-         result_count += 1
-         results.append(result)
-
-         if (checkpoint_frequency != -1) and (result_count % checkpoint_frequency == 0):
-
-             print('Writing a new checkpoint after having processed {} images since '
-                   'last restart'.format(result_count))
-
-             write_checkpoint(checkpoint_path, results)
-
-
- def write_checkpoint(checkpoint_path, results):
-     """
-     Writes the list 'results' to a JSON checkpoint file, as the 'images' field of a dict.
-     """
-
-     assert checkpoint_path is not None
-
-     # Back up any previous checkpoint, to protect against crashes while we're writing
-     # the checkpoint file.
-     checkpoint_tmp_path = None
-     if os.path.isfile(checkpoint_path):
-         checkpoint_tmp_path = checkpoint_path + '_tmp'
-         shutil.copyfile(checkpoint_path,checkpoint_tmp_path)
-
-     # Write the new checkpoint
-     with open(checkpoint_path, 'w') as f:
-         json.dump({'images': results}, f, indent=1, default=str)
-
-     # Remove the backup checkpoint if it exists
-     if checkpoint_tmp_path is not None:
-         os.remove(checkpoint_tmp_path)
-
-
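A checkpoint is therefore just a partial results file; resuming amounts to re-loading that list and passing it back in, which is what the command-line driver below does. A minimal programmatic sketch, assuming a hypothetical checkpoint path:

import json

# Hypothetical checkpoint previously written by write_checkpoint()
checkpoint_file = '/tmp/checkpoint_20240101120000.json'
with open(checkpoint_file) as f:
    saved = json.load(f)
assert 'images' in saved

# Files already present in saved['images'] will be bypassed on this run
results = load_and_run_detector_batch(model_file='MDV5A',
                                      image_file_names='/data/camera_traps',
                                      checkpoint_path=checkpoint_file,
                                      checkpoint_frequency=500,
                                      results=saved['images'])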
- def get_image_datetime(image):
-     """
-     Returns the EXIF datetime from [image] (a PIL Image object), if available, as a string.
-     """
-
-     exif_tags = read_exif.read_pil_exif(image,exif_options)
-
-     try:
-         datetime_str = exif_tags['DateTimeOriginal']
-         _ = time.strptime(datetime_str, '%Y:%m:%d %H:%M:%S')
-         return datetime_str
-
-     except Exception:
-         return None
-
-
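Note the EXIF-style timestamp format, with colons in the date portion; the validation above accepts strings like this (illustrative value):

import time
# EXIF DateTimeOriginal uses 'YYYY:MM:DD HH:MM:SS'
time.strptime('2023:06:01 14:05:00', '%Y:%m:%d %H:%M:%S')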
- def write_results_to_file(results, output_file, relative_path_base=None,
-                           detector_file=None, info=None, include_max_conf=False,
-                           custom_metadata=None):
-     """
-     Writes list of detection results to JSON output file. Format matches:
-
-     https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
-
-     Args
-     - results: list of dict, each dict represents detections on one image
-     - output_file: str, path to JSON output file, should end in '.json'
-     - relative_path_base: str, path to a directory as the base for relative paths
-     - detector_file: filename of the detector used to generate these results, only
-       used to pull out a version number for the "info" field
-     - info: dictionary to use instead of the default "info" field
-     - include_max_conf: old files (version 1.2 and earlier) included a "max_detection_conf"
-       field in each image; this was removed in version 1.3. Set this flag to force the
-       inclusion of this field.
-     - custom_metadata: additional data to include as info['custom_metadata']. Typically
-       a dictionary, but no format checks are performed.
-
-     Returns the complete output dictionary that was written to the output file.
-     """
-
-     if relative_path_base is not None:
-         results_relative = []
-         for r in results:
-             r_relative = copy.copy(r)
-             r_relative['file'] = os.path.relpath(r_relative['file'], start=relative_path_base)
-             results_relative.append(r_relative)
-         results = results_relative
-
-     # The typical case: we need to build the 'info' struct
-     if info is None:
-
-         info = {
-             'detection_completion_time': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
-             'format_version': '1.3'
-         }
-
-         if detector_file is not None:
-             detector_filename = os.path.basename(detector_file)
-             detector_version = get_detector_version_from_filename(detector_filename)
-             detector_metadata = get_detector_metadata_from_version_string(detector_version)
-             info['detector'] = detector_filename
-             info['detector_metadata'] = detector_metadata
-         else:
-             info['detector'] = 'unknown'
-             info['detector_metadata'] = get_detector_metadata_from_version_string('unknown')
-
-     # If the caller supplied the entire "info" struct
-     else:
-
-         if detector_file is not None:
-
-             print('Warning (write_results_to_file): info struct and detector file ' + \
-                   'supplied, ignoring detector file')
-
-     if custom_metadata is not None:
-         info['custom_metadata'] = custom_metadata
-
-     # The 'max_detection_conf' field used to be included by default, and it caused all kinds
-     # of headaches, so it's no longer included unless the user explicitly requests it.
-     if not include_max_conf:
-         for im in results:
-             if 'max_detection_conf' in im:
-                 del im['max_detection_conf']
-
-     final_output = {
-         'images': results,
-         'detection_categories': run_detector.DEFAULT_DETECTOR_LABEL_MAP,
-         'info': info
-     }
-
-     with open(output_file, 'w') as f:
-         json.dump(final_output, f, indent=1, default=str)
-     print('Output file saved at {}'.format(output_file))
-
-     return final_output
-
- # ...def write_results_to_file(...)
-
-
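Putting the pieces together, the file written here has roughly this top-level shape (abridged and illustrative; the detector filename is hypothetical):

example_output = {
    'images': [
        # One dict per image, as produced by process_image()
    ],
    'detection_categories': {'1': 'animal', '2': 'person', '3': 'vehicle'},
    'info': {
        'detection_completion_time': '2023-06-01 14:05:00',
        'format_version': '1.3',
        'detector': 'md_v5a.0.0.pt',
        'detector_metadata': {}  # Abridged
    }
}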
- #%% Interactive driver
-
- if False:
-
-     pass
-
-     #%%
-
-     checkpoint_path = None
-     model_file = r'G:\temp\models\md_v4.1.0.pb'
-     confidence_threshold = 0.1
-     checkpoint_frequency = -1
-     results = None
-     ncores = 1
-     use_image_queue = False
-     quiet = False
-     image_dir = r'G:\temp\demo_images\ssmini'
-     image_size = None
-     image_file_names = path_utils.find_images(image_dir, recursive=False)
-
-     start_time = time.time()
-
-     results = load_and_run_detector_batch(model_file=model_file,
-                                           image_file_names=image_file_names,
-                                           checkpoint_path=checkpoint_path,
-                                           confidence_threshold=confidence_threshold,
-                                           checkpoint_frequency=checkpoint_frequency,
-                                           results=results,
-                                           n_cores=ncores,
-                                           use_image_queue=use_image_queue,
-                                           quiet=quiet,
-                                           image_size=image_size)
-
-     elapsed = time.time() - start_time
-
-     print('Finished inference in {}'.format(humanfriendly.format_timespan(elapsed)))
-
-
- #%% Command-line driver
-
- def main():
-
-     parser = argparse.ArgumentParser(
-         description='Module to run a TF/PT animal detection model on lots of images')
-     parser.add_argument(
-         'detector_file',
-         help='Path to detector model file (.pb or .pt). Can also be the strings "MDV4", "MDV5A", or "MDV5B" to request automatic download.')
-     parser.add_argument(
-         'image_file',
-         help='Path to a single image file, a JSON file containing a list of paths to images, or a directory')
-     parser.add_argument(
-         'output_file',
-         help='Path to output JSON results file, should end with a .json extension')
-     parser.add_argument(
-         '--recursive',
-         action='store_true',
-         help='Recurse into directories, only meaningful if image_file points to a directory')
-     parser.add_argument(
-         '--output_relative_filenames',
-         action='store_true',
-         help='Output relative file names, only meaningful if image_file points to a directory')
-     parser.add_argument(
-         '--include_max_conf',
-         action='store_true',
-         help='Include the "max_detection_conf" field in the output')
-     parser.add_argument(
-         '--quiet',
-         action='store_true',
-         help='Suppress per-image console output')
-     parser.add_argument(
-         '--image_size',
-         type=int,
-         default=None,
-         help=('Force image resizing to a (square) integer size (not recommended to change this)'))
-     parser.add_argument(
-         '--use_image_queue',
-         action='store_true',
-         help='Pre-load images, may help keep your GPU busy; does not currently support ' + \
-              'checkpointing. Useful if you have a very fast GPU and a very slow disk.')
-     parser.add_argument(
-         '--threshold',
-         type=float,
-         default=run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD,
-         help="Confidence threshold between 0 and 1.0; don't include boxes below this " + \
-              "confidence in the output file. Default is {}".format(
-              run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD))
-     parser.add_argument(
-         '--checkpoint_frequency',
-         type=int,
-         default=-1,
-         help='Write results to a temporary file every N images; default is -1, which ' + \
-              'disables this feature')
-     parser.add_argument(
-         '--checkpoint_path',
-         type=str,
-         default=None,
-         help='File name to which checkpoints will be written if checkpoint_frequency is > 0')
-     parser.add_argument(
-         '--resume_from_checkpoint',
-         type=str,
-         default=None,
-         help='Path to a JSON checkpoint file to resume from')
-     parser.add_argument(
-         '--allow_checkpoint_overwrite',
-         action='store_true',
-         help='By default, this script will bail if the specified checkpoint file ' + \
-              'already exists; this option allows it to overwrite existing checkpoints')
-     parser.add_argument(
-         '--ncores',
-         type=int,
-         default=0,
-         help='Number of cores to use; only applies to CPU-based inference')
-     parser.add_argument(
-         '--class_mapping_filename',
-         type=str,
-         default=None,
-         help='Use a non-default class mapping, supplied in a .json file with a dictionary mapping ' + \
-              'int-strings to strings. This will also disable the addition of "1" to all category ' + \
-              'IDs, so your class mapping should start at zero.')
-     parser.add_argument(
-         '--include_image_size',
-         action='store_true',
-         help='Include image dimensions in output file'
-     )
-     parser.add_argument(
-         '--include_image_timestamp',
-         action='store_true',
-         help='Include image datetime (if available) in output file'
-     )
-     parser.add_argument(
-         '--include_exif_data',
-         action='store_true',
-         help='Include available EXIF data in output file'
-     )
-     parser.add_argument(
-         '--overwrite_handling',
-         type=str,
-         default='overwrite',
-         help='What should we do if the output file exists? overwrite/skip/error (default overwrite)'
-     )
-
-     if len(sys.argv[1:]) == 0:
-         parser.print_help()
-         parser.exit()
-
-     args = parser.parse_args()
-
-     # If the specified detector file is really the name of a known model, find
-     # (and possibly download) that model
-     args.detector_file = try_download_known_detector(args.detector_file)
-
-     assert os.path.exists(args.detector_file), \
-         'detector file {} does not exist'.format(args.detector_file)
-     assert 0.0 < args.threshold <= 1.0, 'Confidence threshold needs to be between 0 and 1'
-     assert args.output_file.endswith('.json'), 'output_file specified needs to end with .json'
-     if args.checkpoint_frequency != -1:
-         assert args.checkpoint_frequency > 0, 'Checkpoint_frequency needs to be > 0 or == -1'
-     if args.output_relative_filenames:
-         assert os.path.isdir(args.image_file), \
-             f'Could not find folder {args.image_file}, must supply a folder when ' + \
-             '--output_relative_filenames is set'
-
-     if os.path.exists(args.output_file):
-         if args.overwrite_handling == 'overwrite':
-             print('Warning: output file {} already exists and will be overwritten'.format(
-                 args.output_file))
-         elif args.overwrite_handling == 'skip':
-             print('Output file {} exists, returning'.format(args.output_file))
-             return
-         elif args.overwrite_handling == 'error':
-             raise Exception('Output file {} exists'.format(args.output_file))
-         else:
-             raise ValueError('Illegal overwrite handling string {}'.format(args.overwrite_handling))
-
-     # This is an experimental hack to allow the use of non-MD YOLOv5 models through
-     # the same infrastructure; it disables the code that enforces MDv5-like class lists.
-     if args.class_mapping_filename is not None:
-         run_detector.USE_MODEL_NATIVE_CLASSES = True
-         with open(args.class_mapping_filename,'r') as f:
-             class_mapping = json.load(f)
-         print('Loaded custom class mapping:')
-         print(class_mapping)
-         run_detector.DEFAULT_DETECTOR_LABEL_MAP = class_mapping
-
-     # Load the checkpoint if available
-     #
-     # Relative file names are only output at the end; all file paths in the checkpoint are
-     # still full paths.
-     if args.resume_from_checkpoint is not None:
-         assert os.path.exists(args.resume_from_checkpoint), \
-             'File at resume_from_checkpoint specified does not exist'
-         with open(args.resume_from_checkpoint) as f:
-             print('Loading previous results from checkpoint file {}'.format(
-                 args.resume_from_checkpoint))
-             saved = json.load(f)
-         assert 'images' in saved, \
-             'The checkpoint file does not have the correct fields; cannot be restored'
-         results = saved['images']
-         print('Restored {} entries from the checkpoint'.format(len(results)))
-     else:
-         results = []
-
-     # Find the images to score; images can be a directory, may need to recurse
-     if os.path.isdir(args.image_file):
-         image_file_names = path_utils.find_images(args.image_file, args.recursive)
-         if len(image_file_names) > 0:
-             print('{} image files found in the input directory'.format(len(image_file_names)))
-         else:
-             if args.recursive:
-                 print('No image files found in directory {}, exiting'.format(args.image_file))
-             else:
-                 print('No image files found in directory {}, did you mean to specify '
-                       '--recursive?'.format(args.image_file))
-             return
-
-     # A json list of image paths
-     elif os.path.isfile(args.image_file) and args.image_file.endswith('.json'):
-         with open(args.image_file) as f:
-             image_file_names = json.load(f)
-         print('Loaded {} image filenames from list file {}'.format(
-             len(image_file_names),args.image_file))
-
-     # A single image file
-     elif os.path.isfile(args.image_file) and path_utils.is_image_file(args.image_file):
-         image_file_names = [args.image_file]
-         print('Processing image {}'.format(args.image_file))
-
-     else:
-         raise ValueError('image_file specified is not a directory, a json list, or an image file ' + \
-                          '(or does not have a recognizable extension)')
-
-     assert len(image_file_names) > 0, 'Specified image_file does not point to valid image files'
-     assert os.path.exists(image_file_names[0]), \
-         'The first image to be scored does not exist at {}'.format(image_file_names[0])
-
-     output_dir = os.path.dirname(args.output_file)
-
-     if len(output_dir) > 0:
-         os.makedirs(output_dir,exist_ok=True)
-
-     assert not os.path.isdir(args.output_file), 'Specified output file is a directory'
-
-     # Test that we can write to the output_file's dir if checkpointing requested
-     if args.checkpoint_frequency != -1:
-
-         if args.checkpoint_path is not None:
-             checkpoint_path = args.checkpoint_path
-         else:
-             checkpoint_path = os.path.join(output_dir,
-                 'checkpoint_{}.json'.format(datetime.utcnow().strftime("%Y%m%d%H%M%S")))
-
-         # Don't overwrite existing checkpoint files; this is a sure-fire way to eventually
-         # erase someone's checkpoint.
-         if (checkpoint_path is not None) and (not args.allow_checkpoint_overwrite) \
-             and (args.resume_from_checkpoint is None):
-
-             assert not os.path.isfile(checkpoint_path), \
-                 f'Checkpoint path {checkpoint_path} already exists, delete or move it before ' + \
-                 're-using the same checkpoint path, or specify --allow_checkpoint_overwrite'
-
-         # Confirm that we can write to the checkpoint path; this avoids issues where
-         # we crash after several thousand images.
-         #
-         # But actually, commenting this out for now... the scenario where we resume from a
-         # checkpoint and then immediately overwrite it with empty data is higher-risk
-         # than the annoyance of crashing a few minutes after starting a job.
-         if False:
-             with open(checkpoint_path, 'w') as f:
-                 json.dump({'images': []}, f)
-
-         print('The checkpoint file will be written to {}'.format(checkpoint_path))
-
-     else:
-
-         checkpoint_path = None
-
-     start_time = time.time()
-
-     results = load_and_run_detector_batch(model_file=args.detector_file,
-                                           image_file_names=image_file_names,
-                                           checkpoint_path=checkpoint_path,
-                                           confidence_threshold=args.threshold,
-                                           checkpoint_frequency=args.checkpoint_frequency,
-                                           results=results,
-                                           n_cores=args.ncores,
-                                           use_image_queue=args.use_image_queue,
-                                           quiet=args.quiet,
-                                           image_size=args.image_size,
-                                           class_mapping_filename=args.class_mapping_filename,
-                                           include_image_size=args.include_image_size,
-                                           include_image_timestamp=args.include_image_timestamp,
-                                           include_exif_data=args.include_exif_data)
-
-     elapsed = time.time() - start_time
-     images_per_second = len(results) / elapsed
-     print('Finished inference for {} images in {} ({:.2f} images per second)'.format(
-         len(results),humanfriendly.format_timespan(elapsed),images_per_second))
-
-     relative_path_base = None
-     if args.output_relative_filenames:
-         relative_path_base = args.image_file
-     write_results_to_file(results, args.output_file, relative_path_base=relative_path_base,
-                           detector_file=args.detector_file,include_max_conf=args.include_max_conf)
-
-     if checkpoint_path and os.path.isfile(checkpoint_path):
-         os.remove(checkpoint_path)
-         print('Deleted checkpoint file {}'.format(checkpoint_path))
-
-     print('Done!')
-
-
- if __name__ == '__main__':
-     main()
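To round out the picture, a programmatic equivalent of a typical command-line run, using the top-level module path as packaged in 5.0.10 (paths are hypothetical; 'MDV5A' requests automatic model download per the CLI help above):

from detection.run_detector_batch import load_and_run_detector_batch, write_results_to_file

results = load_and_run_detector_batch(model_file='MDV5A',
                                      image_file_names='/data/camera_traps')
write_results_to_file(results, '/data/results.json',
                      relative_path_base='/data/camera_traps',
                      detector_file='MDV5A')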