megadetector 5.0.28-py3-none-any.whl → 10.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (197)
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
megadetector/detection/run_detector_batch.py

@@ -10,8 +10,8 @@ https://github.com/agentmorris/MegaDetector/tree/main/megadetector/api/batch_pro
  This enables the results to be used in our post-processing pipeline; see postprocess_batch_results.py.

  This script can save results to checkpoints intermittently, in case disaster
- strikes. To enable this, set --checkpoint_frequency to n > 0, and results
- will be saved as a checkpoint every n images. Checkpoints will be written
+ strikes. To enable this, set --checkpoint_frequency to n > 0, and results
+ will be saved as a checkpoint every n images. Checkpoints will be written
  to a file in the same directory as the output_file, and after all images
  are processed and final results file written to output_file, the temporary
  checkpoint file will be deleted. If you want to resume from a checkpoint, set
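
As a concrete illustration of the checkpointing behavior described in this docstring, here is a minimal sketch using the Python API rather than the CLI; the paths are hypothetical, and it assumes the load_and_run_detector_batch() and write_results_to_file() signatures shown later in this diff, plus the {'images': [...]} checkpoint layout written by _write_checkpoint().

```python
# Hedged sketch: resume-from-checkpoint via the Python API.
# Paths are hypothetical placeholders.
import json
import os

from megadetector.detection.run_detector_batch import (
    load_and_run_detector_batch, write_results_to_file)

checkpoint_path = '/tmp/md_checkpoint.json'

# If a checkpoint exists, load the already-computed results from it
results = None
if os.path.isfile(checkpoint_path):
    with open(checkpoint_path, 'r') as f:
        results = json.load(f)['images']

# Checkpoint every 1000 images; previously-processed images are skipped
results = load_and_run_detector_batch('MDV5A',
                                      '/data/camera-trap-images',
                                      checkpoint_path=checkpoint_path,
                                      checkpoint_frequency=1000,
                                      results=results)

write_results_to_file(results, '/data/md_results.json', detector_file='MDV5A')
```
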
@@ -26,10 +26,10 @@ run a gazillion MegaDetector images on multiple GPUs using this script, we just
  one GPU *per invocation of this script*. Dividing a big batch of images into one chunk
  per GPU happens outside of this script.

- Does not have a command-line option to bind the process to a particular GPU, but you can
+ Does not have a command-line option to bind the process to a particular GPU, but you can
  prepend with "CUDA_VISIBLE_DEVICES=0 ", for example, to bind to GPU 0, e.g.:

- CUDA_VISIBLE_DEVICES=0 python detection/run_detector_batch.py md_v4.1.0.pb ~/data ~/mdv4test.json
+ CUDA_VISIBLE_DEVICES=0 python detection/run_detector_batch.py md_v4.1.0.pb ~/data ~/mdv4test.json

  You can disable GPU processing entirely by setting CUDA_VISIBLE_DEVICES=''.

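
The CUDA_VISIBLE_DEVICES convention above is standard CUDA behavior, so the same binding can be done from Python, as in this sketch (set the variable before any CUDA context is created, i.e. before the detector is loaded):

```python
# Sketch: bind this process to GPU 0, or disable the GPU entirely,
# before loading the model. Standard CUDA behavior, not specific
# to this script.
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '0'   # bind to GPU 0
# os.environ['CUDA_VISIBLE_DEVICES'] = ''  # force CPU-only inference

from megadetector.detection.run_detector_batch import load_and_run_detector_batch

results = load_and_run_detector_batch('MDV5A', '/data/camera-trap-images')
```
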
@@ -70,6 +70,7 @@ from megadetector.detection.run_detector import \
  get_detector_metadata_from_version_string

  from megadetector.utils import path_utils
+ from megadetector.utils import ct_utils
  from megadetector.utils.ct_utils import parse_kvp_list
  from megadetector.utils.ct_utils import split_list_into_n_chunks
  from megadetector.utils.ct_utils import sort_list_of_dicts_by_key
@@ -92,7 +93,7 @@ max_queue_size = 10
  # How often should we print progress when using the image queue?
  n_queue_print = 1000

- # TODO: it's a little sloppy that these are module-level globals, but in practice it
+ # TODO: it's a little sloppy that these are module-level globals, but in practice it
  # doesn't really matter, so I'm not in a big rush to move these to options until I do
  # a larger cleanup of all the long argument lists in this module.
  #
@@ -116,40 +117,42 @@ def _producer_func(q,
  verbose=False,
  image_size=None,
  augment=None):
- """
+ """
  Producer function; only used when using the (optional) image queue.
-
- Reads up to images from disk and puts them on the blocking queue for
- processing. Each image is queued as a tuple of [filename,Image]. Sends
+
+ Reads up to images from disk and puts them on the blocking queue for
+ processing. Each image is queued as a tuple of [filename,Image]. Sends
  "None" to the queue when finished.
-
+
  The "detector" argument is only used for preprocessing.
  """
-
+
  if verbose:
  print('Producer starting: ID {}, preprocessor {}'.format(producer_id,preprocessor))
  sys.stdout.flush()
-
- if preprocessor is not None:
+
+ if preprocessor is not None:
  assert isinstance(preprocessor,str)
  detector_options = deepcopy(detector_options)
  detector_options['preprocess_only'] = True
- preprocessor = load_detector(preprocessor,detector_options=detector_options,verbose=verbose)
-
+ preprocessor = load_detector(preprocessor,
+ detector_options=detector_options,
+ verbose=verbose)
+
  for im_file in image_files:
-
+
  try:
  if verbose:
  print('Loading image {} on producer {}'.format(im_file,producer_id))
  sys.stdout.flush()
  image = vis_utils.load_image(im_file)
-
+
  if preprocessor is not None:
-
+
  image_info = preprocessor.generate_detections_one_image(
- image,
- im_file,
- detection_threshold=None,
+ image,
+ im_file,
+ detection_threshold=None,
  image_size=image_size,
  skip_image_resizing=False,
  augment=augment,
@@ -158,29 +161,29 @@ def _producer_func(q,
  if 'failure' in image_info:
  assert image_info['failure'] == run_detector.FAILURE_INFER
  raise
-
+
  image = image_info
-
+
  except Exception as e:
  print('Producer process: image {} cannot be loaded:\n{}'.format(im_file,str(e)))
- image = run_detector.FAILURE_IMAGE_OPEN
-
+ image = run_detector.FAILURE_IMAGE_OPEN
+
  if verbose:
  print('Queueing image {} from producer {}'.format(im_file,producer_id))
  sys.stdout.flush()
-
+
  q.put([im_file,image,producer_id])
-
+
  # This is a signal to the consumer function that a worker has finished
  q.put(None)
-
+
  if verbose:
  print('Loader worker {} finished'.format(producer_id))
  sys.stdout.flush()

  # ...def _producer_func(...)
-
-
+
+
  def _consumer_func(q,
  return_queue,
  model_file,
@@ -188,27 +191,29 @@ def _consumer_func(q,
  loader_workers,
  image_size=None,
  include_image_size=False,
- include_image_timestamp=False,
+ include_image_timestamp=False,
  include_exif_data=False,
  augment=False,
  detector_options=None,
  preprocess_on_image_queue=default_preprocess_on_image_queue,
  n_total_images=None
  ):
- """
+ """
  Consumer function; only used when using the (optional) image queue.
-
+
  Pulls images from a blocking queue and processes them. Returns when "None" has
  been read from each loader's queue.
  """
-
+
  if verbose:
  print('Consumer starting'); sys.stdout.flush()

  start_time = time.time()
-
+
  if isinstance(model_file,str):
- detector = load_detector(model_file,detector_options=detector_options,verbose=verbose)
+ detector = load_detector(model_file,
+ detector_options=detector_options,
+ verbose=verbose)
  elapsed = time.time() - start_time
  print('Loaded model (before queueing) in {}, printing updates every {} images'.format(
  humanfriendly.format_timespan(elapsed),n_queue_print))
216
221
  else:
217
222
  detector = model_file
218
223
  print('Detector of type {} passed to consumer function'.format(type(detector)))
219
-
224
+
220
225
  results = []
221
-
226
+
222
227
  n_images_processed = 0
223
228
  n_queues_finished = 0
224
-
229
+
225
230
  pbar = None
226
231
  if n_total_images is not None:
227
232
  # TODO: in principle I should close this pbar
228
233
  pbar = tqdm(total=n_total_images)
229
-
234
+
230
235
  while True:
231
-
236
+
232
237
  r = q.get()
233
-
238
+
234
239
  # Is this the last image in one of the producer queues?
235
240
  if r is None:
236
241
  n_queues_finished += 1
@@ -246,7 +251,7 @@ def _consumer_func(q,
246
251
  n_images_processed += 1
247
252
  im_file = r[0]
248
253
  image = r[1]
249
-
254
+
250
255
  """
251
256
  result['img_processed'] = img
252
257
  result['img_original'] = img_original
@@ -255,19 +260,19 @@ def _consumer_func(q,
  result['letterbox_ratio'] = letterbox_ratio
  result['letterbox_pad'] = letterbox_pad
  """
-
+
  if pbar is not None:
  pbar.update(1)
-
+
  if False:
  if verbose or ((n_images_processed % n_queue_print) == 1):
  elapsed = time.time() - start_time
  images_per_second = n_images_processed / elapsed
  print('De-queued image {} ({:.2f}/s) ({})'.format(n_images_processed,
  images_per_second,
- im_file));
+ im_file))
  sys.stdout.flush()
-
+
  if isinstance(image,str):
  # This is how the producer function communicates read errors
  results.append({'file': im_file,
@@ -276,7 +281,7 @@ def _consumer_func(q,
  print('Expected a dict, received an image of type {}'.format(type(image)))
  results.append({'file': im_file,
  'failure': 'illegal image type'})
-
+
  else:
  results.append(process_image(im_file=im_file,
  detector=detector,
@@ -285,14 +290,14 @@ def _consumer_func(q,
  quiet=True,
  image_size=image_size,
  include_image_size=include_image_size,
- include_image_timestamp=include_image_timestamp,
+ include_image_timestamp=include_image_timestamp,
  include_exif_data=include_exif_data,
  augment=augment,
  skip_image_resizing=preprocess_on_image_queue))
  if verbose:
  print('Processed image {}'.format(im_file)); sys.stdout.flush()
  q.task_done()
-
+
  # ...while True (consumer loop)

  # ...def _consumer_func(...)
@@ -303,7 +308,7 @@ def run_detector_with_image_queue(image_files,
  confidence_threshold,
  quiet=False,
  image_size=None,
- include_image_size=False,
+ include_image_size=False,
  include_image_timestamp=False,
  include_exif_data=False,
  augment=False,
@@ -311,54 +316,59 @@ def run_detector_with_image_queue(image_files,
  loader_workers=default_loaders,
  preprocess_on_image_queue=default_preprocess_on_image_queue):
  """
- Driver function for the (optional) multiprocessing-based image queue; only used
- when --use_image_queue is specified. Starts a reader process to read images from disk, but
+ Driver function for the (optional) multiprocessing-based image queue; only used
+ when --use_image_queue is specified. Starts a reader process to read images from disk, but
  processes images in the process from which this function is called (i.e., does not currently
  spawn a separate consumer process).
-
+
  Args:
  image_files (str): list of absolute paths to images
  model_file (str): filename or model identifier (e.g. "MDV5A")
  confidence_threshold (float): minimum confidence detection to include in
  output
  quiet (bool, optional): suppress per-image console printouts
- image_size (tuple, optional): image size to use for inference, only mess with this
+ image_size (int, optional): image size to use for inference, only mess with this
  if (a) you're using a model other than MegaDetector or (b) you know what you're
  doing
  include_image_size (bool, optional): should we include image size in the output for each image?
  include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
  include_exif_data (bool, optional): should we include EXIF data in the output for each image?
  augment (bool, optional): enable image augmentation
- detector_options (dict, optional): key/value pairs that are interpreted differently
+ detector_options (dict, optional): key/value pairs that are interpreted differently
  by different detectors
  loader_workers (int, optional): number of loaders to use
-
+ preprocess_on_image_queue (bool, optional): if the image queue is enabled, should it handle
+ image loading and preprocessing (True), or just image loading (False)?
+
  Returns:
  list: list of dicts in the format returned by process_image()
  """
-
+
  # Validate inputs
  assert isinstance(model_file,str)
-
+
  if loader_workers <= 0:
  loader_workers = 1
-
+
+ if detector_options is None:
+ detector_options = {}
+
  q = multiprocessing.JoinableQueue(max_queue_size)
  return_queue = multiprocessing.Queue(1)
-
+
  producers = []
-
+
  worker_string = 'thread' if use_threads_for_queue else 'process'
  print('Starting a {} pool with {} workers'.format(worker_string,loader_workers))
-
+
  preprocessor = None
-
+
  if preprocess_on_image_queue:
  print('Enabling image queue preprocessing')
  preprocessor = model_file
-
+
  n_total_images = len(image_files)
-
+
  chunks = split_list_into_n_chunks(image_files, loader_workers, chunk_strategy='greedy')
  for i_chunk,chunk in enumerate(chunks):
  if use_threads_for_queue:
@@ -379,11 +389,11 @@ def run_detector_with_image_queue(image_files,
  image_size,
  augment))
  producers.append(producer)
-
+
  for producer in producers:
  producer.daemon = False
  producer.start()
-
+
  if run_separate_consumer_process:
  if use_threads_for_queue:
  consumer = Thread(target=_consumer_func,args=(q,
@@ -393,7 +403,7 @@ def run_detector_with_image_queue(image_files,
  loader_workers,
  image_size,
  include_image_size,
- include_image_timestamp,
+ include_image_timestamp,
  include_exif_data,
  augment,
  detector_options,
@@ -407,7 +417,7 @@ def run_detector_with_image_queue(image_files,
  loader_workers,
  image_size,
  include_image_size,
- include_image_timestamp,
+ include_image_timestamp,
  include_exif_data,
  augment,
  detector_options,
@@ -423,7 +433,7 @@ def run_detector_with_image_queue(image_files,
  loader_workers,
  image_size,
  include_image_size,
- include_image_timestamp,
+ include_image_timestamp,
  include_exif_data,
  augment,
  detector_options,
@@ -434,21 +444,21 @@ def run_detector_with_image_queue(image_files,
  producer.join()
  if verbose:
  print('Producer {} finished'.format(i_producer))
-
+
  if verbose:
  print('All producers finished')
-
+
  if run_separate_consumer_process:
  consumer.join()
  if verbose:
  print('Consumer loop finished')
-
+
  q.join()
  if verbose:
  print('Queue joined')

  results = return_queue.get()
-
+
  return results

  # ...def run_detector_with_image_queue(...)
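
A sketch of calling this driver directly, using only arguments documented in the Args list above (paths are placeholders; most callers reach this code via load_and_run_detector_batch with use_image_queue=True):

```python
# Hedged sketch: direct use of the image-queue driver, per the
# docstring above. Paths and worker counts are illustrative.
from megadetector.detection.run_detector_batch import run_detector_with_image_queue
from megadetector.utils import path_utils

image_files = path_utils.find_images('/data/camera-trap-images', True)

results = run_detector_with_image_queue(image_files,
                                        'MDV5A',
                                        confidence_threshold=0.005,
                                        loader_workers=4,
                                        preprocess_on_image_queue=True)
print('Processed {} images'.format(len(results)))
```
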
@@ -459,29 +469,29 @@ def run_detector_with_image_queue(image_files,
  def _chunks_by_number_of_chunks(ls, n):
  """
  Splits a list into n even chunks.
-
+
  External callers should use ct_utils.split_list_into_n_chunks().

  Args:
  ls (list): list to break up into chunks
  n (int): number of chunks
  """
-
+
  for i in range(0, n):
  yield ls[i::n]


  #%% Image processing functions

- def process_images(im_files,
- detector,
- confidence_threshold,
- use_image_queue=False,
- quiet=False,
- image_size=None,
- checkpoint_queue=None,
- include_image_size=False,
- include_image_timestamp=False,
+ def process_images(im_files,
+ detector,
+ confidence_threshold,
+ use_image_queue=False,
+ quiet=False,
+ image_size=None,
+ checkpoint_queue=None,
+ include_image_size=False,
+ include_image_timestamp=False,
  include_exif_data=False,
  augment=False,
  detector_options=None,
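
Note that the ls[i::n] slice in _chunks_by_number_of_chunks() above deals elements out round-robin (element j lands in chunk j % n) rather than in contiguous blocks; a quick self-contained illustration:

```python
# Round-robin chunking, as in _chunks_by_number_of_chunks() above.
def chunks_by_number_of_chunks(ls, n):
    for i in range(0, n):
        yield ls[i::n]

print(list(chunks_by_number_of_chunks(list(range(7)), 3)))
# -> [[0, 3, 6], [1, 4], [2, 5]]
```
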
@@ -489,15 +499,15 @@ def process_images(im_files,
  preprocess_on_image_queue=default_preprocess_on_image_queue):
  """
  Runs a detector (typically MegaDetector) over a list of image files on a single thread.
-
+
  Args:
- im_files (list: paths to image files
+ im_files (list): paths to image files
  detector (str or detector object): loaded model or str; if this is a string, it can be a
  path to a .pb/.pt model file or a known model identifier (e.g. "MDV5A")
  confidence_threshold (float): only detections above this threshold are returned
  use_image_queue (bool, optional): separate image loading onto a dedicated worker process
  quiet (bool, optional): suppress per-image printouts
- image_size (tuple, optional): image size to use for inference, only mess with this
+ image_size (int, optional): image size to use for inference, only mess with this
  if (a) you're using a model other than MegaDetector or (b) you know what you're
  doing
  checkpoint_queue (Queue, optional): internal parameter used to pass image queues around
@@ -505,68 +515,75 @@ def process_images(im_files,
  include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
  include_exif_data (bool, optional): should we include EXIF data in the output for each image?
  augment (bool, optional): enable image augmentation
- detector_options (dict, optional): key/value pairs that are interpreted differently
+ detector_options (dict, optional): key/value pairs that are interpreted differently
  by different detectors
  loader_workers (int, optional): number of loaders to use (only relevant when using image queue)
+ preprocess_on_image_queue (bool, optional): if the image queue is enabled, should it handle
+ image loading and preprocessing (True), or just image loading (False)?

  Returns:
  list: list of dicts, in which each dict represents detections on one image,
  see the 'images' key in https://github.com/agentmorris/MegaDetector/tree/main/megadetector/api/batch_processing#batch-processing-api-output-format
  """
-
+
  if isinstance(detector, str):
-
+
  start_time = time.time()
- detector = load_detector(detector,detector_options=detector_options,verbose=verbose)
+ detector = load_detector(detector,
+ detector_options=detector_options,
+ verbose=verbose)
  elapsed = time.time() - start_time
  print('Loaded model (batch level) in {}'.format(humanfriendly.format_timespan(elapsed)))

+ if detector_options is None:
+ detector_options = {}
+
  if use_image_queue:
-
- run_detector_with_image_queue(im_files,
- detector,
- confidence_threshold,
- quiet=quiet,
+
+ run_detector_with_image_queue(im_files,
+ detector,
+ confidence_threshold,
+ quiet=quiet,
  image_size=image_size,
- include_image_size=include_image_size,
+ include_image_size=include_image_size,
  include_image_timestamp=include_image_timestamp,
  include_exif_data=include_exif_data,
  augment=augment,
  detector_options=detector_options,
  loader_workers=loader_workers,
  preprocess_on_image_queue=preprocess_on_image_queue)
-
- else:
-
+
+ else:
+
  results = []
  for im_file in im_files:
- result = process_image(im_file,
- detector,
+ result = process_image(im_file,
+ detector,
  confidence_threshold,
- quiet=quiet,
- image_size=image_size,
- include_image_size=include_image_size,
+ quiet=quiet,
+ image_size=image_size,
+ include_image_size=include_image_size,
  include_image_timestamp=include_image_timestamp,
  include_exif_data=include_exif_data,
  augment=augment)

  if checkpoint_queue is not None:
  checkpoint_queue.put(result)
- results.append(result)
-
+ results.append(result)
+
  return results

  # ...def process_images(...)


- def process_image(im_file,
- detector,
- confidence_threshold,
- image=None,
- quiet=False,
- image_size=None,
+ def process_image(im_file,
+ detector,
+ confidence_threshold,
+ image=None,
+ quiet=False,
+ image_size=None,
  include_image_size=False,
- include_image_timestamp=False,
+ include_image_timestamp=False,
  include_exif_data=False,
  skip_image_resizing=False,
  augment=False):
@@ -575,30 +592,30 @@ def process_image(im_file,

  Args:
  im_file (str): path to image file
- detector (detector object): loaded model, this can no longer be a string by the time
+ detector (detector object): loaded model, this can no longer be a string by the time
  you get this far down the pipeline
  confidence_threshold (float): only detections above this threshold are returned
  image (Image, optional): previously-loaded image, if available, used when a worker
  thread is handling image loads
  quiet (bool, optional): suppress per-image printouts
- image_size (tuple, optional): image size to use for inference, only mess with this
+ image_size (int, optional): image size to use for inference, only mess with this
  if (a) you're using a model other than MegaDetector or (b) you know what you're
- doing
+ doing
  include_image_size (bool, optional): should we include image size in the output for each image?
  include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
- include_exif_data (bool, optional): should we include EXIF data in the output for each image?
+ include_exif_data (bool, optional): should we include EXIF data in the output for each image?
  skip_image_resizing (bool, optional): whether to skip internal image resizing and rely on external resizing
  augment (bool, optional): enable image augmentation

  Returns:
  dict: dict representing detections on one image,
- see the 'images' key in
+ see the 'images' key in
  https://github.com/agentmorris/MegaDetector/tree/main/megadetector/api/batch_processing#batch-processing-api-output-format
  """
-
+
  if not quiet:
  print('Processing image {}'.format(im_file))
-
+
  if image is None:
  try:
  image = vis_utils.load_image(im_file)
@@ -612,11 +629,11 @@ def process_image(im_file,
  return result

  try:
-
+
  result = detector.generate_detections_one_image(
- image,
- im_file,
- detection_threshold=confidence_threshold,
+ image,
+ im_file,
+ detection_threshold=confidence_threshold,
  image_size=image_size,
  skip_image_resizing=skip_image_resizing,
  augment=augment)
@@ -632,7 +649,7 @@ def process_image(im_file,
  if isinstance(image,dict):
  image = image['img_original_pil']

- if include_image_size:
+ if include_image_size:
  result['width'] = image.width
  result['height'] = image.height

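
For reference, a sketch of single-image use per the process_image() docstring above; by this point the detector must already be a loaded model object, not a string (model name and path are placeholders):

```python
# Hedged sketch: processing a single image with a pre-loaded detector.
from megadetector.detection.run_detector import load_detector
from megadetector.detection.run_detector_batch import process_image

detector = load_detector('MDV5A')
result = process_image('/data/images/IMG_0001.JPG',
                       detector,
                       confidence_threshold=0.005,
                       include_image_size=True)
print(result['file'], result.get('detections'))
```
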
@@ -651,13 +668,13 @@ def _load_custom_class_mapping(class_mapping_filename):
  """
  This is an experimental hack to allow the use of non-MD YOLOv5 models through
  the same infrastructure; it disables the code that enforces MDv5-like class lists.
-
+
  Should be a .json file that maps int-strings to strings, or a YOLOv5 dataset.yaml file.
  """
-
+
  if class_mapping_filename is None:
  return
-
+
  run_detector.USE_MODEL_NATIVE_CLASSES = True
  if class_mapping_filename.endswith('.json'):
  with open(class_mapping_filename,'r') as f:
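
For reference, the .json form described in this docstring maps int-strings to class names; a sketch of producing such a file (the class names here are hypothetical, for a non-MD YOLOv5 model):

```python
# Hedged sketch: a custom class-mapping .json of the form described above.
import json

class_mapping = {'0': 'deer', '1': 'boar', '2': 'fox'}  # hypothetical classes
with open('custom_classes.json', 'w') as f:
    json.dump(class_mapping, f, indent=1)

# Then: load_and_run_detector_batch(..., class_mapping_filename='custom_classes.json')
```
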
@@ -668,28 +685,28 @@ def _load_custom_class_mapping(class_mapping_filename):
  class_mapping = {str(k):v for k,v in class_mapping.items()}
  else:
  raise ValueError('Unrecognized class mapping file {}'.format(class_mapping_filename))
-
+
  print('Loaded custom class mapping:')
  print(class_mapping)
  run_detector.DEFAULT_DETECTOR_LABEL_MAP = class_mapping
  return class_mapping
-
-
+
+
  #%% Main function

- def load_and_run_detector_batch(model_file,
- image_file_names,
+ def load_and_run_detector_batch(model_file,
+ image_file_names,
  checkpoint_path=None,
  confidence_threshold=run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD,
- checkpoint_frequency=-1,
- results=None,
+ checkpoint_frequency=-1,
+ results=None,
  n_cores=1,
- use_image_queue=False,
- quiet=False,
- image_size=None,
- class_mapping_filename=None,
- include_image_size=False,
- include_image_timestamp=False,
+ use_image_queue=False,
+ quiet=False,
+ image_size=None,
+ class_mapping_filename=None,
+ include_image_size=False,
+ include_image_timestamp=False,
  include_exif_data=False,
  augment=False,
  force_model_download=False,
@@ -698,27 +715,26 @@ def load_and_run_detector_batch(model_file,
  preprocess_on_image_queue=default_preprocess_on_image_queue):
  """
  Load a model file and run it on a list of images.
-
+
  Args:
-
  model_file (str): path to model file, or supported model string (e.g. "MDV5A")
- image_file_names (list or str): list of strings (image filenames), a single image filename,
- a folder to recursively search for images in, or a .json or .txt file containing a list
+ image_file_names (list or str): list of strings (image filenames), a single image filename,
+ a folder to recursively search for images in, or a .json or .txt file containing a list
  of images.
- checkpoint_path (str, optional), path to use for checkpoints (if None, checkpointing
+ checkpoint_path (str, optional): path to use for checkpoints (if None, checkpointing
  is disabled)
  confidence_threshold (float, optional): only detections above this threshold are returned
- checkpoint_frequency (int, optional): int, write results to JSON checkpoint file every N
+ checkpoint_frequency (int, optional): int, write results to JSON checkpoint file every N
  images, -1 disabled checkpointing
- results (list, optional): list of dicts, existing results loaded from checkpoint; generally
+ results (list, optional): list of dicts, existing results loaded from checkpoint; generally
  not useful if you're using this function outside of the CLI
  n_cores (int, optional): number of parallel worker to use, ignored if we're running on a GPU
  use_image_queue (bool, optional): use a dedicated worker for image loading
  quiet (bool, optional): disable per-image console output
- image_size (tuple, optional): image size to use for inference, only mess with this
+ image_size (int, optional): image size to use for inference, only mess with this
  if (a) you're using a model other than MegaDetector or (b) you know what you're
  doing
- class_mapping_filename (str, optional), use a non-default class mapping supplied in a .json
+ class_mapping_filename (str, optional): use a non-default class mapping supplied in a .json
  file or YOLOv5 dataset.yaml file
  include_image_size (bool, optional): should we include image size in the output for each image?
  include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
727
743
  force_model_download (bool, optional): force downloading the model file if
728
744
  a named model (e.g. "MDV5A") is supplied, even if the local file already
729
745
  exists
730
- detector_options (dict, optional): key/value pairs that are interpreted differently
746
+ detector_options (dict, optional): key/value pairs that are interpreted differently
731
747
  by different detectors
732
748
  loader_workers (int, optional): number of loaders to use, only relevant when use_image_queue is True
733
-
749
+ preprocess_on_image_queue (bool, optional): if the image queue is enabled, should it handle
750
+ image loading and preprocessing (True), or just image loading (False)?
751
+
734
752
  Returns:
735
753
  results: list of dicts; each dict represents detections on one image
736
754
  """
737
-
755
+
738
756
  # Validate input arguments
739
757
  if n_cores is None or n_cores <= 0:
740
758
  n_cores = 1
741
-
759
+
760
+ if detector_options is None:
761
+ detector_options = {}
762
+
742
763
  if confidence_threshold is None:
743
764
  confidence_threshold=run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD
744
-
765
+
745
766
  # Disable checkpointing if checkpoint_path is None
746
767
  if checkpoint_frequency is None or checkpoint_path is None:
747
768
  checkpoint_frequency = -1
748
769
 
749
770
  if class_mapping_filename is not None:
750
771
  _load_custom_class_mapping(class_mapping_filename)
751
-
772
+
752
773
  # Handle the case where image_file_names is not yet actually a list
753
774
  if isinstance(image_file_names,str):
754
-
775
+
755
776
  # Find the images to score; images can be a directory, may need to recurse
756
777
  if os.path.isdir(image_file_names):
757
778
  image_dir = image_file_names
758
779
  image_file_names = path_utils.find_images(image_dir, True)
759
780
  print('{} image files found in folder {}'.format(len(image_file_names),image_dir))
760
-
781
+
761
782
  # A single file, or a list of image paths
762
783
  elif os.path.isfile(image_file_names):
763
784
  list_file = image_file_names
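
Per the docstring and the branching above, image_file_names can arrive in several forms; a sketch of the accepted inputs (paths hypothetical):

```python
# Hedged sketch: the accepted forms of image_file_names, per the Args
# list and the isdir/isfile handling above. Paths are placeholders.
from megadetector.detection.run_detector_batch import load_and_run_detector_batch

# 1. An explicit list of image paths
results = load_and_run_detector_batch('MDV5A', ['/data/a.jpg', '/data/b.jpg'])

# 2. A folder, searched recursively for images
results = load_and_run_detector_batch('MDV5A', '/data/camera-trap-images')

# 3. A .json or .txt file containing a list of image paths
results = load_and_run_detector_batch('MDV5A', '/data/image_list.json')
```
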
@@ -780,43 +801,45 @@ def load_and_run_detector_batch(model_file,
  'File {} supplied as [image_file_names] argument, but extension is neither .json nor .txt'\
  .format(
  list_file))
- else:
+ else:
  raise ValueError(
  '{} supplied as [image_file_names] argument, but it does not appear to be a file or folder'.format(
  image_file_names))
-
+
  if results is None:
  results = []

  already_processed = set([i['file'] for i in results])

- model_file = try_download_known_detector(model_file, force_download=force_model_download)
-
+ model_file = try_download_known_detector(model_file,
+ force_download=force_model_download,
+ verbose=verbose)
+
  print('GPU available: {}'.format(is_gpu_available(model_file)))
-
- if n_cores > 1 and is_gpu_available(model_file):
-
+
+ if (n_cores > 1) and is_gpu_available(model_file):
+
  print('Warning: multiple cores requested, but a GPU is available; parallelization across ' + \
  'GPUs is not currently supported, defaulting to one GPU')
  n_cores = 1

- if n_cores > 1 and use_image_queue:
-
+ if (n_cores > 1) and use_image_queue:
+
  print('Warning: multiple cores requested, but the image queue is enabled; parallelization ' + \
  'with the image queue is not currently supported, defaulting to one worker')
  n_cores = 1
-
+
  if use_image_queue:
-
+
  assert checkpoint_frequency < 0, \
  'Using an image queue is not currently supported when checkpointing is enabled'
  assert len(results) == 0, \
  'Using an image queue with results loaded from a checkpoint is not currently supported'
  assert n_cores <= 1
- results = run_detector_with_image_queue(image_file_names,
- model_file,
- confidence_threshold,
- quiet,
+ results = run_detector_with_image_queue(image_file_names,
+ model_file,
+ confidence_threshold,
+ quiet,
  image_size=image_size,
  include_image_size=include_image_size,
  include_image_timestamp=include_image_timestamp,
@@ -825,12 +848,14 @@ def load_and_run_detector_batch(model_file,
  detector_options=detector_options,
  loader_workers=loader_workers,
  preprocess_on_image_queue=preprocess_on_image_queue)
-
+
  elif n_cores <= 1:

  # Load the detector
  start_time = time.time()
- detector = load_detector(model_file,detector_options=detector_options,verbose=verbose)
+ detector = load_detector(model_file,
+ detector_options=detector_options,
+ verbose=verbose)
  elapsed = time.time() - start_time
  print('Loaded model in {}'.format(humanfriendly.format_timespan(elapsed)))

@@ -848,11 +873,11 @@ def load_and_run_detector_batch(model_file,

  count += 1

- result = process_image(im_file,
- detector,
- confidence_threshold,
- quiet=quiet,
- image_size=image_size,
+ result = process_image(im_file,
+ detector,
+ confidence_threshold,
+ quiet=quiet,
+ image_size=image_size,
  include_image_size=include_image_size,
  include_image_timestamp=include_image_timestamp,
  include_exif_data=include_exif_data,
@@ -861,97 +886,100 @@ def load_and_run_detector_batch(model_file,

  # Write a checkpoint if necessary
  if (checkpoint_frequency != -1) and ((count % checkpoint_frequency) == 0):
-
+
  print('Writing a new checkpoint after having processed {} images since '
  'last restart'.format(count))
-
+
  _write_checkpoint(checkpoint_path, results)
-
+
  else:
-
+
  # Multiprocessing is enabled at this point
-
+
  # When using multiprocessing, tell the workers to load the model on each
  # process, by passing the model_file string as the "model" argument to
  # process_images.
  detector = model_file

- print('Creating pool with {} cores'.format(n_cores))
+ print('Creating worker pool with {} cores'.format(n_cores))

  if len(already_processed) > 0:
  n_images_all = len(image_file_names)
  image_file_names = [fn for fn in image_file_names if fn not in already_processed]
  print('Loaded {} of {} images from checkpoint'.format(
  len(already_processed),n_images_all))
-
- # Divide images into chunks; we'll send one chunk to each worker process
+
+ # Divide images into chunks; we'll send one chunk to each worker process
  image_batches = list(_chunks_by_number_of_chunks(image_file_names, n_cores))
-
- pool = workerpool(n_cores)
-
- if checkpoint_path is not None:
-
- # Multiprocessing and checkpointing are both enabled at this point
-
- checkpoint_queue = Manager().Queue()
-
- # Pass the "results" array (which may already contain images loaded from an existing
- # checkpoint) to the checkpoint queue handler function, which will append results to
- # the list as they become available.
- checkpoint_thread = Thread(target=_checkpoint_queue_handler,
- args=(checkpoint_path, checkpoint_frequency,
- checkpoint_queue, results), daemon=True)
- checkpoint_thread.start()
-
- pool.map(partial(process_images,
- detector=detector,
- confidence_threshold=confidence_threshold,
- use_image_queue=False,
- quiet=quiet,
- image_size=image_size,
- checkpoint_queue=checkpoint_queue,
- include_image_size=include_image_size,
- include_image_timestamp=include_image_timestamp,
- include_exif_data=include_exif_data,
- augment=augment,
- detector_options=detector_options),
- image_batches)
-
- checkpoint_queue.put(None)

- else:
-
- # Multprocessing is enabled, but checkpointing is not
-
- new_results = pool.map(partial(process_images,
- detector=detector,
- confidence_threshold=confidence_threshold,
- use_image_queue=False,
- quiet=quiet,
- checkpoint_queue=None,
- image_size=image_size,
- include_image_size=include_image_size,
- include_image_timestamp=include_image_timestamp,
- include_exif_data=include_exif_data,
- augment=augment,
- detector_options=detector_options),
- image_batches)
-
- new_results = list(itertools.chain.from_iterable(new_results))
-
- # Append the results we just computed to "results", which is *usually* empty, but will
- # be non-empty if we resumed from a checkpoint
- results += new_results
-
- # ...if checkpointing is/isn't enabled
-
+ pool = None
  try:
- pool.close()
- except Exception as e:
- print('Warning: error closing multiprocessing pool:\n{}'.format(str(e)))
-
+ pool = workerpool(n_cores)
+
+ if checkpoint_path is not None:
+
+ # Multiprocessing and checkpointing are both enabled at this point
+
+ checkpoint_queue = Manager().Queue()
+
+ # Pass the "results" array (which may already contain images loaded from an existing
+ # checkpoint) to the checkpoint queue handler function, which will append results to
+ # the list as they become available.
+ checkpoint_thread = Thread(target=_checkpoint_queue_handler,
+ args=(checkpoint_path, checkpoint_frequency,
+ checkpoint_queue, results), daemon=True)
+ checkpoint_thread.start()
+
+ pool.map(partial(process_images,
+ detector=detector,
+ confidence_threshold=confidence_threshold,
+ use_image_queue=False,
+ quiet=quiet,
+ image_size=image_size,
+ checkpoint_queue=checkpoint_queue,
+ include_image_size=include_image_size,
+ include_image_timestamp=include_image_timestamp,
+ include_exif_data=include_exif_data,
+ augment=augment,
+ detector_options=detector_options),
+ image_batches)
+
+ checkpoint_queue.put(None)
+
+ else:
+
+ # Multprocessing is enabled, but checkpointing is not
+
+ new_results = pool.map(partial(process_images,
+ detector=detector,
+ confidence_threshold=confidence_threshold,
+ use_image_queue=False,
+ quiet=quiet,
+ checkpoint_queue=None,
+ image_size=image_size,
+ include_image_size=include_image_size,
+ include_image_timestamp=include_image_timestamp,
+ include_exif_data=include_exif_data,
+ augment=augment,
+ detector_options=detector_options),
+ image_batches)
+
+ new_results = list(itertools.chain.from_iterable(new_results))
+
+ # Append the results we just computed to "results", which is *usually* empty, but will
+ # be non-empty if we resumed from a checkpoint
+ results += new_results
+
+ # ...if checkpointing is/isn't enabled
+
+ finally:
+ if pool is not None:
+ pool.close()
+ pool.join()
+ print("Pool closed and joined for multi-core inference")
+
  # ...if we're running (1) with image queue, (2) on one core, or (3) on multiple cores
-
+
  # 'results' may have been modified in place, but we also return it for
  # backwards-compatibility.
  return results
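
The new try/finally arrangement above guarantees the worker pool is closed and joined even if inference raises; the shape of that pattern in isolation:

```python
# Sketch of the pool-cleanup pattern introduced above: create the pool
# inside try, close/join it unconditionally in finally.
from multiprocessing.pool import Pool

def run():
    pool = None
    try:
        pool = Pool(4)
        return pool.map(sum, [[1, 2], [3, 4]])
    finally:
        if pool is not None:
            pool.close()
            pool.join()

if __name__ == '__main__':
    print(run())  # [3, 7]
```
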
@@ -964,21 +992,21 @@ def _checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_
  Thread function to accumulate results and write checkpoints when checkpointing and
  multiprocessing are both enabled.
  """
-
+
  result_count = 0
  while True:
- result = checkpoint_queue.get()
- if result is None:
- break
-
+ result = checkpoint_queue.get()
+ if result is None:
+ break
+
  result_count +=1
  results.append(result)

  if (checkpoint_frequency != -1) and (result_count % checkpoint_frequency == 0):
-
+
  print('Writing a new checkpoint after having processed {} images since '
  'last restart'.format(result_count))
-
+
  _write_checkpoint(checkpoint_path, results)


@@ -986,20 +1014,19 @@ def _write_checkpoint(checkpoint_path, results):
  """
  Writes the 'images' field in the dict 'results' to a json checkpoint file.
  """
-
- assert checkpoint_path is not None
-
+
+ assert checkpoint_path is not None
+
  # Back up any previous checkpoints, to protect against crashes while we're writing
  # the checkpoint file.
  checkpoint_tmp_path = None
  if os.path.isfile(checkpoint_path):
  checkpoint_tmp_path = checkpoint_path + '_tmp'
  shutil.copyfile(checkpoint_path,checkpoint_tmp_path)
-
+
  # Write the new checkpoint
- with open(checkpoint_path, 'w') as f:
- json.dump({'images': results}, f, indent=1, default=str)
-
+ ct_utils.write_json(checkpoint_path, {'images': results}, force_str=True)
+
  # Remove the backup checkpoint if it exists
  if checkpoint_tmp_path is not None:
  os.remove(checkpoint_tmp_path)
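
ct_utils.write_json() here replaces the inline json.dump() call; judging from the removed code, force_str=True presumably corresponds to json.dump's default=str (stringify values that aren't natively serializable). A hedged sketch of an equivalent helper under that assumption:

```python
# Hedged sketch: an equivalent of the ct_utils.write_json call above,
# assuming force_str=True maps to json.dump(..., default=str).
import json

def write_json(path, content, indent=1, force_str=False):
    with open(path, 'w') as f:
        json.dump(content, f, indent=indent,
                  default=str if force_str else None)

write_json('/tmp/checkpoint.json', {'images': []}, force_str=True)
```
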
@@ -1008,33 +1035,33 @@ def _write_checkpoint(checkpoint_path, results):
 def get_image_datetime(image):
     """
     Reads EXIF datetime from a PIL Image object.
-
+
     Args:
         image (Image): the PIL Image object from which we should read datetime information
-
+
     Returns:
         str: the EXIF datetime from [image] (a PIL Image object), if available, as a string;
         returns None if EXIF datetime is not available.
     """
-
+
     exif_tags = read_exif.read_pil_exif(image,exif_options)
-
+
     try:
         datetime_str = exif_tags['DateTimeOriginal']
         _ = time.strptime(datetime_str, '%Y:%m:%d %H:%M:%S')
         return datetime_str

     except Exception:
-        return None
+        return None

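get_image_datetime goes through the package's read_exif.read_pil_exif wrapper; for reference, the same DateTimeOriginal lookup can be done with Pillow alone. A sketch, assuming the tag lives in the standard Exif sub-IFD (0x8769) as it does for typical camera JPEGs:

```python
import time
from PIL import Image

def get_exif_datetime(image_path):
    """Return the EXIF DateTimeOriginal string, or None if absent/malformed."""
    with Image.open(image_path) as im:
        exif = im.getexif()
    # DateTimeOriginal (tag 0x9003) lives in the Exif sub-IFD (tag 0x8769)
    datetime_str = exif.get_ifd(0x8769).get(0x9003)
    try:
        # Same validation as the function above: EXIF uses 'YYYY:MM:DD HH:MM:SS'
        time.strptime(datetime_str, '%Y:%m:%d %H:%M:%S')
        return datetime_str
    except Exception:
        return None
```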
 
-def write_results_to_file(results,
-                          output_file,
-                          relative_path_base=None,
-                          detector_file=None,
-                          info=None,
+def write_results_to_file(results,
+                          output_file,
+                          relative_path_base=None,
+                          detector_file=None,
+                          info=None,
                           include_max_conf=False,
-                          custom_metadata=None,
+                          custom_metadata=None,
                           force_forward_slashes=True):
     """
     Writes list of detection results to JSON output file. Format matches:
@@ -1056,11 +1083,11 @@ def write_results_to_file(results,
             a dictionary, but no type/format checks are performed
         force_forward_slashes (bool, optional): convert all slashes in filenames within [results] to
             forward slashes
-
+
     Returns:
         dict: the MD-formatted dictionary that was written to [output_file]
     """
-
+
     if relative_path_base is not None:
         results_relative = []
         for r in results:
@@ -1076,68 +1103,67 @@ def write_results_to_file(results,
             r_converted['file'] = r_converted['file'].replace('\\','/')
             results_converted.append(r_converted)
         results = results_converted
-
+
     # The typical case: we need to build the 'info' struct
     if info is None:
-
-        info = {
+
+        info = {
             'detection_completion_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
-            'format_version': '1.4'
+            'format_version': '1.4'
         }
-
+
         if detector_file is not None:
             detector_filename = os.path.basename(detector_file)
-            detector_version = get_detector_version_from_filename(detector_filename)
+            detector_version = get_detector_version_from_filename(detector_filename,verbose=True)
             detector_metadata = get_detector_metadata_from_version_string(detector_version)
-            info['detector'] = detector_filename
+            info['detector'] = detector_filename
             info['detector_metadata'] = detector_metadata
         else:
             info['detector'] = 'unknown'
             info['detector_metadata'] = get_detector_metadata_from_version_string('unknown')
-
+
     # If the caller supplied the entire "info" struct
     else:
-
-        if detector_file is not None:
+
+        if detector_file is not None:
             print('Warning (write_results_to_file): info struct and detector file ' + \
                   'supplied, ignoring detector file')

     if custom_metadata is not None:
         info['custom_metadata'] = custom_metadata
-
+
     # The 'max_detection_conf' field used to be included by default, and it caused all kinds
     # of headaches, so it's no longer included unless the user explicitly requests it.
     if not include_max_conf:
         for im in results:
             if 'max_detection_conf' in im:
                 del im['max_detection_conf']
-
+
     # Sort results by filename; not required by the format, but convenient for consistency
     results = sort_list_of_dicts_by_key(results,'file')
-
+
     # Sort detections in descending order by confidence; not required by the format, but
     # convenient for consistency
     for r in results:
         if ('detections' in r) and (r['detections'] is not None):
             r['detections'] = sort_list_of_dicts_by_key(r['detections'], 'conf', reverse=True)
-
+
     final_output = {
         'images': results,
         'detection_categories': run_detector.DEFAULT_DETECTOR_LABEL_MAP,
         'info': info
     }
-
+
     # Create the folder where the output file belongs; this will fail if
     # this is a relative path with no folder component
     try:
         os.makedirs(os.path.dirname(output_file),exist_ok=True)
     except Exception:
         pass
-
-    with open(output_file, 'w') as f:
-        json.dump(final_output, f, indent=1, default=str)
+
+    ct_utils.write_json(output_file, final_output, force_str=True)
     print('Output file saved at {}'.format(output_file))
-
+
     return final_output

 # ...def write_results_to_file(...)
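For orientation, a minimal file produced by write_results_to_file looks like the following (format version 1.4). The category map mirrors run_detector.DEFAULT_DETECTOR_LABEL_MAP, boxes are normalized [x_min, y_min, width, height], and all values here are illustrative:

```python
example_output = {
    'images': [
        {
            'file': 'camera01/IMG_0001.JPG',
            'detections': [
                {'category': '1', 'conf': 0.96, 'bbox': [0.31, 0.42, 0.18, 0.22]}
            ]
        }
    ],
    'detection_categories': {'1': 'animal', '2': 'person', '3': 'vehicle'},
    'info': {
        'detection_completion_time': '2024-01-01 12:00:00',
        'format_version': '1.4',
        'detector': 'md_v5a.0.0.pt'
    }
}
```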
@@ -1146,15 +1172,15 @@ def write_results_to_file(results,
 #%% Interactive driver

 if False:
-
+
     pass

     #%%
-
+
     model_file = 'MDV5A'
     image_dir = r'g:\camera_traps\camera_trap_images'
     output_file = r'g:\temp\md-test.json'
-
+
     recursive = True
     output_relative_filenames = True
     include_max_conf = False
@@ -1162,7 +1188,7 @@ if False:
     image_size = None
     use_image_queue = False
     confidence_threshold = 0.0001
-    checkpoint_frequency = 5
+    checkpoint_frequency = 5
     checkpoint_path = None
     resume_from_checkpoint = 'auto'
     allow_checkpoint_overwrite = False
@@ -1172,11 +1198,11 @@ if False:
     include_image_timestamp = True
     include_exif_data = True
     overwrite_handling = None
-
+
     # Generate a command line
     cmd = 'python run_detector_batch.py "{}" "{}" "{}"'.format(
         model_file,image_dir,output_file)
-
+
     if recursive:
         cmd += ' --recursive'
     if output_relative_filenames:
@@ -1211,18 +1237,18 @@ if False:
         cmd += ' --include_exif_data'
     if overwrite_handling is not None:
         cmd += ' --overwrite_handling {}'.format(overwrite_handling)
-
+
     print(cmd)
     import clipboard; clipboard.copy(cmd)
-
-
+
+
     #%% Run inference interactively
-
-    image_file_names = path_utils.find_images(image_dir, recursive=False)
+
+    image_file_names = path_utils.find_images(image_dir, recursive=False)
     results = None
-
+
     start_time = time.time()
-
+
     results = load_and_run_detector_batch(model_file=model_file,
                                           image_file_names=image_file_names,
                                           checkpoint_path=checkpoint_path,
@@ -1233,21 +1259,22 @@ if False:
                                           use_image_queue=use_image_queue,
                                           quiet=quiet,
                                           image_size=image_size)
-
+
     elapsed = time.time() - start_time
-
+
     print('Finished inference in {}'.format(humanfriendly.format_timespan(elapsed)))

-
+
 #%% Command-line driver

-def main():
-
+def main(): # noqa
+
     parser = argparse.ArgumentParser(
         description='Module to run a TF/PT animal detection model on lots of images')
     parser.add_argument(
         'detector_file',
-        help='Path to detector model file (.pb or .pt). Can also be the strings "MDV4", "MDV5A", or "MDV5B" to request automatic download.')
+        help='Path to detector model file (.pb or .pt). Can also be the strings "MDV4", ' + \
+             '"MDV5A", or "MDV5B" to request automatic download.')
     parser.add_argument(
         'image_file',
         help=\
@@ -1279,7 +1306,7 @@ def main():
         '--image_size',
         type=int,
         default=None,
-        help=('Force image resizing to a specific integer size on the long axis (not recommended to change this)'))
+        help=('Force image resizing to a specific integer size on the long axis (not recommended to change this)'))
     parser.add_argument(
         '--augment',
         action='store_true',
@@ -1316,7 +1343,7 @@ def main():
         type=str,
         default=None,
         help='File name to which checkpoints will be written if checkpoint_frequency is > 0, ' + \
-             'defaults to md_checkpoint_[date].json in the same folder as the output file')
+             'defaults to md_checkpoint_[date].json in the same folder as the output file')
     parser.add_argument(
         '--resume_from_checkpoint',
         type=str,
@@ -1367,7 +1394,7 @@ def main():
         type=str,
         default='overwrite',
         help='What should we do if the output file exists? overwrite/skip/error (default overwrite)'
-        )
+        )
     parser.add_argument(
         '--force_model_download',
         action='store_true',
@@ -1387,28 +1414,29 @@ def main():
         metavar='KEY=VALUE',
         default='',
         help='Detector-specific options, as a space-separated list of key-value pairs')
-
+
     if len(sys.argv[1:]) == 0:
         parser.print_help()
         parser.exit()

     args = parser.parse_args()
-
+
     global verbose
     global use_threads_for_queue
-
+
     if args.verbose:
         verbose = True
     if args.use_threads_for_queue:
         use_threads_for_queue = True
-
+
     detector_options = parse_kvp_list(args.detector_options)
-
-    # If the specified detector file is really the name of a known model, find
+
+    # If the specified detector file is really the name of a known model, find
     # (and possibly download) that model
-    args.detector_file = try_download_known_detector(args.detector_file,
-                                                     force_download=args.force_model_download)
-
+    args.detector_file = try_download_known_detector(args.detector_file,
+                                                     force_download=args.force_model_download,
+                                                     verbose=verbose)
+
     assert os.path.exists(args.detector_file), \
         'detector file {} does not exist'.format(args.detector_file)
     assert 0.0 <= args.threshold <= 1.0, 'Confidence threshold needs to be between 0 and 1'
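--detector_options arrives as a space-separated list of KEY=VALUE pairs and is parsed by parse_kvp_list. A plausible sketch of that parsing, assuming the helper takes the raw string; the real implementation lives in the package's utility module and may differ:

```python
def parse_kvp_list(kvp_string):
    # Parse 'k1=v1 k2=v2' into {'k1': 'v1', 'k2': 'v2'}
    options = {}
    for token in kvp_string.split():
        assert '=' in token, 'Illegal key-value pair: {}'.format(token)
        key, value = token.split('=', 1)
        options[key] = value
    return options

print(parse_kvp_list('foo=bar verbose=true'))  # {'foo': 'bar', 'verbose': 'true'}
```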
@@ -1439,12 +1467,12 @@ def main():

     if len(output_dir) > 0:
         os.makedirs(output_dir,exist_ok=True)
-
+
     assert not os.path.isdir(args.output_file), 'Specified output file is a directory'
-
+
     if args.class_mapping_filename is not None:
         _load_custom_class_mapping(args.class_mapping_filename)
-
+
     # Load the checkpoint if available
     #
     # File paths in the checkpoint are always absolute paths; conversion to relative paths
@@ -1463,7 +1491,7 @@ def main():
                 len(checkpoint_files),output_dir))
             checkpoint_files = sorted(checkpoint_files)
             checkpoint_file_relative = checkpoint_files[-1]
-            checkpoint_file = os.path.join(output_dir,checkpoint_file_relative)
+            checkpoint_file = os.path.join(output_dir,checkpoint_file_relative)
         else:
             checkpoint_file = args.resume_from_checkpoint
             assert os.path.exists(checkpoint_file), \
@@ -1483,7 +1511,7 @@ def main():
     if os.path.isdir(args.image_file):
         image_file_names = path_utils.find_images(args.image_file, args.recursive)
         if len(image_file_names) > 0:
-            print('{} image files found in the input directory'.format(len(image_file_names)))
+            print('{} image files found in the input directory'.format(len(image_file_names)))
         else:
             if (args.recursive):
                 print('No image files found in directory {}, exiting'.format(args.image_file))
@@ -1492,14 +1520,14 @@ def main():
                       '--recursive?'.format(
                           args.image_file))
             return
-
+
     # A json list of image paths
-    elif os.path.isfile(args.image_file) and args.image_file.endswith('.json'):
+    elif os.path.isfile(args.image_file) and args.image_file.endswith('.json'):
         with open(args.image_file) as f:
             image_file_names = json.load(f)
         print('Loaded {} image filenames from .json list file {}'.format(
             len(image_file_names),args.image_file))
-
+
     # A text list of image paths
     elif os.path.isfile(args.image_file) and args.image_file.endswith('.txt'):
         with open(args.image_file) as f:
@@ -1507,51 +1535,51 @@ def main():
             image_file_names = [fn.strip() for fn in image_file_names if len(fn.strip()) > 0]
         print('Loaded {} image filenames from .txt list file {}'.format(
             len(image_file_names),args.image_file))
-
+
     # A single image file
     elif os.path.isfile(args.image_file) and path_utils.is_image_file(args.image_file):
         image_file_names = [args.image_file]
         print('Processing image {}'.format(args.image_file))
-
-    else:
+
+    else:
         raise ValueError('image_file specified is not a directory, a json list, or an image file '
                          '(or does not have a recognizable extension).')

-    # At this point, regardless of how they were specified, [image_file_names] is a list of
+    # At this point, regardless of how they were specified, [image_file_names] is a list of
     # absolute image paths.
     assert len(image_file_names) > 0, 'Specified image_file does not point to valid image files'
-
+
     # Convert to forward slashes to facilitate comparison with previous results
     image_file_names = [fn.replace('\\','/') for fn in image_file_names]
-
+
     # We can head off many problems related to incorrect command line formulation if we confirm
-    # that one image exists before proceeding. The use of the first image for this test is
+    # that one image exists before proceeding. The use of the first image for this test is
     # arbitrary.
     assert os.path.exists(image_file_names[0]), \
         'The first image to be processed does not exist at {}'.format(image_file_names[0])

     # Possibly load results from a previous pass
     previous_results = None
-
+
     if args.previous_results_file is not None:
-
+
         assert os.path.isfile(args.previous_results_file), \
             'Could not find previous results file {}'.format(args.previous_results_file)
         with open(args.previous_results_file,'r') as f:
             previous_results = json.load(f)
-
+
         assert previous_results['detection_categories'] == run_detector.DEFAULT_DETECTOR_LABEL_MAP, \
             "Can't merge previous results when those results use a different set of detection categories"
-
+
         print('Loaded previous results for {} images from {}'.format(
             len(previous_results['images']), args.previous_results_file))
-
-        # Convert previous result filenames to absolute paths if necessary
+
+        # Convert previous result filenames to absolute paths if necessary
         #
-        # We asserted above to make sure that we are using relative paths and processing a
+        # We asserted above to make sure that we are using relative paths and processing a
         # folder, but just to be super-clear...
         assert os.path.isdir(args.image_file)
-
+
         previous_image_files_set = set()
         for im in previous_results['images']:
             assert not os.path.isabs(im['file']), \
@@ -1559,54 +1587,53 @@ def main():
             fn_abs = os.path.join(args.image_file,im['file']).replace('\\','/')
             # Absolute paths are expected at the final output stage below
             im['file'] = fn_abs
-            previous_image_files_set.add(fn_abs)
-
+            previous_image_files_set.add(fn_abs)
+
         image_file_names_to_keep = []
         for fn_abs in image_file_names:
             if fn_abs not in previous_image_files_set:
                 image_file_names_to_keep.append(fn_abs)
-
+
         print('Based on previous results file, processing {} of {} images'.format(
             len(image_file_names_to_keep), len(image_file_names)))
-
+
         image_file_names = image_file_names_to_keep
-
+
     # ...if we're handling previous results
-
+
     # Test that we can write to the output_file's dir if checkpointing requested
     if args.checkpoint_frequency != -1:
-
+
         if args.checkpoint_path is not None:
             checkpoint_path = args.checkpoint_path
         else:
             checkpoint_path = os.path.join(output_dir,
                                            'md_checkpoint_{}.json'.format(
                                                datetime.now().strftime("%Y%m%d%H%M%S")))
-
+
         # Don't overwrite existing checkpoint files, this is a sure-fire way to eventually
         # erase someone's checkpoint.
         if (checkpoint_path is not None) and (not args.allow_checkpoint_overwrite) \
             and (args.resume_from_checkpoint is None):
-
+
             assert not os.path.isfile(checkpoint_path), \
                 f'Checkpoint path {checkpoint_path} already exists, delete or move it before ' + \
                 're-using the same checkpoint path, or specify --allow_checkpoint_overwrite'

-
+
         # Confirm that we can write to the checkpoint path; this avoids issues where
         # we crash after several thousand images.
         #
-        # But actually, commenting this out for now... the scenario where we are resuming from a
+        # But actually, commenting this out for now... the scenario where we are resuming from a
         # checkpoint, then immediately overwrite that checkpoint with empty data is higher-risk
         # than the annoyance of crashing a few minutes after starting a job.
         if False:
-            with open(checkpoint_path, 'w') as f:
-                json.dump({'images': []}, f)
-
+            ct_utils.write_json(checkpoint_path, {'images': []}, indent=None)
+
         print('The checkpoint file will be written to {}'.format(checkpoint_path))
-
+
     else:
-
+
         if args.checkpoint_path is not None:
             print('Warning: checkpointing disabled because checkpoint_frequency is -1, ' + \
                   'but a checkpoint path was specified')
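Putting the checkpoint options together: the interactive driver earlier in the file builds command lines the same way, so a long job with periodic checkpointing and auto-resume might be launched with something like the command below. Paths and frequency are examples, and the flags are the ones handled by the parser and the logic above:

```python
# Illustrative command line only; adjust paths and frequency for a real run.
cmd = ('python run_detector_batch.py MDV5A "/data/images" "/data/out.json" '
       '--recursive --checkpoint_frequency 1000 --resume_from_checkpoint auto')
print(cmd)
```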
@@ -1641,23 +1668,23 @@ def main():
         len(results),humanfriendly.format_timespan(elapsed),images_per_second))

     relative_path_base = None
-
-    # We asserted above to make sure that if output_relative_filenames is set,
+
+    # We asserted above to make sure that if output_relative_filenames is set,
     # args.image_file is a folder, but we'll double-check for clarity.
     if args.output_relative_filenames:
         assert os.path.isdir(args.image_file)
         relative_path_base = args.image_file
-
+
     # Merge results from a previous file if necessary
     if previous_results is not None:
         previous_filenames_set = set([im['file'] for im in previous_results['images']])
         new_filenames_set = set([im['file'] for im in results])
         assert len(previous_filenames_set.intersection(new_filenames_set)) == 0, \
             'Previous results handling error: redundant image filenames'
-        results.extend(previous_results['images'])
-
-    write_results_to_file(results,
-                          args.output_file,
+        results.extend(previous_results['images'])
+
+    write_results_to_file(results,
+                          args.output_file,
                           relative_path_base=relative_path_base,
                           detector_file=args.detector_file,
                           include_max_conf=args.include_max_conf)
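The merge step above relies on a simple invariant: the previous-results file and the new run must cover disjoint filename sets (enforced by the assert), so extending the list can never produce duplicate image entries. A tiny worked example of that invariant, with made-up filenames:

```python
previous = [{'file': 'a.jpg'}, {'file': 'b.jpg'}]   # from --previous_results_file
new = [{'file': 'c.jpg'}]                           # just computed
assert not {im['file'] for im in previous} & {im['file'] for im in new}
merged = new + previous
print([im['file'] for im in merged])  # ['c.jpg', 'a.jpg', 'b.jpg']
```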