megadetector 10.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. megadetector/__init__.py +0 -0
  2. megadetector/api/__init__.py +0 -0
  3. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  7. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  8. megadetector/classification/__init__.py +0 -0
  9. megadetector/classification/aggregate_classifier_probs.py +108 -0
  10. megadetector/classification/analyze_failed_images.py +227 -0
  11. megadetector/classification/cache_batchapi_outputs.py +198 -0
  12. megadetector/classification/create_classification_dataset.py +626 -0
  13. megadetector/classification/crop_detections.py +516 -0
  14. megadetector/classification/csv_to_json.py +226 -0
  15. megadetector/classification/detect_and_crop.py +853 -0
  16. megadetector/classification/efficientnet/__init__.py +9 -0
  17. megadetector/classification/efficientnet/model.py +415 -0
  18. megadetector/classification/efficientnet/utils.py +608 -0
  19. megadetector/classification/evaluate_model.py +520 -0
  20. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  21. megadetector/classification/json_to_azcopy_list.py +63 -0
  22. megadetector/classification/json_validator.py +696 -0
  23. megadetector/classification/map_classification_categories.py +276 -0
  24. megadetector/classification/merge_classification_detection_output.py +509 -0
  25. megadetector/classification/prepare_classification_script.py +194 -0
  26. megadetector/classification/prepare_classification_script_mc.py +228 -0
  27. megadetector/classification/run_classifier.py +287 -0
  28. megadetector/classification/save_mislabeled.py +110 -0
  29. megadetector/classification/train_classifier.py +827 -0
  30. megadetector/classification/train_classifier_tf.py +725 -0
  31. megadetector/classification/train_utils.py +323 -0
  32. megadetector/data_management/__init__.py +0 -0
  33. megadetector/data_management/animl_to_md.py +161 -0
  34. megadetector/data_management/annotations/__init__.py +0 -0
  35. megadetector/data_management/annotations/annotation_constants.py +33 -0
  36. megadetector/data_management/camtrap_dp_to_coco.py +270 -0
  37. megadetector/data_management/cct_json_utils.py +566 -0
  38. megadetector/data_management/cct_to_md.py +184 -0
  39. megadetector/data_management/cct_to_wi.py +293 -0
  40. megadetector/data_management/coco_to_labelme.py +284 -0
  41. megadetector/data_management/coco_to_yolo.py +701 -0
  42. megadetector/data_management/databases/__init__.py +0 -0
  43. megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
  44. megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
  45. megadetector/data_management/databases/integrity_check_json_db.py +563 -0
  46. megadetector/data_management/databases/subset_json_db.py +195 -0
  47. megadetector/data_management/generate_crops_from_cct.py +200 -0
  48. megadetector/data_management/get_image_sizes.py +164 -0
  49. megadetector/data_management/labelme_to_coco.py +559 -0
  50. megadetector/data_management/labelme_to_yolo.py +349 -0
  51. megadetector/data_management/lila/__init__.py +0 -0
  52. megadetector/data_management/lila/create_lila_blank_set.py +556 -0
  53. megadetector/data_management/lila/create_lila_test_set.py +192 -0
  54. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  55. megadetector/data_management/lila/download_lila_subset.py +182 -0
  56. megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
  57. megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
  58. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  59. megadetector/data_management/lila/lila_common.py +319 -0
  60. megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
  61. megadetector/data_management/mewc_to_md.py +344 -0
  62. megadetector/data_management/ocr_tools.py +873 -0
  63. megadetector/data_management/read_exif.py +964 -0
  64. megadetector/data_management/remap_coco_categories.py +195 -0
  65. megadetector/data_management/remove_exif.py +156 -0
  66. megadetector/data_management/rename_images.py +194 -0
  67. megadetector/data_management/resize_coco_dataset.py +665 -0
  68. megadetector/data_management/speciesnet_to_md.py +41 -0
  69. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  70. megadetector/data_management/yolo_output_to_md_output.py +594 -0
  71. megadetector/data_management/yolo_to_coco.py +984 -0
  72. megadetector/data_management/zamba_to_md.py +188 -0
  73. megadetector/detection/__init__.py +0 -0
  74. megadetector/detection/change_detection.py +840 -0
  75. megadetector/detection/process_video.py +479 -0
  76. megadetector/detection/pytorch_detector.py +1451 -0
  77. megadetector/detection/run_detector.py +1267 -0
  78. megadetector/detection/run_detector_batch.py +2172 -0
  79. megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
  80. megadetector/detection/run_md_and_speciesnet.py +1604 -0
  81. megadetector/detection/run_tiled_inference.py +1044 -0
  82. megadetector/detection/tf_detector.py +209 -0
  83. megadetector/detection/video_utils.py +1379 -0
  84. megadetector/postprocessing/__init__.py +0 -0
  85. megadetector/postprocessing/add_max_conf.py +72 -0
  86. megadetector/postprocessing/categorize_detections_by_size.py +166 -0
  87. megadetector/postprocessing/classification_postprocessing.py +1943 -0
  88. megadetector/postprocessing/combine_batch_outputs.py +249 -0
  89. megadetector/postprocessing/compare_batch_results.py +2110 -0
  90. megadetector/postprocessing/convert_output_format.py +403 -0
  91. megadetector/postprocessing/create_crop_folder.py +629 -0
  92. megadetector/postprocessing/detector_calibration.py +570 -0
  93. megadetector/postprocessing/generate_csv_report.py +522 -0
  94. megadetector/postprocessing/load_api_results.py +223 -0
  95. megadetector/postprocessing/md_to_coco.py +428 -0
  96. megadetector/postprocessing/md_to_labelme.py +351 -0
  97. megadetector/postprocessing/md_to_wi.py +41 -0
  98. megadetector/postprocessing/merge_detections.py +392 -0
  99. megadetector/postprocessing/postprocess_batch_results.py +2140 -0
  100. megadetector/postprocessing/remap_detection_categories.py +226 -0
  101. megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
  102. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
  103. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
  104. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
  105. megadetector/postprocessing/separate_detections_into_folders.py +795 -0
  106. megadetector/postprocessing/subset_json_detector_output.py +964 -0
  107. megadetector/postprocessing/top_folders_to_bottom.py +238 -0
  108. megadetector/postprocessing/validate_batch_results.py +332 -0
  109. megadetector/taxonomy_mapping/__init__.py +0 -0
  110. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  111. megadetector/taxonomy_mapping/map_new_lila_datasets.py +211 -0
  112. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
  113. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
  114. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  115. megadetector/taxonomy_mapping/simple_image_download.py +231 -0
  116. megadetector/taxonomy_mapping/species_lookup.py +1008 -0
  117. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  118. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  119. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  120. megadetector/tests/__init__.py +0 -0
  121. megadetector/tests/test_nms_synthetic.py +335 -0
  122. megadetector/utils/__init__.py +0 -0
  123. megadetector/utils/ct_utils.py +1857 -0
  124. megadetector/utils/directory_listing.py +199 -0
  125. megadetector/utils/extract_frames_from_video.py +307 -0
  126. megadetector/utils/gpu_test.py +125 -0
  127. megadetector/utils/md_tests.py +2072 -0
  128. megadetector/utils/path_utils.py +2872 -0
  129. megadetector/utils/process_utils.py +172 -0
  130. megadetector/utils/split_locations_into_train_val.py +237 -0
  131. megadetector/utils/string_utils.py +234 -0
  132. megadetector/utils/url_utils.py +825 -0
  133. megadetector/utils/wi_platform_utils.py +968 -0
  134. megadetector/utils/wi_taxonomy_utils.py +1766 -0
  135. megadetector/utils/write_html_image_list.py +239 -0
  136. megadetector/visualization/__init__.py +0 -0
  137. megadetector/visualization/plot_utils.py +309 -0
  138. megadetector/visualization/render_images_with_thumbnails.py +243 -0
  139. megadetector/visualization/visualization_utils.py +1973 -0
  140. megadetector/visualization/visualize_db.py +630 -0
  141. megadetector/visualization/visualize_detector_output.py +498 -0
  142. megadetector/visualization/visualize_video_output.py +705 -0
  143. megadetector-10.0.15.dist-info/METADATA +115 -0
  144. megadetector-10.0.15.dist-info/RECORD +147 -0
  145. megadetector-10.0.15.dist-info/WHEEL +5 -0
  146. megadetector-10.0.15.dist-info/licenses/LICENSE +19 -0
  147. megadetector-10.0.15.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1314 @@
1
+ """
2
+
3
+ run_inference_with_yolov5_val.py
4
+
5
+ Runs a folder of images through MegaDetector (or another YOLOv5/YOLOv8 model) with YOLO's
6
+ val.py, converting the output to the standard MD format. The reasons this script exists,
7
+ as an alternative to the standard run_detector_batch.py are:
8
+
9
+ * This script provides access to YOLO's test-time augmentation tools.
10
+ * This script serves as a reference implementation: by any reasonable definition, YOLOv5's
11
+ val.py produces the "correct" result for any image, since it matches what was used in
12
+ training.
13
+ * This script works for any Ultralytics detection model, including YOLOv8 models
14
+
15
+ YOLOv5's val.py uses each file's base name as a unique identifier, which doesn't work
16
+ when you have typical camera trap images like:
17
+
18
+ * a/b/c/RECONYX0001.JPG
19
+ * d/e/f/RECONYX0001.JPG
20
+
21
+ ...both of which would just be "RECONYX0001.JPG". So this script jumps through a bunch of
22
+ hoops to put symlinks in a flat folder, run YOLOv5 on that folder, and map the results back
23
+ to the real files.
24
+
25
+ If you are running a YOLOv5 model, this script currently requires the caller to supply the path
26
+ where a working YOLOv5 install lives, and assumes that the current conda environment is all set up for
27
+ YOLOv5. If you are running a YOLOv8 model, the folder doesn't matter, but it assumes that ultralytics
28
+ tools are available in the current environment.
29
+
30
+ By default, this script uses symlinks to format the input images in a way that YOLO's
31
+ val.py likes, as per above. This requires admin privileges on Windows... actually technically this
32
+ only requires permissions to create symbolic links, but I've never seen a case where someone has
33
+ that permission and *doesn't* have admin privileges. If you are running this script on
34
+ Windows and you don't have admin privileges, use --no_use_symlinks, which will make copies of images,
35
+ rather than using symlinks.
36
+
37
+ """
38
+
39
+ #%% Imports
40
+
41
+ import os
42
+ import sys
43
+ import uuid
44
+ import glob
45
+ import tempfile
46
+ import shutil
47
+ import json
48
+ import copy
49
+ import argparse
50
+
51
+ from tqdm import tqdm
52
+
53
+ from megadetector.utils import path_utils
54
+ from megadetector.utils import process_utils
55
+ from megadetector.utils import string_utils
56
+ from megadetector.utils.ct_utils import args_to_object
57
+
58
+ from megadetector.utils.ct_utils import is_iterable, split_list_into_fixed_size_chunks
59
+ from megadetector.utils import ct_utils
60
+ from megadetector.utils.path_utils import path_is_abs
61
+ from megadetector.data_management import yolo_output_to_md_output
62
+ from megadetector.detection.run_detector import try_download_known_detector
63
+ from megadetector.postprocessing.combine_batch_outputs import combine_batch_output_files
64
+
65
# Default image size when test-time augmentation is enabled: 1.3x the no-augmentation
# default, truncated to an int (1664).
#
# NOTE(review): presumably used as the fallback when YoloInferenceOptions.image_size is
# None; the consuming code is not visible in this section, confirm before relying on it.
default_image_size_with_augmentation = int(1280 * 1.3)

# Default image size when test-time augmentation is disabled.
default_image_size_with_no_augmentation = 1280
67
+
68
+
69
+ #%% Options class
70
+
71
class YoloInferenceOptions:
    """
    Parameters that control the behavior of run_inference_with_yolo_val(), including
    the input/output filenames.

    Every attribute is initialized to its default in __init__; callers typically create an
    instance and overwrite the fields they care about before passing it to
    run_inference_with_yolo_val().
    """

    def __init__(self):

        ## Required-ish ##

        #: Folder of images to process (can be None if image_filename_list contains absolute paths)
        self.input_folder = None

        #: If this is None, [input_folder] can't be None, and we'll process all images in
        #: [input_folder].
        #:
        #: If this is not None, and [input_folder] is not None, this should be a list of relative image
        #: paths within [input_folder] to process, or a .txt or .json file containing a list of
        #: relative image paths.
        #:
        #: If this is not None, and [input_folder] is None, this should be a list of absolute image
        #: paths, or a .txt or .json file containing a list of absolute image paths.
        self.image_filename_list = None

        #: Model filename (ending in .pt), or a well-known model name (e.g. "MDV5A")
        self.model_filename = None

        #: .json output file, in MD results format
        self.output_file = None


        ## Optional ##

        #: Required for older YOLOv5 inference, not for newer ultralytics/YOLOv8 inference
        self.yolo_working_folder = None

        #: Currently 'yolov5' and 'ultralytics' are supported, and really these are proxies for
        #: "the yolov5 repo" and "the ultralytics repo".
        #:
        #: 'yolov8' is also accepted, but is converted to 'ultralytics' (with a warning).
        self.model_type = 'yolov5'

        #: Image size to use; this is a single int, which in ultralytics's terminology means
        #: "scale the long side of the image to this size, and preserve aspect ratio".
        #:
        #: If None, will choose based on whether augmentation is enabled.
        self.image_size = None

        #: Detections below this threshold will not be included in the output file
        #:
        #: Note that the default is stored as a string, not a float.
        self.conf_thres = '0.001'

        #: Batch size... has no impact on results, but may create memory issues if you set
        #: this to large values
        self.batch_size = 1

        #: Device string: typically '0' for GPU 0, '1' for GPU 1, etc., or 'cpu'
        self.device_string = '0'

        #: Should we enable test-time augmentation?
        self.augment = False

        #: Should we enable half-precision inference?
        #:
        #: Should be None (unspecified), 0, or 1; any other value is rejected.
        self.half_precision_enabled = None

        #: Where should we stash the temporary symlinks (or copies) used to give unique identifiers to image
        #: files?
        #:
        #: If this is None, we'll create a folder in system temp space.
        self.symlink_folder = None

        #: Should we use symlinks to give unique identifiers to image files (vs. copies)?
        self.use_symlinks = True

        #: How should we guarantee that YOLO IDs (base filenames) are unique?  Choices are:
        #:
        #: * 'verify': assume image IDs are unique, but verify and error if they're not
        #: * 'links': create symlinks (or copies, depending on use_symlinks) to enforce uniqueness
        #: * 'auto': check whether IDs are unique, create links if necessary
        self.unique_id_strategy = 'links'

        #: Temporary folder to stash intermediate YOLO results.
        #:
        #: If this is None, we'll create a folder in system temp space.
        self.yolo_results_folder = None

        #: Should we remove the symlink folder when we're done?
        self.remove_symlink_folder = True

        #: Should we remove the intermediate results folder when we're done?
        self.remove_yolo_results_folder = True

        #: These are deliberately offset from the standard MD categories; YOLOv5
        #: needs category IDs to start at 0.
        #:
        #: This can also be a string that points to any class mapping file supported
        #: by read_classes_from_yolo_dataset_file(): a YOLO dataset.yaml file, a text
        #: file with a list of classes, or a .json file with an ID --> name dict
        self.yolo_category_id_to_name = {0:'animal',1:'person',2:'vehicle'}

        #: What should we do if the output file already exists?
        #:
        #: Can be 'error', 'skip', or 'overwrite'.
        self.overwrite_handling = 'skip'

        #: If True, we'll do a dry run that lets you preview the YOLO val command, without
        #: actually running it.
        self.preview_yolo_command_only = False

        #: By default, if any errors occur while we're copying images or creating symlinks, it's
        #: game over.  If this is True, those errors become warnings, and we plow ahead.
        self.treat_copy_failures_as_warnings = False

        #: Save YOLO console output
        self.save_yolo_debug_output = False

        #: Whether to search for images recursively within [input_folder]
        #:
        #: Ignored if a list of files is provided.
        self.recursive = True

        #: Maximum number of images to run in a single chunk
        #:
        #: If this is None or <= 0, all images are processed in a single job.
        self.checkpoint_frequency = None

        #: By default, if we're creating symlinks to images, we append a unique job ID to the
        #: symlink folder.  If the caller is 100% sure that the symlink folder can be re-used
        #: across calls, this can be set to False.
        self.append_job_id_to_symlink_folder = True

        #: By default, we turn category ID 0 coming out of the YOLO .json file
        #: into category 1 in the MD-formatted .json file.
        self.offset_yolo_category_ids = True

    # ...def __init__()

# ...YoloInferenceOptions()
203
+
204
+
205
+ #%% Support functions
206
+
207
def _clean_up_temporary_folders(options,
                                symlink_folder,yolo_results_folder,
                                symlink_folder_is_temp_folder,yolo_folder_is_temp_folder):
    """
    Remove temporary symlink/results folders, unless the caller requested that we leave them in place.

    A folder that is None or no longer exists is skipped rather than treated as an error.  This
    matters when one folder is nested inside the other (removing the first also removes the
    second), and it keeps a cleanup hiccup from failing a job whose results are already written.

    Args:
        options (YoloInferenceOptions): only [remove_symlink_folder] and
            [remove_yolo_results_folder] are read
        symlink_folder (str): folder containing the symlinks (or copies) of the input images
        yolo_results_folder (str): folder containing intermediate YOLO results
        symlink_folder_is_temp_folder (bool): whether [symlink_folder] was created in
            temp space by this module, rather than supplied by the caller
        yolo_folder_is_temp_folder (bool): whether [yolo_results_folder] was created in
            temp space by this module, rather than supplied by the caller
    """

    if options.remove_symlink_folder:
        # Only remove the folder if it's still there; it may already be gone
        if (symlink_folder is not None) and os.path.isdir(symlink_folder):
            shutil.rmtree(symlink_folder)
    elif symlink_folder_is_temp_folder:
        print('Warning: using temporary symlink folder {}, but not removing it'.format(
            symlink_folder))

    if options.remove_yolo_results_folder:
        # This folder may have been removed along with the symlink folder if it was nested
        # inside it
        if (yolo_results_folder is not None) and os.path.isdir(yolo_results_folder):
            shutil.rmtree(yolo_results_folder)
    elif yolo_folder_is_temp_folder:
        print('Warning: using temporary YOLO results folder {}, but not removing it'.format(
            yolo_results_folder))
225
+
226
+
227
def get_stats_for_category(filename,category='all'):
    """
    Retrieve statistics for a category from the YOLO console output
    stored in [filename].

    The first line whose first space-delimited token is exactly [category], and which
    contains exactly seven tokens (category, image count, label count, P, R, mAP50, mAP50-95),
    is parsed and returned.  Lines for other categories that merely start with the same
    characters (e.g. "alligator" when [category] is "all") are ignored.

    Args:
        filename (str): a text file containing console output from a YOLO val run
        category (str, optional): a category name

    Returns:
        dict: a dict with fields category, n_images, n_labels, P, R, mAP50, and mAP50-95,
        or None if no matching line was found

    Raises:
        AssertionError: if a matching results line appears before a header line that
            mentions both the mAP50 and mAP50-95 columns
    """

    with open(filename,'r',encoding='utf-8') as f:
        lines = f.readlines()

    # This is just a hedge to make sure there isn't some YOLO version floating
    # around that used different IoU thresholds in the console output.
    found_map50 = False
    found_map5095 = False

    for line in lines:

        s = line.strip()
        s_lower = s.lower()

        if ' map50 ' in s_lower or ' map@.5 ' in s_lower:
            found_map50 = True
        if 'map50-95' in s_lower or 'map@.5:.95' in s_lower:
            found_map5095 = True

        # Cheap pre-filter; the exact match on the first token happens below
        if not s.startswith(category):
            continue

        # Columns are separated by runs of spaces; drop the empty tokens
        tokens_filtered = [token for token in s.split(' ') if token]

        if len(tokens_filtered) != 7:
            continue

        # A different category that shares a prefix with [category] is not an error,
        # it's just not the line we're looking for.
        if tokens_filtered[0] != category:
            continue

        assert found_map50 and found_map5095, \
            'Parsing error in YOLO console output file {}'.format(filename)

        to_return = {}
        to_return['category'] = category
        to_return['n_images'] = int(tokens_filtered[1])
        to_return['n_labels'] = int(tokens_filtered[2])
        to_return['P'] = float(tokens_filtered[3])
        to_return['R'] = float(tokens_filtered[4])
        to_return['mAP50'] = float(tokens_filtered[5])
        to_return['mAP50-95'] = float(tokens_filtered[6])
        return to_return

    # ...for each line

    return None
283
+
284
+
285
+ #%% Main function
286
+
287
+ def run_inference_with_yolo_val(options):
288
+ """
289
+ Runs a folder of images through MegaDetector (or another YOLOv5/YOLOv8 model) with YOLO's
290
+ val.py, converting the output to the standard MD format.
291
+
292
+ Args:
293
+ options (YoloInferenceOptions): all the parameters used to control this process,
294
+ including filenames; see YoloInferenceOptions for details
295
+ """
296
+
297
+ ##%% Input and path handling
298
+
299
+ default_options = YoloInferenceOptions()
300
+
301
+ for k in options.__dict__.keys():
302
+ if k not in default_options.__dict__:
303
+ # Print warnings about unexpected variables, except for things like
304
+ # "no_append_job_id_to_symlink_folder", which just negate existing objects
305
+ if not k.startswith('no_'):
306
+ print('Warning: unexpected variable {} in options object'.format(k))
307
+
308
+ if options.model_type == 'yolov8':
309
+
310
+ print('Warning: model type "yolov8" supplied, "ultralytics" is the preferred model ' + \
311
+ 'type string for YOLOv8 models')
312
+ options.model_type = 'ultralytics'
313
+
314
+ if (options.model_type == 'yolov5') and ('yolov8' in options.model_filename.lower()):
315
+ print('\n\n*** Warning: model type set as "yolov5", but your model filename contains "yolov8"... ' + \
316
+ 'did you mean to use --model_type yolov8?" ***\n\n')
317
+
318
+ if options.yolo_working_folder is None:
319
+ assert options.model_type == 'ultralytics', \
320
+ 'A working folder is required to run YOLOv5 val.py'
321
+ else:
322
+ assert os.path.isdir(options.yolo_working_folder), \
323
+ 'Could not find working folder {}'.format(options.yolo_working_folder)
324
+
325
+ if options.half_precision_enabled is not None:
326
+ assert options.half_precision_enabled in (0,1), \
327
+ 'Invalid value {} for --half_precision_enabled (should be 0 or 1)'.format(
328
+ options.half_precision_enabled)
329
+
330
+ # If the model filename is a known model string (e.g. "MDv5A", download the model if necessary)
331
+ model_filename = try_download_known_detector(options.model_filename)
332
+
333
+ assert os.path.isfile(model_filename), \
334
+ 'Could not find model file {}'.format(model_filename)
335
+
336
+ assert (options.input_folder is not None) or (options.image_filename_list is not None), \
337
+ 'You must specify a folder and/or a file list'
338
+
339
+ if options.input_folder is not None:
340
+ assert os.path.isdir(options.input_folder), 'Could not find input folder {}'.format(
341
+ options.input_folder)
342
+
343
+ if os.path.exists(options.output_file):
344
+ if options.overwrite_handling == 'skip':
345
+ print('Warning: output file {} exists, skipping'.format(options.output_file))
346
+ return
347
+ elif options.overwrite_handling == 'overwrite':
348
+ print('Warning: output file {} exists, overwriting'.format(options.output_file))
349
+ elif options.overwrite_handling == 'error':
350
+ raise ValueError('Output file {} exists'.format(options.output_file))
351
+ else:
352
+ raise ValueError('Unknown output handling method {}'.format(options.overwrite_handling))
353
+
354
+ output_dir = os.path.dirname(options.output_file)
355
+ if len(output_dir) > 0:
356
+ os.makedirs(output_dir, exist_ok=True)
357
+
358
+ if options.input_folder is not None:
359
+ options.input_folder = options.input_folder.replace('\\','/')
360
+
361
+
362
+ ##%% Other input handling
363
+
364
+ if isinstance(options.yolo_category_id_to_name,str):
365
+
366
+ assert os.path.isfile(options.yolo_category_id_to_name)
367
+ yolo_dataset_file = options.yolo_category_id_to_name
368
+ options.yolo_category_id_to_name = \
369
+ yolo_output_to_md_output.read_classes_from_yolo_dataset_file(yolo_dataset_file)
370
+ print('Loaded {} category mappings from {}'.format(
371
+ len(options.yolo_category_id_to_name),yolo_dataset_file))
372
+
373
+ temporary_folder = None
374
+ symlink_folder_is_temp_folder = False
375
+ yolo_folder_is_temp_folder = False
376
+
377
+ job_id = str(uuid.uuid1())
378
+
379
+ def get_job_temporary_folder(tf):
380
+ if tf is not None:
381
+ return tf
382
+ tempdir_base = tempfile.gettempdir()
383
+ tf = os.path.join(tempdir_base,'md_to_yolo','md_to_yolo_' + job_id)
384
+ os.makedirs(tf,exist_ok=True)
385
+ return tf
386
+
387
+ symlink_folder = options.symlink_folder
388
+ yolo_results_folder = options.yolo_results_folder
389
+
390
+ if symlink_folder is None:
391
+ temporary_folder = get_job_temporary_folder(temporary_folder)
392
+ symlink_folder = os.path.join(temporary_folder,'symlinks')
393
+ symlink_folder_is_temp_folder = True
394
+
395
+ if yolo_results_folder is None:
396
+ temporary_folder = get_job_temporary_folder(temporary_folder)
397
+ yolo_results_folder = os.path.join(temporary_folder,'yolo_results')
398
+ yolo_folder_is_temp_folder = True
399
+
400
+ if options.append_job_id_to_symlink_folder:
401
+ # Attach a GUID to the symlink folder, regardless of whether we created it
402
+ symlink_folder_inner = os.path.join(symlink_folder,job_id)
403
+ else:
404
+ print('Re-using existing symlink folder {}'.format(symlink_folder))
405
+ symlink_folder_inner = symlink_folder
406
+
407
+ os.makedirs(symlink_folder_inner,exist_ok=True)
408
+ os.makedirs(yolo_results_folder,exist_ok=True)
409
+
410
+
411
+ ##%% Enumerate images
412
+
413
+ image_files_relative = None
414
+ image_files_absolute = None
415
+
416
+ # If the caller just provided a folder, not a list of files...
417
+ if options.image_filename_list is None:
418
+
419
+ assert options.input_folder is not None and os.path.isdir(options.input_folder), \
420
+ 'Could not find input folder {}'.format(options.input_folder)
421
+ image_files_relative = path_utils.find_images(options.input_folder,
422
+ recursive=options.recursive,
423
+ return_relative_paths=True,
424
+ convert_slashes=True)
425
+ image_files_absolute = [os.path.join(options.input_folder,fn) for \
426
+ fn in image_files_relative]
427
+
428
+ else:
429
+
430
+ # If the caller provided a list of image files (rather than a filename pointing
431
+ # to a list of image files)...
432
+ if is_iterable(options.image_filename_list) and not isinstance(options.image_filename_list,str):
433
+
434
+ image_files_relative = options.image_filename_list
435
+
436
+ # If the caller provided a filename pointing to a list of image files...
437
+ else:
438
+
439
+ assert isinstance(options.image_filename_list,str), \
440
+ 'Unrecognized image filename list object type: {}'.format(options.image_filename_list)
441
+ assert os.path.isfile(options.image_filename_list), \
442
+ 'Could not find image filename list file: {}'.format(options.image_filename_list)
443
+ ext = os.path.splitext(options.image_filename_list)[-1].lower()
444
+ assert ext in ('.json','.txt'), \
445
+ 'Unrecognized image filename list file extension: {}'.format(options.image_filename_list)
446
+ if ext == '.json':
447
+ with open(options.image_filename_list,'r') as f:
448
+ image_files_relative = json.load(f)
449
+ assert is_iterable(image_files_relative)
450
+ else:
451
+ assert ext == '.txt'
452
+ with open(options.image_filename_list,'r') as f:
453
+ image_files_relative = f.readlines()
454
+ image_files_relative = [s.strip() for s in image_files_relative]
455
+
456
+ # ...whether the image filename list was supplied as list vs. a filename
457
+
458
+ if options.input_folder is None:
459
+
460
+ image_files_absolute = image_files_relative
461
+
462
+ else:
463
+
464
+ # The list should be relative filenames
465
+ for fn in image_files_relative:
466
+ assert not path_is_abs(fn), \
467
+ 'When providing a folder and a list, paths in the list should be relative'
468
+
469
+ image_files_absolute = \
470
+ [os.path.join(options.input_folder,fn) for fn in image_files_relative]
471
+
472
+ for fn in image_files_absolute:
473
+ assert os.path.isfile(fn), 'Could not find image file {}'.format(fn)
474
+
475
+ # ...whether the caller supplied a list of filenames
476
+
477
+ image_files_absolute = [fn.replace('\\','/') for fn in image_files_absolute]
478
+
479
+ del image_files_relative
480
+
481
+
482
+ ##%% Recurse if necessary to handle checkpoints
483
+
484
+ if options.checkpoint_frequency is not None and options.checkpoint_frequency > 0:
485
+
486
+ chunks = split_list_into_fixed_size_chunks(image_files_absolute,options.checkpoint_frequency)
487
+
488
+ chunk_output_files = []
489
+
490
+ # i_chunk = 0; chunk_files_abs = chunks[i_chunk]
491
+ for i_chunk,chunk_files_abs in enumerate(chunks):
492
+
493
+ print('Processing {} images from chunk {} of {}'.format(
494
+ len(chunk_files_abs),i_chunk,len(chunks)))
495
+
496
+ chunk_options = copy.deepcopy(options)
497
+
498
+ # Run each chunk without checkpointing
499
+ chunk_options.checkpoint_frequency = None
500
+
501
+ if options.input_folder is not None:
502
+ chunk_files_relative = \
503
+ [os.path.relpath(fn,options.input_folder) for fn in chunk_files_abs]
504
+ chunk_options.image_filename_list = chunk_files_relative
505
+ else:
506
+ chunk_options.image_filename_list = chunk_files_abs
507
+
508
+ chunk_options.image_filename_list = \
509
+ [fn.replace('\\','/') for fn in chunk_options.image_filename_list]
510
+
511
+ chunk_string = 'chunk_{}'.format(str(i_chunk).zfill(5))
512
+ chunk_options.yolo_results_folder = yolo_results_folder + '_' + chunk_string
513
+ chunk_options.symlink_folder = symlink_folder + '_' + chunk_string
514
+
515
+ # Put the output file in the parent job's scratch folder
516
+ chunk_output_file = os.path.join(yolo_results_folder,chunk_string + '_results_md_format.json')
517
+ chunk_output_files.append(chunk_output_file)
518
+ chunk_options.output_file = chunk_output_file
519
+
520
+ if os.path.isfile(chunk_output_file):
521
+
522
+ print('Chunk output file {} exists, checking completeness'.format(chunk_output_file))
523
+
524
+ with open(chunk_output_file,'r') as f:
525
+ chunk_results = json.load(f)
526
+ images_in_this_chunk_results_file = [im['file'] for im in chunk_results['images']]
527
+ assert len(images_in_this_chunk_results_file) == len(chunk_options.image_filename_list), \
528
+ f'Expected {len(chunk_options.image_filename_list)} images in ' + \
529
+ f'chunk results file {chunk_output_file}, found {len(images_in_this_chunk_results_file)}, ' + \
530
+ 'possibly this is left over from a previous job?'
531
+ for fn in images_in_this_chunk_results_file:
532
+ assert fn in chunk_options.image_filename_list, \
533
+ f'Unexpected image {fn} in chunk results file {chunk_output_file}, ' + \
534
+ 'possibly this is left over from a previous job?'
535
+
536
+ print('Chunk output file {} exists and is complete, skipping this chunk'.format(
537
+ chunk_output_file))
538
+
539
+ # ...if the outptut file exists
540
+
541
+ else:
542
+
543
+ run_inference_with_yolo_val(chunk_options)
544
+
545
+ # ...if we do/don't have to run this chunk
546
+
547
+ assert os.path.isfile(chunk_options.output_file)
548
+
549
+ # ...for each chunk
550
+
551
+ # Merge
552
+ _ = combine_batch_output_files(input_files=chunk_output_files,
553
+ output_file=options.output_file,
554
+ require_uniqueness=True,
555
+ verbose=True)
556
+
557
+ # Validate
558
+ with open(options.output_file,'r') as f:
559
+ combined_results = json.load(f)
560
+ assert len(combined_results['images']) == len(image_files_absolute), \
561
+ 'Expected {} images in merged output file, found {}'.format(
562
+ len(image_files_absolute),len(combined_results['images']))
563
+
564
+ # Clean up
565
+ _clean_up_temporary_folders(options,
566
+ symlink_folder,yolo_results_folder,
567
+ symlink_folder_is_temp_folder,yolo_folder_is_temp_folder)
568
+
569
+ return
570
+
571
+ # ...if we need to make recursive calls for file chunks
572
+
573
+
574
+ ##%% Create symlinks (or copy images) to give a unique ID to each image
575
+
576
+ # Maps YOLO image IDs (base filename without extension as it will appear in YOLO .json output)
577
+ # to the *original full path* for each image (not the symlink path).
578
+ image_id_to_file = {}
579
+
580
+ # Maps YOLO image IDs (base filename without extension as it will appear in YOLO .json output)
581
+ # to errors, including errors that happen before we run the model at all (e.g. file access errors).
582
+ image_id_to_error = {}
583
+
584
+ create_links = True
585
+
586
+ if options.unique_id_strategy == 'links':
587
+
588
+ create_links = True
589
+
590
+ else:
591
+
592
+ assert options.unique_id_strategy in ('auto','verify'), \
593
+ 'Unknown unique ID strategy {}'.format(options.unique_id_strategy)
594
+
595
+ image_ids_are_unique = True
596
+
597
+ for i_image,image_fn in tqdm(enumerate(image_files_absolute),total=len(image_files_absolute)):
598
+
599
+ image_id = os.path.splitext(os.path.basename(image_fn))[0]
600
+
601
+ # Is this image ID unique?
602
+ if image_id in image_id_to_file:
603
+ if options.unique_id_strategy == 'verify':
604
+ raise ValueError('"verify" specified for image uniqueness, but ' +
605
+ 'image ID {} occurs more than once:\n\n{}\n\n{}'.format(
606
+ image_id,image_fn,image_id_to_file[image_id]))
607
+ else:
608
+ assert options.unique_id_strategy == 'auto'
609
+ image_ids_are_unique = False
610
+ image_id_to_file = {}
611
+ break
612
+
613
+ image_id_to_file[image_id] = image_fn
614
+
615
+ # ...for each image
616
+
617
+ if image_ids_are_unique:
618
+
619
+ print('"{}" specified for image uniqueness and images are unique, skipping links'.format(
620
+ options.unique_id_strategy))
621
+ assert len(image_id_to_file) == len(image_files_absolute)
622
+ create_links = False
623
+
624
+ else:
625
+
626
+ assert options.unique_id_strategy == 'auto'
627
+ create_links = True
628
+ link_type = 'copies'
629
+ if options.use_symlinks:
630
+ link_type = 'links'
631
+ print('"auto" specified for image uniqueness and images are not unique, defaulting to {}'.format(
632
+ link_type))
633
+
634
+ # ...which unique ID strategy?
635
+
636
+ if create_links:
637
+
638
+ if options.use_symlinks:
639
+ print('Creating {} symlinks in {}'.format(len(image_files_absolute),symlink_folder_inner))
640
+ else:
641
+ print('Symlinks disabled, copying {} images to {}'.format(len(image_files_absolute),symlink_folder_inner))
642
+
643
+ link_full_paths = []
644
+
645
+ # i_image = 0; image_fn = image_files_absolute[i_image]
646
+ for i_image,image_fn in tqdm(enumerate(image_files_absolute),total=len(image_files_absolute)):
647
+
648
+ ext = os.path.splitext(image_fn)[1]
649
+ image_fn_without_extension = os.path.splitext(image_fn)[0]
650
+
651
+ # YOLO .json output identifies images by the base filename without the extension
652
+ image_id = str(i_image).zfill(10)
653
+ image_id_to_file[image_id] = image_fn
654
+ symlink_name = image_id + ext
655
+ symlink_full_path = os.path.join(symlink_folder_inner,symlink_name)
656
+ link_full_paths.append(symlink_full_path)
657
+
658
+ # If annotation files exist, link those too; only useful if we're reading the computed
659
+ # mAP value, but it doesn't hurt.
660
+ annotation_fn = image_fn_without_extension + '.txt'
661
+ annotation_file_exists = False
662
+ if os.path.isfile(annotation_fn):
663
+ annotation_file_exists = True
664
+ annotation_symlink_name = image_id + '.txt'
665
+ annotation_symlink_full_path = os.path.join(symlink_folder_inner,annotation_symlink_name)
666
+
667
+ try:
668
+
669
+ if options.use_symlinks:
670
+ path_utils.safe_create_link(image_fn,symlink_full_path)
671
+ if annotation_file_exists:
672
+ path_utils.safe_create_link(annotation_fn,annotation_symlink_full_path)
673
+ else:
674
+ shutil.copyfile(image_fn,symlink_full_path)
675
+ if annotation_file_exists:
676
+ shutil.copyfile(annotation_fn,annotation_symlink_full_path)
677
+
678
+ except Exception as e:
679
+
680
+ error_string = str(e)
681
+ image_id_to_error[image_id] = error_string
682
+
683
+ # Always break if the user is trying to create symlinks on Windows without
684
+ # permission, 100% of images will always fail in this case.
685
+ if ('a required privilege is not held by the client' in error_string.lower()) or \
686
+ (not options.treat_copy_failures_as_warnings):
687
+
688
+ print('\nError copying/creating link for input file {}: {}'.format(
689
+ image_fn,error_string))
690
+
691
+ raise
692
+
693
+ else:
694
+
695
+ print('Warning: error copying/creating link for input file {}: {}'.format(
696
+ image_fn,error_string))
697
+ continue
698
+
699
+ # ...except
700
+
701
+ # ...for each image
702
+
703
+ # ...if we need to create links/copies
704
+
705
+
706
+ ##%% Create the dataset file if necessary
707
+
708
+ # This may have been passed in as a string, but at this point, we should have
709
+ # loaded the dataset file.
710
+ assert isinstance(options.yolo_category_id_to_name,dict)
711
+
712
+ # Category IDs need to be continuous integers starting at 0
713
+ category_ids = sorted(list(options.yolo_category_id_to_name.keys()))
714
+ assert category_ids[0] == 0
715
+ assert len(category_ids) == 1 + category_ids[-1]
716
+
717
+ yolo_dataset_file = os.path.join(yolo_results_folder,'dataset.yaml')
718
+ yolo_image_list_file = os.path.join(yolo_results_folder,'images.txt')
719
+
720
+ with open(yolo_image_list_file,'w') as f:
721
+
722
+ if create_links:
723
+ image_files_to_write = link_full_paths
724
+ else:
725
+ image_files_to_write = image_files_absolute
726
+
727
+ for fn_abs in image_files_to_write:
728
+ # At least in YOLOv5 val (need to verify for YOLOv8 val), filenames in this
729
+ # text file are treated as relative to the text file itself if they start with
730
+ # "./", otherwise they're treated as absolute paths. Since we don't want to put this
731
+ # text file in the image folder, we'll use absolute paths.
732
+ # fn_relative = os.path.relpath(fn_abs,options.input_folder)
733
+ # f.write(fn_relative + '\n')
734
+ f.write(fn_abs + '\n')
735
+
736
+ if create_links:
737
+ inference_folder = symlink_folder_inner
738
+ else:
739
+ # This doesn't matter, but it has to be a valid path
740
+ inference_folder = options.yolo_results_folder
741
+
742
+ with open(yolo_dataset_file,'w') as f:
743
+
744
+ f.write('path: {}\n'.format(inference_folder))
745
+ # These need to be valid paths, even if you're not using them, and "." is always safe
746
+ f.write('train: .\n')
747
+ f.write('val: .\n')
748
+ f.write('test: {}\n'.format(yolo_image_list_file))
749
+ f.write('\n')
750
+ f.write('nc: {}\n'.format(len(options.yolo_category_id_to_name)))
751
+ f.write('\n')
752
+ f.write('names:\n')
753
+ for category_id in category_ids:
754
+ assert isinstance(category_id,int)
755
+ f.write(' {}: {}\n'.format(category_id,
756
+ options.yolo_category_id_to_name[category_id]))
757
+
758
+
759
+ ##%% Prepare Python command or YOLO CLI command
760
+
761
+ if options.image_size is None:
762
+ if options.augment:
763
+ image_size = default_image_size_with_augmentation
764
+ else:
765
+ image_size = default_image_size_with_no_augmentation
766
+ else:
767
+ image_size = options.image_size
768
+
769
+ image_size_string = str(round(image_size))
770
+
771
+ if options.model_type == 'yolov5':
772
+
773
+ cmd = 'python val.py --task test --data "{}"'.format(yolo_dataset_file)
774
+ cmd += ' --weights "{}"'.format(model_filename)
775
+ cmd += ' --batch-size {} --imgsz {} --conf-thres {}'.format(
776
+ options.batch_size,image_size_string,options.conf_thres)
777
+ cmd += ' --device "{}" --save-json'.format(options.device_string)
778
+ cmd += ' --project "{}" --name "{}" --exist-ok'.format(yolo_results_folder,'yolo_results')
779
+
780
+ # This is the NMS IoU threshold
781
+ # cmd += ' --iou-thres 0.6'
782
+
783
+ if options.augment:
784
+ cmd += ' --augment'
785
+
786
+ # --half is a store_true argument for YOLOv5's val.py
787
+ if (options.half_precision_enabled is not None) and (options.half_precision_enabled == 1):
788
+ cmd += ' --half'
789
+
790
+ # Sometimes useful for debugging
791
+ # cmd += ' --save_conf --save_txt'
792
+
793
+ elif options.model_type == 'ultralytics':
794
+
795
+ if options.augment:
796
+ augment_string = 'augment'
797
+ else:
798
+ augment_string = ''
799
+
800
+ cmd = 'yolo val {} model="{}" imgsz={} batch={} data="{}" project="{}" name="{}" device="{}"'.\
801
+ format(augment_string,model_filename,image_size_string,options.batch_size,
802
+ yolo_dataset_file,yolo_results_folder,'yolo_results',options.device_string)
803
+ cmd += ' save_json exist_ok'
804
+
805
+ if (options.half_precision_enabled is not None):
806
+ if options.half_precision_enabled == 1:
807
+ cmd += ' --half=True'
808
+ else:
809
+ assert options.half_precision_enabled == 0
810
+ cmd += ' --half=False'
811
+
812
+ # Sometimes useful for debugging
813
+ # cmd += ' save_conf save_txt'
814
+
815
+ else:
816
+
817
+ raise ValueError('Unrecognized model type {}'.format(options.model_type))
818
+
819
+ # print(cmd); import clipboard; clipboard.copy(cmd)
820
+
821
+
822
+ ##%% Run YOLO command
823
+
824
+ if options.yolo_working_folder is not None:
825
+ current_dir = os.getcwd()
826
+ os.chdir(options.yolo_working_folder)
827
+
828
+ print('Running YOLO inference command:\n{}\n'.format(cmd))
829
+
830
+ if options.preview_yolo_command_only:
831
+
832
+ if options.remove_symlink_folder:
833
+ try:
834
+ print('Removing YOLO symlink folder {}'.format(symlink_folder))
835
+ shutil.rmtree(symlink_folder)
836
+ except Exception:
837
+ print('Warning: error removing symlink folder {}'.format(symlink_folder))
838
+ pass
839
+ if options.remove_yolo_results_folder:
840
+ try:
841
+ print('Removing YOLO results folder {}'.format(yolo_results_folder))
842
+ shutil.rmtree(yolo_results_folder)
843
+ except Exception:
844
+ print('Warning: error removing YOLO results folder {}'.format(yolo_results_folder))
845
+ pass
846
+
847
+ # sys.exit()
848
+ return
849
+
850
+ execution_result = process_utils.execute_and_print(cmd,encoding='utf-8',verbose=True)
851
+ assert execution_result['status'] == 0, 'Error running {}'.format(options.model_type)
852
+ yolo_console_output = execution_result['output']
853
+
854
+ if options.save_yolo_debug_output:
855
+
856
+ with open(os.path.join(yolo_results_folder,'yolo_console_output.txt'),'w',encoding='utf-8') as f:
857
+ for s in yolo_console_output:
858
+ f.write(s + '\n')
859
+ ct_utils.write_json(os.path.join(yolo_results_folder,'image_id_to_file.json'), image_id_to_file)
860
+ ct_utils.write_json(os.path.join(yolo_results_folder,'image_id_to_error.json'), image_id_to_error)
861
+
862
+
863
+ # YOLO console output contains lots of ANSI escape codes, remove them for easier parsing
864
+ yolo_console_output = [string_utils.remove_ansi_codes(s) for s in yolo_console_output]
865
+
866
+ # Find errors that occurred during the initial corruption check; these will not be included in the
867
+ # output. Errors that occur during inference will be handled separately.
868
+ yolo_read_failures = []
869
+
870
+ for line in yolo_console_output:
871
+
872
+ #
873
+ # Lines indicating read failures look like:
874
+ #
875
+ # For ultralytics val:
876
+ #
877
+ # line = "val: WARNING ⚠️ /a/b/c/d.jpg: ignoring corrupt image/label: [Errno 13] Permission denied: '/a/b/c/d.jpg'"
878
+ #
879
+ # For yolov5 val.py:
880
+ #
881
+ # line = "test: WARNING: a/b/c/d.jpg: ignoring corrupt image/label: cannot identify image file '/a/b/c/d.jpg'"
882
+ #
883
+ # In both cases, when we are using symlinks, the first filename is the symlink name, the
884
+ # second filename is the target, e.g.:
885
+ #
886
+ # line = "test: WARNING: /tmp/md_to_yolo/md_to_yolo_xyz/symlinks/xyz/0000000004.jpg: ignoring corrupt image/label: cannot identify image file '/tmp/md-tests/md-test-images/corrupt-images/real-file.jpg'"
887
+ #
888
+ # Windows example:
889
+ #
890
+ # line = "test: WARNING: g:\\temp\\md-test-images\\corrupt-images\\irfanview-can-still-read-me-caltech_camera_traps_5a0e37cc-23d2-11e8-a6a3-ec086b02610b.jpg: ignoring corrupt image/label: cannot identify image file 'g:\\\\temp\\\\md-test-images\\\\corrupt-images\\\\irfanview-can-still-read-me-caltech_camera_traps_5a0e37cc-23d2-11e8-a6a3-ec086b02610b.jpg'"
891
+ #
892
+
893
+ line = line.replace('⚠️',':')
894
+ if 'ignoring corrupt image/label' in line:
895
+
896
+ line_tokens = line.split('ignoring corrupt image/label')
897
+ assert len(line_tokens) == 2
898
+
899
+ tokens = line_tokens[0].split(':') # ,maxsplit=3)
900
+ tokens = [s.strip() for s in tokens]
901
+
902
+ # ['test', ' WARNING', ' a/b/c/d.jpg', ' ']
903
+ assert len(tokens[-1]) == 0
904
+ tokens = tokens[:-1]
905
+ assert 'warning' in tokens[1].lower()
906
+
907
+ if len(tokens) == 3:
908
+ image_name = tokens[2].strip()
909
+ else:
910
+ # Windows filenames have one extra colon
911
+ assert len(tokens) == 4
912
+ assert len(tokens[2]) == 1
913
+ image_name = ':'.join(tokens[2:4])
914
+
915
+ yolo_read_failures.append(image_name)
916
+
917
+ # ...if this line indicated a corrupt image
918
+
919
+ # ...for each line in the console output
920
+
921
+ # image_file = yolo_read_failures[0]
922
+ for image_file in yolo_read_failures:
923
+ image_id = os.path.splitext(os.path.basename(image_file))[0]
924
+ assert image_id in image_id_to_file, 'Unexpected image ID {}'.format(image_id)
925
+ if image_id not in image_id_to_error:
926
+ image_id_to_error[image_id] = 'YOLO read failure'
927
+
928
+ if options.yolo_working_folder is not None:
929
+ os.chdir(current_dir)
930
+
931
+
932
+ ##%% Convert results to MD format
933
+
934
+ json_files = glob.glob(yolo_results_folder + '/yolo_results/*.json')
935
+ assert len(json_files) == 1
936
+ yolo_json_file = json_files[0]
937
+
938
+ # Map YOLO image IDs to paths
939
+ image_id_to_relative_path = {}
940
+ for image_id in image_id_to_file:
941
+ fn = image_id_to_file[image_id].replace('\\','/')
942
+ assert path_is_abs(fn)
943
+ if options.input_folder is not None:
944
+ assert os.path.isdir(options.input_folder)
945
+ assert options.input_folder in fn, 'Internal error: base folder {} not in file {}'.format(
946
+ options.input_folder,fn)
947
+ relative_path = os.path.relpath(fn,options.input_folder)
948
+ else:
949
+ # We'll use the absolute path as a relative path, and pass '/'
950
+ # as the base path in this case.
951
+ relative_path = fn
952
+ image_id_to_relative_path[image_id] = relative_path
953
+
954
+ # Are we working with a base folder?
955
+ if options.input_folder is not None:
956
+ assert os.path.isdir(options.input_folder)
957
+ image_base = options.input_folder
958
+ else:
959
+ image_base = '/'
960
+
961
+ yolo_output_to_md_output.yolo_json_output_to_md_output(
962
+ yolo_json_file=yolo_json_file,
963
+ image_folder=image_base,
964
+ output_file=options.output_file,
965
+ yolo_category_id_to_name=options.yolo_category_id_to_name,
966
+ detector_name=os.path.basename(model_filename),
967
+ image_id_to_relative_path=image_id_to_relative_path,
968
+ image_id_to_error=image_id_to_error,
969
+ offset_yolo_class_ids=options.offset_yolo_category_ids)
970
+
971
+
972
+ ##%% Clean up
973
+
974
+ _clean_up_temporary_folders(options,
975
+ symlink_folder,yolo_results_folder,
976
+ symlink_folder_is_temp_folder,yolo_folder_is_temp_folder)
977
+
978
+ # ...def run_inference_with_yolo_val()
979
+
980
+
981
+ #%% Command-line driver
982
+
983
def main(): # noqa
    """
    Command-line driver for run_inference_with_yolo_val().

    Parses sys.argv, copies the parsed arguments onto a YoloInferenceOptions object,
    translates the negated ("--no_*", "--nonrecursive") and integer ("--augment_enabled")
    CLI flags into the positive boolean option fields, and runs inference.

    With no command-line arguments, prints help and exits.  Invalid argument
    combinations are reported through the argument parser (usage message, exit code 2).
    """

    options = YoloInferenceOptions()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        'model_filename',type=str,
        help='model file name')
    parser.add_argument(
        'input_folder',type=str,
        help='folder on which to recursively run the model, or a .json or .txt file ' +
             'containing a list of absolute image paths')
    parser.add_argument(
        'output_file',type=str,
        help='.json file where output will be written')

    parser.add_argument(
        '--image_filename_list',type=str,default=None,
        help='.json or .txt file containing a list of relative image filenames within [input_folder]')
    parser.add_argument(
        '--yolo_working_folder',type=str,default=None,
        help='folder in which to execute val.py (not necessary for YOLOv8 inference)')
    parser.add_argument(
        '--image_size', default=None, type=int,
        help='image size for model execution (default {} when augmentation is enabled, else {})'.format(
            default_image_size_with_augmentation,default_image_size_with_no_augmentation))
    parser.add_argument(
        '--conf_thres', default=options.conf_thres, type=float,
        help='confidence threshold for including detections in the output file (default {})'.format(
            options.conf_thres))
    parser.add_argument(
        '--batch_size', default=options.batch_size, type=int,
        help='inference batch size (default {})'.format(options.batch_size))
    parser.add_argument(
        '--half_precision_enabled', default=None, type=int,
        help='use half-precision-inference (1 or 0) (default is the underlying model\'s default, ' +
             'probably full for YOLOv8 and half for YOLOv5)')
    parser.add_argument(
        '--device_string', default=options.device_string, type=str,
        help='CUDA device specifier, typically "0" or "1" for CUDA devices, "mps" for ' +
             'M1/M2 devices, or "cpu" (default {})'.format(
                 options.device_string))
    parser.add_argument(
        '--overwrite_handling', default=options.overwrite_handling, type=str,
        help='action to take if the output file exists (skip, error, overwrite) (default {})'.format(
            options.overwrite_handling))
    parser.add_argument(
        '--yolo_dataset_file', default=None, type=str,
        help='YOLOv5 dataset.yaml file from which we should load category information ' +
             '(otherwise defaults to MD categories)')
    parser.add_argument(
        '--model_type', default=options.model_type, type=str,
        help='model type ("yolov5" or "ultralytics" ("yolov8" behaves the same as "ultralytics")) (default {})'.format(
            options.model_type))

    parser.add_argument(
        '--unique_id_strategy', default=options.unique_id_strategy, type=str,
        help='how should we ensure that unique filenames are passed to the YOLO val script, ' +
             'can be "verify", "auto", or "links", see options class docs for details (default {})'.format(
                 options.unique_id_strategy))
    parser.add_argument(
        '--symlink_folder', default=None, type=str,
        help='temporary folder for symlinks (defaults to a folder in the system temp dir)')
    parser.add_argument(
        '--yolo_results_folder', default=None, type=str,
        help='temporary folder for YOLO intermediate output (defaults to a folder in the system temp dir)')
    parser.add_argument(
        '--no_use_symlinks', action='store_true',
        help='copy files instead of creating symlinks when preparing the yolo input folder')
    parser.add_argument(
        '--no_remove_symlink_folder', action='store_true',
        help='don\'t remove the temporary folder full of symlinks')
    parser.add_argument(
        '--no_remove_yolo_results_folder', action='store_true',
        help='don\'t remove the temporary folder full of YOLO intermediate files')
    parser.add_argument(
        '--save_yolo_debug_output', action='store_true',
        help='write yolo console output to a text file in the results folder, along with additional debug files')
    parser.add_argument(
        '--checkpoint_frequency', default=options.checkpoint_frequency, type=int,
        help='break the job into chunks with no more than this many images (default {})'.format(
            options.checkpoint_frequency))
    parser.add_argument(
        '--no_append_job_id_to_symlink_folder', action='store_true',
        help="don't append a unique job ID to the symlink folder name")
    parser.add_argument(
        '--nonrecursive', action='store_true',
        help='disable recursive folder processing')
    parser.add_argument(
        '--no_offset_class_ids', action='store_true',
        help='disable class ID offsetting')

    parser.add_argument(
        '--preview_yolo_command_only', action='store_true',
        help='don\'t run inference, just preview the YOLO inference command (still creates symlinks)')

    # The CLI exposes augmentation as an integer (0/1); derive its default from the
    # boolean default on the options object.
    if options.augment:
        default_augment_enabled = 1
    else:
        default_augment_enabled = 0

    parser.add_argument(
        '--augment_enabled', default=default_augment_enabled, type=int,
        help='enable/disable augmentation (default {})'.format(default_augment_enabled))

    # No arguments at all: show help rather than an "arguments required" error
    if len(sys.argv) < 2:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # If the caller hasn't specified an image size, choose one based on whether augmentation
    # is enabled.
    if args.image_size is None:
        # Reported via the parser (not assert) so validation survives "python -O"
        if args.augment_enabled not in (0,1):
            parser.error('Illegal augment_enabled value {}'.format(args.augment_enabled))
        if args.augment_enabled == 1:
            args.image_size = default_image_size_with_augmentation
        else:
            args.image_size = default_image_size_with_no_augmentation
        augment_enabled_string = 'enabled'
        if not args.augment_enabled:
            augment_enabled_string = 'disabled'
        print('Augmentation is {}, using default image size {}'.format(
            augment_enabled_string,args.image_size))

    args_to_object(args, options)

    if args.yolo_dataset_file is not None:
        options.yolo_category_id_to_name = args.yolo_dataset_file

    # The function convention is that input_folder should be None when we want to use a list of
    # absolute paths, but the CLI convention is that the required argument is always valid, whether
    # it's a folder or a list of absolute paths.
    if os.path.isfile(options.input_folder):
        if options.image_filename_list is not None:
            parser.error('image_filename_list should not be specified when input_folder is a file')
        options.image_filename_list = options.input_folder
        options.input_folder = None

    # Translate negated/integer CLI flags into the positive boolean option fields
    options.recursive = (not options.nonrecursive)
    options.append_job_id_to_symlink_folder = (not options.no_append_job_id_to_symlink_folder)
    options.remove_symlink_folder = (not options.no_remove_symlink_folder)
    options.remove_yolo_results_folder = (not options.no_remove_yolo_results_folder)
    options.use_symlinks = (not options.no_use_symlinks)
    options.augment = (options.augment_enabled > 0)
    options.offset_yolo_category_ids = (not options.no_offset_class_ids)

    # Remove the CLI-only attributes now that they have been translated, so the
    # options object only carries the fields set above and the directly-mapped arguments.
    del options.nonrecursive
    del options.no_append_job_id_to_symlink_folder
    del options.no_remove_symlink_folder
    del options.no_remove_yolo_results_folder
    del options.no_use_symlinks
    del options.augment_enabled
    del options.yolo_dataset_file
    del options.no_offset_class_ids

    print(options.__dict__)

    run_inference_with_yolo_val(options)
1141
+
1142
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
1144
+
1145
+
1146
+ #%% Interactive driver
1147
+
1148
+ if False:
1149
+
1150
+
1151
+ #%% Debugging
1152
+
1153
+ input_folder = r'g:\temp\md-test-images'
1154
+ model_filename = 'MDV5A'
1155
+ output_folder = r'g:\temp\yolo-test-out'
1156
+ yolo_working_folder = r'c:\git\yolov5-md'
1157
+ dataset_file = r"g:\temp\md-test-images\dataset.yaml"
1158
+ job_name = 'yolo-debug'
1159
+ symlink_folder = os.path.join(output_folder,'symlinks')
1160
+ yolo_results_folder = os.path.join(output_folder,'yolo_results')
1161
+ model_name = os.path.splitext(os.path.basename(model_filename))[0]
1162
+
1163
+ output_file = os.path.join(output_folder,'{}_{}-md_format.json'.format(
1164
+ job_name,model_name))
1165
+
1166
+ options = YoloInferenceOptions()
1167
+
1168
+ options.yolo_working_folder = yolo_working_folder
1169
+ options.input_folder = input_folder
1170
+ options.output_file = output_file
1171
+
1172
+ options.yolo_category_id_to_name = dataset_file
1173
+ options.augment = False
1174
+ options.conf_thres = '0.001'
1175
+ options.batch_size = 1
1176
+ options.device_string = '0'
1177
+ options.unique_id_strategy = 'auto'
1178
+ options.overwrite_handling = 'overwrite'
1179
+
1180
+ if options.augment:
1181
+ options.image_size = round(1280 * 1.3)
1182
+ else:
1183
+ options.image_size = 1280
1184
+
1185
+ options.model_filename = model_filename
1186
+
1187
+ options.yolo_results_folder = yolo_results_folder # os.path.join(output_folder + 'yolo_results')
1188
+ options.symlink_folder = symlink_folder # os.path.join(output_folder,'symlinks')
1189
+ options.use_symlinks = False
1190
+
1191
+ options.remove_symlink_folder = True
1192
+ options.remove_yolo_results_folder = True
1193
+
1194
+ options.checkpoint_frequency = None
1195
+
1196
+ cmd = f'python run_inference_with_yolov5_val.py {model_filename} {input_folder} ' + \
1197
+ f'{output_file} --yolo_working_folder {yolo_working_folder} ' + \
1198
+ f' --image_size {options.image_size} --conf_thres {options.conf_thres} ' + \
1199
+ f' --batch_size {options.batch_size} ' + \
1200
+ f' --symlink_folder {options.symlink_folder} --yolo_results_folder {options.yolo_results_folder} ' + \
1201
+ f' --yolo_dataset_file {options.yolo_category_id_to_name} ' + \
1202
+ f' --unique_id_strategy {options.unique_id_strategy} --overwrite_handling {options.overwrite_handling}'
1203
+
1204
+ if not options.remove_symlink_folder:
1205
+ cmd += ' --no_remove_symlink_folder'
1206
+ if not options.remove_yolo_results_folder:
1207
+ cmd += ' --no_remove_yolo_results_folder'
1208
+ if options.checkpoint_frequency is not None:
1209
+ cmd += f' --checkpoint_frequency {options.checkpoint_frequency}'
1210
+ if not options.use_symlinks:
1211
+ cmd += ' --no_use_symlinks'
1212
+ if not options.augment:
1213
+ cmd += ' --augment_enabled 0'
1214
+
1215
+ print(cmd)
1216
+ execute_in_python = False
1217
+ if execute_in_python:
1218
+ run_inference_with_yolo_val(options)
1219
+ else:
1220
+ import clipboard; clipboard.copy(cmd)
1221
+
1222
+
1223
+
1224
+ #%% Run inference on a folder
1225
+
1226
+ input_folder = r'g:\temp\tegu-val-mini'.replace('\\','/')
1227
+ model_filename = r'g:\temp\usgs-tegus-yolov5x-231003-b8-img1280-e3002-best.pt'
1228
+ output_folder = r'g:\temp\tegu-scratch'
1229
+ yolo_working_folder = r'c:\git\yolov5-tegus'
1230
+ dataset_file = r'g:\temp\dataset.yaml'
1231
+
1232
+ # This only impacts the output file name, it's not passed to the inference function
1233
+ job_name = 'yolo-inference-test'
1234
+
1235
+ model_name = os.path.splitext(os.path.basename(model_filename))[0]
1236
+
1237
+ symlink_folder = os.path.join(output_folder,'symlinks')
1238
+ yolo_results_folder = os.path.join(output_folder,'yolo_results')
1239
+
1240
+ output_file = os.path.join(output_folder,'{}_{}-md_format.json'.format(
1241
+ job_name,model_name))
1242
+
1243
+ options = YoloInferenceOptions()
1244
+
1245
+ options.yolo_working_folder = yolo_working_folder
1246
+ options.input_folder = input_folder
1247
+ options.output_file = output_file
1248
+
1249
+ pass_image_filename_list = False
1250
+ pass_relative_paths = True
1251
+
1252
+ if pass_image_filename_list:
1253
+ if pass_relative_paths:
1254
+ options.image_filename_list = [
1255
+ r"val#american_cardinal#american_cardinal#CaCa#31W.01_C83#2017-2019#C90 and C83_31W.01#(05) 18AUG17 - 05SEP17 FTC AEG#MFDC1949_000065.JPG",
1256
+ r"val#american_cardinal#american_cardinal#CaCa#31W.01_C83#2017-2019#C90 and C83_31W.01#(04) 27JUL17 - 18AUG17 FTC AEG#MFDC1902_000064.JPG"
1257
+ ]
1258
+ else:
1259
+ options.image_filename_list = [
1260
+ r"g:/temp/tegu-val-mini/val#american_cardinal#american_cardinal#CaCa#31W.01_C83#2017-2019#C90 and C83_31W.01#(05) 18AUG17 - 05SEP17 FTC AEG#MFDC1949_000065.JPG",
1261
+ r"g:/temp/tegu-val-mini/val#american_cardinal#american_cardinal#CaCa#31W.01_C83#2017-2019#C90 and C83_31W.01#(04) 27JUL17 - 18AUG17 FTC AEG#MFDC1902_000064.JPG"
1262
+ ]
1263
+ else:
1264
+ options.image_filename_list = None
1265
+
1266
+ options.yolo_category_id_to_name = dataset_file
1267
+ options.augment = False
1268
+ options.conf_thres = '0.001'
1269
+ options.batch_size = 1
1270
+ options.device_string = '0'
1271
+ options.unique_id_strategy = 'auto'
1272
+ options.overwrite_handling = 'overwrite'
1273
+
1274
+ if options.augment:
1275
+ options.image_size = round(1280 * 1.3)
1276
+ else:
1277
+ options.image_size = 1280
1278
+
1279
+ options.model_filename = model_filename
1280
+
1281
+ options.yolo_results_folder = yolo_results_folder # os.path.join(output_folder + 'yolo_results')
1282
+ options.symlink_folder = symlink_folder # os.path.join(output_folder,'symlinks')
1283
+ options.use_symlinks = False
1284
+
1285
+ options.remove_symlink_folder = True
1286
+ options.remove_yolo_results_folder = True
1287
+
1288
+ options.checkpoint_frequency = 5
1289
+
1290
+ cmd = f'python run_inference_with_yolov5_val.py {model_filename} {input_folder} ' + \
1291
+ f'{output_file} --yolo_working_folder {yolo_working_folder} ' + \
1292
+ f' --image_size {options.image_size} --conf_thres {options.conf_thres} ' + \
1293
+ f' --batch_size {options.batch_size} ' + \
1294
+ f' --symlink_folder {options.symlink_folder} --yolo_results_folder {options.yolo_results_folder} ' + \
1295
+ f' --yolo_dataset_file {options.yolo_category_id_to_name} ' + \
1296
+ f' --unique_id_strategy {options.unique_id_strategy} --overwrite_handling {options.overwrite_handling}'
1297
+
1298
+ if not options.remove_symlink_folder:
1299
+ cmd += ' --no_remove_symlink_folder'
1300
+ if not options.remove_yolo_results_folder:
1301
+ cmd += ' --no_remove_yolo_results_folder'
1302
+ if options.checkpoint_frequency is not None:
1303
+ cmd += f' --checkpoint_frequency {options.checkpoint_frequency}'
1304
+ if not options.use_symlinks:
1305
+ cmd += ' --no_use_symlinks'
1306
+ if not options.augment:
1307
+ cmd += ' --augment_enabled 0'
1308
+
1309
+ print(cmd)
1310
+ execute_in_python = False
1311
+ if execute_in_python:
1312
+ run_inference_with_yolo_val(options)
1313
+ else:
1314
+ import clipboard; clipboard.copy(cmd)