megadetector 5.0.12__py3-none-any.whl → 5.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (45) hide show
  1. megadetector/api/batch_processing/api_core/server.py +1 -1
  2. megadetector/api/batch_processing/api_core/server_api_config.py +0 -1
  3. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -3
  4. megadetector/api/batch_processing/api_core/server_utils.py +0 -4
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  6. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -3
  7. megadetector/classification/efficientnet/utils.py +0 -3
  8. megadetector/data_management/camtrap_dp_to_coco.py +0 -2
  9. megadetector/data_management/cct_json_utils.py +15 -6
  10. megadetector/data_management/coco_to_labelme.py +12 -1
  11. megadetector/data_management/databases/integrity_check_json_db.py +43 -27
  12. megadetector/data_management/importers/cacophony-thermal-importer.py +1 -4
  13. megadetector/data_management/ocr_tools.py +0 -4
  14. megadetector/data_management/read_exif.py +178 -44
  15. megadetector/data_management/rename_images.py +187 -0
  16. megadetector/data_management/wi_download_csv_to_coco.py +3 -2
  17. megadetector/data_management/yolo_output_to_md_output.py +7 -2
  18. megadetector/detection/process_video.py +548 -244
  19. megadetector/detection/pytorch_detector.py +33 -14
  20. megadetector/detection/run_detector.py +17 -5
  21. megadetector/detection/run_detector_batch.py +179 -65
  22. megadetector/detection/run_inference_with_yolov5_val.py +527 -357
  23. megadetector/detection/tf_detector.py +14 -3
  24. megadetector/detection/video_utils.py +284 -61
  25. megadetector/postprocessing/categorize_detections_by_size.py +16 -14
  26. megadetector/postprocessing/classification_postprocessing.py +716 -0
  27. megadetector/postprocessing/compare_batch_results.py +101 -93
  28. megadetector/postprocessing/convert_output_format.py +12 -5
  29. megadetector/postprocessing/merge_detections.py +18 -7
  30. megadetector/postprocessing/postprocess_batch_results.py +133 -127
  31. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +236 -232
  32. megadetector/postprocessing/subset_json_detector_output.py +66 -62
  33. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +0 -2
  34. megadetector/utils/ct_utils.py +5 -4
  35. megadetector/utils/md_tests.py +380 -128
  36. megadetector/utils/path_utils.py +39 -6
  37. megadetector/utils/process_utils.py +13 -4
  38. megadetector/visualization/visualization_utils.py +7 -2
  39. megadetector/visualization/visualize_db.py +79 -77
  40. megadetector/visualization/visualize_detector_output.py +0 -1
  41. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/LICENSE +0 -0
  42. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/METADATA +2 -2
  43. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/RECORD +45 -43
  44. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/top_level.txt +0 -0
  45. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/WHEEL +0 -0
@@ -34,13 +34,6 @@ that permission and *doesn't* have admin privileges. If you are running this sc
34
34
  Windows and you don't have admin privileges, use --no_use_symlinks, which will make copies of images,
35
35
  rather than using symlinks.
36
36
 
37
- TODO:
38
-
39
- * Multiple GPU support
40
- * Checkpointing
41
- * Support alternative class names at the command line (currently defaults to MD classes,
42
- though other class names can be supplied programmatically)
43
-
44
37
  """
45
38
 
46
39
  #%% Imports
@@ -52,14 +45,19 @@ import glob
52
45
  import tempfile
53
46
  import shutil
54
47
  import json
48
+ import copy
55
49
 
56
50
  from tqdm import tqdm
57
51
 
58
52
  from megadetector.utils import path_utils
59
53
  from megadetector.utils import process_utils
60
54
  from megadetector.utils import string_utils
55
+
56
+ from megadetector.utils.ct_utils import is_iterable, split_list_into_fixed_size_chunks
57
+ from megadetector.utils.path_utils import path_is_abs
61
58
  from megadetector.data_management import yolo_output_to_md_output
62
59
  from megadetector.detection.run_detector import try_download_known_detector
60
+ from megadetector.postprocessing.combine_api_outputs import combine_api_output_files
63
61
 
64
62
  default_image_size_with_augmentation = int(1280 * 1.3)
65
63
  default_image_size_with_no_augmentation = 1280
@@ -73,93 +71,141 @@ class YoloInferenceOptions:
73
71
  the input/output filenames.
74
72
  """
75
73
 
76
- ## Required ##
77
-
78
- #: Folder of images to process
79
- input_folder = None
80
-
81
- #: Model filename (ending in .pt), or a well-known model name (e.g. "MDV5A")
82
- model_filename = None
83
-
84
- #: .json output file, in MD results format
85
- output_file = None
86
-
87
-
88
- ## Optional ##
74
+ def __init__(self):
75
+
76
+ ## Required-ish ##
77
+
78
+ #: Folder of images to process (can be None if image_filename_list contains absolute paths)
79
+ self.input_folder = None
80
+
81
+ #: If this is None, [input_folder] can't be None; we'll process all images in [input_folder].
82
+ #:
83
+ #: If this is not None, and [input_folder] is not None, this should be a list of relative image
84
+ #: paths within [input_folder] to process, or a .txt or .json file containing a list of
85
+ #: relative image paths.
86
+ #:
87
+ #: If this is not None, and [input_folder] is None, this should be a list of absolute image
88
+ #: paths, or a .txt or .json file containing a list of absolute image paths.
89
+ self.image_filename_list = None
90
+
91
+ #: Model filename (ending in .pt), or a well-known model name (e.g. "MDV5A")
92
+ self.model_filename = None
93
+
94
+ #: .json output file, in MD results format
95
+ self.output_file = None
96
+
97
+
98
+ ## Optional ##
99
+
100
+ #: Required for older YOLOv5 inference, not for newer ultralytics/YOLOv8 inference
101
+ self.yolo_working_folder = None
102
+
103
+ #: Currently 'yolov5' and 'ultralytics' are supported, and really these are proxies for
104
+ #: "the yolov5 repo" and "the ultralytics repo".
105
+ self.model_type = 'yolov5'
89
106
 
90
- #: Required for older YOLOv5 inference, not for newer ultralytics/YOLOv8 inference
91
- yolo_working_folder = None
107
+ #: Image size to use; this is a single int, which in ultralytics's terminology means
108
+ #: "scale the long side of the image to this size, and preserve aspect ratio".
109
+ self.image_size = default_image_size_with_augmentation
110
+
111
+ #: Detections below this threshold will not be included in the output file
112
+ self.conf_thres = '0.001'
113
+
114
+ #: Batch size... has no impact on results, but may create memory issues if you set
115
+ #: this to large values
116
+ self.batch_size = 1
117
+
118
+ #: Device string: typically '0' for GPU 0, '1' for GPU 1, etc., or 'cpu'
119
+ self.device_string = '0'
120
+
121
+ #: Should we enable test-time augmentation?
122
+ self.augment = True
123
+
124
+ #: Should we enable half-precision inference?
125
+ self.half_precision_enabled = None
126
+
127
+ #: Where should we stash the temporary symlinks (or copies) used to give unique identifiers to image
128
+ #: files?
129
+ #:
130
+ #: If this is None, we'll create a folder in system temp space.
131
+ self.symlink_folder = None
132
+
133
+ #: Should we use symlinks to give unique identifiers to image files (vs. copies)?
134
+ self.use_symlinks = True
135
+
136
+ #: How should we guarantee that YOLO IDs (base filenames) are unique? Choices are:
137
+ #:
138
+ #: * 'verify': assume image IDs are unique, but verify and error if they're not
139
+ #: * 'links': create symlinks (or copies, depending on use_symlinks) to enforce uniqueness
140
+ #: * 'auto': check whether IDs are unique, create links if necessary
141
+ self.unique_id_strategy = 'links'
142
+
143
+ #: Temporary folder to stash intermediate YOLO results.
144
+ #:
145
+ #: If this is None, we'll create a folder in system temp space.
146
+ self.yolo_results_folder = None
147
+
148
+ #: Should we remove the symlink folder when we're done?
149
+ self.remove_symlink_folder = True
150
+
151
+ #: Should we remove the intermediate results folder when we're done?
152
+ self.remove_yolo_results_folder = True
153
+
154
+ #: These are deliberately offset from the standard MD categories; YOLOv5
155
+ #: needs category IDs to start at 0.
156
+ #:
157
+ #: This can also be a string that points to a YOLO dataset.yaml file.
158
+ self.yolo_category_id_to_name = {0:'animal',1:'person',2:'vehicle'}
159
+
160
+ #: What should we do if the output file already exists?
161
+ #:
162
+ #: Can be 'error', 'skip', or 'overwrite'.
163
+ self.overwrite_handling = 'skip'
164
+
165
+ #: If True, we'll do a dry run that lets you preview the YOLO val command, without
166
+ #: actually running it.
167
+ self.preview_yolo_command_only = False
168
+
169
+ #: By default, if any errors occur while we're copying images or creating symlinks, it's
170
+ #: game over. If this is True, those errors become warnings, and we plow ahead.
171
+ self.treat_copy_failures_as_warnings = False
172
+
173
+ #: Save YOLO console output
174
+ self.save_yolo_debug_output = False
175
+
176
+ #: Whether to search for images recursively within [input_folder]
177
+ #:
178
+ #: Ignored if a list of files is provided.
179
+ self.recursive = True
180
+
181
+ #: Maximum number of images to run in a single chunk
182
+ self.checkpoint_frequency = None
183
+
184
+ # ...def __init__()
92
185
 
93
- #: Currently 'yolov5' and 'ultralytics' are supported, and really these are proxies for
94
- #: "the yolov5 repo" and "the ultralytics repo".
95
- model_type = 'yolov5'
186
+ # ...YoloInferenceOptions()
96
187
 
97
- #: Image size to use; this is a single int, which in ultralytics's terminology means
98
- #: "scale the long side of the image to this size, and preserve aspect ratio".
99
- image_size = default_image_size_with_augmentation
100
-
101
- #: Detections below this threshold will not be included in the output file
102
- conf_thres = '0.001'
103
-
104
- #: Batch size... has no impact on results, but may create memory issues if you set
105
- #: this to large values
106
- batch_size = 1
107
-
108
- #: Device string: typically '0' for GPU 0, '1' for GPU 1, etc., or 'cpu'
109
- device_string = '0'
110
-
111
- #: Should we enable test-time augmentation?
112
- augment = True
113
-
114
- #: Should we enable half-precision inference?
115
- half_precision_enabled = None
116
-
117
- #: Where should we stash the temporary symlinks used to give unique identifiers to image files?
118
- #:
119
- #: If this is None, we'll create a folder in system temp space.
120
- symlink_folder = None
121
-
122
- #: Should we use symlinks to give unique identifiers to image files (vs. copies)?
123
- use_symlinks = True
124
-
125
- #: Temporary folder to stash intermediate YOLO results.
126
- #:
127
- #: If this is None, we'll create a folder in system temp space.
128
- yolo_results_folder = None
129
-
130
- #: Should we remove the symlink folder when we're done?
131
- remove_symlink_folder = True
132
-
133
- #: Should we remove the intermediate results folder when we're done?
134
- remove_yolo_results_folder = True
135
-
136
- #: These are deliberately offset from the standard MD categories; YOLOv5
137
- #: needs categories IDs to start at 0.
138
- #:
139
- #: This can also be a string that points to a YOLO dataset.yaml file.
140
- yolo_category_id_to_name = {0:'animal',1:'person',2:'vehicle'}
141
-
142
- #: What should we do if the output file already exists?
143
- #:
144
- #: Can be 'error', 'skip', or 'overwrite'.
145
- overwrite_handling = 'skip'
146
-
147
- #: If True, we'll do a dry run that lets you preview the YOLO val command, without
148
- #: actually running it.
149
- preview_yolo_command_only = False
150
-
151
- #: By default, if any errors occur while we're copying images or creating symlinks, it's
152
- #: game over. If this is True, those errors become warnings, and we plow ahead.
153
- treat_copy_failures_as_warnings = False
154
-
155
- #: Save YOLO console output
156
- save_yolo_debug_output = False
157
-
158
- #: Whether to search for images recursively within [input_folder]
159
- recursive = True
160
-
188
+
189
+ #%% Support functions
190
+
191
+ def _clean_up_temporary_folders(options,
192
+ symlink_folder,yolo_results_folder,
193
+ symlink_folder_is_temp_folder,yolo_folder_is_temp_folder):
194
+ """
195
+ Remove temporary symlink/results folders, unless the caller requested that we leave them in place.
196
+ """
161
197
 
162
- # ...YoloInferenceOptions()
198
+ if options.remove_symlink_folder:
199
+ shutil.rmtree(symlink_folder)
200
+ elif symlink_folder_is_temp_folder:
201
+ print('Warning: using temporary symlink folder {}, but not removing it'.format(
202
+ symlink_folder))
203
+
204
+ if options.remove_yolo_results_folder:
205
+ shutil.rmtree(yolo_results_folder)
206
+ elif yolo_folder_is_temp_folder:
207
+ print('Warning: using temporary YOLO results folder {}, but not removing it'.format(
208
+ yolo_results_folder))
163
209
 
164
210
 
165
211
  #%% Main function
@@ -173,9 +219,15 @@ def run_inference_with_yolo_val(options):
173
219
  options (YoloInferenceOptions): all the parameters used to control this process,
174
220
  including filenames; see YoloInferenceOptions for details
175
221
  """
176
-
222
+
177
223
  ##%% Input and path handling
178
224
 
225
+ default_options = YoloInferenceOptions()
226
+
227
+ for k in options.__dict__.keys():
228
+ if k not in default_options.__dict__:
229
+ print('Warning: unexpected variable {} in options object'.format(k))
230
+
179
231
  if options.model_type == 'yolov8':
180
232
 
181
233
  print('Warning: model type "yolov8" supplied, "ultralytics" is the preferred model type string for YOLOv8 models')
@@ -191,20 +243,24 @@ def run_inference_with_yolo_val(options):
191
243
  assert os.path.isdir(options.yolo_working_folder), \
192
244
  'Could not find working folder {}'.format(options.yolo_working_folder)
193
245
 
194
- assert os.path.isdir(options.input_folder) or os.path.isfile(options.input_folder), \
195
- 'Could not find input {}'.format(options.input_folder)
196
-
197
246
  if options.half_precision_enabled is not None:
198
247
  assert options.half_precision_enabled in (0,1), \
199
248
  'Invalid value {} for --half_precision_enabled (should be 0 or 1)'.format(
200
249
  options.half_precision_enabled)
201
-
250
+
202
251
  # If the model filename is a known model string (e.g. "MDv5A"), download the model if necessary
203
252
  model_filename = try_download_known_detector(options.model_filename)
204
253
 
205
254
  assert os.path.isfile(model_filename), \
206
255
  'Could not find model file {}'.format(model_filename)
207
256
 
257
+ assert (options.input_folder is not None) or (options.image_filename_list is not None), \
258
+ 'You must specify a folder and/or a file list'
259
+
260
+ if options.input_folder is not None:
261
+ assert os.path.isdir(options.input_folder), 'Could not find input folder {}'.format(
262
+ options.input_folder)
263
+
208
264
  if os.path.exists(options.output_file):
209
265
  if options.overwrite_handling == 'skip':
210
266
  print('Warning: output file {} exists, skipping'.format(options.output_file))
@@ -215,13 +271,17 @@ def run_inference_with_yolo_val(options):
215
271
  raise ValueError('Output file {} exists'.format(options.output_file))
216
272
  else:
217
273
  raise ValueError('Unknown output handling method {}'.format(options.overwrite_handling))
218
-
219
- os.makedirs(os.path.dirname(options.output_file),exist_ok=True)
220
274
 
275
+ os.makedirs(os.path.dirname(options.output_file),exist_ok=True)
221
276
 
277
+ if options.input_folder is not None:
278
+ options.input_folder = options.input_folder.replace('\\','/')
279
+
280
+
222
281
  ##%% Other input handling
223
282
 
224
283
  if isinstance(options.yolo_category_id_to_name,str):
284
+
225
285
  assert os.path.isfile(options.yolo_category_id_to_name)
226
286
  yolo_dataset_file = options.yolo_category_id_to_name
227
287
  options.yolo_category_id_to_name = \
@@ -265,61 +325,273 @@ def run_inference_with_yolo_val(options):
265
325
 
266
326
  ##%% Enumerate images
267
327
 
268
- if os.path.isdir(options.input_folder):
269
- image_files_absolute = path_utils.find_images(options.input_folder,recursive=options.recursive)
328
+ image_files_relative = None
329
+ image_files_absolute = None
330
+
331
+ if options.image_filename_list is None:
332
+ assert options.input_folder is not None and os.path.isdir(options.input_folder), \
333
+ 'Could not find input folder {}'.format(options.input_folder)
334
+ image_files_relative = path_utils.find_images(options.input_folder,
335
+ recursive=options.recursive,
336
+ return_relative_paths=True,
337
+ convert_slashes=True)
338
+ image_files_absolute = [os.path.join(options.input_folder,fn) for \
339
+ fn in image_files_relative]
270
340
  else:
271
- assert os.path.isfile(options.input_folder)
272
- with open(options.input_folder,'r') as f:
273
- image_files_absolute = json.load(f)
274
- assert isinstance(image_files_absolute,list)
275
- for fn in image_files_absolute:
276
- assert os.path.isfile(fn), 'Could not find image file {}'.format(fn)
341
+
342
+ if is_iterable(options.image_filename_list):
343
+
344
+ image_files_relative = options.image_filename_list
345
+
346
+ else:
347
+ assert isinstance(options.image_filename_list,str), \
348
+ 'Unrecognized image filename list object type: {}'.format(options.image_filename_list)
349
+ assert os.path.isfile(options.image_filename_list), \
350
+ 'Could not find image filename list file: {}'.format(options.image_filename_list)
351
+ ext = os.path.splitext(options.image_filename_list).lower()
352
+ assert ext in ('.json','.txt'), \
353
+ 'Unrecognized image filename list file extension: {}'.format(options.image_filename_list)
354
+ if ext == '.json':
355
+ with open(options.image_filename_list,'r') as f:
356
+ image_files_relative = json.load(f)
357
+ assert is_iterable(image_files_relative)
358
+ else:
359
+ assert ext == '.txt'
360
+ with open(options.image_filename_list,'r') as f:
361
+ image_files_relative = f.readlines()
362
+ image_files_relative = [s.strip() for s in image_files_relative]
363
+
364
+ # ...whether the image filename list was supplied as list vs. a filename
365
+
366
+ if options.input_folder is None:
367
+ image_files_absolute = image_files_relative
368
+ else:
369
+ # The list should be relative filenames
370
+ for fn in image_files_relative:
371
+ assert not path_is_abs(fn), \
372
+ 'When providing a folder and a list, paths in the list should be relative'
373
+
374
+ image_files_absolute = \
375
+ [os.path.join(options.input_folder,fn) for fn in image_files_relative]
376
+ for fn in image_files_absolute:
377
+ assert os.path.isfile(fn), 'Could not find image file {}'.format(fn)
378
+
379
+ # ...whether the caller supplied a list of filenames
380
+
381
+ image_files_absolute = [fn.replace('\\','/') for fn in image_files_absolute]
382
+ del image_files_relative
383
+
384
+
385
+ ##%% Recurse if necessary to handle checkpoints
386
+
387
+ if options.checkpoint_frequency is not None and options.checkpoint_frequency > 0:
388
+
389
+ chunks = split_list_into_fixed_size_chunks(image_files_absolute,options.checkpoint_frequency)
390
+
391
+ chunk_output_files = []
392
+
393
+ # i_chunk = 0; chunk_files_abs = chunks[i_chunk]
394
+ for i_chunk,chunk_files_abs in enumerate(chunks):
395
+
396
+ print('Processing {} images from chunk {} of {}'.format(
397
+ len(chunk_files_abs),i_chunk,len(chunks)))
398
+
399
+ chunk_options = copy.deepcopy(options)
400
+
401
+ # Run each chunk without checkpointing
402
+ chunk_options.checkpoint_frequency = None
403
+
404
+ if options.input_folder is not None:
405
+ chunk_files_relative = \
406
+ [os.path.relpath(fn,options.input_folder) for fn in chunk_files_abs]
407
+ chunk_options.image_filename_list = chunk_files_relative
408
+ else:
409
+ chunk_options.image_filename_list = chunk_files_abs
410
+
411
+ chunk_options.image_filename_list = \
412
+ [fn.replace('\\','/') for fn in chunk_options.image_filename_list]
413
+
414
+ chunk_string = 'chunk_{}'.format(str(i_chunk).zfill(5))
415
+ chunk_options.yolo_results_folder = yolo_results_folder + '_' + chunk_string
416
+ chunk_options.symlink_folder = symlink_folder + '_' + chunk_string
417
+
418
+ # Put the output file in the parent job's scratch folder
419
+ chunk_output_file = os.path.join(yolo_results_folder,chunk_string + '_results_md_format.json')
420
+ chunk_output_files.append(chunk_output_file)
421
+ chunk_options.output_file = chunk_output_file
422
+
423
+ if os.path.isfile(chunk_output_file):
424
+
425
+ print('Chunk output file {} exists, checking completeness'.format(chunk_output_file))
426
+
427
+ with open(chunk_output_file,'r') as f:
428
+ chunk_results = json.load(f)
429
+ images_in_this_chunk_results_file = [im['file'] for im in chunk_results['images']]
430
+ assert len(images_in_this_chunk_results_file) == len(chunk_options.image_filename_list), \
431
+ 'Expected {} images in chunk results file {}, found {}, possibly this is left over from a previous job?'.format(
432
+ len(chunk_options.image_filename_list),chunk_output_file,
433
+ len(images_in_this_chunk_results_file))
434
+ for fn in images_in_this_chunk_results_file:
435
+ assert fn in chunk_options.image_filename_list, \
436
+ 'Unexpected image {} in chunk results file {}, possibly this is left over from a previous job?'.format(
437
+ fn,chunk_output_file)
438
+
439
+ print('Chunk output file {} exists and is complete, skipping this chunk'.format(
440
+ chunk_output_file))
441
+
442
+ # ...if the output file exists
443
+
444
+ else:
445
+
446
+ run_inference_with_yolo_val(chunk_options)
447
+
448
+ # ...if we do/don't have to run this chunk
449
+
450
+ assert os.path.isfile(chunk_options.output_file)
451
+
452
+ # ...for each chunk
277
453
 
454
+ # Merge
455
+ _ = combine_api_output_files(input_files=chunk_output_files,
456
+ output_file=options.output_file,
457
+ require_uniqueness=True,
458
+ verbose=True)
459
+
460
+ # Validate
461
+ with open(options.output_file,'r') as f:
462
+ combined_results = json.load(f)
463
+ assert len(combined_results['images']) == len(image_files_absolute), \
464
+ 'Expected {} images in merged output file, found {}'.format(
465
+ len(image_files_absolute),len(combined_results['images']))
466
+
467
+ # Clean up
468
+ _clean_up_temporary_folders(options,
469
+ symlink_folder,yolo_results_folder,
470
+ symlink_folder_is_temp_folder,yolo_folder_is_temp_folder)
471
+
472
+ return
278
473
 
279
- ##%% Create symlinks to give a unique ID to each image
474
+ # ...if we need to make recursive calls for file chunks
280
475
 
476
+
477
+ ##%% Create symlinks (or copy images) to give a unique ID to each image
478
+
479
+ # Maps YOLO image IDs (base filename without extension as it will appear in YOLO .json output)
480
+ # to the *original full path* for each image (not the symlink path).
281
481
  image_id_to_file = {}
482
+
483
+ # Maps YOLO image IDs (base filename without extension as it will appear in YOLO .json output)
484
+ # to errors, including errors that happen before we run the model at all (e.g. file access errors).
282
485
  image_id_to_error = {}
283
486
 
284
- if options.use_symlinks:
285
- print('Creating {} symlinks in {}'.format(len(image_files_absolute),symlink_folder_inner))
487
+ create_links = True
488
+
489
+ if options.unique_id_strategy == 'links':
490
+
491
+ create_links = True
492
+
286
493
  else:
287
- print('Symlinks disabled, copying {} images to {}'.format(len(image_files_absolute),symlink_folder_inner))
288
494
 
289
- # i_image = 0; image_fn = image_files_absolute[i_image]
290
- for i_image,image_fn in tqdm(enumerate(image_files_absolute),total=len(image_files_absolute)):
495
+ assert options.unique_id_strategy in ('auto','verify'), \
496
+ 'Unknown unique ID strategy {}'.format(options.unique_id_strategy)
497
+
498
+ image_ids_are_unique = True
291
499
 
292
- ext = os.path.splitext(image_fn)[1]
500
+ for i_image,image_fn in tqdm(enumerate(image_files_absolute),total=len(image_files_absolute)):
501
+
502
+ image_id = os.path.splitext(os.path.basename(image_fn))[0]
503
+
504
+ # Is this image ID unique?
505
+ if image_id in image_id_to_file:
506
+ if options.unique_id_strategy == 'verify':
507
+ raise ValueError('"verify" specified for image uniqueness, but ' +
508
+ 'image ID {} occurs more than once:\n\n{}\n\n{}'.format(
509
+ image_id,image_fn,image_id_to_file[image_id]))
510
+ else:
511
+ assert options.unique_id_strategy == 'auto'
512
+ image_ids_are_unique = False
513
+ image_id_to_file = {}
514
+ break
515
+
516
+ image_id_to_file[image_id] = image_fn
293
517
 
294
- image_id = str(i_image).zfill(10)
295
- image_id_to_file[image_id] = image_fn
296
- symlink_name = image_id + ext
297
- symlink_full_path = os.path.join(symlink_folder_inner,symlink_name)
518
+ # ...for each image
298
519
 
299
- try:
520
+ if image_ids_are_unique:
521
+
522
+ print('"{}" specified for image uniqueness and images are unique, skipping links'.format(
523
+ options.unique_id_strategy))
524
+ assert len(image_id_to_file) == len(image_files_absolute)
525
+ create_links = False
526
+
527
+ else:
528
+
529
+ assert options.unique_id_strategy == 'auto'
530
+ create_links = True
531
+ link_type = 'copies'
300
532
  if options.use_symlinks:
301
- path_utils.safe_create_link(image_fn,symlink_full_path)
302
- else:
303
- shutil.copyfile(image_fn,symlink_full_path)
304
- except Exception as e:
305
- error_string = str(e)
306
- image_id_to_error[image_id] = error_string
307
- # Always break if the user is trying to create symlinks on Windows without
308
- # permission, 100% of images will always fail in this case.
309
- if ('a required privilege is not held by the client' in error_string.lower()) or \
310
- (not options.treat_copy_failures_as_warnings):
311
- print('\nError copying/creating link for input file {}: {}'.format(
312
- image_fn,error_string))
313
-
314
- raise
315
- else:
316
- print('Warning: error copying/creating link for input file {}: {}'.format(
317
- image_fn,error_string))
318
- continue
533
+ link_type = 'links'
534
+ print('"auto" specified for image uniqueness and images are not unique, defaulting to {}'.format(
535
+ link_type))
536
+
537
+ # ...which unique ID strategy?
538
+
539
+ if create_links:
319
540
 
320
- # ...for each image
321
-
541
+ if options.use_symlinks:
542
+ print('Creating {} symlinks in {}'.format(len(image_files_absolute),symlink_folder_inner))
543
+ else:
544
+ print('Symlinks disabled, copying {} images to {}'.format(len(image_files_absolute),symlink_folder_inner))
545
+
546
+ link_full_paths = []
547
+
548
+ # i_image = 0; image_fn = image_files_absolute[i_image]
549
+ for i_image,image_fn in tqdm(enumerate(image_files_absolute),total=len(image_files_absolute)):
550
+
551
+ ext = os.path.splitext(image_fn)[1]
552
+
553
+ # YOLO .json output identifies images by the base filename without the extension
554
+ image_id = str(i_image).zfill(10)
555
+ image_id_to_file[image_id] = image_fn
556
+ symlink_name = image_id + ext
557
+ symlink_full_path = os.path.join(symlink_folder_inner,symlink_name)
558
+ link_full_paths.append(symlink_full_path)
559
+
560
+ try:
561
+
562
+ if options.use_symlinks:
563
+ path_utils.safe_create_link(image_fn,symlink_full_path)
564
+ else:
565
+ shutil.copyfile(image_fn,symlink_full_path)
566
+
567
+ except Exception as e:
568
+
569
+ error_string = str(e)
570
+ image_id_to_error[image_id] = error_string
571
+
572
+ # Always break if the user is trying to create symlinks on Windows without
573
+ # permission, 100% of images will always fail in this case.
574
+ if ('a required privilege is not held by the client' in error_string.lower()) or \
575
+ (not options.treat_copy_failures_as_warnings):
576
+
577
+ print('\nError copying/creating link for input file {}: {}'.format(
578
+ image_fn,error_string))
579
+
580
+ raise
581
+
582
+ else:
583
+
584
+ print('Warning: error copying/creating link for input file {}: {}'.format(
585
+ image_fn,error_string))
586
+ continue
587
+
588
+ # ...except
589
+
590
+ # ...for each image
591
+
592
+ # ...if we need to create links/copies
322
593
 
594
+
323
595
  ##%% Create the dataset file if necessary
324
596
 
325
597
  # This may have been passed in as a string, but at this point, we should have
@@ -330,14 +602,40 @@ def run_inference_with_yolo_val(options):
330
602
  category_ids = sorted(list(options.yolo_category_id_to_name.keys()))
331
603
  assert category_ids[0] == 0
332
604
  assert len(category_ids) == 1 + category_ids[-1]
333
-
605
+
334
606
  yolo_dataset_file = os.path.join(yolo_results_folder,'dataset.yaml')
607
+ yolo_image_list_file = os.path.join(yolo_results_folder,'images.txt')
608
+
335
609
 
610
+ with open(yolo_image_list_file,'w') as f:
611
+
612
+ if create_links:
613
+ image_files_to_write = link_full_paths
614
+ else:
615
+ image_files_to_write = image_files_absolute
616
+
617
+ for fn_abs in image_files_to_write:
618
+ # At least in YOLOv5 val (need to verify for YOLOv8 val), filenames in this
619
+ # text file are treated as relative to the text file itself if they start with
620
+ # "./", otherwise they're treated as absolute paths. Since we don't want to put this
621
+ # text file in the image folder, we'll use absolute paths.
622
+ # fn_relative = os.path.relpath(fn_abs,options.input_folder)
623
+ # f.write(fn_relative + '\n')
624
+ f.write(fn_abs + '\n')
625
+
626
+ if create_links:
627
+ inference_folder = symlink_folder_inner
628
+ else:
629
+ # This doesn't matter, but it has to be a valid path
630
+ inference_folder = options.yolo_results_folder
631
+
336
632
  with open(yolo_dataset_file,'w') as f:
337
- f.write('path: {}\n'.format(symlink_folder_inner))
633
+
634
+ f.write('path: {}\n'.format(inference_folder))
635
+ # These need to be valid paths, even if you're not using them, and "." is always safe
338
636
  f.write('train: .\n')
339
637
  f.write('val: .\n')
340
- f.write('test: .\n')
638
+ f.write('test: {}\n'.format(yolo_image_list_file))
341
639
  f.write('\n')
342
640
  f.write('nc: {}\n'.format(len(options.yolo_category_id_to_name)))
343
641
  f.write('\n')
@@ -425,13 +723,15 @@ def run_inference_with_yolo_val(options):
425
723
  print('Warning: error removing YOLO results folder {}'.format(yolo_results_folder))
426
724
  pass
427
725
 
428
- sys.exit()
726
+ # sys.exit()
727
+ return
429
728
 
430
729
  execution_result = process_utils.execute_and_print(cmd,encoding='utf-8',verbose=True)
431
730
  assert execution_result['status'] == 0, 'Error running {}'.format(options.model_type)
432
731
  yolo_console_output = execution_result['output']
433
732
 
434
733
  if options.save_yolo_debug_output:
734
+
435
735
  with open(os.path.join(yolo_results_folder,'yolo_console_output.txt'),'w') as f:
436
736
  for s in yolo_console_output:
437
737
  f.write(s + '\n')
@@ -485,7 +785,7 @@ def run_inference_with_yolo_val(options):
485
785
  # image_file = yolo_read_failures[0]
486
786
  for image_file in yolo_read_failures:
487
787
  image_id = os.path.splitext(os.path.basename(image_file))[0]
488
- assert image_id in image_id_to_file
788
+ assert image_id in image_id_to_file, 'Unexpected image ID {}'.format(image_id)
489
789
  if image_id not in image_id_to_error:
490
790
  image_id_to_error[image_id] = 'YOLO read failure'
491
791
 
@@ -499,23 +799,27 @@ def run_inference_with_yolo_val(options):
499
799
  assert len(json_files) == 1
500
800
  yolo_json_file = json_files[0]
501
801
 
802
+ # Map YOLO image IDs to paths
502
803
  image_id_to_relative_path = {}
503
804
  for image_id in image_id_to_file:
504
- fn = image_id_to_file[image_id]
505
- if os.path.isdir(options.input_folder):
506
- assert options.input_folder in fn
805
+ fn = image_id_to_file[image_id].replace('\\','/')
806
+ assert path_is_abs(fn)
807
+ if options.input_folder is not None:
808
+ assert os.path.isdir(options.input_folder)
809
+ assert options.input_folder in fn, 'Internal error: base folder {} not in file {}'.format(
810
+ options.input_folder,fn)
507
811
  relative_path = os.path.relpath(fn,options.input_folder)
508
812
  else:
509
- assert os.path.isfile(options.input_folder)
510
813
  # We'll use the absolute path as a relative path, and pass '/'
511
814
  # as the base path in this case.
512
815
  relative_path = fn
513
816
  image_id_to_relative_path[image_id] = relative_path
514
817
 
515
- if os.path.isdir(options.input_folder):
818
+ # Are we working with a base folder?
819
+ if options.input_folder is not None:
820
+ assert os.path.isdir(options.input_folder)
516
821
  image_base = options.input_folder
517
822
  else:
518
- assert os.path.isfile(options.input_folder)
519
823
  image_base = '/'
520
824
 
521
825
  yolo_output_to_md_output.yolo_json_output_to_md_output(
@@ -530,18 +834,10 @@ def run_inference_with_yolo_val(options):
530
834
 
531
835
  ##%% Clean up
532
836
 
533
- if options.remove_symlink_folder:
534
- shutil.rmtree(symlink_folder)
535
- elif symlink_folder_is_temp_folder:
536
- print('Warning: using temporary symlink folder {}, but not removing it'.format(
537
- symlink_folder))
837
+ _clean_up_temporary_folders(options,
838
+ symlink_folder,yolo_results_folder,
839
+ symlink_folder_is_temp_folder,yolo_folder_is_temp_folder)
538
840
 
539
- if options.remove_yolo_results_folder:
540
- shutil.rmtree(yolo_results_folder)
541
- elif yolo_folder_is_temp_folder:
542
- print('Warning: using temporary YOLO results folder {}, but not removing it'.format(
543
- yolo_results_folder))
544
-
545
841
  # ...def run_inference_with_yolo_val()
546
842
 
547
843
 
@@ -560,11 +856,14 @@ def main():
560
856
  help='model file name')
561
857
  parser.add_argument(
562
858
  'input_folder',type=str,
563
- help='folder on which to recursively run the model, or a .json list of filenames')
859
+ help='folder on which to recursively run the model')
564
860
  parser.add_argument(
565
861
  'output_file',type=str,
566
862
  help='.json file where output will be written')
567
863
 
864
+ parser.add_argument(
865
+ '--image_filename_list',type=str,default=None,
866
+ help='.json or .txt file containing a list of relative image filenames within [input_folder]')
568
867
  parser.add_argument(
569
868
  '--yolo_working_folder',type=str,default=None,
570
869
  help='folder in which to execute val.py (not necessary for YOLOv8 inference)')
@@ -584,24 +883,30 @@ def main():
584
883
  help='use half-precision-inference (1 or 0) (default is the underlying model\'s default, probably full for YOLOv8 and half for YOLOv5')
585
884
  parser.add_argument(
586
885
  '--device_string', default=options.device_string, type=str,
587
- help='CUDA device specifier, typically "0" or "1" for CUDA devices, "mps" for M1/M2 devices, or "cpu" (default {})'.format(options.device_string))
886
+ help='CUDA device specifier, typically "0" or "1" for CUDA devices, "mps" for M1/M2 devices, or "cpu" (default {})'.format(
887
+ options.device_string))
588
888
  parser.add_argument(
589
889
  '--overwrite_handling', default=options.overwrite_handling, type=str,
590
890
  help='action to take if the output file exists (skip, error, overwrite) (default {})'.format(
591
891
  options.overwrite_handling))
592
892
  parser.add_argument(
593
893
  '--yolo_dataset_file', default=None, type=str,
594
- help='YOLOv5 dataset.yml file from which we should load category information ' + \
894
+ help='YOLOv5 dataset.yaml file from which we should load category information ' + \
595
895
  '(otherwise defaults to MD categories)')
596
896
  parser.add_argument(
597
897
  '--model_type', default=options.model_type, type=str,
598
- help='Model type ("yolov5" or "ultralytics" ("yolov8" behaves the same as "ultralytics")) (default {})'.format(options.model_type))
898
+ help='model type ("yolov5" or "ultralytics" ("yolov8" behaves the same as "ultralytics")) (default {})'.format(
899
+ options.model_type))
599
900
 
901
+ parser.add_argument('--unique_id_strategy', default=options.unique_id_strategy, type=str,
902
+ help='how should we ensure that unique filenames are passed to the YOLO val script, ' + \
903
+ 'can be "verify", "auto", or "links", see options class docs for details (default {})'.format(
904
+ options.unique_id_strategy))
600
905
  parser.add_argument(
601
- '--symlink_folder', type=str,
906
+ '--symlink_folder', default=None, type=str,
602
907
  help='temporary folder for symlinks (defaults to a folder in the system temp dir)')
603
908
  parser.add_argument(
604
- '--yolo_results_folder', type=str,
909
+ '--yolo_results_folder', default=None, type=str,
605
910
  help='temporary folder for YOLO intermediate output (defaults to a folder in the system temp dir)')
606
911
  parser.add_argument(
607
912
  '--no_use_symlinks', action='store_true',
@@ -615,6 +920,10 @@ def main():
615
920
  parser.add_argument(
616
921
  '--save_yolo_debug_output', action='store_true',
617
922
  help='write yolo console output to a text file in the results folder, along with additional debug files')
923
+ parser.add_argument(
924
+ '--checkpoint_frequency', default=options.checkpoint_frequency, type=int,
925
+ help='break the job into chunks with no more than this many images (default {})'.format(
926
+ options.checkpoint_frequency))
618
927
 
619
928
  parser.add_argument(
620
929
  '--nonrecursive', action='store_true',
@@ -658,13 +967,20 @@ def main():
658
967
 
659
968
  if args.yolo_dataset_file is not None:
660
969
  options.yolo_category_id_to_name = args.yolo_dataset_file
970
+ del options.yolo_dataset_file
661
971
 
662
972
  options.recursive = (not options.nonrecursive)
663
973
  options.remove_symlink_folder = (not options.no_remove_symlink_folder)
664
974
  options.remove_yolo_results_folder = (not options.no_remove_yolo_results_folder)
665
975
  options.use_symlinks = (not options.no_use_symlinks)
666
976
  options.augment = (options.augment_enabled > 0)
667
-
977
+
978
+ del options.nonrecursive
979
+ del options.no_remove_symlink_folder
980
+ del options.no_remove_yolo_results_folder
981
+ del options.no_use_symlinks
982
+ del options.augment_enabled
983
+
668
984
  print(options.__dict__)
669
985
 
670
986
  run_inference_with_yolo_val(options)
@@ -673,58 +989,90 @@ if __name__ == '__main__':
673
989
  main()
674
990
 
675
991
 
676
- #%% Scrap
992
+ #%% Interactive driver
677
993
 
678
994
  if False:
679
995
 
680
- #%% Test driver (folder)
996
+ #%% Run inference on a folder
997
+
998
+ input_folder = r'g:\temp\tegu-val-mini'.replace('\\','/')
999
+ model_filename = r'g:\temp\usgs-tegus-yolov5x-231003-b8-img1280-e3002-best.pt'
1000
+ output_folder = r'g:\temp\tegu-scratch'
1001
+ yolo_working_folder = r'c:\git\yolov5-tegus'
1002
+ dataset_file = r'g:\temp\dataset.yaml'
1003
+
1004
+ # This only impacts the output file name, it's not passed to the inference function
1005
+ job_name = 'yolo-inference-test'
681
1006
 
682
- project_name = 'KRU-test-corrupted'
683
- input_folder = os.path.expanduser(f'~/data/{project_name}')
684
- output_folder = os.path.expanduser(f'~/tmp/{project_name}')
685
- model_filename = os.path.expanduser('~/models/camera_traps/megadetector/md_v5.0.0/md_v5a.0.0.pt')
686
- yolo_working_folder = os.path.expanduser('~/git/yolov5')
687
1007
  model_name = os.path.splitext(os.path.basename(model_filename))[0]
688
1008
 
689
1009
  symlink_folder = os.path.join(output_folder,'symlinks')
690
1010
  yolo_results_folder = os.path.join(output_folder,'yolo_results')
691
1011
 
692
1012
  output_file = os.path.join(output_folder,'{}_{}-md_format.json'.format(
693
- project_name,model_name))
1013
+ job_name,model_name))
694
1014
 
695
1015
  options = YoloInferenceOptions()
696
1016
 
697
1017
  options.yolo_working_folder = yolo_working_folder
698
-
1018
+ options.input_folder = input_folder
699
1019
  options.output_file = output_file
700
1020
 
1021
+ pass_image_filename_list = False
1022
+ pass_relative_paths = True
1023
+
1024
+ if pass_image_filename_list:
1025
+ if pass_relative_paths:
1026
+ options.image_filename_list = [
1027
+ r"val#american_cardinal#american_cardinal#CaCa#31W.01_C83#2017-2019#C90 and C83_31W.01#(05) 18AUG17 - 05SEP17 FTC AEG#MFDC1949_000065.JPG",
1028
+ r"val#american_cardinal#american_cardinal#CaCa#31W.01_C83#2017-2019#C90 and C83_31W.01#(04) 27JUL17 - 18AUG17 FTC AEG#MFDC1902_000064.JPG"
1029
+ ]
1030
+ else:
1031
+ options.image_filename_list = [
1032
+ r"g:/temp/tegu-val-mini/val#american_cardinal#american_cardinal#CaCa#31W.01_C83#2017-2019#C90 and C83_31W.01#(05) 18AUG17 - 05SEP17 FTC AEG#MFDC1949_000065.JPG",
1033
+ r"g:/temp/tegu-val-mini/val#american_cardinal#american_cardinal#CaCa#31W.01_C83#2017-2019#C90 and C83_31W.01#(04) 27JUL17 - 18AUG17 FTC AEG#MFDC1902_000064.JPG"
1034
+ ]
1035
+ else:
1036
+ options.image_filename_list = None
1037
+
1038
+ options.yolo_category_id_to_name = dataset_file
701
1039
  options.augment = False
702
1040
  options.conf_thres = '0.001'
703
1041
  options.batch_size = 1
704
1042
  options.device_string = '0'
1043
+ options.unique_id_strategy = 'auto'
1044
+ options.overwrite_handling = 'overwrite'
705
1045
 
706
1046
  if options.augment:
707
1047
  options.image_size = round(1280 * 1.3)
708
1048
  else:
709
1049
  options.image_size = 1280
710
1050
 
711
- options.input_folder = input_folder
712
1051
  options.model_filename = model_filename
713
1052
 
714
1053
  options.yolo_results_folder = yolo_results_folder # os.path.join(output_folder + 'yolo_results')
715
1054
  options.symlink_folder = symlink_folder # os.path.join(output_folder,'symlinks')
716
1055
  options.use_symlinks = False
717
1056
 
718
- options.remove_temporary_symlink_folder = False
719
- options.remove_yolo_results_file = False
1057
+ options.remove_symlink_folder = True
1058
+ options.remove_yolo_results_folder = True
1059
+
1060
+ options.checkpoint_frequency = 5
720
1061
 
721
1062
  cmd = f'python run_inference_with_yolov5_val.py {model_filename} {input_folder} ' + \
722
1063
  f'{output_file} --yolo_working_folder {yolo_working_folder} ' + \
723
1064
  f' --image_size {options.image_size} --conf_thres {options.conf_thres} ' + \
724
1065
  f' --batch_size {options.batch_size} ' + \
725
1066
  f' --symlink_folder {options.symlink_folder} --yolo_results_folder {options.yolo_results_folder} ' + \
726
- ' --no_remove_symlink_folder --no_remove_yolo_results_folder'
1067
+ f' --yolo_dataset_file {options.yolo_category_id_to_name} ' + \
1068
+ f' --unique_id_strategy {options.unique_id_strategy} --overwrite_handling {options.overwrite_handling}'
727
1069
 
1070
+ if not options.remove_symlink_folder:
1071
+ cmd += ' --no_remove_symlink_folder'
1072
+ if not options.remove_yolo_results_folder:
1073
+ cmd += ' --no_remove_yolo_results_folder'
1074
+ if options.checkpoint_frequency is not None:
1075
+ cmd += f' --checkpoint_frequency {options.checkpoint_frequency}'
728
1076
  if not options.use_symlinks:
729
1077
  cmd += ' --no_use_symlinks'
730
1078
  if not options.augment:
@@ -736,182 +1084,4 @@ if False:
736
1084
  run_inference_with_yolo_val(options)
737
1085
  else:
738
1086
  import clipboard; clipboard.copy(cmd)
739
-
740
-
741
- #%% Test driver (folder) (YOLOv8 model)
742
-
743
- project_name = 'yolov8-inference-test'
744
- input_folder = os.path.expanduser('~/data/usgs-kissel-training-resized/val')
745
- dataset_file = os.path.expanduser('~/data/usgs-kissel-training-yolo/dataset.yaml')
746
- output_folder = os.path.expanduser(f'~/tmp/{project_name}')
747
- model_filename = os.path.expanduser(
748
- '~/models/usgs-tegus/usgs-tegus-yolov8x-2023.10.25-b-1-img640-e200-best.pt')
749
- model_name = os.path.splitext(os.path.basename(model_filename))[0]
750
-
751
- assert os.path.isdir(input_folder)
752
- assert os.path.isfile(dataset_file)
753
- assert os.path.isfile(model_filename)
754
-
755
- symlink_folder = os.path.join(output_folder,'symlinks')
756
- yolo_results_folder = os.path.join(output_folder,'yolo_results')
757
-
758
- output_file = os.path.join(output_folder,'{}_{}-md_format.json'.format(
759
- project_name,model_name))
760
-
761
- options = YoloInferenceOptions()
762
-
763
- options.model_type = 'yolov8'
764
- options.yolo_category_id_to_name = dataset_file
765
- options.yolo_working_folder = None
766
- options.output_file = output_file
767
-
768
- options.augment = False
769
- options.conf_thres = '0.001'
770
- options.batch_size = 1
771
- options.device_string = '0'
772
1087
 
773
- if options.augment:
774
- options.image_size = round(640 * 1.3)
775
- else:
776
- options.image_size = 640
777
-
778
- options.input_folder = input_folder
779
- options.model_filename = model_filename
780
-
781
- options.yolo_results_folder = yolo_results_folder
782
- options.symlink_folder = symlink_folder
783
- options.use_symlinks = False
784
-
785
- options.remove_temporary_symlink_folder = False
786
- options.remove_yolo_results_file = False
787
-
788
- cmd = f'python run_inference_with_yolov5_val.py {model_filename} ' + \
789
- f'{input_folder} {output_file}' + \
790
- f' --image_size {options.image_size} --conf_thres {options.conf_thres} ' + \
791
- f' --batch_size {options.batch_size} --symlink_folder {options.symlink_folder} ' + \
792
- f'--yolo_results_folder {options.yolo_results_folder} --model_type {options.model_type}' + \
793
- f' --yolo_dataset_file {options.yolo_category_id_to_name}' + \
794
- ' --no_remove_symlink_folder --no_remove_yolo_results_folder'
795
-
796
- if not options.use_symlinks:
797
- cmd += ' --no_use_symlinks'
798
- if not options.augment:
799
- cmd += ' --augment_enabled 0'
800
-
801
- print(cmd)
802
- execute_in_python = False
803
- if execute_in_python:
804
- run_inference_with_yolo_val(options)
805
- else:
806
- import clipboard; clipboard.copy(cmd)
807
-
808
-
809
- #%% Preview results
810
-
811
- postprocessing_output_folder = os.path.join(output_folder,'yolo-val-preview')
812
- md_json_file = options.output_file
813
-
814
- from megadetector.postprocessing.postprocess_batch_results import \
815
- PostProcessingOptions, process_batch_results
816
-
817
- with open(md_json_file,'r') as f:
818
- d = json.load(f)
819
-
820
- base_task_name = os.path.basename(md_json_file)
821
-
822
- pp_options = PostProcessingOptions()
823
- pp_options.image_base_dir = input_folder
824
- pp_options.include_almost_detections = True
825
- pp_options.num_images_to_sample = None
826
- pp_options.confidence_threshold = 0.1
827
- pp_options.almost_detection_confidence_threshold = pp_options.confidence_threshold - 0.025
828
- pp_options.ground_truth_json_file = None
829
- pp_options.separate_detections_by_category = True
830
- # pp_options.sample_seed = 0
831
-
832
- pp_options.parallelize_rendering = True
833
- pp_options.parallelize_rendering_n_cores = 16
834
- pp_options.parallelize_rendering_with_threads = False
835
-
836
- output_base = os.path.join(postprocessing_output_folder,
837
- base_task_name + '_{:.3f}'.format(pp_options.confidence_threshold))
838
-
839
- os.makedirs(output_base, exist_ok=True)
840
- print('Processing to {}'.format(output_base))
841
-
842
- pp_options.md_results_file = md_json_file
843
- pp_options.output_dir = output_base
844
- ppresults = process_batch_results(pp_options)
845
- html_output_file = ppresults.output_html_file
846
-
847
- path_utils.open_file(html_output_file)
848
-
849
- # ...for each prediction file
850
-
851
-
852
- #%% Compare results
853
-
854
- import itertools
855
-
856
- from megadetector.postprocessing.compare_batch_results import \
857
- BatchComparisonOptions,PairwiseBatchComparisonOptions,compare_batch_results
858
-
859
- options = BatchComparisonOptions()
860
-
861
- organization_name = ''
862
- project_name = ''
863
-
864
- options.job_name = f'{organization_name}-comparison'
865
- options.output_folder = os.path.join(output_folder,'model_comparison')
866
- options.image_folder = input_folder
867
-
868
- options.pairwise_options = []
869
-
870
- filenames = [
871
- f'/home/user/tmp/{project_name}/{project_name}_md_v5a.0.0-md_format.json',
872
- f'/home/user/postprocessing/{organization_name}/{organization_name}-2023-04-06-v5a.0.0/combined_api_outputs/{organization_name}-2023-04-06-v5a.0.0_detections.json',
873
- f'/home/user/postprocessing/{organization_name}/{organization_name}-2023-04-06-v5b.0.0/combined_api_outputs/{organization_name}-2023-04-06-v5b.0.0_detections.json'
874
- ]
875
-
876
- descriptions = ['YOLO w/augment','MDv5a','MDv5b']
877
-
878
- if False:
879
- results = []
880
-
881
- for fn in filenames:
882
- with open(fn,'r') as f:
883
- d = json.load(f)
884
- results.append(d)
885
-
886
- detection_thresholds = [0.1,0.1,0.1]
887
-
888
- assert len(detection_thresholds) == len(filenames)
889
-
890
- rendering_thresholds = [(x*0.6666) for x in detection_thresholds]
891
-
892
- # Choose all pairwise combinations of the files in [filenames]
893
- for i, j in itertools.combinations(list(range(0,len(filenames))),2):
894
-
895
- pairwise_options = PairwiseBatchComparisonOptions()
896
-
897
- pairwise_options.results_filename_a = filenames[i]
898
- pairwise_options.results_filename_b = filenames[j]
899
-
900
- pairwise_options.results_description_a = descriptions[i]
901
- pairwise_options.results_description_b = descriptions[j]
902
-
903
- pairwise_options.rendering_confidence_threshold_a = rendering_thresholds[i]
904
- pairwise_options.rendering_confidence_threshold_b = rendering_thresholds[j]
905
-
906
- pairwise_options.detection_thresholds_a = {'animal':detection_thresholds[i],
907
- 'person':detection_thresholds[i],
908
- 'vehicle':detection_thresholds[i]}
909
- pairwise_options.detection_thresholds_b = {'animal':detection_thresholds[j],
910
- 'person':detection_thresholds[j],
911
- 'vehicle':detection_thresholds[j]}
912
- options.pairwise_options.append(pairwise_options)
913
-
914
- results = compare_batch_results(options)
915
-
916
- from megadetector.utils.path_utils import open_file
917
- open_file(results.html_output_file)