megadetector-10.0.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (147)
  1. megadetector/__init__.py +0 -0
  2. megadetector/api/__init__.py +0 -0
  3. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  7. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  8. megadetector/classification/__init__.py +0 -0
  9. megadetector/classification/aggregate_classifier_probs.py +108 -0
  10. megadetector/classification/analyze_failed_images.py +227 -0
  11. megadetector/classification/cache_batchapi_outputs.py +198 -0
  12. megadetector/classification/create_classification_dataset.py +626 -0
  13. megadetector/classification/crop_detections.py +516 -0
  14. megadetector/classification/csv_to_json.py +226 -0
  15. megadetector/classification/detect_and_crop.py +853 -0
  16. megadetector/classification/efficientnet/__init__.py +9 -0
  17. megadetector/classification/efficientnet/model.py +415 -0
  18. megadetector/classification/efficientnet/utils.py +608 -0
  19. megadetector/classification/evaluate_model.py +520 -0
  20. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  21. megadetector/classification/json_to_azcopy_list.py +63 -0
  22. megadetector/classification/json_validator.py +696 -0
  23. megadetector/classification/map_classification_categories.py +276 -0
  24. megadetector/classification/merge_classification_detection_output.py +509 -0
  25. megadetector/classification/prepare_classification_script.py +194 -0
  26. megadetector/classification/prepare_classification_script_mc.py +228 -0
  27. megadetector/classification/run_classifier.py +287 -0
  28. megadetector/classification/save_mislabeled.py +110 -0
  29. megadetector/classification/train_classifier.py +827 -0
  30. megadetector/classification/train_classifier_tf.py +725 -0
  31. megadetector/classification/train_utils.py +323 -0
  32. megadetector/data_management/__init__.py +0 -0
  33. megadetector/data_management/animl_to_md.py +161 -0
  34. megadetector/data_management/annotations/__init__.py +0 -0
  35. megadetector/data_management/annotations/annotation_constants.py +33 -0
  36. megadetector/data_management/camtrap_dp_to_coco.py +270 -0
  37. megadetector/data_management/cct_json_utils.py +566 -0
  38. megadetector/data_management/cct_to_md.py +184 -0
  39. megadetector/data_management/cct_to_wi.py +293 -0
  40. megadetector/data_management/coco_to_labelme.py +284 -0
  41. megadetector/data_management/coco_to_yolo.py +702 -0
  42. megadetector/data_management/databases/__init__.py +0 -0
  43. megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
  44. megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
  45. megadetector/data_management/databases/integrity_check_json_db.py +528 -0
  46. megadetector/data_management/databases/subset_json_db.py +195 -0
  47. megadetector/data_management/generate_crops_from_cct.py +200 -0
  48. megadetector/data_management/get_image_sizes.py +164 -0
  49. megadetector/data_management/labelme_to_coco.py +559 -0
  50. megadetector/data_management/labelme_to_yolo.py +349 -0
  51. megadetector/data_management/lila/__init__.py +0 -0
  52. megadetector/data_management/lila/create_lila_blank_set.py +556 -0
  53. megadetector/data_management/lila/create_lila_test_set.py +187 -0
  54. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  55. megadetector/data_management/lila/download_lila_subset.py +182 -0
  56. megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
  57. megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
  58. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  59. megadetector/data_management/lila/lila_common.py +319 -0
  60. megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
  61. megadetector/data_management/mewc_to_md.py +344 -0
  62. megadetector/data_management/ocr_tools.py +873 -0
  63. megadetector/data_management/read_exif.py +964 -0
  64. megadetector/data_management/remap_coco_categories.py +195 -0
  65. megadetector/data_management/remove_exif.py +156 -0
  66. megadetector/data_management/rename_images.py +194 -0
  67. megadetector/data_management/resize_coco_dataset.py +663 -0
  68. megadetector/data_management/speciesnet_to_md.py +41 -0
  69. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  70. megadetector/data_management/yolo_output_to_md_output.py +594 -0
  71. megadetector/data_management/yolo_to_coco.py +876 -0
  72. megadetector/data_management/zamba_to_md.py +188 -0
  73. megadetector/detection/__init__.py +0 -0
  74. megadetector/detection/change_detection.py +840 -0
  75. megadetector/detection/process_video.py +479 -0
  76. megadetector/detection/pytorch_detector.py +1451 -0
  77. megadetector/detection/run_detector.py +1267 -0
  78. megadetector/detection/run_detector_batch.py +2159 -0
  79. megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
  80. megadetector/detection/run_md_and_speciesnet.py +1494 -0
  81. megadetector/detection/run_tiled_inference.py +1038 -0
  82. megadetector/detection/tf_detector.py +209 -0
  83. megadetector/detection/video_utils.py +1379 -0
  84. megadetector/postprocessing/__init__.py +0 -0
  85. megadetector/postprocessing/add_max_conf.py +72 -0
  86. megadetector/postprocessing/categorize_detections_by_size.py +166 -0
  87. megadetector/postprocessing/classification_postprocessing.py +1752 -0
  88. megadetector/postprocessing/combine_batch_outputs.py +249 -0
  89. megadetector/postprocessing/compare_batch_results.py +2110 -0
  90. megadetector/postprocessing/convert_output_format.py +403 -0
  91. megadetector/postprocessing/create_crop_folder.py +629 -0
  92. megadetector/postprocessing/detector_calibration.py +570 -0
  93. megadetector/postprocessing/generate_csv_report.py +522 -0
  94. megadetector/postprocessing/load_api_results.py +223 -0
  95. megadetector/postprocessing/md_to_coco.py +428 -0
  96. megadetector/postprocessing/md_to_labelme.py +351 -0
  97. megadetector/postprocessing/md_to_wi.py +41 -0
  98. megadetector/postprocessing/merge_detections.py +392 -0
  99. megadetector/postprocessing/postprocess_batch_results.py +2077 -0
  100. megadetector/postprocessing/remap_detection_categories.py +226 -0
  101. megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
  102. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
  103. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
  104. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
  105. megadetector/postprocessing/separate_detections_into_folders.py +795 -0
  106. megadetector/postprocessing/subset_json_detector_output.py +964 -0
  107. megadetector/postprocessing/top_folders_to_bottom.py +238 -0
  108. megadetector/postprocessing/validate_batch_results.py +332 -0
  109. megadetector/taxonomy_mapping/__init__.py +0 -0
  110. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  111. megadetector/taxonomy_mapping/map_new_lila_datasets.py +213 -0
  112. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
  113. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
  114. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  115. megadetector/taxonomy_mapping/simple_image_download.py +224 -0
  116. megadetector/taxonomy_mapping/species_lookup.py +1008 -0
  117. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  118. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  119. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  120. megadetector/tests/__init__.py +0 -0
  121. megadetector/tests/test_nms_synthetic.py +335 -0
  122. megadetector/utils/__init__.py +0 -0
  123. megadetector/utils/ct_utils.py +1857 -0
  124. megadetector/utils/directory_listing.py +199 -0
  125. megadetector/utils/extract_frames_from_video.py +307 -0
  126. megadetector/utils/gpu_test.py +125 -0
  127. megadetector/utils/md_tests.py +2072 -0
  128. megadetector/utils/path_utils.py +2832 -0
  129. megadetector/utils/process_utils.py +172 -0
  130. megadetector/utils/split_locations_into_train_val.py +237 -0
  131. megadetector/utils/string_utils.py +234 -0
  132. megadetector/utils/url_utils.py +825 -0
  133. megadetector/utils/wi_platform_utils.py +968 -0
  134. megadetector/utils/wi_taxonomy_utils.py +1759 -0
  135. megadetector/utils/write_html_image_list.py +239 -0
  136. megadetector/visualization/__init__.py +0 -0
  137. megadetector/visualization/plot_utils.py +309 -0
  138. megadetector/visualization/render_images_with_thumbnails.py +243 -0
  139. megadetector/visualization/visualization_utils.py +1940 -0
  140. megadetector/visualization/visualize_db.py +630 -0
  141. megadetector/visualization/visualize_detector_output.py +479 -0
  142. megadetector/visualization/visualize_video_output.py +705 -0
  143. megadetector-10.0.13.dist-info/METADATA +134 -0
  144. megadetector-10.0.13.dist-info/RECORD +147 -0
  145. megadetector-10.0.13.dist-info/WHEEL +5 -0
  146. megadetector-10.0.13.dist-info/licenses/LICENSE +19 -0
  147. megadetector-10.0.13.dist-info/top_level.txt +1 -0
megadetector/utils/md_tests.py
@@ -0,0 +1,2072 @@
+ """
2
+
3
+ md_tests.py
4
+
5
+ A series of tests to validate basic repo functionality and verify either "correct"
6
+ inference behavior, or - when operating in environments other than the training
7
+ environment - acceptable deviation from the correct results.
8
+
9
+ This module should not depend on anything else in this repo outside of the
10
+ tests themselves, even if it means some duplicated code (e.g. for downloading files),
11
+ since much of what it tries to test is, e.g., imports.
12
+
13
+ "Correctness" is determined by agreement with a file that this script fetches from lila.science.
14
+
15
+ """
16
+
17
+ #%% Imports and constants
18
+
19
+ ### Only standard imports belong here, not MD-specific imports ###
20
+
21
+ import os
22
+ import json
23
+ import glob
24
+ import sys
25
+ import tempfile
26
+ import urllib
27
+ import urllib.request
28
+ import zipfile
29
+ import subprocess
30
+ import argparse
31
+ import inspect
32
+ import pytest
33
+
34
+ from copy import copy
35
+
36
+
37
#%% Classes

class MDTestOptions:
    """
    Options controlling test behavior
    """

    def __init__(self):

        ## Required ##

        #: Force CPU execution
        self.disable_gpu = False

        #: If GPU execution is requested, but a GPU is not available, should we error?
        self.cpu_execution_is_error = False

        #: Skip tests related to video processing
        self.skip_video_tests = False

        #: Skip tests related to still image processing
        self.skip_image_tests = False

        #: Skip tests launched via Python functions (as opposed to CLIs)
        self.skip_python_tests = False

        #: Skip module import tests
        self.skip_import_tests = False

        #: Skip CLI tests
        self.skip_cli_tests = False

        #: Skip download tests
        self.skip_download_tests = False

        #: Skip download tests for local URLs
        self.skip_localhost_downloads = False

        #: Skip force-CPU tests
        self.skip_cpu_tests = False

        #: Force a specific folder for temporary input/output
        self.scratch_dir = None

        #: Where does the test data live?
        self.test_data_url = 'https://lila.science/public/md-test-package.zip'

        #: Download test data even if it appears to have already been downloaded
        self.force_data_download = False

        #: Unzip test data even if it appears to have already been unzipped
        self.force_data_unzip = False

        #: By default, any unexpected behavior is an error; this forces most errors to
        #: be treated as warnings.
        self.warning_mode = False

        #: How much deviation from the expected detection coordinates should we allow before
        #: a discrepancy becomes an error?
        self.max_coord_error = 0.001

        #: How much deviation from the expected confidence values should we allow before
        #: a discrepancy becomes an error?
        self.max_conf_error = 0.005

        #: Current working directory when running CLI tests
        #:
        #: If this is None, we won't mess with the inherited working directory.
        self.cli_working_dir = None

        #: YOLOv5 installation, only relevant if we're testing run_inference_with_yolov5_val.
        #:
        #: If this is None, we'll skip that test.
        self.yolo_working_dir = None

        #: Default model to use for testing (filename, URL, or well-known model string)
        self.default_model = 'MDV5A'

        #: For comparison tests, use a model that produces slightly different output
        self.alt_model = 'MDV5B'

        #: PYTHONPATH to set for CLI tests; if None, inherits from the parent process. Only
        #: impacts the called functions, not the parent process.
        self.cli_test_pythonpath = None

        #: IoU threshold used to determine whether boxes in two detection files likely correspond
        #: to the same box.
        self.iou_threshold_for_file_comparison = 0.85

        #: Detector options passed to PTDetector
        self.detector_options = {'compatibility_mode':'classic-test'}

        #: Used to drive a series of tests (typically with a low value for
        #: python_test_depth) over a folder of models.
        self.model_folder = None

        #: Used as a knob to control the level of Python tests, typically used when
        #: we want to run a series of simple tests on a large number of models, rather
        #: than a deep set of tests on a small number of models. The gestalt is that
        #: this is a range from 0-100.
        self.python_test_depth = 100

        #: Currently should be 'all' or 'utils-only'
        self.test_mode = 'all'

        #: Number of cores to use for multi-CPU inference tests
        self.n_cores_for_multiprocessing_tests = 2

        #: Batch size to use when testing batches of size > 1
        self.alternative_batch_size = 3

    # ...def __init__()

# ...class MDTestOptions()


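# A minimal usage sketch (not invoked anywhere in this module): construct an options
# object, tweak a few fields, and hand it to the entry points defined later in this file.
def _example_minimal_python_test_run():

    example_options = MDTestOptions()
    example_options.disable_gpu = True        # force CPU execution
    example_options.skip_video_tests = True   # still images only
    example_options.warning_mode = True       # report mismatches rather than asserting

    # download_test_data() populates scratch_dir, test_images, and test_videos
    example_options = download_test_data(example_options)
    run_python_tests(example_options)
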
#%% Support functions

def get_expected_results_filename(gpu_is_available,
                                  model_string='mdv5a',
                                  test_type='image',
                                  augment=False,
                                  options=None):
    """
    Expected results vary just a little across inference environments, particularly
    between PT 1.x and 2.x, so when making sure things are working acceptably, we
    compare to a reference file that matches the current environment.

    This function gets the correct filename to compare to current results, depending
    on whether a GPU is available.

    Args:
        gpu_is_available (bool): whether a GPU is available
        model_string (str, optional): the model for which we're retrieving expected results
        test_type (str, optional): the test type we're running ("image" or "video")
        augment (bool, optional): whether we're running this test with image augmentation
        options (MDTestOptions, optional): additional control flow options

    Returns:
        str: relative filename of the results file we should use (within the test
        data zipfile)
    """

    if gpu_is_available:
        hw_string = 'gpu'
    else:
        hw_string = 'cpu'
    import torch
    torch_version = str(torch.__version__)
    if torch_version.startswith('1'):
        assert torch_version == '1.10.1', 'Only tested against PT 1.10.1 and PT 2.x'
        pt_string = 'pt1.10.1'
    else:
        assert torch_version.startswith('2'), 'Unknown torch version: {}'.format(torch_version)
        pt_string = 'pt2.x'

    # A hack for now to account for the fact that even with acceleration enabled and PT2
    # installed, Apple silicon appears to provide the same results as CPU/PT1 inference
    try:
        import torch
        m1_inference = torch.backends.mps.is_built() and torch.backends.mps.is_available()
        if m1_inference:
            print('I appear to be running on M1/M2 hardware, using pt1/cpu as the reference results')
            hw_string = 'cpu'
            pt_string = 'pt1.10.1'
    except Exception:
        pass

    aug_string = ''
    if augment:
        aug_string = 'augment-'

    # We only have a single set of video results
    if test_type == 'image':
        fn = '{}-{}{}-{}-{}.json'.format(model_string,aug_string,test_type,hw_string,pt_string)
    else:
        fn = '{}-{}.json'.format(model_string,test_type)

    if options is not None and options.scratch_dir is not None:
        fn = os.path.join(options.scratch_dir,fn)

    return fn


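# A concrete illustration of the naming scheme above (the exact name depends on the local
# torch version and hardware): with a GPU and PyTorch 2.x, the MDv5a image reference file
# with augmentation enabled is 'mdv5a-augment-image-gpu-pt2.x.json'; video reference files
# are hardware-independent, e.g. 'mdv5a-video.json'.
def _example_expected_results_filename():

    fn = get_expected_results_filename(is_gpu_available(verbose=False),
                                       model_string='mdv5a',
                                       test_type='image',
                                       augment=True)
    print(fn)
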
def download_test_data(options=None):
    """
    Downloads the test zipfile if necessary, unzips if necessary. Initializes
    temporary fields in [options], particularly [options.scratch_dir].

    Args:
        options (MDTestOptions, optional): see MDTestOptions for details

    Returns:
        MDTestOptions: the same object passed in as input, or the options that
        were used if [options] was supplied as None
    """

    if options is None:
        options = MDTestOptions()

    if options.scratch_dir is None:
        tempdir_base = tempfile.gettempdir()
        scratch_dir = os.path.join(tempdir_base,'md-tests')
    else:
        scratch_dir = options.scratch_dir

    os.makedirs(scratch_dir,exist_ok=True)

    # See whether we've already downloaded the data zipfile
    local_zipfile = os.path.join(scratch_dir,options.test_data_url.split('/')[-1])
    download_zipfile = True
    if not options.force_data_download:
        if os.path.isfile(local_zipfile):
            url_info = urllib.request.urlopen(options.test_data_url).info()
            remote_size = int(url_info['Content-Length'])
            target_file_size = os.path.getsize(local_zipfile)
            if remote_size == target_file_size:
                download_zipfile = False

    if download_zipfile:
        print('Downloading test data zipfile')
        urllib.request.urlretrieve(options.test_data_url, local_zipfile)
        print('Finished download to {}'.format(local_zipfile))
    else:
        print('Bypassing test data zipfile download for {}'.format(local_zipfile))


    ## Unzip data

    zipf = zipfile.ZipFile(local_zipfile)
    zip_contents = zipf.filelist

    # file_info = zip_contents[1]
    for file_info in zip_contents:

        expected_size = file_info.file_size
        if expected_size == 0:
            continue
        fn_relative = file_info.filename
        target_file = os.path.join(scratch_dir,fn_relative)
        unzip_file = True
        if (not options.force_data_unzip) and os.path.isfile(target_file):
            existing_file_size = os.path.getsize(target_file)
            if existing_file_size == expected_size:
                unzip_file = False
        if unzip_file:
            os.makedirs(os.path.dirname(target_file),exist_ok=True)
            with open(target_file,'wb') as f:
                f.write(zipf.read(fn_relative))

    # ...for each file in the zipfile

    try:
        zipf.close()
    except Exception as e:
        print('Warning: error closing zipfile:\n{}'.format(str(e)))

    # Make sure all the files in the zipfile are present on disk
    test_files = glob.glob(os.path.join(scratch_dir,'**/*'), recursive=True)
    test_files = [os.path.relpath(fn,scratch_dir).replace('\\','/') for fn in test_files]
    test_files_set = set(test_files)
    expected_images_set = set(zipf.namelist())
    for fn in expected_images_set:
        if fn.endswith('/'):
            continue
        assert fn in test_files_set, 'File {} is missing from the test image folder'.format(fn)

    # Populate the test options with test data information
    options.scratch_dir = scratch_dir
    options.all_test_files = test_files
    options.test_images = [fn for fn in test_files if os.path.splitext(fn.lower())[1] in ('.jpg','.jpeg','.png')]
    options.test_videos = [fn for fn in test_files if os.path.splitext(fn.lower())[1] in ('.mp4','.avi')]
    options.test_videos = [fn for fn in options.test_videos if \
                           os.path.isfile(os.path.join(scratch_dir,fn))]

    print('Finished unzipping and enumerating test data')

    return options

# ...def download_test_data(...)


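# A minimal sketch of how the fields populated above are typically consumed; re-running
# download_test_data() is cheap, because existing files are re-downloaded/re-unzipped only
# when their sizes don't match.
def _example_enumerate_test_data():

    opts = download_test_data()
    print('Test data lives in {}'.format(opts.scratch_dir))
    print('{} test images, {} test videos'.format(len(opts.test_images),len(opts.test_videos)))
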
def is_gpu_available(verbose=True):
    """
    Checks whether a GPU (including M1/M2 MPS) is available, according to PyTorch. Returns
    false if PT fails to import.

    Args:
        verbose (bool, optional): enable additional debug console output

    Returns:
        bool: whether a GPU is available
    """

    # Import torch inside this function, so we have a chance to set CUDA_VISIBLE_DEVICES
    # before checking GPU availability.
    try:
        import torch
    except Exception:
        print('Warning: could not import torch')
        return False

    gpu_available = torch.cuda.is_available()

    if gpu_available:
        if verbose:
            print('CUDA available: {}'.format(gpu_available))
            device_ids = list(range(torch.cuda.device_count()))
            if len(device_ids) > 1:
                print('Found multiple devices: {}'.format(str(device_ids)))
    else:
        try:
            gpu_available = torch.backends.mps.is_built() and torch.backends.mps.is_available()
        except AttributeError:
            pass
        if gpu_available:
            print('Metal performance shaders available')

    if not gpu_available:
        print('No GPU available')

    return gpu_available

# ...def is_gpu_available(...)


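# Because torch is imported lazily above, a caller can hide CUDA devices before the first
# torch import and force the CPU code path; a minimal sketch (CUDA_VISIBLE_DEVICES is the
# standard CUDA environment variable, not something specific to this module):
def _example_force_cpu_availability_check():

    os.environ['CUDA_VISIBLE_DEVICES'] = ''
    print('GPU available: {}'.format(is_gpu_available(verbose=True)))
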
def output_files_are_identical(fn1,fn2,verbose=False):
    """
    Checks whether two MD-formatted output files are identical other than file sorting.

    Args:
        fn1 (str): the first filename to compare
        fn2 (str): the second filename to compare
        verbose (bool, optional): enable additional debug output

    Returns:
        bool: whether [fn1] and [fn2] are identical other than file sorting.
    """

    if verbose:
        print('Comparing {} to {}'.format(fn1,fn2))

    with open(fn1,'r') as f:
        fn1_results = json.load(f)
        fn1_results['images'] = \
            sorted(fn1_results['images'], key=lambda d: d['file'])

    with open(fn2,'r') as f:
        fn2_results = json.load(f)
        fn2_results['images'] = \
            sorted(fn2_results['images'], key=lambda d: d['file'])

    if len(fn1_results['images']) != len(fn2_results['images']):
        if verbose:
            print('{} images in {}, {} images in {}'.format(
                len(fn1_results['images']),fn1,
                len(fn2_results['images']),fn2))
        return False

    # i_image = 0; fn1_image = fn1_results['images'][i_image]
    for i_image,fn1_image in enumerate(fn1_results['images']):

        fn2_image = fn2_results['images'][i_image]

        if fn1_image['file'] != fn2_image['file']:
            if verbose:
                print('Filename difference at {}: {} vs {} '.format(i_image,
                    fn1_image['file'],
                    fn2_image['file']))
            return False

        if fn1_image != fn2_image:
            if verbose:
                print('Image-level difference in image {}: {}'.format(i_image,fn1_image['file']))
            return False

    return True

# ...def output_files_are_identical(...)


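# A trivial sanity-check sketch: any well-formed MD results file should compare as
# identical to itself.
def _example_output_files_are_identical(results_file):

    assert output_files_are_identical(results_file, results_file, verbose=True)
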
def compare_detection_lists(detections_a,detections_b,options,bidirectional_comparison=True):
    """
    Compare two lists of MD-formatted detections, matching detections across lists using IoU
    criteria. Generally used to compare detections for the same image when two sets of results
    are expected to be more or less the same.

    Args:
        detections_a (list): the first set of detection dicts
        detections_b (list): the second set of detection dicts
        options (MDTestOptions): options that determine tolerable differences between files
        bidirectional_comparison (bool, optional): reverse the arguments and make a recursive
            call.

    Returns:
        dict: a dictionary with keys 'max_conf_error' and 'max_coord_error', plus the
        corresponding detections.
    """

    from megadetector.utils.ct_utils import get_iou

    max_conf_error = 0
    max_coord_error = 0

    max_conf_error_det_a = None
    max_conf_error_det_b = None

    max_coord_error_det_a = None
    max_coord_error_det_b = None

    # i_det_a = 0
    for i_det_a in range(0,len(detections_a)):

        det_a = detections_a[i_det_a]

        # Don't process very-low-confidence boxes
        # if det_a['conf'] < options.max_conf_error:
        #     continue

        matching_det_b = None
        highest_iou = -1

        # Find the closest match in the detections_b list

        # i_det_b = 0
        for i_det_b in range(0,len(detections_b)):

            det_b = detections_b[i_det_b]

            if det_b['category'] != det_a['category']:
                continue

            iou = get_iou(det_a['bbox'],det_b['bbox'])

            # Is this likely the same detection as det_a?
            if iou >= options.iou_threshold_for_file_comparison and iou > highest_iou:
                matching_det_b = det_b
                highest_iou = iou

        # If there are no detections in this category in detections_b
        if matching_det_b is None:
            if det_a['conf'] > max_conf_error:
                max_conf_error = det_a['conf']
                max_conf_error_det_a = det_a
            # max_coord_error = 1.0
            continue

        assert det_a['category'] == matching_det_b['category']
        conf_err = abs(det_a['conf'] - matching_det_b['conf'])
        coord_differences = []
        for i_coord in range(0,4):
            coord_differences.append(abs(det_a['bbox'][i_coord]-\
                                         matching_det_b['bbox'][i_coord]))
        coord_err = max(coord_differences)

        if conf_err >= max_conf_error:
            max_conf_error = conf_err
            max_conf_error_det_a = det_a
            max_conf_error_det_b = matching_det_b

        if coord_err >= max_coord_error:
            max_coord_error = coord_err
            max_coord_error_det_a = det_a
            max_coord_error_det_b = matching_det_b

    # ...for each detection in detections_a

    if bidirectional_comparison:

        reverse_comparison_results = compare_detection_lists(detections_b,
                                                             detections_a,
                                                             options,
                                                             bidirectional_comparison=False)

        if reverse_comparison_results['max_conf_error'] > max_conf_error:
            max_conf_error = reverse_comparison_results['max_conf_error']
            max_conf_error_det_a = reverse_comparison_results['max_conf_error_det_b']
            max_conf_error_det_b = reverse_comparison_results['max_conf_error_det_a']
        if reverse_comparison_results['max_coord_error'] > max_coord_error:
            max_coord_error = reverse_comparison_results['max_coord_error']
            max_coord_error_det_a = reverse_comparison_results['max_coord_error_det_b']
            max_coord_error_det_b = reverse_comparison_results['max_coord_error_det_a']

    list_comparison_results = {}

    list_comparison_results['max_coord_error'] = max_coord_error
    list_comparison_results['max_coord_error_det_a'] = max_coord_error_det_a
    list_comparison_results['max_coord_error_det_b'] = max_coord_error_det_b

    list_comparison_results['max_conf_error'] = max_conf_error
    list_comparison_results['max_conf_error_det_a'] = max_conf_error_det_a
    list_comparison_results['max_conf_error_det_b'] = max_conf_error_det_b

    return list_comparison_results

# ...def compare_detection_lists(...)


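# A minimal worked example (synthetic boxes, not taken from any real results file): two
# detections with identical coordinates and a small confidence difference match under the
# default IoU threshold of 0.85, so the comparison reports a confidence error of about
# 0.02 and a coordinate error of 0.
def _example_compare_detection_lists():

    dets_a = [{'category':'1','conf':0.90,'bbox':[0.1,0.1,0.4,0.4]}]
    dets_b = [{'category':'1','conf':0.88,'bbox':[0.1,0.1,0.4,0.4]}]
    r = compare_detection_lists(dets_a,dets_b,MDTestOptions())
    print(r['max_conf_error'],r['max_coord_error'])
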
def compare_results(inference_output_file,
                    expected_results_file,
                    options,
                    expected_results_file_is_absolute=False):
    """
    Compare two MD-formatted output files that should be nearly identical, allowing small
    changes (e.g. rounding differences). Generally used to compare a new results file to
    an expected results file.

    Args:
        inference_output_file (str): the first results file to compare
        expected_results_file (str): the second results file to compare
        options (MDTestOptions): options that determine tolerable differences between files
        expected_results_file_is_absolute (bool, optional): by default,
            expected_results_file is appended to options.scratch_dir; this option
            specifies that it's an absolute path.

    Returns:
        dict: dictionary with keys 'max_coord_error' and 'max_conf_error'
    """

    # Read results
    with open(inference_output_file,'r') as f:
        results_from_file = json.load(f) # noqa

    if not expected_results_file_is_absolute:
        expected_results_file = os.path.join(options.scratch_dir,expected_results_file)

    with open(expected_results_file,'r') as f:
        expected_results = json.load(f)

    filename_to_results = {im['file'].replace('\\','/'):im for im in results_from_file['images']}
    filename_to_results_expected = {im['file'].replace('\\','/'):im for im in expected_results['images']}

    assert len(filename_to_results) == len(filename_to_results_expected), \
        'Error: comparing expected file {} to actual file {}, expected {} files in results, found {}'.format(
            expected_results_file,
            inference_output_file,
            len(filename_to_results_expected),
            len(filename_to_results))

    max_conf_error = -1
    max_conf_error_file = None
    max_conf_error_comparison_results = None

    max_coord_error = -1
    max_coord_error_file = None
    max_coord_error_comparison_results = None

    # fn = next(iter(filename_to_results.keys()))
    for fn in filename_to_results.keys():

        actual_image_results = filename_to_results[fn]
        expected_image_results = filename_to_results_expected[fn]

        if 'failure' in actual_image_results:
            # We allow some variation in how failures are represented
            assert 'failure' in expected_image_results and \
                (
                    ('detections' not in actual_image_results) or \
                    (actual_image_results['detections'] is None)
                ) and \
                (
                    ('detections' not in expected_image_results) or \
                    (expected_image_results['detections'] is None)
                )
            continue
        assert 'failure' not in expected_image_results

        actual_detections = actual_image_results['detections']
        expected_detections = expected_image_results['detections']

        comparison_results_this_image = compare_detection_lists(
            detections_a=actual_detections,
            detections_b=expected_detections,
            options=options,
            bidirectional_comparison=True)

        if comparison_results_this_image['max_conf_error'] > max_conf_error:
            max_conf_error = comparison_results_this_image['max_conf_error']
            max_conf_error_comparison_results = comparison_results_this_image
            max_conf_error_file = fn

        if comparison_results_this_image['max_coord_error'] > max_coord_error:
            max_coord_error = comparison_results_this_image['max_coord_error']
            max_coord_error_comparison_results = comparison_results_this_image
            max_coord_error_file = fn

    # ...for each image

    if not options.warning_mode:

        assert max_conf_error <= options.max_conf_error, \
            'Confidence error {} is greater than allowable ({}), on file:\n{} ({},{})'.format(
                max_conf_error,options.max_conf_error,max_conf_error_file,
                inference_output_file,expected_results_file)

        assert max_coord_error <= options.max_coord_error, \
            'Coord error {} is greater than allowable ({}), on file:\n{} ({},{})'.format(
                max_coord_error,options.max_coord_error,max_coord_error_file,
                inference_output_file,expected_results_file)

    print('Max conf error: {} (file {})'.format(
        max_conf_error,max_conf_error_file))
    print('Max coord error: {} (file {})'.format(
        max_coord_error,max_coord_error_file))

    comparison_results = {}
    comparison_results['max_conf_error'] = max_conf_error
    comparison_results['max_conf_error_comparison_results'] = max_conf_error_comparison_results
    comparison_results['max_coord_error'] = max_coord_error
    comparison_results['max_coord_error_comparison_results'] = max_coord_error_comparison_results

    return comparison_results

# ...def compare_results(...)


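# When two results files are expected to differ a bit more than the defaults allow (e.g.
# results generated in different environments), the pattern used later in this module is
# to loosen the tolerances on a copy of the options; a minimal sketch:
def _example_compare_results_loose(fn_actual, fn_expected, options):

    from copy import deepcopy
    options_loose = deepcopy(options)
    options_loose.max_conf_error = 0.05
    options_loose.max_coord_error = 0.01
    return compare_results(fn_actual, fn_expected, options_loose,
                           expected_results_file_is_absolute=True)
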
def _args_to_object(args, obj):
    """
    Copies all fields from a Namespace (typically the output from parse_args) to an
    object. Skips fields starting with _. Does not check existence in the target
    object.

    Args:
        args (argparse.Namespace): the namespace to convert to an object
        obj (object): object whose attributes will be updated

    Returns:
        object: the modified object (modified in place, but also returned)
    """

    for n, v in inspect.getmembers(args):
        if not n.startswith('_'):
            setattr(obj, n, v)

    return obj


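# A minimal sketch of the intended usage pattern (the argument names here are illustrative,
# not necessarily the ones exposed by this module's CLI): parse command-line arguments,
# then copy them onto an MDTestOptions instance.
def _example_args_to_options():

    parser = argparse.ArgumentParser()
    parser.add_argument('--disable_gpu', action='store_true')
    parser.add_argument('--scratch_dir', type=str, default=None)
    args = parser.parse_args([])
    options = _args_to_object(args, MDTestOptions())
    return options
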
#%% CLI functions

# These are copied from process_utils.py to avoid imports outside of the test
# functions.

os.environ["PYTHONUNBUFFERED"] = "1"

# In some circumstances I want to allow CLI tests to "succeed" even when they return
# specific non-zero return codes.
allowable_process_return_codes = [0]

def execute(cmd):
    """
    Runs [cmd] (a single string) in a shell, yielding each line of output to the caller.

    Args:
        cmd (str): command to run

    Returns:
        int: the command's return code; a CalledProcessError is raised if the return code
        is not in allowable_process_return_codes
    """

    # https://stackoverflow.com/questions/4417546/constantly-print-subprocess-output-while-process-is-running
    popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                             shell=True, universal_newlines=True)
    for stdout_line in iter(popen.stdout.readline, ""):
        yield stdout_line
    popen.stdout.close()
    return_code = popen.wait()
    if return_code not in allowable_process_return_codes:
        raise subprocess.CalledProcessError(return_code, cmd)
    return return_code


def execute_and_print(cmd,print_output=True,catch_exceptions=False,echo_command=True):
    """
    Runs [cmd] (a single string) in a shell, capturing (and optionally printing) output.

    Args:
        cmd (str): command to run
        print_output (bool, optional): whether to print output from [cmd]
        catch_exceptions (bool, optional): whether to catch exceptions, rather than raising
            them
        echo_command (bool, optional): whether to print [cmd] to stdout prior to execution

    Returns:
        dict: a dictionary with fields "status" (the process return code) and "output"
        (the content of stdout)
    """

    if echo_command:
        print('Running command:\n{}\n'.format(cmd))

    to_return = {'status':'unknown','output':''}
    output = []
    try:
        for s in execute(cmd):
            output.append(s)
            if print_output:
                print(s,end='',flush=True)
        to_return['status'] = 0
    except subprocess.CalledProcessError as cpe:
        if not catch_exceptions:
            raise
        print('execute_and_print caught error: {}'.format(cpe.output))
        to_return['status'] = cpe.returncode
    to_return['output'] = output

    return to_return


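# A minimal usage sketch: run a benign command via the helper above and check the status
# field. 'python --version' is just an illustrative command, not one of the real tests.
def _example_execute_and_print():

    cmd_results = execute_and_print('python --version', catch_exceptions=True)
    assert cmd_results['status'] == 0
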
#%% Python tests

@pytest.mark.skip(reason='Called once for each module')
def test_package_imports(package_name,exceptions=None,verbose=True):
    """
    Imports all modules in [package_name]

    Args:
        package_name (str): the package name to test
        exceptions (list, optional): exclude any modules that contain any of these strings
        verbose (bool, optional): enable additional debug output
    """
    import importlib
    import pkgutil

    package = importlib.import_module(package_name)
    package_path = package.__path__
    imported_modules = []

    if exceptions is None:
        exceptions = []

    for _, modname, _ in pkgutil.walk_packages(package_path, package_name + '.'):

        skip_module = False
        for s in exceptions:
            if s in modname:
                skip_module = True
                break
        if skip_module:
            continue

        if verbose:
            print('Testing import: {}'.format(modname))

        try:
            # Attempt to import each module
            _ = importlib.import_module(modname)
            imported_modules.append(modname)
        except ImportError as e:
            print(f"Failed to import module {modname}: {e}")
            raise


def run_python_tests(options):
    """
    Runs Python-based (as opposed to CLI-based) package tests.

    Args:
        options (MDTestOptions): see MDTestOptions for details
    """

    print('\n*** Starting module tests ***\n')


    ## Prepare data

    download_test_data(options)


    ## Import tests

    if not options.skip_import_tests:

        print('\n** Running package import tests **\n')
        test_package_imports('megadetector.visualization')
        test_package_imports('megadetector.postprocessing')
        test_package_imports('megadetector.postprocessing.repeat_detection_elimination')
        test_package_imports('megadetector.utils',exceptions=['md_tests'])
        test_package_imports('megadetector.data_management',exceptions=['lila','ocr_tools'])


    ## Return early if we're not running torch-related tests

    if options.test_mode == 'utils-only':
        return


    ## Make sure our tests are doing what we think they're doing

    from megadetector.detection import pytorch_detector
    pytorch_detector.require_non_default_compatibility_mode = True


    if not options.skip_image_tests:

        from megadetector.utils import path_utils # noqa
        image_folder = os.path.join(options.scratch_dir,'md-test-images')
        assert os.path.isdir(image_folder), 'Test image folder {} is not available'.format(image_folder)
        inference_output_file = os.path.join(options.scratch_dir,'folder_inference_output.json')
        image_file_names = path_utils.find_images(image_folder,recursive=True)


        ## Run inference on an image

        print('\n** Running MD on a single image (module) **\n')

        from megadetector.detection import run_detector
        from megadetector.visualization import visualization_utils as vis_utils # noqa
        image_fn = os.path.join(options.scratch_dir,options.test_images[0])
        model = run_detector.load_detector(options.default_model,
                                           detector_options=copy(options.detector_options))
        pil_im = vis_utils.load_image(image_fn)
        result = model.generate_detections_one_image(pil_im) # noqa

        if options.python_test_depth <= 1:
            return


        ## Run inference on a folder

        print('\n** Running MD on a folder of images (module) **\n')

        from megadetector.detection.run_detector_batch import load_and_run_detector_batch,write_results_to_file

        results = load_and_run_detector_batch(options.default_model,
                                              image_file_names,
                                              quiet=True,
                                              detector_options=copy(options.detector_options))
        _ = write_results_to_file(results,
                                  inference_output_file,
                                  relative_path_base=image_folder,
                                  detector_file=options.default_model)

        ## Verify results

        # Verify format correctness
        from megadetector.postprocessing.validate_batch_results import validate_batch_results #noqa
        validate_batch_results(inference_output_file)

        # Verify value correctness
        expected_results_file = get_expected_results_filename(is_gpu_available(verbose=False),
                                                              options=options)
        compare_results(inference_output_file,expected_results_file,options)


        # Make note of this filename, we will use it again later
        inference_output_file_standard_inference = inference_output_file

        if options.python_test_depth <= 2:
            return


        ## Run again with a batch size > 1

        print('\n** Running MD on a folder of images with batch size > 1 (module) **\n')

        from megadetector.detection.run_detector_batch import load_and_run_detector_batch,write_results_to_file
        from megadetector.utils.path_utils import insert_before_extension

        inference_output_file_batch = insert_before_extension(inference_output_file,'batch')
        from megadetector.detection import run_detector_batch
        run_detector_batch.verbose = True
        results = load_and_run_detector_batch(options.default_model,
                                              image_file_names,
                                              quiet=True,
                                              batch_size=options.alternative_batch_size,
                                              detector_options=copy(options.detector_options))
        run_detector_batch.verbose = False
        _ = write_results_to_file(results,
                                  inference_output_file_batch,
                                  relative_path_base=image_folder,
                                  detector_file=options.default_model)

        expected_results_file = get_expected_results_filename(is_gpu_available(verbose=False),
                                                              options=options)
        compare_results(inference_output_file_batch,expected_results_file,options)

        ## Run and verify again with augmentation enabled

        print('\n** Running MD on images with augmentation (module) **\n')

        inference_output_file_augmented = insert_before_extension(inference_output_file,'augmented')
        results = load_and_run_detector_batch(options.default_model,
                                              image_file_names,
                                              quiet=True,
                                              augment=True,
                                              detector_options=copy(options.detector_options))
        _ = write_results_to_file(results,
                                  inference_output_file_augmented,
                                  relative_path_base=image_folder,
                                  detector_file=options.default_model)

        expected_results_file_augmented = \
            get_expected_results_filename(is_gpu_available(verbose=False),
                                          augment=True,options=options)
        compare_results(inference_output_file_augmented,expected_results_file_augmented,options)


        ## Postprocess results

        print('\n** Post-processing results (module) **\n')

        from megadetector.postprocessing.postprocess_batch_results import \
            PostProcessingOptions,process_batch_results
        postprocessing_options = PostProcessingOptions()

        postprocessing_options.md_results_file = inference_output_file
        postprocessing_options.output_dir = os.path.join(options.scratch_dir,'postprocessing_output')
        postprocessing_options.image_base_dir = image_folder

        postprocessing_results = process_batch_results(postprocessing_options)
        assert os.path.isfile(postprocessing_results.output_html_file), \
            'Postprocessing output file {} not found'.format(postprocessing_results.output_html_file)


        ## Partial RDE test

        print('\n** Testing RDE (module) **\n')

        from megadetector.postprocessing.repeat_detection_elimination.repeat_detections_core import \
            RepeatDetectionOptions, find_repeat_detections

        rde_options = RepeatDetectionOptions()
        rde_options.occurrenceThreshold = 2
        rde_options.confidenceMin = 0.001
        rde_options.outputBase = os.path.join(options.scratch_dir,'rde_working_dir')
        rde_options.imageBase = image_folder
        rde_output_file = inference_output_file.replace('.json','_filtered.json')
        assert rde_output_file != inference_output_file
        rde_results = find_repeat_detections(inference_output_file, rde_output_file, rde_options)
        assert os.path.isfile(rde_results.filterFile),\
            'Could not find RDE output file {}'.format(rde_results.filterFile)


        ## Run inference on a folder (with YOLOv5 val script)

        if options.yolo_working_dir is None:

            print('Skipping YOLO val inference tests, no YOLO folder supplied')

        else:

            print('\n** Running YOLO val inference test (module) **\n')

            from megadetector.detection.run_inference_with_yolov5_val import \
                YoloInferenceOptions, run_inference_with_yolo_val
            from megadetector.utils.path_utils import insert_before_extension

            inference_output_file_yolo_val = os.path.join(options.scratch_dir,'folder_inference_output_yolo_val.json')

            yolo_inference_options = YoloInferenceOptions()
            yolo_inference_options.input_folder = os.path.join(options.scratch_dir,'md-test-images')
            yolo_inference_options.output_file = inference_output_file_yolo_val
            yolo_inference_options.yolo_working_folder = options.yolo_working_dir
            yolo_inference_options.model_filename = options.default_model
            yolo_inference_options.augment = False
            yolo_inference_options.overwrite_handling = 'overwrite'
            from megadetector.detection.run_detector import DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD
            yolo_inference_options.conf_thres = DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD

            run_inference_with_yolo_val(yolo_inference_options)

            ## Confirm this matches the standard inference path

            if False:
                # TODO: compare_results() isn't quite ready for this yet
                compare_results(inference_output_file=inference_output_file_yolo_val,
                                expected_results_file=inference_output_file_standard_inference,
                                options=options)

            # Run again, without symlinks this time

            inference_output_file_yolo_val_no_links = insert_before_extension(inference_output_file_yolo_val,
                                                                              'no-links')
            yolo_inference_options.output_file = inference_output_file_yolo_val_no_links
            yolo_inference_options.use_symlinks = False
            run_inference_with_yolo_val(yolo_inference_options)

            # Run again, with chunked inference and symlinks

            inference_output_file_yolo_val_checkpoints = insert_before_extension(inference_output_file_yolo_val,
                                                                                 'checkpoints')
            yolo_inference_options.output_file = inference_output_file_yolo_val_checkpoints
            yolo_inference_options.use_symlinks = True
            yolo_inference_options.checkpoint_frequency = 5
            run_inference_with_yolo_val(yolo_inference_options)

            # Run again, with chunked inference and no symlinks

            inference_output_file_yolo_val_checkpoints_no_links = \
                insert_before_extension(inference_output_file_yolo_val,'checkpoints-no-links')
            yolo_inference_options.output_file = inference_output_file_yolo_val_checkpoints_no_links
            yolo_inference_options.use_symlinks = False
            yolo_inference_options.checkpoint_frequency = 5
            run_inference_with_yolo_val(yolo_inference_options)

            fn1 = inference_output_file_yolo_val

            output_files_to_compare = [
                inference_output_file_yolo_val_no_links,
                inference_output_file_yolo_val_checkpoints,
                inference_output_file_yolo_val_checkpoints_no_links
            ]

            for fn2 in output_files_to_compare:
                assert output_files_are_identical(fn1, fn2, verbose=True)

        # ...if we need to run the YOLO val inference tests

    # ...if we're not skipping image tests

    if not options.skip_video_tests:

        ## Video test (single video)

        # This test just checks non-crashing-ness; we will test correctness in the next
        # test (which runs a folder of videos)

        print('\n** Running MD on a single video (module) **\n')

        from megadetector.detection.process_video import ProcessVideoOptions, process_videos
        from megadetector.utils.path_utils import insert_before_extension

        video_options = ProcessVideoOptions()
        video_options.model_file = options.default_model
        video_options.input_video_file = os.path.join(options.scratch_dir,options.test_videos[0])
        video_options.output_json_file = os.path.join(options.scratch_dir,'single_video_output.json')
        video_options.frame_sample = 10
        video_options.detector_options = copy(options.detector_options)

        _ = process_videos(video_options)

        assert os.path.isfile(video_options.output_json_file), \
            'Python video test failed to render output .json file'


        ## Video test (folder)

        print('\n** Running MD on a folder of videos (module) **\n')

        from megadetector.detection.process_video import ProcessVideoOptions, process_videos
        from megadetector.utils.path_utils import insert_before_extension

        video_options = ProcessVideoOptions()
        video_options.model_file = options.default_model
        video_options.input_video_file = os.path.join(options.scratch_dir,
                                                      os.path.dirname(options.test_videos[0]))
        video_options.output_json_file = os.path.join(options.scratch_dir,'video_folder_output.json')
        video_options.output_video_file = None
        video_options.recursive = True
        video_options.verbose = True
        video_options.json_confidence_threshold = 0.05
        video_options.time_sample = 2
        video_options.detector_options = copy(options.detector_options)
        _ = process_videos(video_options)

        assert os.path.isfile(video_options.output_json_file), \
            'Python video test failed to render output .json file'

        ## Verify results

        expected_results_file = \
            get_expected_results_filename(is_gpu_available(verbose=False),test_type='video',options=options)
        assert os.path.isfile(expected_results_file)

        from copy import deepcopy
        options_loose = deepcopy(options)
        options_loose.max_conf_error = 0.05
        options_loose.max_coord_error = 0.01

        compare_results(video_options.output_json_file,expected_results_file,options_loose)

    # ...if we're not skipping video tests

    print('\n*** Finished module tests ***\n')

# ...def run_python_tests(...)


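# A minimal sketch of the lightest-weight way to exercise the function above: 'utils-only'
# mode downloads the test data, runs the package import tests, and then returns before any
# torch-dependent work.
def _example_run_import_tests_only():

    import_test_options = MDTestOptions()
    import_test_options.test_mode = 'utils-only'
    run_python_tests(import_test_options)
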
+ #%% Command-line tests
1115
+
1116
+ def run_cli_tests(options):
1117
+ """
1118
+ Runs CLI (as opposed to Python-based) package tests.
1119
+
1120
+ Args:
1121
+ options (MDTestOptions): see MDTestOptions for details
1122
+ """
1123
+
1124
+ print('\n*** Starting CLI tests ***\n')
1125
+
1126
+ ## Environment management
1127
+
1128
+ if options.cli_test_pythonpath is not None:
1129
+ os.environ['PYTHONPATH'] = options.cli_test_pythonpath
1130
+
1131
+
1132
+ ## chdir if necessary
1133
+
1134
+ if options.cli_working_dir is not None:
1135
+ os.chdir(options.cli_working_dir)
1136
+
1137
+
1138
+ ## Prepare data
1139
+
1140
+ download_test_data(options)
1141
+
1142
+
1143
+ ## Utility imports
1144
+
1145
+ from megadetector.utils.ct_utils import dict_to_kvp_list
1146
+ from megadetector.utils.path_utils import insert_before_extension
1147
+
1148
+
1149
+ ## Utility tests
1150
+
1151
+ # TODO: move postprocessing tests up to this point, using pre-generated .json results files
1152
+
1153
+
1154
+ ## Return early if we're not running torch-related tests
1155
+
1156
+ if options.test_mode == 'utils-only':
1157
+ print('utils-only tests finished, returning')
1158
+ return
1159
+
1160
+
1161
+ if not options.skip_image_tests:
1162
+
1163
+ ## Run inference on an image
1164
+
1165
+ print('\n** Running MD on a single image (CLI) **\n')
1166
+
1167
+ image_fn = os.path.join(options.scratch_dir,options.test_images[0])
1168
+ output_dir = os.path.join(options.scratch_dir,'single_image_test')
1169
+ if options.cli_working_dir is None:
1170
+ cmd = 'python -m megadetector.detection.run_detector'
1171
+ else:
1172
+ cmd = 'python megadetector/detection/run_detector.py'
1173
+ cmd += ' "{}" --image_file "{}" --output_dir "{}"'.format(
1174
+ options.default_model,image_fn,output_dir)
1175
+ cmd += ' --detector_options {}'.format(dict_to_kvp_list(options.detector_options))
1176
+ cmd_results = execute_and_print(cmd)
1177
+
1178
+ if options.cpu_execution_is_error:
1179
+ gpu_available_via_cli = False
1180
+ for s in cmd_results['output']:
1181
+ if 'GPU available: True' in s:
1182
+ gpu_available_via_cli = True
1183
+ break
1184
+ if not gpu_available_via_cli:
1185
+ raise Exception('GPU execution is required, but not available')
1186
+
1187
+
1188
+ ## Make sure we can also pass an absolute path to a model file, instead of, e.g. "MDV5A"
1189
+
1190
+ print('\n** Running MD on a single image (CLI) (with symbolic model name) **\n')
1191
+
1192
+ from megadetector.detection.run_detector import try_download_known_detector
1193
+ model_file = try_download_known_detector(options.default_model,force_download=False,verbose=False)
1194
+ cmd = cmd.replace(options.default_model,model_file)
1195
+ cmd_results = execute_and_print(cmd)
1196
+
1197
+
1198
+ ## Run inference on a folder
1199
+
1200
+ print('\n** Running MD on a folder (CLI) **\n')
1201
+
1202
+ image_folder = os.path.join(options.scratch_dir,'md-test-images')
1203
+ assert os.path.isdir(image_folder), 'Test image folder {} is not available'.format(image_folder)
1204
+ inference_output_file = os.path.join(options.scratch_dir,'folder_inference_output.json')
1205
+ if options.cli_working_dir is None:
1206
+ cmd = 'python -m megadetector.detection.run_detector_batch'
1207
+ else:
1208
+ cmd = 'python megadetector/detection/run_detector_batch.py'
1209
+ cmd += ' "{}" "{}" "{}" --recursive'.format(
1210
+ options.default_model,image_folder,inference_output_file)
1211
+ cmd += ' --output_relative_filenames --quiet --include_image_size'
1212
+ cmd += ' --include_image_timestamp --include_exif_data'
1213
+
1214
+ base_cmd = cmd
1215
+
1216
+ cmd += ' --detector_options {}'.format(dict_to_kvp_list(options.detector_options))
1217
+ cmd_results = execute_and_print(cmd)
1218
+
1219
+
1220
+ ## Run again with a batch size > 1
1221
+
1222
+ print('\n** Running MD on a folder (with a batch size > 1) (CLI) **\n')
1223
+
1224
+ batch_string = ' --batch_size {}'.format(options.alternative_batch_size)
1225
+ cmd = base_cmd + batch_string
1226
+ inference_output_file_batch = insert_before_extension(inference_output_file,'batch')
1227
+ cmd = cmd.replace(inference_output_file,inference_output_file_batch)
1228
+ cmd += ' --detector_options {}'.format(dict_to_kvp_list(options.detector_options))
1229
+ cmd_results = execute_and_print(cmd)
1230
+
1231
+ # Use compare_results() here rather than output_files_are_identical(), because
1232
+ # batch inference may introduce very small differences. Override the default tolerance,
1233
+ # though, because these differences should be very small compared to, e.g., differences
1234
+ # across library versions.
1235
+ batch_options = copy(options)
1236
+ batch_options.max_coord_error = 0.01
1237
+ batch_options.max_conf_error = 0.01
1238
+ compare_results(inference_output_file,inference_output_file_batch,batch_options)
1239
+
1240
+
1241
+ ## Run again with the image queue enabled
1242
+
1243
+ print('\n** Running MD on a folder (with image queue but consumer-side preprocessing) (CLI) **\n')
1244
+
1245
+ cmd = base_cmd + ' --use_image_queue'
1246
+ inference_output_file_queue = insert_before_extension(inference_output_file,'queue')
1247
+ cmd = cmd.replace(inference_output_file,inference_output_file_queue)
1248
+ cmd += ' --detector_options {}'.format(dict_to_kvp_list(options.detector_options))
1249
+ cmd_results = execute_and_print(cmd)
1250
+
1251
+ assert output_files_are_identical(fn1=inference_output_file,
1252
+ fn2=inference_output_file_queue,
1253
+ verbose=True)
1254
+
1255
+
1256
+ ## Run again with the image queue and worker-side preprocessing enabled
1257
+
1258
+ print('\n** Running MD on a folder (with image queue and worker-side preprocessing) (CLI) **\n')
1259
+
1260
+ cmd = base_cmd + ' --use_image_queue --preprocess_on_image_queue'
1261
+ inference_output_file_preprocess_queue = \
1262
+ insert_before_extension(inference_output_file,'preprocess_queue')
1263
+ cmd = cmd.replace(inference_output_file,inference_output_file_preprocess_queue)
1264
+ cmd += ' --detector_options {}'.format(dict_to_kvp_list(options.detector_options))
1265
+ cmd_results = execute_and_print(cmd)
1266
+
1267
+ assert output_files_are_identical(fn1=inference_output_file,
1268
+ fn2=inference_output_file_preprocess_queue,
1269
+ verbose=True)
1270
+
1271
+
1272
+ ## Run again with the image queue but no worker-side preprocessing
1273
+
1274
+ print('\n** Running MD on a folder (with image queue but no worker-side preprocessing) (CLI) **\n')
1275
+
1276
+ cmd = base_cmd + ' --use_image_queue'
1277
+ inference_output_file_no_preprocess_queue = \
1278
+ insert_before_extension(inference_output_file,'no_preprocess_queue')
1279
+ cmd = cmd.replace(inference_output_file,inference_output_file_no_preprocess_queue)
1280
+ cmd += ' --detector_options {}'.format(dict_to_kvp_list(options.detector_options))
1281
+ cmd_results = execute_and_print(cmd)
1282
+
1283
+ assert output_files_are_identical(fn1=inference_output_file,
1284
+ fn2=inference_output_file_no_preprocess_queue,
1285
+ verbose=True)
1286
+
1287
+
1288
+ ## Run again with the worker-side preprocessing and an alternative batch size
1289
+
1290
+ print('\n** Running MD on a folder (with worker-side preprocessing and batched inference) (CLI) **\n')
1291
+
1292
+ batch_string = ' --batch_size {}'.format(options.alternative_batch_size)
1293
+
1294
+ # I reduce the number of loader workers here to force batching to actually happen; with a small
1295
+ # number of images, a few of which are intentionally corrupt, the default number of loader
1296
+ # workers leaves us with batches that are mostly just one image.
1297
+ cmd = base_cmd + ' --use_image_queue --preprocess_on_image_queue --loader_workers 2' + batch_string
1298
+ inference_output_file_queue_batch = \
1299
+ insert_before_extension(inference_output_file,'preprocess_queue_batch')
1300
+ cmd = cmd.replace(inference_output_file,inference_output_file_queue_batch)
1301
+ cmd += ' --detector_options {}'.format(dict_to_kvp_list(options.detector_options))
1302
+ cmd_results = execute_and_print(cmd)
1303
+
1304
+ compare_results(inference_output_file,inference_output_file_queue_batch,batch_options)
1305
+
1306
+
1307
+ ## Run again with checkpointing enabled
1308
+
1309
+ print('\n** Running MD on a folder (with checkpoints) (CLI) **\n')
1310
+
1311
+ checkpoint_string = ' --checkpoint_frequency 5'
1312
+ cmd = base_cmd + checkpoint_string
1313
+ inference_output_file_checkpoint = insert_before_extension(inference_output_file,'checkpoint')
1314
+ cmd = cmd.replace(inference_output_file,inference_output_file_checkpoint)
1315
+ cmd += ' --detector_options {}'.format(dict_to_kvp_list(options.detector_options))
1316
+ cmd_results = execute_and_print(cmd)
1317
+
1318
+ assert output_files_are_identical(fn1=inference_output_file,
1319
+ fn2=inference_output_file_checkpoint,
1320
+ verbose=True)
1321
+
1322
+
1323
+ ## Run again with "modern" postprocessing, make sure the results are *not* the same as classic
1324
+
1325
+ print('\n** Running MD on a folder (with modern preprocessing) (CLI) **\n')
1326
+
1327
+ inference_output_file_modern = insert_before_extension(inference_output_file,'modern')
1328
+ cmd = base_cmd
1329
+ cmd = cmd.replace(inference_output_file,inference_output_file_modern)
1330
+ cmd += ' --detector_options {}'.format(dict_to_kvp_list({'compatibility_mode':'modern'}))
1331
+ cmd_results = execute_and_print(cmd)
1332
+
1333
+ assert not output_files_are_identical(fn1=inference_output_file,
1334
+ fn2=inference_output_file_modern,
1335
+ verbose=True)
1336
+
1337
+
1338
+ ## Run again with "modern" postprocessing and worker-side preprocessing,
1339
+ ## make sure the results are the same as modern.
1340
+
1341
+ print('\n** Running MD on a folder (with worker-side modern preprocessing) (CLI) **\n')
1342
+
1343
+ inference_output_file_modern_worker_preprocessing = insert_before_extension(inference_output_file,'modern_worker_preprocessing')
1344
+ cmd = base_cmd + ' --use_image_queue --preprocess_on_image_queue'
1345
+ cmd = cmd.replace(inference_output_file,inference_output_file_modern_worker_preprocessing)
1346
+ cmd += ' --detector_options {}'.format(dict_to_kvp_list({'compatibility_mode':'modern'}))
1347
+ cmd_results = execute_and_print(cmd)
1348
+
1349
+ # This should not be the same as the "classic" results
1350
+ assert not output_files_are_identical(fn1=inference_output_file,
1351
+ fn2=inference_output_file_modern_worker_preprocessing,
1352
+ verbose=True)
1353
+
1354
+ # ...but it should be the same as the single-threaded "modern" results
1355
+ assert output_files_are_identical(fn1=inference_output_file_modern,
1356
+ fn2=inference_output_file_modern_worker_preprocessing,
1357
+ verbose=True)
1358
+
1359
+
1360
+ if not options.skip_cpu_tests:
1361
+
1362
+ ## Run again on multiple cores
1363
+
1364
+ # First run again on the CPU on a single thread if necessary, so we get a file that
1365
+ # *should* be identical to the multicore version.
1366
+ gpu_available = is_gpu_available(verbose=False)
1367
+
1368
+ cuda_visible_devices = None
1369
+ if 'CUDA_VISIBLE_DEVICES' in os.environ:
1370
+ cuda_visible_devices = os.environ['CUDA_VISIBLE_DEVICES']
1371
+ os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
1372
+
1373
+ # If we already ran on the CPU, no need to run again
1374
+ if not gpu_available:
1375
+
1376
+ inference_output_file_cpu = inference_output_file
1377
+
1378
+ else:
1379
+
1380
+ print('\n** Running MD on a folder (single CPU) (CLI) **\n')
1381
+
1382
+ inference_output_file_cpu = insert_before_extension(inference_output_file,'cpu')
1383
+ cmd = base_cmd
1384
+ cmd = cmd.replace(inference_output_file,inference_output_file_cpu)
1385
+ cmd += ' --detector_options {}'.format(dict_to_kvp_list(options.detector_options))
1386
+ cmd_results = execute_and_print(cmd)
1387
+
1388
+ print('\n** Running MD on a folder (multiple CPUs) (CLI) **\n')
1389
+
1390
+ cpu_string = ' --ncores {}'.format(options.n_cores_for_multiprocessing_tests)
1391
+ cmd = base_cmd + cpu_string
1392
+ inference_output_file_cpu_multicore = insert_before_extension(inference_output_file,'multicore')
1393
+ cmd = cmd.replace(inference_output_file,inference_output_file_cpu_multicore)
1394
+ cmd += ' --detector_options {}'.format(dict_to_kvp_list(options.detector_options))
1395
+ cmd_results = execute_and_print(cmd)
1396
+
1397
+ if cuda_visible_devices is not None:
1398
+ print('Restoring CUDA_VISIBLE_DEVICES')
1399
+ os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible_devices
1400
+ else:
1401
+ del os.environ['CUDA_VISIBLE_DEVICES']
1402
+
1403
+ assert output_files_are_identical(fn1=inference_output_file_cpu,
1404
+ fn2=inference_output_file_cpu_multicore,
1405
+ verbose=True)
1406
+
1407
+ # ...if we're not skipping the force-cpu tests
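The save/set/restore handling of CUDA_VISIBLE_DEVICES above can also be written as a context manager; the sketch below expresses the same pattern and is not part of the test suite.

import os
from contextlib import contextmanager

@contextmanager
def force_cpu():
    """Temporarily hide all GPUs from CUDA-aware libraries, restoring the prior state on exit."""
    previous_value = os.environ.get('CUDA_VISIBLE_DEVICES', None)
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    try:
        yield
    finally:
        if previous_value is not None:
            os.environ['CUDA_VISIBLE_DEVICES'] = previous_value
        else:
            del os.environ['CUDA_VISIBLE_DEVICES']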
1408
+
1409
+
1410
+ ## Postprocessing
1411
+
1412
+ print('\n** Testing post-processing (CLI) **\n')
1413
+
1414
+ postprocessing_output_dir = os.path.join(options.scratch_dir,'postprocessing_output_cli')
1415
+
1416
+ if options.cli_working_dir is None:
1417
+ cmd = 'python -m megadetector.postprocessing.postprocess_batch_results'
1418
+ else:
1419
+ cmd = 'python megadetector/postprocessing/postprocess_batch_results.py'
1420
+ cmd += ' "{}" "{}"'.format(
1421
+ inference_output_file,postprocessing_output_dir)
1422
+ cmd += ' --image_base_dir "{}"'.format(image_folder)
1423
+ cmd_results = execute_and_print(cmd)
1424
+
1425
+
1426
+ ## RDE
1427
+
1428
+ print('\n** Running RDE (CLI) **\n')
1429
+
1430
+ rde_output_dir = os.path.join(options.scratch_dir,'rde_output_cli')
1431
+
1432
+ if options.cli_working_dir is None:
1433
+ cmd = 'python -m megadetector.postprocessing.repeat_detection_elimination.find_repeat_detections'
1434
+ else:
1435
+ cmd = 'python megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py'
1436
+ cmd += ' "{}"'.format(inference_output_file)
1437
+ cmd += ' --imageBase "{}"'.format(image_folder)
1438
+ cmd += ' --outputBase "{}"'.format(rde_output_dir)
1439
+ cmd += ' --occurrenceThreshold 1' # Use an absurdly low threshold here to make sure we get some suspicious detections
1440
+ cmd_results = execute_and_print(cmd)
1441
+
1442
+ # Find the latest filtering folder
1443
+ filtering_output_dir = os.listdir(rde_output_dir)
1444
+ filtering_output_dir = [fn for fn in filtering_output_dir if fn.startswith('filtering_')]
1445
+ filtering_output_dir = [os.path.join(rde_output_dir,fn) for fn in filtering_output_dir]
1446
+ filtering_output_dir = [fn for fn in filtering_output_dir if os.path.isdir(fn)]
1447
+ filtering_output_dir = sorted(filtering_output_dir)[-1]
1448
+
1449
+ print('Using RDE filtering folder {}'.format(filtering_output_dir))
1450
+
1451
+ filtered_output_file = inference_output_file.replace('.json','_filtered.json')
1452
+
1453
+ if options.cli_working_dir is None:
1454
+ cmd = 'python -m megadetector.postprocessing.repeat_detection_elimination.remove_repeat_detections'
1455
+ else:
1456
+ cmd = 'python megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py'
1457
+ cmd += ' "{}" "{}" "{}"'.format(inference_output_file,filtered_output_file,filtering_output_dir)
1458
+ cmd_results = execute_and_print(cmd)
1459
+
1460
+ assert os.path.isfile(filtered_output_file), \
1461
+ 'Could not find RDE output file {}'.format(filtered_output_file)
1462
+
1463
+
1464
+ ## Run inference on a folder (tiled)
1465
+
1466
+ # This is a rather esoteric code path that I turn off when I'm testing
1466
+ # features it doesn't support yet, particularly compatibility-mode
1467
+ # control.
1469
+ skip_tiling_tests = True
1470
+
1471
+ if skip_tiling_tests:
1472
+
1473
+ print('### DEBUG: skipping tiling tests ###')
1474
+
1475
+ else:
1476
+ print('\n** Running tiled inference (CLI) **\n')
1477
+
1478
+ image_folder = os.path.join(options.scratch_dir,'md-test-images')
1479
+ tiling_folder = os.path.join(options.scratch_dir,'tiling-folder')
1480
+ inference_output_file_tiled = os.path.join(options.scratch_dir,'folder_inference_output_tiled.json')
1481
+ if options.cli_working_dir is None:
1482
+ cmd = 'python -m megadetector.detection.run_tiled_inference'
1483
+ else:
1484
+ cmd = 'python megadetector/detection/run_tiled_inference.py'
1485
+ cmd += ' "{}" "{}" "{}" "{}"'.format(
1486
+ options.default_model,image_folder,tiling_folder,inference_output_file_tiled)
1487
+ cmd += ' --overwrite_handling overwrite'
1488
+ cmd_results = execute_and_print(cmd)
1489
+
1490
+ with open(inference_output_file_tiled,'r') as f:
1491
+ results_from_file = json.load(f) # noqa
1492
+
1493
+
1494
+ ## Run inference on a folder (augmented, w/YOLOv5 val script)
1495
+
1496
+ if options.yolo_working_dir is None:
1497
+
1498
+ print('Bypassing YOLOv5 val tests, no yolo folder supplied')
1499
+
1500
+ else:
1501
+
1502
+ print('\n** Running YOLOv5 val tests (CLI) **\n')
1503
+
1504
+ image_folder = os.path.join(options.scratch_dir,'md-test-images')
1505
+ yolo_results_folder = os.path.join(options.scratch_dir,'yolo-output-folder')
1506
+ yolo_symlink_folder = os.path.join(options.scratch_dir,'yolo-symlink_folder')
1507
+ inference_output_file_yolo_val = os.path.join(options.scratch_dir,'folder_inference_output_yolo_val.json')
1508
+ if options.cli_working_dir is None:
1509
+ cmd = 'python -m megadetector.detection.run_inference_with_yolov5_val'
1510
+ else:
1511
+ cmd = 'python megadetector/detection/run_inference_with_yolov5_val.py'
1512
+ cmd += ' "{}" "{}" "{}"'.format(
1513
+ options.default_model,image_folder,inference_output_file_yolo_val)
1514
+ cmd += ' --yolo_working_folder "{}"'.format(options.yolo_working_dir)
1515
+ cmd += ' --yolo_results_folder "{}"'.format(yolo_results_folder)
1516
+ cmd += ' --symlink_folder "{}"'.format(yolo_symlink_folder)
1517
+ cmd += ' --augment_enabled 1'
1518
+ # cmd += ' --no_use_symlinks'
1519
+ cmd += ' --overwrite_handling overwrite'
1520
+ cmd_results = execute_and_print(cmd)
1521
+
1522
+ # Run again with checkpointing, make sure the outputs are identical
1523
+ cmd += ' --checkpoint_frequency 5'
1524
+ inference_output_file_yolo_val_checkpoint = \
1525
+ os.path.join(options.scratch_dir,'folder_inference_output_yolo_val_checkpoint.json')
1526
+ assert inference_output_file_yolo_val_checkpoint != inference_output_file_yolo_val
1527
+ cmd = cmd.replace(inference_output_file_yolo_val,inference_output_file_yolo_val_checkpoint)
1528
+ cmd_results = execute_and_print(cmd)
1529
+
1530
+ assert output_files_are_identical(fn1=inference_output_file_yolo_val,
1531
+ fn2=inference_output_file_yolo_val_checkpoint,
1532
+ verbose=True)
1533
+
1534
+
1535
+ ## Run inference on a folder (with MDV5B, so we can do a comparison)
1536
+
1537
+ print('\n** Running MDv5b (CLI) **\n')
1538
+
1539
+ image_folder = os.path.join(options.scratch_dir,'md-test-images')
1540
+ inference_output_file_alt = os.path.join(options.scratch_dir,'folder_inference_output_alt.json')
1541
+ if options.cli_working_dir is None:
1542
+ cmd = 'python -m megadetector.detection.run_detector_batch'
1543
+ else:
1544
+ cmd = 'python megadetector/detection/run_detector_batch.py'
1545
+ cmd += ' "{}" "{}" "{}" --recursive'.format(
1546
+ options.alt_model,image_folder,inference_output_file_alt)
1547
+ cmd += ' --output_relative_filenames --quiet --include_image_size'
1548
+ cmd += ' --include_image_timestamp --include_exif_data'
1549
+ cmd += ' --detector_options {}'.format(dict_to_kvp_list(options.detector_options))
1550
+ cmd_results = execute_and_print(cmd)
1551
+
1552
+ with open(inference_output_file_alt,'r') as f:
1553
+ results_from_file = json.load(f) # noqa
1554
+
1555
+
1556
+ ## Compare the two files
1557
+
1558
+ comparison_output_folder = os.path.join(options.scratch_dir,'results_comparison')
1559
+ image_folder = os.path.join(options.scratch_dir,'md-test-images')
1560
+ results_files_string = '"{}" "{}"'.format(
1561
+ inference_output_file,inference_output_file_alt)
1562
+ if options.cli_working_dir is None:
1563
+ cmd = 'python -m megadetector.postprocessing.compare_batch_results'
1564
+ else:
1565
+ cmd = 'python megadetector/postprocessing/compare_batch_results.py'
1566
+ cmd += ' "{}" "{}" {}'.format(comparison_output_folder,image_folder,results_files_string)
1567
+ cmd_results = execute_and_print(cmd)
1568
+
1569
+ assert cmd_results['status'] == 0, 'Error generating comparison HTML'
1570
+ assert os.path.isfile(os.path.join(comparison_output_folder,'index.html')), \
1571
+ 'Failed to generate comparison HTML'
1572
+
1573
+ # ...if we're not skipping image tests
1574
+
1575
+
1576
+ if not options.skip_video_tests:
1577
+
1578
+ ## Video test
1579
+
1580
+ print('\n** Testing video processing (CLI) **\n')
1581
+
1582
+ video_inference_output_file = os.path.join(options.scratch_dir,'video_folder_output_cli.json')
1583
+ if options.cli_working_dir is None:
1584
+ cmd = 'python -m megadetector.detection.process_video'
1585
+ else:
1586
+ cmd = 'python megadetector/detection/process_video.py'
1587
+
1588
+ cmd += ' "{}" "{}"'.format(options.default_model,options.scratch_dir)
1589
+ cmd += ' --output_json_file "{}"'.format(video_inference_output_file)
1590
+ cmd += ' --frame_sample 4'
1591
+ cmd += ' --verbose'
1592
+ cmd += ' --recursive'
1593
+ cmd += ' --detector_options {}'.format(dict_to_kvp_list(options.detector_options))
1594
+
1595
+ cmd_results = execute_and_print(cmd)
1596
+
1597
+ # ...if we're not skipping video tests
1598
+
1599
+ print('\n*** Finished CLI tests ***\n')
1600
+
1601
+ # ...def run_cli_tests(...)
1602
+
1603
+
1604
+ def run_download_tests(options):
1605
+ """
1606
+ Test automatic model downloads.
1607
+
1608
+ Args:
1609
+ options (MDTestOptions): see MDTestOptions for details
1610
+ """
1611
+
1612
+ if options.skip_download_tests or options.test_mode == 'utils-only':
1613
+ return
1614
+
1615
+ from megadetector.detection.run_detector import known_models, \
1616
+ try_download_known_detector, \
1617
+ get_detector_version_from_model_file, \
1618
+ model_string_to_model_version
1619
+
1620
+ # Make sure we can download models based on canonical version numbers,
1621
+ # e.g. "v5a.0.0"
1622
+ for model_name in known_models:
1623
+ url = known_models[model_name]['url']
1624
+ if ('localhost' in url) and options.skip_localhost_downloads:
1625
+ continue
1626
+ print('Testing download for known model {}'.format(model_name))
1627
+ fn = try_download_known_detector(model_name,
1628
+ force_download=False,
1629
+ verbose=False)
1630
+ version_string = get_detector_version_from_model_file(fn, verbose=False)
1631
+ # Make sure this is the same version we asked for, modulo the MDv5 re-releases
1632
+ assert (version_string.replace('.0.1','.0.0') == model_name.replace('.0.1','.0.0'))
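The '.0.1' substitution in the assertion above treats the MDv5 re-releases as equivalent to the originals; the same normalization, written out as a tiny helper (the version strings used here are illustrative):

def normalize_mdv5_version(version_string):
    """Treat the MDv5 re-releases (x.0.1) as equivalent to the original releases (x.0.0)."""
    return version_string.replace('.0.1', '.0.0')

assert normalize_mdv5_version('v5a.0.1') == normalize_mdv5_version('v5a.0.0')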
1633
+
1634
+ # Make sure we can download models based on short names, e.g. "MDV5A"
1635
+ for model_name in model_string_to_model_version:
1636
+ model_version = model_string_to_model_version[model_name]
1637
+ assert model_version in known_models
1638
+ url = known_models[model_version]['url']
1639
+ if 'localhost' in url:
1640
+ continue
1641
+ print('Testing download for model short name {}'.format(model_name))
1642
+ fn = try_download_known_detector(model_name,
1643
+ force_download=False,
1644
+ verbose=False)
1645
+ assert fn != model_name
1646
+
1647
+ # Test corruption handling for .pt files
1648
+ print('Testing corruption handling for MDV5B')
1649
+
1650
+ # First ensure MDV5B is downloaded
1651
+ mdv5b_file = try_download_known_detector('MDV5B',
1652
+ force_download=False,
1653
+ verbose=False)
1654
+ assert mdv5b_file is not None
1655
+ assert os.path.exists(mdv5b_file)
1656
+ assert mdv5b_file.endswith('.pt')
1657
+
1658
+ # Get the original file size and MD5 hash for comparison
1659
+ original_size = os.path.getsize(mdv5b_file)
1660
+ from megadetector.utils.path_utils import compute_file_hash
1661
+ original_hash = compute_file_hash(mdv5b_file, algorithm='md5')
1662
+
1663
+ # Deliberately corrupt the file by overwriting the first few bytes
1664
+ print('Corrupting model file: {}'.format(mdv5b_file))
1665
+ with open(mdv5b_file, 'r+b') as f:
1666
+ f.write(b'CORRUPTED_FILE_DATA_XXXXXX')
1667
+
1668
+ # Verify the file is now corrupted (different hash)
1669
+ corrupted_hash = compute_file_hash(mdv5b_file, algorithm='md5')
1670
+ assert corrupted_hash != original_hash, 'File corruption verification failed'
1671
+
1672
+ # Try to download again; this should detect corruption and re-download
1673
+ print('Testing corruption detection and re-download')
1674
+ mdv5b_file_redownloaded = try_download_known_detector('MDV5B',
1675
+ force_download=False,
1676
+ verbose=True)
1677
+
1678
+ # Verify that the file was re-downloaded and is now valid
1679
+ assert mdv5b_file_redownloaded is not None
1680
+ assert os.path.exists(mdv5b_file_redownloaded)
1681
+ assert mdv5b_file_redownloaded == mdv5b_file
1682
+
1683
+ # Verify that the file is back to its original state
1684
+ new_size = os.path.getsize(mdv5b_file_redownloaded)
1685
+ new_hash = compute_file_hash(mdv5b_file_redownloaded, algorithm='md5')
1686
+
1687
+ assert new_size == original_size, \
1688
+ 'Re-downloaded file size ({}) does not match original ({})'.format(new_size, original_size)
1689
+ assert new_hash == original_hash, \
1690
+ 'Re-downloaded file hash ({}) does not match original ({})'.format(new_hash, original_hash)
1691
+
1692
+ print('Corruption handling test passed')
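The corruption test above depends on try_download_known_detector() noticing that the cached file is no longer valid and fetching it again. The exact validity check is internal to run_detector; the sketch below only illustrates the general download-if-invalid pattern being exercised, with a hypothetical URL, hash, and helper name.

import hashlib
import os
import urllib.request

def download_if_invalid(url, local_path, expected_md5):
    """Re-download local_path from url if it's missing or its MD5 doesn't match (illustrative only)."""

    def _md5(fn):
        h = hashlib.md5()
        with open(fn, 'rb') as f:
            for chunk in iter(lambda: f.read(1024 * 1024), b''):
                h.update(chunk)
        return h.hexdigest()

    if (not os.path.isfile(local_path)) or (_md5(local_path) != expected_md5):
        urllib.request.urlretrieve(url, local_path)
    return local_path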
1693
+
1694
+ # ...def run_download_tests()
1695
+
1696
+
1697
+ #%% Main test wrapper
1698
+
1699
+ def run_tests(options):
1700
+ """
1701
+ Runs Python-based and/or CLI-based package tests.
1702
+
1703
+ Args:
1704
+ options (MDTestOptions): see MDTestOptions for details
1705
+ """
1706
+
1707
+ # Prepare data folder
1708
+ download_test_data(options)
1709
+
1710
+ # Run model download tests if necessary
1711
+ run_download_tests(options)
1712
+
1713
+ if options.disable_gpu:
1714
+ os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
1715
+
1716
+ # Verify GPU
1717
+ gpu_available = is_gpu_available()
1718
+
1719
+ # If the GPU is required and isn't available, error
1720
+ if options.cpu_execution_is_error and (not gpu_available):
1721
+ raise ValueError('GPU not available, and cpu_execution_is_error is set')
1722
+
1723
+ # If the GPU should be disabled, verify that it is
1724
+ if options.disable_gpu:
1725
+ assert (not gpu_available), 'CPU execution specified, but the GPU appears to be available'
1726
+
1727
+ # Run python tests
1728
+ if not options.skip_python_tests:
1729
+
1730
+ if options.model_folder is not None:
1731
+
1732
+ assert os.path.isdir(options.model_folder), \
1733
+ 'Could not find model folder {}'.format(options.model_folder)
1734
+
1735
+ model_files = os.listdir(options.model_folder)
1736
+ model_files = [fn for fn in model_files if fn.endswith('.pt')]
1737
+ model_files = [os.path.join(options.model_folder,fn) for fn in model_files]
1738
+
1739
+ assert len(model_files) > 0, \
1740
+ 'Could not find any models in folder {}'.format(options.model_folder)
1741
+
1742
+ original_default_model = options.default_model
1743
+
1744
+ for model_file in model_files:
1745
+ print('Running Python tests for model {}'.format(model_file))
1746
+ options.default_model = model_file
1747
+ run_python_tests(options)
1748
+
1749
+ options.default_model = original_default_model
1750
+
1751
+ else:
1752
+
1753
+ run_python_tests(options)
1754
+
1755
+ # Run CLI tests
1756
+ if not options.skip_cli_tests:
1757
+ run_cli_tests(options)
1758
+
1759
+
1760
+ #%% Automated test entry point
1761
+
1762
+ def test_suite_entry_point():
1763
+ """
1764
+ This is the entry point when running tests via pytest; we run a subset of
1765
+ tests in this environment, e.g. we don't run CLI or video tests.
1766
+ """
1767
+
1768
+ options = MDTestOptions()
1769
+ options.disable_gpu = False
1770
+ options.cpu_execution_is_error = False
1771
+ options.skip_video_tests = True
1772
+ options.skip_python_tests = False
1773
+ options.skip_cli_tests = True
1774
+ options.scratch_dir = None
1775
+ options.test_data_url = 'https://lila.science/public/md-test-package.zip'
1776
+ options.force_data_download = False
1777
+ options.force_data_unzip = False
1778
+ options.warning_mode = False
1779
+ options.max_coord_error = 0.01 # 0.001
1780
+ options.max_conf_error = 0.01 # 0.005
1781
+ options.skip_video_rendering_tests = True
1782
+ options.cli_working_dir = None
1783
+ options.cli_test_pythonpath = None
1784
+ options.skip_download_tests = True
1785
+ options.skip_localhost_downloads = True
1786
+ options.skip_import_tests = False
1787
+
1788
+ if sys.platform == 'darwin':
1789
+ print('Detected a Mac environment, widening tolerance')
1790
+ options.max_coord_error = 0.05
1791
+ options.max_conf_error = 0.05
1792
+
1793
+ options = download_test_data(options)
1794
+
1795
+ run_tests(options)
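Because test_suite_entry_point() follows pytest's test_* naming convention, it can be invoked directly as a pytest node; the relative path below is an assumption based on the module name used elsewhere in this file.

# python -m pytest megadetector/utils/md_tests.py::test_suite_entry_point -s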
1796
+
1797
+
1798
+ #%% Interactive driver
1799
+
1800
+ if False:
1801
+
1802
+ pass
1803
+
1804
+ #%% Test Prep
1805
+
1806
+ from megadetector.utils.md_tests import MDTestOptions, download_test_data
1807
+
1808
+ options = MDTestOptions()
1809
+
1810
+ options.disable_gpu = False
1811
+ options.cpu_execution_is_error = False
1812
+ options.skip_video_tests = True
1813
+ options.skip_python_tests = True
1814
+ options.skip_cli_tests = False
1815
+ options.scratch_dir = None
1816
+ options.test_data_url = 'https://lila.science/public/md-test-package.zip'
1817
+ options.force_data_download = False
1818
+ options.force_data_unzip = False
1819
+ options.warning_mode = False
1820
+ options.max_coord_error = 0.01 # 0.001
1821
+ options.max_conf_error = 0.01 # 0.005
1822
+ options.skip_cpu_tests = True
1823
+ options.skip_video_rendering_tests = True
1824
+ options.skip_download_tests = True
1825
+ options.skip_localhost_downloads = False
1826
+
1827
+ # options.iou_threshold_for_file_comparison = 0.7
1828
+
1829
+ # options.cli_working_dir = r'c:\git\MegaDetector'
1830
+ # When running in the cameratraps-detector environment
1831
+ # options.cli_test_pythonpath = r'c:\git\MegaDetector;c:\git\yolov5-md'
1832
+
1833
+ # When running in the MegaDetector environment
1834
+ # options.cli_test_pythonpath = r'c:\git\MegaDetector'
1835
+
1836
+ # options.cli_working_dir = os.path.expanduser('~')
1837
+ # options.yolo_working_dir = r'c:\git\yolov5-md'
1838
+ # options.yolo_working_dir = '/mnt/c/git/yolov5-md'
1839
+ options = download_test_data(options)
1840
+
1841
+
1842
+ #%% Environment prep
1843
+
1844
+ # Add the YOLO working dir to the PYTHONPATH if necessary
1845
+ import os
1846
+ if (options.yolo_working_dir is not None) and \
1847
+ (('PYTHONPATH' not in os.environ) or (options.yolo_working_dir not in os.environ['PYTHONPATH'])):
1848
+ if ('PYTHONPATH' not in os.environ):
1849
+ os.environ['PYTHONPATH'] = options.yolo_working_dir
1850
+ else:
1851
+ # Use os.pathsep so this also works outside Windows (':' rather than ';')
+ os.environ['PYTHONPATH'] = os.environ['PYTHONPATH'] + os.pathsep + options.yolo_working_dir
1852
+
1853
+
1854
+ #%% Run download tests
1855
+
1856
+ from megadetector.utils.md_tests import run_download_tests
1857
+ run_download_tests(options=options)
1858
+
1859
+
1860
+ #%% Run all tests
1861
+
1862
+ from megadetector.utils.md_tests import run_tests
1863
+ run_tests(options)
1864
+
1865
+
1866
+ #%% Run YOLO inference tests
1867
+
1868
+ yolo_inference_options_dict = {'input_folder': '/tmp/md-tests/md-test-images',
1869
+ 'image_filename_list': None,
1870
+ 'model_filename': 'MDV5A',
1871
+ 'output_file': '/tmp/md-tests/folder_inference_output_yolo_val.json',
1872
+ 'yolo_working_folder': '/mnt/c/git/yolov5-md',
1873
+ 'model_type': 'yolov5',
1874
+ 'image_size': None,
1875
+ 'conf_thres': 0.005,
1876
+ 'batch_size': 1,
1877
+ 'device_string': '0',
1878
+ 'augment': False,
1879
+ 'half_precision_enabled': None,
1880
+ 'symlink_folder': None,
1881
+ 'use_symlinks': True,
1882
+ 'unique_id_strategy': 'links',
1883
+ 'yolo_results_folder': None,
1884
+ 'remove_symlink_folder': True,
1885
+ 'remove_yolo_results_folder': True,
1886
+ 'yolo_category_id_to_name': {0: 'animal', 1: 'person', 2: 'vehicle'},
1887
+ 'overwrite_handling': 'overwrite',
1888
+ 'preview_yolo_command_only': False,
1889
+ 'treat_copy_failures_as_warnings': False,
1890
+ 'save_yolo_debug_output': False,
1891
+ 'recursive': True,
1892
+ 'checkpoint_frequency': None}
1893
+
1894
+ from megadetector.utils.ct_utils import dict_to_object
1895
+ from megadetector.detection.run_inference_with_yolov5_val import \
1896
+ YoloInferenceOptions, run_inference_with_yolo_val
1897
+
1898
+ yolo_inference_options = YoloInferenceOptions()
1899
+ yolo_inference_options = dict_to_object(yolo_inference_options_dict, yolo_inference_options)
1900
+
1901
+ os.makedirs(options.scratch_dir,exist_ok=True)
1902
+
1903
+ inference_output_file_yolo_val = os.path.join(options.scratch_dir,'folder_inference_output_yolo_val.json')
1904
+
1905
+ run_inference_with_yolo_val(yolo_inference_options)
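dict_to_object() (used above to populate YoloInferenceOptions) is assumed to copy dictionary entries onto the corresponding attributes of an existing options object; a minimal stand-in with that behavior:

def dict_to_object_sketch(d, obj):
    """Copy each key/value pair in d onto obj as an attribute (illustrative stand-in only)."""
    for k, v in d.items():
        setattr(obj, k, v)
    return obj

class _Options:
    pass

o = dict_to_object_sketch({'batch_size': 1, 'recursive': True}, _Options())
assert o.batch_size == 1 and o.recursive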
1906
+
1907
+
1908
+ #%% Command-line driver
1909
+
1910
+ def main(): # noqa
1911
+
1912
+ options = MDTestOptions()
1913
+
1914
+ parser = argparse.ArgumentParser(
1915
+ description='MegaDetector test suite')
1916
+
1917
+ parser.add_argument(
1918
+ '--disable_gpu',
1919
+ action='store_true',
1920
+ help='Disable GPU operation')
1921
+
1922
+ parser.add_argument(
1923
+ '--cpu_execution_is_error',
1924
+ action='store_true',
1925
+ help='Fail if the GPU appears not to be available')
1926
+
1927
+ parser.add_argument(
1928
+ '--scratch_dir',
1929
+ default=None,
1930
+ type=str,
1931
+ help='Directory for temporary storage (defaults to system temp dir)')
1932
+
1933
+ parser.add_argument(
1934
+ '--skip_image_tests',
1935
+ action='store_true',
1936
+ help='Skip tests related to still images')
1937
+
1938
+ parser.add_argument(
1939
+ '--skip_video_tests',
1940
+ action='store_true',
1941
+ help='Skip tests related to video')
1942
+
1943
+ parser.add_argument(
1944
+ '--skip_video_rendering_tests',
1945
+ action='store_true',
1946
+ help='Skip tests related to *rendering* video')
1947
+
1948
+ parser.add_argument(
1949
+ '--skip_python_tests',
1950
+ action='store_true',
1951
+ help='Skip python tests')
1952
+
1953
+ parser.add_argument(
1954
+ '--skip_cli_tests',
1955
+ action='store_true',
1956
+ help='Skip CLI tests')
1957
+
1958
+ parser.add_argument(
1959
+ '--skip_download_tests',
1960
+ action='store_true',
1961
+ help='Skip model download tests')
1962
+
1963
+ parser.add_argument(
1964
+ '--skip_import_tests',
1965
+ action='store_true',
1966
+ help='Skip module import tests')
1967
+
1968
+ parser.add_argument(
1969
+ '--skip_cpu_tests',
1970
+ action='store_true',
1971
+ help='Skip force-CPU tests')
1972
+
1973
+ parser.add_argument(
1974
+ '--force_data_download',
1975
+ action='store_true',
1976
+ help='Force download of the test data file, even if it\'s already available')
1977
+
1978
+ parser.add_argument(
1979
+ '--force_data_unzip',
1980
+ action='store_true',
1981
+ help='Force extraction of all files in the test data file, even if they\'re already available')
1982
+
1983
+ parser.add_argument(
1984
+ '--warning_mode',
1985
+ action='store_true',
1986
+ help='Turns numeric/content errors into warnings')
1987
+
1988
+ parser.add_argument(
1989
+ '--max_conf_error',
1990
+ type=float,
1991
+ default=options.max_conf_error,
1992
+ help='Maximum tolerable confidence value deviation from expected (default {})'.format(
1993
+ options.max_conf_error))
1994
+
1995
+ parser.add_argument(
1996
+ '--max_coord_error',
1997
+ type=float,
1998
+ default=options.max_coord_error,
1999
+ help='Maximum tolerable coordinate value deviation from expected (default {})'.format(
2000
+ options.max_coord_error))
2001
+
2002
+ parser.add_argument(
2003
+ '--cli_working_dir',
2004
+ type=str,
2005
+ default=None,
2006
+ help='Working directory for CLI tests')
2007
+
2008
+ parser.add_argument(
2009
+ '--yolo_working_dir',
2010
+ type=str,
2011
+ default=None,
2012
+ help='Working directory for yolo inference tests')
2013
+
2014
+ parser.add_argument(
2015
+ '--cli_test_pythonpath',
2016
+ type=str,
2017
+ default=None,
2018
+ help='PYTHONPATH to set for CLI tests; if None, inherits from the parent process'
2019
+ )
2020
+
2021
+ parser.add_argument(
2022
+ '--test_mode',
2023
+ type=str,
2024
+ default='all',
2025
+ help='Test mode: "all" or "utils-only"'
2026
+ )
2027
+
2028
+ parser.add_argument(
2029
+ '--python_test_depth',
2030
+ type=int,
2031
+ default=options.python_test_depth,
2032
+ help='Used as a knob to control the level of Python tests (0-100)'
2033
+ )
2034
+
2035
+ parser.add_argument(
2036
+ '--model_folder',
2037
+ type=str,
2038
+ default=None,
2039
+ help='Run Python tests on every model in this folder'
2040
+ )
2041
+
2042
+ parser.add_argument(
2043
+ '--detector_options',
2044
+ nargs='*',
2045
+ metavar='KEY=VALUE',
2046
+ default='',
2047
+ help='Detector-specific options, as a space-separated list of key-value pairs')
2048
+
2049
+ parser.add_argument(
2050
+ '--default_model',
2051
+ type=str,
2052
+ default=options.default_model,
2053
+ help='Default model file or well-known model name (used for most tests)')
2054
+
2055
+ # The following token is used for linting, do not remove.
2056
+ #
2057
+ # no_arguments_required
2058
+
2059
+ args = parser.parse_args()
2060
+
2061
+ initial_detector_options = options.detector_options
2062
+ _args_to_object(args,options)
2063
+ from megadetector.utils.ct_utils import parse_kvp_list
2064
+ options.detector_options = parse_kvp_list(args.detector_options,d=initial_detector_options)
2065
+
2066
+ run_tests(options)
2067
+
2068
+ # ...def main()
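parse_kvp_list() (used in main() above) is assumed to turn the space-separated KEY=VALUE tokens collected by argparse into a dict, merging them over the existing defaults passed via d; a minimal stand-in:

def parse_kvp_list_sketch(kvp_list, d=None):
    """Convert ['k1=v1', 'k2=v2'] into a dict, merged over existing defaults (illustrative only)."""
    to_return = {} if d is None else dict(d)
    for kvp in (kvp_list or []):
        k, v = kvp.split('=', 1)
        to_return[k] = v
    return to_return

assert parse_kvp_list_sketch(['compatibility_mode=classic'], d={'verbose': 'true'}) == \
    {'verbose': 'true', 'compatibility_mode': 'classic'}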
2069
+
2070
+ if __name__ == '__main__':
2071
+ main()
2072
+
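For reference, a typical command-line invocation of this test suite (the scratch directory and flag selection here are just an example) might look like the following:

# python -m megadetector.utils.md_tests --scratch_dir /tmp/md-tests \
#     --skip_video_tests --skip_download_tests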