megadetector 5.0.5__py3-none-any.whl → 5.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (132)
  1. api/batch_processing/data_preparation/manage_local_batch.py +302 -263
  2. api/batch_processing/data_preparation/manage_video_batch.py +81 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/categorize_detections_by_size.py +50 -19
  5. api/batch_processing/postprocessing/compare_batch_results.py +110 -60
  6. api/batch_processing/postprocessing/load_api_results.py +56 -70
  7. api/batch_processing/postprocessing/md_to_coco.py +1 -1
  8. api/batch_processing/postprocessing/md_to_labelme.py +2 -1
  9. api/batch_processing/postprocessing/postprocess_batch_results.py +240 -81
  10. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +625 -0
  11. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  12. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  13. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +227 -75
  14. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  15. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  16. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +2 -2
  17. classification/prepare_classification_script.py +191 -191
  18. data_management/coco_to_yolo.py +68 -45
  19. data_management/databases/integrity_check_json_db.py +7 -5
  20. data_management/generate_crops_from_cct.py +3 -3
  21. data_management/get_image_sizes.py +8 -6
  22. data_management/importers/add_timestamps_to_icct.py +79 -0
  23. data_management/importers/animl_results_to_md_results.py +160 -0
  24. data_management/importers/auckland_doc_test_to_json.py +4 -4
  25. data_management/importers/auckland_doc_to_json.py +1 -1
  26. data_management/importers/awc_to_json.py +5 -5
  27. data_management/importers/bellevue_to_json.py +5 -5
  28. data_management/importers/carrizo_shrubfree_2018.py +5 -5
  29. data_management/importers/carrizo_trail_cam_2017.py +5 -5
  30. data_management/importers/cct_field_adjustments.py +2 -3
  31. data_management/importers/channel_islands_to_cct.py +4 -4
  32. data_management/importers/ena24_to_json.py +5 -5
  33. data_management/importers/helena_to_cct.py +10 -10
  34. data_management/importers/idaho-camera-traps.py +12 -12
  35. data_management/importers/idfg_iwildcam_lila_prep.py +8 -8
  36. data_management/importers/jb_csv_to_json.py +4 -4
  37. data_management/importers/missouri_to_json.py +1 -1
  38. data_management/importers/noaa_seals_2019.py +1 -1
  39. data_management/importers/pc_to_json.py +5 -5
  40. data_management/importers/prepare-noaa-fish-data-for-lila.py +4 -4
  41. data_management/importers/prepare_zsl_imerit.py +5 -5
  42. data_management/importers/rspb_to_json.py +4 -4
  43. data_management/importers/save_the_elephants_survey_A.py +5 -5
  44. data_management/importers/save_the_elephants_survey_B.py +6 -6
  45. data_management/importers/snapshot_safari_importer.py +9 -9
  46. data_management/importers/snapshot_serengeti_lila.py +9 -9
  47. data_management/importers/timelapse_csv_set_to_json.py +5 -7
  48. data_management/importers/ubc_to_json.py +4 -4
  49. data_management/importers/umn_to_json.py +4 -4
  50. data_management/importers/wellington_to_json.py +1 -1
  51. data_management/importers/wi_to_json.py +2 -2
  52. data_management/importers/zamba_results_to_md_results.py +181 -0
  53. data_management/labelme_to_coco.py +35 -7
  54. data_management/labelme_to_yolo.py +229 -0
  55. data_management/lila/add_locations_to_island_camera_traps.py +1 -1
  56. data_management/lila/add_locations_to_nacti.py +147 -0
  57. data_management/lila/create_lila_blank_set.py +474 -0
  58. data_management/lila/create_lila_test_set.py +2 -1
  59. data_management/lila/create_links_to_md_results_files.py +106 -0
  60. data_management/lila/download_lila_subset.py +46 -21
  61. data_management/lila/generate_lila_per_image_labels.py +23 -14
  62. data_management/lila/get_lila_annotation_counts.py +17 -11
  63. data_management/lila/lila_common.py +14 -11
  64. data_management/lila/test_lila_metadata_urls.py +116 -0
  65. data_management/ocr_tools.py +829 -0
  66. data_management/resize_coco_dataset.py +13 -11
  67. data_management/yolo_output_to_md_output.py +84 -12
  68. data_management/yolo_to_coco.py +38 -20
  69. detection/process_video.py +36 -14
  70. detection/pytorch_detector.py +23 -8
  71. detection/run_detector.py +76 -19
  72. detection/run_detector_batch.py +178 -63
  73. detection/run_inference_with_yolov5_val.py +326 -57
  74. detection/run_tiled_inference.py +153 -43
  75. detection/video_utils.py +34 -8
  76. md_utils/ct_utils.py +172 -1
  77. md_utils/md_tests.py +372 -51
  78. md_utils/path_utils.py +167 -39
  79. md_utils/process_utils.py +26 -7
  80. md_utils/split_locations_into_train_val.py +215 -0
  81. md_utils/string_utils.py +10 -0
  82. md_utils/url_utils.py +0 -2
  83. md_utils/write_html_image_list.py +9 -26
  84. md_visualization/plot_utils.py +12 -8
  85. md_visualization/visualization_utils.py +106 -7
  86. md_visualization/visualize_db.py +16 -8
  87. md_visualization/visualize_detector_output.py +208 -97
  88. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/METADATA +3 -6
  89. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/RECORD +98 -121
  90. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/WHEEL +1 -1
  91. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  92. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  93. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  94. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  95. taxonomy_mapping/species_lookup.py +33 -13
  96. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  97. api/synchronous/api_core/yolov5/detect.py +0 -252
  98. api/synchronous/api_core/yolov5/export.py +0 -607
  99. api/synchronous/api_core/yolov5/hubconf.py +0 -146
  100. api/synchronous/api_core/yolov5/models/__init__.py +0 -0
  101. api/synchronous/api_core/yolov5/models/common.py +0 -738
  102. api/synchronous/api_core/yolov5/models/experimental.py +0 -104
  103. api/synchronous/api_core/yolov5/models/tf.py +0 -574
  104. api/synchronous/api_core/yolov5/models/yolo.py +0 -338
  105. api/synchronous/api_core/yolov5/train.py +0 -670
  106. api/synchronous/api_core/yolov5/utils/__init__.py +0 -36
  107. api/synchronous/api_core/yolov5/utils/activations.py +0 -103
  108. api/synchronous/api_core/yolov5/utils/augmentations.py +0 -284
  109. api/synchronous/api_core/yolov5/utils/autoanchor.py +0 -170
  110. api/synchronous/api_core/yolov5/utils/autobatch.py +0 -66
  111. api/synchronous/api_core/yolov5/utils/aws/__init__.py +0 -0
  112. api/synchronous/api_core/yolov5/utils/aws/resume.py +0 -40
  113. api/synchronous/api_core/yolov5/utils/benchmarks.py +0 -148
  114. api/synchronous/api_core/yolov5/utils/callbacks.py +0 -71
  115. api/synchronous/api_core/yolov5/utils/dataloaders.py +0 -1087
  116. api/synchronous/api_core/yolov5/utils/downloads.py +0 -178
  117. api/synchronous/api_core/yolov5/utils/flask_rest_api/example_request.py +0 -19
  118. api/synchronous/api_core/yolov5/utils/flask_rest_api/restapi.py +0 -46
  119. api/synchronous/api_core/yolov5/utils/general.py +0 -1018
  120. api/synchronous/api_core/yolov5/utils/loggers/__init__.py +0 -187
  121. api/synchronous/api_core/yolov5/utils/loggers/wandb/__init__.py +0 -0
  122. api/synchronous/api_core/yolov5/utils/loggers/wandb/log_dataset.py +0 -27
  123. api/synchronous/api_core/yolov5/utils/loggers/wandb/sweep.py +0 -41
  124. api/synchronous/api_core/yolov5/utils/loggers/wandb/wandb_utils.py +0 -577
  125. api/synchronous/api_core/yolov5/utils/loss.py +0 -234
  126. api/synchronous/api_core/yolov5/utils/metrics.py +0 -355
  127. api/synchronous/api_core/yolov5/utils/plots.py +0 -489
  128. api/synchronous/api_core/yolov5/utils/torch_utils.py +0 -314
  129. api/synchronous/api_core/yolov5/val.py +0 -394
  130. md_utils/matlab_porting_tools.py +0 -97
  131. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/LICENSE +0 -0
  132. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/top_level.txt +0 -0
@@ -14,18 +14,6 @@
  # the same if you are reading this in Jupyter Notebook (using the .ipynb version of the
  # script):
  #
- # * You can specify the MegaDetector location, but you may find it useful to use the same paths
- # I use; on all the machines where I run MD, I keep all versions of MegaDetector handy at these
- # paths:
- #
- # ~/models/camera_traps/megadetector/md_v5.0.0/md_v5a.0.0.pt
- # ~/models/camera_traps/megadetector/md_v5.0.0/md_v5b.0.0.pt
- # ~/models/camera_traps/megadetector/md_v4.1.0/md_v4.1.0.pb
- #
- # On Windows, this translates to, for example:
- #
- # c:\users\dmorr\models\camera_traps\megadetector\md_v5.0.0\md_v5a.0.0.pt
- #
  # * Typically when I have a MegaDetector job to run, I make a copy of this script. Let's
  # say I'm running a job for an organization called "bibblebop"; I have a big folder of
  # job-specific copies of this script, and I might save a new one called "bibblebop-2023-07-26.py"
@@ -78,6 +66,7 @@ import json
  import os
  import stat
  import time
+ import re

  import humanfriendly

@@ -86,15 +75,16 @@ from collections import defaultdict

  from md_utils import path_utils
  from md_utils.ct_utils import is_list_sorted
+ from md_utils.ct_utils import split_list_into_n_chunks

  from detection.run_detector_batch import load_and_run_detector_batch, write_results_to_file
  from detection.run_detector import DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD
+ from detection.run_detector import estimate_md_images_per_second

  from api.batch_processing.postprocessing.postprocess_batch_results import (
  PostProcessingOptions, process_batch_results)
  from detection.run_detector import get_detector_version_from_filename
-
- max_task_name_length = 92
+ from md_utils.ct_utils import image_file_to_camera_folder

  # To specify a non-default confidence threshold for including detections in the .json file
  json_threshold = None
@@ -102,61 +92,108 @@ json_threshold = None
  # Turn warnings into errors if more than this many images are missing
  max_tolerable_failed_images = 100

+ # Should we supply the --image_queue_option to run_detector_batch.py? I only set this
+ # when I have a very slow drive and a comparably fast GPU. When this is enabled, checkpointing
+ # is not supported within a job, so I set n_jobs to a large number (typically 100).
  use_image_queue = False

  # Only relevant when we're using a single GPU
  default_gpu_number = 0

+ # Should we supply --quiet to run_detector_batch.py?
  quiet_mode = True

  # Specify a target image size when running MD... strongly recommended to leave this at "None"
+ #
+ # When using augmented inference, if you leave this at "None", run_inference_with_yolov5_val.py
+ # will use its default size, which is 1280 * 1.3, which is almost always what you want.
  image_size = None

  # Only relevant when running on CPU
  ncores = 1

- # OS-specific script line continuation character
+ # OS-specific script line continuation character (modified later if we're running on Windows)
  slcc = '\\'

- # OS-specific script comment character
+ # OS-specific script comment character (modified later if we're running on Windows)
  scc = '#'

+ # # OS-specific script extension (modified later if we're running on Windows)
  script_extension = '.sh'

+ # If False, we'll load chunk files with file lists if they exist
+ force_enumeration = False
+
  # Prefer threads on Windows, processes on Linux
  parallelization_defaults_to_threads = False

  # This is for things like image rendering, not for MegaDetector
  default_workers_for_parallel_tasks = 30

+ overwrite_handling = 'skip' # 'skip', 'error', or 'overwrite'
+
+ # Only relevant to repeat detection elimination; try to identify EK113/RCNX101-style
+ # overflow folders and treat them as the same camera
+ overflow_folder_handling_enabled = True
+
+ # The function used to get camera names from image paths; can also replace this
+ # with a custom function.
+ relative_path_to_location = image_file_to_camera_folder
+
+ # This will be the .json results file after RDE; if this is still None when
+ # we get to classification stuff, that will indicate that we didn't do RDE.
+ filtered_output_filename = None
+
+ if os.name == 'nt':
+
+ slcc = '^'
+ scc = 'REM'
+ script_extension = '.bat'
+
+ # My experience has been that Python multiprocessing is flaky on Windows, so
+ # default to threads on Windows
+ parallelization_defaults_to_threads = True
+ default_workers_for_parallel_tasks = 10
+
+
+ ## Constants related to using YOLOv5's val.py
+
  # Should we use YOLOv5's val.py instead of run_detector_batch.py?
  use_yolo_inference_scripts = False

- # Directory in which to run val.py. Only relevant if use_yolo_inference_scripts is True.
+ # Directory in which to run val.py (relevant for YOLOv5, not for YOLOv8)
  yolo_working_dir = os.path.expanduser('~/git/yolov5')

+ # Only used for loading the mapping from class indices to names
+ yolo_dataset_file = None
+
+ # 'yolov5' or 'yolov8'; assumes YOLOv5 if this is None
+ yolo_model_type = None
+
+ # inference batch size
+ yolo_batch_size = 1
+
  # Should we remove intermediate files used for running YOLOv5's val.py?
  #
  # Only relevant if use_yolo_inference_scripts is True.
- remove_yolo_intermediate_results = False
- remove_yolo_symlink_folder = False
+ remove_yolo_intermediate_results = True
+ remove_yolo_symlink_folder = True
  use_symlinks_for_yolo_inference = True
+ write_yolo_debug_output = False

- overwrite_handling = 'skip' # 'skip', 'error', or 'overwrite'
+ # Should we apply YOLOv5's test-time augmentation?
+ augment = False

- # Set later if EK113/RCNX101-style overflow folders are being handled in this dataset
- overflow_folder_handling_enabled = False

- # Should we apply YOLOv5's augmentation? Only allowed when use_yolo_inference_scripts
- # is True.
- augment = False
+ ## Constants related to tiled inference

- if os.name == 'nt':
- slcc = '^'
- scc = 'REM'
- script_extension = '.bat'
- parallelization_defaults_to_threads = True
- default_workers_for_parallel_tasks = 10
+ use_tiled_inference = True
+
+ # Should we delete tiles after each job? Only set this to False for debugging;
+ # large jobs will take up a lot of space if you keep tiles around after each task.
+ remove_tiles = True
+ tile_size = (1280,1280)
+ tile_overlap = 0.2


  #%% Constants I set per script
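The new tiled-inference settings above (use_tiled_inference, remove_tiles, tile_size, tile_overlap) feed into the run_tiled_inference.py command that this script writes later. A minimal sketch of that command construction, assuming the CLI flags shown in a later hunk of this diff; the job folder and chunk file names here are hypothetical:

# Sketch only: mirrors the command construction shown further down in this diff.
# Folder and file names are hypothetical placeholders.
import os

model_file = 'MDV5A'
input_path = '/drive/organization'
filename_base = os.path.expanduser('~/postprocessing/organization-2024-01-01')
chunk_file = os.path.join(filename_base, 'chunk000.json')
output_fn = chunk_file.replace('.json', '_results.json')
tiling_folder = os.path.join(filename_base, 'tile_cache', 'tile_cache_000')

tile_size = (1280, 1280)
tile_overlap = 0.2
remove_tiles = True
gpu_number = 0

cmd = f'CUDA_VISIBLE_DEVICES={gpu_number} python run_tiled_inference.py ' + \
      f'"{model_file}" "{input_path}" "{tiling_folder}" "{output_fn}"'
cmd += f' --image_list "{chunk_file}"'
cmd += ' --overwrite_handling skip'
if not remove_tiles:
    cmd += ' --no_remove_tiles'
if tile_size is not None and (tile_size[0] > 0 or tile_size[1] > 0):
    cmd += ' --tile_size_x {} --tile_size_y {}'.format(tile_size[0], tile_size[1])
if tile_overlap is not None:
    cmd += f' --tile_overlap {tile_overlap}'

print(cmd)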
@@ -164,9 +201,11 @@
  input_path = '/drive/organization'

  assert not (input_path.endswith('/') or input_path.endswith('\\'))
+ assert os.path.isdir(input_path), 'Could not find input folder {}'.format(input_path)
+ input_path = input_path.replace('\\','/')

  organization_name_short = 'organization'
- job_date = None # '2023-05-08'
+ job_date = None # '2024-01-01'
  assert job_date is not None and organization_name_short != 'organization'

  # Optional descriptor
@@ -177,9 +216,7 @@ if job_tag is None:
  job_description_string = ''
  else:
  job_description_string = '-' + job_tag
- model_file = os.path.expanduser('~/models/camera_traps/megadetector/md_v5.0.0/md_v5a.0.0.pt')
- # model_file = os.path.expanduser('~/models/camera_traps/megadetector/md_v5.0.0/md_v5b.0.0.pt')
- # model_file = os.path.expanduser('~/models/camera_traps/megadetector/md_v4.1.0/md_v4.1.0.pb')
+ model_file = 'MDV5A' # 'MDV5A', 'MDV5B', 'MDV4'

  postprocessing_base = os.path.expanduser('~/postprocessing')

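The model is now selected by the shortcut string 'MDV5A' rather than a hard-coded local path; resolution of these shortcut names presumably lives in run_detector.py, which this release also modifies. A minimal sketch of both usages, where the local checkpoint path is only an illustration of the old 5.0.5-style configuration:

# Sketch only: 'MDV5A'/'MDV5B'/'MDV4' are the shortcut names shown in this diff;
# the local path below is an illustrative old-style alternative, not required.
import os

model_file = 'MDV5A'  # 'MDV5A', 'MDV5B', 'MDV4'

use_local_checkpoint = False
if use_local_checkpoint:
    model_file = os.path.expanduser(
        '~/models/camera_traps/megadetector/md_v5.0.0/md_v5a.0.0.pt')
    assert os.path.isfile(model_file), 'Could not find model file {}'.format(model_file)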
@@ -193,16 +230,12 @@ n_gpus = 2
  # checkpointing. Don't worry, this will be assert()'d in the next cell.
  checkpoint_frequency = 10000

- # gpu_images_per_second is only used to print out a time estimate, and it's completely
- # tied to the assumption of running on an RTX 3090. YMMV.
- if ('v5') in model_file:
- gpu_images_per_second = 10
- else:
- gpu_images_per_second = 2.9
+ # Estimate inference speed for the current GPU
+ approx_images_per_second = estimate_md_images_per_second(model_file)

- # Rough estimate for how much slower everything runs when using augmentation
+ # Rough estimate for the inference time cost of augmentation
  if augment:
- gpu_images_per_second = gpu_images_per_second * 0.7
+ approx_images_per_second = approx_images_per_second * 0.7

  base_task_name = organization_name_short + '-' + job_date + job_description_string + '-' + \
  get_detector_version_from_filename(model_file)
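The hard-coded RTX-3090 throughput numbers are gone; estimate_md_images_per_second() now supplies the rate, and the time-estimate cell (shown in a later hunk) has to tolerate a None return. A minimal sketch of that downstream arithmetic, with hypothetical image counts and a hypothetical rate standing in for the function's return value:

# Sketch only: n_images, n_gpus, and the rate are illustrative values.
import humanfriendly

approx_images_per_second = 10.0  # e.g. a value estimate_md_images_per_second() might return
augment = False
if augment:
    approx_images_per_second = approx_images_per_second * 0.7

n_images = 250000
n_gpus = 2

if approx_images_per_second is None:
    print("Can't estimate inference time for the current environment")
else:
    execution_seconds = n_images / approx_images_per_second
    wallclock_seconds = execution_seconds / n_gpus
    print('Expected time: {}'.format(humanfriendly.format_timespan(wallclock_seconds)))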
@@ -223,6 +256,14 @@
  assert use_yolo_inference_scripts,\
  'Augmentation is only supported when running with the YOLO inference scripts'

+ if use_tiled_inference:
+ assert not augment, \
+ 'Augmentation is not supported when using tiled inference'
+ assert not use_yolo_inference_scripts, \
+ 'Using the YOLO inference script is not supported when using tiled inference'
+ assert checkpoint_frequency is None, \
+ 'Checkpointing is not supported when using tiled inference'
+
  filename_base = os.path.join(base_output_folder_name, base_task_name)
  combined_api_output_folder = os.path.join(filename_base, 'combined_api_outputs')
  postprocessing_output_folder = os.path.join(filename_base, 'preview')
@@ -239,20 +280,17 @@ print('Output folder:\n{}'.format(filename_base))

  #%% Enumerate files

- all_images = sorted(path_utils.find_images(input_path,recursive=True))
-
- print('Enumerated {} image files in {}'.format(len(all_images),input_path))
-
- if False:
+ # Have we already listed files for this job?
+ chunk_files = os.listdir(filename_base)
+ pattern = re.compile('chunk\d+.json')
+ chunk_files = [fn for fn in chunk_files if pattern.match(fn)]

- pass
+ if (not force_enumeration) and (len(chunk_files) > 0):

- #%% Load files from prior enumeration
+ print('Found {} chunk files in folder {}, bypassing enumeration'.format(
+ len(chunk_files),
+ filename_base))

- import re
- chunk_files = os.listdir(filename_base)
- pattern = re.compile('chunk\d+.json')
- chunk_files = [fn for fn in chunk_files if pattern.match(fn)]
  all_images = []
  for fn in chunk_files:
  with open(os.path.join(filename_base,fn),'r') as f:
@@ -260,27 +298,45 @@ if False:
  assert isinstance(chunk,list)
  all_images.extend(chunk)
  all_images = sorted(all_images)
- print('Loaded {} image files from chunks in {}'.format(len(all_images),filename_base))

+ print('Loaded {} image files from {} chunks in {}'.format(
+ len(all_images),len(chunk_files),filename_base))

- #%% Divide images into chunks
+ else:
+
+ print('Enumerating image files in {}'.format(input_path))
+
+ all_images = sorted(path_utils.find_images(input_path,recursive=True,convert_slashes=True))
+
+ # It's common to run this notebook on an external drive with the main folders in the drive root
+ all_images = [fn for fn in all_images if not \
+ (fn.startswith('$RECYCLE') or fn.startswith('System Volume Information'))]
+
+ print('')
+
+ print('Enumerated {} image files in {}'.format(len(all_images),input_path))
+

- def split_list(L, n):
- k, m = divmod(len(L), n)
- return list(L[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n))
+ #%% Divide images into chunks

- folder_chunks = split_list(all_images,n_jobs)
+ folder_chunks = split_list_into_n_chunks(all_images,n_jobs)


  #%% Estimate total time

- n_images = len(all_images)
- execution_seconds = n_images / gpu_images_per_second
- wallclock_seconds = execution_seconds / n_gpus
- print('Expected time: {}'.format(humanfriendly.format_timespan(wallclock_seconds)))
-
- seconds_per_chunk = len(folder_chunks[0]) / gpu_images_per_second
- print('Expected time per chunk: {}'.format(humanfriendly.format_timespan(seconds_per_chunk)))
+ if approx_images_per_second is None:
+
+ print("Can't estimate inference time for the current environment")
+
+ else:
+
+ n_images = len(all_images)
+ execution_seconds = n_images / approx_images_per_second
+ wallclock_seconds = execution_seconds / n_gpus
+ print('Expected time: {}'.format(humanfriendly.format_timespan(wallclock_seconds)))
+
+ seconds_per_chunk = len(folder_chunks[0]) / approx_images_per_second
+ print('Expected time per chunk: {}'.format(humanfriendly.format_timespan(seconds_per_chunk)))


  #%% Write file lists
@@ -297,19 +353,20 @@ for i_chunk,chunk_list in enumerate(folder_chunks):
  #%% Generate commands

  # A list of the scripts tied to each GPU, as absolute paths. We'll write this out at
- # the end so each GPU's list of commands can be run at once. Generally only used when
- # running lots of small batches via YOLOv5's val.py, which doesn't support checkpointing.
+ # the end so each GPU's list of commands can be run at once
  gpu_to_scripts = defaultdict(list)

  # i_task = 0; task = task_info[i_task]
  for i_task,task in enumerate(task_info):

  chunk_file = task['input_file']
+ checkpoint_filename = chunk_file.replace('.json','_checkpoint.json')
+
  output_fn = chunk_file.replace('.json','_results.json')

  task['output_file'] = output_fn

- if n_jobs > 1:
+ if n_gpus > 1:
  gpu_number = i_task % n_gpus
  else:
  gpu_number = default_gpu_number
@@ -325,6 +382,10 @@ for i_task,task in enumerate(task_info):
  augment_string = ''
  if augment:
  augment_string = '--augment_enabled 1'
+ else:
+ augment_string = '--augment_enabled 0'
+
+ batch_string = '--batch_size {}'.format(yolo_batch_size)

  symlink_folder = os.path.join(filename_base,'symlinks','symlinks_{}'.format(
  str(i_task).zfill(3)))
@@ -338,6 +399,10 @@ for i_task,task in enumerate(task_info):
  if not remove_yolo_symlink_folder:
  remove_symlink_folder_string = '--no_remove_symlink_folder'

+ write_yolo_debug_output_string = ''
+ if write_yolo_debug_output:
+ write_yolo_debug_output = '--write_yolo_debug_output'
+
  remove_yolo_results_string = ''
  if not remove_yolo_intermediate_results:
  remove_yolo_results_string = '--no_remove_yolo_results_folder'
@@ -354,12 +419,48 @@ for i_task,task in enumerate(task_info):

  overwrite_handling_string = '--overwrite_handling {}'.format(overwrite_handling)

- cmd += f'python run_inference_with_yolov5_val.py "{model_file}" "{chunk_file}" "{output_fn}" "{yolo_working_dir}" {image_size_string} {augment_string} {symlink_folder_string} {yolo_results_folder_string} {remove_yolo_results_string} {remove_symlink_folder_string} {confidence_threshold_string} {device_string} {overwrite_handling_string}'
-
+ cmd += f'python run_inference_with_yolov5_val.py "{model_file}" "{chunk_file}" "{output_fn}" '
+ cmd += f'{image_size_string} {augment_string} '
+ cmd += f'{symlink_folder_string} {yolo_results_folder_string} {remove_yolo_results_string} '
+ cmd += f'{remove_symlink_folder_string} {confidence_threshold_string} {device_string} '
+ cmd += f'{overwrite_handling_string} {batch_string} {write_yolo_debug_output_string}'
+
+ if yolo_working_dir is not None:
+ cmd += f' --yolo_working_folder "{yolo_working_dir}"'
+ if yolo_dataset_file is not None:
+ cmd += ' --yolo_dataset_file "{}"'.format(yolo_dataset_file)
+ if yolo_model_type is not None:
+ cmd += ' --model_type {}'.format(yolo_model_type)
+
  if not use_symlinks_for_yolo_inference:
  cmd += ' --no_use_symlinks'

  cmd += '\n'
+
+ elif use_tiled_inference:
+
+ tiling_folder = os.path.join(filename_base,'tile_cache','tile_cache_{}'.format(
+ str(i_task).zfill(3)))
+
+ if os.name == 'nt':
+ cuda_string = f'set CUDA_VISIBLE_DEVICES={gpu_number} & '
+ else:
+ cuda_string = f'CUDA_VISIBLE_DEVICES={gpu_number} '
+
+ cmd = f'{cuda_string} python run_tiled_inference.py "{model_file}" "{input_path}" "{tiling_folder}" "{output_fn}"'
+
+ cmd += f' --image_list "{chunk_file}"'
+ cmd += f' --overwrite_handling {overwrite_handling}'
+
+ if not remove_tiles:
+ cmd += ' --no_remove_tiles'
+
+ # If we're using non-default tile sizes
+ if tile_size is not None and (tile_size[0] > 0 or tile_size[1] > 0):
+ cmd += ' --tile_size_x {} --tile_size_y {}'.format(tile_size[0],tile_size[1])
+
+ if tile_overlap is not None:
+ cmd += f' --tile_overlap {tile_overlap}'

  else:

@@ -370,7 +471,6 @@ for i_task,task in enumerate(task_info):

  checkpoint_frequency_string = ''
  checkpoint_path_string = ''
- checkpoint_filename = chunk_file.replace('.json','_checkpoint.json')

  if checkpoint_frequency is not None and checkpoint_frequency > 0:
  checkpoint_frequency_string = f'--checkpoint_frequency {checkpoint_frequency}'
@@ -479,12 +579,10 @@ multiple processes, so the tasks will run serially. This only matters if you ha
  GPUs.
  """

- if False:
-
- pass
-
- #%%% Run the tasks (commented out)
+ run_tasks_in_notebook = False

+ if run_tasks_in_notebook:
+
  assert not use_yolo_inference_scripts, \
  'If you want to use the YOLOv5 inference scripts, you can\'t run the model interactively (yet)'

@@ -532,15 +630,32 @@ if False:

  # ...for each chunk

- # ...if False
+ # ...if we're running tasks in this notebook


  #%% Load results, look for failed or missing images in each task

+ # Check that all task output files exist
+
+ missing_output_files = []
+
+ # i_task = 0; task = task_info[i_task]
+ for i_task,task in tqdm(enumerate(task_info),total=len(task_info)):
+ output_file = task['output_file']
+ if not os.path.isfile(output_file):
+ missing_output_files.append(output_file)
+
+ if len(missing_output_files) > 0:
+ print('Missing {} output files:'.format(len(missing_output_files)))
+ for s in missing_output_files:
+ print(s)
+ raise Exception('Missing output files')
+
+
  n_total_failures = 0

  # i_task = 0; task = task_info[i_task]
- for i_task,task in enumerate(task_info):
+ for i_task,task in tqdm(enumerate(task_info),total=len(task_info)):

  chunk_file = task['input_file']
  output_file = task['output_file']
@@ -557,6 +672,13 @@ for i_task,task in enumerate(task_info):

  # im = task_results['images'][0]
  for im in task_results['images']:
+
+ # Most of the time, inference result files use absolute paths, but it's
+ # getting annoying to make sure that's *always* true, so handle both here.
+ # E.g., when using tiled inference, paths will be relative.
+ if not os.path.isabs(im['file']):
+ fn = os.path.join(input_path,im['file']).replace('\\','/')
+ im['file'] = fn
  assert im['file'].startswith(input_path)
  assert im['file'] in task_images_set
  filename_to_results[im['file']] = im
@@ -568,7 +690,8 @@ for i_task,task in enumerate(task_info):
  task['results'] = task_results

  for fn in task_images:
- assert fn in filename_to_results
+ assert fn in filename_to_results, \
+ 'File {} not found in results for task {}'.format(fn,i_task)

  n_total_failures += n_task_failures

@@ -582,13 +705,13 @@ print('Processed all {} images with {} failures'.format(
  len(all_images),n_total_failures))


- #%% Merge results files and make images relative
+ ##%% Merge results files and make filenames relative

  combined_results = {}
  combined_results['images'] = []
  images_processed = set()

- for i_task,task in enumerate(task_info):
+ for i_task,task in tqdm(enumerate(task_info),total=len(task_info)):

  task_results = task['results']

@@ -615,10 +738,14 @@ assert len(combined_results['images']) == len(all_images), \
  result_filenames = [im['file'] for im in combined_results['images']]
  assert len(combined_results['images']) == len(set(result_filenames))

- # Check for valid path names
+ # Convert to relative paths, preserving '/' as the path separator, regardless of OS
  for im in combined_results['images']:
- assert im['file'].startswith(input_path + os.path.sep)
- im['file'] = im['file'].replace(input_path + os.path.sep,'',1)
+ assert '\\' not in im['file']
+ assert im['file'].startswith(input_path)
+ if input_path.endswith(':'):
+ im['file'] = im['file'].replace(input_path,'',1)
+ else:
+ im['file'] = im['file'].replace(input_path + '/','',1)

  combined_api_output_file = os.path.join(
  combined_api_output_folder,
@@ -642,10 +769,8 @@ options.confidence_threshold = 0.2
  options.almost_detection_confidence_threshold = options.confidence_threshold - 0.05
  options.ground_truth_json_file = None
  options.separate_detections_by_category = True
- options.sort_html_by_confidence = False
-
- # options.sample_seed = 0
- # options.max_figures_per_html_file = 5000
+ options.sample_seed = 0
+ options.max_figures_per_html_file = 2500

  options.parallelize_rendering = True
  options.parallelize_rendering_n_cores = default_workers_for_parallel_tasks
@@ -668,89 +793,8 @@ options.api_output_file = combined_api_output_file
  options.output_dir = output_base
  ppresults = process_batch_results(options)
  html_output_file = ppresults.output_html_file
- path_utils.open_file(html_output_file)
-
-
-
- #%% RDE (sample directory collapsing)
-
- #
- # The next few cells are about repeat detection elimination; if you want to skip this,
- # and still do other stuff in this notebook (e.g. running classifiers), that's fine, but
- # the rest of the notebook weakly assumes you've done this. Specifically, it looks for
- # the variable "filtered_api_output_file" (a file produced by the RDE process). If you
- # don't run the RDE cells, just change "filtered_api_output_file" to "combined_api_output_file"
- # (the raw output from MegaDetector). Then it will be like all this RDE stuff doesn't exist.
- #
- # Though FWIW, once you're sufficiently power-user-ish to use this notebook, RDE is almost
- # always worth it.
- #
-
- def remove_overflow_folders(relative_path):
- """
- This is a sample function that returns a camera name given an image path. By
- default in the RDE process, leaf-node folders are equivalent to cameras. To map
- something other than leaf-node folders to cameras, fill in this function, and un-comment the
- line below containing "remove_overflow_folders".
-
- Sample regular expressions are included here for common patterns, particularly the
- overflow folders created by Reconyx and Bushnell camera traps. So if one of those
- fits your scenario, you don't have to modify this function, just un-comment the line
- below that enables this feature.
-
- Nothing bad happens if you have overflow folders like this and you don't
- enable this mapping, you are just taking a more conservative approach to RDE in that
- scenario.
- """
-
- import re
-
- # 100RECNX is the overflow folder style for Reconyx cameras
- # 100EK113 is (for some reason) the overflow folder style for Bushnell cameras
- # 100_BTCF is the overflow folder style for Browning cameras
- patterns = ['\/\d+RECNX\/','\/\d+EK\d+\/','\/\d+_BTCF\/']
-
- relative_path = relative_path.replace('\\','/')
- for pat in patterns:
- relative_path = re.sub(pat,'/',relative_path)
- dir_name = os.path.dirname(relative_path)
-
- return dir_name
-
-
- #%% Test cells for remove_overflow_folders
-
- if False:
-
- pass
-
- #%% Test the generic cases
-
- relative_path = 'a/b/c/d/100EK113/blah.jpg'
- print(remove_overflow_folders(relative_path))
-
- relative_path = 'a/b/c/d/100RECNX/blah.jpg'
- print(remove_overflow_folders(relative_path))
-
-
- #%% Test remove_overflow_folders on the current dataset
-
- with open(combined_api_output_file,'r') as f:
- d = json.load(f)
- image_filenames = [im['file'] for im in d['images']]
-
- dir_names = set()
-
- # relative_path = image_filenames[0]
- for relative_path in tqdm(image_filenames):
- dir_name = remove_overflow_folders(relative_path)
- dir_names.add(dir_name)
-
- dir_names = list(dir_names)
- dir_names.sort()
-
- for s in dir_names:
- print(s)
+ path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
+ # import clipboard; clipboard.copy(html_output_file)


  #%% Repeat detection elimination, phase 1
@@ -762,7 +806,7 @@ task_index = 0

  options = repeat_detections_core.RepeatDetectionOptions()

- options.confidenceMin = 0.15
+ options.confidenceMin = 0.1
  options.confidenceMax = 1.01
  options.iouThreshold = 0.85
  options.occurrenceThreshold = 15
@@ -779,13 +823,13 @@ options.otherDetectionsThreshold = options.confidenceMin

  options.bRenderDetectionTiles = True
  options.maxOutputImageWidth = 2000
- options.detectionTilesMaxCrops = 1000
+ options.detectionTilesMaxCrops = 300

  # options.lineThickness = 5
  # options.boxExpansion = 8

  # To invoke custom collapsing of folders for a particular manufacturer's naming scheme
- # options.customDirNameFunction = remove_overflow_folders; overflow_folder_handling_enabled = True
+ options.customDirNameFunction = relative_path_to_location

  options.bRenderHtml = False
  options.imageBase = input_path
@@ -810,9 +854,9 @@ options.debugMaxRenderInstance = -1
  # Can be None, 'xsort', or 'clustersort'
  options.smartSort = 'xsort'

- suspiciousDetectionResults = repeat_detections_core.find_repeat_detections(combined_api_output_file,
- None,
- options)
+ suspicious_detection_results = repeat_detections_core.find_repeat_detections(combined_api_output_file,
+ outputFilename=None,
+ options=options)


  #%% Manual RDE step
@@ -820,7 +864,8 @@ suspiciousDetectionResults = repeat_detections_core.find_repeat_detections(combi
  ## DELETE THE VALID DETECTIONS ##

  # If you run this line, it will open the folder up in your file browser
- path_utils.open_file(os.path.dirname(suspiciousDetectionResults.filterFile))
+ path_utils.open_file(os.path.dirname(suspicious_detection_results.filterFile),
+ attempt_to_open_in_wsl_host=True)

  #
  # If you ran the previous cell, but then you change your mind and you don't want to do
@@ -828,7 +873,7 @@ path_utils.open_file(os.path.dirname(suspiciousDetectionResults.filterFile))
  # previous cell. If you do that, you're implicitly telling the notebook that you looked
  # at everything in that folder, and confirmed there were no red boxes on animals.
  #
- # Instead, either change "filtered_api_output_file" below to "combined_api_output_file",
+ # Instead, either change "filtered_output_filename" below to "combined_api_output_file",
  # or delete *all* the images in the filtering folder.
  #

@@ -837,12 +882,13 @@ path_utils.open_file(os.path.dirname(suspiciousDetectionResults.filterFile))

  from api.batch_processing.postprocessing.repeat_detection_elimination import remove_repeat_detections

- filtered_output_filename = path_utils.insert_before_extension(combined_api_output_file, 'filtered_{}'.format(rde_string))
+ filtered_output_filename = path_utils.insert_before_extension(combined_api_output_file,
+ 'filtered_{}'.format(rde_string))

  remove_repeat_detections.remove_repeat_detections(
  inputFile=combined_api_output_file,
  outputFile=filtered_output_filename,
- filteringDir=os.path.dirname(suspiciousDetectionResults.filterFile)
+ filteringDir=os.path.dirname(suspicious_detection_results.filterFile)
  )


@@ -858,8 +904,8 @@ options.confidence_threshold = 0.2
  options.almost_detection_confidence_threshold = options.confidence_threshold - 0.05
  options.ground_truth_json_file = None
  options.separate_detections_by_category = True
- # options.sample_seed = 0
- # options.max_figures_per_html_file = 5000
+ options.sample_seed = 0
+ options.max_figures_per_html_file = 5000

  options.parallelize_rendering = True
  options.parallelize_rendering_n_cores = default_workers_for_parallel_tasks
@@ -884,7 +930,8 @@ options.output_dir = output_base
  ppresults = process_batch_results(options)
  html_output_file = ppresults.output_html_file

- path_utils.open_file(html_output_file)
+ path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
+ # import clipboard; clipboard.copy(html_output_file)


  #%% Run MegaClassifier (actually, write out a script that runs MegaClassifier)
@@ -893,6 +940,11 @@ path_utils.open_file(html_output_file)
  final_output_path_mc = None
  final_output_path_ic = None

+ # If we didn't do RDE
+ if filtered_output_filename is None:
+ print("Warning: it looks like you didn't do RDE, using the raw output file")
+ filtered_output_filename = combined_api_output_file
+
  classifier_name_short = 'megaclassifier'
  threshold_str = '0.15' # 0.6
  classifier_name = 'megaclassifier_v0.1_efficientnet-b3'
@@ -1080,7 +1132,6 @@ with open(output_file,'w') as f:
  for s in commands:
  f.write('{}'.format(s))

- import stat
  st = os.stat(output_file)
  os.chmod(output_file, st.st_mode | stat.S_IEXEC)

@@ -1250,8 +1301,6 @@ os.chmod(output_file, st.st_mode | stat.S_IEXEC)

  #%% Within-image classification smoothing

- from collections import defaultdict
-
  #
  # Only count detections with a classification confidence threshold above
  # *classification_confidence_threshold*, which in practice means we're only
@@ -1510,7 +1559,7 @@ else:
  import datetime
  from data_management.read_exif import parse_exif_datetime_string

- min_valid_timestamp_year = 2015
+ min_valid_timestamp_year = 2001

  now = datetime.datetime.now()

@@ -1528,12 +1577,13 @@ for exif_result in tqdm(exif_results):

  # By default we assume that each leaf-node folder is a location
  if overflow_folder_handling_enabled:
- im['location'] = remove_overflow_folders(os.path.dirname(exif_result['file_name']))
+ im['location'] = relative_path_to_location(os.path.dirname(exif_result['file_name']))
  else:
  im['location'] = os.path.dirname(exif_result['file_name'])

  im['file_name'] = exif_result['file_name']
  im['id'] = im['file_name']
+
  if ('exif_tags' not in exif_result) or (exif_result['exif_tags'] is None) or \
  (exif_datetime_tag not in exif_result['exif_tags']):
  exif_dt = None
@@ -1567,7 +1617,7 @@

  # ...for each exif image result

- print('Parsed EXIF datetime information, unable to parse EXIF data from {} of {} images'.format(
+ print('Parsed EXIF datetime information, unable to parse EXIF date from {} of {} images'.format(
  len(images_without_datetime),len(exif_results)))


@@ -1633,7 +1683,7 @@ min_dominant_class_classifications_above_threshold_for_class_smoothing = 5 # 2
  max_secondary_class_classifications_above_threshold_for_class_smoothing = 5

  # If the ratio between a dominant class and a secondary class count is greater than this,
- # regardless of the secondary class count, switch those classificaitons (i.e., ignore
+ # regardless of the secondary class count, switch those classifications (i.e., ignore
  # max_secondary_class_classifications_above_threshold_for_class_smoothing).
  #
  # This may be different for different dominant classes, e.g. if we see lots of cows, they really
@@ -1935,7 +1985,7 @@ options.classification_confidence_threshold = 0.7
  options.almost_detection_confidence_threshold = options.confidence_threshold - 0.05
  options.ground_truth_json_file = None
  options.separate_detections_by_category = True
- # options.max_figures_per_html_file = 5000
+ options.max_figures_per_html_file = 2500

  options.parallelize_rendering = True
  options.parallelize_rendering_n_cores = default_workers_for_parallel_tasks
@@ -1953,8 +2003,8 @@ print('Processing {} to {}'.format(base_task_name, output_base))
  options.api_output_file = sequence_smoothed_classification_file
  options.output_dir = output_base
  ppresults = process_batch_results(options)
- path_utils.open_file(ppresults.output_html_file)
-
+ path_utils.open_file(ppresults.output_html_file,attempt_to_open_in_wsl_host=True)
+ # import clipboard; clipboard.copy(ppresults.output_html_file)

  #% Zip .json files

@@ -2021,7 +2071,7 @@ for i, j in itertools.combinations(list(range(0,len(filenames))),2):
  results = compare_batch_results(options)

  from md_utils.path_utils import open_file
- open_file(results.html_output_file)
+ open_file(results.html_output_file,attempt_to_open_in_wsl_host=True)


  #%% Merge in high-confidence detections from another results file
@@ -2048,15 +2098,17 @@ from api.batch_processing.postprocessing import categorize_detections_by_size

  size_options = categorize_detections_by_size.SizeCategorizationOptions()

- # This is a size threshold, not a confidence threshold
- size_options.threshold = 0.9
- size_options.output_category_name = 'large_detections'
- # size_options.categories_to_separate = [3]
+ size_options.size_thresholds = [0.9]
+ size_options.size_category_names = ['large_detections']
+
+ size_options.categories_to_separate = [1]
  size_options.measurement = 'size' # 'width'

+ threshold_string = '-'.join([str(x) for x in size_options.size_thresholds])
+
  input_file = filtered_output_filename
  size_separated_file = input_file.replace('.json','-size-separated-{}.json'.format(
- size_options.threshold))
+ threshold_string))
  d = categorize_detections_by_size.categorize_detections_by_size(input_file,size_separated_file,
  size_options)

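SizeCategorizationOptions switches here from a single threshold/category-name pair to parallel lists, so more than one size bin can be defined in a single pass. A minimal sketch of the new-style options, using only attribute names that appear in this diff; the values are illustrative:

# Sketch only: attribute names come from this diff, values are illustrative.
from api.batch_processing.postprocessing import categorize_detections_by_size

size_options = categorize_detections_by_size.SizeCategorizationOptions()

# 5.0.5-style (single threshold):
#   size_options.threshold = 0.9
#   size_options.output_category_name = 'large_detections'

# 5.0.7-style (parallel lists):
size_options.size_thresholds = [0.9]
size_options.size_category_names = ['large_detections']
size_options.categories_to_separate = [1]  # category 1 is 'animal' in MegaDetector output
size_options.measurement = 'size'          # or 'width'

threshold_string = '-'.join([str(x) for x in size_options.size_thresholds])
print(threshold_string)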
@@ -2064,7 +2116,7 @@ d = categorize_detections_by_size.categorize_detections_by_size(input_file,size_
  #%% Preview large boxes

  output_base_large_boxes = os.path.join(postprocessing_output_folder,
- base_task_name + '_{}_{:.3f}_large_boxes'.format(rde_string, options.confidence_threshold))
+ base_task_name + '_{}_{:.3f}_size_separated_boxes'.format(rde_string, options.confidence_threshold))
  os.makedirs(output_base_large_boxes, exist_ok=True)
  print('Processing post-RDE, post-size-separation to {}'.format(output_base_large_boxes))

@@ -2073,7 +2125,7 @@ options.output_dir = output_base_large_boxes

  ppresults = process_batch_results(options)
  html_output_file = ppresults.output_html_file
- path_utils.open_file(html_output_file)
+ path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)


  #%% .json splitting
@@ -2086,12 +2138,6 @@ from api.batch_processing.postprocessing.subset_json_detector_output import (
  input_filename = filtered_output_filename
  output_base = os.path.join(combined_api_output_folder,base_task_name + '_json_subsets')

- if False:
- if data is None:
- with open(input_filename) as f:
- data = json.load(f)
- print('Data set contains {} images'.format(len(data['images'])))
-
  print('Processing file {} to {}'.format(input_filename,output_base))

  options = SubsetJsonDetectorOutputOptions()
@@ -2185,65 +2231,45 @@ options.allow_existing_directory = False
  separate_detections_into_folders(options)


- #%% Generate commands for a subset of tasks
+ #%% Convert frame-level results to video-level results

- task_set = [8,10,12,14,16]; gpu_number = 0; sleep_time_between_tasks = 60; sleep_time_before_tasks = 0
- commands = []
+ # This cell is only useful if the files submitted to this job were generated via
+ # video_folder_to_frames().

- # i_task = 8
- for i_task in task_set:
-
- if i_task == task_set[0]:
- commands.append('sleep {}'.format(str(sleep_time_before_tasks)))
-
- task = task_info[i_task]
- chunk_file = task['input_file']
- output_fn = chunk_file.replace('.json','_results.json')
-
- task['output_file'] = output_fn
+ from detection.video_utils import frame_results_to_video_results

- cuda_string = f'CUDA_VISIBLE_DEVICES={gpu_number}'
-
- checkpoint_frequency_string = ''
- checkpoint_path_string = ''
- if checkpoint_frequency is not None and checkpoint_frequency > 0:
- checkpoint_frequency_string = f'--checkpoint_frequency {checkpoint_frequency}'
- checkpoint_path_string = '--checkpoint_path {}'.format(chunk_file.replace(
- '.json','_checkpoint.json'))
-
- use_image_queue_string = ''
- if (use_image_queue):
- use_image_queue_string = '--use_image_queue'
+ video_output_filename = filtered_output_filename.replace('.json','_aggregated.json')
+ frame_results_to_video_results(filtered_output_filename,video_output_filename)

- ncores_string = ''
- if (ncores > 1):
- ncores_string = '--ncores {}'.format(ncores)
-
- quiet_string = ''
- if quiet_mode:
- quiet_string = '--quiet'
-
- cmd = f'{cuda_string} python run_detector_batch.py {model_file} {chunk_file} {output_fn} {checkpoint_frequency_string} {checkpoint_path_string} {use_image_queue_string} {ncores_string} {quiet_string}'
-
- task['command'] = cmd
- commands.append(cmd)
- if i_task != task_set[-1]:
- commands.append('sleep {}'.format(str(sleep_time_between_tasks)))
+
+ #%% Sample custom path replacement function
+
+ def custom_relative_path_to_location(relative_path):

- # ...for each task
+ relative_path = relative_path.replace('\\','/')
+ tokens = relative_path.split('/')
+ location_name = '/'.join(tokens[0:2])
+ return location_name

- task_strings = [str(k).zfill(3) for k in task_set]
- task_set_string = '_'.join(task_strings)
- cmd_file = os.path.join(filename_base,'run_chunk_{}_gpu_{}.sh'.format(task_set_string,
- str(gpu_number).zfill(2)))

- with open(cmd_file,'w') as f:
- for cmd in commands:
- f.write(cmd + '\n')
+ #%% Test relative_path_to_location on the current dataset
+
+ with open(combined_api_output_file,'r') as f:
+ d = json.load(f)
+ image_filenames = [im['file'] for im in d['images']]
+
+ location_names = set()
+
+ # relative_path = image_filenames[0]
+ for relative_path in tqdm(image_filenames):
+ location_name = relative_path_to_location(relative_path)
+ location_names.add(location_name)

- import stat
- st = os.stat(cmd_file)
- os.chmod(cmd_file, st.st_mode | stat.S_IEXEC)
+ location_names = list(location_names)
+ location_names.sort()
+
+ for s in location_names:
+ print(s)


  #%% End notebook: turn this script into a notebook (how meta!)
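The old "commands for a subset of tasks" cell gives way to a video-aggregation cell above; frame_results_to_video_results() collapses frame-level detections into one result per video. A minimal usage sketch, assuming the two-argument call shown in this diff and a hypothetical input filename:

# Sketch only: the input filename is a placeholder and is assumed to contain
# results for frames extracted via video_folder_to_frames().
from detection.video_utils import frame_results_to_video_results

filtered_output_filename = 'organization-2024-01-01-md_v5a.0.0_detections.filtered.json'
video_output_filename = filtered_output_filename.replace('.json', '_aggregated.json')

frame_results_to_video_results(filtered_output_filename, video_output_filename)
print('Wrote video-level results to {}'.format(video_output_filename))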
@@ -2251,8 +2277,12 @@ os.chmod(cmd_file, st.st_mode | stat.S_IEXEC)
  import os
  import nbformat as nbf

- input_py_file = os.path.expanduser(
- '~/git/MegaDetector/api/batch_processing/data_preparation/manage_local_batch.py')
+ if os.name == 'nt':
+ git_base = r'c:\git'
+ else:
+ git_base = os.path.expanduer('~/git')
+
+ input_py_file = git_base + '/MegaDetector/api/batch_processing/data_preparation/manage_local_batch.py'
  assert os.path.isfile(input_py_file)
  output_ipynb_file = input_py_file.replace('.py','.ipynb')

@@ -2275,14 +2305,23 @@ i_line = 0

  header_comment = ''

+ # Delete a few lines from the top that don't belong in the NB version, e.g. the name
+ # of the .py file
  lines_to_ignore = 7
+ expected_first_token = '# This script'
+ found_first_token = False

  # Everything before the first cell is the header comment
  while(not lines[i_line].startswith('#%%')):
+
  if i_line < lines_to_ignore:
  i_line += 1
  continue

+ if not found_first_token:
+ assert lines[i_line].startswith(expected_first_token)
+ found_first_token = True
+
  s = lines[i_line].replace('#','').strip()
  if len(s) == 0:
  header_comment += '\n\n'