megadetector 10.0.3__py3-none-any.whl → 10.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector might be problematic.
- megadetector/data_management/animl_to_md.py +158 -0
- megadetector/data_management/cct_json_utils.py +1 -0
- megadetector/data_management/speciesnet_to_md.py +2 -2
- megadetector/data_management/zamba_to_md.py +188 -0
- megadetector/detection/process_video.py +52 -40
- megadetector/detection/pytorch_detector.py +24 -34
- megadetector/detection/run_detector_batch.py +138 -93
- megadetector/detection/run_md_and_speciesnet.py +22 -4
- megadetector/detection/video_utils.py +5 -4
- megadetector/postprocessing/classification_postprocessing.py +26 -10
- megadetector/postprocessing/combine_batch_outputs.py +2 -0
- megadetector/postprocessing/generate_csv_report.py +1 -1
- megadetector/postprocessing/load_api_results.py +1 -1
- megadetector/postprocessing/md_to_wi.py +1 -1
- megadetector/postprocessing/postprocess_batch_results.py +1 -1
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1 -1
- megadetector/postprocessing/separate_detections_into_folders.py +1 -1
- megadetector/postprocessing/subset_json_detector_output.py +1 -3
- megadetector/utils/ct_utils.py +71 -0
- megadetector/utils/md_tests.py +8 -7
- megadetector/utils/path_utils.py +4 -15
- megadetector/utils/wi_platform_utils.py +824 -0
- megadetector/utils/wi_taxonomy_utils.py +1711 -0
- megadetector/visualization/visualization_utils.py +1 -1
- megadetector/visualization/visualize_detector_output.py +7 -5
- megadetector/visualization/visualize_video_output.py +1 -1
- {megadetector-10.0.3.dist-info → megadetector-10.0.5.dist-info}/METADATA +2 -2
- {megadetector-10.0.3.dist-info → megadetector-10.0.5.dist-info}/RECORD +31 -28
- megadetector/utils/wi_utils.py +0 -2674
- {megadetector-10.0.3.dist-info → megadetector-10.0.5.dist-info}/WHEEL +0 -0
- {megadetector-10.0.3.dist-info → megadetector-10.0.5.dist-info}/licenses/LICENSE +0 -0
- {megadetector-10.0.3.dist-info → megadetector-10.0.5.dist-info}/top_level.txt +0 -0
megadetector/detection/run_detector_batch.py

@@ -158,9 +158,7 @@ def _producer_func(q,
     for im_file in image_files:

         try:
-
-            print('Loading image {} on producer {}'.format(im_file,producer_id))
-            sys.stdout.flush()
+
             image = vis_utils.load_image(im_file)

             if preprocessor is not None:

@@ -179,10 +177,6 @@ def _producer_func(q,
             print('Producer process: image {} cannot be loaded:\n{}'.format(im_file,str(e)))
             image = run_detector.FAILURE_IMAGE_OPEN

-        if verbose:
-            print('Queueing image {} from producer {}'.format(im_file,producer_id))
-            sys.stdout.flush()
-
         q.put([im_file,image,producer_id])

     # ...for each image
@@ -210,7 +204,9 @@ def _consumer_func(q,
                    detector_options=None,
                    preprocess_on_image_queue=default_preprocess_on_image_queue,
                    n_total_images=None,
-                   batch_size=1
+                   batch_size=1,
+                   checkpoint_path=None,
+                   checkpoint_frequency=-1
                    ):
     """
     Consumer function; only used when using the (optional) image queue.

@@ -231,9 +227,14 @@ def _consumer_func(q,
         augment (bool, optional): enable image augmentation
         detector_options (dict, optional): key/value pairs that are interpreted differently
             by different detectors
-        preprocess_on_image_queue (bool, optional): whether images are already preprocessed on
+        preprocess_on_image_queue (bool, optional): whether images are already preprocessed on
+            the queue
         n_total_images (int, optional): total number of images expected (for progress bar)
         batch_size (int, optional): batch size for GPU inference
+        checkpoint_path (str, optional): path to write checkpoint files, None disables
+            checkpointing
+        checkpoint_frequency (int, optional): write checkpoint every N images, -1 disables
+            checkpointing
     """

     if verbose:
@@ -257,6 +258,25 @@ def _consumer_func(q,

     n_images_processed = 0
     n_queues_finished = 0
+    last_checkpoint_count = 0
+
+    def _should_write_checkpoint():
+        """
+        Check whether we should write a checkpoint. Returns True if we've crossed a
+        checkpoint boundary.
+        """
+
+        if (checkpoint_frequency <= 0) or (checkpoint_path is None):
+            return False
+
+        # Calculate the checkpoint threshold we should have crossed
+        current_checkpoint_threshold = \
+            (n_images_processed // checkpoint_frequency) * checkpoint_frequency
+        last_checkpoint_threshold = \
+            (last_checkpoint_count // checkpoint_frequency) * checkpoint_frequency
+
+        # We should write a checkpoint if we've crossed into a new checkpoint interval
+        return (current_checkpoint_threshold > last_checkpoint_threshold)

     pbar = None
     if n_total_images is not None:
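The boundary check above only fires when the running image count has crossed a new multiple of checkpoint_frequency since the last checkpoint, which matters because the consumer advances in batch-sized steps rather than one image at a time. A minimal standalone sketch of that logic, not part of the diff, with illustrative numbers:

    checkpoint_frequency = 1000
    last_checkpoint_count = 0
    for n_images_processed in (64, 512, 960, 1024, 1600, 2048):
        crossed = (n_images_processed // checkpoint_frequency) > \
                  (last_checkpoint_count // checkpoint_frequency)
        if crossed:
            # in the real consumer this is where write_checkpoint() is called
            last_checkpoint_count = n_images_processed
        print(n_images_processed, crossed)
    # prints True only at 1024 (crossing 1000) and 2048 (crossing 2000)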
@@ -314,6 +334,10 @@ def _consumer_func(q,

             n_images_processed += len(leftover_batch)

+            # In theory we could write a checkpoint here, but because we're basically
+            # done at this point, there's not much upside to writing another checkpoint,
+            # so for simplicity, I'm skipping it.
+
         # ...for each batch we have left to process

         return_queue.put(results)

@@ -334,16 +358,6 @@ def _consumer_func(q,
         im_file = r[0]
         image = r[1]

-        # This block is sometimes useful for debugging, so I'm leaving it here, but if'd out
-        if False:
-            if verbose or ((n_images_processed % n_queue_print) == 1):
-                elapsed = time.time() - start_time
-                images_per_second = n_images_processed / elapsed
-                print('De-queued image {} ({:.2f}/s) ({})'.format(n_images_processed,
-                                                                  images_per_second,
-                                                                  im_file))
-                sys.stdout.flush()
-
         # Handle failed images immediately (don't batch them)
         #
         # Loader workers communicate failures by passing a string to

@@ -418,10 +432,14 @@ def _consumer_func(q,

         # ...if we are/aren't doing batch processing

-
+        # Write checkpoint if necessary
+        if _should_write_checkpoint():
+            print('Consumer: writing checkpoint after {} images'.format(
+                n_images_processed))
+            write_checkpoint(checkpoint_path, results)
+            last_checkpoint_count = n_images_processed

-
-        print('Processed image {}'.format(im_file)); sys.stdout.flush()
+        # ...whether we received a string (indicating failure) or an image from the loader worker

         q.task_done()

@@ -442,7 +460,9 @@ def _run_detector_with_image_queue(image_files,
                                    detector_options=None,
                                    loader_workers=default_loaders,
                                    preprocess_on_image_queue=default_preprocess_on_image_queue,
-                                   batch_size=1
+                                   batch_size=1,
+                                   checkpoint_path=None,
+                                   checkpoint_frequency=-1):
     """
     Driver function for the (optional) multiprocessing-based image queue. Spawns workers to read and
     preprocess images, runs the consumer function in the calling process.

@@ -466,6 +486,8 @@ def _run_detector_with_image_queue(image_files,
         preprocess_on_image_queue (bool, optional): if the image queue is enabled, should it handle
             image loading and preprocessing (True), or just image loading (False)?
         batch_size (int, optional): batch size for GPU processing
+        checkpoint_path (str, optional): path to write checkpoint files, None disables checkpointing
+        checkpoint_frequency (int, optional): write checkpoint every N images, -1 disables checkpointing

     Returns:
         list: list of dicts in the format returned by process_image()

@@ -536,7 +558,9 @@ def _run_detector_with_image_queue(image_files,
                                                        detector_options,
                                                        preprocess_on_image_queue,
                                                        n_total_images,
-                                                       batch_size
+                                                       batch_size,
+                                                       checkpoint_path,
+                                                       checkpoint_frequency))
         else:
             consumer = Process(target=_consumer_func,args=(q,
                                                            return_queue,

@@ -551,7 +575,9 @@ def _run_detector_with_image_queue(image_files,
                                                            detector_options,
                                                            preprocess_on_image_queue,
                                                            n_total_images,
-                                                           batch_size
+                                                           batch_size,
+                                                           checkpoint_path,
+                                                           checkpoint_frequency))
         consumer.daemon = True
         consumer.start()
     else:

@@ -568,7 +594,9 @@ def _run_detector_with_image_queue(image_files,
                        detector_options,
                        preprocess_on_image_queue,
                        n_total_images,
-                       batch_size
+                       batch_size,
+                       checkpoint_path,
+                       checkpoint_frequency)

     for i_producer,producer in enumerate(producers):
         producer.join()
@@ -665,9 +693,6 @@ def _process_batch(image_items_batch,
         list of dict: list of results for each image in the batch
     """

-    if (verbose):
-        print('_process_batch called with {} items'.format(len(image_items_batch)))
-
     # This will be the set of items we send for inference; it may be
     # smaller than the input list (image_items_batch) if some images
     # fail to load. [valid_images] will be either a list of PIL Image

@@ -703,9 +728,6 @@ def _process_batch(image_items_batch,

     assert len(valid_images) == len(valid_image_filenames)

-    if verbose:
-        print('_process_batch found {} valid items in batch'.format(len(valid_images)))
-
     valid_batch_results = []

     # Process the batch if we have any valid images

@@ -785,9 +807,6 @@ def _process_batch(image_items_batch,

     batch_results.extend(valid_batch_results)

-    if verbose:
-        print('_process batch returning results for {} items'.format(len(batch_results)))-

     return batch_results

 # ...def _process_batch(...)
@@ -1153,30 +1172,39 @@ def load_and_run_detector_batch(model_file,

     if use_image_queue:

-        assert checkpoint_frequency < 0, \
-            'Using an image queue is not currently supported when checkpointing is enabled'
-        assert len(results) == 0, \
-            'Using an image queue with results loaded from a checkpoint is not currently supported'
         assert n_cores <= 1

-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Filter out already processed images
+        images_to_process = [im_file for im_file in image_file_names
+                             if im_file not in already_processed]
+
+        if len(images_to_process) != len(image_file_names):
+            print('Bypassing {} images that have already been processed'.format(
+                len(image_file_names) - len(images_to_process)))
+
+        new_results = _run_detector_with_image_queue(images_to_process,
+                                                     model_file,
+                                                     confidence_threshold,
+                                                     quiet,
+                                                     image_size=image_size,
+                                                     include_image_size=include_image_size,
+                                                     include_image_timestamp=include_image_timestamp,
+                                                     include_exif_data=include_exif_data,
+                                                     augment=augment,
+                                                     detector_options=detector_options,
+                                                     loader_workers=loader_workers,
+                                                     preprocess_on_image_queue=preprocess_on_image_queue,
+                                                     batch_size=batch_size,
+                                                     checkpoint_path=checkpoint_path,
+                                                     checkpoint_frequency=checkpoint_frequency)
+
+        # Merge new results with existing results from checkpoint
+        results.extend(new_results)

     elif n_cores <= 1:

+        # Single-threaded processing, no image queue
+
         # Load the detector
         start_time = time.time()
         detector = load_detector(model_file,
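The net effect of this hunk is that the image-queue path no longer rejects checkpointing or a non-empty results list: already-processed images are filtered out and the checkpoint arguments are forwarded to the queue driver. A minimal sketch, not part of the diff, of calling this path from Python; the keyword names mirror the calls visible in the diff, while the model name, file list, and checkpoint path are placeholder assumptions:

    from megadetector.detection.run_detector_batch import load_and_run_detector_batch

    image_file_names = ['/data/camera01/img_0001.jpg',
                        '/data/camera01/img_0002.jpg']        # placeholder image paths
    results = load_and_run_detector_batch('MDV5A',            # model file or well-known model name
                                          image_file_names,
                                          checkpoint_path='checkpoint.json',
                                          checkpoint_frequency=1000,  # write every ~1000 images
                                          use_image_queue=True,       # previously incompatible with checkpointing
                                          quiet=True)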
@@ -1233,7 +1261,7 @@ def load_and_run_detector_batch(model_file,
                 if (checkpoint_frequency != -1) and ((image_count % checkpoint_frequency) == 0):
                     print('Writing a new checkpoint after having processed {} images since '
                           'last restart'.format(image_count))
-
+                    write_checkpoint(checkpoint_path, results)

         else:

@@ -1257,7 +1285,7 @@ def load_and_run_detector_batch(model_file,
                 if (checkpoint_frequency != -1) and ((image_count % checkpoint_frequency) == 0):
                     print('Writing a new checkpoint after having processed {} images since '
                           'last restart'.format(image_count))
-
+                    write_checkpoint(checkpoint_path, results)

         # ...if the batch size is > 1

@@ -1291,9 +1319,9 @@ def load_and_run_detector_batch(model_file,

         checkpoint_queue = Manager().Queue()

-        # Pass the "results" array (which may already contain images loaded from an
-
-        # the list as they become available.
+        # Pass the "results" array (which may already contain images loaded from an
+        # existing checkpoint) to the checkpoint queue handler function, which will
+        # append results to the list as they become available.
         checkpoint_thread = Thread(target=_checkpoint_queue_handler,
                                    args=(checkpoint_path, checkpoint_frequency,
                                          checkpoint_queue, results), daemon=True)

@@ -1337,7 +1365,7 @@ def load_and_run_detector_batch(model_file,

     # Append the results we just computed to "results", which is *usually* empty, but will
     # be non-empty if we resumed from a checkpoint
-    results
+    results.extend(new_results)

     # ...if checkpointing is/isn't enabled

@@ -1376,12 +1404,18 @@ def _checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_
             print('Writing a new checkpoint after having processed {} images since '
                   'last restart'.format(result_count))

-
+            write_checkpoint(checkpoint_path, results)


-def _write_checkpoint(checkpoint_path, results):
+def write_checkpoint(checkpoint_path, results):
     """
-    Writes the
+    Writes the object in [results] to a json checkpoint file, as a dict with the
+    key "checkpoint". First backs up the checkpoint file if it exists, in case we
+    crash while writing the file.
+
+    Args:
+        checkpoint_path (str): the file to write the checkpoint to
+        results (object): the object we should write
     """

     assert checkpoint_path is not None

@@ -1394,11 +1428,41 @@ def _write_checkpoint(checkpoint_path, results):
         shutil.copyfile(checkpoint_path,checkpoint_tmp_path)

     # Write the new checkpoint
-    ct_utils.write_json(checkpoint_path, {'
+    ct_utils.write_json(checkpoint_path, {'checkpoint': results}, force_str=True)

     # Remove the backup checkpoint if it exists
     if checkpoint_tmp_path is not None:
-
+        try:
+            os.remove(checkpoint_tmp_path)
+        except Exception as e:
+            print('Warning: error removing backup checkpoint file {}:\n{}'.format(
+                checkpoint_tmp_path,str(e)))
+
+
+def load_checkpoint(checkpoint_path):
+    """
+    Loads results from a checkpoint file. A checkpoint file is always a dict
+    with the key "checkpoint".
+
+    Args:
+        checkpoint_path (str): the .json file to load
+
+    Returns:
+        object: object retrieved from the checkpoint, typically a list of results
+    """
+
+    print('Loading previous results from checkpoint file {}'.format(checkpoint_path))
+
+    with open(checkpoint_path, 'r') as f:
+        checkpoint_data = json.load(f)
+
+    if 'checkpoint' not in checkpoint_data:
+        raise ValueError('Checkpoint file {} is missing "checkpoint" field'.format(checkpoint_path))
+
+    results = checkpoint_data['checkpoint']
+    print('Restored {} entries from the checkpoint {}'.format(len(results),checkpoint_path))
+
+    return results


 def get_image_datetime(image):
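write_checkpoint and load_checkpoint are the public replacements for the old private _write_checkpoint, and the on-disk format changes from a dict keyed by "images" to one keyed by "checkpoint". A minimal round-trip sketch, not part of the diff, assuming the import path below and using placeholder data and a placeholder file name:

    from megadetector.detection.run_detector_batch import write_checkpoint, load_checkpoint

    results = [{'file': '/data/camera01/img_0001.jpg', 'detections': []}]  # placeholder result entry
    write_checkpoint('md_checkpoint.json', results)   # writes {'checkpoint': [...]}, backing up any existing file
    restored = load_checkpoint('md_checkpoint.json')  # raises ValueError if the 'checkpoint' key is missing
    assert restored == results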
@@ -1585,8 +1649,6 @@ if False:
     cmd += ' --output_relative_filenames'
     if include_max_conf:
         cmd += ' --include_max_conf'
-    if quiet:
-        cmd += ' --quiet'
     if image_size is not None:
         cmd += ' --image_size {}'.format(image_size)
     if use_image_queue:

@@ -1670,10 +1732,6 @@ def main(): # noqa
         '--include_max_conf',
         action='store_true',
         help='Include the "max_detection_conf" field in the output')
-    parser.add_argument(
-        '--quiet',
-        action='store_true',
-        help='Suppress per-image console output')
     parser.add_argument(
         '--verbose',
         action='store_true',

@@ -1796,6 +1854,12 @@ def main(): # noqa
         default=1,
         help='Batch size for GPU inference (default 1). CPU inference will ignore this and use batch_size=1.')

+    # This argument is deprecated, we always use what was formerly "quiet mode"
+    parser.add_argument(
+        '--quiet',
+        action='store_true',
+        help=argparse.SUPPRESS)
+
     if len(sys.argv[1:]) == 0:
         parser.print_help()
         parser.exit()
@@ -1857,7 +1921,7 @@ def main(): # noqa
     # Load the checkpoint if available
     #
     # File paths in the checkpoint are always absolute paths; conversion to relative paths
-    # happens
+    # (if requested) happens at the time results are exported at the end of a job.
     if args.resume_from_checkpoint is not None:
         if args.resume_from_checkpoint == 'auto':
             checkpoint_files = os.listdir(output_dir)

@@ -1875,16 +1939,7 @@ def main(): # noqa
                 checkpoint_file = os.path.join(output_dir,checkpoint_file_relative)
         else:
             checkpoint_file = args.resume_from_checkpoint
-
-            'File at resume_from_checkpoint specified does not exist'
-        with open(checkpoint_file) as f:
-            print('Loading previous results from checkpoint file {}'.format(
-                checkpoint_file))
-            saved = json.load(f)
-            assert 'images' in saved, \
-                'The checkpoint file does not have the correct fields; cannot be restored'
-            results = saved['images']
-            print('Restored {} entries from the checkpoint'.format(len(results)))
+        results = load_checkpoint(checkpoint_file)
     else:
         results = []

@@ -2001,16 +2056,6 @@ def main(): # noqa
                 f'Checkpoint path {checkpoint_path} already exists, delete or move it before ' + \
                 're-using the same checkpoint path, or specify --allow_checkpoint_overwrite'

-
-        # Confirm that we can write to the checkpoint path; this avoids issues where
-        # we crash after several thousand images.
-        #
-        # But actually, commenting this out for now... the scenario where we are resuming from a
-        # checkpoint, then immediately overwrite that checkpoint with empty data is higher-risk
-        # than the annoyance of crashing a few minutes after starting a job.
-        if False:
-            ct_utils.write_json(checkpoint_path, {'images': []}, indent=None)
-
         print('The checkpoint file will be written to {}'.format(checkpoint_path))

     else:

@@ -2030,7 +2075,7 @@ def main(): # noqa
         results=results,
         n_cores=args.ncores,
         use_image_queue=args.use_image_queue,
-        quiet=
+        quiet=True,
         image_size=args.image_size,
         class_mapping_filename=args.class_mapping_filename,
         include_image_size=args.include_image_size,
megadetector/detection/run_md_and_speciesnet.py

@@ -31,6 +31,7 @@ from megadetector.utils.ct_utils import round_float
 from megadetector.utils.ct_utils import write_json
 from megadetector.utils.ct_utils import make_temp_folder
 from megadetector.utils.ct_utils import is_list_sorted
+from megadetector.utils.ct_utils import is_sphinx_build
 from megadetector.utils import path_utils
 from megadetector.visualization import visualization_utils as vis_utils
 from megadetector.postprocessing.validate_batch_results import \

@@ -808,8 +809,9 @@ def _run_detection_step(source_folder: str,

     files_to_merge = []

-    # Process images if
+    # Process images if necessary
     if len(image_files) > 0:
+
         print('Running MegaDetector on {} images...'.format(len(image_files)))

         image_results = load_and_run_detector_batch(

@@ -841,8 +843,11 @@ def _run_detection_step(source_folder: str,
         print('Image detection results written to {}'.format(image_output_file))
         files_to_merge.append(image_output_file)

-    #
+    # ...if we had images to process
+
+    # Process videos if necessary
     if len(video_files) > 0:
+
         print('Running MegaDetector on {} videos...'.format(len(video_files)))

         # Set up video processing options

@@ -853,6 +858,7 @@ def _run_detection_step(source_folder: str,
         video_options.json_confidence_threshold = detection_confidence_threshold
         video_options.frame_sample = frame_sample
         video_options.time_sample = time_sample
+        video_options.recursive = True

         # Process videos
         process_videos(video_options)

@@ -860,6 +866,8 @@ def _run_detection_step(source_folder: str,
         print('Video detection results written to {}'.format(video_options.output_json_file))
         files_to_merge.append(video_options.output_json_file)

+    # ...if we had videos to process
+
     # Merge results if we have both images and videos
     if len(files_to_merge) > 1:
         print('Merging image and video detection results...')

@@ -868,6 +876,9 @@ def _run_detection_step(source_folder: str,
     elif len(files_to_merge) == 1:
         # Just rename the single file
         if files_to_merge[0] != detector_output_file:
+            if os.path.isfile(detector_output_file):
+                print('Detector file {} exists, over-writing'.format(detector_output_file))
+                os.remove(detector_output_file)
             os.rename(files_to_merge[0], detector_output_file)
             print('Detection results written to {}'.format(detector_output_file))

@@ -949,7 +960,7 @@ def _run_classification_step(detector_results_file: str,

     # This will block every time the queue reaches its maximum depth, so for
     # very small jobs, this will not be a useful progress bar.
-    with tqdm(total=len(images)) as pbar:
+    with tqdm(total=len(images),desc='Classification') as pbar:
         for image_data in images:
             image_queue.put(image_data)
             pbar.update()

@@ -1104,6 +1115,8 @@ def _run_classification_step(detector_results_file: str,
     detector_results['classification_category_descriptions'] = \
         category_state.classification_category_descriptions

+    print('Writing output file')
+
     # Write results
     write_json(merged_results_file, detector_results)

@@ -1120,6 +1133,11 @@ def main():
     Command-line driver for run_md_and_speciesnet.py
     """

+    if 'speciesnet' not in sys.modules:
+        print('It looks like the speciesnet package is not available, try "pip install speciesnet"')
+        if not is_sphinx_build():
+            sys.exit(-1)
+
     parser = argparse.ArgumentParser(
         description='Run MegaDetector and SpeciesNet on a folder of images/videos',
         formatter_class=argparse.ArgumentDefaultsHelpFormatter

@@ -1153,7 +1171,7 @@ def main():
     parser.add_argument('--detection_confidence_threshold_for_classification',
                         type=float,
                         default=DEFAULT_DETECTION_CONFIDENCE_THRESHOLD_FOR_CLASSIFICATION,
-                        help='
+                        help='Classify detections above this threshold')
     parser.add_argument('--detection_confidence_threshold_for_output',
                         type=float,
                         default=DEFAULT_DETECTION_CONFIDENCE_THRESHOLD_FOR_OUTPUT,
megadetector/detection/video_utils.py

@@ -27,6 +27,7 @@ from megadetector.visualization import visualization_utils as vis_utils

 default_fourcc = 'h264'

+video_progress_bar_description = 'Processing video'

 #%% Path utilities


@@ -418,7 +419,7 @@ def run_callback_on_frames_for_folder(input_video_folder,
     # Process each video

     # video_fn_abs = input_files_full_paths[0]
-    for video_fn_abs in tqdm(input_files_full_paths):
+    for video_fn_abs in tqdm(input_files_full_paths,desc=video_progress_bar_description):

         video_filename_relative = os.path.relpath(video_fn_abs,input_video_folder)
         video_filename_relative = video_filename_relative.replace('\\','/')

@@ -870,7 +871,7 @@ def video_folder_to_frames(input_folder,
         # For each video
         #
         # input_fn_relative = input_files_relative_paths[0]
-        for input_fn_relative in tqdm(input_files_relative_paths):
+        for input_fn_relative in tqdm(input_files_relative_paths,desc='Video to frames'):

             # If frames_to_extract is a dict, get the specific frames for this video
             if isinstance(frames_to_extract, dict):

@@ -918,7 +919,7 @@ def video_folder_to_frames(input_folder,
                              for relative_fn in input_files_relative_paths]

             results = list(tqdm(pool.imap(_video_to_frames_with_per_video_frames, args_for_pool),
-                                total=len(args_for_pool)))
+                                total=len(args_for_pool),desc='Video to frames'))

         else:

@@ -933,7 +934,7 @@ def video_folder_to_frames(input_folder,
                                                     frames_to_extract=frames_to_extract,
                                                     allow_empty_videos=allow_empty_videos)
             results = list(tqdm(pool.imap(process_video_with_options, input_files_relative_paths),
-                                total=len(input_files_relative_paths)))
+                                total=len(input_files_relative_paths),desc='Video to frames'))

     # ...if we need to pass different frames for each video

megadetector/postprocessing/classification_postprocessing.py

@@ -25,14 +25,14 @@ from megadetector.utils.ct_utils import sort_dictionary_by_value
 from megadetector.utils.ct_utils import sort_dictionary_by_key
 from megadetector.utils.ct_utils import invert_dictionary

-from megadetector.utils.
-from megadetector.utils.
-from megadetector.utils.
+from megadetector.utils.wi_taxonomy_utils import clean_taxonomy_string
+from megadetector.utils.wi_taxonomy_utils import taxonomy_level_index
+from megadetector.utils.wi_taxonomy_utils import taxonomy_level_string_to_index

-from megadetector.utils.
-from megadetector.utils.
-from megadetector.utils.
-from megadetector.utils.
+from megadetector.utils.wi_taxonomy_utils import non_taxonomic_prediction_strings
+from megadetector.utils.wi_taxonomy_utils import human_prediction_string
+from megadetector.utils.wi_taxonomy_utils import animal_prediction_string
+from megadetector.utils.wi_taxonomy_utils import blank_prediction_string # noqa


 #%% Options classes

@@ -1100,7 +1100,8 @@ def restrict_to_taxa_list(taxa_list,
                           input_file,
                           output_file,
                           allow_walk_down=False,
-                          add_pre_filtering_description=True
+                          add_pre_filtering_description=True,
+                          allow_redundant_latin_names=False):
     """
     Given a prediction file in MD .json format, likely without having had
     a geofence applied, apply a custom taxa list.

@@ -1123,6 +1124,10 @@ def restrict_to_taxa_list(taxa_list,
        add_pre_filtering_description (bool, optional): should we add a new metadata
            field that summarizes each image's classifications prior to taxonomic
            restriction?
+       allow_redundant_latin_names (bool, optional): if False, we'll raise an Exception
+           if the same latin name appears twice in the taxonomy list; if True, we'll
+           just print a warning and ignore all entries other than the first for this
+           latin name
     """

     ##%% Read target taxa list

@@ -1137,11 +1142,14 @@ def restrict_to_taxa_list(taxa_list,
     taxa_list = [s for s in taxa_list if len(s) > 0]

     target_latin_to_common = {}
+
     for s in taxa_list:
+
         if s.strip().startswith('#'):
             continue
         tokens = s.split(',')
-
+        # We allow additional columns now
+        # assert len(tokens) <= 2
         binomial_name = tokens[0]
         assert len(binomial_name.split(' ')) in (1,2,3), \
             'Illegal binomial name in species list: {}'.format(binomial_name)

@@ -1149,9 +1157,17 @@ def restrict_to_taxa_list(taxa_list,
             common_name = tokens[1].strip().lower()
         else:
             common_name = None
-
+        if binomial_name in target_latin_to_common:
+            error_string = 'scientific name {} appears multiple times in the taxonomy list'.format(
+                binomial_name)
+            if allow_redundant_latin_names:
+                print('Warning: {}'.format(error_string))
+            else:
+                raise ValueError(error_string)
         target_latin_to_common[binomial_name] = common_name

+    # ...for each line in the taxonomy file
+

     ##%% Read taxonomy file

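For reference, a hypothetical taxa list illustrating the line format this loop parses: one comma-separated entry per line with a scientific name, an optional common name, optional extra columns (now tolerated), and '#' comment lines. The species entries below are placeholders, and whether restrict_to_taxa_list() also accepts a file path rather than a list of strings is not shown in this diff:

    taxa_list = [
        '# comment lines are skipped',
        'odocoileus virginianus,white-tailed deer',
        'meleagris gallopavo,wild turkey,extra columns are now allowed',
        'procyon lotor',
        # repeating 'procyon lotor' here would raise a ValueError unless
        # allow_redundant_latin_names=True is passed to restrict_to_taxa_list()
    ]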
megadetector/postprocessing/combine_batch_outputs.py

@@ -40,6 +40,8 @@ def combine_batch_output_files(input_files,
     Merges the list of MD results files [input_files] into a single
     dictionary, optionally writing the result to [output_file].

+    Always overwrites [output_file] if it exists.
+
     Args:
         input_files (list of str): paths to JSON detection files
         output_file (str, optional): path to write merged JSON