megadetector 10.0.3__py3-none-any.whl → 10.0.5__py3-none-any.whl


Files changed (32)
  1. megadetector/data_management/animl_to_md.py +158 -0
  2. megadetector/data_management/cct_json_utils.py +1 -0
  3. megadetector/data_management/speciesnet_to_md.py +2 -2
  4. megadetector/data_management/zamba_to_md.py +188 -0
  5. megadetector/detection/process_video.py +52 -40
  6. megadetector/detection/pytorch_detector.py +24 -34
  7. megadetector/detection/run_detector_batch.py +138 -93
  8. megadetector/detection/run_md_and_speciesnet.py +22 -4
  9. megadetector/detection/video_utils.py +5 -4
  10. megadetector/postprocessing/classification_postprocessing.py +26 -10
  11. megadetector/postprocessing/combine_batch_outputs.py +2 -0
  12. megadetector/postprocessing/generate_csv_report.py +1 -1
  13. megadetector/postprocessing/load_api_results.py +1 -1
  14. megadetector/postprocessing/md_to_wi.py +1 -1
  15. megadetector/postprocessing/postprocess_batch_results.py +1 -1
  16. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1 -1
  17. megadetector/postprocessing/separate_detections_into_folders.py +1 -1
  18. megadetector/postprocessing/subset_json_detector_output.py +1 -3
  19. megadetector/utils/ct_utils.py +71 -0
  20. megadetector/utils/md_tests.py +8 -7
  21. megadetector/utils/path_utils.py +4 -15
  22. megadetector/utils/wi_platform_utils.py +824 -0
  23. megadetector/utils/wi_taxonomy_utils.py +1711 -0
  24. megadetector/visualization/visualization_utils.py +1 -1
  25. megadetector/visualization/visualize_detector_output.py +7 -5
  26. megadetector/visualization/visualize_video_output.py +1 -1
  27. {megadetector-10.0.3.dist-info → megadetector-10.0.5.dist-info}/METADATA +2 -2
  28. {megadetector-10.0.3.dist-info → megadetector-10.0.5.dist-info}/RECORD +31 -28
  29. megadetector/utils/wi_utils.py +0 -2674
  30. {megadetector-10.0.3.dist-info → megadetector-10.0.5.dist-info}/WHEEL +0 -0
  31. {megadetector-10.0.3.dist-info → megadetector-10.0.5.dist-info}/licenses/LICENSE +0 -0
  32. {megadetector-10.0.3.dist-info → megadetector-10.0.5.dist-info}/top_level.txt +0 -0
@@ -158,9 +158,7 @@ def _producer_func(q,
  for im_file in image_files:

  try:
- if verbose:
- print('Loading image {} on producer {}'.format(im_file,producer_id))
- sys.stdout.flush()
+
  image = vis_utils.load_image(im_file)

  if preprocessor is not None:
@@ -179,10 +177,6 @@ def _producer_func(q,
  print('Producer process: image {} cannot be loaded:\n{}'.format(im_file,str(e)))
  image = run_detector.FAILURE_IMAGE_OPEN

- if verbose:
- print('Queueing image {} from producer {}'.format(im_file,producer_id))
- sys.stdout.flush()
-
  q.put([im_file,image,producer_id])

  # ...for each image
@@ -210,7 +204,9 @@ def _consumer_func(q,
  detector_options=None,
  preprocess_on_image_queue=default_preprocess_on_image_queue,
  n_total_images=None,
- batch_size=1
+ batch_size=1,
+ checkpoint_path=None,
+ checkpoint_frequency=-1
  ):
  """
  Consumer function; only used when using the (optional) image queue.
@@ -231,9 +227,14 @@ def _consumer_func(q,
  augment (bool, optional): enable image augmentation
  detector_options (dict, optional): key/value pairs that are interpreted differently
  by different detectors
- preprocess_on_image_queue (bool, optional): whether images are already preprocessed on the queue
+ preprocess_on_image_queue (bool, optional): whether images are already preprocessed on
+ the queue
  n_total_images (int, optional): total number of images expected (for progress bar)
  batch_size (int, optional): batch size for GPU inference
+ checkpoint_path (str, optional): path to write checkpoint files, None disables
+ checkpointing
+ checkpoint_frequency (int, optional): write checkpoint every N images, -1 disables
+ checkpointing
  """

  if verbose:
@@ -257,6 +258,25 @@ def _consumer_func(q,

  n_images_processed = 0
  n_queues_finished = 0
+ last_checkpoint_count = 0
+
+ def _should_write_checkpoint():
+ """
+ Check whether we should write a checkpoint. Returns True if we've crossed a
+ checkpoint boundary.
+ """
+
+ if (checkpoint_frequency <= 0) or (checkpoint_path is None):
+ return False
+
+ # Calculate the checkpoint threshold we should have crossed
+ current_checkpoint_threshold = \
+ (n_images_processed // checkpoint_frequency) * checkpoint_frequency
+ last_checkpoint_threshold = \
+ (last_checkpoint_count // checkpoint_frequency) * checkpoint_frequency
+
+ # We should write a checkpoint if we've crossed into a new checkpoint interval
+ return (current_checkpoint_threshold > last_checkpoint_threshold)

  pbar = None
  if n_total_images is not None:
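The boundary-crossing check above fires whenever the running image count passes a new multiple of checkpoint_frequency, even when no count lands exactly on the boundary (the normal case with batch_size > 1). A standalone sketch of that arithmetic, with invented numbers rather than code from the package:

# Standalone illustration of the interval-crossing test used by _should_write_checkpoint;
# the function name and values here are made up for the example.
def crossed_checkpoint_boundary(n_images_processed, last_checkpoint_count, checkpoint_frequency):
    if checkpoint_frequency <= 0:
        return False
    current_threshold = (n_images_processed // checkpoint_frequency) * checkpoint_frequency
    last_threshold = (last_checkpoint_count // checkpoint_frequency) * checkpoint_frequency
    return current_threshold > last_threshold

# With batch_size=32 and checkpoint_frequency=100, the count goes 32, 64, 96, 128, ...
assert not crossed_checkpoint_boundary(96, 64, 100)   # still below the 100-image boundary
assert crossed_checkpoint_boundary(128, 96, 100)      # crossed it, so a checkpoint is written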
@@ -314,6 +334,10 @@ def _consumer_func(q,

  n_images_processed += len(leftover_batch)

+ # In theory we could write a checkpoint here, but because we're basically
+ # done at this point, there's not much upside to writing another checkpoint,
+ # so for simplicity, I'm skipping it.
+
  # ...for each batch we have left to process

  return_queue.put(results)
@@ -334,16 +358,6 @@ def _consumer_func(q,
  im_file = r[0]
  image = r[1]

- # This block is sometimes useful for debugging, so I'm leaving it here, but if'd out
- if False:
- if verbose or ((n_images_processed % n_queue_print) == 1):
- elapsed = time.time() - start_time
- images_per_second = n_images_processed / elapsed
- print('De-queued image {} ({:.2f}/s) ({})'.format(n_images_processed,
- images_per_second,
- im_file))
- sys.stdout.flush()
-
  # Handle failed images immediately (don't batch them)
  #
  # Loader workers communicate failures by passing a string to
@@ -418,10 +432,14 @@ def _consumer_func(q,

  # ...if we are/aren't doing batch processing

- # ...whether we received a string (indicating failure) or an image from the loader worker
+ # Write checkpoint if necessary
+ if _should_write_checkpoint():
+ print('Consumer: writing checkpoint after {} images'.format(
+ n_images_processed))
+ write_checkpoint(checkpoint_path, results)
+ last_checkpoint_count = n_images_processed

- if verbose:
- print('Processed image {}'.format(im_file)); sys.stdout.flush()
+ # ...whether we received a string (indicating failure) or an image from the loader worker

  q.task_done()

@@ -442,7 +460,9 @@ def _run_detector_with_image_queue(image_files,
  detector_options=None,
  loader_workers=default_loaders,
  preprocess_on_image_queue=default_preprocess_on_image_queue,
- batch_size=1):
+ batch_size=1,
+ checkpoint_path=None,
+ checkpoint_frequency=-1):
  """
  Driver function for the (optional) multiprocessing-based image queue. Spawns workers to read and
  preprocess images, runs the consumer function in the calling process.
@@ -466,6 +486,8 @@ def _run_detector_with_image_queue(image_files,
  preprocess_on_image_queue (bool, optional): if the image queue is enabled, should it handle
  image loading and preprocessing (True), or just image loading (False)?
  batch_size (int, optional): batch size for GPU processing
+ checkpoint_path (str, optional): path to write checkpoint files, None disables checkpointing
+ checkpoint_frequency (int, optional): write checkpoint every N images, -1 disables checkpointing

  Returns:
  list: list of dicts in the format returned by process_image()
@@ -536,7 +558,9 @@ def _run_detector_with_image_queue(image_files,
  detector_options,
  preprocess_on_image_queue,
  n_total_images,
- batch_size))
+ batch_size,
+ checkpoint_path,
+ checkpoint_frequency))
  else:
  consumer = Process(target=_consumer_func,args=(q,
  return_queue,
@@ -551,7 +575,9 @@ def _run_detector_with_image_queue(image_files,
  detector_options,
  preprocess_on_image_queue,
  n_total_images,
- batch_size))
+ batch_size,
+ checkpoint_path,
+ checkpoint_frequency))
  consumer.daemon = True
  consumer.start()
  else:
@@ -568,7 +594,9 @@ def _run_detector_with_image_queue(image_files,
  detector_options,
  preprocess_on_image_queue,
  n_total_images,
- batch_size)
+ batch_size,
+ checkpoint_path,
+ checkpoint_frequency)

  for i_producer,producer in enumerate(producers):
  producer.join()
@@ -665,9 +693,6 @@ def _process_batch(image_items_batch,
  list of dict: list of results for each image in the batch
  """

- if (verbose):
- print('_process_batch called with {} items'.format(len(image_items_batch)))
-
  # This will be the set of items we send for inference; it may be
  # smaller than the input list (image_items_batch) if some images
  # fail to load. [valid_images] will be either a list of PIL Image
@@ -703,9 +728,6 @@ def _process_batch(image_items_batch,

  assert len(valid_images) == len(valid_image_filenames)

- if verbose:
- print('_process_batch found {} valid items in batch'.format(len(valid_images)))
-
  valid_batch_results = []

  # Process the batch if we have any valid images
@@ -785,9 +807,6 @@ def _process_batch(image_items_batch,

  batch_results.extend(valid_batch_results)

- if verbose:
- print('_process batch returning results for {} items'.format(len(batch_results)))
-
  return batch_results

  # ...def _process_batch(...)
@@ -1153,30 +1172,39 @@ def load_and_run_detector_batch(model_file,

  if use_image_queue:

- assert checkpoint_frequency < 0, \
- 'Using an image queue is not currently supported when checkpointing is enabled'
- assert len(results) == 0, \
- 'Using an image queue with results loaded from a checkpoint is not currently supported'
  assert n_cores <= 1

- # Image queue now supports batch processing
-
- results = _run_detector_with_image_queue(image_file_names,
- model_file,
- confidence_threshold,
- quiet,
- image_size=image_size,
- include_image_size=include_image_size,
- include_image_timestamp=include_image_timestamp,
- include_exif_data=include_exif_data,
- augment=augment,
- detector_options=detector_options,
- loader_workers=loader_workers,
- preprocess_on_image_queue=preprocess_on_image_queue,
- batch_size=batch_size)
+ # Filter out already processed images
+ images_to_process = [im_file for im_file in image_file_names
+ if im_file not in already_processed]
+
+ if len(images_to_process) != len(image_file_names):
+ print('Bypassing {} images that have already been processed'.format(
+ len(image_file_names) - len(images_to_process)))
+
+ new_results = _run_detector_with_image_queue(images_to_process,
+ model_file,
+ confidence_threshold,
+ quiet,
+ image_size=image_size,
+ include_image_size=include_image_size,
+ include_image_timestamp=include_image_timestamp,
+ include_exif_data=include_exif_data,
+ augment=augment,
+ detector_options=detector_options,
+ loader_workers=loader_workers,
+ preprocess_on_image_queue=preprocess_on_image_queue,
+ batch_size=batch_size,
+ checkpoint_path=checkpoint_path,
+ checkpoint_frequency=checkpoint_frequency)
+
+ # Merge new results with existing results from checkpoint
+ results.extend(new_results)

  elif n_cores <= 1:

+ # Single-threaded processing, no image queue
+
  # Load the detector
  start_time = time.time()
  detector = load_detector(model_file,
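Before this release, the two asserts removed above blocked checkpointing and checkpoint resumption whenever the image queue was enabled; 10.0.5 lifts that restriction. A minimal calling sketch based only on the parameters visible in this diff (the model name, paths, and frequency value are placeholders, not a prescribed configuration):

# Hypothetical invocation; see load_and_run_detector_batch's docstring for the full parameter list.
from megadetector.detection.run_detector_batch import load_and_run_detector_batch

results = load_and_run_detector_batch(
    model_file='MDV5A',                               # model name or path to a model file
    image_file_names=['/data/cam/img_0001.jpg', '/data/cam/img_0002.jpg'],
    checkpoint_path='/data/cam/md_checkpoint.json',   # now honored when use_image_queue=True
    checkpoint_frequency=500,                         # checkpoint roughly every 500 images
    use_image_queue=True,
    batch_size=8)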
@@ -1233,7 +1261,7 @@ def load_and_run_detector_batch(model_file,
  if (checkpoint_frequency != -1) and ((image_count % checkpoint_frequency) == 0):
  print('Writing a new checkpoint after having processed {} images since '
  'last restart'.format(image_count))
- _write_checkpoint(checkpoint_path, results)
+ write_checkpoint(checkpoint_path, results)

  else:

@@ -1257,7 +1285,7 @@ def load_and_run_detector_batch(model_file,
  if (checkpoint_frequency != -1) and ((image_count % checkpoint_frequency) == 0):
  print('Writing a new checkpoint after having processed {} images since '
  'last restart'.format(image_count))
- _write_checkpoint(checkpoint_path, results)
+ write_checkpoint(checkpoint_path, results)

  # ...if the batch size is > 1

@@ -1291,9 +1319,9 @@ def load_and_run_detector_batch(model_file,

  checkpoint_queue = Manager().Queue()

- # Pass the "results" array (which may already contain images loaded from an existing
- # checkpoint) to the checkpoint queue handler function, which will append results to
- # the list as they become available.
+ # Pass the "results" array (which may already contain images loaded from an
+ # existing checkpoint) to the checkpoint queue handler function, which will
+ # append results to the list as they become available.
  checkpoint_thread = Thread(target=_checkpoint_queue_handler,
  args=(checkpoint_path, checkpoint_frequency,
  checkpoint_queue, results), daemon=True)
@@ -1337,7 +1365,7 @@ def load_and_run_detector_batch(model_file,

  # Append the results we just computed to "results", which is *usually* empty, but will
  # be non-empty if we resumed from a checkpoint
- results += new_results
+ results.extend(new_results)

  # ...if checkpointing is/isn't enabled

@@ -1376,12 +1404,18 @@ def _checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_
  print('Writing a new checkpoint after having processed {} images since '
  'last restart'.format(result_count))

- _write_checkpoint(checkpoint_path, results)
+ write_checkpoint(checkpoint_path, results)


- def _write_checkpoint(checkpoint_path, results):
+ def write_checkpoint(checkpoint_path, results):
  """
- Writes the 'images' field in the dict 'results' to a json checkpoint file.
+ Writes the object in [results] to a json checkpoint file, as a dict with the
+ key "checkpoint". First backs up the checkpoint file if it exists, in case we
+ crash while writing the file.
+
+ Args:
+ checkpoint_path (str): the file to write the checkpoint to
+ results (object): the object we should write
  """

  assert checkpoint_path is not None
@@ -1394,11 +1428,41 @@ def _write_checkpoint(checkpoint_path, results):
  shutil.copyfile(checkpoint_path,checkpoint_tmp_path)

  # Write the new checkpoint
- ct_utils.write_json(checkpoint_path, {'images': results}, force_str=True)
+ ct_utils.write_json(checkpoint_path, {'checkpoint': results}, force_str=True)

  # Remove the backup checkpoint if it exists
  if checkpoint_tmp_path is not None:
- os.remove(checkpoint_tmp_path)
+ try:
+ os.remove(checkpoint_tmp_path)
+ except Exception as e:
+ print('Warning: error removing backup checkpoint file {}:\n{}'.format(
+ checkpoint_tmp_path,str(e)))
+
+
+ def load_checkpoint(checkpoint_path):
+ """
+ Loads results from a checkpoint file. A checkpoint file is always a dict
+ with the key "checkpoint".
+
+ Args:
+ checkpoint_path (str): the .json file to load
+
+ Returns:
+ object: object retrieved from the checkpoint, typically a list of results
+ """
+
+ print('Loading previous results from checkpoint file {}'.format(checkpoint_path))
+
+ with open(checkpoint_path, 'r') as f:
+ checkpoint_data = json.load(f)
+
+ if 'checkpoint' not in checkpoint_data:
+ raise ValueError('Checkpoint file {} is missing "checkpoint" field'.format(checkpoint_path))
+
+ results = checkpoint_data['checkpoint']
+ print('Restored {} entries from the checkpoint {}'.format(len(results),checkpoint_path))
+
+ return results


  def get_image_datetime(image):
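The hunks above change the checkpoint file format from {"images": [...]} to {"checkpoint": [...]}, and load_checkpoint rejects files missing the new key, so checkpoints written by earlier releases cannot be resumed directly. A minimal round-trip sketch, assuming both helpers are importable from megadetector.detection.run_detector_batch (the result entry is a placeholder, not real detector output):

# Hypothetical round-trip through the new checkpoint format.
from megadetector.detection.run_detector_batch import write_checkpoint, load_checkpoint

partial_results = [{'file': 'img_0001.jpg', 'detections': []}]   # placeholder entry
write_checkpoint('md_checkpoint.json', partial_results)          # writes {"checkpoint": [...]}

restored = load_checkpoint('md_checkpoint.json')                 # ValueError if the key is missing
assert restored == partial_results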
@@ -1585,8 +1649,6 @@ if False:
  cmd += ' --output_relative_filenames'
  if include_max_conf:
  cmd += ' --include_max_conf'
- if quiet:
- cmd += ' --quiet'
  if image_size is not None:
  cmd += ' --image_size {}'.format(image_size)
  if use_image_queue:
@@ -1670,10 +1732,6 @@ def main(): # noqa
  '--include_max_conf',
  action='store_true',
  help='Include the "max_detection_conf" field in the output')
- parser.add_argument(
- '--quiet',
- action='store_true',
- help='Suppress per-image console output')
  parser.add_argument(
  '--verbose',
  action='store_true',
@@ -1796,6 +1854,12 @@ def main(): # noqa
  default=1,
  help='Batch size for GPU inference (default 1). CPU inference will ignore this and use batch_size=1.')

+ # This argument is deprecated, we always use what was formerly "quiet mode"
+ parser.add_argument(
+ '--quiet',
+ action='store_true',
+ help=argparse.SUPPRESS)
+
  if len(sys.argv[1:]) == 0:
  parser.print_help()
  parser.exit()
@@ -1857,7 +1921,7 @@ def main(): # noqa
  # Load the checkpoint if available
  #
  # File paths in the checkpoint are always absolute paths; conversion to relative paths
- # happens below (if necessary).
+ # (if requested) happens at the time results are exported at the end of a job.
  if args.resume_from_checkpoint is not None:
  if args.resume_from_checkpoint == 'auto':
  checkpoint_files = os.listdir(output_dir)
@@ -1875,16 +1939,7 @@ def main(): # noqa
  checkpoint_file = os.path.join(output_dir,checkpoint_file_relative)
  else:
  checkpoint_file = args.resume_from_checkpoint
- assert os.path.exists(checkpoint_file), \
- 'File at resume_from_checkpoint specified does not exist'
- with open(checkpoint_file) as f:
- print('Loading previous results from checkpoint file {}'.format(
- checkpoint_file))
- saved = json.load(f)
- assert 'images' in saved, \
- 'The checkpoint file does not have the correct fields; cannot be restored'
- results = saved['images']
- print('Restored {} entries from the checkpoint'.format(len(results)))
+ results = load_checkpoint(checkpoint_file)
  else:
  results = []

@@ -2001,16 +2056,6 @@ def main(): # noqa
  f'Checkpoint path {checkpoint_path} already exists, delete or move it before ' + \
  're-using the same checkpoint path, or specify --allow_checkpoint_overwrite'

-
- # Confirm that we can write to the checkpoint path; this avoids issues where
- # we crash after several thousand images.
- #
- # But actually, commenting this out for now... the scenario where we are resuming from a
- # checkpoint, then immediately overwrite that checkpoint with empty data is higher-risk
- # than the annoyance of crashing a few minutes after starting a job.
- if False:
- ct_utils.write_json(checkpoint_path, {'images': []}, indent=None)
-
  print('The checkpoint file will be written to {}'.format(checkpoint_path))

  else:
@@ -2030,7 +2075,7 @@ def main(): # noqa
  results=results,
  n_cores=args.ncores,
  use_image_queue=args.use_image_queue,
- quiet=args.quiet,
+ quiet=True,
  image_size=args.image_size,
  class_mapping_filename=args.class_mapping_filename,
  include_image_size=args.include_image_size,
@@ -31,6 +31,7 @@ from megadetector.utils.ct_utils import round_float
  from megadetector.utils.ct_utils import write_json
  from megadetector.utils.ct_utils import make_temp_folder
  from megadetector.utils.ct_utils import is_list_sorted
+ from megadetector.utils.ct_utils import is_sphinx_build
  from megadetector.utils import path_utils
  from megadetector.visualization import visualization_utils as vis_utils
  from megadetector.postprocessing.validate_batch_results import \
@@ -808,8 +809,9 @@ def _run_detection_step(source_folder: str,

  files_to_merge = []

- # Process images if any
+ # Process images if necessary
  if len(image_files) > 0:
+
  print('Running MegaDetector on {} images...'.format(len(image_files)))

  image_results = load_and_run_detector_batch(
@@ -841,8 +843,11 @@ def _run_detection_step(source_folder: str,
  print('Image detection results written to {}'.format(image_output_file))
  files_to_merge.append(image_output_file)

- # Process videos if any
+ # ...if we had images to process
+
+ # Process videos if necessary
  if len(video_files) > 0:
+
  print('Running MegaDetector on {} videos...'.format(len(video_files)))

  # Set up video processing options
@@ -853,6 +858,7 @@ def _run_detection_step(source_folder: str,
  video_options.json_confidence_threshold = detection_confidence_threshold
  video_options.frame_sample = frame_sample
  video_options.time_sample = time_sample
+ video_options.recursive = True

  # Process videos
  process_videos(video_options)
@@ -860,6 +866,8 @@ def _run_detection_step(source_folder: str,
  print('Video detection results written to {}'.format(video_options.output_json_file))
  files_to_merge.append(video_options.output_json_file)

+ # ...if we had videos to process
+
  # Merge results if we have both images and videos
  if len(files_to_merge) > 1:
  print('Merging image and video detection results...')
@@ -868,6 +876,9 @@ def _run_detection_step(source_folder: str,

  elif len(files_to_merge) == 1:
  # Just rename the single file
  if files_to_merge[0] != detector_output_file:
+ if os.path.isfile(detector_output_file):
+ print('Detector file {} exists, over-writing'.format(detector_output_file))
+ os.remove(detector_output_file)
  os.rename(files_to_merge[0], detector_output_file)
  print('Detection results written to {}'.format(detector_output_file))
@@ -949,7 +960,7 @@ def _run_classification_step(detector_results_file: str,

  # This will block every time the queue reaches its maximum depth, so for
  # very small jobs, this will not be a useful progress bar.
- with tqdm(total=len(images)) as pbar:
+ with tqdm(total=len(images),desc='Classification') as pbar:
  for image_data in images:
  image_queue.put(image_data)
  pbar.update()
@@ -1104,6 +1115,8 @@ def _run_classification_step(detector_results_file: str,
  detector_results['classification_category_descriptions'] = \
  category_state.classification_category_descriptions

+ print('Writing output file')
+
  # Write results
  write_json(merged_results_file, detector_results)

@@ -1120,6 +1133,11 @@ def main():
  Command-line driver for run_md_and_speciesnet.py
  """

+ if 'speciesnet' not in sys.modules:
+ print('It looks like the speciesnet package is not available, try "pip install speciesnet"')
+ if not is_sphinx_build():
+ sys.exit(-1)
+
  parser = argparse.ArgumentParser(
  description='Run MegaDetector and SpeciesNet on a folder of images/videos',
  formatter_class=argparse.ArgumentDefaultsHelpFormatter
@@ -1153,7 +1171,7 @@ def main():
  parser.add_argument('--detection_confidence_threshold_for_classification',
  type=float,
  default=DEFAULT_DETECTION_CONFIDENCE_THRESHOLD_FOR_CLASSIFICATION,
- help='Classifiy detections above this threshold')
+ help='Classify detections above this threshold')
  parser.add_argument('--detection_confidence_threshold_for_output',
  type=float,
  default=DEFAULT_DETECTION_CONFIDENCE_THRESHOLD_FOR_OUTPUT,
@@ -27,6 +27,7 @@ from megadetector.visualization import visualization_utils as vis_utils

  default_fourcc = 'h264'

+ video_progress_bar_description = 'Processing video'

  #%% Path utilities

@@ -418,7 +419,7 @@ def run_callback_on_frames_for_folder(input_video_folder,
  # Process each video

  # video_fn_abs = input_files_full_paths[0]
- for video_fn_abs in tqdm(input_files_full_paths):
+ for video_fn_abs in tqdm(input_files_full_paths,desc=video_progress_bar_description):

  video_filename_relative = os.path.relpath(video_fn_abs,input_video_folder)
  video_filename_relative = video_filename_relative.replace('\\','/')
@@ -870,7 +871,7 @@ def video_folder_to_frames(input_folder,
  # For each video
  #
  # input_fn_relative = input_files_relative_paths[0]
- for input_fn_relative in tqdm(input_files_relative_paths):
+ for input_fn_relative in tqdm(input_files_relative_paths,desc='Video to frames'):

  # If frames_to_extract is a dict, get the specific frames for this video
  if isinstance(frames_to_extract, dict):
@@ -918,7 +919,7 @@ def video_folder_to_frames(input_folder,
  for relative_fn in input_files_relative_paths]

  results = list(tqdm(pool.imap(_video_to_frames_with_per_video_frames, args_for_pool),
- total=len(args_for_pool)))
+ total=len(args_for_pool),desc='Video to frames'))

  else:

@@ -933,7 +934,7 @@ def video_folder_to_frames(input_folder,
  frames_to_extract=frames_to_extract,
  allow_empty_videos=allow_empty_videos)
  results = list(tqdm(pool.imap(process_video_with_options, input_files_relative_paths),
- total=len(input_files_relative_paths)))
+ total=len(input_files_relative_paths),desc='Video to frames'))

  # ...if we need to pass different frames for each video

@@ -25,14 +25,14 @@ from megadetector.utils.ct_utils import sort_dictionary_by_value
  from megadetector.utils.ct_utils import sort_dictionary_by_key
  from megadetector.utils.ct_utils import invert_dictionary

- from megadetector.utils.wi_utils import clean_taxonomy_string
- from megadetector.utils.wi_utils import taxonomy_level_index
- from megadetector.utils.wi_utils import taxonomy_level_string_to_index
+ from megadetector.utils.wi_taxonomy_utils import clean_taxonomy_string
+ from megadetector.utils.wi_taxonomy_utils import taxonomy_level_index
+ from megadetector.utils.wi_taxonomy_utils import taxonomy_level_string_to_index

- from megadetector.utils.wi_utils import non_taxonomic_prediction_strings
- from megadetector.utils.wi_utils import human_prediction_string
- from megadetector.utils.wi_utils import animal_prediction_string
- from megadetector.utils.wi_utils import blank_prediction_string # noqa
+ from megadetector.utils.wi_taxonomy_utils import non_taxonomic_prediction_strings
+ from megadetector.utils.wi_taxonomy_utils import human_prediction_string
+ from megadetector.utils.wi_taxonomy_utils import animal_prediction_string
+ from megadetector.utils.wi_taxonomy_utils import blank_prediction_string # noqa


  #%% Options classes
@@ -1100,7 +1100,8 @@ def restrict_to_taxa_list(taxa_list,
  input_file,
  output_file,
  allow_walk_down=False,
- add_pre_filtering_description=True):
+ add_pre_filtering_description=True,
+ allow_redundant_latin_names=False):
  """
  Given a prediction file in MD .json format, likely without having had
  a geofence applied, apply a custom taxa list.
@@ -1123,6 +1124,10 @@ def restrict_to_taxa_list(taxa_list,
  add_pre_filtering_description (bool, optional): should we add a new metadata
  field that summarizes each image's classifications prior to taxonomic
  restriction?
+ allow_redundant_latin_names (bool, optional): if False, we'll raise an Exception
+ if the same latin name appears twice in the taxonomy list; if True, we'll
+ just print a warning and ignore all entries other than the first for this
+ latin name
  """

  ##%% Read target taxa list
@@ -1137,11 +1142,14 @@ def restrict_to_taxa_list(taxa_list,
  taxa_list = [s for s in taxa_list if len(s) > 0]

  target_latin_to_common = {}
+
  for s in taxa_list:
+
  if s.strip().startswith('#'):
  continue
  tokens = s.split(',')
- assert len(tokens) <= 2
+ # We allow additional columns now
+ # assert len(tokens) <= 2
  binomial_name = tokens[0]
  assert len(binomial_name.split(' ')) in (1,2,3), \
  'Illegal binomial name in species list: {}'.format(binomial_name)
@@ -1149,9 +1157,17 @@ def restrict_to_taxa_list(taxa_list,
  common_name = tokens[1].strip().lower()
  else:
  common_name = None
- assert binomial_name not in target_latin_to_common
+ if binomial_name in target_latin_to_common:
+ error_string = 'scientific name {} appears multiple times in the taxonomy list'.format(
+ binomial_name)
+ if allow_redundant_latin_names:
+ print('Warning: {}'.format(error_string))
+ else:
+ raise ValueError(error_string)
  target_latin_to_common[binomial_name] = common_name

+ # ...for each line in the taxonomy file
+

  ##%% Read taxonomy file

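The loop above now tolerates extra comma-separated columns in each taxa-list row and, with allow_redundant_latin_names=True, downgrades a duplicate scientific name from an error to a warning. A self-contained sketch of that parsing behavior as documented (keep the first entry for a duplicated name); the sample rows are invented and this is not package code:

# Standalone illustration of the taxa-list parsing rules described above.
def parse_taxa_list(rows, allow_redundant_latin_names=False):
    target_latin_to_common = {}
    for s in rows:
        if s.strip().startswith('#') or len(s.strip()) == 0:
            continue
        tokens = s.split(',')                 # columns beyond the first two are ignored
        binomial_name = tokens[0]
        common_name = tokens[1].strip().lower() if len(tokens) > 1 else None
        if binomial_name in target_latin_to_common:
            msg = 'scientific name {} appears multiple times in the taxonomy list'.format(binomial_name)
            if not allow_redundant_latin_names:
                raise ValueError(msg)
            print('Warning: {}'.format(msg))
            continue                          # keep the first entry, per the documented behavior
        target_latin_to_common[binomial_name] = common_name
    return target_latin_to_common

taxa = parse_taxa_list(['# comment line',
                        'puma concolor,Cougar,extra-column',
                        'puma concolor,Mountain lion'],
                       allow_redundant_latin_names=True)
assert taxa == {'puma concolor': 'cougar'}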
@@ -40,6 +40,8 @@ def combine_batch_output_files(input_files,
  Merges the list of MD results files [input_files] into a single
  dictionary, optionally writing the result to [output_file].

+ Always overwrites [output_file] if it exists.
+
  Args:
  input_files (list of str): paths to JSON detection files
  output_file (str, optional): path to write merged JSON
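For reference, a minimal call to the function whose docstring is updated above; the file names are placeholders, and per the new docstring an existing output file is overwritten:

# Hypothetical file names; merges two MD-format results files into one.
from megadetector.postprocessing.combine_batch_outputs import combine_batch_output_files

combine_batch_output_files(
    input_files=['camera_a_detections.json', 'camera_b_detections.json'],
    output_file='all_detections.json')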