megadetector 10.0.3__py3-none-any.whl → 10.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector might be problematic.
- megadetector/data_management/animl_to_md.py +158 -0
- megadetector/data_management/cct_json_utils.py +1 -0
- megadetector/data_management/speciesnet_to_md.py +2 -2
- megadetector/data_management/zamba_to_md.py +188 -0
- megadetector/detection/process_video.py +52 -40
- megadetector/detection/pytorch_detector.py +24 -34
- megadetector/detection/run_detector_batch.py +138 -93
- megadetector/detection/run_md_and_speciesnet.py +22 -4
- megadetector/detection/video_utils.py +5 -4
- megadetector/postprocessing/classification_postprocessing.py +26 -10
- megadetector/postprocessing/combine_batch_outputs.py +2 -0
- megadetector/postprocessing/generate_csv_report.py +1 -1
- megadetector/postprocessing/load_api_results.py +1 -1
- megadetector/postprocessing/md_to_wi.py +1 -1
- megadetector/postprocessing/postprocess_batch_results.py +1 -1
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1 -1
- megadetector/postprocessing/separate_detections_into_folders.py +1 -1
- megadetector/postprocessing/subset_json_detector_output.py +1 -3
- megadetector/utils/ct_utils.py +71 -0
- megadetector/utils/md_tests.py +8 -7
- megadetector/utils/path_utils.py +4 -15
- megadetector/utils/wi_platform_utils.py +824 -0
- megadetector/utils/wi_taxonomy_utils.py +1711 -0
- megadetector/visualization/visualization_utils.py +1 -1
- megadetector/visualization/visualize_detector_output.py +7 -5
- megadetector/visualization/visualize_video_output.py +1 -1
- {megadetector-10.0.3.dist-info → megadetector-10.0.5.dist-info}/METADATA +2 -2
- {megadetector-10.0.3.dist-info → megadetector-10.0.5.dist-info}/RECORD +31 -28
- megadetector/utils/wi_utils.py +0 -2674
- {megadetector-10.0.3.dist-info → megadetector-10.0.5.dist-info}/WHEEL +0 -0
- {megadetector-10.0.3.dist-info → megadetector-10.0.5.dist-info}/licenses/LICENSE +0 -0
- {megadetector-10.0.3.dist-info → megadetector-10.0.5.dist-info}/top_level.txt +0 -0
megadetector/detection/run_detector_batch.py

@@ -158,9 +158,7 @@ def _producer_func(q,
     for im_file in image_files:

         try:
-
-            print('Loading image {} on producer {}'.format(im_file,producer_id))
-            sys.stdout.flush()
+
             image = vis_utils.load_image(im_file)

             if preprocessor is not None:

@@ -179,10 +177,6 @@ def _producer_func(q,
             print('Producer process: image {} cannot be loaded:\n{}'.format(im_file,str(e)))
             image = run_detector.FAILURE_IMAGE_OPEN

-        if verbose:
-            print('Queueing image {} from producer {}'.format(im_file,producer_id))
-            sys.stdout.flush()
-
         q.put([im_file,image,producer_id])

     # ...for each image
@@ -210,7 +204,9 @@ def _consumer_func(q,
                    detector_options=None,
                    preprocess_on_image_queue=default_preprocess_on_image_queue,
                    n_total_images=None,
-                   batch_size=1
+                   batch_size=1,
+                   checkpoint_path=None,
+                   checkpoint_frequency=-1
                    ):
     """
     Consumer function; only used when using the (optional) image queue.

@@ -231,9 +227,14 @@ def _consumer_func(q,
         augment (bool, optional): enable image augmentation
         detector_options (dict, optional): key/value pairs that are interpreted differently
             by different detectors
-        preprocess_on_image_queue (bool, optional): whether images are already preprocessed on
+        preprocess_on_image_queue (bool, optional): whether images are already preprocessed on
+            the queue
         n_total_images (int, optional): total number of images expected (for progress bar)
         batch_size (int, optional): batch size for GPU inference
+        checkpoint_path (str, optional): path to write checkpoint files, None disables
+            checkpointing
+        checkpoint_frequency (int, optional): write checkpoint every N images, -1 disables
+            checkpointing
     """

     if verbose:
@@ -257,6 +258,25 @@ def _consumer_func(q,

     n_images_processed = 0
     n_queues_finished = 0
+    last_checkpoint_count = 0
+
+    def _should_write_checkpoint():
+        """
+        Check whether we should write a checkpoint. Returns True if we've crossed a
+        checkpoint boundary.
+        """
+
+        if (checkpoint_frequency <= 0) or (checkpoint_path is None):
+            return False
+
+        # Calculate the checkpoint threshold we should have crossed
+        current_checkpoint_threshold = \
+            (n_images_processed // checkpoint_frequency) * checkpoint_frequency
+        last_checkpoint_threshold = \
+            (last_checkpoint_count // checkpoint_frequency) * checkpoint_frequency
+
+        # We should write a checkpoint if we've crossed into a new checkpoint interval
+        return (current_checkpoint_threshold > last_checkpoint_threshold)

     pbar = None
     if n_total_images is not None:
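The boundary check above only fires when the running image count has crossed a new multiple of checkpoint_frequency since the last checkpoint, which matters because the consumer advances in batch-sized steps rather than one image at a time. A minimal standalone sketch of that logic, not part of the diff, with illustrative numbers:

    checkpoint_frequency = 1000
    last_checkpoint_count = 0
    for n_images_processed in (64, 512, 960, 1024, 1600, 2048):
        crossed = (n_images_processed // checkpoint_frequency) > \
                  (last_checkpoint_count // checkpoint_frequency)
        if crossed:
            # in the real consumer this is where write_checkpoint() is called
            last_checkpoint_count = n_images_processed
        print(n_images_processed, crossed)
    # prints True only at 1024 (crossing 1000) and 2048 (crossing 2000)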
@@ -314,6 +334,10 @@ def _consumer_func(q,

             n_images_processed += len(leftover_batch)

+            # In theory we could write a checkpoint here, but because we're basically
+            # done at this point, there's not much upside to writing another checkpoint,
+            # so for simplicity, I'm skipping it.
+
         # ...for each batch we have left to process

         return_queue.put(results)

@@ -334,16 +358,6 @@ def _consumer_func(q,
         im_file = r[0]
         image = r[1]

-        # This block is sometimes useful for debugging, so I'm leaving it here, but if'd out
-        if False:
-            if verbose or ((n_images_processed % n_queue_print) == 1):
-                elapsed = time.time() - start_time
-                images_per_second = n_images_processed / elapsed
-                print('De-queued image {} ({:.2f}/s) ({})'.format(n_images_processed,
-                                                                  images_per_second,
-                                                                  im_file))
-                sys.stdout.flush()
-
         # Handle failed images immediately (don't batch them)
         #
         # Loader workers communicate failures by passing a string to

@@ -418,10 +432,14 @@ def _consumer_func(q,

         # ...if we are/aren't doing batch processing

-
+        # Write checkpoint if necessary
+        if _should_write_checkpoint():
+            print('Consumer: writing checkpoint after {} images'.format(
+                n_images_processed))
+            write_checkpoint(checkpoint_path, results)
+            last_checkpoint_count = n_images_processed

-
-        print('Processed image {}'.format(im_file)); sys.stdout.flush()
+        # ...whether we received a string (indicating failure) or an image from the loader worker

         q.task_done()

@@ -442,7 +460,9 @@ def _run_detector_with_image_queue(image_files,
                                    detector_options=None,
                                    loader_workers=default_loaders,
                                    preprocess_on_image_queue=default_preprocess_on_image_queue,
-                                   batch_size=1
+                                   batch_size=1,
+                                   checkpoint_path=None,
+                                   checkpoint_frequency=-1):
     """
     Driver function for the (optional) multiprocessing-based image queue. Spawns workers to read and
     preprocess images, runs the consumer function in the calling process.

@@ -466,6 +486,8 @@ def _run_detector_with_image_queue(image_files,
         preprocess_on_image_queue (bool, optional): if the image queue is enabled, should it handle
             image loading and preprocessing (True), or just image loading (False)?
         batch_size (int, optional): batch size for GPU processing
+        checkpoint_path (str, optional): path to write checkpoint files, None disables checkpointing
+        checkpoint_frequency (int, optional): write checkpoint every N images, -1 disables checkpointing

     Returns:
         list: list of dicts in the format returned by process_image()

@@ -536,7 +558,9 @@ def _run_detector_with_image_queue(image_files,
                                                        detector_options,
                                                        preprocess_on_image_queue,
                                                        n_total_images,
-                                                       batch_size
+                                                       batch_size,
+                                                       checkpoint_path,
+                                                       checkpoint_frequency))
         else:
             consumer = Process(target=_consumer_func,args=(q,
                                                            return_queue,

@@ -551,7 +575,9 @@ def _run_detector_with_image_queue(image_files,
                                                            detector_options,
                                                            preprocess_on_image_queue,
                                                            n_total_images,
-                                                           batch_size
+                                                           batch_size,
+                                                           checkpoint_path,
+                                                           checkpoint_frequency))
         consumer.daemon = True
         consumer.start()
     else:

@@ -568,7 +594,9 @@ def _run_detector_with_image_queue(image_files,
                        detector_options,
                        preprocess_on_image_queue,
                        n_total_images,
-                       batch_size
+                       batch_size,
+                       checkpoint_path,
+                       checkpoint_frequency)

     for i_producer,producer in enumerate(producers):
         producer.join()
@@ -665,9 +693,6 @@ def _process_batch(image_items_batch,
         list of dict: list of results for each image in the batch
     """

-    if (verbose):
-        print('_process_batch called with {} items'.format(len(image_items_batch)))
-
     # This will be the set of items we send for inference; it may be
     # smaller than the input list (image_items_batch) if some images
     # fail to load. [valid_images] will be either a list of PIL Image

@@ -703,9 +728,6 @@ def _process_batch(image_items_batch,

     assert len(valid_images) == len(valid_image_filenames)

-    if verbose:
-        print('_process_batch found {} valid items in batch'.format(len(valid_images)))
-
     valid_batch_results = []

     # Process the batch if we have any valid images

@@ -785,9 +807,6 @@ def _process_batch(image_items_batch,

     batch_results.extend(valid_batch_results)

-    if verbose:
-        print('_process batch returning results for {} items'.format(len(batch_results)))-

     return batch_results

 # ...def _process_batch(...)
@@ -1153,30 +1172,39 @@ def load_and_run_detector_batch(model_file,

     if use_image_queue:

-        assert checkpoint_frequency < 0, \
-            'Using an image queue is not currently supported when checkpointing is enabled'
-        assert len(results) == 0, \
-            'Using an image queue with results loaded from a checkpoint is not currently supported'
         assert n_cores <= 1

-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Filter out already processed images
+        images_to_process = [im_file for im_file in image_file_names
+                             if im_file not in already_processed]
+
+        if len(images_to_process) != len(image_file_names):
+            print('Bypassing {} images that have already been processed'.format(
+                len(image_file_names) - len(images_to_process)))
+
+        new_results = _run_detector_with_image_queue(images_to_process,
+                                                     model_file,
+                                                     confidence_threshold,
+                                                     quiet,
+                                                     image_size=image_size,
+                                                     include_image_size=include_image_size,
+                                                     include_image_timestamp=include_image_timestamp,
+                                                     include_exif_data=include_exif_data,
+                                                     augment=augment,
+                                                     detector_options=detector_options,
+                                                     loader_workers=loader_workers,
+                                                     preprocess_on_image_queue=preprocess_on_image_queue,
+                                                     batch_size=batch_size,
+                                                     checkpoint_path=checkpoint_path,
+                                                     checkpoint_frequency=checkpoint_frequency)
+
+        # Merge new results with existing results from checkpoint
+        results.extend(new_results)

     elif n_cores <= 1:

+        # Single-threaded processing, no image queue
+
         # Load the detector
         start_time = time.time()
         detector = load_detector(model_file,
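The net effect of this hunk is that the image-queue path no longer rejects checkpointing or a non-empty results list: already-processed images are filtered out and the checkpoint arguments are forwarded to the queue driver. A minimal sketch, not part of the diff, of calling this path from Python; the keyword names mirror the calls visible in the diff, while the model name, file list, and checkpoint path are placeholder assumptions:

    from megadetector.detection.run_detector_batch import load_and_run_detector_batch

    image_file_names = ['/data/camera01/img_0001.jpg',
                        '/data/camera01/img_0002.jpg']        # placeholder image paths
    results = load_and_run_detector_batch('MDV5A',            # model file or well-known model name
                                          image_file_names,
                                          checkpoint_path='checkpoint.json',
                                          checkpoint_frequency=1000,  # write every ~1000 images
                                          use_image_queue=True,       # previously incompatible with checkpointing
                                          quiet=True)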
@@ -1233,7 +1261,7 @@ def load_and_run_detector_batch(model_file,
                 if (checkpoint_frequency != -1) and ((image_count % checkpoint_frequency) == 0):
                     print('Writing a new checkpoint after having processed {} images since '
                           'last restart'.format(image_count))
-
+                    write_checkpoint(checkpoint_path, results)

         else:

@@ -1257,7 +1285,7 @@ def load_and_run_detector_batch(model_file,
                 if (checkpoint_frequency != -1) and ((image_count % checkpoint_frequency) == 0):
                     print('Writing a new checkpoint after having processed {} images since '
                           'last restart'.format(image_count))
-
+                    write_checkpoint(checkpoint_path, results)

         # ...if the batch size is > 1

@@ -1291,9 +1319,9 @@ def load_and_run_detector_batch(model_file,

         checkpoint_queue = Manager().Queue()

-        # Pass the "results" array (which may already contain images loaded from an
-
-        # the list as they become available.
+        # Pass the "results" array (which may already contain images loaded from an
+        # existing checkpoint) to the checkpoint queue handler function, which will
+        # append results to the list as they become available.
         checkpoint_thread = Thread(target=_checkpoint_queue_handler,
                                    args=(checkpoint_path, checkpoint_frequency,
                                          checkpoint_queue, results), daemon=True)

@@ -1337,7 +1365,7 @@ def load_and_run_detector_batch(model_file,

     # Append the results we just computed to "results", which is *usually* empty, but will
     # be non-empty if we resumed from a checkpoint
-    results
+    results.extend(new_results)

     # ...if checkpointing is/isn't enabled

@@ -1376,12 +1404,18 @@ def _checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_
             print('Writing a new checkpoint after having processed {} images since '
                   'last restart'.format(result_count))

-
+            write_checkpoint(checkpoint_path, results)


-def _write_checkpoint(checkpoint_path, results):
+def write_checkpoint(checkpoint_path, results):
     """
-    Writes the
+    Writes the object in [results] to a json checkpoint file, as a dict with the
+    key "checkpoint". First backs up the checkpoint file if it exists, in case we
+    crash while writing the file.
+
+    Args:
+        checkpoint_path (str): the file to write the checkpoint to
+        results (object): the object we should write
     """

     assert checkpoint_path is not None

@@ -1394,11 +1428,41 @@ def _write_checkpoint(checkpoint_path, results):
         shutil.copyfile(checkpoint_path,checkpoint_tmp_path)

     # Write the new checkpoint
-    ct_utils.write_json(checkpoint_path, {'
+    ct_utils.write_json(checkpoint_path, {'checkpoint': results}, force_str=True)

     # Remove the backup checkpoint if it exists
     if checkpoint_tmp_path is not None:
-
+        try:
+            os.remove(checkpoint_tmp_path)
+        except Exception as e:
+            print('Warning: error removing backup checkpoint file {}:\n{}'.format(
+                checkpoint_tmp_path,str(e)))
+
+
+def load_checkpoint(checkpoint_path):
+    """
+    Loads results from a checkpoint file. A checkpoint file is always a dict
+    with the key "checkpoint".
+
+    Args:
+        checkpoint_path (str): the .json file to load
+
+    Returns:
+        object: object retrieved from the checkpoint, typically a list of results
+    """
+
+    print('Loading previous results from checkpoint file {}'.format(checkpoint_path))
+
+    with open(checkpoint_path, 'r') as f:
+        checkpoint_data = json.load(f)
+
+    if 'checkpoint' not in checkpoint_data:
+        raise ValueError('Checkpoint file {} is missing "checkpoint" field'.format(checkpoint_path))
+
+    results = checkpoint_data['checkpoint']
+    print('Restored {} entries from the checkpoint {}'.format(len(results),checkpoint_path))
+
+    return results


 def get_image_datetime(image):
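write_checkpoint and load_checkpoint are the public replacements for the old private _write_checkpoint, and the on-disk format changes from a dict keyed by "images" to one keyed by "checkpoint". A minimal round-trip sketch, not part of the diff, assuming the import path below and using placeholder data and a placeholder file name:

    from megadetector.detection.run_detector_batch import write_checkpoint, load_checkpoint

    results = [{'file': '/data/camera01/img_0001.jpg', 'detections': []}]  # placeholder result entry
    write_checkpoint('md_checkpoint.json', results)   # writes {'checkpoint': [...]}, backing up any existing file
    restored = load_checkpoint('md_checkpoint.json')  # raises ValueError if the 'checkpoint' key is missing
    assert restored == results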
@@ -1585,8 +1649,6 @@ if False:
     cmd += ' --output_relative_filenames'
     if include_max_conf:
         cmd += ' --include_max_conf'
-    if quiet:
-        cmd += ' --quiet'
     if image_size is not None:
         cmd += ' --image_size {}'.format(image_size)
     if use_image_queue:

@@ -1670,10 +1732,6 @@ def main(): # noqa
         '--include_max_conf',
         action='store_true',
         help='Include the "max_detection_conf" field in the output')
-    parser.add_argument(
-        '--quiet',
-        action='store_true',
-        help='Suppress per-image console output')
     parser.add_argument(
         '--verbose',
         action='store_true',

@@ -1796,6 +1854,12 @@ def main(): # noqa
         default=1,
         help='Batch size for GPU inference (default 1). CPU inference will ignore this and use batch_size=1.')

+    # This argument is deprecated, we always use what was formerly "quiet mode"
+    parser.add_argument(
+        '--quiet',
+        action='store_true',
+        help=argparse.SUPPRESS)
+
     if len(sys.argv[1:]) == 0:
         parser.print_help()
         parser.exit()
@@ -1857,7 +1921,7 @@ def main(): # noqa
     # Load the checkpoint if available
     #
     # File paths in the checkpoint are always absolute paths; conversion to relative paths
-    # happens
+    # (if requested) happens at the time results are exported at the end of a job.
     if args.resume_from_checkpoint is not None:
         if args.resume_from_checkpoint == 'auto':
             checkpoint_files = os.listdir(output_dir)

@@ -1875,16 +1939,7 @@ def main(): # noqa
                 checkpoint_file = os.path.join(output_dir,checkpoint_file_relative)
         else:
             checkpoint_file = args.resume_from_checkpoint
-
-            'File at resume_from_checkpoint specified does not exist'
-        with open(checkpoint_file) as f:
-            print('Loading previous results from checkpoint file {}'.format(
-                checkpoint_file))
-            saved = json.load(f)
-            assert 'images' in saved, \
-                'The checkpoint file does not have the correct fields; cannot be restored'
-            results = saved['images']
-            print('Restored {} entries from the checkpoint'.format(len(results)))
+        results = load_checkpoint(checkpoint_file)
     else:
         results = []

@@ -2001,16 +2056,6 @@ def main(): # noqa
                 f'Checkpoint path {checkpoint_path} already exists, delete or move it before ' + \
                 're-using the same checkpoint path, or specify --allow_checkpoint_overwrite'

-
-        # Confirm that we can write to the checkpoint path; this avoids issues where
-        # we crash after several thousand images.
-        #
-        # But actually, commenting this out for now... the scenario where we are resuming from a
-        # checkpoint, then immediately overwrite that checkpoint with empty data is higher-risk
-        # than the annoyance of crashing a few minutes after starting a job.
-        if False:
-            ct_utils.write_json(checkpoint_path, {'images': []}, indent=None)
-
         print('The checkpoint file will be written to {}'.format(checkpoint_path))

     else:

@@ -2030,7 +2075,7 @@ def main(): # noqa
         results=results,
         n_cores=args.ncores,
         use_image_queue=args.use_image_queue,
-        quiet=
+        quiet=True,
         image_size=args.image_size,
         class_mapping_filename=args.class_mapping_filename,
         include_image_size=args.include_image_size,
megadetector/detection/run_md_and_speciesnet.py

@@ -31,6 +31,7 @@ from megadetector.utils.ct_utils import round_float
 from megadetector.utils.ct_utils import write_json
 from megadetector.utils.ct_utils import make_temp_folder
 from megadetector.utils.ct_utils import is_list_sorted
+from megadetector.utils.ct_utils import is_sphinx_build
 from megadetector.utils import path_utils
 from megadetector.visualization import visualization_utils as vis_utils
 from megadetector.postprocessing.validate_batch_results import \

@@ -808,8 +809,9 @@ def _run_detection_step(source_folder: str,

     files_to_merge = []

-    # Process images if
+    # Process images if necessary
     if len(image_files) > 0:
+
         print('Running MegaDetector on {} images...'.format(len(image_files)))

         image_results = load_and_run_detector_batch(

@@ -841,8 +843,11 @@ def _run_detection_step(source_folder: str,
         print('Image detection results written to {}'.format(image_output_file))
         files_to_merge.append(image_output_file)

-    #
+    # ...if we had images to process
+
+    # Process videos if necessary
     if len(video_files) > 0:
+
         print('Running MegaDetector on {} videos...'.format(len(video_files)))

         # Set up video processing options

@@ -853,6 +858,7 @@ def _run_detection_step(source_folder: str,
         video_options.json_confidence_threshold = detection_confidence_threshold
         video_options.frame_sample = frame_sample
         video_options.time_sample = time_sample
+        video_options.recursive = True

         # Process videos
         process_videos(video_options)

@@ -860,6 +866,8 @@ def _run_detection_step(source_folder: str,
         print('Video detection results written to {}'.format(video_options.output_json_file))
         files_to_merge.append(video_options.output_json_file)

+    # ...if we had videos to process
+
     # Merge results if we have both images and videos
     if len(files_to_merge) > 1:
         print('Merging image and video detection results...')

@@ -868,6 +876,9 @@ def _run_detection_step(source_folder: str,
     elif len(files_to_merge) == 1:
         # Just rename the single file
         if files_to_merge[0] != detector_output_file:
+            if os.path.isfile(detector_output_file):
+                print('Detector file {} exists, over-writing'.format(detector_output_file))
+                os.remove(detector_output_file)
             os.rename(files_to_merge[0], detector_output_file)
             print('Detection results written to {}'.format(detector_output_file))

@@ -949,7 +960,7 @@ def _run_classification_step(detector_results_file: str,

     # This will block every time the queue reaches its maximum depth, so for
     # very small jobs, this will not be a useful progress bar.
-    with tqdm(total=len(images)) as pbar:
+    with tqdm(total=len(images),desc='Classification') as pbar:
         for image_data in images:
             image_queue.put(image_data)
             pbar.update()

@@ -1104,6 +1115,8 @@ def _run_classification_step(detector_results_file: str,
     detector_results['classification_category_descriptions'] = \
         category_state.classification_category_descriptions

+    print('Writing output file')
+
     # Write results
     write_json(merged_results_file, detector_results)

@@ -1120,6 +1133,11 @@ def main():
     Command-line driver for run_md_and_speciesnet.py
     """

+    if 'speciesnet' not in sys.modules:
+        print('It looks like the speciesnet package is not available, try "pip install speciesnet"')
+        if not is_sphinx_build():
+            sys.exit(-1)
+
     parser = argparse.ArgumentParser(
         description='Run MegaDetector and SpeciesNet on a folder of images/videos',
         formatter_class=argparse.ArgumentDefaultsHelpFormatter

@@ -1153,7 +1171,7 @@ def main():
     parser.add_argument('--detection_confidence_threshold_for_classification',
                         type=float,
                         default=DEFAULT_DETECTION_CONFIDENCE_THRESHOLD_FOR_CLASSIFICATION,
-                        help='
+                        help='Classify detections above this threshold')
     parser.add_argument('--detection_confidence_threshold_for_output',
                         type=float,
                         default=DEFAULT_DETECTION_CONFIDENCE_THRESHOLD_FOR_OUTPUT,
megadetector/detection/video_utils.py

@@ -27,6 +27,7 @@ from megadetector.visualization import visualization_utils as vis_utils

 default_fourcc = 'h264'

+video_progress_bar_description = 'Processing video'

 #%% Path utilities


@@ -418,7 +419,7 @@ def run_callback_on_frames_for_folder(input_video_folder,
     # Process each video

     # video_fn_abs = input_files_full_paths[0]
-    for video_fn_abs in tqdm(input_files_full_paths):
+    for video_fn_abs in tqdm(input_files_full_paths,desc=video_progress_bar_description):

         video_filename_relative = os.path.relpath(video_fn_abs,input_video_folder)
         video_filename_relative = video_filename_relative.replace('\\','/')

@@ -870,7 +871,7 @@ def video_folder_to_frames(input_folder,
         # For each video
         #
         # input_fn_relative = input_files_relative_paths[0]
-        for input_fn_relative in tqdm(input_files_relative_paths):
+        for input_fn_relative in tqdm(input_files_relative_paths,desc='Video to frames'):

             # If frames_to_extract is a dict, get the specific frames for this video
             if isinstance(frames_to_extract, dict):

@@ -918,7 +919,7 @@ def video_folder_to_frames(input_folder,
                              for relative_fn in input_files_relative_paths]

             results = list(tqdm(pool.imap(_video_to_frames_with_per_video_frames, args_for_pool),
-                                total=len(args_for_pool)))
+                                total=len(args_for_pool),desc='Video to frames'))

         else:

@@ -933,7 +934,7 @@ def video_folder_to_frames(input_folder,
                                                     frames_to_extract=frames_to_extract,
                                                     allow_empty_videos=allow_empty_videos)
             results = list(tqdm(pool.imap(process_video_with_options, input_files_relative_paths),
-                                total=len(input_files_relative_paths)))
+                                total=len(input_files_relative_paths),desc='Video to frames'))

     # ...if we need to pass different frames for each video

megadetector/postprocessing/classification_postprocessing.py

@@ -25,14 +25,14 @@ from megadetector.utils.ct_utils import sort_dictionary_by_value
 from megadetector.utils.ct_utils import sort_dictionary_by_key
 from megadetector.utils.ct_utils import invert_dictionary

-from megadetector.utils.
-from megadetector.utils.
-from megadetector.utils.
+from megadetector.utils.wi_taxonomy_utils import clean_taxonomy_string
+from megadetector.utils.wi_taxonomy_utils import taxonomy_level_index
+from megadetector.utils.wi_taxonomy_utils import taxonomy_level_string_to_index

-from megadetector.utils.
-from megadetector.utils.
-from megadetector.utils.
-from megadetector.utils.
+from megadetector.utils.wi_taxonomy_utils import non_taxonomic_prediction_strings
+from megadetector.utils.wi_taxonomy_utils import human_prediction_string
+from megadetector.utils.wi_taxonomy_utils import animal_prediction_string
+from megadetector.utils.wi_taxonomy_utils import blank_prediction_string # noqa


 #%% Options classes

@@ -1100,7 +1100,8 @@ def restrict_to_taxa_list(taxa_list,
                           input_file,
                           output_file,
                           allow_walk_down=False,
-                          add_pre_filtering_description=True
+                          add_pre_filtering_description=True,
+                          allow_redundant_latin_names=False):
     """
     Given a prediction file in MD .json format, likely without having had
     a geofence applied, apply a custom taxa list.

@@ -1123,6 +1124,10 @@ def restrict_to_taxa_list(taxa_list,
        add_pre_filtering_description (bool, optional): should we add a new metadata
            field that summarizes each image's classifications prior to taxonomic
            restriction?
+       allow_redundant_latin_names (bool, optional): if False, we'll raise an Exception
+           if the same latin name appears twice in the taxonomy list; if True, we'll
+           just print a warning and ignore all entries other than the first for this
+           latin name
     """

     ##%% Read target taxa list

@@ -1137,11 +1142,14 @@ def restrict_to_taxa_list(taxa_list,
     taxa_list = [s for s in taxa_list if len(s) > 0]

     target_latin_to_common = {}
+
     for s in taxa_list:
+
         if s.strip().startswith('#'):
             continue
         tokens = s.split(',')
-
+        # We allow additional columns now
+        # assert len(tokens) <= 2
         binomial_name = tokens[0]
         assert len(binomial_name.split(' ')) in (1,2,3), \
             'Illegal binomial name in species list: {}'.format(binomial_name)

@@ -1149,9 +1157,17 @@ def restrict_to_taxa_list(taxa_list,
             common_name = tokens[1].strip().lower()
         else:
             common_name = None
-
+        if binomial_name in target_latin_to_common:
+            error_string = 'scientific name {} appears multiple times in the taxonomy list'.format(
+                binomial_name)
+            if allow_redundant_latin_names:
+                print('Warning: {}'.format(error_string))
+            else:
+                raise ValueError(error_string)
         target_latin_to_common[binomial_name] = common_name

+    # ...for each line in the taxonomy file
+

     ##%% Read taxonomy file

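For reference, a hypothetical taxa list illustrating the line format this loop parses: one comma-separated entry per line with a scientific name, an optional common name, optional extra columns (now tolerated), and '#' comment lines. The species entries below are placeholders, and whether restrict_to_taxa_list() also accepts a file path rather than a list of strings is not shown in this diff:

    taxa_list = [
        '# comment lines are skipped',
        'odocoileus virginianus,white-tailed deer',
        'meleagris gallopavo,wild turkey,extra columns are now allowed',
        'procyon lotor',
        # repeating 'procyon lotor' here would raise a ValueError unless
        # allow_redundant_latin_names=True is passed to restrict_to_taxa_list()
    ]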
megadetector/postprocessing/combine_batch_outputs.py

@@ -40,6 +40,8 @@ def combine_batch_output_files(input_files,
     Merges the list of MD results files [input_files] into a single
     dictionary, optionally writing the result to [output_file].

+    Always overwrites [output_file] if it exists.
+
     Args:
         input_files (list of str): paths to JSON detection files
         output_file (str, optional): path to write merged JSON