PyPI - megadetector - Versions diffs - 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl - Mend

megadetector 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (75) hide show

api/batch_processing/data_preparation/manage_local_batch.py +297 -202
api/batch_processing/data_preparation/manage_video_batch.py +7 -2
api/batch_processing/postprocessing/add_max_conf.py +1 -0
api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
api/batch_processing/postprocessing/compare_batch_results.py +111 -61
api/batch_processing/postprocessing/convert_output_format.py +24 -6
api/batch_processing/postprocessing/load_api_results.py +56 -72
api/batch_processing/postprocessing/md_to_labelme.py +119 -51
api/batch_processing/postprocessing/merge_detections.py +30 -5
api/batch_processing/postprocessing/postprocess_batch_results.py +175 -55
api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
api/batch_processing/postprocessing/render_detection_confusion_matrix.py +628 -0
api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +224 -76
api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
classification/prepare_classification_script.py +191 -191
data_management/cct_json_utils.py +7 -2
data_management/coco_to_labelme.py +263 -0
data_management/coco_to_yolo.py +72 -48
data_management/databases/integrity_check_json_db.py +75 -64
data_management/databases/subset_json_db.py +1 -1
data_management/generate_crops_from_cct.py +1 -1
data_management/get_image_sizes.py +44 -26
data_management/importers/animl_results_to_md_results.py +3 -5
data_management/importers/noaa_seals_2019.py +2 -2
data_management/importers/zamba_results_to_md_results.py +2 -2
data_management/labelme_to_coco.py +264 -127
data_management/labelme_to_yolo.py +96 -53
data_management/lila/create_lila_blank_set.py +557 -0
data_management/lila/create_lila_test_set.py +2 -1
data_management/lila/create_links_to_md_results_files.py +1 -1
data_management/lila/download_lila_subset.py +138 -45
data_management/lila/generate_lila_per_image_labels.py +23 -14
data_management/lila/get_lila_annotation_counts.py +16 -10
data_management/lila/lila_common.py +15 -42
data_management/lila/test_lila_metadata_urls.py +116 -0
data_management/read_exif.py +65 -16
data_management/remap_coco_categories.py +84 -0
data_management/resize_coco_dataset.py +14 -31
data_management/wi_download_csv_to_coco.py +239 -0
data_management/yolo_output_to_md_output.py +40 -13
data_management/yolo_to_coco.py +313 -100
detection/process_video.py +36 -14
detection/pytorch_detector.py +1 -1
detection/run_detector.py +73 -18
detection/run_detector_batch.py +116 -27
detection/run_inference_with_yolov5_val.py +135 -27
detection/run_tiled_inference.py +153 -43
detection/tf_detector.py +2 -1
detection/video_utils.py +4 -2
md_utils/ct_utils.py +101 -6
md_utils/md_tests.py +264 -17
md_utils/path_utils.py +326 -47
md_utils/process_utils.py +26 -7
md_utils/split_locations_into_train_val.py +215 -0
md_utils/string_utils.py +10 -0
md_utils/url_utils.py +66 -3
md_utils/write_html_image_list.py +12 -2
md_visualization/visualization_utils.py +380 -74
md_visualization/visualize_db.py +41 -10
md_visualization/visualize_detector_output.py +185 -104
{megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/METADATA +11 -13
{megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/RECORD +74 -67
{megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
taxonomy_mapping/map_new_lila_datasets.py +43 -39
taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
taxonomy_mapping/preview_lila_taxonomy.py +27 -27
taxonomy_mapping/species_lookup.py +33 -13
taxonomy_mapping/taxonomy_csv_checker.py +7 -5
md_visualization/visualize_megadb.py +0 -183
{megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
{megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0

detection/run_detector.py CHANGED Viewed

@@ -10,12 +10,7 @@
 # This script is not a good way to process lots of images (tens of thousands,
 # say). It does not facilitate checkpointing the results so if it crashes you
 # would have to start from scratch. If you want to run a detector (e.g., ours)
-# on lots of images, you should check out:
-#
-# 1) run_detector_batch.py (for local execution)
-#
-# 2) https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing
-#    (for running large jobs on Azure ML)
+# on lots of images, you should check out run_detector_batch.py.
 #
 # To run this script, we recommend you set up a conda virtual environment
 # following instructions in the Installation section on the main README, using
@@ -136,6 +131,33 @@ downloadable_models = {
     'MDV5B':'https://github.com/agentmorris/MegaDetector/releases/download/v5.0/md_v5b.0.0.pt'
 }
+model_string_to_model_version = {
+    'v2':'v2.0.0',
+    'v3':'v3.0.0',
+    'v4.1':'v4.1.0',
+    'v5a.0.0':'v5a.0.0',
+    'v5b.0.0':'v5b.0.0',
+    'mdv5a':'v5a.0.0',
+    'mdv5b':'v5b.0.0',
+    'mdv4':'v4.1.0',
+    'mdv3':'v3.0.0'
+}
+# Approximate inference speeds (in images per second) for MDv5 based on
+# benchmarks, only used for reporting very coarse expectations about inference time.
+device_token_to_mdv5_inference_speed = {
+    '4090':17.6,
+    '3090':11.4,
+    '3080':9.5,
+    '3050':4.2,
+    'P2000':2.1,
+    # These are written this way because they're MDv4 benchmarks, and MDv5
+    # is around 3.5x faster than MDv4.
+    'V100':2.79*3.5,
+    '2080':2.3*3.5,
+    '2060':1.6*3.5
+}
 #%% Utility functions
@@ -190,18 +212,9 @@ def get_detector_version_from_filename(detector_filename):
     "v4.1.0", "v5a.0.0", and "v5b.0.0", respectively.
     """
-    fn = os.path.basename(detector_filename)
-    known_model_versions = {'v2':'v2.0.0',
-                            'v3':'v3.0.0',
-                            'v4.1':'v4.1.0',
-                            'v5a.0.0':'v5a.0.0',
-                            'v5b.0.0':'v5b.0.0',
-                            'MDV5A':'v5a.0.0',
-                            'MDV5B':'v5b.0.0',
-                            'MDV4':'v4.1.0',
-                            'MDV3':'v3.0.0'}
+    fn = os.path.basename(detector_filename).lower()
     matches = []
-    for s in known_model_versions.keys():
+    for s in model_string_to_model_version.keys():
         if s in fn:
             matches.append(s)
     if len(matches) == 0:
@@ -211,9 +224,51 @@ def get_detector_version_from_filename(detector_filename):
         print('Warning: multiple MegaDetector versions for model file {}'.format(detector_filename))
         return 'multiple'
     else:
-        return known_model_versions[matches[0]]
+        return model_string_to_model_version[matches[0]]
+def estimate_md_images_per_second(model_file, device_name=None):
+    """
+    Estimate how fast MegaDetector will run based on benchmarks.  Defaults to querying
+    the current device.  Returns None if no data is available for the current card/model.
+    Estimates only available for a small handful of GPUs.
+    """
+    if device_name is None:
+        try:
+            import torch
+            device_name = torch.cuda.get_device_name()
+        except Exception as e:
+            print('Error querying device name: {}'.format(e))
+            return None
+    model_file = model_file.lower().strip()
+    if model_file in model_string_to_model_version.values():
+        model_version = model_file
+    else:
+        model_version = get_detector_version_from_filename(model_file)
+        if model_version not in model_string_to_model_version.values():
+            print('Error determining model version for model file {}'.format(model_file))
+            return None
+    mdv5_inference_speed = None
+    for device_token in device_token_to_mdv5_inference_speed.keys():
+        if device_token in device_name:
+            mdv5_inference_speed = device_token_to_mdv5_inference_speed[device_token]
+            break
+    if mdv5_inference_speed is None:
+        print('No speed estimate available for {}'.format(device_name))
+    if 'v5' in model_version:
+        return mdv5_inference_speed
+    elif 'v2' in model_version or 'v3' in model_version or 'v4' in model_version:
+        return mdv5_inference_speed / 3.5
+    else:
+        print('Could not estimate inference speed for model file {}'.format(model_file))
+        return None
 def get_typical_confidence_threshold_from_results(results):
     """
     Given the .json data loaded from a MD results file, determine a typical confidence

detection/run_detector_batch.py CHANGED Viewed

@@ -245,7 +245,8 @@ def process_images(im_files, detector, confidence_threshold, use_image_queue=Fal
                    quiet=False, image_size=None, checkpoint_queue=None, include_image_size=False,
                    include_image_timestamp=False, include_exif_data=False):
     """
-    Runs MegaDetector over a list of image files.
+    Runs MegaDetector over a list of image files.  As of 3/2024, this entry point is used when the
+    image queue is enabled, but not in the standard inference path (which loops over process_image()).
     Args
     - im_files: list of str, paths to image files
@@ -269,7 +270,7 @@ def process_images(im_files, detector, confidence_threshold, use_image_queue=Fal
                                       include_image_size=include_image_size,
                                       include_image_timestamp=include_image_timestamp,
                                       include_exif_data=include_exif_data)
-    else:
+    else:
         results = []
         for im_file in im_files:
             result = process_image(im_file, detector, confidence_threshold,
@@ -662,7 +663,7 @@ def get_image_datetime(image):
 def write_results_to_file(results, output_file, relative_path_base=None,
                           detector_file=None, info=None, include_max_conf=False,
-                          custom_metadata=None):
+                          custom_metadata=None, force_forward_slashes=True):
     """
     Writes list of detection results to JSON output file. Format matches:
@@ -692,6 +693,14 @@ def write_results_to_file(results, output_file, relative_path_base=None,
             results_relative.append(r_relative)
         results = results_relative
+    if force_forward_slashes:
+        results_converted = []
+        for r in results:
+            r_converted = copy.copy(r)
+            r_converted['file'] = r_converted['file'].replace('\\','/')
+            results_converted.append(r_converted)
+        results = results_converted
     # The typical case: we need to build the 'info' struct
     if info is None:
@@ -751,17 +760,75 @@ if False:
     #%%
+    model_file = 'MDV5A'
+    image_dir = r'g:\camera_traps\camera_trap_images'
+    output_file = r'g:\temp\md-test.json'
+    recursive = True
+    output_relative_filenames = True
+    include_max_conf = False
+    quiet = True
+    image_size = None
+    use_image_queue = False
+    confidence_threshold = 0.0001
+    checkpoint_frequency = 5
     checkpoint_path = None
-    model_file = r'G:\temp\models\md_v4.1.0.pb'
-    confidence_threshold = 0.1
-    checkpoint_frequency = -1
-    results = None
+    resume_from_checkpoint = 'auto'
+    allow_checkpoint_overwrite = False
     ncores = 1
-    use_image_queue = False
-    quiet = False
-    image_dir = r'G:\temp\demo_images\ssmini'
-    image_size = None
+    class_mapping_filename = None
+    include_image_size = True
+    include_image_timestamp = True
+    include_exif_data = True
+    overwrite_handling = None
+    # Generate a command line
+    cmd = 'python run_detector_batch.py "{}" "{}" "{}"'.format(
+        model_file,image_dir,output_file)
+    if recursive:
+        cmd += ' --recursive'
+    if output_relative_filenames:
+        cmd += ' --output_relative_filenames'
+    if include_max_conf:
+        cmd += ' --include_max_conf'
+    if quiet:
+        cmd += ' --quiet'
+    if image_size is not None:
+        cmd += ' --image_size {}'.format(image_size)
+    if use_image_queue:
+        cmd += ' --use_image_queue'
+    if confidence_threshold is not None:
+        cmd += ' --threshold {}'.format(confidence_threshold)
+    if checkpoint_frequency is not None:
+        cmd += ' --checkpoint_frequency {}'.format(checkpoint_frequency)
+    if checkpoint_path is not None:
+        cmd += ' --checkpoint_path "{}"'.format(checkpoint_path)
+    if resume_from_checkpoint is not None:
+        cmd += ' --resume_from_checkpoint "{}"'.format(resume_from_checkpoint)
+    if allow_checkpoint_overwrite:
+        cmd += ' --allow_checkpoint_overwrite'
+    if ncores is not None:
+        cmd += ' --ncores {}'.format(ncores)
+    if class_mapping_filename is not None:
+        cmd += ' --class_mapping_filename "{}"'.format(class_mapping_filename)
+    if include_image_size:
+        cmd += ' --include_image_size'
+    if include_image_timestamp:
+        cmd += ' --include_image_timestamp'
+    if include_exif_data:
+        cmd += ' --include_exif_data'
+    if overwrite_handling is not None:
+        cmd += ' --overwrite_handling {}'.format(overwrite_handling)
+    print(cmd)
+    import clipboard; clipboard.copy(cmd)
+    #%% Run inference interactively
     image_file_names = path_utils.find_images(image_dir, recursive=False)
+    results = None
     start_time = time.time()
@@ -840,12 +907,15 @@ def main():
         '--checkpoint_path',
         type=str,
         default=None,
-        help='File name to which checkpoints will be written if checkpoint_frequency is > 0')
+        help='File name to which checkpoints will be written if checkpoint_frequency is > 0, ' + \
+             'defaults to md_checkpoint_[date].json in the same folder as the output file')
     parser.add_argument(
         '--resume_from_checkpoint',
         type=str,
         default=None,
-        help='Path to a JSON checkpoint file to resume from')
+        help='Path to a JSON checkpoint file to resume from, or "auto" to ' + \
+             'find the most recent checkpoint in the same folder as the output file.  "auto" uses' + \
+             'checkpoint_path (rather than searching the output folder) if checkpoint_path is specified.')
     parser.add_argument(
         '--allow_checkpoint_overwrite',
         action='store_true',
@@ -897,7 +967,7 @@ def main():
     assert os.path.exists(args.detector_file), \
         'detector file {} does not exist'.format(args.detector_file)
-    assert 0.0 < args.threshold <= 1.0, 'Confidence threshold needs to be between 0 and 1'
+    assert 0.0 <= args.threshold <= 1.0, 'Confidence threshold needs to be between 0 and 1'
     assert args.output_file.endswith('.json'), 'output_file specified needs to end with .json'
     if args.checkpoint_frequency != -1:
         assert args.checkpoint_frequency > 0, 'Checkpoint_frequency needs to be > 0 or == -1'
@@ -919,19 +989,42 @@ def main():
         else:
             raise ValueError('Illegal overwrite handling string {}'.format(args.overwrite_handling))
+    output_dir = os.path.dirname(args.output_file)
+    if len(output_dir) > 0:
+        os.makedirs(output_dir,exist_ok=True)
+    assert not os.path.isdir(args.output_file), 'Specified output file is a directory'
     if args.class_mapping_filename is not None:
         load_custom_class_mapping(args.class_mapping_filename)
     # Load the checkpoint if available
     #
     # Relative file names are only output at the end; all file paths in the checkpoint are
-    # still full paths.
+    # still absolute paths.
     if args.resume_from_checkpoint is not None:
-        assert os.path.exists(args.resume_from_checkpoint), \
+        if args.resume_from_checkpoint == 'auto':
+            checkpoint_files = os.listdir(output_dir)
+            checkpoint_files = [fn for fn in checkpoint_files if \
+                                (fn.startswith('md_checkpoint') and fn.endswith('.json'))]
+            if len(checkpoint_files) == 0:
+                raise ValueError('resume_from_checkpoint set to "auto", but no checkpoints found in {}'.format(
+                    output_dir))
+            else:
+                if len(checkpoint_files) > 1:
+                    print('Warning: found {} checkpoints in {}, using the latest'.format(
+                        len(checkpoint_files),output_dir))
+                    checkpoint_files = sorted(checkpoint_files)
+                checkpoint_file_relative = checkpoint_files[-1]
+                checkpoint_file = os.path.join(output_dir,checkpoint_file_relative)
+        else:
+            checkpoint_file = args.resume_from_checkpoint
+        assert os.path.exists(checkpoint_file), \
             'File at resume_from_checkpoint specified does not exist'
-        with open(args.resume_from_checkpoint) as f:
+        with open(checkpoint_file) as f:
             print('Loading previous results from checkpoint file {}'.format(
-                args.resume_from_checkpoint))
+                checkpoint_file))
             saved = json.load(f)
         assert 'images' in saved, \
             'The checkpoint file does not have the correct fields; cannot be restored'
@@ -982,13 +1075,6 @@ def main():
     assert os.path.exists(image_file_names[0]), \
         'The first image to be processed does not exist at {}'.format(image_file_names[0])
-    output_dir = os.path.dirname(args.output_file)
-    if len(output_dir) > 0:
-        os.makedirs(output_dir,exist_ok=True)
-    assert not os.path.isdir(args.output_file), 'Specified output file is a directory'
     # Test that we can write to the output_file's dir if checkpointing requested
     if args.checkpoint_frequency != -1:
@@ -996,7 +1082,7 @@ def main():
             checkpoint_path = args.checkpoint_path
         else:
             checkpoint_path = os.path.join(output_dir,
-                                           'checkpoint_{}.json'.format(
+                                           'md_checkpoint_{}.json'.format(
                                                datetime.utcnow().strftime("%Y%m%d%H%M%S")))
         # Don't overwrite existing checkpoint files, this is a sure-fire way to eventually
@@ -1023,6 +1109,9 @@ def main():
     else:
+        if args.checkpoint_path is not None:
+            print('Warning: checkpointing disabled because checkpoint_frequency is -1, ' + \
+                  'but a checkpoint path was specified')
         checkpoint_path = None
     start_time = time.time()

detection/run_inference_with_yolov5_val.py CHANGED Viewed

@@ -49,6 +49,7 @@ from tqdm import tqdm
 from md_utils import path_utils
 from md_utils import process_utils
+from md_utils import string_utils
 from data_management import yolo_output_to_md_output
 from detection.run_detector import try_download_known_detector
@@ -68,17 +69,20 @@ class YoloInferenceOptions:
     ## Optional ##
-    # Required for YOLOv5 models, not for YOLOv8 models
+    # Required for older YOLOv5 inference, not for newer ulytralytics inference
     yolo_working_folder = None
-    model_type = 'yolov5' # currently 'yolov5' and 'yolov8' are supported
+    # Currently 'yolov5' and 'ultralytics' are supported, and really these are proxies for
+    # "the yolov5 repo" and "the ultralytics repo" (typically YOLOv8).
+    model_type = 'yolov5'
     image_size = default_image_size_with_augmentation
     conf_thres = '0.001'
     batch_size = 1
     device_string = '0'
     augment = True
+    half_precision_enabled = None
     symlink_folder = None
     use_symlinks = True
@@ -97,16 +101,30 @@ class YoloInferenceOptions:
     overwrite_handling = 'skip'
     preview_yolo_command_only = False
+    treat_copy_failures_as_warnings = False
+    save_yolo_debug_output = False
+    recursive = True
 #%% Main function
 def run_inference_with_yolo_val(options):
-    ##%% Path handling
+    ##%% Input and path handling
+    if options.model_type == 'yolov8':
+        print('Warning: model type "yolov8" supplied, "ultralytics" is the preferred model type string for YOLOv8 models')
+        options.model_type = 'ultralytics'
+    if (options.model_type == 'yolov5') and ('yolov8' in options.model_filename.lower()):
+        print('\n\n*** Warning: model type set as "yolov5", but your model filename contains "yolov8"... did you mean to use --model_type yolov8?" ***\n\n')
     if options.yolo_working_folder is None:
-        assert options.model_type == 'yolov8', \
+        assert options.model_type == 'ultralytics', \
             'A working folder is required to run YOLOv5 val.py'
     else:
         assert os.path.isdir(options.yolo_working_folder), \
@@ -115,6 +133,11 @@ def run_inference_with_yolo_val(options):
     assert os.path.isdir(options.input_folder) or os.path.isfile(options.input_folder), \
         'Could not find input {}'.format(options.input_folder)
+    if options.half_precision_enabled is not None:
+        assert options.half_precision_enabled in (0,1), \
+            'Invalid value {} for --half_precision_enabled (should be 0 or 1)'.format(
+                options.half_precision_enabled)
     # If the model filename is a known model string (e.g. "MDv5A", download the model if necessary)
     model_filename = try_download_known_detector(options.model_filename)
@@ -182,7 +205,7 @@ def run_inference_with_yolo_val(options):
     ##%% Enumerate images
     if os.path.isdir(options.input_folder):
-        image_files_absolute = path_utils.find_images(options.input_folder,recursive=True)
+        image_files_absolute = path_utils.find_images(options.input_folder,recursive=options.recursive)
     else:
         assert os.path.isfile(options.input_folder)
         with open(options.input_folder,'r') as f:
@@ -218,10 +241,20 @@ def run_inference_with_yolo_val(options):
             else:
                 shutil.copyfile(image_fn,symlink_full_path)
         except Exception as e:
-            image_id_to_error[image_id] = str(e)
-            print('Warning: error copying/creating link for input file {}: {}'.format(
-                image_fn,str(e)))
-            continue
+            error_string = str(e)
+            image_id_to_error[image_id] = error_string
+            # Always break if the user is trying to create symlinks on Windows without
+            # permission, 100% of images will always fail in this case.
+            if ('a required privilege is not held by the client' in error_string.lower()) or \
+               (not options.treat_copy_failures_as_warnings):
+                   print('\nError copying/creating link for input file {}: {}'.format(
+                       image_fn,error_string))
+                   raise
+            else:
+                print('Warning: error copying/creating link for input file {}: {}'.format(
+                    image_fn,error_string))
+                continue
     # ...for each image
@@ -270,17 +303,34 @@ def run_inference_with_yolo_val(options):
         if options.augment:
             cmd += ' --augment'
-    elif options.model_type == 'yolov8':
+        # --half is a store_true argument for YOLOv5's val.py
+        if (options.half_precision_enabled is not None) and (options.half_precision_enabled == 1):
+            cmd += ' --half'
+        # Sometimes useful for debugging
+        # cmd += ' --save_conf --save_txt'
+    elif options.model_type == 'ultralytics':
         if options.augment:
             augment_string = 'augment'
         else:
             augment_string = ''
-        cmd = 'yolo val {} model="{}" imgsz={} batch={} data="{}" project="{}" name="{}"'.format(
-            augment_string,model_filename,image_size_string,options.batch_size,yolo_dataset_file,
-            yolo_results_folder,'yolo_results')
-        cmd += ' save_hybrid save_json'
+        cmd = 'yolo val {} model="{}" imgsz={} batch={} data="{}" project="{}" name="{}" device="{}"'.\
+            format(augment_string,model_filename,image_size_string,options.batch_size,
+                   yolo_dataset_file,yolo_results_folder,'yolo_results',options.device_string)
+        cmd += ' save_json exist_ok'
+        if (options.half_precision_enabled is not None):
+            if options.half_precision_enabled == 1:
+                cmd += ' --half=True'
+            else:
+                assert options.half_precision_enabled == 0
+                cmd += ' --half=False'
+        # Sometimes useful for debugging
+        # cmd += ' save_conf save_txt'
     else:
@@ -293,38 +343,84 @@ def run_inference_with_yolo_val(options):
     if options.yolo_working_folder is not None:
         current_dir = os.getcwd()
-        os.chdir(options.yolo_working_folder)
+        os.chdir(options.yolo_working_folder)
     print('Running YOLO inference command:\n{}\n'.format(cmd))
     if options.preview_yolo_command_only:
         if options.remove_symlink_folder:
             try:
+                print('Removing YOLO symlink folder {}'.format(symlink_folder))
                 shutil.rmtree(symlink_folder)
             except Exception:
                 print('Warning: error removing symlink folder {}'.format(symlink_folder))
                 pass
         if options.remove_yolo_results_folder:
             try:
+                print('Removing YOLO results folder {}'.format(yolo_results_folder))
                 shutil.rmtree(yolo_results_folder)
             except Exception:
                 print('Warning: error removing YOLO results folder {}'.format(yolo_results_folder))
                 pass
         sys.exit()
-    execution_result = process_utils.execute_and_print(cmd)
+    execution_result = process_utils.execute_and_print(cmd,encoding='utf-8',verbose=True)
     assert execution_result['status'] == 0, 'Error running {}'.format(options.model_type)
     yolo_console_output = execution_result['output']
+    if options.save_yolo_debug_output:
+        with open(os.path.join(yolo_results_folder,'yolo_console_output.txt'),'w') as f:
+            for s in yolo_console_output:
+                f.write(s + '\n')
+        with open(os.path.join(yolo_results_folder,'image_id_to_file.json'),'w') as f:
+            json.dump(image_id_to_file,f,indent=1)
+        with open(os.path.join(yolo_results_folder,'image_id_to_error.json'),'w') as f:
+            json.dump(image_id_to_error,f,indent=1)
+    # YOLO console output contains lots of ANSI escape codes, remove them for easier parsing
+    yolo_console_output = [string_utils.remove_ansi_codes(s) for s in yolo_console_output]
+    # Find errors that occurred during the initial corruption check; these will not be included in the
+    # output.  Errors that occur during inference will be handled separately.
     yolo_read_failures = []
     for line in yolo_console_output:
+        # Lines look like:
+        #
+        # For ultralytics val:
+        #
+        # val: WARNING ⚠️ /a/b/c/d.jpg: ignoring corrupt image/label: [Errno 13] Permission denied: '/a/b/c/d.jpg'
+        # line = "val: WARNING ⚠️ /a/b/c/d.jpg: ignoring corrupt image/label: [Errno 13] Permission denied: '/a/b/c/d.jpg'"
+        #
+        # For yolov5 val.py:
+        #
+        # test: WARNING: a/b/c/d.jpg: ignoring corrupt image/label: cannot identify image file '/a/b/c/d.jpg'
+        # line = "test: WARNING: a/b/c/d.jpg: ignoring corrupt image/label: cannot identify image file '/a/b/c/d.jpg'"
         if 'cannot identify image file' in line:
             tokens = line.split('cannot identify image file')
             image_name = tokens[-1].strip()
             assert image_name[0] == "'" and image_name [-1] == "'"
             image_name = image_name[1:-1]
             yolo_read_failures.append(image_name)
+        elif 'ignoring corrupt image/label' in line:
+            assert 'WARNING' in line
+            if '⚠️' in line:
+                assert line.startswith('val'), \
+                    'Unrecognized line in YOLO output: {}'.format(line)
+                tokens = line.split('ignoring corrupt image/label')
+                image_name = tokens[0].split('⚠️')[-1].strip()
+            else:
+                assert line.startswith('test'), \
+                    'Unrecognized line in YOLO output: {}'.format(line)
+                tokens = line.split('ignoring corrupt image/label')
+                image_name = tokens[0].split('WARNING:')[-1].strip()
+            assert image_name.endswith(':')
+            image_name = image_name[0:-1]
+            yolo_read_failures.append(image_name)
     # image_file = yolo_read_failures[0]
     for image_file in yolo_read_failures:
         image_id = os.path.splitext(os.path.basename(image_file))[0]
@@ -338,7 +434,7 @@ def run_inference_with_yolo_val(options):
     ##%% Convert results to MD format
-    json_files = glob.glob(yolo_results_folder+ '/yolo_results/*.json')
+    json_files = glob.glob(yolo_results_folder + '/yolo_results/*.json')
     assert len(json_files) == 1
     yolo_json_file = json_files[0]
@@ -390,7 +486,7 @@ def run_inference_with_yolo_val(options):
 #%% Command-line driver
-import argparse,sys
+import argparse
 from md_utils.ct_utils import args_to_object
 def main():
@@ -422,9 +518,12 @@ def main():
     parser.add_argument(
         '--batch_size', default=options.batch_size, type=int,
         help='inference batch size (default {})'.format(options.batch_size))
+    parser.add_argument(
+        '--half_precision_enabled', default=None, type=int,
+        help='use half-precision-inference (1 or 0) (default is the underlying model\'s default, probably full for YOLOv8 and half for YOLOv5')
     parser.add_argument(
         '--device_string', default=options.device_string, type=str,
-        help='CUDA device specifier, e.g. "0" or "cpu" (default {})'.format(options.device_string))
+        help='CUDA device specifier, typically "0" or "1" for CUDA devices, "mps" for M1/M2 devices, or "cpu" (default {})'.format(options.device_string))
     parser.add_argument(
         '--overwrite_handling', default=options.overwrite_handling, type=str,
         help='action to take if the output file exists (skip, error, overwrite) (default {})'.format(
@@ -435,7 +534,7 @@ def main():
             '(otherwise defaults to MD categories)')
     parser.add_argument(
         '--model_type', default=options.model_type, type=str,
-        help='Model type (yolov5 or yolov8) (default {})'.format(options.model_type))
+        help='Model type ("yolov5" or "ultralytics" ("yolov8" behaves the same as "ultralytics")) (default {})'.format(options.model_type))
     parser.add_argument(
         '--symlink_folder', type=str,
@@ -452,6 +551,13 @@ def main():
     parser.add_argument(
         '--no_remove_yolo_results_folder', action='store_true',
         help='don\'t remove the temporary folder full of YOLO intermediate files')
+    parser.add_argument(
+        '--save_yolo_debug_output', action='store_true',
+        help='write yolo console output to a text file in the results folder, along with additional debug files')
+    parser.add_argument(
+        '--nonrecursive', action='store_true',
+        help='Disable recursive folder processing')
     parser.add_argument(
         '--preview_yolo_command_only', action='store_true',
@@ -474,14 +580,15 @@ def main():
     # If the caller hasn't specified an image size, choose one based on whether augmentation
     # is enabled.
-    if args.image_size is None:
-        assert options.augment in (0,1)
-        if options.augment == 1:
+    if args.image_size is None:
+        assert args.augment_enabled in (0,1), \
+            'Illegal augment_enabled value {}'.format(args.augment_enabled)
+        if args.augment_enabled == 1:
             args.image_size = default_image_size_with_augmentation
         else:
             args.image_size = default_image_size_with_no_augmentation
         augment_enabled_string = 'enabled'
-        if not options.augment:
+        if not args.augment_enabled:
             augment_enabled_string = 'disabled'
         print('Augmentation is {}, using default image size {}'.format(
             augment_enabled_string,args.image_size))
@@ -491,6 +598,7 @@ def main():
     if args.yolo_dataset_file is not None:
         options.yolo_category_id_to_name = args.yolo_dataset_file
+    options.recursive = (not options.nonrecursive)
     options.remove_symlink_folder = (not options.no_remove_symlink_folder)
     options.remove_yolo_results_folder = (not options.no_remove_yolo_results_folder)
     options.use_symlinks = (not options.no_use_symlinks)

megadetector 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl