PyPI - megadetector - Versions diffs - 5.0.18__py3-none-any.whl → 5.0.20__py3-none-any.whl - Mend

megadetector 5.0.18__py3-none-any.whl → 5.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (26) hide show

megadetector/detection/run_inference_with_yolov5_val.py CHANGED Viewed

@@ -788,38 +788,35 @@ def run_inference_with_yolo_val(options):
     yolo_read_failures = []
     for line in yolo_console_output:
-        # Lines look like:
+        #
+        # Lines indicating read failures look like:
         #
         # For ultralytics val:
         #
-        # val: WARNING ⚠️ /a/b/c/d.jpg: ignoring corrupt image/label: [Errno 13] Permission denied: '/a/b/c/d.jpg'
         # line = "val: WARNING ⚠️ /a/b/c/d.jpg: ignoring corrupt image/label: [Errno 13] Permission denied: '/a/b/c/d.jpg'"
         #
         # For yolov5 val.py:
         #
-        # test: WARNING: a/b/c/d.jpg: ignoring corrupt image/label: cannot identify image file '/a/b/c/d.jpg'
         # line = "test: WARNING: a/b/c/d.jpg: ignoring corrupt image/label: cannot identify image file '/a/b/c/d.jpg'"
-        if 'cannot identify image file' in line:
-            tokens = line.split('cannot identify image file')
-            image_name = tokens[-1].strip()
-            assert image_name[0] == "'" and image_name [-1] == "'"
-            image_name = image_name[1:-1]
-            yolo_read_failures.append(image_name)
-        elif 'ignoring corrupt image/label' in line:
-            assert 'WARNING' in line
-            if '⚠️' in line:
-                assert line.startswith('val'), \
-                    'Unrecognized line in YOLO output: {}'.format(line)
-                tokens = line.split('ignoring corrupt image/label')
-                image_name = tokens[0].split('⚠️')[-1].strip()
-            else:
-                assert line.startswith('test'), \
-                    'Unrecognized line in YOLO output: {}'.format(line)
-                tokens = line.split('ignoring corrupt image/label')
-                image_name = tokens[0].split('WARNING:')[-1].strip()
-            assert image_name.endswith(':')
-            image_name = image_name[0:-1]
+        #
+        # In both cases, when we are using symlinks, the first filename is the symlink name, the
+        # second filename is the target, e.g.:
+        #
+        # line = "test: WARNING: /tmp/md_to_yolo/md_to_yolo_xyz/symlinks/xyz/0000000004.jpg: ignoring corrupt image/label: cannot identify image file '/tmp/md-tests/md-test-images/corrupt-images/real-file.jpg'"
+        #
+        line = line.replace('⚠️',':')
+        if 'ignoring corrupt image/label' in line:
+            tokens = line.split('ignoring corrupt image/label')
+            assert len(tokens) == 2
+            tokens = tokens[0].split(':',maxsplit=3)
+            assert len(tokens) == 4
+            assert 'warning' in tokens[1].lower()
+            image_name = tokens[2].strip()
             yolo_read_failures.append(image_name)
+    # ...for each line in the console output
     # image_file = yolo_read_failures[0]
     for image_file in yolo_read_failures:

megadetector/detection/video_utils.py CHANGED Viewed

@@ -228,9 +228,10 @@ def run_callback_on_frames(input_video_file,
         input_video_file (str): video file to process
         frame_callback (function): callback to run on frames, should take an np.array and a string and
             return a single value.  callback should expect PIL-formatted (RGB) images.
-        every_n_frames (int, optional): sample every Nth frame starting from the first frame;
-            if this is None or 1, every frame is processed. Mutually exclusive with
-            frames_to_process.
+        every_n_frames (float, optional): sample every Nth frame starting from the first frame;
+            if this is None or 1, every frame is processed.  If this is a negative value, that's
+            interpreted as a sampling rate in seconds, which is rounded to the nearest frame sampling
+            rate. Mutually exclusive with frames_to_process.
         verbose (bool, optional): enable additional debug console output
         frames_to_process (list of int, optional): process this specific set of frames;
             mutually exclusive with every_n_frames.  If all values are beyond the length
@@ -263,6 +264,13 @@ def run_callback_on_frames(input_video_file,
     frame_filenames = []
     results = []
+    if every_n_frames is not None and every_n_frames < 0:
+        every_n_seconds = abs(every_n_frames)
+        every_n_frames = int(every_n_seconds * frame_rate)
+        if verbose:
+            print('Interpreting a time sampling rate of {} hz as a frame interval of {}'.format(
+                every_n_seconds,every_n_frames))
     # frame_number = 0
     for frame_number in range(0,n_frames):
@@ -776,12 +784,18 @@ class FrameToVideoOptions:
         #: for the whole video, i.e. "1" means "use the confidence value from the highest-confidence frame"
         self.nth_highest_confidence = 1
+        #: Should we include just a single representative frame result for each video (default), or
+        #: every frame that was processed?
+        self.include_all_processed_frames = False
         #: What to do if a file referred to in a .json results file appears not to be a
         #: video; can be 'error' or 'skip_with_warning'
         self.non_video_behavior = 'error'
-def frame_results_to_video_results(input_file,output_file,options=None,
+def frame_results_to_video_results(input_file,
+                                   output_file,
+                                   options=None,
                                    video_filename_to_frame_rate=None):
     """
     Given an MD results file produced at the *frame* level, corresponding to a directory
@@ -801,7 +815,7 @@ def frame_results_to_video_results(input_file,output_file,options=None,
     if options is None:
         options = FrameToVideoOptions()
     # Load results
     with open(input_file,'r') as f:
         input_data = json.load(f)
@@ -854,47 +868,58 @@ def frame_results_to_video_results(input_file,output_file,options=None,
     # video_name = list(video_to_frame_info.keys())[0]
     for video_name in tqdm(video_to_frame_info):
-        frames = video_to_frame_info[video_name]
-        all_detections_this_video = []
-        # frame = frames[0]
-        for frame in frames:
-            if ('detections' in frame) and (frame['detections'] is not None):
-                all_detections_this_video.extend(frame['detections'])
-        # At most one detection for each category for the whole video
-        canonical_detections = []
-        # category_id = list(detection_categories.keys())[0]
-        for category_id in detection_categories:
-            category_detections = [det for det in all_detections_this_video if \
-                                   det['category'] == category_id]
-            # Find the nth-highest-confidence video to choose a confidence value
-            if len(category_detections) >= options.nth_highest_confidence:
-                category_detections_by_confidence = sorted(category_detections,
-                                                           key = lambda i: i['conf'],reverse=True)
-                canonical_detection = category_detections_by_confidence[options.nth_highest_confidence-1]
-                canonical_detections.append(canonical_detection)
         # Prepare the output representation for this video
         im_out = {}
         im_out['file'] = video_name
-        im_out['detections'] = canonical_detections
         if (video_filename_to_frame_rate is not None) and \
             (video_name in video_filename_to_frame_rate):
             im_out['frame_rate'] = video_filename_to_frame_rate[video_name]
-        # 'max_detection_conf' is no longer included in output files by default
-        if False:
-            im_out['max_detection_conf'] = 0
-            if len(canonical_detections) > 0:
-                confidences = [d['conf'] for d in canonical_detections]
-                im_out['max_detection_conf'] = max(confidences)
+        # Find all detections for this video
+        all_detections_this_video = []
+        frames = video_to_frame_info[video_name]
+        # frame = frames[0]
+        for frame in frames:
+            if ('detections' in frame) and (frame['detections'] is not None):
+                all_detections_this_video.extend(frame['detections'])
+        # Should we keep detections for all frames?
+        if (options.include_all_processed_frames):
+            im_out['detections'] = all_detections_this_video
+        # ...or should we keep just a canonical detection for each category?
+        else:
+            canonical_detections = []
+            # category_id = list(detection_categories.keys())[0]
+            for category_id in detection_categories:
+                category_detections = [det for det in all_detections_this_video if \
+                                       det['category'] == category_id]
+                # Find the nth-highest-confidence video to choose a confidence value
+                if len(category_detections) >= options.nth_highest_confidence:
+                    category_detections_by_confidence = sorted(category_detections,
+                                                               key = lambda i: i['conf'],reverse=True)
+                    canonical_detection = category_detections_by_confidence[options.nth_highest_confidence-1]
+                    canonical_detections.append(canonical_detection)
+            im_out['detections'] = canonical_detections
+            # 'max_detection_conf' is no longer included in output files by default
+            if False:
+                im_out['max_detection_conf'] = 0
+                if len(canonical_detections) > 0:
+                    confidences = [d['conf'] for d in canonical_detections]
+                    im_out['max_detection_conf'] = max(confidences)
+        # ...if we're keeping output for all frames / canonical frames
         output_images.append(im_out)

megadetector/postprocessing/validate_batch_results.py ADDED Viewed

@@ -0,0 +1,186 @@
+"""
+validate_batch_results.py
+Given a .json file containing MD results, validate that it's compliant with the format spec:
+https://lila.science/megadetector-output-format
+"""
+#%% Constants and imports
+import os
+import sys
+import json
+import argparse
+from megadetector.detection.video_utils import is_video_file
+from megadetector.utils.ct_utils import args_to_object
+typical_info_fields = ['detector','detection_completion_time',
+                       'classifier','classification_completion_time',
+                       'detection_metadata','classifier_metadata']
+required_keys = ['info','images','detection_categories']
+typical_keys = ['classification_categories']
+#%% Classes
+class ValidateBatchResultsOptions:
+    """
+    Options controlling the behavior of validate_batch_results()
+    """
+    def __init__(self):
+        #: Should we verify that images exist?  If this is True, and the .json
+        #: file contains relative paths, relative_path_base needs to be specified.
+        self.check_image_existence = False
+        #: If check_image_existence is True, where do the images live?
+        #:
+        #: If None, assumes absolute paths.
+        self.relative_path_base = None
+# ...class ValidateBatchResultsOptions
+#%% Main function
+def validate_batch_results(json_filename,options=None):
+    """
+    Verify that [json_filename] is a valid MD output file.  Currently errors on invalid files.
+    Args:
+        json_filename (str): the filename to validate
+        options (ValidateBatchResultsOptions, optional): all the parameters used to control this
+            process, see ValidateBatchResultsOptions for details
+    Returns:
+        bool: reserved; currently always errors or returns True.
+    """
+    if options is None:
+        options = ValidateBatchResultsOptions()
+    with open(json_filename,'r') as f:
+        d = json.load(f)
+    ## Info validation
+    assert 'info' in d
+    info = d['info']
+    assert isinstance(info,dict)
+    assert 'format_version' in info
+    format_version = float(info['format_version'])
+    assert format_version >= 1.3, 'This validator can only be used with format version 1.3 or later'
+    print('Validating a .json results file with format version {}'.format(format_version))
+    ## Category validation
+    assert 'detection_categories' in d
+    for k in d['detection_categories'].keys():
+        # Categories should be string-formatted ints
+        assert isinstance(k,str)
+        _ = int(k)
+        assert isinstance(d['detection_categories'][k],str)
+    if 'classification_categories' in d:
+        for k in d['classification_categories'].keys():
+            # Categories should be string-formatted ints
+            assert isinstance(k,str)
+            _ = int(k)
+            assert isinstance(d['classification_categories'][k],str)
+    ## Image validation
+    assert 'images' in d
+    assert isinstance(d['images'],list)
+    # im = d['images'][0]
+    for im in d['images']:
+        assert isinstance(im,dict)
+        assert 'file' in im
+        file = im['file']
+        if options.check_image_existence:
+            if options.relative_path_base is None:
+                file_abs = file
+            else:
+                file_abs = os.path.join(options.relative_path_base,file)
+            assert os.path.isfile(file_abs), 'Cannot find file {}'.format(file_abs)
+        if 'detections' not in im or im['detections'] is None:
+            assert 'failure' in im and isinstance(im['failure'],str)
+        else:
+            assert isinstance(im['detections'],list)
+        if is_video_file(im['file']) and (format_version >= 1.4):
+            assert 'frame_rate' in im
+            if 'detections' in im and im['detections'] is not None:
+                for det in im['detections']:
+                    assert 'frame_number' in det
+    # ...for each image
+    ## Checking on other keys
+    for k in d.keys():
+        if k not in typical_keys and k not in required_keys:
+            print('Warning: non-standard key {} present at file level'.format(k))
+# ...def validate_batch_results(...)
+#%% Interactive driver(s)
+if False:
+    #%%
+    options = ValidateBatchResultsOptions()
+    # json_filename = r'g:\temp\format.json'
+    # json_filename = r'g:\temp\test-videos\video_results.json'
+    json_filename = r'g:\temp\test-videos\image_results.json'
+    options.check_image_existence = True
+    options.relative_path_base = r'g:\temp\test-videos'
+    validate_batch_results(json_filename,options)
+#%% Command-line driver
+def main():
+    options = ValidateBatchResultsOptions()
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'json_filename',
+        help='path to .json file containing MegaDetector results')
+    parser.add_argument(
+        '--check_image_existence', action='store_true',
+        help='check that all images referred to in the results file exist')
+    parser.add_argument(
+        '--relative_path_base', default=None,
+        help='if --check_image_existence is specified and paths are relative, use this as the base folder')
+    if len(sys.argv[1:]) == 0:
+        parser.print_help()
+        parser.exit()
+    args = parser.parse_args()
+    args_to_object(args, options)
+    validate_batch_results(args.json_filename,options)
+if __name__ == '__main__':
+    main()

megadetector/taxonomy_mapping/map_new_lila_datasets.py CHANGED Viewed

@@ -15,10 +15,10 @@ import json
 # Created by get_lila_category_list.py
 input_lila_category_list_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
-output_file = os.path.expanduser('~/lila/lila_additions_2024.07.16.csv')
+output_file = os.path.expanduser('~/lila/lila_additions_2024.10.05.csv')
 datasets_to_map = [
-    'Desert Lion Conservation Camera Traps'
+    'Ohio Small Animals'
     ]
@@ -127,13 +127,18 @@ output_df.to_csv(output_file, index=None, header=True)
 #%% Manual lookup
 if False:
+    #%%
+    from megadetector.utils.path_utils import open_file
+    open_file(output_file)
     #%%
     # q = 'white-throated monkey'
     # q = 'cingulata'
     # q = 'notamacropus'
-    q = 'aves'
+    q = 'thamnophis saurita saurita'
     taxonomy_preference = 'inat'
     m = get_preferred_taxonomic_match(q,taxonomy_preference)
     # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)

megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py CHANGED Viewed

@@ -68,7 +68,8 @@ if False:
     #%% Generate the final output file
-    assert not os.path.isfile(release_taxonomy_file)
+    assert not os.path.isfile(release_taxonomy_file), \
+        'File {} exists, delete it manually before proceeding'.format(release_taxonomy_file)
     known_levels = ['stateofmatter', #noqa
                         'kingdom',
@@ -88,7 +89,7 @@ if False:
                         'genus',
                         'species','subspecies','variety']
-    levels_to_exclude = ['stateofmatter','zoosection','parvorder']
+    levels_to_exclude = ['stateofmatter','zoosection','parvorder','complex']
     for s in levels_to_exclude:
         assert s not in levels_to_include

megadetector/taxonomy_mapping/preview_lila_taxonomy.py CHANGED Viewed

@@ -16,7 +16,7 @@ import os
 import pandas as pd
 # lila_taxonomy_file = r"c:\git\agentmorrisprivate\lila-taxonomy\lila-taxonomy-mapping.csv"
-lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2024.07.16.csv')
+lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2024.10.05.csv')
 preview_base = os.path.expanduser('~/lila/lila_taxonomy_preview')
 os.makedirs(preview_base,exist_ok=True)
@@ -383,6 +383,8 @@ for i_row,row in df.iterrows():
 #%% Download sample images for all scientific names
+# Takes ~1 minute per 10 rows
 remapped_queries = {'papio':'papio+baboon',
                     'damaliscus lunatus jimela':'damaliscus lunatus',
                     'mazama':'genus+mazama',

megadetector/utils/ct_utils.py CHANGED Viewed

@@ -105,6 +105,26 @@ def args_to_object(args, obj):
     return obj
+def dict_to_object(d, obj):
+    """
+    Copies all fields from a dict to an object. Skips fields starting with _.
+    Does not check existence in the target object.
+    Args:
+        d (dict): the dict to convert to an object
+        obj (object): object whose attributes will be updated
+    Returns:
+        object: the modified object (modified in place, but also returned)
+    """
+    for k in d.keys():
+        if not k.startswith('_'):
+            setattr(obj, k, d[k])
+    return obj
 def pretty_print_object(obj, b_print=True):
     """
     Converts an arbitrary object to .json, optionally printing the .json representation.

megadetector/utils/md_tests.py CHANGED Viewed

@@ -177,7 +177,8 @@ def get_expected_results_filename(gpu_is_available,
 def download_test_data(options=None):
     """
-    Downloads the test zipfile if necessary, unzips if necessary.
+    Downloads the test zipfile if necessary, unzips if necessary.  Initializes
+    temporary fields in [options], particularly [options.scratch_dir].
     Args:
         options (MDTestOptions, optional): see MDTestOptions for details
@@ -683,10 +684,16 @@ def run_python_tests(options):
     ## Verify results
+    # Verify format correctness
+    from megadetector.postprocessing.validate_batch_results import validate_batch_results
+    validate_batch_results(inference_output_file)
+    # Verify value correctness
     expected_results_file = get_expected_results_filename(is_gpu_available(verbose=False),
                                                           options=options)
     compare_results(inference_output_file,expected_results_file,options)
     # Make note of this filename, we will use it again later
     inference_output_file_standard_inference = inference_output_file
@@ -1341,11 +1348,13 @@ if False:
     options.force_data_download = False
     options.force_data_unzip = False
     options.warning_mode = False
-    options.max_coord_error = 0.001
-    options.max_conf_error = 0.005
-    options.cli_working_dir = r'c:\git\MegaDetector'
-    options.yolo_working_dir = r'c:\git\yolov5-md'
+    options.max_coord_error = 0.01 # 0.001
+    options.max_conf_error = 0.01 # 0.005
+    # options.cli_working_dir = r'c:\git\MegaDetector'
+    # options.yolo_working_dir = r'c:\git\yolov5-md'
+    options.cli_working_dir = os.path.expanduser('~')
+    options.yolo_working_dir = '/mnt/c/git/yolov5-md'
+    options = download_test_data(options)
     #%%
@@ -1357,6 +1366,47 @@ if False:
     run_tests(options)
+    #%%
+    yolo_inference_options_dict = {'input_folder': '/tmp/md-tests/md-test-images',
+                                   'image_filename_list': None,
+                                   'model_filename': 'MDV5A',
+                                   'output_file': '/tmp/md-tests/folder_inference_output_yolo_val.json',
+                                   'yolo_working_folder': '/mnt/c/git/yolov5-md',
+                                   'model_type': 'yolov5',
+                                   'image_size': None,
+                                   'conf_thres': 0.005,
+                                   'batch_size': 1,
+                                   'device_string': '0',
+                                   'augment': False,
+                                   'half_precision_enabled': None,
+                                   'symlink_folder': None,
+                                   'use_symlinks': True,
+                                   'unique_id_strategy': 'links',
+                                   'yolo_results_folder': None,
+                                   'remove_symlink_folder': True,
+                                   'remove_yolo_results_folder': True,
+                                   'yolo_category_id_to_name': {0: 'animal', 1: 'person', 2: 'vehicle'},
+                                   'overwrite_handling': 'overwrite',
+                                   'preview_yolo_command_only': False,
+                                   'treat_copy_failures_as_warnings': False,
+                                   'save_yolo_debug_output': False,
+                                   'recursive': True,
+                                   'checkpoint_frequency': None}
+    from megadetector.utils.ct_utils import dict_to_object
+    from megadetector.detection.run_inference_with_yolov5_val import \
+        YoloInferenceOptions, run_inference_with_yolo_val
+    yolo_inference_options = YoloInferenceOptions()
+    yolo_inference_options = dict_to_object(yolo_inference_options_dict, yolo_inference_options)
+    os.makedirs(options.scratch_dir,exist_ok=True)
+    inference_output_file_yolo_val = os.path.join(options.scratch_dir,'folder_inference_output_yolo_val.json')
+    run_inference_with_yolo_val(yolo_inference_options)
 #%% Command-line driver

{megadetector-5.0.18.dist-info → megadetector-5.0.20.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: megadetector
-Version: 5.0.18
+Version: 5.0.20
 Summary: MegaDetector is an AI model that helps conservation folks spend less time doing boring things with camera trap images.
 Author-email: Your friendly neighborhood MegaDetector team <cameratraps@lila.science>
 Maintainer-email: Your friendly neighborhood MegaDetector team <cameratraps@lila.science>
@@ -39,7 +39,7 @@ Requires-Dist: Pillow >=9.5
 Requires-Dist: tqdm >=4.64.0
 Requires-Dist: jsonpickle >=3.0.2
 Requires-Dist: humanfriendly >=10.0
-Requires-Dist: numpy >=1.26.0
+Requires-Dist: numpy <1.24,>=1.22
 Requires-Dist: matplotlib >=3.8.0
 Requires-Dist: opencv-python >=4.8.0
 Requires-Dist: requests >=2.31.0

megadetector 5.0.18__py3-none-any.whl → 5.0.20__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.18__py3-none-any.whl → 5.0.20__py3-none-any.whl