megadetector 5.0.21__py3-none-any.whl → 5.0.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (37)
  1. megadetector/data_management/cct_json_utils.py +143 -7
  2. megadetector/data_management/cct_to_md.py +12 -5
  3. megadetector/data_management/databases/integrity_check_json_db.py +83 -77
  4. megadetector/data_management/importers/raic_csv_to_md_results.py +416 -0
  5. megadetector/data_management/importers/zamba_results_to_md_results.py +1 -2
  6. megadetector/data_management/lila/create_lila_test_set.py +25 -11
  7. megadetector/data_management/lila/download_lila_subset.py +9 -2
  8. megadetector/data_management/lila/generate_lila_per_image_labels.py +3 -2
  9. megadetector/data_management/lila/test_lila_metadata_urls.py +5 -1
  10. megadetector/data_management/read_exif.py +10 -14
  11. megadetector/data_management/rename_images.py +1 -1
  12. megadetector/detection/process_video.py +14 -3
  13. megadetector/detection/pytorch_detector.py +15 -3
  14. megadetector/detection/run_detector.py +4 -3
  15. megadetector/detection/run_detector_batch.py +2 -2
  16. megadetector/detection/run_inference_with_yolov5_val.py +121 -13
  17. megadetector/detection/video_utils.py +21 -10
  18. megadetector/postprocessing/classification_postprocessing.py +1 -1
  19. megadetector/postprocessing/compare_batch_results.py +931 -142
  20. megadetector/postprocessing/detector_calibration.py +243 -45
  21. megadetector/postprocessing/md_to_coco.py +85 -20
  22. megadetector/postprocessing/postprocess_batch_results.py +0 -1
  23. megadetector/postprocessing/validate_batch_results.py +65 -15
  24. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -12
  25. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
  26. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -1
  27. megadetector/utils/ct_utils.py +71 -14
  28. megadetector/utils/md_tests.py +9 -1
  29. megadetector/utils/path_utils.py +14 -7
  30. megadetector/utils/process_utils.py +9 -3
  31. megadetector/utils/write_html_image_list.py +5 -1
  32. megadetector/visualization/visualization_utils.py +211 -87
  33. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/METADATA +19 -18
  34. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/RECORD +37 -36
  35. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/WHEEL +1 -1
  36. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/LICENSE +0 -0
  37. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/top_level.txt +0 -0
megadetector/postprocessing/validate_batch_results.py

@@ -15,8 +15,10 @@ import sys
 import json
 import argparse
 
+from tqdm import tqdm
+
 from megadetector.detection.video_utils import is_video_file
-from megadetector.utils.ct_utils import args_to_object
+from megadetector.utils.ct_utils import args_to_object, is_list_sorted # noqa
 
 typical_info_fields = ['detector','detection_completion_time',
                        'classifier','classification_completion_time',
@@ -45,6 +47,9 @@ class ValidateBatchResultsOptions:
 
         #: Should we return the loaded data, or just the validation results?
         self.return_data = False
+
+        #: Enable additional debug output
+        self.verbose = False
 
 # ...class ValidateBatchResultsOptions
 
@@ -73,6 +78,9 @@ def validate_batch_results(json_filename,options=None):
     if options is None:
         options = ValidateBatchResultsOptions()
 
+    if options.verbose:
+        print('Loading results from {}'.format(json_filename))
+
     with open(json_filename,'r') as f:
         d = json.load(f)
 
@@ -140,8 +148,11 @@
     if not isinstance(d['images'],list):
         raise ValueError('Invalid images field')
 
+    if options.verbose:
+        print('Validating images')
+
     # im = d['images'][0]
-    for i_im,im in enumerate(d['images']):
+    for i_im,im in tqdm(enumerate(d['images']),total=len(d['images']),disable=(not options.verbose)):
 
         if not isinstance(im,dict):
             raise ValueError('Invalid image at index {}'.format(i_im))
@@ -150,34 +161,61 @@
 
         file = im['file']
 
+        if 'detections' in im and im['detections'] is not None:
+            for det in im['detections']:
+                assert 'category' in det, 'Image {} has a detection with no category'.format(file)
+                assert 'conf' in det, 'Image {} has a detection with no confidence'.format(file)
+                assert isinstance(det['conf'],float), \
+                    'Image {} has an illegal confidence value'.format(file)
+                assert 'bbox' in det, 'Image {} has a detection with no box'.format(file)
+                assert det['category'] in d['detection_categories'], \
+                    'Image {} has a detection with an unmapped category {}'.format(
+                        file,det['category'])
+
         if options.check_image_existence:
+
             if options.relative_path_base is None:
                 file_abs = file
             else:
                 file_abs = os.path.join(options.relative_path_base,file)
             if not os.path.isfile(file_abs):
                 raise ValueError('Cannot find file {}'.format(file_abs))
-
-        if ('detections' not in im) or (im['detections'] is None):
-            if not ('failure' in im and isinstance(im['failure'],str)):
-                raise ValueError('Image {} has no detections and no failure'.format(im['file']))
+
+        if 'failure' in im:
+            if im['failure'] is not None:
+                if not isinstance(im['failure'],str):
+                    raise ValueError('Image {} has an illegal [failure] value: {}'.format(
+                        im['file'],str(im['failure'])))
+                if 'detections' not in im:
+                    s = 'Image {} has a failure value, should also have a null detections array'.format(
+                        im['file'])
+                    validation_results['warnings'].append(s)
+                elif im['detections'] is not None:
+                    raise ValueError('Image {} has a failure value but a non-null detections array'.format(
+                        im['file']))
         else:
             if not isinstance(im['detections'],list):
                 raise ValueError('Invalid detections list for image {}'.format(im['file']))
-
+
         if is_video_file(im['file']) and (format_version >= 1.4):
+
             if 'frame_rate' not in im:
                 raise ValueError('Video without frame rate: {}'.format(im['file']))
+            if im['frame_rate'] < 0:
+                raise ValueError('Video with illegal frame rate {}: {}'.format(
+                    str(im['frame_rate']),im['file']))
             if 'detections' in im and im['detections'] is not None:
                 for det in im['detections']:
                     if 'frame_number' not in det:
                         raise ValueError('Frame without frame number in video {}'.format(
                             im['file']))
+                frame_numbers = [det['frame_number'] for det in im['detections']] # noqa
+                # assert is_list_sorted(frame_numbers)
 
     # ...for each image
 
 
-    ## Checking on other keys
+    ## Validation of other keys
 
     for k in d.keys():
         if (k not in typical_keys) and (k not in required_keys):
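The stricter per-image rules introduced here are easiest to see with a few hypothetical records (the filenames, failure strings, and category IDs below are illustrative, not taken from this diff):

# Hypothetical records illustrating the 5.0.23 consistency rules

# Valid: each detection needs a 'category' (mapped in detection_categories),
# a float 'conf', and a 'bbox'
ok_image = {'file': 'a.jpg',
            'detections': [{'category': '1', 'conf': 0.9, 'bbox': [0.0, 0.0, 0.5, 0.5]}]}

# Valid: a failure string paired with a null detections array
ok_failure = {'file': 'b.jpg', 'failure': 'image access', 'detections': None}

# Warning only: a failure value with no detections field at all
warn_failure = {'file': 'c.jpg', 'failure': 'image access'}

# ValueError: a failure value alongside a non-null detections array
bad_failure = {'file': 'd.jpg', 'failure': 'image access', 'detections': []}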
@@ -188,6 +226,8 @@ def validate_batch_results(json_filename,options=None):
 
         validation_results['errors'].append(str(e))
 
+    # ...try/except
+
     if options.return_data:
         to_return = d
     else:
@@ -204,15 +244,25 @@
 
 if False:
 
-    #%%
+    #%% Validate all .json files in the MD test suite
+
+    from megadetector.utils.path_utils import recursive_file_list
+    filenames = recursive_file_list(os.path.expanduser('~/AppData/Local/Temp/md-tests'))
+    filenames = [fn for fn in filenames if fn.endswith('.json')]
+    filenames = [fn for fn in filenames if 'detectionIndex' not in fn]
 
     options = ValidateBatchResultsOptions()
-    # json_filename = r'g:\temp\format.json'
-    # json_filename = r'g:\temp\test-videos\video_results.json'
-    json_filename = r'g:\temp\test-videos\image_results.json'
-    options.check_image_existence = True
-    options.relative_path_base = r'g:\temp\test-videos'
-    validate_batch_results(json_filename,options)
+    options.check_image_existence = False
+    options.relative_path_base = None # r'g:\temp\test-videos'
+
+    for json_filename in filenames:
+        results = validate_batch_results(json_filename,options)
+        if len(results['validation_results']['warnings']) > 0:
+            print('Warnings in file {}:'.format(json_filename))
+            for s in results['validation_results']['warnings']:
+                print(s)
+            print('')
+        assert len(results['validation_results']['errors']) == 0
 
 
 #%% Command-line driver
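A minimal sketch of the new verbose option in use; the results path is a placeholder, and the return structure follows the interactive driver above:

from megadetector.postprocessing.validate_batch_results import \
    ValidateBatchResultsOptions, validate_batch_results

options = ValidateBatchResultsOptions()
options.verbose = True  # new in 5.0.23: status output plus a tqdm progress bar
results = validate_batch_results('md_results.json', options)  # placeholder path

for s in results['validation_results']['warnings']:
    print(s)
assert len(results['validation_results']['errors']) == 0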
megadetector/taxonomy_mapping/map_new_lila_datasets.py

@@ -15,15 +15,17 @@ import json
 # Created by get_lila_category_list.py
 input_lila_category_list_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
 
-output_file = os.path.expanduser('~/lila/lila_additions_2024.10.05.csv')
+output_file = os.path.expanduser('~/lila/lila_additions_2024.12.31.csv')
 
 datasets_to_map = [
-    'Ohio Small Animals'
+    'Seattle(ish) Camera Traps'
     ]
 
 
 #%% Initialize taxonomic lookup
 
+# Takes ~2 mins
+
 from megadetector.taxonomy_mapping.species_lookup import \
     initialize_taxonomy_lookup, get_preferred_taxonomic_match
 
@@ -39,27 +41,27 @@ lila_datasets = set()
 
 for dataset_name in input_lila_categories.keys():
     # The script that generates this dictionary creates a separate entry for bounding box
-    # metadata files, but those don't represent new dataset names
+    # metadata files, but those don't represent new dataset names, so we ignore them here.
     lila_datasets.add(dataset_name.replace('_bbox',''))
-
+
 for s in datasets_to_map:
     assert s in lila_datasets
-
-
+
+
 #%% Find all categories
 
 category_mappings = []
 
 # dataset_name = datasets_to_map[0]
 for dataset_name in datasets_to_map:
-
+
     ds_categories = input_lila_categories[dataset_name]
     for category in ds_categories:
         category_name = category['name']
         assert ':' not in category_name
         mapping_name = dataset_name + ':' + category_name
         category_mappings.append(mapping_name)
-
+
 print('Need to create {} mappings'.format(len(category_mappings)))
 
 
@@ -128,22 +130,23 @@ output_df.to_csv(output_file, index=None, header=True)
 
 if False:
 
-    #%%
-
+    #%% You probably want to open the .csv file first
+
     from megadetector.utils.path_utils import open_file
     open_file(output_file)
+
 
     #%%
 
     # q = 'white-throated monkey'
     # q = 'cingulata'
     # q = 'notamacropus'
-    q = 'thamnophis saurita saurita'
+    q = 'insects'
     taxonomy_preference = 'inat'
     m = get_preferred_taxonomic_match(q,taxonomy_preference)
     # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
 
-    if m is None:
+    if (m is None) or (len(m.taxonomy_string) == 0):
         print('No match')
     else:
         if m.source != taxonomy_preference:
megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py

@@ -89,7 +89,7 @@ if False:
                         'genus',
                         'species','subspecies','variety']
 
-    levels_to_exclude = ['stateofmatter','zoosection','parvorder','complex']
+    levels_to_exclude = ['stateofmatter','zoosection','parvorder','complex','epifamily']
 
     for s in levels_to_exclude:
        assert s not in levels_to_include
megadetector/taxonomy_mapping/preview_lila_taxonomy.py

@@ -16,7 +16,7 @@ import os
 import pandas as pd
 
 # lila_taxonomy_file = r"c:\git\agentmorrisprivate\lila-taxonomy\lila-taxonomy-mapping.csv"
-lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2024.10.05.csv')
+lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2024.12.31.csv')
 
 preview_base = os.path.expanduser('~/lila/lila_taxonomy_preview')
 os.makedirs(preview_base,exist_ok=True)
@@ -399,6 +399,8 @@ images_per_query = 15
 min_valid_images_per_query = 3
 min_valid_image_size = 3000
 
+# TODO: parallelize this loop
+#
 # i_row = 0; row = df.iloc[i_row]
 for i_row,row in df.iterrows():
 
megadetector/utils/ct_utils.py

@@ -12,6 +12,7 @@ import inspect
 import json
 import math
 import os
+import builtins
 
 import jsonpickle
 import numpy as np
@@ -547,7 +548,7 @@ def image_file_to_camera_folder(image_fn):
     # 100EK113 is (for some reason) the overflow folder style for Bushnell cameras
     # 100_BTCF is the overflow folder style for Browning cameras
     # 100MEDIA is the overflow folder style used on a number of consumer-grade cameras
-    patterns = ['\/\d+RECNX\/','\/\d+EK\d+\/','\/\d+_BTCF\/','\/\d+MEDIA\/']
+    patterns = [r'/\d+RECNX/',r'/\d+EK\d+/',r'/\d+_BTCF/',r'/\d+MEDIA/']
 
     image_fn = image_fn.replace('\\','/')
     for pat in patterns:
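The raw-string change is behavior-preserving: '\/' and r'/' describe the same regex, but only the raw string avoids Python's invalid-escape-sequence warnings. A quick illustration:

import re

# Both patterns match the same overflow-folder paths, but the raw string
# doesn't trigger DeprecationWarning/SyntaxWarning on recent Python versions
assert re.search(r'/\d+RECNX/', 'a/b/c/d/100RECNX/blah.jpg') is not None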
@@ -613,6 +614,50 @@ def is_empty(v):
     return False
 
 
+def min_none(a,b):
+    """
+    Returns the minimum of a and b. If both are None, returns None. If one is None,
+    returns the other.
+
+    Args:
+        a (numeric): the first value to compare
+        b (numeric): the second value to compare
+
+    Returns:
+        numeric: the minimum of a and b, or None
+    """
+    if a is None and b is None:
+        return None
+    elif a is None:
+        return b
+    elif b is None:
+        return a
+    else:
+        return min(a,b)
+
+
+def max_none(a,b):
+    """
+    Returns the maximum of a and b. If both are None, returns None. If one is None,
+    returns the other.
+
+    Args:
+        a (numeric): the first value to compare
+        b (numeric): the second value to compare
+
+    Returns:
+        numeric: the maximum of a and b, or None
+    """
+    if a is None and b is None:
+        return None
+    elif a is None:
+        return b
+    elif b is None:
+        return a
+    else:
+        return max(a,b)
+
+
 def isnan(v):
     """
     Returns True if v is a nan-valued float, otherwise returns False.
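The new helpers behave like min/max with None treated as missing, e.g.:

from megadetector.utils.ct_utils import min_none, max_none

assert min_none(None, None) is None
assert min_none(None, 3) == 3
assert min_none(2, 3) == 2
assert max_none(2, None) == 2
assert max_none(2, 3) == 3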
@@ -645,23 +690,36 @@ def sets_overlap(set1, set2):
     return not set(set1).isdisjoint(set(set2))
 
 
-
-#%% Test drivers
-
-if False:
+def is_function_name(s,calling_namespace):
+    """
+    Determines whether [s] is a callable function in the global or local scope, or a
+    built-in function.
 
-    pass
+    Args:
+        s (str): the string to test for function-ness
+        calling_namespace (dict): typically pass the output of locals()
+    """
+
+    assert isinstance(s,str), 'Input is not a string'
 
-    #%% Test image_file_to_camera_folder()
+    return callable(globals().get(s)) or \
+        callable(locals().get(s)) or \
+        callable(calling_namespace.get(s)) or \
+        callable(getattr(builtins, s, None))
+
+
+def __module_test__():
+    """
+    Module test driver
+    """
 
-    relative_path = 'a/b/c/d/100EK113/blah.jpg'
-    print(image_file_to_camera_folder(relative_path))
+    ##%% Camera folder mapping
 
-    relative_path = 'a/b/c/d/100RECNX/blah.jpg'
-    print(image_file_to_camera_folder(relative_path))
+    assert image_file_to_camera_folder('a/b/c/d/100EK113/blah.jpg') == 'a/b/c/d'
+    assert image_file_to_camera_folder('a/b/c/d/100RECNX/blah.jpg') == 'a/b/c/d'
 
 
-    #%% Test a few rectangle distances
+    ##%% Test a few rectangle distances
 
     r1 = [0,0,1,1]; r2 = [0,0,1,1]; assert rect_distance(r1,r2)==0
     r1 = [0,0,1,1]; r2 = [0,0,1,100]; assert rect_distance(r1,r2)==0
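A sketch of how is_function_name is intended to be called, passing locals() so the caller's namespace is searched (the helper name below is hypothetical):

from megadetector.utils.ct_utils import is_function_name

def my_helper():
    pass

assert is_function_name('my_helper', locals())  # found via the passed namespace
assert is_function_name('print', locals())      # found via builtins
assert not is_function_name('no_such_function', locals())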
@@ -673,9 +731,8 @@ if False:
     r1 = [0.4,0.8,10,22]; r2 = [120, 120, 200, 210.4]; assert abs(rect_distance(r1,r2)-147.323) < 0.001
 
 
-    #%% Test dictionary sorting
+    ##%% Test dictionary sorting
 
     L = [{'a':5},{'a':0},{'a':10}]
     k = 'a'
     sort_list_of_dicts_by_key(L, k, reverse=True)
-
megadetector/utils/md_tests.py

@@ -654,6 +654,14 @@ def run_python_tests(options):
     download_test_data(options)
 
 
+    ## Miscellaneous utility tests
+
+    print('\n** Running ct_utils module test **\n')
+
+    from megadetector.utils.ct_utils import __module_test__ as ct_utils_test
+    ct_utils_test()
+
+
     ## Run inference on an image
 
     print('\n** Running MD on a single image (module) **\n')
@@ -1210,7 +1218,7 @@ def run_cli_tests(options):
     cmd += ' --overwrite_handling overwrite'
     cmd_results = execute_and_print(cmd)
 
-    # Run again with checkpointing, make sure the output are identical
+    # Run again with checkpointing, make sure the outputs are identical
     cmd += ' --checkpoint_frequency 5'
     inference_output_file_yolo_val_checkpoint = \
         os.path.join(options.scratch_dir,'folder_inference_output_yolo_val_checkpoint.json')
megadetector/utils/path_utils.py

@@ -32,6 +32,8 @@ from functools import partial
 from shutil import which
 from tqdm import tqdm
 
+from megadetector.utils.ct_utils import is_iterable
+
 # Should all be lower-case
 IMG_EXTENSIONS = ('.jpg', '.jpeg', '.gif', '.png', '.tif', '.tiff', '.bmp')
 
@@ -770,16 +772,21 @@ def parallel_get_file_sizes(filenames,
 
     folder_name = None
 
-    if verbose:
-        print('Enumerating files')
-
-    if isinstance(filenames,str) and os.path.isdir(filenames):
-
+    if isinstance(filenames,str):
+
         folder_name = filenames
+        assert os.path.isdir(filenames), 'Could not find folder {}'.format(folder_name)
 
+        if verbose:
+            print('Enumerating files in {}'.format(folder_name))
+
         # Enumerate absolute paths here, we'll convert to relative later if requested
-        filenames = recursive_file_list(filenames,recursive=recursive,return_relative_paths=False)
+        filenames = recursive_file_list(folder_name,recursive=recursive,return_relative_paths=False)
 
+    else:
+
+        assert is_iterable(filenames), '[filenames] argument is neither a folder nor an iterable'
+
     if verbose:
         print('Creating worker pool')
 
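With this change, parallel_get_file_sizes accepts either a folder (which it enumerates) or any iterable of filenames; a usage sketch with placeholder paths:

from megadetector.utils.path_utils import parallel_get_file_sizes

# A folder is enumerated first (recursively here), then sizes are read
sizes = parallel_get_file_sizes('/data/camera-traps', recursive=True, verbose=True)

# ...or pass any iterable of filenames; other types now fail the is_iterable assert
sizes = parallel_get_file_sizes(['/data/a.jpg', '/data/b.jpg'])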
@@ -940,7 +947,7 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
         output_fn (str, optional): output filename; if this is None, we'll write to [input_folder].zip
         overwrite (bool, optional): whether to overwrite an existing .tar file
         verbose (bool, optional): enable additional debug console output
-        compresslevel (int, optional): compression level to use, between 0 and 9
+        compresslevel (int, optional): compression level to use, between 0 and 9
 
     Returns:
         str: the output zipfile, whether we created it or determined that it already exists
megadetector/utils/process_utils.py

@@ -59,8 +59,13 @@ def execute(cmd,encoding=None,errors=None,env=None,verbose=False):
     return return_code
 
 
-def execute_and_print(cmd,print_output=True,encoding=None,errors=None,
-                      env=None,verbose=False,catch_exceptions=True,
+def execute_and_print(cmd,
+                      print_output=True,
+                      encoding=None,
+                      errors=None,
+                      env=None,
+                      verbose=False,
+                      catch_exceptions=True,
                       echo_command=False):
     """
     Run [cmd] (a single string) in a shell, capturing and printing output. Returns
@@ -73,7 +78,8 @@ def execute_and_print(cmd,print_output=True,encoding=None,errors=None,
 
     Args:
         cmd (str): command to run
-        print_output (bool, optional): whether to print output from [cmd]
+        print_output (bool, optional): whether to print output from [cmd] (stdout is
+            captured regardless of the value of print_output)
         encoding (str, optional): stdout encoding, see Popen() documentation
         errors (str, optional): error handling, see Popen() documentation
         env (dict, optional): environment variables, see Popen() documentation
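A sketch of the clarified print_output semantics; in both calls stdout is captured in the returned value, and only console echoing differs:

from megadetector.utils.process_utils import execute_and_print

# Echoes output while the command runs
cmd_results = execute_and_print('echo hello')

# Runs silently, but stdout is still captured in cmd_results
cmd_results = execute_and_print('echo hello', print_output=False)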
megadetector/utils/write_html_image_list.py

@@ -44,6 +44,7 @@ def write_html_image_list(filename=None,images=None,options=None):
         - fHtml (file pointer to write to, used for splitting write operations over multiple calls)
         - pageTitle (HTML page title)
         - headerHtml (html text to include before the image list)
+        - subPageHeaderHtml (html text to include before the images when images are broken into pages)
         - trailerHtml (html text to include after the image list)
         - defaultImageStyle (default css style for images)
         - defaultTextStyle (default css style for image titles)
@@ -67,6 +68,9 @@ def write_html_image_list(filename=None,images=None,options=None):
     if 'headerHtml' not in options or options['headerHtml'] is None:
         options['headerHtml'] = ''
 
+    if 'subPageHeaderHtml' not in options or options['subPageHeaderHtml'] is None:
+        options['subPageHeaderHtml'] = ''
+
     if 'trailerHtml' not in options or options['trailerHtml'] is None:
         options['trailerHtml'] = ''
 
@@ -152,7 +156,7 @@
         localImages = images[iStart:iEnd+1]
 
         localOptions = options.copy();
-        localOptions['headerHtml'] = '';
+        localOptions['headerHtml'] = options['subPageHeaderHtml'];
         localOptions['trailerHtml'] = '';
 
         # Make a recursive call for this image set
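A sketch of the new subPageHeaderHtml option: when the image list is split across pages, headerHtml precedes the list itself, while subPageHeaderHtml is written at the top of each sub-page (the paths and the pagination option name below are assumptions, not taken from this diff):

from megadetector.utils.write_html_image_list import write_html_image_list

options = {}
options['headerHtml'] = '<h1>Detection previews</h1>'
options['subPageHeaderHtml'] = '<h2>Detection previews (continued)</h2>'
options['maxFiguresPerHtmlFile'] = 500  # assumed pagination option name

write_html_image_list(filename='preview/index.html',
                      images=['images/a.jpg', 'images/b.jpg'],
                      options=options)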