megadetector-5.0.12-py3-none-any.whl → megadetector-5.0.14-py3-none-any.whl

This diff represents the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.


Files changed (45)
  1. megadetector/api/batch_processing/api_core/server.py +1 -1
  2. megadetector/api/batch_processing/api_core/server_api_config.py +0 -1
  3. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -3
  4. megadetector/api/batch_processing/api_core/server_utils.py +0 -4
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  6. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -3
  7. megadetector/classification/efficientnet/utils.py +0 -3
  8. megadetector/data_management/camtrap_dp_to_coco.py +0 -2
  9. megadetector/data_management/cct_json_utils.py +15 -6
  10. megadetector/data_management/coco_to_labelme.py +12 -1
  11. megadetector/data_management/databases/integrity_check_json_db.py +43 -27
  12. megadetector/data_management/importers/cacophony-thermal-importer.py +1 -4
  13. megadetector/data_management/ocr_tools.py +0 -4
  14. megadetector/data_management/read_exif.py +178 -44
  15. megadetector/data_management/rename_images.py +187 -0
  16. megadetector/data_management/wi_download_csv_to_coco.py +3 -2
  17. megadetector/data_management/yolo_output_to_md_output.py +7 -2
  18. megadetector/detection/process_video.py +548 -244
  19. megadetector/detection/pytorch_detector.py +33 -14
  20. megadetector/detection/run_detector.py +17 -5
  21. megadetector/detection/run_detector_batch.py +179 -65
  22. megadetector/detection/run_inference_with_yolov5_val.py +527 -357
  23. megadetector/detection/tf_detector.py +14 -3
  24. megadetector/detection/video_utils.py +284 -61
  25. megadetector/postprocessing/categorize_detections_by_size.py +16 -14
  26. megadetector/postprocessing/classification_postprocessing.py +716 -0
  27. megadetector/postprocessing/compare_batch_results.py +101 -93
  28. megadetector/postprocessing/convert_output_format.py +12 -5
  29. megadetector/postprocessing/merge_detections.py +18 -7
  30. megadetector/postprocessing/postprocess_batch_results.py +133 -127
  31. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +236 -232
  32. megadetector/postprocessing/subset_json_detector_output.py +66 -62
  33. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +0 -2
  34. megadetector/utils/ct_utils.py +5 -4
  35. megadetector/utils/md_tests.py +380 -128
  36. megadetector/utils/path_utils.py +39 -6
  37. megadetector/utils/process_utils.py +13 -4
  38. megadetector/visualization/visualization_utils.py +7 -2
  39. megadetector/visualization/visualize_db.py +79 -77
  40. megadetector/visualization/visualize_detector_output.py +0 -1
  41. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/LICENSE +0 -0
  42. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/METADATA +2 -2
  43. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/RECORD +45 -43
  44. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/top_level.txt +0 -0
  45. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/WHEEL +0 -0
@@ -105,7 +105,7 @@ def request_detections():
     model_version = post_body.get('model_version', '')
     if model_version != '':
         model_version = str(model_version)  # in case user used an int
-        if model_version not in api_config.MD_VERSIONS_TO_REL_PATH: # TODO use AppConfig to store model version info
+        if model_version not in api_config.MD_VERSIONS_TO_REL_PATH:
             return make_error(400, f'model_version {model_version} is not supported.')
 
     # check request_name has only allowed characters
@@ -47,7 +47,6 @@ MAX_BATCH_ACCOUNT_ACTIVE_JOBS = 300
 DETECTION_CONF_THRESHOLD = 0.1
 
 # relative to the `megadetector_copies` folder in the container `models`
-# TODO add MD versions info to AppConfig
 MD_VERSIONS_TO_REL_PATH = {
     '4.1': 'megadetector_v4_1/md_v4.1.0.pb',
     '3': 'megadetector_v3/megadetector_v3_tf19.pb',
@@ -67,9 +67,6 @@ class JobStatusTable:
         assert 'request_status' in status and 'message' in status
         assert status['request_status'] in JobStatusTable.allowed_statuses
 
-        # TODO do not read the entry first to get the call_params when the Cosmos SDK add a
-        # patching functionality:
-        # https://feedback.azure.com/forums/263030-azure-cosmos-db/suggestions/6693091-be-able-to-do-partial-updates-on-document
         item_old = self.read_job_status(job_id)
         if item_old is None:
             raise ValueError
@@ -19,7 +19,6 @@ log = logging.getLogger(os.environ['FLASK_APP'])
 #%% helper classes and functions
 
 def make_error(error_code: int, error_message: str) -> Tuple[dict, int]:
-    # TODO log exception when we have more telemetry
     log.error(f'Error {error_code} - {error_message}')
     return {'error': error_message}, error_code
 
@@ -28,8 +27,6 @@ def check_data_container_sas(input_container_sas: str) -> Optional[Tuple[int, st
     """
     Returns a tuple (error_code, msg) if not a usable SAS URL, else returns None
     """
-    # TODO check that the expiry date of input_container_sas is at least a month
-    # into the future
     permissions = sas_blob_utils.get_permissions_from_uri(input_container_sas)
     data = sas_blob_utils.get_all_query_parts(input_container_sas)
 
@@ -38,7 +35,6 @@ def check_data_container_sas(input_container_sas: str) -> Optional[Tuple[int, st
     if 'read' not in permissions or 'list' not in permissions:
         if 'si' in data:
             # if no permission specified explicitly but has an access policy, assumes okay
-            # TODO - check based on access policy as well
             return None
 
     return 400, msg
@@ -72,7 +72,6 @@ def main():
 
     print(deployment_id)
 
-    # TODO: check project ID ?
     sql = ''' SELECT emammal_project_taxa_id FROM wild_id.emammal_project_taxa
               where species in ("No Animal", "Unknown Animal", "Homo sapiens", "Vehicle") '''
 
@@ -147,8 +147,6 @@ def detect_sync():
 
     try:
         # Write images to temporary files
-        #
-        # TODO: read from memory rather than using intermediate files
         os.makedirs(temp_direc,exist_ok=True)
         for name, file in request.files.items():
             if file.content_type in config.IMAGE_CONTENT_TYPES:
@@ -166,7 +164,6 @@ def detect_sync():
 
         while True:
 
-            # TODO: convert to a blocking read and eliminate the sleep() statement in this loop
             result = db.get(redis_id)
 
             if result:
@@ -90,9 +90,6 @@ def round_filters(filters, global_params):
     multiplier = global_params.width_coefficient
     if not multiplier:
         return filters
-    # TODO: modify the params names.
-    # maybe the names (width_divisor,min_width)
-    # are more suitable than (depth_divisor,min_depth).
    divisor = global_params.depth_divisor
    min_depth = global_params.min_depth
    filters *= multiplier
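
For context on the function edited above: round_filters() scales a layer's channel count by the compound-scaling width multiplier, then rounds to a hardware-friendly multiple of depth_divisor. A sketch of the standard EfficientNet rounding logic that follows the context lines shown (the released file may differ in detail):

def round_filters_sketch(filters, width_coefficient, depth_divisor=8, min_depth=None):
    # Scale the channel count by the width multiplier
    if not width_coefficient:
        return filters
    filters *= width_coefficient
    min_depth = min_depth or depth_divisor
    # Round to the nearest multiple of depth_divisor...
    new_filters = max(min_depth, int(filters + depth_divisor / 2) // depth_divisor * depth_divisor)
    # ...but never round down by more than 10%
    if new_filters < 0.9 * filters:
        new_filters += depth_divisor
    return int(new_filters)
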
@@ -235,5 +235,3 @@ if False:
     #%% Command-line driver
 
     # TODO
-
-
@@ -295,8 +295,9 @@ class SequenceOptions:
     Options parameterizing the grouping of images into sequences by time.
     """
 
-    #: Images separated by <= this duration will be grouped into the same sequence.
-    episode_interval_seconds = 60.0
+    def __init__(self):
+        #: Images separated by <= this duration will be grouped into the same sequence.
+        self.episode_interval_seconds = 60.0
 
 
 #%% Functions
@@ -305,16 +306,24 @@ def create_sequences(image_info,options=None):
     """
     Synthesizes episodes/sequences/bursts for the images in [image_info].
 
-    Modifies [image_info], populating the 'seq_id', 'seq_num_frames', and 'frame_num' fields
-    for each image.
+    Modifies [image_info] in place, populating the 'seq_id', 'seq_num_frames', and 'frame_num'
+    fields for each image.
 
     Args:
-        image_info (dict): a list of dicts in CCT format, i.e. with fields 'file_name' (str),
-            'datetime' (datetime), and 'location' (str).
+        image_info (str, dict, or list): a dict in CCT format, a CCT .json file, or just the 'images' component
+            of a CCT dataset (a list of dicts with fields 'file_name' (str), 'datetime' (datetime), and
+            'location' (str)).
     """
 
     if options is None:
         options = SequenceOptions()
+
+    if isinstance(image_info,str):
+        with open(image_info,'r') as f:
+            image_info = json.load(f)
+
+    if isinstance(image_info,dict):
+        image_info = image_info['images']
 
     # Find all unique locations
     locations = set()
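
With this change, create_sequences() accepts a CCT .json filename, a full CCT dict, or a bare image list. A minimal usage sketch (the file path is hypothetical; grouping still happens in place on the image dicts):

import json
from megadetector.data_management.cct_json_utils import create_sequences, SequenceOptions

# 'my_dataset.json' is a hypothetical CCT file
with open('my_dataset.json','r') as f:
    cct_data = json.load(f)

options = SequenceOptions()
options.episode_interval_seconds = 120.0  # group images up to two minutes apart

# Adds 'seq_id', 'seq_num_frames', and 'frame_num' to each dict in cct_data['images']
create_sequences(cct_data, options=options)
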
@@ -95,7 +95,18 @@ def get_labelme_dict_for_image_from_coco_record(im,annotations,categories,info=N
 def coco_to_labelme(coco_data,image_base,overwrite=False,bypass_image_size_check=False,verbose=False):
     """
     For all the images in [coco_data] (a dict or a filename), write a .json file in
-    labelme format alongside the corresponding relative path within image_base.
+    labelme format alongside the corresponding relative path within image_base.
+
+    Args:
+        coco_data (dict or str): path to a COCO-formatted .json file, or an already-loaded
+            COCO-formatted dict
+        image_base (str): path where images live (filenames in [coco_data] should be relative to
+            [image_base]); this is also where labelme files will be written
+        overwrite (bool, optional): overwrite existing .json files
+        bypass_image_size_check (bool, optional): if you're sure that the COCO data already has
+            correct 'width' and 'height' fields, this bypasses the somewhat-slow loading of
+            each image to fetch image sizes
+        verbose (bool, optional): enable additional debug output
     """
 
     # Load COCO data if necessary
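
A minimal usage sketch for the newly documented parameters (paths are hypothetical):

from megadetector.data_management.coco_to_labelme import coco_to_labelme

# Writes one labelme-format .json next to each image under image_base; set
# bypass_image_size_check=True only if the 'width'/'height' fields in the
# COCO data are known to be correct.
coco_to_labelme('coco.json', image_base='/data/images', overwrite=False)
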
@@ -37,30 +37,34 @@ class IntegrityCheckOptions:
     Options for integrity_check_json_db()
     """
 
-    #: Image path; the filenames in the .json file should be relative to this folder
-    baseDir = ''
-
-    #: Should we validate the image sizes?
-    bCheckImageSizes = False
-
-    #: Should we check that all the images in the .json file exist on disk?
-    bCheckImageExistence = False
-
-    #: Should we search [baseDir] for images that are not used in the .json file?
-    bFindUnusedImages = False
-
-    #: Should we require that all images in the .json file have a 'location' field?
-    bRequireLocation = True
-
-    #: For debugging, limit the number of images we'll process
-    iMaxNumImages = -1
-
-    #: Number of threads to use for parallelization, set to <= 1 to disable parallelization
-    nThreads = 10
-
-    #: Enable additional debug output
-    verbose = True
-
+    def __init__(self):
+
+        #: Image path; the filenames in the .json file should be relative to this folder
+        self.baseDir = ''
+
+        #: Should we validate the image sizes?
+        self.bCheckImageSizes = False
+
+        #: Should we check that all the images in the .json file exist on disk?
+        self.bCheckImageExistence = False
+
+        #: Should we search [baseDir] for images that are not used in the .json file?
+        self.bFindUnusedImages = False
+
+        #: Should we require that all images in the .json file have a 'location' field?
+        self.bRequireLocation = True
+
+        #: For debugging, limit the number of images we'll process
+        self.iMaxNumImages = -1
+
+        #: Number of threads to use for parallelization, set to <= 1 to disable parallelization
+        self.nThreads = 10
+
+        #: Enable additional debug output
+        self.verbose = True
+
+        #: Allow integer-valued image and annotation IDs (COCO uses this, CCT files use strings)
+        self.allowIntIDs = False
 
 # This is used in a medium-hacky way to share modified options across threads
 defaultOptions = IntegrityCheckOptions()
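
This release moves option defaults from class attributes into __init__ here and in SequenceOptions and ReadExifOptions. A minimal sketch of the aliasing pitfall that instance attributes avoid (hypothetical classes for illustration):

class ClassAttrOptions:
    tags = set()           # one set object shared by every instance

class InstanceAttrOptions:
    def __init__(self):
        self.tags = set()  # a fresh set per instance

a, b = ClassAttrOptions(), ClassAttrOptions()
a.tags.add('x')
print(b.tags)   # {'x'}: mutation leaks across instances

c, d = InstanceAttrOptions(), InstanceAttrOptions()
c.tags.add('x')
print(d.tags)   # set(): each instance owns its own defaults
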
@@ -231,7 +235,12 @@ def integrity_check_json_db(jsonFile, options=None):
         imagePathsInJson.add(image['file_name'])
 
         assert isinstance(image['file_name'],str), 'Illegal image filename type'
-        assert isinstance(image['id'],str), 'Illegal image ID type'
+
+        if options.allowIntIDs:
+            assert isinstance(image['id'],str) or isinstance(image['id'],int), \
+                'Illegal image ID type'
+        else:
+            assert isinstance(image['id'],str), 'Illegal image ID type'
 
         imageId = image['id']
 
@@ -329,9 +338,16 @@ def integrity_check_json_db(jsonFile, options=None):
         assert 'id' in ann
         assert 'category_id' in ann
 
-        assert isinstance(ann['id'],str), 'Illegal annotation ID type'
+        if options.allowIntIDs:
+            assert isinstance(ann['id'],str) or isinstance(ann['id'],int), \
+                'Illegal annotation ID type'
+            assert isinstance(ann['image_id'],str) or isinstance(ann['image_id'],int), \
+                'Illegal annotation image ID type'
+        else:
+            assert isinstance(ann['id'],str), 'Illegal annotation ID type'
+            assert isinstance(ann['image_id'],str), 'Illegal annotation image ID type'
+
         assert isinstance(ann['category_id'],int), 'Illegal annotation category ID type'
-        assert isinstance(ann['image_id'],str), 'Illegal annotation image ID type'
 
         if 'bbox' in ann:
             nBoxes += 1
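
A usage sketch for the new allowIntIDs option (paths are hypothetical):

from megadetector.data_management.databases.integrity_check_json_db import \
    integrity_check_json_db, IntegrityCheckOptions

options = IntegrityCheckOptions()
options.baseDir = '/data/images'    # hypothetical image folder
options.bCheckImageExistence = True
options.allowIntIDs = True          # accept COCO-style integer image/annotation IDs

# 'dataset.json' is a hypothetical COCO/CCT file
integrity_check_json_db('dataset.json', options=options)
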
@@ -315,7 +315,7 @@ def process_file(fn_relative,verbose=False):
         bottom = float(position[3])
         frame_number = int(position[4])
 
-        # TODO: I'm being lazy about the fact that these don't reflect the
+        # I'm being lazy about the fact that these don't reflect the
         # pixels cropped out of the border. IMO this is OK because for this dataset,
         # this is just an approximate set of coordinates used to disambiguate simultaneous
         # areas of movement when multiple different labels are present in the same video.
@@ -488,7 +488,6 @@ def process_file(fn_relative,verbose=False):
             if tag['confidence'] >= confidence_threshold:
                 valid_tags.append(tag)
             else:
-                # TODO
                 print('Zero-confidence tag in {}'.format(fn_relative))
 
         track_info['tags'] = valid_tags
@@ -497,13 +496,11 @@ def process_file(fn_relative,verbose=False):
         if len(valid_tags) > 0:
             valid_tracks.append(track_info)
         else:
-            # TODO
             print('Invalid track in {}'.format(fn_relative))
 
     # ...for each track
 
     if (len(clip_metadata['tracks']) > 0) and (len(valid_tracks) == 0):
-        # TODO
         print('Removed all tracks from {}'.format(fn_relative))
 
     clip_metadata['tracks'] = valid_tracks
@@ -868,7 +868,3 @@ if False:
     if extracted_datetime is not None:
         assert extracted_datetime.year <= 2023 and extracted_datetime.year >= 1990
 
-
-    #%% Command-line driver
-
-    # TODO
@@ -16,7 +16,7 @@ path. No attempt is made to be consistent in format across the two approaches.
 import os
 import subprocess
 import json
-from datetime import datetime
+from datetime import date, datetime
 
 from multiprocessing.pool import ThreadPool as ThreadPool
 from multiprocessing.pool import Pool as Pool
@@ -26,6 +26,7 @@ from PIL import Image, ExifTags
 
 from megadetector.utils.path_utils import find_images, is_executable
 from megadetector.utils.ct_utils import args_to_object
+from megadetector.utils.ct_utils import image_file_to_camera_folder
 
 debug_max_images = None
 
@@ -37,51 +38,74 @@ class ReadExifOptions:
     Parameters controlling metadata extraction.
     """
 
-    #: Enable additional debug console output
-    verbose = False
-
-    #: If this is True and an output file is specified for read_exif_from_folder,
-    #: and we encounter a serialization issue, we'll return the results but won't
-    #: error.
-    allow_write_error = False
-
-    #: Number of concurrent workers, set to <= 1 to disable parallelization
-    n_workers = 1
-
-    #: Should we use threads (vs. processes) for parallelization?
-    #:
-    #: Not relevant if n_workers is <= 1.
-    use_threads = True
+    def __init__(self):
 
-    #: "File" and "ExifTool" are tag types used by ExifTool to report data that
-    #: doesn't come from EXIF, rather from the file (e.g. file size).
-    tag_types_to_ignore = set(['File','ExifTool'])
-
-    #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
-    #:
-    #: A useful set of tags one might want to limit queries for:
-    #:
-    #: options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime',
-    #:    'DateTimeOriginal','Orientation']
-    tags_to_include = None
+        #: Enable additional debug console output
+        self.verbose = False
+
+        #: If this is True and an output file is specified for read_exif_from_folder,
+        #: and we encounter a serialization issue, we'll return the results but won't
+        #: error.
+        self.allow_write_error = False
+
+        #: Number of concurrent workers, set to <= 1 to disable parallelization
+        self.n_workers = 1
+
+        #: Should we use threads (vs. processes) for parallelization?
+        #:
+        #: Not relevant if n_workers is <= 1.
+        self.use_threads = True
+
+        #: "File" and "ExifTool" are tag types used by ExifTool to report data that
+        #: doesn't come from EXIF, rather from the file (e.g. file size).
+        self.tag_types_to_ignore = set(['File','ExifTool'])
+
+        #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
+        #:
+        #: A useful set of tags one might want to limit queries for:
+        #:
+        #: options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight',
+        #:    'DateTimeOriginal','Orientation']
+        self.tags_to_include = None
+
+        #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
+        self.tags_to_exclude = None
+
+        #: The command line to invoke if using exiftool, can be an absolute path to exiftool.exe, or
+        #: can be just "exiftool", in which case it should be on your system path.
+        self.exiftool_command_name = 'exiftool'
+
+        #: How should we handle byte-formatted EXIF tags?
+        #:
+        #: 'convert_to_string': convert to a Python string
+        #: 'delete': don't include at all
+        #: 'raw': include as a byte string
+        self.byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'
+
+        #: Should we use exiftool or PIL?
+        self.processing_library = 'pil' # 'exiftool','pil'
+
+
+class ExifResultsToCCTOptions:
+    """
+    Options controlling the behavior of exif_results_to_cct() (which reformats the datetime
+    information extracted by read_exif_from_folder()).
+    """
 
-    #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
-    tags_to_exclude = None
+    def __init__(self):
+
+        #: Timestamps older than this are assumed to be junk; lots of cameras use a
+        #: default time in 2000.
+        self.min_valid_timestamp_year = 2001
 
-    #: The command line to invoke if using exiftool, can be an absolute path to exiftool.exe, or
-    #: can be just "exiftool", in which case it should be on your system path.
-    exiftool_command_name = 'exiftool'
+        #: The EXIF tag from which to pull datetime information
+        self.exif_datetime_tag = 'DateTimeOriginal'
 
-    #: How should we handle byte-formatted EXIF tags?
-    #:
-    #: 'convert_to_string': convert to a Python string
-    #: 'delete': don't include at all
-    #: 'raw': include as a byte string
-    byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'
+        #: Function for extracting location information, should take a string
+        #: and return a string. Defaults to ct_utils.image_file_to_camera_folder. If
+        #: this is None, location is written as "unknown".
+        self.filename_to_location_function = image_file_to_camera_folder
 
-    #: Should we use exiftool or PIL?
-    processing_library = 'pil' # 'exiftool','pil'
-
 
 #%% Functions
 
@@ -437,7 +461,7 @@ def _create_image_objects(image_files,recursive=True):
 def _populate_exif_for_images(image_base,images,options=None):
     """
     Main worker loop: read EXIF data for each image object in [images] and
-    populate the image objects.
+    populate the image objects in place.
 
     'images' should be a list of dicts with the field 'file_name' containing
     a relative path (relative to 'image_base').
@@ -544,6 +568,8 @@ def _write_exif_results(results,output_file):
 
     print('Wrote results to {}'.format(output_file))
 
+# ..._write_exif_results(...)
+
 
 def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
     """
@@ -559,10 +585,12 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
             a list of absolute filenames (if [input_folder] is None)
         recursive (bool, optional): whether to recurse into [input_folder], not relevant if [input_folder]
             is None.
+        verbose (bool, optional): enable additional debug output
 
     Returns:
-        dict: a dictionary mapping relative filenames to EXIF data, whose format depends on whether
-            we're using PIL or exiftool.
+        list: a list of dicts, each of which contains EXIF information for one image. Fields include at least:
+            * 'file_name': the relative path to the image
+            * 'exif_tags': a dict of EXIF tags whose exact format depends on [options.processing_library].
     """
 
     if options is None:
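
A usage sketch combining the refactored ReadExifOptions with read_exif_from_folder() (the folder path, output filename, and tag list are hypothetical):

from megadetector.data_management.read_exif import ReadExifOptions, read_exif_from_folder

options = ReadExifOptions()
options.processing_library = 'pil'  # or 'exiftool', if it's on your PATH
options.n_workers = 8               # parallelize over 8 workers (threads by default)
options.tags_to_include = ['DateTimeOriginal','Make','Model','Orientation']

# Returns a list of dicts with 'file_name' and 'exif_tags' fields
results = read_exif_from_folder('/data/camera_folder', output_file='exif.json',
                                options=options)
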
@@ -618,6 +646,112 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
 
     return results
 
+# ...read_exif_from_folder(...)
+
+
+def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
+    """
+    Given the EXIF results for a folder of images read via read_exif_from_folder,
+    create a COCO Camera Traps .json file that has no annotations, but
+    attaches image filenames to locations and datetimes.
+
+    Args:
+        exif_results (str or list): the filename (or loaded list) containing the results
+            from read_exif_from_folder
+        cct_output_file (str, optional): the filename to which we should write
+            COCO-Camera-Traps-formatted data
+
+    Returns:
+        dict: a COCO Camera Traps dict (with no annotations).
+    """
+
+    if options is None:
+        options = ExifResultsToCCTOptions()
+
+    if isinstance(exif_results,str):
+        print('Reading EXIF results from {}'.format(exif_results))
+        with open(exif_results,'r') as f:
+            exif_results = json.load(f)
+    else:
+        assert isinstance(exif_results,list)
+
+    now = datetime.now()
+
+    image_info = []
+
+    images_without_datetime = []
+    images_with_invalid_datetime = []
+
+    # exif_result = exif_results[0]
+    for exif_result in tqdm(exif_results):
+
+        im = {}
+
+        # By default we assume that each leaf-node folder is a location
+        if options.filename_to_location_function is None:
+            im['location'] = 'unknown'
+        else:
+            im['location'] = options.filename_to_location_function(exif_result['file_name'])
+
+        im['file_name'] = exif_result['file_name']
+        im['id'] = im['file_name']
+
+        if ('exif_tags' not in exif_result) or (exif_result['exif_tags'] is None) or \
+           (options.exif_datetime_tag not in exif_result['exif_tags']):
+            exif_dt = None
+        else:
+            exif_dt = exif_result['exif_tags'][options.exif_datetime_tag]
+            exif_dt = parse_exif_datetime_string(exif_dt)
+        if exif_dt is None:
+            im['datetime'] = None
+            images_without_datetime.append(im['file_name'])
+        else:
+            dt = exif_dt
+
+            # An image from the future (or within the last 24 hours) is invalid
+            if (now - dt).total_seconds() <= 1*24*60*60:
+                print('Warning: datetime for {} is {}'.format(
+                    im['file_name'],dt))
+                im['datetime'] = None
+                images_with_invalid_datetime.append(im['file_name'])
+
+            # An image from before the dawn of time is also invalid
+            elif dt.year < options.min_valid_timestamp_year:
+                print('Warning: datetime for {} is {}'.format(
+                    im['file_name'],dt))
+                im['datetime'] = None
+                images_with_invalid_datetime.append(im['file_name'])
+
+            else:
+                im['datetime'] = dt
+
+        image_info.append(im)
+
+    # ...for each exif image result
+
+    print('Parsed EXIF datetime information, unable to parse EXIF date from {} of {} images'.format(
+        len(images_without_datetime),len(exif_results)))
+
+    d = {}
+    d['info'] = {}
+    d['images'] = image_info
+    d['annotations'] = []
+    d['categories'] = []
+
+    def json_serialize_datetime(obj):
+        if isinstance(obj, (datetime, date)):
+            return obj.isoformat()
+        raise TypeError('Object {} (type {}) not serializable'.format(
+            str(obj),type(obj)))
+
+    if cct_output_file is not None:
+        with open(cct_output_file,'w') as f:
+            json.dump(d,f,indent=1,default=json_serialize_datetime)
+
+    return d
+
+# ...exif_results_to_cct(...)
+
 
 #%% Interactive driver
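
Finally, a usage sketch for the new exif_results_to_cct() (paths are hypothetical; 'exif.json' would be output from read_exif_from_folder):

from megadetector.data_management.read_exif import \
    exif_results_to_cct, ExifResultsToCCTOptions

cct_options = ExifResultsToCCTOptions()
cct_options.min_valid_timestamp_year = 2005  # treat older timestamps as junk

# Writes an annotation-free CCT file mapping each image to a location and datetime
cct = exif_results_to_cct('exif.json', cct_output_file='cct.json', options=cct_options)
print('Wrote CCT records for {} images'.format(len(cct['images'])))
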