PyPI - megadetector - Versions diffs - 5.0.11__py3-none-any.whl → 5.0.13__py3-none-any.whl - Mend

megadetector 5.0.11__py3-none-any.whl → 5.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (203) hide show

megadetector/data_management/remap_coco_categories.py ADDED Viewed

@@ -0,0 +1,84 @@
+"""
+remap_coco_categories.py
+Given a COCO-formatted dataset, remap the categories to a new mapping.
+"""
+#%% Imports and constants
+import os
+import json
+from copy import deepcopy
+#%% Main function
+def remap_coco_categories(input_data,
+                          output_category_name_to_id,
+                          input_category_name_to_output_category_name,
+                          output_file=None):
+    """
+    Rewrites the category IDs of a COCO-formatted dataset according to a new category scheme,
+    optionally saving the remapped dataset as .json.
+    Args:
+        input_data (dict or str): a COCO-formatted dict, or the name of a .json file containing
+            one.  A dict is deep-copied first, so the caller's object is never modified.
+        output_category_name_to_id (dict): maps output category names (str) to output IDs (int);
+            this becomes the complete category list of the result.
+        input_category_name_to_output_category_name (dict): maps input category names (str)
+            to output category names (str).
+        output_file (str, optional): if not None, the remapped dataset is written here.
+    Returns:
+        dict: the remapped COCO-formatted dataset
+    """
+    if not isinstance(input_data,str):
+        assert isinstance(input_data,dict), 'Illegal COCO input data'
+        # Work on a private copy so the caller's dict stays untouched
+        output_data = deepcopy(input_data)
+    else:
+        assert os.path.isfile(input_data), "Can't find file {}".format(input_data)
+        with open(input_data,'r') as f:
+            output_data = json.load(f)
+        assert isinstance(output_data,dict), 'Illegal COCO input data'
+    # Input category name --> input category ID
+    input_name_to_input_id = {c['name']:c['id'] for c in output_data['categories']}
+    # Input category ID --> output category ID
+    input_id_to_output_id = {}
+    for input_name,output_name in input_category_name_to_output_category_name.items():
+        assert output_name in output_category_name_to_id, \
+            'No output ID for {} --> {}'.format(input_name,output_name)
+        input_id = input_name_to_input_id[input_name]
+        input_id_to_output_id[input_id] = output_category_name_to_id[output_name]
+    # Every annotation has to refer to a category that we know how to remap
+    for ann in output_data['annotations']:
+        original_id = ann['category_id']
+        assert original_id in input_id_to_output_id, \
+            'Unrecognized category ID {}'.format(original_id)
+        ann['category_id'] = input_id_to_output_id[original_id]
+    # The category list is replaced wholesale by the output scheme
+    output_data['categories'] = [
+        {'name':name,'id':category_id}
+        for name,category_id in output_category_name_to_id.items()
+    ]
+    if output_file is not None:
+        with open(output_file,'w') as f:
+            json.dump(output_data,f,indent=1)
+    return output_data
+#%% Command-line driver
+# TODO

megadetector/data_management/remove_exif.py ADDED Viewed

@@ -0,0 +1,66 @@
+"""
+remove_exif.py
+Removes all EXIF/IPTC/XMP metadata from a folder of images, without making
+backup copies, using pyexiv2.
+TODO: This is a one-off script waiting to be cleaned up for more general use.
+"""
+input_base = r'f:\images'
+#%% Imports and constants
+import os
+import glob
+def main():
+    """
+    Strips EXIF, IPTC, and XMP metadata, in place, from every .jpg file found recursively
+    under [input_base].  No backup copies are made.
+    """
+    assert os.path.isdir(input_base)
+    ##%% List files
+    # Anchor the recursive pattern *inside* [input_base].  Appending '*/**' directly to the
+    # folder name also matches sibling folders that merely share its prefix (e.g.
+    # 'images_backup' next to 'images'), which is dangerous for a destructive script.
+    all_files = glob.glob(os.path.join(input_base,'**'), recursive=True)
+    image_files = [s for s in all_files if s.lower().endswith('.jpg')]
+    ##%% Remove EXIF data (support)
+    import pyexiv2
+    # PYEXIV2 IS NOT THREAD SAFE; DO NOT CALL THIS IN PARALLEL FROM A SINGLE PROCESS
+    def remove_exif(fn):
+        """
+        Clears EXIF/IPTC/XMP metadata from a single file.  Errors are printed rather than
+        raised, so one bad file doesn't abort the batch.
+        """
+        try:
+            img = pyexiv2.Image(fn)
+            try:
+                img.clear_exif()
+                img.clear_iptc()
+                img.clear_xmp()
+            finally:
+                # Release the image handle even if one of the clear calls fails
+                img.close()
+        except Exception as e:
+            print('EXIF error on {}: {}'.format(fn,str(e)))
+    ##%% Remove EXIF data (execution)
+    from joblib import Parallel, delayed
+    n_exif_threads = 50
+    if n_exif_threads == 1:
+        for fn in image_files:
+            remove_exif(fn)
+    else:
+        # joblib.Parallel defaults to a process-based backend, but let's be sure
+        _ = Parallel(n_jobs=n_exif_threads,verbose=2,prefer='processes')(
+            delayed(remove_exif)(fn) for fn in image_files)
+if __name__ == '__main__':
+    main()

megadetector/data_management/rename_images.py ADDED Viewed

@@ -0,0 +1,187 @@
+"""
+rename_images.py
+Copies images from a possibly-nested folder structure to a flat folder structure, including EXIF
+timestamps in each filename.  Loosely equivalent to camtrapR's imageRename() function.
+"""
+#%% Imports and constants
+import os
+from megadetector.utils.path_utils import \
+    find_images, insert_before_extension, parallel_copy_files
+from megadetector.data_management.read_exif import \
+    ReadExifOptions, read_exif_from_folder
+#%% Functions
+def rename_images(input_folder,
+                  output_folder,
+                  dry_run=False,
+                  verbose=False,
+                  read_exif_options=None,
+                  n_copy_workers=8):
+    """
+    Copies images from a possibly-nested folder structure to a flat folder structure, adding
+    each image's EXIF DateTimeOriginal value to its output filename.
+    Args:
+        input_folder (str): the folder to search for images, always recursive
+        output_folder (str): the folder to which we will copy images; should not be the
+            same as [input_folder] (this is not enforced here)
+        dry_run (bool, optional): only map images, don't actually copy
+        verbose (bool, optional): enable additional debug output while copying
+        read_exif_options (ReadExifOptions, optional): parameters controlling the reading of
+            EXIF information; note that [tags_to_include] and [verbose] are overwritten on
+            this object
+        n_copy_workers (int, optional): number of parallel threads to use for copying
+    Returns:
+        dict: a dict mapping relative filenames in the input folder to relative filenames in the output
+        folder
+    """
+    assert os.path.isdir(input_folder), 'Input folder {} does not exist'.format(
+        input_folder)
+    if not dry_run:
+        os.makedirs(output_folder,exist_ok=True)
+    # Read exif information
+    if read_exif_options is None:
+        read_exif_options = ReadExifOptions()
+    read_exif_options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight',
+                                         'DateTimeOriginal']
+    read_exif_options.verbose = False
+    exif_info = read_exif_from_folder(input_folder=input_folder,
+                                      output_file=None,
+                                      options=read_exif_options,
+                                      filenames=None,recursive=True)
+    print('Read EXIF information for {} images'.format(len(exif_info)))
+    filename_to_exif_info = {info['file_name']:info for info in exif_info}
+    image_files = find_images(input_folder,return_relative_paths=True,convert_slashes=True,recursive=True)
+    for fn in image_files:
+        assert fn in filename_to_exif_info, 'No EXIF info available for {}'.format(fn)
+    # Map each relative input filename to its flattened, timestamped output filename
+    input_fn_relative_to_output_fn_relative = {}
+    for fn_relative in image_files:
+        image_exif_info = filename_to_exif_info[fn_relative]
+        if 'exif_tags' in image_exif_info:
+            image_exif_info = image_exif_info['exif_tags']
+        if image_exif_info is None or \
+            'DateTimeOriginal' not in image_exif_info or \
+            image_exif_info['DateTimeOriginal'] is None:
+            dt_tag = 'unknown_datetime'
+            print('Warning: no datetime for {}'.format(fn_relative))
+        else:
+            # Colons and spaces are replaced to keep the timestamp filename-friendly
+            dt_tag = str(image_exif_info['DateTimeOriginal']).replace(':','-').replace(' ','_').strip()
+        # Fold the relative folder structure into the filename itself
+        flat_filename = fn_relative.replace('\\','/').replace('/','_')
+        output_fn_relative = insert_before_extension(flat_filename,dt_tag)
+        input_fn_relative_to_output_fn_relative[fn_relative] = output_fn_relative
+    if not dry_run:
+        input_fn_abs_to_output_fn_abs = {
+            os.path.join(input_folder,input_fn_relative):
+                os.path.join(output_folder,output_fn_relative)
+            for input_fn_relative,output_fn_relative in
+                input_fn_relative_to_output_fn_relative.items()
+        }
+        # Copy once, after the full mapping has been built.  Launching the copy from inside
+        # the mapping loop re-copies every previously-mapped file on each iteration.
+        parallel_copy_files(input_file_to_output_file=input_fn_abs_to_output_fn_abs,
+                            max_workers=n_copy_workers,
+                            use_threads=True,
+                            overwrite=True,
+                            verbose=verbose)
+    return input_fn_relative_to_output_fn_relative
+# ...def rename_images()
+#%% Interactive driver
+# Scratch cells for running rename_images() by hand.  The `if False:` guard means nothing in
+# this block executes on import or when the script is run; presumably the `#%%` cells are
+# meant to be executed one at a time in an IDE.
+if False:
+    pass
+    #%% Configure options
+    # Hard-coded example paths from a local machine; edit before running
+    input_folder = r'G:\camera_traps\camera_trap_videos\2024.05.25\cam3'
+    output_folder = r'G:\camera_traps\camera_trap_videos\2024.05.25\cam3_flat'
+    dry_run = False
+    verbose = True
+    read_exif_options = ReadExifOptions()
+    # NOTE(review): 'DateTime' appears twice in this list
+    read_exif_options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime',
+                               'DateTimeOriginal']
+    read_exif_options.n_workers = 8
+    read_exif_options.verbose = verbose
+    n_copy_workers = 8
+    #%% Programmatic execution
+    # The return value maps relative input filenames to relative output filenames
+    input_fn_relative_to_output_fn_relative = rename_images(input_folder,
+                                                            output_folder,
+                                                            dry_run=dry_run,
+                                                            verbose=verbose,
+                                                            read_exif_options=read_exif_options,
+                                                            n_copy_workers=n_copy_workers)
+#%% Command-line driver
+import sys,argparse
+def main():
+    """
+    Command-line entry point: reads the input folder, the output folder, and the optional
+    --dry_run flag from the command line, then calls rename_images() with verbose copying
+    enabled and default EXIF-reading options.
+    """
+    description = \
+        'Copies images from a possibly-nested folder structure to a flat folder structure, ' \
+        'adding datetime information from EXIF to each filename'
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument('input_folder',
+                        type=str,
+                        help='The folder to search for images, always recursive')
+    parser.add_argument('output_folder',
+                        type=str,
+                        help='The folder to which we should write the flattened image structure')
+    parser.add_argument('--dry_run',
+                        action='store_true',
+                        help="Only map images, don't actually copy")
+    # With no arguments at all, show the help text and quit instead of reporting a usage error
+    if not sys.argv[1:]:
+        parser.print_help()
+        parser.exit()
+    cli_args = parser.parse_args()
+    rename_images(cli_args.input_folder,
+                  cli_args.output_folder,
+                  dry_run=cli_args.dry_run,
+                  verbose=True,
+                  read_exif_options=None)
+if __name__ == '__main__':
+    main()

megadetector/data_management/resize_coco_dataset.py ADDED Viewed

@@ -0,0 +1,189 @@
+"""
+resize_coco_dataset.py
+Given a COCO-formatted dataset, resizes all the images to a target size,
+scaling bounding boxes accordingly.
+"""
+#%% Imports and constants
+import os
+import json
+import shutil
+from tqdm import tqdm
+from collections import defaultdict
+from megadetector.utils.path_utils import insert_before_extension
+from megadetector.visualization.visualization_utils import \
+    open_image, resize_image, exif_preserving_save
+#%% Functions
+def resize_coco_dataset(input_folder,input_filename,
+                        output_folder,output_filename,
+                        target_size=(-1,-1),
+                        correct_size_image_handling='copy'):
+    """
+    Given a COCO-formatted dataset (images in input_folder, data in input_filename), resizes
+    all the images to a target size (in output_folder) and scales bounding boxes accordingly.
+    Args:
+        input_folder (str): the folder where images live; filenames in [input_filename] should
+            be relative to [input_folder]
+        input_filename (str): the (input) COCO-formatted .json file containing annotations
+        output_folder (str): the folder to which we should write resized images; can be the
+            same as [input_folder], in which case images are over-written
+        output_filename (str): the COCO-formatted .json file we should generate that refers to
+            the resized images
+        target_size (list or tuple of ints): this should be a tuple/list of ints, with length 2 (w,h).
+            If either dimension is -1, aspect ratio will be preserved.  If both dimensions are -1, this means
+            "keep the original size".  If both dimensions are -1 and correct_size_image_handling is 'copy', this
+            function is basically a no-op.
+        correct_size_image_handling (str): can be 'copy' (in which case the original image is just copied
+            to the output folder) or 'rewrite' (in which case the image is opened via PIL and re-written,
+            attempting to preserve the same quality).  The only reason to use 'rewrite' is the case where
+            you're superstitious about biases coming from images in a training set being written by different
+            image encoders.
+    Returns:
+        dict: the COCO database with resized images, identical to the content of [output_filename]
+    Raises:
+        ValueError: if an image needs no resizing and [correct_size_image_handling] is neither
+            'copy' nor 'rewrite'
+    """
+    # Read input data
+    with open(input_filename,'r') as f:
+        d = json.load(f)
+    # Map image IDs to annotations
+    image_id_to_annotations = defaultdict(list)
+    for ann in d['annotations']:
+        image_id_to_annotations[ann['image_id']].append(ann)
+    # For each image
+    # TODO: this is trivially parallelizable
+    for im in tqdm(d['images']):
+        input_fn_relative = im['file_name']
+        input_fn_abs = os.path.join(input_folder,input_fn_relative)
+        assert os.path.isfile(input_fn_abs), "Can't find image file {}".format(input_fn_abs)
+        output_fn_abs = os.path.join(output_folder,input_fn_relative)
+        os.makedirs(os.path.dirname(output_fn_abs),exist_ok=True)
+        pil_im = open_image(input_fn_abs)
+        input_w = pil_im.width
+        input_h = pil_im.height
+        image_is_already_target_size = \
+            (input_w == target_size[0]) and (input_h == target_size[1])
+        preserve_original_size = \
+            (target_size[0] == -1) and (target_size[1] == -1)
+        # If the image is already the right size...
+        if (image_is_already_target_size or preserve_original_size):
+            output_w = input_w
+            output_h = input_h
+            if correct_size_image_handling == 'copy':
+                # When resizing in place (output_folder is input_folder), source and
+                # destination are the same file; shutil.copyfile raises SameFileError in
+                # that case, and there is nothing to copy anyway.
+                in_place = os.path.exists(output_fn_abs) and \
+                    os.path.samefile(input_fn_abs,output_fn_abs)
+                if not in_place:
+                    shutil.copyfile(input_fn_abs,output_fn_abs)
+            elif correct_size_image_handling == 'rewrite':
+                exif_preserving_save(pil_im,output_fn_abs)
+            else:
+                raise ValueError('Unrecognized value {} for correct_size_image_handling'.format(
+                    correct_size_image_handling))
+        else:
+            pil_im = resize_image(pil_im, target_size[0], target_size[1])
+            output_w = pil_im.width
+            output_h = pil_im.height
+            exif_preserving_save(pil_im,output_fn_abs)
+        im['width'] = output_w
+        im['height'] = output_h
+        # Do we need to scale the boxes for this image?
+        if (output_w != input_w) or (output_h != input_h):
+            width_scale = output_w/input_w
+            height_scale = output_h/input_h
+            for ann in image_id_to_annotations[im['id']]:
+                if 'bbox' in ann:
+                    # boxes are [x,y,w,h]
+                    bbox = ann['bbox']
+                    ann['bbox'] = \
+                        [bbox[0] * width_scale,
+                         bbox[1] * height_scale,
+                         bbox[2] * width_scale,
+                         bbox[3] * height_scale]
+                # ...if this annotation has a box
+            # ...for each annotation
+    # ...for each image
+    # Write output file, creating its parent folder if necessary
+    output_dir = os.path.dirname(output_filename)
+    if output_dir:
+        os.makedirs(output_dir,exist_ok=True)
+    with open(output_filename,'w') as f:
+        json.dump(d,f,indent=1)
+    return d
+# ...def resize_coco_dataset(...)
+#%% Interactive driver
+# Scratch cells for exercising resize_coco_dataset() by hand.  The `if False:` guard means
+# nothing in this block executes on import; presumably the `#%%` cells are meant to be run
+# one at a time in an IDE.
+if False:
+    pass
+    #%% Test resizing
+    # Hard-coded example paths under the user's home folder; edit before running
+    input_folder = os.path.expanduser('~/data/usgs-tegus/usgs-kissel-training')
+    input_filename = os.path.expanduser('~/data/usgs-tegus/usgs-kissel-training.json')
+    # Width 1600; per the resize_coco_dataset docstring, -1 preserves aspect ratio
+    target_size = (1600,-1)
+    output_filename = insert_before_extension(input_filename,'resized-test')
+    output_folder = input_folder + '-resized-test'
+    correct_size_image_handling = 'rewrite'
+    resize_coco_dataset(input_folder,input_filename,
+                        output_folder,output_filename,
+                        target_size=target_size,
+                        correct_size_image_handling=correct_size_image_handling)
+    #%% Preview
+    # Render the resized dataset to an HTML preview and open it
+    from megadetector.visualization import visualize_db
+    options = visualize_db.DbVizOptions()
+    options.parallelize_rendering = True
+    options.viz_size = (900, -1)
+    options.num_to_visualize = 5000
+    html_file,_ = visualize_db.visualize_db(output_filename,
+                                              os.path.expanduser('~/tmp/resize_coco_preview'),
+                                              output_folder,options)
+    from megadetector.utils import path_utils # noqa
+    path_utils.open_file(html_file)
+#%% Command-line driver
+# TODO

megadetector 5.0.11__py3-none-any.whl → 5.0.13__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.11__py3-none-any.whl → 5.0.13__py3-none-any.whl