megadetector 5.0.11__py3-none-any.whl → 5.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic; see the registry's advisory page for more details.

Files changed (201):
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +98 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +152 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +92 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +126 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +610 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +239 -0
  58. megadetector/data_management/cct_json_utils.py +395 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +272 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +477 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +796 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +874 -0
  129. megadetector/data_management/read_exif.py +681 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/resize_coco_dataset.py +189 -0
  133. megadetector/data_management/wi_download_csv_to_coco.py +246 -0
  134. megadetector/data_management/yolo_output_to_md_output.py +441 -0
  135. megadetector/data_management/yolo_to_coco.py +676 -0
  136. megadetector/detection/__init__.py +0 -0
  137. megadetector/detection/detector_training/__init__.py +0 -0
  138. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  139. megadetector/detection/process_video.py +702 -0
  140. megadetector/detection/pytorch_detector.py +341 -0
  141. megadetector/detection/run_detector.py +779 -0
  142. megadetector/detection/run_detector_batch.py +1219 -0
  143. megadetector/detection/run_inference_with_yolov5_val.py +917 -0
  144. megadetector/detection/run_tiled_inference.py +934 -0
  145. megadetector/detection/tf_detector.py +189 -0
  146. megadetector/detection/video_utils.py +606 -0
  147. megadetector/postprocessing/__init__.py +0 -0
  148. megadetector/postprocessing/add_max_conf.py +64 -0
  149. megadetector/postprocessing/categorize_detections_by_size.py +163 -0
  150. megadetector/postprocessing/combine_api_outputs.py +249 -0
  151. megadetector/postprocessing/compare_batch_results.py +958 -0
  152. megadetector/postprocessing/convert_output_format.py +396 -0
  153. megadetector/postprocessing/load_api_results.py +195 -0
  154. megadetector/postprocessing/md_to_coco.py +310 -0
  155. megadetector/postprocessing/md_to_labelme.py +330 -0
  156. megadetector/postprocessing/merge_detections.py +401 -0
  157. megadetector/postprocessing/postprocess_batch_results.py +1902 -0
  158. megadetector/postprocessing/remap_detection_categories.py +170 -0
  159. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  160. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  161. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  162. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1631 -0
  163. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  164. megadetector/postprocessing/subset_json_detector_output.py +696 -0
  165. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  166. megadetector/taxonomy_mapping/__init__.py +0 -0
  167. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  168. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  169. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  170. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +590 -0
  171. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  172. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  173. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  174. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  175. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  176. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  177. megadetector/utils/__init__.py +0 -0
  178. megadetector/utils/azure_utils.py +178 -0
  179. megadetector/utils/ct_utils.py +612 -0
  180. megadetector/utils/directory_listing.py +246 -0
  181. megadetector/utils/md_tests.py +968 -0
  182. megadetector/utils/path_utils.py +1044 -0
  183. megadetector/utils/process_utils.py +157 -0
  184. megadetector/utils/sas_blob_utils.py +509 -0
  185. megadetector/utils/split_locations_into_train_val.py +228 -0
  186. megadetector/utils/string_utils.py +92 -0
  187. megadetector/utils/url_utils.py +323 -0
  188. megadetector/utils/write_html_image_list.py +225 -0
  189. megadetector/visualization/__init__.py +0 -0
  190. megadetector/visualization/plot_utils.py +293 -0
  191. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  192. megadetector/visualization/visualization_utils.py +1536 -0
  193. megadetector/visualization/visualize_db.py +550 -0
  194. megadetector/visualization/visualize_detector_output.py +405 -0
  195. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/METADATA +1 -1
  196. megadetector-5.0.12.dist-info/RECORD +199 -0
  197. megadetector-5.0.12.dist-info/top_level.txt +1 -0
  198. megadetector-5.0.11.dist-info/RECORD +0 -5
  199. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  200. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/LICENSE +0 -0
  201. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/WHEEL +0 -0
@@ -0,0 +1,289 @@
1
+ """
2
+
3
+ cct_to_wi.py
4
+
5
+ Converts COCO Camera Traps .json files to the Wildlife Insights
6
+ batch upload format.
7
+
8
+ **This is very much just a demo script; all the relevant constants are hard-coded
9
+ at the top of main().**
10
+
11
+ But given that caveat, it works. You need to set up all the paths in the "paths" cell
12
+ at the top of main().
13
+
14
+ Also see:
15
+
16
+ * https://github.com/ConservationInternational/Wildlife-Insights----Data-Migration
17
+ * https://data.naturalsciences.org/wildlife-insights/taxonomy/search
18
+
19
+ """
20
+
21
+ #%% Imports
22
+
23
+ import os
24
+ import json
25
+ import pandas as pd
26
+ from collections import defaultdict
27
+
28
+
29
+ #%% Main wrapper
30
+
31
def main():
    """
    Converts COCO Camera Traps .json files to the Wildlife Insights
    batch upload format; to use this, you need to modify all the paths in the "Paths"
    cell.

    This is a demo script: all project/camera/deployment metadata is hard-coded
    below, and the output is one .csv file per WI batch-upload template
    (project, camera, deployment, images) written to [output_base].
    """

    #%% Paths

    # A COCO camera traps file with information about this dataset
    input_file = r'c:\temp\camera_trap_images_no_people\bellevue_camera_traps.2020-12-26.json'

    # A .json dictionary mapping common names in this dataset to dictionaries with the
    # WI taxonomy fields: common_name, wi_taxon_id, class, order, family, genus, species
    taxonomy_file = r'c:\temp\camera_trap_images_no_people\bellevue_camera_traps_to_wi.json'

    # The folder where the .csv template files live
    templates_dir = r'c:\temp\wi_batch_upload_templates'

    # The folder to which you want to write WI-formatted .csv files
    output_base = r'c:\temp\wi_output'


    #%% Path validation

    assert os.path.isfile(input_file)
    assert os.path.isfile(taxonomy_file)
    assert os.path.isdir(templates_dir)
    os.makedirs(output_base,exist_ok = True)


    #%% Constants

    # File names of the WI batch-upload templates; each template is a single
    # header row listing the expected columns.
    projects_file_name = 'Template Wildlife Insights Batch Upload - Projectv1.0.csv'
    deployments_file_name = 'Template Wildlife Insights Batch Upload - Deploymentv1.0.csv'
    images_file_name = 'Template Wildlife Insights Batch Upload - Imagev1.0.csv'
    cameras_file_name = 'Template Wildlife Insights Batch Upload - Camerav1.0.csv'

    assert all([os.path.isfile(os.path.join(templates_dir,fn)) for fn in \
                [projects_file_name,deployments_file_name,images_file_name,cameras_file_name]])


    #%% Project information

    # One row in the project .csv; all values are strings
    project_info = {}
    project_info['project_name'] = 'Bellevue Camera Traps'
    project_info['project_id'] = 'bct_001'
    project_info['project_short_name'] = 'BCT'
    project_info['project_objectives'] = 'none'
    project_info['project_species'] = 'Multiple'
    project_info['project_species_individual'] = ''
    project_info['project_sensor_layout'] = 'Convenience'
    project_info['project_sensor_layout_targeted_type'] = ''
    project_info['project_bait_use'] = 'No'
    project_info['project_bait_type'] = 'None'
    project_info['project_stratification'] = 'No'
    project_info['project_stratification_type'] = ''
    project_info['project_sensor_method'] = 'Sensor Detection'
    project_info['project_individual_animals'] = 'No'
    project_info['project_admin'] = 'Dan Morris'
    project_info['project_admin_email'] = 'cameratraps@lila.science'
    project_info['country_code'] = 'USA'
    project_info['embargo'] = str(0)
    project_info['initiative_id'] = ''
    project_info['metadata_license'] = 'CC0'
    project_info['image_license'] = 'CC0'

    project_info['project_blank_images'] = 'No'
    project_info['project_sensor_cluster'] = 'No'

    # One row in the camera .csv
    camera_info = {}
    camera_info['project_id'] = project_info['project_id']
    camera_info['camera_id'] = '0000'
    camera_info['make'] = ''
    camera_info['model'] = ''
    camera_info['serial_number'] = ''
    camera_info['year_purchased'] = ''

    # One row in the deployment .csv
    deployment_info = {}

    deployment_info['project_id'] = project_info['project_id']
    deployment_info['deployment_id'] = 'test_deployment'
    deployment_info['subproject_name'] = 'test_subproject'
    deployment_info['subproject_design'] = ''
    deployment_info['placename'] = 'yard'

    # Bug fix: these two values were previously swapped (longitude was '47.6101'
    # and latitude was '-122.2015', which is an out-of-range latitude).  Bellevue,
    # WA is at latitude 47.6101 N, longitude -122.2015 W.
    deployment_info['longitude'] = '-122.2015'
    deployment_info['latitude'] = '47.6101'

    deployment_info['start_date'] = '2016-01-01 00:00:00'
    deployment_info['end_date'] = '2026-01-01 00:00:00'
    deployment_info['event_name'] = ''
    deployment_info['event_description'] = ''
    deployment_info['event_type'] = ''
    deployment_info['bait_type'] = ''
    deployment_info['bait_description'] = ''
    deployment_info['feature_type'] = 'None'
    deployment_info['feature_type_methodology'] = ''
    deployment_info['camera_id'] = camera_info['camera_id']
    deployment_info['quiet_period'] = str(60)
    deployment_info['camera_functioning'] = 'Camera Functioning'
    deployment_info['sensor_height'] = 'Chest height'
    deployment_info['height_other'] = ''
    deployment_info['sensor_orientation'] = 'Parallel'
    deployment_info['orientation_other'] = ''
    deployment_info['recorded_by'] = 'Dan Morris'

    # Fields applied to every row in the images .csv
    image_info = {}
    image_info['identified_by'] = 'Dan Morris'


    #%% Read templates

    def parse_fields(templates_dir,file_name):
        """Read the single header row of a WI .csv template; return its column names."""

        with open(os.path.join(templates_dir,file_name),'r') as f:
            lines = f.readlines()

        # Templates may contain blank/comma-only padding lines; ignore them
        lines = [s.strip() for s in lines if len(s.strip().replace(',','')) > 0]
        assert len(lines) == 1, 'Error processing template {}'.format(file_name)
        fields = lines[0].split(',')
        print('Parsed {} columns from {}'.format(len(fields),file_name))
        return fields

    projects_fields = parse_fields(templates_dir,projects_file_name)
    deployments_fields = parse_fields(templates_dir,deployments_file_name)
    images_fields = parse_fields(templates_dir,images_file_name)
    cameras_fields = parse_fields(templates_dir,cameras_file_name)


    #%% Compare dictionary to template lists

    def compare_info_to_template(info,template_fields,name):
        """Assert that [info]'s keys exactly match the template's column names."""

        for s in info.keys():
            assert s in template_fields,'Field {} not specified in {}_fields'.format(s,name)
        for s in template_fields:
            assert s in info.keys(),'Field {} not specified in {}_info'.format(s,name)

    def write_table(file_name,info,template_fields):
        """Write a two-line .csv (header row + one value row) to [output_base]."""

        assert len(info) == len(template_fields)

        project_output_file = os.path.join(output_base,file_name)
        with open(project_output_file,'w') as f:

            # Write the header
            for i_field,s in enumerate(template_fields):
                f.write(s)
                if i_field != len(template_fields)-1:
                    f.write(',')
            f.write('\n')

            # Write values, in template column order
            for i_field,s in enumerate(template_fields):
                f.write(info[s])
                if i_field != len(template_fields)-1:
                    f.write(',')
            f.write('\n')


    #%% Project file

    compare_info_to_template(project_info,projects_fields,'project')
    write_table(projects_file_name,project_info,projects_fields)


    #%% Camera file

    compare_info_to_template(camera_info,cameras_fields,'camera')
    write_table(cameras_file_name,camera_info,cameras_fields)


    #%% Deployment file

    compare_info_to_template(deployment_info,deployments_fields,'deployment')
    write_table(deployments_file_name,deployment_info,deployments_fields)


    #%% Images file

    # Read .json file with image information
    with open(input_file,'r') as f:
        input_data = json.load(f)

    # Read taxonomy dictionary
    with open(taxonomy_file,'r') as f:
        taxonomy_mapping = json.load(f)

    url_base = taxonomy_mapping['url_base']
    taxonomy_mapping = taxonomy_mapping['taxonomy']

    category_id_to_name = {cat['id']:cat['name'] for cat in input_data['categories']}

    # Map each image ID to the category names assigned to it
    image_id_to_annotations = defaultdict(list)

    annotations = input_data['annotations']

    for annotation in annotations:
        image_id_to_annotations[annotation['image_id']].append(
            category_id_to_name[annotation['category_id']])

    rows = []

    for im in input_data['images']:

        row = {}

        # Images are referenced by URL; file names use forward slashes
        url = url_base + im['file_name'].replace('\\','/')
        row['project_id'] = project_info['project_id']
        row['deployment_id'] = deployment_info['deployment_id']
        row['image_id'] = im['id']
        row['location'] = url
        row['identified_by'] = image_info['identified_by']

        # This script assumes exactly one label per image
        category_names = image_id_to_annotations[im['id']]
        assert len(category_names) == 1
        category_name = category_names[0]

        # The seven WI taxonomy fields for this category
        taxon_info = taxonomy_mapping[category_name]

        assert len(taxon_info.keys()) == 7

        for s in taxon_info.keys():
            row[s] = taxon_info[s]

        # We don't have counts, but we can differentiate between zero and 1
        if category_name == 'empty':
            row['number_of_objects'] = 0
        else:
            row['number_of_objects'] = 1

        row['uncertainty'] = None
        assert isinstance(im['datetime'],str)
        row['timestamp'] = im['datetime']
        row['highlighted'] = 0
        row['age'] = None
        row['sex'] = None
        row['animal_recognizable'] = 'No'
        row['individual_id'] = None
        row['individual_animal_notes'] = None
        row['markings'] = None

        assert len(row) == len(images_fields)
        rows.append(row)

    # ...for each image

    df = pd.DataFrame(rows)

    df.to_csv(os.path.join(output_base,images_file_name),index=False)

# ...main()
284
+
285
+
286
#%% Command-line driver

# Script entry point; runs the demo conversion using the hard-coded paths in main()
if __name__ == '__main__':
    main()
@@ -0,0 +1,272 @@
1
+ """
2
+
3
+ coco_to_labelme.py
4
+
5
+ Converts a COCO dataset to labelme format (one .json per image file).
6
+
7
+ If you want to convert YOLO-formatted data to labelme format, use yolo_to_coco, then
8
+ coco_to_labelme.
9
+
10
+ """
11
+
12
+ #%% Imports and constants
13
+
14
+ import os
15
+ import json
16
+
17
+ from tqdm import tqdm
18
+ from collections import defaultdict
19
+
20
+ from megadetector.visualization.visualization_utils import open_image
21
+
22
+
23
+ #%% Functions
24
+
25
def get_labelme_dict_for_image_from_coco_record(im,annotations,categories,info=None):
    """
    For the given image struct in COCO format and associated list of annotations, reformats the
    detections into labelme format.

    Args:
        im (dict): image dict, as loaded from a COCO .json file; 'height' and 'width' are required
        annotations (list): a list of annotations that refer to this image (this function errors if
            that's not the case)
        categories (list): a list of category dicts in COCO format ({'id':x,'name':'s'})
        info (dict, optional): a dict to store in a non-standard "custom_info" field in the output

    Returns:
        dict: a dict in labelme format, suitable for writing to a labelme .json file
    """

    result = {}

    # "custom_info" is not part of the labelme format; store it first when supplied
    if info is not None:
        result['custom_info'] = info

    result['version'] = '5.3.0a0'
    result['flags'] = {}
    result['shapes'] = []
    result['imagePath'] = os.path.basename(im['file_name'])
    result['imageHeight'] = im['height']
    result['imageWidth'] = im['width']
    result['imageData'] = None

    # Keep the COCO category list around, in case we want to reconstruct the
    # original category IDs later
    result['coco_categories'] = categories

    id_to_name = {c['id']:c['name'] for c in categories}

    # Image-level flags, if present, override the empty default
    if 'flags' in im:
        result['flags'] = im['flags']

    for ann in annotations:

        assert ann['image_id'] == im['id'], 'Annotation {} does not refer to image {}'.format(
            ann['id'],im['id'])

        # Only box annotations can be represented as labelme shapes
        if 'bbox' not in ann:
            continue

        # COCO boxes are [x_min, y_min, width_of_box, height_of_box] (absolute);
        # labelme boxes are [[x0,y0],[x1,y1]] (absolute)
        x_min = ann['bbox'][0]
        y_min = ann['bbox'][1]
        box_w = ann['bbox'][2]
        box_h = ann['bbox'][3]

        result['shapes'].append({
            'label':id_to_name[ann['category_id']],
            'shape_type':'rectangle',
            'description':'',
            'group_id':None,
            'points':[[x_min,y_min],[x_min+box_w,y_min+box_h]]
            })

    # ...for each annotation

    return result

# ...def get_labelme_dict_for_image()
93
+
94
+
95
def coco_to_labelme(coco_data,image_base,overwrite=False,bypass_image_size_check=False,verbose=False):
    """
    For all the images in [coco_data] (a dict or a filename), write a .json file in
    labelme format alongside the corresponding relative path within image_base.

    Args:
        coco_data (dict or str): a COCO-formatted dict, or a path to a COCO .json file
        image_base (str): the folder containing the images referenced in [coco_data];
            each labelme .json file is written next to its image, with the same base name
        overwrite (bool, optional): whether to overwrite existing labelme .json files
        bypass_image_size_check (bool, optional): skip verifying that each image file
            exists and carries 'height'/'width' fields (otherwise missing sizes are
            read from the image files themselves)
        verbose (bool, optional): enable additional debug output
    """

    # Load COCO data if necessary
    if isinstance(coco_data,str):
        with open(coco_data,'r') as f:
            coco_data = json.load(f)
    assert isinstance(coco_data,dict)


    ## Read image sizes if necessary

    if bypass_image_size_check:

        print('Bypassing size check')

    else:

        # TODO: parallelize this loop

        print('Reading/validating image sizes...')

        # im = coco_data['images'][0]
        for im in tqdm(coco_data['images']):

            # Make sure this file exists
            im_full_path = os.path.join(image_base,im['file_name'])
            assert os.path.isfile(im_full_path), 'Image file {} does not exist'.format(im_full_path)

            # Load w/h information if necessary
            if 'height' not in im or 'width' not in im:

                try:
                    pil_im = open_image(im_full_path)
                    im['width'] = pil_im.width
                    im['height'] = pil_im.height
                except Exception:
                    # Unreadable images are marked as failed rather than aborting the
                    # whole run; they will be skipped during .json generation below
                    print('Warning: cannot open image {}'.format(im_full_path))
                    if 'failure' not in im:
                        im['failure'] = 'Failure image access'

            # ...if we need to read w/h information

        # ...for each image

    # ...if we need to load image sizes


    ## Generate labelme files

    print('Generating .json files...')

    # Map image ID --> list of annotation dicts referring to that image
    image_id_to_annotations = defaultdict(list)
    for ann in coco_data['annotations']:
        image_id_to_annotations[ann['image_id']].append(ann)

    n_json_files_written = 0
    n_json_files_error = 0
    n_json_files_exist = 0

    # Write output
    for im in tqdm(coco_data['images']):

        # Skip this image if it failed to load in whatever system generated this COCO file
        skip_image = False

        # Errors are represented differently depending on the source
        for error_string in ('failure','error'):
            if (error_string in im) and (im[error_string] is not None):
                if verbose:
                    print('Warning: skipping labelme file generation for failed image {}'.format(
                        im['file_name']))
                skip_image = True
                n_json_files_error += 1
                break
        if skip_image:
            continue

        # The labelme file gets the image's path with a .json extension
        im_full_path = os.path.join(image_base,im['file_name'])
        json_path = os.path.splitext(im_full_path)[0] + '.json'

        if (not overwrite) and (os.path.isfile(json_path)):
            if verbose:
                print('Skipping existing file {}'.format(json_path))
            n_json_files_exist += 1
            continue

        annotations_this_image = image_id_to_annotations[im['id']]
        output_dict = get_labelme_dict_for_image_from_coco_record(im,
                                                                  annotations_this_image,
                                                                  coco_data['categories'],
                                                                  info=None)

        n_json_files_written += 1
        with open(json_path,'w') as f:
            json.dump(output_dict,f,indent=1)

    # ...for each image

    print('\nWrote {} .json files (skipped {} for errors, {} because they exist)'.format(
        n_json_files_written,n_json_files_error,n_json_files_exist))

# ...def coco_to_labelme()
201
+
202
+
203
#%% Interactive driver

# Scratch cells for interactive (notebook-style) execution; this branch is never
# taken when the module is imported or run as a script.
if False:

    pass

    #%% Configure options

    coco_file = \
        r'C:\\temp\\snapshot-exploration\\images\\training-images-good\\training-images-good_from_yolo.json'
    image_folder = os.path.dirname(coco_file)
    overwrite = True


    #%% Programmatic execution

    coco_to_labelme(coco_data=coco_file,image_base=image_folder,overwrite=overwrite)


    #%% Command-line execution

    # Build the equivalent command line and copy it to the clipboard
    s = 'python coco_to_labelme.py "{}" "{}"'.format(coco_file,image_folder)
    if overwrite:
        s += ' --overwrite'

    print(s)
    import clipboard; clipboard.copy(s)


    #%% Opening labelme

    # Build a command line that opens the image folder in labelme, and copy it
    # to the clipboard
    s = 'python labelme {}'.format(image_folder)
    print(s)
    import clipboard; clipboard.copy(s)
237
+
238
+
239
#%% Command-line driver

import sys,argparse

def main():
    """Command-line entry point: parse arguments and run coco_to_labelme()."""

    parser = argparse.ArgumentParser(
        description='Convert a COCO database to labelme annotation format')

    parser.add_argument('coco_file', type=str,
                        help='Path to COCO data file (.json)')
    parser.add_argument('image_base', type=str,
                        help='Path to images (also the output folder)')
    parser.add_argument('--overwrite', action='store_true',
                        help='Overwrite existing labelme .json files')

    # With no arguments at all, print usage rather than an error
    if len(sys.argv[1:]) == 0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    coco_to_labelme(coco_data=args.coco_file,image_base=args.image_base,overwrite=args.overwrite)

if __name__ == '__main__':
    main()