megadetector 5.0.11__py3-none-any.whl → 5.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic; see the release advisory for more details.

Files changed (201):
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +98 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +152 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +92 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +126 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +610 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +239 -0
  58. megadetector/data_management/cct_json_utils.py +395 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +272 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +477 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +796 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +874 -0
  129. megadetector/data_management/read_exif.py +681 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/resize_coco_dataset.py +189 -0
  133. megadetector/data_management/wi_download_csv_to_coco.py +246 -0
  134. megadetector/data_management/yolo_output_to_md_output.py +441 -0
  135. megadetector/data_management/yolo_to_coco.py +676 -0
  136. megadetector/detection/__init__.py +0 -0
  137. megadetector/detection/detector_training/__init__.py +0 -0
  138. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  139. megadetector/detection/process_video.py +702 -0
  140. megadetector/detection/pytorch_detector.py +341 -0
  141. megadetector/detection/run_detector.py +779 -0
  142. megadetector/detection/run_detector_batch.py +1219 -0
  143. megadetector/detection/run_inference_with_yolov5_val.py +917 -0
  144. megadetector/detection/run_tiled_inference.py +934 -0
  145. megadetector/detection/tf_detector.py +189 -0
  146. megadetector/detection/video_utils.py +606 -0
  147. megadetector/postprocessing/__init__.py +0 -0
  148. megadetector/postprocessing/add_max_conf.py +64 -0
  149. megadetector/postprocessing/categorize_detections_by_size.py +163 -0
  150. megadetector/postprocessing/combine_api_outputs.py +249 -0
  151. megadetector/postprocessing/compare_batch_results.py +958 -0
  152. megadetector/postprocessing/convert_output_format.py +396 -0
  153. megadetector/postprocessing/load_api_results.py +195 -0
  154. megadetector/postprocessing/md_to_coco.py +310 -0
  155. megadetector/postprocessing/md_to_labelme.py +330 -0
  156. megadetector/postprocessing/merge_detections.py +401 -0
  157. megadetector/postprocessing/postprocess_batch_results.py +1902 -0
  158. megadetector/postprocessing/remap_detection_categories.py +170 -0
  159. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  160. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  161. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  162. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1631 -0
  163. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  164. megadetector/postprocessing/subset_json_detector_output.py +696 -0
  165. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  166. megadetector/taxonomy_mapping/__init__.py +0 -0
  167. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  168. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  169. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  170. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +590 -0
  171. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  172. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  173. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  174. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  175. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  176. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  177. megadetector/utils/__init__.py +0 -0
  178. megadetector/utils/azure_utils.py +178 -0
  179. megadetector/utils/ct_utils.py +612 -0
  180. megadetector/utils/directory_listing.py +246 -0
  181. megadetector/utils/md_tests.py +968 -0
  182. megadetector/utils/path_utils.py +1044 -0
  183. megadetector/utils/process_utils.py +157 -0
  184. megadetector/utils/sas_blob_utils.py +509 -0
  185. megadetector/utils/split_locations_into_train_val.py +228 -0
  186. megadetector/utils/string_utils.py +92 -0
  187. megadetector/utils/url_utils.py +323 -0
  188. megadetector/utils/write_html_image_list.py +225 -0
  189. megadetector/visualization/__init__.py +0 -0
  190. megadetector/visualization/plot_utils.py +293 -0
  191. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  192. megadetector/visualization/visualization_utils.py +1536 -0
  193. megadetector/visualization/visualize_db.py +550 -0
  194. megadetector/visualization/visualize_detector_output.py +405 -0
  195. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/METADATA +1 -1
  196. megadetector-5.0.12.dist-info/RECORD +199 -0
  197. megadetector-5.0.12.dist-info/top_level.txt +1 -0
  198. megadetector-5.0.11.dist-info/RECORD +0 -5
  199. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  200. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/LICENSE +0 -0
  201. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/WHEEL +0 -0
"""

idfg_iwildcam_lila_prep.py

Adding class labels (from the private test .csv) to the iWildCam 2019 IDFG
test set, in preparation for release on LILA.

This version works with the public iWildCam release images.

"""

#%% ############ Take one, from iWildCam .json files ############

#%% Imports and constants

import uuid
import json
import os

from tqdm import tqdm

base_folder = r'h:\iWildCam_2019_IDFG'
input_json = os.path.join(base_folder,'iWildCam_2019_IDFG_info.json')
input_csv = os.path.join(base_folder,'IDFG_eval_public_v_private.csv')
output_json = os.path.join(base_folder,'idaho_camera_traps.json')

assert os.path.isfile(input_json)
assert os.path.isfile(input_csv)


#%% Read input files

with open(input_json,'r') as f:
    input_data = json.load(f)

with open(input_csv,'r') as f:
    private_csv_lines = f.readlines()

private_csv_lines = [s.strip() for s in private_csv_lines]

# Remove the header line
assert private_csv_lines[0] == 'Id,Category,Usage'
private_csv_lines = private_csv_lines[1:]

print('Read {} annotations for {} images'.format(len(private_csv_lines),len(input_data['images'])))

# This data set has exactly one label per image
assert len(private_csv_lines) == len(input_data['images'])
n_images = len(input_data['images'])


#%% Parse annotations

# Maps each image ID to a single (integer) category ID
image_id_to_category_id = {}

for line in tqdm(private_csv_lines):

    # Lines look like:
    #
    # b005e5b2-2c0b-11e9-bcad-06f1011196c4,1,Private

    tokens = line.split(',')
    assert len(tokens) == 3
    assert tokens[2] in ['Private','Public']
    image_id_to_category_id[tokens[0]] = int(tokens[1])

# Verify that image IDs in the .csv file were unique
assert len(image_id_to_category_id) == n_images


#%% Minor cleanup re: images

for im in tqdm(input_data['images']):
    # Strip the release-specific folder prefix from filenames
    im['file_name'] = im['file_name'].replace('iWildCam_IDFG_images/','')
    # CCT convention is string-valued location IDs
    assert isinstance(im['location'],int)
    im['location'] = str(im['location'])


#%% Create annotations

annotations = []

for image_id,category_id in tqdm(image_id_to_category_id.items()):
    ann = {}
    # uuid1 gives us a reasonably collision-proof annotation ID
    ann['id'] = str(uuid.uuid1())
    ann['image_id'] = image_id
    ann['category_id'] = category_id
    annotations.append(ann)


#%% Prepare info

info = input_data['info']
info['contributor'] = 'Images acquired by the Idaho Department of Fish and Game, dataset curated by Sara Beery'
info['description'] = 'Idaho Camera traps'
info['version'] = '2021.07.19'


#%% Minor adjustments to categories

input_categories = input_data['categories']

category_id_to_name = {cat['id']:cat['name'] for cat in input_categories}
category_name_to_id = {cat['name']:cat['id'] for cat in input_categories}
assert category_id_to_name[0] == 'empty'

# Count annotations per category, so we can drop unused categories below
category_names_to_counts = {cat['name']:0 for cat in input_categories}

for ann in annotations:
    category_name = category_id_to_name[ann['category_id']]
    category_names_to_counts[category_name] += 1

categories = []

for category_name,count in category_names_to_counts.items():

    # Remove unused categories
    if count == 0:
        continue

    category_id = category_name_to_id[category_name]

    # Name adjustments
    if category_name == 'prongs':
        category_name = 'pronghorn'

    categories.append({'id':category_id,'name':category_name})


#%% Create output

output_data = {}
output_data['images'] = input_data['images']
output_data['annotations'] = annotations
output_data['categories'] = categories
output_data['info'] = info


#%% Write output

with open(output_json,'w') as f:
    json.dump(output_data,f,indent=2)


#%% Validate .json file

from megadetector.data_management.databases import integrity_check_json_db

options = integrity_check_json_db.IntegrityCheckOptions()
options.baseDir = os.path.join(base_folder,'images'); assert os.path.isdir(options.baseDir)
options.bCheckImageSizes = False
options.bCheckImageExistence = False
options.bFindUnusedImages = False

_, _, _ = integrity_check_json_db.integrity_check_json_db(output_json, options)


#%% Preview labels

from megadetector.visualization import visualize_db

viz_options = visualize_db.DbVizOptions()
viz_options.num_to_visualize = 100
viz_options.trim_to_images_with_bboxes = False
viz_options.add_search_links = False
viz_options.sort_by_filename = False
viz_options.parallelize_rendering = True
viz_options.include_filename_links = True

# viz_options.classes_to_exclude = ['test']
html_output_file, _ = visualize_db.visualize_db(db_path=output_json,
                                                output_dir=os.path.join(base_folder,'preview'),
                                                image_base_dir=os.path.join(base_folder,'images'),
                                                options=viz_options)
os.startfile(html_output_file)
#%% ############ Take two, from pre-iWildCam .json files created from IDFG .csv files ############

#%% Imports and constants

import json
import os

base_folder = r'h:\idaho-camera-traps'
input_json_sl = os.path.join(base_folder,'iWildCam_IDFG.json')
input_json = os.path.join(base_folder,'iWildCam_IDFG_ml.json')
output_json = os.path.join(base_folder,'idaho_camera_traps.json')
remote_image_base_dir = r'z:\idfg'


#%% One-time line break addition
#
# Re-serialize the single-line source .json with indentation, so it's
# human-readable.
#
# Bug fix: the original script asserted input_json's existence *before* this
# cell, which made this bootstrap step unreachable when the script is run
# top-to-bottom; the assert now follows the conversion.

if not os.path.isfile(input_json):

    # The multi-line file doesn't exist yet; the single-line source must
    assert os.path.isfile(input_json_sl)

    with open(input_json_sl,'r') as f:
        d = json.load(f)
    with open(input_json,'w') as f:
        json.dump(d,f,indent=2)

assert os.path.isfile(input_json)


#%% Read input files

with open(input_json,'r') as f:
    input_data = json.load(f)

print('Read {} annotations for {} images'.format(len(input_data['annotations']),len(input_data['images'])))


#%% Prepare info

info = {}
info['contributor'] = 'Images acquired by the Idaho Department of Fish and Game, dataset curated by Sara Beery'
info['description'] = 'Idaho Camera traps'
info['version'] = '2021.07.19'


#%% Minor adjustments to categories

input_categories = input_data['categories']
output_categories = []

for c in input_categories:
    category_name = c['name']
    category_id = c['id']
    # Name adjustments
    if category_name == 'prong':
        category_name = 'pronghorn'
    category_name = category_name.lower()
    output_categories.append({'name':category_name,'id':category_id})


#%% Minor adjustments to annotations

# CCT convention is string-valued annotation IDs
for ann in input_data['annotations']:
    ann['id'] = str(ann['id'])


#%% Create output

output_data = {}
output_data['images'] = input_data['images']
output_data['annotations'] = input_data['annotations']
output_data['categories'] = output_categories
output_data['info'] = info


#%% Write output

with open(output_json,'w') as f:
    json.dump(output_data,f,indent=2)


#%% Validate .json file

from megadetector.data_management.databases import integrity_check_json_db

options = integrity_check_json_db.IntegrityCheckOptions()
options.baseDir = remote_image_base_dir
options.bCheckImageSizes = False
options.bCheckImageExistence = False
options.bFindUnusedImages = False

_, _, _ = integrity_check_json_db.integrity_check_json_db(output_json, options)


#%% Preview labels

from megadetector.visualization import visualize_db

viz_options = visualize_db.DbVizOptions()
viz_options.num_to_visualize = 100
viz_options.trim_to_images_with_bboxes = False
viz_options.add_search_links = False
viz_options.sort_by_filename = False
viz_options.parallelize_rendering = True
viz_options.include_filename_links = True

# viz_options.classes_to_exclude = ['test']
html_output_file, _ = visualize_db.visualize_db(db_path=output_json,
                                                output_dir=os.path.join(base_folder,'preview'),
                                                image_base_dir=remote_image_base_dir,
                                                options=viz_options)
os.startfile(html_output_file)
"""

jb_csv_to_json.py

Convert a particular .csv file to CCT format. Images were not available at
the time I wrote this script, so this is much shorter than other scripts
in this folder.

"""

#%% Constants and environment

import pandas as pd
import uuid
import json

input_metadata_file = r'd:\temp\pre_bounding_box.csv'
output_file = r'd:\temp\pre_bounding_box.json'
filename_col = 'filename'
label_col = 'category'


#%% Read source data

input_metadata = pd.read_csv(input_metadata_file)

print('Read {} columns and {} rows from metadata file'.format(len(input_metadata.columns),
                                                              len(input_metadata)))


#%% Confirm filename uniqueness (this data set has one label per image)

imageFilenames = input_metadata[filename_col]

duplicateRows = []
filenamesToRows = {}

# Build up a map from filenames to a list of rows, checking image existence as we go
for iFile,fn in enumerate(imageFilenames):

    if (fn in filenamesToRows):
        duplicateRows.append(iFile)
        filenamesToRows[fn].append(iFile)
    else:
        filenamesToRows[fn] = [iFile]

# One label per image: no filename should have appeared on multiple rows
assert(len(duplicateRows) == 0)


#%% Create CCT dictionaries

images = []
annotations = []

# Map categories to integer IDs (that's what COCO likes)
nextCategoryID = 1
categories = []
categoryNamesToCategories = {}

# Category 0 is reserved for 'empty', per CCT convention
cat = {}
cat['name'] = 'empty'
cat['id'] = 0
categories.append(cat)
categoryNamesToCategories['empty'] = cat

# For each image
#
# Because in practice images are 1:1 with annotations in this data set,
# this is also a loop over annotations.

# imageName = imageFilenames[0]
for imageName in imageFilenames:

    rows = filenamesToRows[imageName]

    # As per above, this is convenient and appears to be true; asserting to be safe
    assert(len(rows) == 1)
    iRow = rows[0]

    row = input_metadata.iloc[iRow]

    im = {}
    # Filenames look like "290716114012001a1116.jpg"
    im['id'] = imageName.split('.')[0]
    im['file_name'] = imageName
    im['seq_id'] = '-1'

    images.append(im)

    categoryName = row[label_col].lower()

    # Have we seen this category before?
    if categoryName in categoryNamesToCategories:
        categoryID = categoryNamesToCategories[categoryName]['id']
    else:
        cat = {}
        categoryID = nextCategoryID
        cat['name'] = categoryName
        cat['id'] = nextCategoryID
        categories.append(cat)
        categoryNamesToCategories[categoryName] = cat
        nextCategoryID += 1

    # Create an annotation
    ann = {}

    # The Internet tells me this guarantees uniqueness to a reasonable extent, even
    # beyond the sheer improbability of collisions.
    ann['id'] = str(uuid.uuid1())
    ann['image_id'] = im['id']
    ann['category_id'] = categoryID

    annotations.append(ann)

# ...for each image

print('Finished creating dictionaries')


#%% Create info struct

info = {}
info['year'] = 2019
info['version'] = 1
info['description'] = 'COCO style database'
info['secondary_contributor'] = 'Converted to COCO .json by Dan Morris'
info['contributor'] = ''


#%% Write output

json_data = {}
json_data['images'] = images
json_data['annotations'] = annotations
json_data['categories'] = categories
json_data['info'] = info

# Bug fix: the original passed a bare open() handle to json.dump, which was
# never closed (no guaranteed flush); use a context manager instead.
with open(output_file,'w') as f:
    json.dump(json_data, f, indent=4)

print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
    len(images),len(annotations),len(categories)))


#%% Validate

from megadetector.data_management.databases import integrity_check_json_db

options = integrity_check_json_db.IntegrityCheckOptions()
sortedCategories,data = integrity_check_json_db.integrity_check_json_db(output_file, options)
"""

mcgill_to_json.py

Convert the .csv file provided for the McGill test data set to a
COCO-camera-traps .json file

"""

#%% Constants and environment

import pandas as pd
import os
import glob
import json
import uuid
import time
import ntpath
import humanfriendly
import PIL
import math

baseDir = r'D:\wildlife_data\mcgill_test'
input_metadata_file = os.path.join(baseDir, 'dan_500_photos_metadata.csv')
output_file = os.path.join(baseDir, 'mcgill_test.json')
image_directory = baseDir

assert(os.path.isdir(image_directory))
assert(os.path.isfile(input_metadata_file))


#%% Read source data

input_metadata = pd.read_csv(input_metadata_file)

print('Read {} columns and {} rows from metadata file'.format(len(input_metadata.columns),
                                                              len(input_metadata)))


#%% Map filenames to rows, verify image existence

# Create an additional column for concatenated filenames
input_metadata['relative_path'] = ''
input_metadata['full_path'] = ''

startTime = time.time()

# Maps relative filenames to rows
filenamesToRows = {}

duplicateRows = []

# Build up a map from filenames to a list of rows, checking image existence as we go
# row = input_metadata.iloc[0]
for iFile,row in input_metadata.iterrows():

    relativePath = os.path.join(row['site'],row['date_range'],str(row['camera']),
                                str(row['folder']),row['filename'])
    fullPath = os.path.join(baseDir,relativePath)

    if (relativePath in filenamesToRows):
        duplicateRows.append(iFile)
        filenamesToRows[relativePath].append(iFile)
    else:
        filenamesToRows[relativePath] = [iFile]
        assert(os.path.isfile(fullPath))

    row['relative_path'] = relativePath
    row['full_path'] = fullPath

    # iterrows() yields a copy, so write the modified row back to the frame
    input_metadata.iloc[iFile] = row

elapsed = time.time() - startTime
print('Finished verifying image existence in {}, found {} filenames with multiple labels'.format(
    humanfriendly.format_timespan(elapsed),len(duplicateRows)))

# I didn't expect this to be true a priori, but it appears to be true, and
# it saves us the trouble of checking consistency across multiple occurrences
# of an image.
assert(len(duplicateRows) == 0)


#%% Check for images that aren't included in the metadata file

# Enumerate all images
imageFullPaths = glob.glob(os.path.join(image_directory,'**/*.JPG'), recursive=True)

for iImage,imagePath in enumerate(imageFullPaths):

    imageRelPath = ntpath.relpath(imagePath, image_directory)
    assert(imageRelPath in filenamesToRows)

print('Finished checking {} images to make sure they\'re in the metadata'.format(
    len(imageFullPaths)))


#%% Create CCT dictionaries

# Also gets image sizes, so this takes ~6 minutes
#
# Implicitly checks images for overt corruptness, i.e. by not crashing.

images = []
annotations = []
categories = []

emptyCategory = {}
emptyCategory['id'] = 0
emptyCategory['name'] = 'empty'
emptyCategory['latin'] = 'empty'
emptyCategory['count'] = 0
categories.append(emptyCategory)

# Map categories to integer IDs (that's what COCO likes)
nextCategoryID = 1
labelToCategory = {'empty':emptyCategory}

# For each image
#
# Because in practice images are 1:1 with annotations in this data set,
# this is also a loop over annotations.

startTime = time.time()

# row = input_metadata.iloc[0]
for iFile,row in input_metadata.iterrows():

    relPath = row['relative_path'].replace('\\','/')
    im = {}
    # Build a unique, filesystem-safe image ID from the relative path
    im['id'] = relPath.replace('/','_').replace(' ','_')

    im['file_name'] = relPath

    im['seq_id'] = -1
    im['frame_num'] = -1

    # In the form "001a"
    im['site']= row['site']

    # Can be in the form '111' or 's46'
    im['camera'] = row['camera']

    # In the form "7/29/2016 11:40"
    im['datetime'] = row['timestamp']

    otherFields = ['motion','temp_F','n_present','n_waterhole','n_contact','notes']

    for s in otherFields:
        im[s] = row[s]

    # Check image height and width
    #
    # Bug fix: the original never closed the PIL image, leaking a file handle
    # per image; Image.open is a context manager, so use it as one.
    fullPath = row['full_path']
    assert(os.path.isfile(fullPath))
    with PIL.Image.open(fullPath) as pilImage:
        width, height = pilImage.size
    im['width'] = width
    im['height'] = height

    images.append(im)

    label = row['species']
    if not isinstance(label,str):
        # NaN is the only thing we should see that's not a string
        assert math.isnan(label)
        label = 'empty'
    else:
        label = label.lower()

    latin = row['binomial']
    if not isinstance(latin,str):
        # NaN is the only thing we should see that's not a string
        assert math.isnan(latin)
        latin = 'empty'
    else:
        latin = latin.lower()

    # Keep the latin name consistent with the common-name label for the
    # 'empty' and 'unknown' sentinel values
    if label == 'empty':
        if latin != 'empty':
            latin = 'empty'

    if label == 'unknown':
        if latin != 'unknown':
            latin = 'unknown'

    if label not in labelToCategory:
        print('Adding category {} ({})'.format(label,latin))
        category = {}
        categoryID = nextCategoryID
        category['id'] = categoryID
        nextCategoryID += 1
        category['name'] = label
        category['latin'] = latin
        category['count'] = 1
        labelToCategory[label] = category
        categories.append(category)
    else:
        category = labelToCategory[label]
        category['count'] = category['count'] + 1
        categoryID = category['id']

    # Create an annotation
    ann = {}

    # The Internet tells me this guarantees uniqueness to a reasonable extent, even
    # beyond the sheer improbability of collisions.
    ann['id'] = str(uuid.uuid1())
    ann['image_id'] = im['id']
    ann['category_id'] = categoryID

    annotations.append(ann)

# ...for each image

# Print a per-category summary
for category in categories:
    print('Category {}, count {}'.format(category['name'],category['count']))

elapsed = time.time() - startTime
print('Finished creating CCT dictionaries in {}'.format(
    humanfriendly.format_timespan(elapsed)))


#%% Create info struct

info = {}
info['year'] = 2019
info['version'] = 1
info['description'] = 'COCO style database'
info['secondary_contributor'] = 'Converted to COCO .json by Dan Morris'
info['contributor'] = 'McGill University'


#%% Write output

json_data = {}
json_data['images'] = images
json_data['annotations'] = annotations
json_data['categories'] = categories
json_data['info'] = info

# Bug fix: the original passed a bare open() handle to json.dump, which was
# never closed (no guaranteed flush); use a context manager instead.
with open(output_file,'w') as f:
    json.dump(json_data, f, indent=4)

print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
    len(images),len(annotations),len(categories)))