megadetector 5.0.11-py3-none-any.whl → 5.0.12-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector has been flagged for review.
- megadetector/api/__init__.py +0 -0
- megadetector/api/batch_processing/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
- megadetector/api/batch_processing/api_core/server.py +294 -0
- megadetector/api/batch_processing/api_core/server_api_config.py +98 -0
- megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
- megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
- megadetector/api/batch_processing/api_core/server_job_status_table.py +152 -0
- megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
- megadetector/api/batch_processing/api_core/server_utils.py +92 -0
- megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
- megadetector/api/batch_processing/api_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
- megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
- megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +126 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
- megadetector/api/synchronous/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
- megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -0
- megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
- megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
- megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
- megadetector/classification/__init__.py +0 -0
- megadetector/classification/aggregate_classifier_probs.py +108 -0
- megadetector/classification/analyze_failed_images.py +227 -0
- megadetector/classification/cache_batchapi_outputs.py +198 -0
- megadetector/classification/create_classification_dataset.py +627 -0
- megadetector/classification/crop_detections.py +516 -0
- megadetector/classification/csv_to_json.py +226 -0
- megadetector/classification/detect_and_crop.py +855 -0
- megadetector/classification/efficientnet/__init__.py +9 -0
- megadetector/classification/efficientnet/model.py +415 -0
- megadetector/classification/efficientnet/utils.py +610 -0
- megadetector/classification/evaluate_model.py +520 -0
- megadetector/classification/identify_mislabeled_candidates.py +152 -0
- megadetector/classification/json_to_azcopy_list.py +63 -0
- megadetector/classification/json_validator.py +699 -0
- megadetector/classification/map_classification_categories.py +276 -0
- megadetector/classification/merge_classification_detection_output.py +506 -0
- megadetector/classification/prepare_classification_script.py +194 -0
- megadetector/classification/prepare_classification_script_mc.py +228 -0
- megadetector/classification/run_classifier.py +287 -0
- megadetector/classification/save_mislabeled.py +110 -0
- megadetector/classification/train_classifier.py +827 -0
- megadetector/classification/train_classifier_tf.py +725 -0
- megadetector/classification/train_utils.py +323 -0
- megadetector/data_management/__init__.py +0 -0
- megadetector/data_management/annotations/__init__.py +0 -0
- megadetector/data_management/annotations/annotation_constants.py +34 -0
- megadetector/data_management/camtrap_dp_to_coco.py +239 -0
- megadetector/data_management/cct_json_utils.py +395 -0
- megadetector/data_management/cct_to_md.py +176 -0
- megadetector/data_management/cct_to_wi.py +289 -0
- megadetector/data_management/coco_to_labelme.py +272 -0
- megadetector/data_management/coco_to_yolo.py +662 -0
- megadetector/data_management/databases/__init__.py +0 -0
- megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
- megadetector/data_management/databases/integrity_check_json_db.py +477 -0
- megadetector/data_management/databases/subset_json_db.py +115 -0
- megadetector/data_management/generate_crops_from_cct.py +149 -0
- megadetector/data_management/get_image_sizes.py +189 -0
- megadetector/data_management/importers/add_nacti_sizes.py +52 -0
- megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
- megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
- megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
- megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
- megadetector/data_management/importers/awc_to_json.py +191 -0
- megadetector/data_management/importers/bellevue_to_json.py +273 -0
- megadetector/data_management/importers/cacophony-thermal-importer.py +796 -0
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
- megadetector/data_management/importers/cct_field_adjustments.py +58 -0
- megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
- megadetector/data_management/importers/ena24_to_json.py +276 -0
- megadetector/data_management/importers/filenames_to_json.py +386 -0
- megadetector/data_management/importers/helena_to_cct.py +283 -0
- megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
- megadetector/data_management/importers/jb_csv_to_json.py +150 -0
- megadetector/data_management/importers/mcgill_to_json.py +250 -0
- megadetector/data_management/importers/missouri_to_json.py +490 -0
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
- megadetector/data_management/importers/noaa_seals_2019.py +181 -0
- megadetector/data_management/importers/pc_to_json.py +365 -0
- megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
- megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
- megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
- megadetector/data_management/importers/rspb_to_json.py +356 -0
- megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
- megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
- megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
- megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
- megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
- megadetector/data_management/importers/sulross_get_exif.py +65 -0
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
- megadetector/data_management/importers/ubc_to_json.py +399 -0
- megadetector/data_management/importers/umn_to_json.py +507 -0
- megadetector/data_management/importers/wellington_to_json.py +263 -0
- megadetector/data_management/importers/wi_to_json.py +442 -0
- megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
- megadetector/data_management/labelme_to_coco.py +547 -0
- megadetector/data_management/labelme_to_yolo.py +272 -0
- megadetector/data_management/lila/__init__.py +0 -0
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
- megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
- megadetector/data_management/lila/create_lila_blank_set.py +558 -0
- megadetector/data_management/lila/create_lila_test_set.py +152 -0
- megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
- megadetector/data_management/lila/download_lila_subset.py +178 -0
- megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
- megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
- megadetector/data_management/lila/get_lila_image_counts.py +112 -0
- megadetector/data_management/lila/lila_common.py +300 -0
- megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
- megadetector/data_management/ocr_tools.py +874 -0
- megadetector/data_management/read_exif.py +681 -0
- megadetector/data_management/remap_coco_categories.py +84 -0
- megadetector/data_management/remove_exif.py +66 -0
- megadetector/data_management/resize_coco_dataset.py +189 -0
- megadetector/data_management/wi_download_csv_to_coco.py +246 -0
- megadetector/data_management/yolo_output_to_md_output.py +441 -0
- megadetector/data_management/yolo_to_coco.py +676 -0
- megadetector/detection/__init__.py +0 -0
- megadetector/detection/detector_training/__init__.py +0 -0
- megadetector/detection/detector_training/model_main_tf2.py +114 -0
- megadetector/detection/process_video.py +702 -0
- megadetector/detection/pytorch_detector.py +341 -0
- megadetector/detection/run_detector.py +779 -0
- megadetector/detection/run_detector_batch.py +1219 -0
- megadetector/detection/run_inference_with_yolov5_val.py +917 -0
- megadetector/detection/run_tiled_inference.py +934 -0
- megadetector/detection/tf_detector.py +189 -0
- megadetector/detection/video_utils.py +606 -0
- megadetector/postprocessing/__init__.py +0 -0
- megadetector/postprocessing/add_max_conf.py +64 -0
- megadetector/postprocessing/categorize_detections_by_size.py +163 -0
- megadetector/postprocessing/combine_api_outputs.py +249 -0
- megadetector/postprocessing/compare_batch_results.py +958 -0
- megadetector/postprocessing/convert_output_format.py +396 -0
- megadetector/postprocessing/load_api_results.py +195 -0
- megadetector/postprocessing/md_to_coco.py +310 -0
- megadetector/postprocessing/md_to_labelme.py +330 -0
- megadetector/postprocessing/merge_detections.py +401 -0
- megadetector/postprocessing/postprocess_batch_results.py +1902 -0
- megadetector/postprocessing/remap_detection_categories.py +170 -0
- megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1631 -0
- megadetector/postprocessing/separate_detections_into_folders.py +730 -0
- megadetector/postprocessing/subset_json_detector_output.py +696 -0
- megadetector/postprocessing/top_folders_to_bottom.py +223 -0
- megadetector/taxonomy_mapping/__init__.py +0 -0
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +590 -0
- megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
- megadetector/taxonomy_mapping/simple_image_download.py +219 -0
- megadetector/taxonomy_mapping/species_lookup.py +834 -0
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
- megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
- megadetector/utils/__init__.py +0 -0
- megadetector/utils/azure_utils.py +178 -0
- megadetector/utils/ct_utils.py +612 -0
- megadetector/utils/directory_listing.py +246 -0
- megadetector/utils/md_tests.py +968 -0
- megadetector/utils/path_utils.py +1044 -0
- megadetector/utils/process_utils.py +157 -0
- megadetector/utils/sas_blob_utils.py +509 -0
- megadetector/utils/split_locations_into_train_val.py +228 -0
- megadetector/utils/string_utils.py +92 -0
- megadetector/utils/url_utils.py +323 -0
- megadetector/utils/write_html_image_list.py +225 -0
- megadetector/visualization/__init__.py +0 -0
- megadetector/visualization/plot_utils.py +293 -0
- megadetector/visualization/render_images_with_thumbnails.py +275 -0
- megadetector/visualization/visualization_utils.py +1536 -0
- megadetector/visualization/visualize_db.py +550 -0
- megadetector/visualization/visualize_detector_output.py +405 -0
- {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/METADATA +1 -1
- megadetector-5.0.12.dist-info/RECORD +199 -0
- megadetector-5.0.12.dist-info/top_level.txt +1 -0
- megadetector-5.0.11.dist-info/RECORD +0 -5
- megadetector-5.0.11.dist-info/top_level.txt +0 -1
- {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/LICENSE +0 -0
- {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/WHEEL +0 -0
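The per-file counts above come from comparing each wheel's dist-info/RECORD manifest: 5.0.11's RECORD listed only 5 entries, while 5.0.12 ships the full megadetector source tree (199 entries). To reproduce a listing like this locally, here is a minimal sketch (not part of the megadetector package; the helper name is ours) that reads a wheel's RECORD, which works because a wheel is an ordinary zip archive:

import csv
import io
import zipfile

def list_wheel_files(wheel_path):
    """Yield the installed-file paths recorded in a wheel's RECORD manifest."""
    with zipfile.ZipFile(wheel_path) as zf:
        # Every wheel carries exactly one *.dist-info/RECORD file
        record_name = next(n for n in zf.namelist() if n.endswith('.dist-info/RECORD'))
        with zf.open(record_name) as f:
            for row in csv.reader(io.TextIOWrapper(f, encoding='utf-8')):
                if row:
                    yield row[0]

# Hypothetical usage, assuming both wheels have been downloaded locally:
# old_files = set(list_wheel_files('megadetector-5.0.11-py3-none-any.whl'))
# new_files = set(list_wheel_files('megadetector-5.0.12-py3-none-any.whl'))
# print(sorted(new_files - old_files))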
@@ -0,0 +1,201 @@
+"""
+
+auckland_doc_to_json.py
+
+Convert Auckland DOC data set to COCO camera traps format. This was
+for a training data set where class names were encoded in path names.
+
+"""
+
+#%% Constants and imports
+
+import json
+import os
+import uuid
+import datetime
+
+from tqdm import tqdm
+
+from megadetector.visualization import visualize_db
+from megadetector.data_management.databases import integrity_check_json_db
+from megadetector.utils.path_utils import find_images, split_path, insert_before_extension
+
+# Filenames will be stored in the output .json relative to this base dir
+input_base_dir = 'y:\\'
+output_base_dir = r'f:\auckland-doc'
+output_json_filename = os.path.join(output_base_dir, 'auckland-doc-Maukahuka_Auckland_Island.json')
+
+assert os.path.isdir(input_base_dir)
+os.makedirs(output_base_dir,exist_ok=True)
+
+output_encoding = 'utf-8'
+read_image_sizes = True
+
+info = {}
+info['year'] = 2019
+info['version'] = '1.0'
+info['description'] = 'Auckland DOC Camera Traps'
+info['contributor'] = 'Auckland DOC'
+info['date_created'] = str(datetime.date.today())
+
+
+#%% Enumerate files
+
+print('Enumerating files from {}'.format(input_base_dir))
+image_files = find_images(input_base_dir, bRecursive=True)
+print('Enumerated {} images'.format(len(image_files)))
+
+
+#%% Assemble dictionaries
+
+images = []
+image_id_to_image = {}
+annotations = []
+categories = []
+
+category_name_to_category = {}
+category_id_to_category = {}
+
+# Force the empty category to be ID 0
+empty_category = {}
+empty_category['name'] = 'empty'
+empty_category['id'] = 0
+category_id_to_category[0] = empty_category
+categories.append(empty_category)
+next_id = 1
+
+behaviors = set()
+
+# fn = image_files[0]; print(fn)
+for fn in tqdm(image_files):
+
+    # Typically y:\Maukahuka_Auckland_Island\1_Training\Winter_Trial_2019\cat\cat\eat\20190903_IDdY_34_E3_tmp_201908240051.JPG
+    relative_path = os.path.relpath(fn,input_base_dir)
+    tokens = split_path(fn)
+    assert tokens[1] == 'Maukahuka_Auckland_Island'
+
+    trainval_split = tokens[2]
+    assert trainval_split in ['1_Training','2_Testing']
+
+    # This data set has two top-level folders, "1_Training" (which has class names encoded
+    # in paths) and "2_Testing" (which has no class information).
+    if trainval_split == '2_Testing':
+        category_name = 'test'
+    else:
+        category_name = tokens[-3]
+        if category_name.startswith('2_'):
+            category_name = category_name.replace('2_', '')
+    category_name = category_name.lower().strip()
+
+    if category_name not in category_name_to_category:
+
+        category_id = next_id
+        next_id += 1
+        category = {}
+        category['id'] = category_id
+        category['name'] = category_name
+        category['count'] = 0
+        categories.append(category)
+        category_name_to_category[category_name] = category
+        category_id_to_category[category_id] = category
+
+    else:
+
+        category = category_name_to_category[category_name]
+
+    category_id = category['id']
+
+    category['count'] += 1
+    behavior = None
+    if category_name != 'test':
+        behavior = fn.split('\\')[-2]
+        behaviors.add(behavior)
+
+    im = {}
+    im['id'] = str(uuid.uuid1())
+    im['file_name'] = relative_path
+    image_id_to_image[im['id']] = im
+
+    images.append(im)
+
+    ann = {}
+
+    ann['id'] = str(uuid.uuid1())
+    ann['image_id'] = im['id']
+    ann['category_id'] = category_id
+    if behavior is not None:
+        ann['behavior'] = behavior
+    annotations.append(ann)
+
+# ...for each image
+
+
+#%% Write output .json
+
+data = {}
+data['info'] = info
+data['images'] = images
+data['annotations'] = annotations
+data['categories'] = categories
+
+json.dump(data, open(output_json_filename, 'w'), indent=2)
+print('Finished writing json to {}'.format(output_json_filename))
+
+
+#%% Write train/test .jsons
+
+train_images = []; test_images = []
+train_annotations = []; test_annotations = []
+
+for ann in tqdm(annotations):
+    category_id = ann['category_id']
+    image_id = ann['image_id']
+    category_name = category_id_to_category[category_id]['name']
+    im = image_id_to_image[image_id]
+    if category_name == 'test':
+        test_images.append(im)
+        test_annotations.append(ann)
+    else:
+        train_images.append(im)
+        train_annotations.append(ann)
+
+train_fn = insert_before_extension(output_json_filename,'train')
+test_fn = insert_before_extension(output_json_filename,'test')
+
+data['images'] = train_images
+data['annotations'] = train_annotations
+json.dump(data, open(train_fn, 'w'), indent=2)
+
+data['images'] = test_images
+data['annotations'] = test_annotations
+json.dump(data, open(test_fn, 'w'), indent=2)
+
+
+#%% Validate .json files
+
+options = integrity_check_json_db.IntegrityCheckOptions()
+options.baseDir = input_base_dir
+options.bCheckImageSizes = False
+options.bCheckImageExistence = True
+options.bFindUnusedImages = True
+
+sorted_categories, data, _ = integrity_check_json_db.integrity_check_json_db(output_json_filename, options)
+sorted_categories, data, _ = integrity_check_json_db.integrity_check_json_db(train_fn, options)
+sorted_categories, data, _ = integrity_check_json_db.integrity_check_json_db(test_fn, options)
+
+
+#%% Preview labels
+
+viz_options = visualize_db.DbVizOptions()
+viz_options.num_to_visualize = 2000
+viz_options.trim_to_images_with_bboxes = False
+viz_options.add_search_links = False
+viz_options.sort_by_filename = False
+viz_options.parallelize_rendering = True
+viz_options.classes_to_exclude = ['test']
+html_output_file, image_db = visualize_db.visualize_db(db_path=output_json_filename,
+                                                       output_dir=os.path.join(
+                                                           output_base_dir, 'preview'),
+                                                       image_base_dir=input_base_dir,
+                                                       options=viz_options)
+os.startfile(html_output_file)
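The script above emits a COCO Camera Traps (CCT) .json: a dict with info, images, annotations, and categories, where 'empty' is forced to category ID 0 and each annotation ties one image to one category (plus the optional behavior field parsed from the path). A minimal sketch of the resulting structure, with hypothetical IDs and paths:

cct_example = {
    'info': {'year': 2019, 'version': '1.0',
             'description': 'Auckland DOC Camera Traps'},
    'images': [{'id': 'example-image-id',
                'file_name': 'relative/path/to/image.JPG'}],
    'annotations': [{'id': 'example-annotation-id',
                     'image_id': 'example-image-id',
                     'category_id': 1,
                     'behavior': 'eat'}],  # only present for training images
    'categories': [{'id': 0, 'name': 'empty', 'count': 0},
                   {'id': 1, 'name': 'cat', 'count': 1}]
}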
@@ -0,0 +1,191 @@
+"""
+
+awc_to_json.py
+
+Convert a particular .csv file from Australian Wildlife Conservancy to CCT format.
+
+"""
+
+#%% Constants and environment
+
+import pandas as pd
+import uuid
+import json
+import time
+import humanfriendly
+import os
+import PIL
+
+from tqdm import tqdm
+
+from megadetector.visualization import visualize_db
+from megadetector.utils import path_utils
+
+input_metadata_file = r"D:\wildlife_data\awc\awc_imageinfo.csv"
+output_file = r"D:\wildlife_data\awc\awc_imageinfo.json"
+image_base = r"D:\wildlife_data\awc"
+preview_base = r"D:\wildlife_data\awc\label_preview"
+
+filename_replacements = {'D:\\Wet Tropics':'WetTropics'}
+category_mappings = {'none':'empty'}
+
+
+#%% Read source data
+
+input_metadata = pd.read_csv(input_metadata_file)
+
+print('Read {} columns and {} rows from metadata file'.format(len(input_metadata.columns),
+      len(input_metadata)))
+
+
+#%% Main loop over labels
+
+startTime = time.time()
+
+relativePathToImage = {}
+
+images = []
+annotations = []
+categoryIDToCategories = {}
+missingFiles = []
+
+duplicateImageIDs = set()
+
+# Force the empty category to be ID 0
+emptyCat = {}
+emptyCat['name'] = 'empty'
+emptyCat['id'] = 0
+categoryIDToCategories[0] = emptyCat
+
+# iRow = 0; row = input_metadata.iloc[iRow]
+for iRow,row in tqdm(input_metadata.iterrows(),total=len(input_metadata)):
+
+    # ImageID,FileName,FilePath,SpeciesID,CommonName
+    imageID = str(row['ImageID'])
+    fn = row['FileName']
+    for k in filename_replacements:
+        dirName = row['FilePath'].replace(k,filename_replacements[k])
+    relativePath = os.path.join(dirName,fn)
+
+    # This makes an assumption of one annotation per image, which happens to be
+    # true in this data set.
+    if relativePath in relativePathToImage:
+
+        im = relativePathToImage[relativePath]
+        assert im['id'] == imageID
+        duplicateImageIDs.add(imageID)
+
+    else:
+        im = {}
+        im['id'] = imageID
+        im['file_name'] = relativePath
+        im['seq_id'] = '-1'
+        images.append(im)
+        relativePathToImage[relativePath] = im
+
+    fullPath = os.path.join(image_base,relativePath)
+
+    if not os.path.isfile(fullPath):
+
+        missingFiles.append(fullPath)
+
+    else:
+        # Retrieve image width and height
+        pilImage = PIL.Image.open(fullPath)
+        width, height = pilImage.size
+        im['width'] = width
+        im['height'] = height
+
+    categoryName = row['CommonName'].lower()
+    if categoryName in category_mappings:
+        categoryName = category_mappings[categoryName]
+
+    categoryID = row['SpeciesID']
+    assert isinstance(categoryID,int)
+
+    if categoryID not in categoryIDToCategories:
+        category = {}
+        category['name'] = categoryName
+        category['id'] = categoryID
+        categoryIDToCategories[categoryID] = category
+    else:
+        assert categoryIDToCategories[categoryID]['name'] == categoryName
+
+    # Create an annotation
+    ann = {}
+
+    # The Internet tells me this guarantees uniqueness to a reasonable extent, even
+    # beyond the sheer improbability of collisions.
+    ann['id'] = str(uuid.uuid1())
+    ann['image_id'] = im['id']
+    ann['category_id'] = categoryID
+
+    annotations.append(ann)
+
+categories = list(categoryIDToCategories.values())
+
+elapsed = time.time() - startTime
+print('Finished verifying file loop in {}, {} images, {} missing images, {} repeat labels'.format(
+    humanfriendly.format_timespan(elapsed), len(images), len(missingFiles), len(duplicateImageIDs)))
+
+
+#%% Check for images that aren't included in the metadata file
+
+# Enumerate all images
+# list(relativePathToImage.keys())[0]
+
+imageFullPaths = path_utils.find_images(image_base,bRecursive=True)
+unmatchedFiles = []
+
+for iImage,imagePath in enumerate(imageFullPaths):
+
+    fn = os.path.relpath(imagePath,image_base)
+    if fn not in relativePathToImage:
+        unmatchedFiles.append(fn)
+
+print('Finished checking {} images to make sure they\'re in the metadata, found {} mismatches'.format(
+    len(imageFullPaths),len(unmatchedFiles)))
+
+
+#%% Create info struct
+
+info = {}
+info['year'] = 2019
+info['version'] = 1
+info['description'] = 'COCO style database'
+info['secondary_contributor'] = 'Converted to COCO .json by Dan Morris'
+info['contributor'] = ''
+
+
+#%% Write output
+
+json_data = {}
+json_data['images'] = images
+json_data['annotations'] = annotations
+json_data['categories'] = categories
+json_data['info'] = info
+json.dump(json_data, open(output_file,'w'), indent=4)
+
+print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
+    len(images),len(annotations),len(categories)))
+
+
+#%% Validate the database's integrity
+
+from megadetector.data_management.databases import integrity_check_json_db
+
+options = integrity_check_json_db.IntegrityCheckOptions()
+sortedCategories,data = integrity_check_json_db.integrity_check_json_db(output_file, options)
+
+
+#%% Render a bunch of images to make sure the labels got carried along correctly
+
+bbox_db_path = output_file
+output_dir = preview_base
+
+options = visualize_db.BboxDbVizOptions()
+options.num_to_visualize = 1000
+options.sort_by_filename = False
+
+htmlOutputFile = visualize_db.visualize_db(bbox_db_path,output_dir,image_base,options)
+
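A note on IDs before the next file: the two scripts above use uuid.uuid1() (the choice the "guarantees uniqueness" comment refers to), while bellevue_to_json.py below uses uuid.uuid4(). This is standard-library behavior, not anything megadetector-specific: uuid1 derives from the host's MAC address plus a timestamp, so it is effectively collision-free on one machine but embeds host information in the output; uuid4 is 122 random bits. Either satisfies the only real requirement here, that IDs be unique within one output .json:

import uuid

time_based_id = str(uuid.uuid1())  # MAC address + timestamp + clock sequence
random_id = str(uuid.uuid4())      # purely random

# Both are valid CCT 'id' values; uuid1 additionally writes the host's MAC
# address into the .json, which may matter for published datasets.
print(time_based_id, random_id)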
@@ -0,0 +1,273 @@
+"""
+
+bellevue_to_json.py
+
+"Bellevue Camera Traps" is the rather unremarkable camera trap data set
+used by one of the repo's maintainers for testing. It's organized as:
+
+approximate_date/[loose_camera_specifier/]/species
+
+E.g.:
+
+"2018.03.30\coyote\DSCF0091.JPG"
+"2018.07.18\oldcam\empty\DSCF0001.JPG"
+
+"""
+
+#%% Constants and imports
+
+import json
+import os
+import uuid
+import datetime
+
+from PIL import Image
+from PIL.ExifTags import TAGS
+from tqdm import tqdm
+
+from megadetector.utils.path_utils import find_images
+
+# Filenames will be stored in the output .json relative to this base dir
+base_dir = r'C:\temp\camera_trap_images_no_people'
+output_base = r'c:\temp\previews'
+output_filename = os.path.join(base_dir,'bellevue_camera_traps.{}.json'.format(str(datetime.date.today())))
+
+class_mappings = {'transitional':'unlabeled','moving':'unlabeled','setup':'unlabeled','blurry':'unlabeled','junk':'unlabeled','unknown':'unlabeled'}
+class_mappings['dan'] = 'human'
+class_mappings['dan_and_dog'] = 'human,dog'
+class_mappings['dan and dog'] = 'human,dog'
+class_mappings['unknown'] = 'unknown animal'
+class_mappings['racoon'] = 'raccoon'
+
+
+info = {}
+info['year'] = 2020
+info['version'] = '2.0'
+info['description'] = 'Bellevue Camera Traps'
+info['contributor'] = 'Dan Morris'
+info['date_created'] = str(datetime.date.today())
+
+max_files = -1
+
+max_seconds_within_sequence = 10.0
+
+assert os.path.isdir(base_dir)
+
+#%% Exif functions
+
+def get_exif_tags(fn=None,im=None):
+
+    assert (fn is not None) or (im is not None)
+    ret = {}
+    if im is None:
+        im = Image.open(fn)
+    info = im._getexif()
+    for tag, value in info.items():
+        decoded = TAGS.get(tag, tag)
+        ret[decoded] = value
+
+    return ret
+
+
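# Note (annotation, not part of the diff): im._getexif() above is a private
# PIL API. Below is a sketch of the same tag-name mapping via Pillow's public
# accessors, assuming a reasonably recent Pillow; the helper name is ours.

import PIL.Image
from PIL.ExifTags import TAGS as EXIF_TAGS

def get_exif_tags_public(path):
    """Like get_exif_tags above, but using the public getexif() accessor."""
    with PIL.Image.open(path) as im:
        exif = im.getexif()
        tags = {EXIF_TAGS.get(k, k): v for k, v in exif.items()}
        # Tags such as DateTimeOriginal live in the EXIF sub-IFD (0x8769);
        # _getexif() merges that IFD in, the public accessor keeps it separate
        tags.update({EXIF_TAGS.get(k, k): v for k, v in exif.get_ifd(0x8769).items()})
        return tags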
+#%% Enumerate files, create image/annotation/category info
+
+annotations = []
+
+category_name_to_category = {}
+
+# Force the empty category to be ID 0
+empty_category = {}
+empty_category['id'] = 0
+empty_category['name'] = 'empty'
+category_name_to_category['empty'] = empty_category
+next_category_id = 1
+
+# Keep track of unique camera folders
+camera_folders = set()
+
+# Each element will be a dictionary with fields:
+#
+# relative_path, width, height, datetime
+images = []
+non_image_files = []
+
+print('Enumerating files from {}'.format(base_dir))
+
+image_files = find_images(base_dir,recursive=True)
+print('Enumerated {} images'.format(len(image_files)))
+
+# fname = image_files[0]
+for fname in tqdm(image_files):
+
+    if max_files >= 0 and len(images) > max_files:
+        print('Warning: early break at {} files'.format(max_files))
+        break
+
+    full_path = fname
+    relative_path = os.path.relpath(full_path,base_dir)
+
+    try:
+        im = Image.open(full_path)
+        h = im.height
+        w = im.width
+        tags = get_exif_tags(None,im)
+        s = tags['DateTimeOriginal']
+        dt = datetime.datetime.strptime(s,'%Y:%m:%d %H:%M:%S')
+    except:
+        # Corrupt or not an image
+        print('Warning: could not read {}'.format(fname))
+        non_image_files.append(full_path)
+        continue
+
+    # Store file info
+    image_info = {}
+    image_info['file_name'] = relative_path
+    image_info['width'] = w
+    image_info['height'] = h
+    image_info['datetime'] = dt
+    image_info['location'] = 'unknown'
+    image_info['id'] = str(uuid.uuid4())
+
+    images.append(image_info)
+
+    # E.g. 2018.03.30/coyote/DSCF0091.JPG
+    relative_path = image_info['file_name'].replace('\\','/')
+    tokens = relative_path.split('/')
+    camera_path_tokens = tokens[0:-2]
+    camera_path = '/'.join(camera_path_tokens)
+    camera_folders.add(camera_path)
+    image_info['camera_path'] = camera_path
+
+    category_name = tokens[-2].lower()
+    if category_name in class_mappings:
+        category_name = class_mappings[category_name]
+
+    if category_name not in category_name_to_category:
+        category = {}
+        category['id'] = next_category_id
+        category['name'] = category_name
+        next_category_id = next_category_id + 1
+        category_name_to_category[category_name] = category
+    else:
+        category = category_name_to_category[category_name]
+
+    annotation = {}
+    annotation['sequence_level_annotation'] = False
+    annotation['id'] = str(uuid.uuid4())
+    annotation['category_id'] = category['id']
+    annotation['image_id'] = image_info['id']
+    annotations.append(annotation)
+
+# ...for each image file
+
+assert len(annotations) == len(images)
+
+categories = list(category_name_to_category.values())
+
+
+#%% Synthesize sequence information
+
+print('Found {} camera folders'.format(len(camera_folders)))
+
+camera_folders = list(camera_folders)
+
+all_sequences = set()
+
+# Sort images by time within each folder
+# camera_path = camera_folders[0]
+for i_camera,camera_path in enumerate(camera_folders):
+
+    images_this_camera = [im for im in images if im['camera_path'] == camera_path]
+    sorted_images_this_camera = sorted(images_this_camera, key = lambda im: im['datetime'])
+
+    current_sequence_id = None
+    next_sequence_index = 0
+    previous_datetime = None
+
+    # previous_datetime = sorted_images_this_camera[0]['datetime']
+    # im = sorted_images_this_camera[1]
+    for im in sorted_images_this_camera:
+
+        if previous_datetime is None:
+            delta = None
+        else:
+            delta = (im['datetime'] - previous_datetime).total_seconds()
+
+        # Start a new sequence if necessary
+        if delta is None or delta > max_seconds_within_sequence:
+            next_sequence_index = 0
+            current_sequence_id = str(uuid.uuid4())
+            all_sequences.add(current_sequence_id)
+
+        im['seq_id'] = current_sequence_id
+        im['seq_num_frames'] = None
+        im['frame_num'] = next_sequence_index
+        next_sequence_index = next_sequence_index + 1
+        previous_datetime = im['datetime']
+
+    # ...for each image in this camera
+
+# ...for each camera
+
+print('Created {} sequences from {} images'.format(len(all_sequences),len(images)))
+
+# Fill in seq_num_frames
+num_frames_per_sequence = {}
+for seq_id in all_sequences:
+    images_this_sequence = [im for im in images if im['seq_id'] == seq_id]
+    num_frames_per_sequence[seq_id] = len(images_this_sequence)
+    for im in images_this_sequence:
+        im['seq_num_frames'] = len(images_this_sequence)
+
+
+#%% A little cleanup
+
+for im in tqdm(images):
+    if 'camera_path' in im:
+        del im['camera_path']
+    if not isinstance(im['datetime'],str):
+        im['datetime'] = str(im['datetime'])
+
+
+#%% Write output .json
+
+data = {}
+data['info'] = info
+data['images'] = images
+data['annotations'] = annotations
+data['categories'] = categories
+
+json.dump(data, open(output_filename,'w'), indent=1)
+
+print('Finished writing json to {}'.format(output_filename))
+
+
+#%% Validate data
+
+from megadetector.data_management.databases import integrity_check_json_db
+
+options = integrity_check_json_db.IntegrityCheckOptions()
+options.baseDir = base_dir
+options.bCheckImageSizes = False
+options.bCheckImageExistence = True
+options.bFindUnusedImages = False
+
+sorted_categories = integrity_check_json_db.integrity_check_json_db(output_filename,options)
+
+
+#%% Label previews
+
+from megadetector.visualization import visualize_db
+
+viz_options = visualize_db.DbVizOptions()
+viz_options.num_to_visualize = None
+viz_options.parallelize_rendering_n_cores = 8
+viz_options.parallelize_rendering = True
+viz_options.trim_to_images_with_bboxes = False
+viz_options.add_search_links = True
+viz_options.sort_by_filename = False
+html_output_file,image_db = visualize_db.visualize_db(output_filename,
+                                                      os.path.join(output_base,'preview'),
+                                                      base_dir,viz_options)
+os.startfile(html_output_file)
+
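The sequence-synthesis cell is the most reusable piece of bellevue_to_json.py: images from one camera folder, sorted by EXIF timestamp, share a sequence as long as consecutive frames are at most max_seconds_within_sequence apart. A standalone sketch of that grouping logic (hypothetical data; the function name is ours):

import datetime

def group_into_sequences(sorted_frames, max_gap_seconds=10.0):
    """Group (timestamp, filename) pairs, pre-sorted by time, into sequences."""
    sequences = []
    previous_time = None
    for timestamp, filename in sorted_frames:
        gap = None if previous_time is None else (timestamp - previous_time).total_seconds()
        if gap is None or gap > max_gap_seconds:
            sequences.append([])  # time gap too large: start a new sequence
        sequences[-1].append(filename)
        previous_time = timestamp
    return sequences

t0 = datetime.datetime(2018, 3, 30, 12, 0, 0)
frames = [(t0, 'DSCF0091.JPG'),
          (t0 + datetime.timedelta(seconds=3), 'DSCF0092.JPG'),
          (t0 + datetime.timedelta(seconds=63), 'DSCF0093.JPG')]
print(group_into_sequences(frames))
# [['DSCF0091.JPG', 'DSCF0092.JPG'], ['DSCF0093.JPG']]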