megadetector 5.0.11__py3-none-any.whl → 5.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector has been flagged as potentially problematic; see the registry's advisory page for more details.

Files changed (201)
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +98 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +152 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +92 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +126 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +610 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +239 -0
  58. megadetector/data_management/cct_json_utils.py +395 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +272 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +477 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +796 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +874 -0
  129. megadetector/data_management/read_exif.py +681 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/resize_coco_dataset.py +189 -0
  133. megadetector/data_management/wi_download_csv_to_coco.py +246 -0
  134. megadetector/data_management/yolo_output_to_md_output.py +441 -0
  135. megadetector/data_management/yolo_to_coco.py +676 -0
  136. megadetector/detection/__init__.py +0 -0
  137. megadetector/detection/detector_training/__init__.py +0 -0
  138. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  139. megadetector/detection/process_video.py +702 -0
  140. megadetector/detection/pytorch_detector.py +341 -0
  141. megadetector/detection/run_detector.py +779 -0
  142. megadetector/detection/run_detector_batch.py +1219 -0
  143. megadetector/detection/run_inference_with_yolov5_val.py +917 -0
  144. megadetector/detection/run_tiled_inference.py +934 -0
  145. megadetector/detection/tf_detector.py +189 -0
  146. megadetector/detection/video_utils.py +606 -0
  147. megadetector/postprocessing/__init__.py +0 -0
  148. megadetector/postprocessing/add_max_conf.py +64 -0
  149. megadetector/postprocessing/categorize_detections_by_size.py +163 -0
  150. megadetector/postprocessing/combine_api_outputs.py +249 -0
  151. megadetector/postprocessing/compare_batch_results.py +958 -0
  152. megadetector/postprocessing/convert_output_format.py +396 -0
  153. megadetector/postprocessing/load_api_results.py +195 -0
  154. megadetector/postprocessing/md_to_coco.py +310 -0
  155. megadetector/postprocessing/md_to_labelme.py +330 -0
  156. megadetector/postprocessing/merge_detections.py +401 -0
  157. megadetector/postprocessing/postprocess_batch_results.py +1902 -0
  158. megadetector/postprocessing/remap_detection_categories.py +170 -0
  159. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  160. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  161. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  162. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1631 -0
  163. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  164. megadetector/postprocessing/subset_json_detector_output.py +696 -0
  165. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  166. megadetector/taxonomy_mapping/__init__.py +0 -0
  167. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  168. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  169. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  170. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +590 -0
  171. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  172. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  173. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  174. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  175. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  176. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  177. megadetector/utils/__init__.py +0 -0
  178. megadetector/utils/azure_utils.py +178 -0
  179. megadetector/utils/ct_utils.py +612 -0
  180. megadetector/utils/directory_listing.py +246 -0
  181. megadetector/utils/md_tests.py +968 -0
  182. megadetector/utils/path_utils.py +1044 -0
  183. megadetector/utils/process_utils.py +157 -0
  184. megadetector/utils/sas_blob_utils.py +509 -0
  185. megadetector/utils/split_locations_into_train_val.py +228 -0
  186. megadetector/utils/string_utils.py +92 -0
  187. megadetector/utils/url_utils.py +323 -0
  188. megadetector/utils/write_html_image_list.py +225 -0
  189. megadetector/visualization/__init__.py +0 -0
  190. megadetector/visualization/plot_utils.py +293 -0
  191. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  192. megadetector/visualization/visualization_utils.py +1536 -0
  193. megadetector/visualization/visualize_db.py +550 -0
  194. megadetector/visualization/visualize_detector_output.py +405 -0
  195. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/METADATA +1 -1
  196. megadetector-5.0.12.dist-info/RECORD +199 -0
  197. megadetector-5.0.12.dist-info/top_level.txt +1 -0
  198. megadetector-5.0.11.dist-info/RECORD +0 -5
  199. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  200. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/LICENSE +0 -0
  201. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/WHEEL +0 -0
@@ -0,0 +1,181 @@
"""

noaa_seals_2019.py

Prepare the NOAA Arctic Seals 2019 metadata for LILA.

Interactive (cell-based) script: joins the raw NOAA detection .csv to the
list of files actually present in the LILA blob container, writes an
augmented .csv with container-relative image paths, then spot-checks one
random image by downloading it and rendering its boxes.

"""

#%% Imports and constants

import os
import random
import shutil

import pandas as pd
from tqdm import tqdm

from megadetector.utils import url_utils
from megadetector.visualization import visualization_utils

# A list of files in the lilablobssc container for this data set
container_file_list = r'C:\temp\seals\seal_files.txt'

# The raw detection files provided by NOAA
detections_fn = r'C:\temp\seals\surv_test_kamera_detections_20210212.csv'

# A version of the above with filename columns added
detections_fn_full_paths = detections_fn.replace('.csv','_full_paths.csv')

base_url = 'https://lilablobssc.blob.core.windows.net/noaa-kotz'


#%% Read input .csv

df = pd.read_csv(detections_fn)

# Filled in by the path-resolution loop below; left as '' for rows we can't resolve
df['rgb_image_path'] = ''
df['ir_image_path'] = ''
print('Read {} rows from {}'.format(len(df),detections_fn))

# Maps single-character camera-view codes in the .csv to container folder names
camera_view_to_path = {}
camera_view_to_path['C'] = 'CENT'
camera_view_to_path['L'] = 'LEFT'

valid_flights = {'fl04','fl05','fl06','fl07'}


#%% Read list of files

with open(container_file_list,'r') as f:
    all_files = f.readlines()
all_files = [s.strip() for s in all_files]

# Set for O(1) membership tests in the loop below
all_files = set(all_files)


#%% Convert paths to full paths

missing_ir_files = []

# i_row = 0; row = df.iloc[i_row]
for i_row,row in tqdm(df.iterrows(),total=len(df)):

    assert row['flight'] in valid_flights
    assert row['camera_view'] in camera_view_to_path

    # Every row must have an RGB image, and it must exist in the container
    assert isinstance(row['rgb_image_name'],str)
    rgb_image_path = 'Images/{}/{}/{}'.format(row['flight'],camera_view_to_path[row['camera_view']],
                                              row['rgb_image_name'])
    assert rgb_image_path in all_files
    df.loc[i_row,'rgb_image_path'] = rgb_image_path

    # IR images are optional; a non-string value (NaN) means "no IR image"
    if not isinstance(row['ir_image_name'],str):
        continue

    ir_image_path = 'Images/{}/{}/{}'.format(row['flight'],camera_view_to_path[row['camera_view']],
                                             row['ir_image_name'])
    # Some referenced IR files are absent from the container; record them for
    # review, but still write the path into the table.
    if ir_image_path not in all_files:
        missing_ir_files.append(ir_image_path)
    df.loc[i_row,'ir_image_path'] = ir_image_path

# ...for each row

missing_ir_files = list(set(missing_ir_files))
missing_ir_files.sort()
print('{} missing IR files (of {})'.format(len(missing_ir_files),len(df)))

for s in missing_ir_files:
    print(s)


#%% Write results

df.to_csv(detections_fn_full_paths,index=False)


#%% Load output file, just to be sure

df = pd.read_csv(detections_fn_full_paths)


#%% Render annotations on an image

# randint() is inclusive on *both* ends, so the upper bound must be len(df)-1;
# the previous upper bound of len(df) could index one past the end of the table.
i_image = random.randint(0,len(df)-1)
# i_image = 2004
row = df.iloc[i_image]
rgb_image_path = row['rgb_image_path']
rgb_image_url = base_url + '/' + rgb_image_path
ir_image_path = row['ir_image_path']
# NOTE(review): rows with no IR image were written as '' and read back from
# .csv as NaN, in which case this concatenation raises TypeError — re-run the
# previous cell to pick a different row if that happens.
ir_image_url = base_url + '/' + ir_image_path


#%% Download the image

rgb_image_fn = url_utils.download_url(rgb_image_url,progress_updater=True)
ir_image_fn = url_utils.download_url(ir_image_url,progress_updater=True)


#%% Find all the rows (detections) associated with this image

# Boxes are stored as [l,r,t,b], in pixel coordinates
rgb_boxes = []
ir_boxes = []

for i_row,row in df.iterrows():

    if row['rgb_image_path'] == rgb_image_path:
        box_l = row['rgb_left']
        box_r = row['rgb_right']
        box_t = row['rgb_top']
        box_b = row['rgb_bottom']
        rgb_boxes.append([box_l,box_r,box_t,box_b])

    if row['ir_image_path'] == ir_image_path:
        box_l = row['ir_left']
        box_r = row['ir_right']
        box_t = row['ir_top']
        box_b = row['ir_bottom']
        ir_boxes.append([box_l,box_r,box_t,box_b])

print('Found {} RGB, {} IR annotations for this image'.format(len(rgb_boxes),
                                                              len(ir_boxes)))


#%% Render the detections on the image(s)

img_rgb = visualization_utils.load_image(rgb_image_fn)
img_ir = visualization_utils.load_image(ir_image_fn)

for b in rgb_boxes:

    # In pixel coordinates; "top" is the larger y value in this data set
    box_left = b[0]; box_right = b[1]; box_top = b[2]; box_bottom = b[3]
    assert box_top > box_bottom; assert box_right > box_left
    ymin = box_bottom; ymax = box_top; xmin = box_left; xmax = box_right

    visualization_utils.draw_bounding_box_on_image(img_rgb,ymin,xmin,ymax,xmax,
                                                   use_normalized_coordinates=False,
                                                   thickness=3)

for b in ir_boxes:

    # In pixel coordinates
    box_left = b[0]; box_right = b[1]; box_top = b[2]; box_bottom = b[3]
    assert box_top > box_bottom; assert box_right > box_left
    ymin = box_bottom; ymax = box_top; xmin = box_left; xmax = box_right

    visualization_utils.draw_bounding_box_on_image(img_ir,ymin,xmin,ymax,xmax,
                                                   use_normalized_coordinates=False,
                                                   thickness=3)

# visualization_utils.show_images_in_a_row([img_rgb,img_ir])


#%% Save images

img_rgb.save(r'c:\temp\seals_rgb.png')
img_ir.save(r'c:\temp\seals_ir.png')


#%% Clean up

# download_url() put the images in a temporary folder; remove it
tmp_dir = os.path.dirname(rgb_image_fn)
shutil.rmtree(tmp_dir)
@@ -0,0 +1,365 @@
"""

pc_to_json.py

Convert a particular collection of .csv files from Parks Canada to CCT format.

Interactive (cell-based) script: reads one .csv per image folder, builds
COCO-Camera-Traps image/annotation/category records, optionally subsamples
unlabeled images, writes the .json database, and renders a preview.

"""

#%% Constants and environment

import pandas as pd
import uuid
import json
import time

import numpy as np
from tqdm import tqdm

import humanfriendly
import os
import PIL

from megadetector.data_management.databases import integrity_check_json_db
from megadetector.data_management.cct_json_utils import IndexedJsonDb
from megadetector.data_management import cct_json_to_filename_json
from megadetector.visualization import visualize_db
from megadetector.utils import path_utils

input_base = r"g:\20190715"
output_file = r"D:\wildlife_data\parks_canada\pc_20190715.json"
preview_base = r"D:\wildlife_data\parks_canada\preview"

filename_replacements = {}
category_mappings = {'':'unlabeled'}

csv_prefix = 'ImageData_Microsoft___'

expected_columns = 'Location,DateImage,TimeImage,Species,Total,Horses,DogsOnLeash,DogsOffLeash,AdultFemale,AdultMale,AdultUnknown,Subadult,YLY,YOY,ImageName'.split(',')

# Maps source .csv column names to the annotation field names we emit
columns_to_copy = {'Total':'count','Horses':'horses','DogsOnLeash':'dogsonleash','DogsOffLeash':'dogsoffleash',
                   'AdultFemale':'adultfemale','AdultMale':'adultmale','AdultUnknown':'adultunknown',
                   'Subadult':'subadult','YLY':'yearling','YOY':'youngofyear'}

retrieve_image_sizes = False

max_num_csvs = -1

db_sampling_scheme = 'preview' # 'labeled','all'
n_unlabeled_to_sample = -1
cap_unlabeled_to_labeled = True


#%% Read and concatenate source data

input_files = os.listdir(input_base)

# One dataframe per .csv file; concatenated below
all_input_metadata = []

for i_file,fn in enumerate(input_files):

    if max_num_csvs > 0 and len(all_input_metadata) >= max_num_csvs:
        break

    # Only process the per-folder metadata .csv files
    if not fn.endswith('.csv'):
        continue
    if not fn.startswith(csv_prefix):
        continue

    # The folder name is the .csv name minus prefix and extension
    dirname = fn.replace(csv_prefix,'').replace('.csv','')
    dirfullpath = os.path.join(input_base,dirname)
    if not os.path.isdir(dirfullpath):
        # NOTE(review): both arguments to this replace render as a single
        # space — presumably whitespace normalization; verify against the
        # original file.
        dirname = fn.replace(csv_prefix,'').replace('.csv','').replace(' ',' ')
        dirfullpath = os.path.join(input_base,dirname)
        assert os.path.isdir(dirfullpath)

    metadata_fullpath = os.path.join(input_base,fn)
    print('Reading {}'.format(metadata_fullpath))
    csv_df = pd.read_csv(metadata_fullpath)
    assert list(csv_df.columns) == expected_columns
    csv_df['DirName'] = dirname
    all_input_metadata.append(csv_df)

# Concatenate into a giant data frame
input_metadata = pd.concat(all_input_metadata)

print('Read {} rows total'.format(len(input_metadata)))


#%% List files

print('Listing images...')
image_full_paths = path_utils.find_images(input_base,bRecursive=True)
print('Finished listing {} images'.format(len(image_full_paths)))

# Relative paths as a set, for fast membership tests
image_relative_paths = {os.path.relpath(p,input_base) for p in image_full_paths}

# Lower-cased copy, used only to diagnose case mismatches
image_relative_paths_lower = {p.lower() for p in image_relative_paths}


#%% Main loop over labels (prep)

start_time = time.time()

relative_path_to_image = {}

images = []
annotations = []
category_name_to_category = {}
missing_files = []

# Force the empty category to be ID 0
empty_category = {'name':'empty','id':0}
category_name_to_category['empty'] = empty_category
next_category_id = 1

labeled_images = []
unlabeled_images = []


#%% Main loop over labels (loop)

for i_row,row in tqdm(input_metadata.iterrows(),total=len(input_metadata)):

    # Generate a fresh ID; only used if this is the first row for this image
    image_id = str(uuid.uuid1())
    relative_path = os.path.normpath(row['ImageName'])

    if relative_path not in image_relative_paths:
        if relative_path.lower() in image_relative_paths_lower:
            print('Warning: lower-case version of {} in path list'.format(relative_path))
        else:
            missing_files.append(relative_path)
            continue

    full_path = os.path.join(input_base,relative_path)

    # assert os.path.isfile(full_path)

    if relative_path in relative_path_to_image:

        # We've already seen this image on a previous row
        im = relative_path_to_image[relative_path]

    else:

        im = {}
        im['id'] = image_id
        im['file_name'] = relative_path
        im['seq_id'] = '-1'
        im['location'] = row['Location']
        im['datetime'] = row['DateImage'] + ' ' + row['TimeImage']

        images.append(im)
        relative_path_to_image[relative_path] = im

        if retrieve_image_sizes:

            # Retrieve image width and height
            pil_im = PIL.Image.open(full_path)
            width, height = pil_im.size
            im['width'] = width
            im['height'] = height

    # A NaN species cell means "unlabeled"
    species = row['Species']
    if isinstance(species,float):
        assert np.isnan(species)
        species = 'unlabeled'

    category_name = species.lower().strip()
    if category_name in category_mappings:
        category_name = category_mappings[category_name]

    if category_name not in category_name_to_category:
        category = {'name':category_name,'id':next_category_id}
        next_category_id += 1
        category_name_to_category[category_name] = category
    else:
        category = category_name_to_category[category_name]
        assert category['name'] == category_name

    category_id = category['id']

    if category_name == 'unlabeled':
        unlabeled_images.append(im)
    else:
        labeled_images.append(im)

    # Create an annotation
    ann = {}

    # The Internet tells me this guarantees uniqueness to a reasonable extent, even
    # beyond the sheer improbability of collisions.
    ann['id'] = str(uuid.uuid1())
    ann['image_id'] = im['id']
    ann['category_id'] = category_id

    # Carry the count columns over, renamed per columns_to_copy
    for src_col,dst_col in columns_to_copy.items():
        ann[dst_col] = row[src_col]

    annotations.append(ann)

# ...for each image

categories = list(category_name_to_category.values())

elapsed = time.time() - start_time
print('Finished verifying file loop in {}, {} matched images, {} missing images, {} unlabeled images'.format(
    humanfriendly.format_timespan(elapsed), len(images), len(missing_files), len(unlabeled_images)))


#%% See what's up with missing files

# All folder names that actually contain images
dirnames = {os.path.dirname(p) for p in image_relative_paths}

missing_images_with_missing_dirs = []
missing_images_with_non_missing_dirs = []

missing_dirs = set()

for missing_path in missing_files:
    assert missing_path not in image_relative_paths
    dirname = os.path.dirname(missing_path)
    if dirname not in dirnames:
        missing_images_with_missing_dirs.append(missing_path)
        missing_dirs.add(dirname)
    else:
        missing_images_with_non_missing_dirs.append(missing_path)

print('Of {} missing files, {} are due to {} missing folders'.format(
    len(missing_files),len(missing_images_with_missing_dirs),len(missing_dirs)))


#%% Check for images that aren't included in the metadata file

unmatched_files = []

for i_image,relative_path in tqdm(enumerate(image_relative_paths),total=len(image_relative_paths)):

    if relative_path not in relative_path_to_image:
        unmatched_files.append(relative_path)

print('Finished checking {} images to make sure they\'re in the metadata, found {} mismatches'.format(
    len(image_relative_paths),len(unmatched_files)))


#%% Sample the database

# Keep the full lists around before (possibly) subsampling
images_all = images
annotations_all = annotations

#%%

if db_sampling_scheme == 'all':

    pass

elif db_sampling_scheme in ('labeled','preview'):

    json_data = {}
    json_data['images'] = images
    json_data['annotations'] = annotations
    json_data['categories'] = categories

    indexed_db = IndexedJsonDb(json_data)

    # Keep every image that has at least one non-"unlabeled" class
    sampled_images = []
    for im in images:
        classes = indexed_db.get_classes_for_image(im)
        if not ('unlabeled' in classes and len(classes) == 1):
            sampled_images.append(im)

    if db_sampling_scheme == 'preview':
        n_sample = n_unlabeled_to_sample
        if n_sample == -1:
            n_sample = len(labeled_images)
        if n_sample > len(labeled_images) and cap_unlabeled_to_labeled:
            n_sample = len(labeled_images)
        if n_sample > len(unlabeled_images):
            n_sample = len(unlabeled_images)
        print('Sampling {} of {} unlabeled images'.format(n_sample,len(unlabeled_images)))
        from random import sample
        sampled_images.extend(sample(unlabeled_images,n_sample))

    sampled_annotations = []
    for im in sampled_images:
        sampled_annotations.extend(indexed_db.get_annotations_for_image(im))

    print('Sampling {} of {} images, {} of {} annotations'.format(
        len(sampled_images),len(images),len(sampled_annotations),len(annotations)))

    images = sampled_images
    annotations = sampled_annotations

else:

    raise ValueError('Unrecognized DB sampling scheme {}'.format(db_sampling_scheme))


#%% Create info struct

info = {
    'year': 2019,
    'version': 1,
    'description': 'COCO style database',
    'secondary_contributor': 'Converted to COCO .json by Dan Morris',
    'contributor': 'Parks Canada'
}


#%% Write output

json_data = {}
json_data['images'] = images
json_data['annotations'] = annotations
json_data['categories'] = categories
json_data['info'] = info
with open(output_file,'w') as out_f:
    json.dump(json_data, out_f, indent=4)

print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
    len(images),len(annotations),len(categories)))


#%% Validate the database's integrity

with open(output_file) as in_f:
    json_data = json.load(in_f)
options = integrity_check_json_db.IntegrityCheckOptions()
sorted_categories,data = integrity_check_json_db.integrity_check_json_db(json_data, options)


#%% Render a bunch of images to make sure the labels got carried along correctly

output_dir = preview_base

options = visualize_db.DbVizOptions()
options.num_to_visualize = 100
options.sort_by_filename = False
# options.classes_to_exclude = ['unlabeled']
options.classes_to_exclude = None

html_output_file,_ = visualize_db.visualize_db(json_data,output_dir,input_base,options)
os.startfile(html_output_file)


#%% Write out a list of files to annotate

_,file_list = cct_json_to_filename_json.convertJsonToStringList(output_file,prepend="20190715/")
os.startfile(file_list)
@@ -0,0 +1,123 @@
"""

plot_wni_giraffes.py

Plot keypoints on a random sample of images from the wni-giraffes data set.

For each sampled annotation, draws the consensus (median) keypoint for every
annotation tool on the image, optionally along with each individual
annotator's click, then writes the result to a preview folder.

"""

#%% Constants and imports

import os
import json
import random

from PIL import Image
from PIL import ImageDraw
from tqdm import tqdm

input_file = r"G:\data_staging\wni-out\wni_giraffes_train.json"
image_base = r"G:\data_staging\wni-out\images"
output_base = r"G:\data_staging\wni-out\test-plots"
os.makedirs(output_base,exist_ok=True)

# One color per annotation tool, indexed by tool position
tool_colors = ['red','green','blue','magenta']
use_fancy_ellipses = True
draw_individual_samples = False

# Ellipse size/stroke for the consensus (median) keypoints
median_radius = 20
median_linewidth = 8

# Ellipse size for individual annotator samples
sample_radius = 10

n_images_to_plot = 100


#%% Load and select data

with open(input_file,'r') as f:
    dataset = json.load(f)
annotations = dataset['annotations']
print(dataset['info'])

short_tool_names = list(dataset['info']['tool_names'].keys())
annotations_to_plot = random.sample(annotations,n_images_to_plot)


#%% Support functions

# https://stackoverflow.com/questions/32504246/draw-ellipse-in-python-pil-with-line-thickness
def draw_fancy_ellipse(image, x, y, radius, width=1, outline='white', antialias=4):
    """
    Draw an antialiased ellipse *outline* (not filled) of the given stroke
    width onto [image], centered at (x,y), by rendering an oversampled mask
    and pasting [outline] color through it.
    """

    bounds = (x-radius,y-radius,x+radius,y+radius)

    # Single-channel oversampled mask; drawing at [antialias]x resolution and
    # downsampling gives smoother edges than drawing directly on the image.
    mask = Image.new(
        size=[int(dim * antialias) for dim in image.size],
        mode='L', color='black')
    mask_draw = ImageDraw.Draw(mask)

    # Outer ellipse in white (opaque), inner ellipse in black (transparent),
    # leaving a ring of the requested stroke width
    for offset, fill in (width/-2.0, 'white'), (width/2.0, 'black'):
        x0 = (bounds[0] + offset) * antialias
        y0 = (bounds[1] + offset) * antialias
        x1 = (bounds[2] - offset) * antialias
        y1 = (bounds[3] - offset) * antialias
        mask_draw.ellipse([x0, y0, x1, y1], fill=fill)

    # Downsample the mask with a high-quality filter
    # NOTE(review): Image.LANCZOS is deprecated in newer Pillow releases in
    # favor of Image.Resampling.LANCZOS — confirm the pinned Pillow version.
    mask = mask.resize(image.size, Image.LANCZOS)

    # Paste the outline color into the image through the mask
    image.paste(outline, mask=mask)


def draw_ellipse(image, x, y, radius, linewidth, color_index, use_imagedraw=False):
    """
    Draw a keypoint marker at (x,y) in tool_colors[color_index]: an
    antialiased outline ring when [use_imagedraw] is True, otherwise a plain
    filled ellipse ([linewidth] is ignored in that case).
    """

    if use_imagedraw:
        draw_fancy_ellipse(image, x, y, radius=radius, width=linewidth, outline=tool_colors[color_index])
    else:
        plain_draw = ImageDraw.Draw(image)
        ellipse_bounds = (x-radius,y-radius,x+radius,y+radius)
        plain_draw.ellipse(ellipse_bounds, fill=tool_colors[color_index])


#%% Plot some images

for ann in tqdm(annotations_to_plot):

    input_path = os.path.join(image_base,ann['filename'])

    # Flatten the relative path into a single output filename
    output_path = os.path.join(output_base,ann['filename'].replace('/','_'))

    im = Image.open(input_path)

    for i_tool,tool_name in enumerate(short_tool_names):

        tool_keypoints = ann['keypoints'][tool_name]

        # Don't plot tools that don't have a consensus annotation
        if tool_keypoints['median_x'] is None:
            continue

        median_x = tool_keypoints['median_x']
        median_y = tool_keypoints['median_y']

        draw_ellipse(im, median_x, median_y, median_radius, median_linewidth, color_index=i_tool,
                     use_imagedraw=use_fancy_ellipses)

        # Optionally plot every individual annotator's click as well
        if draw_individual_samples:
            for i_sample in range(0,len(tool_keypoints['x'])):
                sample_x = tool_keypoints['x'][i_sample]
                sample_y = tool_keypoints['y'][i_sample]
                draw_ellipse(im, sample_x, sample_y, sample_radius, None, color_index=i_tool,
                             use_imagedraw=False)

    # ...for each tool

    im.save(output_path)

# ...for each annotation