megadetector 10.0.10 → 10.0.11 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (80)
  1. megadetector/data_management/animl_to_md.py +5 -2
  2. megadetector/data_management/cct_json_utils.py +4 -2
  3. megadetector/data_management/cct_to_md.py +5 -4
  4. megadetector/data_management/cct_to_wi.py +5 -1
  5. megadetector/data_management/coco_to_yolo.py +3 -2
  6. megadetector/data_management/databases/combine_coco_camera_traps_files.py +4 -4
  7. megadetector/data_management/databases/integrity_check_json_db.py +2 -2
  8. megadetector/data_management/databases/subset_json_db.py +0 -3
  9. megadetector/data_management/generate_crops_from_cct.py +6 -4
  10. megadetector/data_management/get_image_sizes.py +5 -35
  11. megadetector/data_management/labelme_to_coco.py +10 -6
  12. megadetector/data_management/labelme_to_yolo.py +19 -28
  13. megadetector/data_management/lila/create_lila_test_set.py +22 -2
  14. megadetector/data_management/lila/generate_lila_per_image_labels.py +7 -5
  15. megadetector/data_management/lila/lila_common.py +2 -2
  16. megadetector/data_management/lila/test_lila_metadata_urls.py +0 -1
  17. megadetector/data_management/ocr_tools.py +6 -10
  18. megadetector/data_management/read_exif.py +59 -16
  19. megadetector/data_management/remap_coco_categories.py +1 -1
  20. megadetector/data_management/remove_exif.py +10 -5
  21. megadetector/data_management/rename_images.py +20 -13
  22. megadetector/data_management/resize_coco_dataset.py +10 -4
  23. megadetector/data_management/speciesnet_to_md.py +3 -3
  24. megadetector/data_management/yolo_output_to_md_output.py +3 -1
  25. megadetector/data_management/yolo_to_coco.py +28 -19
  26. megadetector/detection/change_detection.py +26 -18
  27. megadetector/detection/process_video.py +1 -1
  28. megadetector/detection/pytorch_detector.py +5 -5
  29. megadetector/detection/run_detector.py +34 -10
  30. megadetector/detection/run_detector_batch.py +2 -1
  31. megadetector/detection/run_inference_with_yolov5_val.py +3 -1
  32. megadetector/detection/run_md_and_speciesnet.py +215 -101
  33. megadetector/detection/run_tiled_inference.py +7 -7
  34. megadetector/detection/tf_detector.py +1 -1
  35. megadetector/detection/video_utils.py +9 -6
  36. megadetector/postprocessing/add_max_conf.py +4 -4
  37. megadetector/postprocessing/categorize_detections_by_size.py +3 -2
  38. megadetector/postprocessing/classification_postprocessing.py +7 -8
  39. megadetector/postprocessing/combine_batch_outputs.py +3 -2
  40. megadetector/postprocessing/compare_batch_results.py +49 -27
  41. megadetector/postprocessing/convert_output_format.py +8 -6
  42. megadetector/postprocessing/create_crop_folder.py +13 -4
  43. megadetector/postprocessing/generate_csv_report.py +22 -8
  44. megadetector/postprocessing/load_api_results.py +8 -4
  45. megadetector/postprocessing/md_to_coco.py +2 -3
  46. megadetector/postprocessing/md_to_labelme.py +12 -8
  47. megadetector/postprocessing/md_to_wi.py +2 -1
  48. megadetector/postprocessing/merge_detections.py +4 -6
  49. megadetector/postprocessing/postprocess_batch_results.py +4 -3
  50. megadetector/postprocessing/remap_detection_categories.py +6 -3
  51. megadetector/postprocessing/render_detection_confusion_matrix.py +18 -10
  52. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  53. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +5 -3
  54. megadetector/postprocessing/separate_detections_into_folders.py +10 -4
  55. megadetector/postprocessing/subset_json_detector_output.py +1 -1
  56. megadetector/postprocessing/top_folders_to_bottom.py +22 -7
  57. megadetector/postprocessing/validate_batch_results.py +1 -1
  58. megadetector/taxonomy_mapping/map_new_lila_datasets.py +59 -3
  59. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
  60. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +26 -17
  61. megadetector/taxonomy_mapping/species_lookup.py +51 -2
  62. megadetector/utils/ct_utils.py +9 -4
  63. megadetector/utils/extract_frames_from_video.py +4 -0
  64. megadetector/utils/gpu_test.py +6 -6
  65. megadetector/utils/md_tests.py +21 -21
  66. megadetector/utils/path_utils.py +112 -44
  67. megadetector/utils/split_locations_into_train_val.py +0 -4
  68. megadetector/utils/url_utils.py +5 -3
  69. megadetector/utils/wi_taxonomy_utils.py +37 -8
  70. megadetector/utils/write_html_image_list.py +1 -2
  71. megadetector/visualization/plot_utils.py +31 -19
  72. megadetector/visualization/render_images_with_thumbnails.py +3 -0
  73. megadetector/visualization/visualization_utils.py +18 -7
  74. megadetector/visualization/visualize_db.py +9 -26
  75. megadetector/visualization/visualize_video_output.py +14 -2
  76. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/METADATA +1 -1
  77. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/RECORD +80 -80
  78. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/WHEEL +0 -0
  79. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/licenses/LICENSE +0 -0
  80. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/top_level.txt +0 -0
megadetector/data_management/animl_to_md.py

@@ -67,8 +67,11 @@ def animl_results_to_md_results(input_file,output_file=None):
  im['file'] = row['file']
  filename_to_results[im['file']] = im

- assert isinstance(row['category'],int),'Invalid category identifier in row {}'.format(im['file'])
- detection_category_id = str(row['category'])
+ # Pandas often reads integer columns as float64, so check integer-ness
+ # rather than just isinstance(..., int)
+ assert pd.notna(row['category']) and float(row['category']).is_integer(), \
+     'Invalid category identifier in row {} (file: {})'.format(i_row, im['file'])
+ detection_category_id = str(int(row['category']))

  assert detection_category_id in detection_category_id_to_name,\
      'Unrecognized detection category ID {}'.format(detection_category_id)

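Note: the motivation is easy to reproduce; a minimal sketch of the pandas behavior this change works around (toy column, not the real animl schema):

    import pandas as pd

    # A column containing any NaN is read back as float64, so integer category
    # IDs arrive as floats, and the old isinstance(..., int) check rejects
    # perfectly valid IDs (numpy scalar types are not Python ints anyway).
    df = pd.DataFrame({'category': [1.0, 2.0, float('nan')]})
    row = df.iloc[0]
    assert not isinstance(row['category'], int)   # it's numpy.float64
    assert pd.notna(row['category']) and float(row['category']).is_integer()
    assert str(int(row['category'])) == '1'       # usable as a category ID
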
megadetector/data_management/cct_json_utils.py

@@ -231,7 +231,6 @@ class IndexedJsonDb:

  # Image ID --> annotations
  # Each image can potentially multiple annotations, hence using lists
- self.image_id_to_annotations = {}
  self.image_id_to_annotations = defaultdict(list)
  for ann in self.db['annotations']:
      self.image_id_to_annotations[ann['image_id']].append(ann)

@@ -355,7 +354,9 @@ def parse_datetimes_from_cct_image_list(images,

  assert isinstance(images,list)

- for im in images:
+ print('Parsing datetimes from CCT image list...')
+
+ for im in tqdm(images):

      if 'datetime' not in im:
          continue

@@ -445,6 +446,7 @@ def create_sequences(image_info,options=None):
      to_return = image_info

  elif isinstance(image_info,str):
+     print('Reading image information from {}'.format(image_info))
      with open(image_info,'r') as f:
          d = json.load(f)
      to_return = d

megadetector/data_management/cct_to_md.py

@@ -64,7 +64,7 @@ def cct_to_md(input_filename,output_filename=None):
  d = json.load(f)

  for s in ['annotations','images','categories']:
-     assert s in d.keys(), 'Cannot find category {} in input file, is this a CCT file?'.format(s)
+     assert s in d.keys(), 'Cannot find key {} in input file, is this a CCT file?'.format(s)


  ## Prepare metadata

@@ -149,10 +149,11 @@ def cct_to_md(input_filename,output_filename=None):

  results['images'] = images_out

- with open(output_filename,'w') as f:
-     json.dump(results, f, indent=1)
+ if output_filename is not None:
+     with open(output_filename,'w') as f:
+         json.dump(results, f, indent=1)

- return output_filename
+ return results

  # ...cct_to_md()

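Note: this is a small API change: cct_to_md() now returns the MD-format results dict rather than the output path, and writing the file becomes optional. A hedged usage sketch (file names hypothetical):

    from megadetector.data_management.cct_to_md import cct_to_md

    results = cct_to_md('labels_cct.json')                     # in-memory only
    results = cct_to_md('labels_cct.json', 'results_md.json')  # also writes the file
    print(len(results['images']))
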
megadetector/data_management/cct_to_wi.py

@@ -263,8 +263,10 @@ def main(): # noqa
  else:
      row['number_of_objects'] = 1

+ assert isinstance(im['datetime'],str)
+
  row['uncertainty'] = None
- row['timestamp'] = im['datetime']; assert isinstance(im['datetime'],str)
+ row['timestamp'] = im['datetime']
  row['highlighted'] = 0
  row['age'] = None
  row['sex'] = None

@@ -276,6 +278,8 @@ def main(): # noqa
  assert len(row) == len(images_fields)
  rows.append(row)

+ # ...for each image
+
  df = pd.DataFrame(rows)

  df.to_csv(os.path.join(output_base,images_file_name),index=False)

megadetector/data_management/coco_to_yolo.py

@@ -81,7 +81,7 @@ def write_yolo_dataset_file(yolo_dataset_file,
  if val_folder_relative is not None:
      f.write('val: {}\n'.format(val_folder_relative))
  if test_folder_relative is not None:
-     f.write('val: {}\n'.format(test_folder_relative))
+     f.write('test: {}\n'.format(test_folder_relative))

  f.write('\n')

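Note: before this fix the test split was emitted under a second val: key; YAML loaders that keep the last duplicate key would then treat the test folder as the validation folder and lose the real one. An illustrative sketch of the corrected writer output (folder names hypothetical):

    # What the fixed code emits into the YOLO dataset file
    with open('dataset.yaml', 'w') as f:
        f.write('train: train_images\n')
        f.write('val: val_images\n')
        f.write('test: test_images\n')  # previously a duplicate 'val:' line
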
@@ -454,13 +454,14 @@ def coco_to_yolo(input_image_folder,
      # Category IDs should range from 0..N-1
      assert i_class in yolo_id_to_name
      f.write(yolo_id_to_name[i_class] + '\n')
+ else:
+     class_list_filename = None

  if image_id_to_output_image_json_file is not None:
      print('Writing image ID mapping to {}'.format(image_id_to_output_image_json_file))
      with open(image_id_to_output_image_json_file,'w') as f:
          json.dump(image_id_to_output_image_name,f,indent=1)

-
  if (output_folder == input_image_folder) and (not create_image_and_label_folders):
      print('Creating annotation files (not copying images, input and output folder are the same)')
  else:

megadetector/data_management/databases/combine_coco_camera_traps_files.py

@@ -130,7 +130,7 @@ def combine_cct_dictionaries(input_dicts, require_uniqueness=True):
  for im in input_dict['images']:

      if 'seq_id' in im:
-         im['seq_id'] = index_string + im['seq_id']
+         im['seq_id'] = index_string + str(im['seq_id'])
      if 'location' in im:
          im['location'] = index_string + im['location']

@@ -143,7 +143,7 @@ def combine_cct_dictionaries(input_dicts, require_uniqueness=True):
      print('Redundant image {}'.format(im_file))

  # Create a unique ID
- im['id'] = index_string + im['id']
+ im['id'] = index_string + str(im['id'])
  filename_to_image[im_file] = im

  # ...for each image

@@ -152,8 +152,8 @@ def combine_cct_dictionaries(input_dicts, require_uniqueness=True):
  # Same for annotations
  for ann in input_dict['annotations']:

-     ann['image_id'] = index_string + ann['image_id']
-     ann['id'] = index_string + ann['id']
+     ann['image_id'] = index_string + str(ann['image_id'])
+     ann['id'] = index_string + str(ann['id'])
      assert ann['category_id'] in old_cat_id_to_new_cat_id
      ann['category_id'] = old_cat_id_to_new_cat_id[ann['category_id']]

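Note: the str() coercions in this file matter because COCO Camera Traps IDs may legally be integers, and concatenating an int onto the string index prefix raises. A minimal sketch (prefix format hypothetical):

    index_string = '0001_'
    ann = {'id': 42, 'image_id': 7}

    # index_string + ann['id'] would raise:
    # TypeError: can only concatenate str (not "int") to str
    ann['id'] = index_string + str(ann['id'])              # '0001_42'
    ann['image_id'] = index_string + str(ann['image_id'])  # '0001_7'
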
megadetector/data_management/databases/integrity_check_json_db.py

@@ -111,11 +111,11 @@ def _check_image_existence_and_size(image,options=None):
  # width, height = Image.open(file_path).size
  try:
      pil_im = open_image(file_path)
+     width,height = pil_im.size
+     pil_im.close()
  except Exception as e:
      s = 'Error opening {}: {}'.format(file_path,str(e))
      return s
-
- width,height = pil_im.size
  if (not (width == image['width'] and height == image['height'])):
      s = 'Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
          image['id'], file_path, image['width'], image['height'], width, height)

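Note: two problems are fixed at once here: reading pil_im.size outside the try meant a failure there escaped the error handling, and the image handle was never closed. A sketch of the same pattern written with a context manager (plain PIL shown; open_image() is MegaDetector's wrapper):

    from PIL import Image

    def _get_size_checked(file_path):
        # Returns (width, height), or an error string on failure.
        try:
            # The context manager closes the file handle even if reading
            # the size fails, mirroring the explicit close() in the fix
            with Image.open(file_path) as pil_im:
                return pil_im.size
        except Exception as e:
            return 'Error opening {}: {}'.format(file_path, str(e))
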
megadetector/data_management/databases/subset_json_db.py

@@ -12,7 +12,6 @@ subset_json_detector_output.py.

  #%% Constants and imports

- import os
  import sys
  import json
  import argparse

@@ -151,8 +150,6 @@ def subset_json_db(input_json,
  if output_json is not None:
      if verbose:
          print('Writing output .json to {}'.format(output_json))
-     output_dir = os.path.dirname(output_json)
-     os.makedirs(output_dir,exist_ok=True)
      ct_utils.write_json(output_json, output_data)

  if verbose:

megadetector/data_management/generate_crops_from_cct.py

@@ -72,7 +72,7 @@ def generate_crops_from_cct(cct_file,image_dir,output_dir,padding=0,flat_output=
  # im = d['images'][0]
  for im in tqdm(d['images']):

-     input_image_fn = os.path.join(os.path.join(image_dir,im['file_name']))
+     input_image_fn = os.path.join(image_dir,im['file_name'])
      assert os.path.isfile(input_image_fn), 'Could not find image {}'.format(input_image_fn)

      if im['id'] not in image_id_to_boxes:

@@ -102,15 +102,17 @@

  xmin = max(xmin,0)
  ymin = max(ymin,0)
- xmax = min(xmax,img.width-1)
- ymax = min(ymax,img.height-1)
+ # PIL's crop() method uses exclusive upper bounds for the right and lower
+ # edges, hence "img.width" rather than "img.width-1" here.
+ xmax = min(xmax,img.width)
+ ymax = min(ymax,img.height)

  crop = img.crop(box=[xmin, ymin, xmax, ymax])

  output_fn = os.path.splitext(im['file_name'])[0].replace('\\','/')
  if flat_output:
      output_fn = output_fn.replace('/','_')
- output_fn = output_fn + '_crop' + str(i_ann).zfill(3) + '_id_' + ann['id']
+ output_fn = output_fn + '_crop' + str(i_ann).zfill(3) + '_id_' + str(ann['id'])
  output_fn = output_fn + '.jpg'

  output_full_path = os.path.join(output_dir,output_fn)

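Note: PIL's crop() takes (left, upper, right, lower) with exclusive right/lower bounds, so the old clamp to width-1/height-1 silently shaved a pixel off boxes touching the image edge. A quick demonstration:

    from PIL import Image

    img = Image.new('RGB', (100, 50))

    # right/lower are exclusive, so this is the full image
    assert img.crop((0, 0, img.width, img.height)).size == (100, 50)

    # the old clamp dropped the last row and column of edge-touching boxes
    assert img.crop((0, 0, img.width - 1, img.height - 1)).size == (99, 49)
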
megadetector/data_management/get_image_sizes.py

@@ -75,7 +75,7 @@ def get_image_sizes(filenames,image_prefix=None,output_file=None,
  image_prefix (str, optional): optional prefix to add to images to get to full paths;
      useful when [filenames] contains relative files, in which case [image_prefix] is the
      base folder for the source images.
- output_file (str, optional): a .json file to write the imgae sizes
+ output_file (str, optional): a .json file to write the image sizes
  n_workers (int, optional): number of parallel workers to use, set to <=1 to
      disable parallelization
  use_threads (bool, optional): whether to use threads (True) or processes (False)

@@ -88,8 +88,10 @@ def get_image_sizes(filenames,image_prefix=None,output_file=None,
  """

  if output_file is not None:
-     assert os.path.isdir(os.path.dirname(output_file)), \
-         'Illegal output file {}, parent folder does not exist'.format(output_file)
+     output_dir = os.path.dirname(output_file)
+     if len(output_dir) > 0:
+         assert os.path.isdir(output_dir), \
+             'Illegal output file {}, parent folder does not exist'.format(output_file)

  if isinstance(filenames,str) and os.path.isfile(filenames):
      with open(filenames,'r') as f:

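Note: the extra guard is for bare filenames: os.path.dirname() returns an empty string when the output file has no directory component, and os.path.isdir('') is False, so the old assert rejected valid relative paths like 'sizes.json'. Concretely:

    import os

    assert os.path.dirname('sizes.json') == ''
    assert not os.path.isdir('')  # why the old assert failed on bare filenames

    output_dir = os.path.dirname('sizes.json')
    if len(output_dir) > 0:       # the new guard: only validate a real parent folder
        assert os.path.isdir(output_dir)
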
@@ -130,38 +132,6 @@ def get_image_sizes(filenames,image_prefix=None,output_file=None,
  return all_results


- #%% Interactive driver
-
- if False:
-
-     pass
-
-     #%%
-
-     # List images in a test folder
-     base_dir = r'c:\temp\test_images'
-     image_list_file = os.path.join(base_dir,'images.json')
-     relative_image_list_file = os.path.join(base_dir,'images_relative.json')
-     image_size_file = os.path.join(base_dir,'image_sizes.json')
-     from megadetector.utils import path_utils
-     image_names = path_utils.find_images(base_dir,recursive=True)
-
-     with open(image_list_file,'w') as f:
-         json.dump(image_names,f,indent=1)
-
-     relative_image_names = []
-     for s in image_names:
-         relative_image_names.append(os.path.relpath(s,base_dir))
-
-     with open(relative_image_list_file,'w') as f:
-         json.dump(relative_image_names,f,indent=1)
-
-
-     #%%
-
-     get_image_sizes(relative_image_list_file,image_size_file,image_prefix=base_dir,n_threads=4)
-
-
  #%% Command-line driver

  def main(): # noqa

megadetector/data_management/labelme_to_coco.py

@@ -292,7 +292,8 @@ def labelme_to_coco(input_folder,

  # Enumerate images
  print('Enumerating images in {}'.format(input_folder))
- image_filenames_relative = path_utils.find_images(input_folder,recursive=recursive,
+ image_filenames_relative = path_utils.find_images(input_folder,
+     recursive=recursive,
      return_relative_paths=True,
      convert_slashes=True)

@@ -352,9 +353,10 @@
          allow_new_categories=False
          ),image_filenames_relative), total=len(image_filenames_relative)))
  finally:
-     pool.close()
-     pool.join()
-     print("Pool closed and joined for labelme file processing")
+     if pool is not None:
+         pool.close()
+         pool.join()
+         print("Pool closed and joined for labelme file processing")

  images = []
  annotations = []

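Note: the same guard is applied in labelme_to_yolo.py and ocr_tools.py below. The pool is only created on the parallel code path, so if the worker count is 1 (or pool construction itself raises), the finally block must tolerate pool being None. A minimal, self-contained sketch of the pattern (names hypothetical):

    from multiprocessing.pool import ThreadPool

    def process_file(fn):
        return len(fn)

    filenames = ['a.json', 'bb.json']
    n_workers = 1

    pool = None
    try:
        if n_workers > 1:
            pool = ThreadPool(n_workers)
            results = pool.map(process_file, filenames)
        else:
            # Serial path: no pool is ever created...
            results = [process_file(fn) for fn in filenames]
    finally:
        # ...so cleanup must tolerate pool being None (the guard added here)
        if pool is not None:
            pool.close()
            pool.join()
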
@@ -423,7 +425,9 @@ def find_empty_labelme_files(input_folder,recursive=True):
  - images_with_non_empty_json_files: a list of images in [input_folder] associated with .json
    files that have at least one box
  """
- image_filenames_relative = path_utils.find_images(input_folder,recursive=True,
+
+ image_filenames_relative = path_utils.find_images(input_folder,
+     recursive=recursive,
      return_relative_paths=True)

  images_with_empty_json_files = []

@@ -500,7 +504,7 @@ if False:
  options.bFindUnusedImages = True
  options.bRequireLocation = False

- sortec_categories, _, error_info = integrity_check_json_db.integrity_check_json_db(output_file,options)
+ sorted_categories, _, error_info = integrity_check_json_db.integrity_check_json_db(output_file,options)


  #%% Preview

megadetector/data_management/labelme_to_yolo.py

@@ -107,10 +107,21 @@ def labelme_file_to_yolo_file(labelme_file,
  minx_abs = max(minx_abs,0.0)
  miny_abs = max(miny_abs,0.0)

- minx_rel = minx_abs / (im_width-1)
- maxx_rel = maxx_abs / (im_width-1)
- miny_rel = miny_abs / (im_height-1)
- maxy_rel = maxy_abs / (im_height-1)
+ # Handle degenerate cases where image is one pixel wide
+ if im_width == 1:
+     minx_rel = 0.0
+     maxx_rel = 0.0
+ else:
+     minx_rel = minx_abs / (im_width-1)
+     maxx_rel = maxx_abs / (im_width-1)
+
+ # Handle degenerate cases where image is one pixel tall
+ if im_height == 1:
+     miny_rel = 0.0
+     maxy_rel = 0.0
+ else:
+     miny_rel = miny_abs / (im_height-1)
+     maxy_rel = maxy_abs / (im_height-1)

  assert maxx_rel >= minx_rel
  assert maxy_rel >= miny_rel

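Note: without the special case, a one-pixel-wide or one-pixel-tall image makes the normalization denominator zero:

    im_width = 1
    minx_abs = 0.0

    # minx_rel = minx_abs / (im_width - 1)   # ZeroDivisionError
    minx_rel = 0.0 if im_width == 1 else minx_abs / (im_width - 1)
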
@@ -252,9 +263,10 @@ def labelme_folder_to_yolo(labelme_folder,
          valid_labelme_files_abs),
          total=len(valid_labelme_files_abs)))
  finally:
-     pool.close()
-     pool.join()
-     print('Pool closed and joined for labelme conversion to YOLO')
+     if pool is not None:
+         pool.close()
+         pool.join()
+         print('Pool closed and joined for labelme conversion to YOLO')

  assert len(valid_labelme_files_relative) == len(image_results)

@@ -270,27 +282,6 @@
  # ...def labelme_folder_to_yolo(...)


- #%% Interactive driver
-
- if False:
-
-     pass
-
-     #%%
-
-     labelme_file = os.path.expanduser('~/tmp/labels/x.json')
-     required_token = 'saved_by_labelme'
-     category_name_to_category_id = {'animal':0}
-     labelme_folder = os.path.expanduser('~/tmp/labels')
-
-     #%%
-
-     category_name_to_category_id = \
-         labelme_folder_to_yolo(labelme_folder,
-             category_name_to_category_id=category_name_to_category_id,
-             required_token=required_token,
-             overwrite_behavior='overwrite')
-
  #%% Command-line driver

  def main():

megadetector/data_management/lila/create_lila_test_set.py

@@ -16,6 +16,7 @@ import random
  from megadetector.data_management.lila.lila_common import \
      read_lila_metadata, read_metadata_file_for_dataset
  from megadetector.utils.url_utils import parallel_download_urls
+ from megadetector.utils.path_utils import open_file

  n_empty_images_per_dataset = 1
  n_non_empty_images_per_dataset = 1

@@ -50,6 +51,8 @@ for ds_name in metadata_table.keys():

  # Takes ~60 seconds

+ empty_category_names = ['empty','blank']
+
  # ds_name = (list(metadata_table.keys()))[0]
  for ds_name in metadata_table.keys():

@@ -65,10 +68,22 @@ for ds_name in metadata_table.keys():

  ## Find empty images

- if 'empty' not in category_name_to_id:
+ empty_category_present = False
+ for category_name in category_name_to_id:
+     if category_name in empty_category_names:
+         empty_category_present = True
+         break
+ if not empty_category_present:
      empty_annotations_to_download = []
  else:
-     empty_category_id = category_name_to_id['empty']
+     empty_category_id = None
+     for category_name in empty_category_names:
+         if category_name in category_name_to_id:
+             if empty_category_id is not None:
+                 print('Warning: multiple empty categories in dataset {}'.format(ds_name))
+             else:
+                 empty_category_id = category_name_to_id[category_name]
+     assert empty_category_id is not None
      empty_annotations = [ann for ann in d['annotations'] if ann['category_id'] == empty_category_id]
      try:
          empty_annotations_to_download = random.sample(empty_annotations,n_empty_images_per_dataset)

@@ -165,3 +180,8 @@ download_results = parallel_download_urls(url_to_target_file,
  # r = download_results[0]
  for r in download_results:
      assert r['status'] in ('skipped','success')
+
+
+ #%% Open the test test
+
+ open_file(output_dir)

megadetector/data_management/lila/generate_lila_per_image_labels.py

@@ -21,7 +21,7 @@ import os
  import json
  import pandas as pd
  import numpy as np
- import dateparser
+ import dateparser # type: ignore
  import csv

  from collections import defaultdict

@@ -148,7 +148,6 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
  data = json.load(f)

  categories = data['categories']
- category_ids = [c['id'] for c in categories]
  for c in categories:
      category_id_to_name = {c['id']:c['name'] for c in categories}

@@ -355,7 +354,7 @@ print('\nProcessed {} datasets'.format(len(metadata_table)))

  #%% Read the .csv back

- df = pd.read_csv(output_file)
+ df = pd.read_csv(output_file, low_memory=False)
  print('Read {} rows from {}'.format(len(df),output_file))

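Note: low_memory=False makes pandas read the whole file before inferring column dtypes; with the default chunked parser, a large mixed-content .csv like this one can trigger DtypeWarning and end up with inconsistent per-chunk dtypes. A hedged sketch (path hypothetical):

    import pandas as pd

    # Read in one pass so dtype inference sees the whole column at once
    df = pd.read_csv('lila_image_urls_and_labels.csv', low_memory=False)
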
@@ -426,6 +425,8 @@ os.makedirs(preview_folder,exist_ok=True)

  #%% Choose images to download

+ # Takes ~60 seconds
+
  np.random.seed(0)
  images_to_download = []

@@ -533,7 +534,7 @@ zipped_output_file = zip_file(output_file,verbose=True,overwrite=True)
  print('Zipped {} to {}'.format(output_file,zipped_output_file))


- #%% Convert to .json
+ #%% Experimental: convert to .json

  """
  The .csv file "output_file" (already loaded into the variable "df" at this point) has the following columns:

@@ -733,7 +734,8 @@ with open(output_file, 'r', encoding='utf-8') as csvfile:

  common_name = _clearnan(row['common_name'])

- frame_num = int(row['frame_num'])
+ # Convert to float first in case this appears in the .csv file as, e.g. "3.0"
+ frame_num = int(float(row['frame_num']))

  # Image data
  image_entry = {

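Note: the two-step conversion is needed because int() rejects float-formatted strings:

    # int('3.0') raises ValueError: invalid literal for int() with base 10
    assert int(float('3.0')) == 3
    assert int(float('3')) == 3
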
megadetector/data_management/lila/lila_common.py

@@ -65,7 +65,7 @@ def read_wildlife_insights_taxonomy_mapping(metadata_dir, force_download=False):

  wi_taxonomy_csv_path = os.path.join(metadata_dir,wildlife_insights_taxonomy_local_csv_filename)

- if os.path.exists(wi_taxonomy_csv_path):
+ if os.path.exists(wi_taxonomy_csv_path) and (not force_download):
      df = pd.read_csv(wi_taxonomy_csv_path)
  else:
      wi_taxonomy_json_path = os.path.join(metadata_dir,wildlife_insights_taxonomy_local_json_filename)

@@ -114,7 +114,7 @@ def read_lila_taxonomy_mapping(metadata_dir, force_download=False):
  download_url(lila_taxonomy_mapping_url, taxonomy_filename,
      force_download=force_download)

- df = pd.read_csv(lila_taxonomy_mapping_url)
+ df = pd.read_csv(taxonomy_filename)

  return df

megadetector/data_management/lila/test_lila_metadata_urls.py

@@ -162,4 +162,3 @@ for i_url,url in enumerate(urls_to_test):
      status_codes[i_url],url,url_to_source[url]))

  print('Tested {} URLs'.format(len(urls_to_test)))
-

megadetector/data_management/ocr_tools.py

@@ -271,11 +271,6 @@ def crop_to_solid_region(rough_crop,crop_location,options=None):
  w = max_x-min_x
  h = max_y-min_y

- x = min_x
- y = min_y
- w = max_x-min_x
- h = max_y-min_y
-
  # Crop the image
  crop_np = rough_crop_np[y:y+h,x:x+w]

@@ -650,9 +645,10 @@ def get_datetimes_for_folder(folder_name,output_file=None,n_to_sample=-1,options
          partial(try_get_datetime_from_image,options=options),image_file_names),
          total=len(image_file_names)))
  finally:
-     pool.close()
-     pool.join()
-     print("Pool closed and joined for datetime extraction")
+     if pool is not None:
+         pool.close()
+         pool.join()
+         print("Pool closed and joined for datetime extraction")

  filename_to_results = {}

@@ -728,8 +724,8 @@ if False:

  if 'text_results' not in results:
      raise Exception('no results available for {} ({})'.format(i_fn,fn))
-     print('Skipping {}, no results'.format(i_fn))
-     continue
+     # print('Skipping {}, no results'.format(i_fn))
+     # continue

  s = ' '.join([x[0] for x in results['text_results']])