megadetector-5.0.8-py3-none-any.whl → megadetector-5.0.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (190)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0
@@ -1,10 +1,10 @@
- ########
- #
- # labelme_to_coco.py
- #
- # Converts a folder of labelme-formatted .json files to COCO format.
- #
- ########
+ """
+
+ labelme_to_coco.py
+
+ Converts a folder of labelme-formatted .json files to COCO.
+
+ """

  #%% Constants and imports

@@ -23,10 +23,10 @@ from tqdm import tqdm

  #%% Support functions

- def add_category(category_name,category_name_to_id,candidate_category_id=0):
+ def _add_category(category_name,category_name_to_id,candidate_category_id=0):
  """
- Add the category [category_name] to the dict [category_name_to_id], by default
- using the next available integer index.
+ Adds the category [category_name] to the dict [category_name_to_id], by default
+ using the next available integer index.
  """

  if category_name in category_name_to_id:
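
For orientation, a minimal sketch of what the renamed _add_category helper plausibly does, inferred from its docstring and the first body line shown above; the full body is not part of this hunk, so treat the details as assumptions:

    def _add_category(category_name, category_name_to_id, candidate_category_id=0):
        # If the category is already registered, reuse its existing ID
        if category_name in category_name_to_id:
            return category_name_to_id[category_name]
        # Otherwise assign the next available integer ID, starting from the candidate
        while candidate_category_id in category_name_to_id.values():
            candidate_category_id += 1
        category_name_to_id[category_name] = candidate_category_id
        return candidate_category_id
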
@@ -121,7 +121,7 @@ def _process_labelme_file(image_fn_relative,input_folder,use_folders_as_labels,
  if len(shapes) == 0:

  if allow_new_categories:
- category_id = add_category('empty',category_name_to_id)
+ category_id = _add_category('empty',category_name_to_id)
  else:
  assert 'empty' in category_name_to_id
  category_id = category_name_to_id['empty']
@@ -148,7 +148,7 @@ def _process_labelme_file(image_fn_relative,input_folder,use_folders_as_labels,
  category_name = shape['label']

  if allow_new_categories:
- category_id = add_category(category_name,category_name_to_id)
+ category_id = _add_category(category_name,category_name_to_id)
  else:
  assert category_name in category_name_to_id
  category_id = category_name_to_id[category_name]
@@ -202,7 +202,7 @@ def labelme_to_coco(input_folder,
  max_workers=1,
  use_threads=True):
  """
- Find all images in [input_folder] that have corresponding .json files, and convert
+ Finds all images in [input_folder] that have corresponding .json files, and converts
  to a COCO .json file.

  Currently only supports bounding box annotations and image-level flags (i.e., does not
@@ -224,11 +224,38 @@
  file. Empty images in the "lion" folder will still be given the label "empty" (or
  [empty_category_name]).

- no_json_handling can be:
+ Args:
+ input_folder (str): input folder to search for images and Labelme .json files
+ output_file (str, optional): output file to which we should write COCO-formatted data; if None
+ this function just returns the COCO-formatted dict
+ category_id_to_category_name (dict, optional): dict mapping category IDs to category names;
+ really used to map Labelme category names to COCO category IDs. IDs will be auto-generated
+ if this is None.
+ empty_category_id (int, optional): category ID to use for the not-very-COCO-like "empty" category;
+ also see the no_json_handling parameter.
+ info_struct (dict, optional): dict to stash in the "info" field of the resulting COCO dict
+ relative_paths_to_include (list, optional): allowlist of relative paths to include in the COCO
+ dict; there's no reason to specify this along with relative_paths_to_exclude.
+ relative_paths_to_exclude (list, optional): blocklist of relative paths to exclude from the COCO
+ dict; there's no reason to specify this along with relative_paths_to_include.
+ use_folders_as_labels (bool, optional): if this is True, class names will be pulled from folder names,
+ useful if you have images like a/b/cat/image001.jpg, a/b/dog/image002.jpg, etc.
+ recursive (bool, optional): whether to recurse into [input_folder]
+ no_json_handling (str, optional): how to deal with image files that have no corresponding .json files,
+ can be:
+
+ - 'skip': ignore image files with no corresponding .json files
+ - 'empty': treat image files with no corresponding .json files as empty
+ - 'error': throw an error when an image file has no corresponding .json file
+ validate_image_sizes (bool, optional): whether to load images to verify that the sizes specified
+ in the labelme files are correct
+ max_workers (int, optional): number of workers to use for parallelization, set to <=1 to disable
+ parallelization
+ use_threads (bool, optional): whether to use threads (True) or processes (False) for parallelization,
+ not relevant if max_workers <= 1

- * 'skip': ignore image files with no corresponding .json files
- * 'empty': treat image files with no corresponding .json files as empty
- * 'error': throw an error when an image file has no corresponding .json file
+ Returns:
+ dict: a COCO-formatted dictionary, identical to what's written to [output_file] if [output_file] is not None.
  """

  if max_workers > 1:
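
The expanded docstring above implies a call pattern along these lines; this is a hedged usage sketch (the folder and file paths are hypothetical, and the module path is assumed from this wheel's layout), not code from the package:

    from data_management.labelme_to_coco import labelme_to_coco

    # Convert a folder of Labelme .json files into one COCO .json file,
    # treating images without .json files as empty rather than skipping them
    coco_dict = labelme_to_coco('/data/camera-traps',
                                output_file='/data/camera-traps/coco.json',
                                recursive=True,
                                no_json_handling='empty',
                                validate_image_sizes=False,
                                max_workers=4,
                                use_threads=True)
    print('Wrote {} images'.format(len(coco_dict['images'])))
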
@@ -288,7 +315,7 @@ def labelme_to_coco(input_folder,
  raise ValueError('Category IDs must be ints or string-formatted ints')

  if empty_category_id is None:
- empty_category_id = add_category(empty_category_name,category_name_to_id)
+ empty_category_id = _add_category(empty_category_name,category_name_to_id)

  if max_workers <= 1:

@@ -366,12 +393,26 @@ def find_empty_labelme_files(input_folder,recursive=True):
  Returns a list of all image files in in [input_folder] associated with .json files that have
  no boxes in them. Also returns a list of images with no associated .json files. Specifically,
  returns a dict:
-
- {
- 'images_with_empty_json_files':[list],
- 'images_with_no_json_files':[list],
- 'images_with_non_empty_json_files':[list]
- }
+
+ .. code-block: none
+
+ {
+ 'images_with_empty_json_files':[list],
+ 'images_with_no_json_files':[list],
+ 'images_with_non_empty_json_files':[list]
+ }
+
+ Args:
+ input_folder (str): the folder to search for empty (i.e., box-less) Labelme .json files
+ recursive (bool, optional): whether to recurse into [input_folder]
+
+ Returns:
+ dict: a dict with fields:
+ - images_with_empty_json_files: a list of all image files in [input_folder] associated with
+ .json files that have no boxes in them
+ - images_with_no_json_files: a list of images in [input_folder] with no associated .json files
+ - images_with_non_empty_json_files: a list of images in [input_folder] associated with .json
+ files that have at least one box
  """
  image_filenames_relative = path_utils.find_images(input_folder,recursive=True,
  return_relative_paths=True)
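
Similarly, the new Args/Returns documentation for find_empty_labelme_files suggests usage along these lines (a sketch with a hypothetical path):

    from data_management.labelme_to_coco import find_empty_labelme_files

    results = find_empty_labelme_files('/data/camera-traps', recursive=True)
    # Each documented field is a list of image filenames
    for field in ('images_with_empty_json_files',
                  'images_with_no_json_files',
                  'images_with_non_empty_json_files'):
        print('{}: {}'.format(field, len(results[field])))
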
@@ -1,10 +1,10 @@
- ########
- #
- # labelme_to_yolo.py
- #
- # Create YOLO .txt files in a folder containing labelme .json files.
- #
- ########
+ """
+
+ labelme_to_yolo.py
+
+ Create YOLO .txt files in a folder containing labelme .json files.
+
+ """

  #%% Imports

@@ -77,7 +77,7 @@ def labelme_file_to_yolo_file(labelme_file,
  p0 = shape['points'][0]
  p1 = shape['points'][1]

- # LabelMe: [[x0,y0],[x1,y1]] (arbitrarily sorted) (absolute coordinates)
+ # Labelme: [[x0,y0],[x1,y1]] (arbitrarily sorted) (absolute coordinates)
  #
  # YOLO: [class, x_center, y_center, width, height] (normalized coordinates)
  minx_abs = min(p0[0],p1[0])
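
The comment pair above summarizes the conversion this function performs; here is a self-contained sketch of the implied box math (the function name and signature are assumptions for illustration, not the package's API):

    def labelme_box_to_yolo(p0, p1, image_width, image_height):
        # Labelme: two arbitrarily-ordered corners in absolute pixel coordinates
        minx_abs = min(p0[0], p1[0])
        maxx_abs = max(p0[0], p1[0])
        miny_abs = min(p0[1], p1[1])
        maxy_abs = max(p0[1], p1[1])
        # YOLO: normalized box center and size
        x_center = ((minx_abs + maxx_abs) / 2.0) / image_width
        y_center = ((miny_abs + maxy_abs) / 2.0) / image_height
        width = (maxx_abs - minx_abs) / image_width
        height = (maxy_abs - miny_abs) / image_height
        return x_center, y_center, width, height
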
@@ -1,12 +1,12 @@
- ########
- #
- # add_locations_to_island_camera_traps.py
- #
- # The Island Conservation Camera Traps dataset had unique camera identifiers embedded
- # in filenames, but not in the proper metadata fields. This script copies that information
- # to metadata.
- #
- ########
+ """
+
+ add_locations_to_island_camera_traps.py
+
+ The Island Conservation Camera Traps dataset had unique camera identifiers embedded
+ in filenames, but not in the proper metadata fields. This script copies that information
+ to metadata.
+
+ """

  #%% Imports and constants

@@ -1,147 +1,147 @@
- ########
- #
- # add_locations_to_nacti.py
- #
- # As of 10.2023, NACTI metadata only has very coarse location information (e.g. "Florida"),
- # but camera IDs are embedded in filenames. This script pulls that information from filenames
- # and adds it to metadata.
- #
- ########
-
- #%% Imports and constants
-
- import os
- import json
- import shutil
-
- from tqdm import tqdm
- from collections import defaultdict
-
- input_file = r'd:\lila\nacti\nacti_metadata.json.1.13\nacti_metadata.json'
- output_file = r'g:\temp\nacti_metadata.1.14.json'
-
-
- #%% Read metadata
-
- with open(input_file,'r') as f:
- d = json.load(f)
-
- assert d['info']['version'] == 1.13
-
-
- #%% Map images to locations (according to the metadata)
-
- file_name_to_original_location = {}
-
- # im = dataset_labels['images'][0]
- for im in tqdm(d['images']):
- file_name_to_original_location[im['file_name']] = im['location']
-
- original_locations = set(file_name_to_original_location.values())
-
- print('Found {} locations in the original metadata:'.format(len(original_locations)))
- for loc in original_locations:
- print('[{}]'.format(loc))
-
-
- #%% Map images to new locations
-
- def path_to_location(relative_path):
-
- relative_path = relative_path.replace('\\','/')
- if relative_path in file_name_to_original_location:
- location_name = file_name_to_original_location[relative_path]
- if location_name == 'San Juan Mntns, Colorado':
- # "part0/sub000/2010_Unit150_Ivan097_img0003.jpg"
- tokens = relative_path.split('/')[-1].split('_')
- assert tokens[1].startswith('Unit')
- location_name = 'sanjuan_{}_{}_{}'.format(tokens[0],tokens[1],tokens[2])
- elif location_name == 'Lebec, California':
- # "part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg"
- tokens = relative_path.split('/')[-1].split('_')
- assert tokens[0].startswith('CA-') or tokens[0].startswith('TAG-')
- location_name = 'lebec_{}'.format(tokens[0])
- elif location_name == 'Archbold, FL':
- # "part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg"
- tokens = relative_path.split('/')[-1].split('_')
- assert tokens[0].startswith('FL-')
- location_name = 'archbold_{}'.format(tokens[0])
- else:
- assert location_name == ''
- tokens = relative_path.split('/')[-1].split('_')
- if tokens[0].startswith('CA-') or tokens[0].startswith('TAG-') or tokens[0].startswith('FL-'):
- location_name = '{}'.format(tokens[0])
-
- else:
-
- location_name = 'unknown'
-
- # print('Returning location {} for file {}'.format(location_name,relative_path))
-
- return location_name
-
- file_name_to_updated_location = {}
- updated_location_to_count = defaultdict(int)
- for im in tqdm(d['images']):
-
- updated_location = path_to_location(im['file_name'])
- file_name_to_updated_location[im['file_name']] = updated_location
- updated_location_to_count[updated_location] += 1
-
- updated_location_to_count = {k: v for k, v in sorted(updated_location_to_count.items(),
- key=lambda item: item[1],
- reverse=True)}
-
- updated_locations = set(file_name_to_updated_location.values())
-
- print('Found {} updated locations in the original metadata:'.format(len(updated_locations)))
- for loc in updated_location_to_count:
- print('{}: {}'.format(loc,updated_location_to_count[loc]))
-
-
- #%% Re-write metadata
-
- for im in d['images']:
- im['location'] = file_name_to_updated_location[im['file_name']]
- d['info']['version'] = 1.14
-
- with open(output_file,'w') as f:
- json.dump(d,f,indent=1)
-
-
- #%% For each location, sample some random images to make sure they look consistent
-
- input_base = r'd:\lila\nacti-unzipped'
- assert os.path.isdir(input_base)
-
- location_to_images = defaultdict(list)
-
- for im in d['images']:
- location_to_images[im['location']].append(im)
-
- n_to_sample = 10
- import random
- random.seed(0)
- sampling_folder_base = r'g:\temp\nacti_samples'
-
- for location in tqdm(location_to_images):
-
- images_this_location = location_to_images[location]
- if len(images_this_location) > n_to_sample:
- images_this_location = random.sample(images_this_location,n_to_sample)
-
- for i_image,im in enumerate(images_this_location):
-
- fn_relative = im['file_name']
- source_fn_abs = os.path.join(input_base,fn_relative)
- assert os.path.isfile(source_fn_abs)
- ext = os.path.splitext(fn_relative)[1]
- target_fn_abs = os.path.join(sampling_folder_base,'{}/{}'.format(
- location,'image_{}{}'.format(str(i_image).zfill(2),ext)))
- os.makedirs(os.path.dirname(target_fn_abs),exist_ok=True)
- shutil.copyfile(source_fn_abs,target_fn_abs)
-
- # ...for each image
-
- # ...for each location
-
+ """
+
+ add_locations_to_nacti.py
+
+ As of 10.2023, NACTI metadata only has very coarse location information (e.g. "Florida"),
+ but camera IDs are embedded in filenames. This script pulls that information from filenames
+ and adds it to metadata.
+
+ """
+
+ #%% Imports and constants
+
+ import os
+ import json
+ import shutil
+
+ from tqdm import tqdm
+ from collections import defaultdict
+
+ input_file = r'd:\lila\nacti\nacti_metadata.json.1.13\nacti_metadata.json'
+ output_file = r'g:\temp\nacti_metadata.1.14.json'
+
+
+ #%% Read metadata
+
+ with open(input_file,'r') as f:
+ d = json.load(f)
+
+ assert d['info']['version'] == 1.13
+
+
+ #%% Map images to locations (according to the metadata)
+
+ file_name_to_original_location = {}
+
+ # im = dataset_labels['images'][0]
+ for im in tqdm(d['images']):
+ file_name_to_original_location[im['file_name']] = im['location']
+
+ original_locations = set(file_name_to_original_location.values())
+
+ print('Found {} locations in the original metadata:'.format(len(original_locations)))
+ for loc in original_locations:
+ print('[{}]'.format(loc))
+
+
+ #%% Map images to new locations
+
+ def path_to_location(relative_path):
+
+ relative_path = relative_path.replace('\\','/')
+ if relative_path in file_name_to_original_location:
+ location_name = file_name_to_original_location[relative_path]
+ if location_name == 'San Juan Mntns, Colorado':
+ # "part0/sub000/2010_Unit150_Ivan097_img0003.jpg"
+ tokens = relative_path.split('/')[-1].split('_')
+ assert tokens[1].startswith('Unit')
+ location_name = 'sanjuan_{}_{}_{}'.format(tokens[0],tokens[1],tokens[2])
+ elif location_name == 'Lebec, California':
+ # "part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg"
+ tokens = relative_path.split('/')[-1].split('_')
+ assert tokens[0].startswith('CA-') or tokens[0].startswith('TAG-')
+ location_name = 'lebec_{}'.format(tokens[0])
+ elif location_name == 'Archbold, FL':
+ # "part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg"
+ tokens = relative_path.split('/')[-1].split('_')
+ assert tokens[0].startswith('FL-')
+ location_name = 'archbold_{}'.format(tokens[0])
+ else:
+ assert location_name == ''
+ tokens = relative_path.split('/')[-1].split('_')
+ if tokens[0].startswith('CA-') or tokens[0].startswith('TAG-') or tokens[0].startswith('FL-'):
+ location_name = '{}'.format(tokens[0])
+
+ else:
+
+ location_name = 'unknown'
+
+ # print('Returning location {} for file {}'.format(location_name,relative_path))
+
+ return location_name
+
+ file_name_to_updated_location = {}
+ updated_location_to_count = defaultdict(int)
+ for im in tqdm(d['images']):
+
+ updated_location = path_to_location(im['file_name'])
+ file_name_to_updated_location[im['file_name']] = updated_location
+ updated_location_to_count[updated_location] += 1
+
+ updated_location_to_count = {k: v for k, v in sorted(updated_location_to_count.items(),
+ key=lambda item: item[1],
+ reverse=True)}
+
+ updated_locations = set(file_name_to_updated_location.values())
+
+ print('Found {} updated locations in the original metadata:'.format(len(updated_locations)))
+ for loc in updated_location_to_count:
+ print('{}: {}'.format(loc,updated_location_to_count[loc]))
+
+
+ #%% Re-write metadata
+
+ for im in d['images']:
+ im['location'] = file_name_to_updated_location[im['file_name']]
+ d['info']['version'] = 1.14
+
+ with open(output_file,'w') as f:
+ json.dump(d,f,indent=1)
+
+
+ #%% For each location, sample some random images to make sure they look consistent
+
+ input_base = r'd:\lila\nacti-unzipped'
+ assert os.path.isdir(input_base)
+
+ location_to_images = defaultdict(list)
+
+ for im in d['images']:
+ location_to_images[im['location']].append(im)
+
+ n_to_sample = 10
+ import random
+ random.seed(0)
+ sampling_folder_base = r'g:\temp\nacti_samples'
+
+ for location in tqdm(location_to_images):
+
+ images_this_location = location_to_images[location]
+ if len(images_this_location) > n_to_sample:
+ images_this_location = random.sample(images_this_location,n_to_sample)
+
+ for i_image,im in enumerate(images_this_location):
+
+ fn_relative = im['file_name']
+ source_fn_abs = os.path.join(input_base,fn_relative)
+ assert os.path.isfile(source_fn_abs)
+ ext = os.path.splitext(fn_relative)[1]
+ target_fn_abs = os.path.join(sampling_folder_base,'{}/{}'.format(
+ location,'image_{}{}'.format(str(i_image).zfill(2),ext)))
+ os.makedirs(os.path.dirname(target_fn_abs),exist_ok=True)
+ shutil.copyfile(source_fn_abs,target_fn_abs)
+
+ # ...for each image
+
+ # ...for each location
+
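
To make the filename parsing concrete, the example paths embedded in the comments above imply mappings like these (worked by hand from path_to_location, for illustration only):

    # 'San Juan Mntns, Colorado' + 'part0/sub000/2010_Unit150_Ivan097_img0003.jpg'
    #     -> 'sanjuan_2010_Unit150_Ivan097'
    # 'Lebec, California'        + 'part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg'
    #     -> 'lebec_CA-03'
    # 'Archbold, FL'             + 'part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg'
    #     -> 'archbold_FL-01'
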
@@ -1,16 +1,16 @@
- ########
- #
- # create_lila_blank_set.py
- #
- # Create a folder of blank images sampled from LILA. We'll aim for diversity, so less-common
- # locations will be oversampled relative to more common locations. We'll also run MegaDetector
- # (with manual review) to remove some incorrectly-labeled, not-actually-empty images from our
- # blank set.
- #
- # We'll store location information for each image in a .json file, so we can split locations
- # into train/val in downstream tasks.
- #
- ########
+ """
+
+ create_lila_blank_set.py
+
+ Create a folder of blank images sampled from LILA. We'll aim for diversity, so less-common
+ locations will be oversampled relative to more common locations. We'll also run MegaDetector
+ (with manual review) to remove some incorrectly-labeled, not-actually-empty images from our
+ blank set.
+
+ We'll store location information for each image in a .json file, so we can split locations
+ into train/val in downstream tasks.
+
+ """

  #%% Constants and imports

@@ -1,11 +1,11 @@
- ########
- #
- # create_lila_test_set.py
- #
- # Create a test set of camera trap images, containing N empty and N non-empty
- # images from each LILA data set.
- #
- ########
+ """
+
+ create_lila_test_set.py
+
+ Create a test set of camera trap images, containing N empty and N non-empty
+ images from each LILA data set.
+
+ """

  #%% Constants and imports