megadetector-5.0.10-py3-none-any.whl → megadetector-5.0.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (226)
  1. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.10.dist-info/RECORD +0 -224
  214. megadetector-5.0.10.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
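
Reading the list above: 5.0.10 installed 224 files across eight top-level packages (api, classification, data_management, detection, docs, md_utils, md_visualization, taxonomy_mapping), while the 5.0.11 wheel's RECORD lists only 5 entries and top_level.txt shrinks from 8 names to 1. The new wheel therefore appears to stop shipping these modules at the top level of site-packages entirely; presumably the code now lives under a single package namespace, but the new layout is not visible in this diff. A quick, standard-library-only sketch for checking what any installed release of the distribution actually ships:

from importlib.metadata import files, version

# Print every file recorded for the installed 'megadetector' distribution;
# expect ~224 entries for 5.0.10 and only a handful for 5.0.11.
print(version('megadetector'))
for f in files('megadetector') or []:
    print(f)
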
data_management/lila/create_lila_blank_set.py
@@ -1,557 +0,0 @@
- """
-
- create_lila_blank_set.py
-
- Create a folder of blank images sampled from LILA. We'll aim for diversity, so less-common
- locations will be oversampled relative to more common locations. We'll also run MegaDetector
- (with manual review) to remove some incorrectly-labeled, not-actually-empty images from our
- blank set.
-
- We'll store location information for each image in a .json file, so we can split locations
- into train/val in downstream tasks.
-
- """
-
- #%% Constants and imports
-
- import os
- import random
- import math
- import json
-
- import numpy as np
- from tqdm import tqdm
- from multiprocessing.pool import ThreadPool
- from urllib.parse import urlparse
- from collections import defaultdict
-
- from data_management.lila.lila_common import read_lila_all_images_file
- from md_utils.url_utils import download_url
- from md_visualization import visualization_utils as vis_utils
- from md_utils.path_utils import recursive_file_list
-
- # We'll write images, metadata downloads, and temporary files here
- lila_local_base = os.path.expanduser('~/lila')
-
- metadata_dir = os.path.join(lila_local_base,'metadata')
- os.makedirs(metadata_dir,exist_ok=True)
-
- project_base = os.path.join(lila_local_base,'lila_blanks')
-
- candidate_blanks_base = os.path.join(project_base,'candidate_blanks')
- os.makedirs(candidate_blanks_base,exist_ok=True)
-
- confirmed_blanks_base = os.path.join(project_base,'confirmed_blanks')
- os.makedirs(confirmed_blanks_base,exist_ok=True)
-
- md_possible_non_blanks_folder = os.path.join(project_base,'candidate_non_blanks')
- os.makedirs(md_possible_non_blanks_folder,exist_ok=True)
-
- location_to_blank_image_urls_cache_file = os.path.join(project_base,
-                                                        'location_to_blank_image_urls.json')
-
- md_results_file = os.path.join(project_base,'lila_blanks_md_results.json')
-
- all_fn_relative_to_location_file = os.path.join(project_base,'all_fn_relative_to_location.json')
- confirmed_fn_relative_to_location_file = os.path.join(project_base,'confirmed_fn_relative_to_location.json')
-
- preferred_image_download_source = 'gcp'
-
- # Number of concurrent download threads
- n_download_threads = 20
-
- n_blanks = 100000
-
- random.seed(0)
-
-
- #%% Download and open the giant table of image URLs and labels
-
- # ~60 seconds to download, unzip, and open
- df = read_lila_all_images_file(metadata_dir)
-
-
- #%% Explore blank labels
-
- # Original labels we're treating as blank:
- blank_original_labels = (
-     'empty','misfire'
- )
-
- # Some notable original labels we're *not* treating as blank:
- nonblank_original_labels = (
-     'unclassifiable', 'unidentifiable', 'unidentified', 'unknown', 'fire',
-     'foggy lens', 'foggy weather', 'blurred', 'end', 'eye_shine', 'ignore',
-     'lens obscured', 'misdirected', 'other', 'start', 'sun', 'problem',
-     'tilted', 'vegetation obstruction', 'snow on lens', 'malfunction'
- )
-
- other_labels_without_common_names = (
-     'car', 'motorcycle', 'vehicle'
- )
-
- common_names = sorted(list(df['common_name'].unique()),
-                       key=lambda x:str(x) if isinstance(x,float) else x)
- original_labels = sorted(list(df['original_label'].unique()),
-                          key=lambda x:str(x) if isinstance(x,float) else x)
-
- # Blanks are represented as NaN in the "common_name" column (though not all NaN's are blanks)
- assert '' not in common_names
- assert all([s not in common_names for s in blank_original_labels])
- assert all([s not in common_names for s in nonblank_original_labels])
- assert np.nan in common_names
-
- # Blanks are represented as "empty" or "misfire" in the "original_label" column
- assert all([s in original_labels for s in blank_original_labels])
- assert all([s in original_labels for s in nonblank_original_labels])
- assert all([s in original_labels for s in other_labels_without_common_names])
- assert all([s not in original_labels for s in ('','blank','none',np.nan)])
-
-
- #%% Count empty labels and common names
-
- common_names_with_empty_original_labels = set()
- original_labels_with_nan_common_names = set()
-
- common_name_to_count = defaultdict(int)
- original_label_to_count = defaultdict(int)
-
- # This loop takes ~10 mins
- for i_row,row in tqdm(df.iterrows(),total=len(df)):
-
-     common_name = row['common_name']
-     original_label = row['original_label']
-
-     if isinstance(common_name,float):
-         assert np.isnan(common_name)
-         original_labels_with_nan_common_names.add(original_label)
-
-     common_name = str(common_name)
-
-     assert isinstance(original_label,str)
-     if original_label in blank_original_labels:
-         common_names_with_empty_original_labels.add(common_name)
-     common_name_to_count[common_name] += 1
-     original_label_to_count[original_label] += 1
-
-
- #%% Look at the most common labels and common names
-
- from md_utils.ct_utils import sort_dictionary_by_value
- common_name_to_count = sort_dictionary_by_value(common_name_to_count,reverse=True)
- original_label_to_count = sort_dictionary_by_value(original_label_to_count,reverse=True)
-
- k = 10
-
- print('\nMost frequent common names:\n')
-
- i_label = 0
- for i_label,s in enumerate(common_name_to_count):
-     if i_label >= k:
-         break
-     print('{}: {}'.format(s,common_name_to_count[s]))
-
- print('\nMost frequent original labels:\n')
-
- i_label = 0
- for i_label,s in enumerate(original_label_to_count):
-     if i_label >= k:
-         break
-     print('{}: {}'.format(s,original_label_to_count[s]))
-
-
- #%% Do some consistency checks over the empty labels and stats
-
- # All images called 'empty' should have NaN as their common name
- assert (len(common_names_with_empty_original_labels) == 1)
- assert next(iter(common_names_with_empty_original_labels)) == 'nan'
-
- # 'empty' should be the most frequent original label overall
- assert next(iter(original_label_to_count)) == 'empty'
-
- # NaN should be the most frequent common name overall
- assert next(iter(common_name_to_count)) == 'nan'
-
- for s in original_labels_with_nan_common_names:
-     assert \
-         (s in blank_original_labels) or \
-         (s in nonblank_original_labels) or \
-         (s in other_labels_without_common_names)
-
-
- #%% Map locations to blank images
-
- force_map_locations = False
-
- # Load from .json if available
- if (not force_map_locations) and (os.path.isfile(location_to_blank_image_urls_cache_file)):
-
-     with open(location_to_blank_image_urls_cache_file,'r') as f:
-         location_to_blank_image_urls = json.load(f)
-
- else:
-
-     location_to_blank_image_urls = defaultdict(list)
-
-     # i_row = 0; row = df.iloc[i_row]
-     for i_row,row in tqdm(df.iterrows(),total=len(df)):
-
-         location_id = row['location_id']
-         url = row['url']
-
-         original_label = row['original_label']
-         if original_label in blank_original_labels:
-             assert np.isnan(row['common_name'])
-             location_to_blank_image_urls[location_id].append(url)
-
-     with open(location_to_blank_image_urls_cache_file,'w') as f:
-         json.dump(location_to_blank_image_urls,f,indent=1)
-
- n_locations_with_blanks = len(location_to_blank_image_urls)
- print('Found {} locations with blank images'.format(n_locations_with_blanks))
-
-
- #%% Sample blanks
-
- random.seed(0)
-
- # Make a fresh copy of the lists
- location_to_unsampled_blank_image_urls = {}
-
- # location = next(iter(location_to_blank_image_urls.keys()))
- for location in location_to_blank_image_urls:
-     blank_image_urls_this_location = location_to_blank_image_urls[location]
-     unsampled_blank_image_urls_this_location = blank_image_urls_this_location.copy()
-     location_to_unsampled_blank_image_urls[location] = unsampled_blank_image_urls_this_location
-
- # Put locations in a random order
- location_ids = list(location_to_unsampled_blank_image_urls.keys())
- random.shuffle(location_ids)
-
- blank_urls = []
- location_to_sampled_blanks = defaultdict(list)
- fully_sampled_locations = set()
-
- # Pick from each location until we hit our limit or have no blanks left
- while(True):
-
-     found_sample = False
-
-     # location = location_ids[0]
-     for location in location_ids:
-
-         unsampled_images_this_location = location_to_unsampled_blank_image_urls[location]
-         if len(unsampled_images_this_location) == 0:
-             fully_sampled_locations.add(location)
-             continue
-
-         url = random.choice(unsampled_images_this_location)
-         blank_urls.append(url)
-         location_to_unsampled_blank_image_urls[location].remove(url)
-         location_to_sampled_blanks[location].append(url)
-         found_sample = True
-
-         if len(blank_urls) == n_blanks:
-             break
-
-     # ...for each location
-
-     if not found_sample:
-         print('Terminating after {} blanks, we ran out before hitting {}'.format(
-             len(blank_urls),n_blanks))
-
-     if len(blank_urls) == n_blanks:
-         break
-
- # ...while(True)
-
- assert len(blank_urls) <= n_blanks
- min_blanks_per_location = math.floor(n_blanks/n_locations_with_blanks)
- max_blanks_per_location = -1
- for location in location_to_sampled_blanks:
-     n_blanks_this_location = len(location_to_sampled_blanks[location])
-     if n_blanks_this_location >= max_blanks_per_location:
-         max_blanks_per_location = n_blanks_this_location
-     assert (location in fully_sampled_locations) or \
-         n_blanks_this_location >= min_blanks_per_location
-
- print('Choose {} blanks from {} locations'.format(n_blanks,len(location_ids)))
- print('Fully sampled {} locations'.format(len(fully_sampled_locations)))
- print('Max samples per location: {}'.format(max_blanks_per_location))
-
-
- #%% Download those image files (prep)
-
- container_to_url_base = {
-     'lilablobssc.blob.core.windows.net':'/',
-     'storage.googleapis.com':'/public-datasets-lila/'
- }
-
- def download_relative_filename(url, output_base, verbose=False, url_base=None, overwrite=False):
-     """
-     Download a URL to output_base, preserving relative path
-     """
-
-     result = {'status':'unknown','url':url,'destination_filename':None}
-
-     if url_base is None:
-         assert url.startswith('https://')
-         container = url.split('/')[2]
-         assert container in container_to_url_base
-         url_base = container_to_url_base[container]
-
-     assert url_base.startswith('/') and url_base.endswith('/')
-
-     p = urlparse(url)
-     relative_filename = str(p.path)
-     # remove the leading '/'
-     assert relative_filename.startswith(url_base)
-     relative_filename = relative_filename.replace(url_base,'',1)
-
-     destination_filename = os.path.join(output_base,relative_filename)
-     result['destination_filename'] = destination_filename
-
-     if ((os.path.isfile(destination_filename)) and (not overwrite)):
-         result['status'] = 'skipped'
-         return result
-     try:
-         download_url(url, destination_filename, verbose=verbose)
-     except Exception as e:
-         print('Warning: error downloading URL {}: {}'.format(
-             url,str(e)))
-         result['status'] = 'error: {}'.format(str(e))
-         return result
-
-     result['status'] = 'success'
-     return result
-
- def azure_url_to_gcp_http_url(url,error_if_not_azure_url=True):
-     """
-     Most URLs point to Azure by default, but most files are available on both Azure and GCP.
-     This function converts an Azure URL to the corresponding GCP http:// url.
-     """
-
-     lila_azure_storage_account = 'https://lilablobssc.blob.core.windows.net'
-     gcp_bucket_api_url = 'https://storage.googleapis.com/public-datasets-lila'
-     error_if_not_azure_url = False
-
-     if error_if_not_azure_url:
-         assert url.startswith(lila_azure_storage_account)
-     gcp_url = url.replace(lila_azure_storage_account,gcp_bucket_api_url,1)
-     return gcp_url
-
- # Convert Azure URLs to GCP URLs if necessary
- if preferred_image_download_source != 'azure':
-     assert preferred_image_download_source == 'gcp'
-     blank_urls = [azure_url_to_gcp_http_url(url) for url in blank_urls]
-
-
- #%% Download those image files (execution)
-
- print('Downloading {} images on {} workers'.format(len(blank_urls),n_download_threads))
-
- if n_download_threads <= 1:
-
-     results = []
-
-     # url = all_urls[0]
-     for url in tqdm(blank_urls):
-         results.append(download_relative_filename(url,candidate_blanks_base,url_base=None))
-
- else:
-
-     pool = ThreadPool(n_download_threads)
-     results = list(tqdm(pool.imap(lambda s: download_relative_filename(
-         s,candidate_blanks_base,url_base=None),
-         blank_urls), total=len(blank_urls)))
-
-     # pool.terminate()
-
-
- #%% Review results
-
- error_urls = []
- for r in results:
-     if r['status'] != 'success':
-         error_urls.append(r['url'])
-
- print('Errors on {} of {} downloads'.format(len(error_urls),len(results)))
-
-
- #%% Run MegaDetector on the folder
-
- cmd = 'python run_detector_batch.py MDV5A "{}" "{}"'.format(
-     candidate_blanks_base,md_results_file)
- cmd += ' --recursive --output_relative_filenames'
-
- import clipboard; clipboard.copy(cmd); print(cmd)
-
-
- #%% Review MD results that suggests images are non-empty
-
- assert os.path.isfile(md_results_file)
-
- category_name_to_threshold = {'animal':0.25,'person':0.25,'vehicle':0.25}
- min_threshold = min(category_name_to_threshold.values())
- with open(md_results_file,'r') as f:
-     md_results = json.load(f)
-
- images_to_review_to_detections = {}
-
- category_id_to_threshold = {}
- for category_id in md_results['detection_categories']:
-     category_name = md_results['detection_categories'][category_id]
-     category_id_to_threshold[category_id] = category_name_to_threshold[category_name]
-
- # im = md_results['images'][0]
- for im in md_results['images']:
-
-     if 'detections' not in im:
-         continue
-
-     found_object = False
-     for det in im['detections']:
-         threshold = category_id_to_threshold[det['category']]
-         if det['conf'] >= threshold:
-             found_object = True
-             break
-     if found_object:
-         images_to_review_to_detections[im['file']] = im['detections']
-
- print('Flagging {} of {} images for review'.format(len(images_to_review_to_detections),len(md_results['images'])))
-
- output_file_to_source_file = {}
-
- # i_fn = 0; source_file_relative = images_to_review[i_fn]
- for i_fn,source_file_relative in tqdm(enumerate(images_to_review_to_detections),
-                                       total=len(images_to_review_to_detections)):
-
-     source_file_abs = os.path.join(candidate_blanks_base,source_file_relative)
-     assert os.path.isfile(source_file_abs)
-     ext = os.path.splitext(source_file_abs)[1]
-     target_file_relative = str(i_fn).zfill(8) + ext
-     target_file_abs = os.path.join(md_possible_non_blanks_folder,target_file_relative)
-     output_file_to_source_file[target_file_relative] = source_file_relative
-     # shutil.copyfile(source_file_abs,target_file_abs)
-     vis_utils.draw_bounding_boxes_on_file(input_file=source_file_abs,
-                                           output_file=target_file_abs,
-                                           detections=images_to_review_to_detections[source_file_relative],
-                                           confidence_threshold=min_threshold,
-                                           target_size=(1280,-1))
-
- # This is a temporary file I just used during debugging
- with open(os.path.join(project_base,'output_file_to_source_file.json'),'w') as f:
-     json.dump(output_file_to_source_file,f,indent=1)
-
-
- #%% Manual review
-
- # Delete images that are *not* empty
-
-
- #%% Figure out which images are still there; these are the actually-blank ones
-
- remaining_images = set(os.listdir(md_possible_non_blanks_folder))
- print('Kept {} of {} candidate blank images'.format(len(remaining_images),
-                                                     len(images_to_review_to_detections)))
-
- removed_blank_images_relative = []
-
- # output_file = next(iter(output_file_to_source_file.keys()))
- for output_file in tqdm(output_file_to_source_file.keys()):
-     if output_file not in remaining_images:
-         source_file_relative = output_file_to_source_file[output_file]
-         removed_blank_images_relative.append(source_file_relative)
-
- removed_blank_images_relative_set = set(removed_blank_images_relative)
- assert len(removed_blank_images_relative) + len(remaining_images) == len(output_file_to_source_file)
-
-
- #%% Copy only the confirmed blanks to the confirmed folder
-
- from md_utils.path_utils import is_image_file
-
- all_candidate_blanks = recursive_file_list(candidate_blanks_base,return_relative_paths=True)
- print('Found {} candidate blanks'.format(len(all_candidate_blanks)))
-
- skipped_images_relative = []
- skipped_non_images = []
-
- for source_fn_relative in tqdm(all_candidate_blanks):
-
-     # Skip anything we removed from the "candidate non-blanks" folder; these weren't really
-     # blank.
-     if source_fn_relative in removed_blank_images_relative_set:
-         skipped_images_relative.append(source_fn_relative)
-         continue
-
-     if not is_image_file(source_fn_relative):
-         # Not a typo; "skipped images" really means "skipped files"
-         skipped_images_relative.append(source_fn_relative)
-         skipped_non_images.append(source_fn_relative)
-
-
-     source_fn_abs = os.path.join(candidate_blanks_base,source_fn_relative)
-     assert os.path.isfile(source_fn_abs)
-     target_fn_abs = os.path.join(confirmed_blanks_base,source_fn_relative)
-     os.makedirs(os.path.dirname(target_fn_abs),exist_ok=True)
-     # shutil.copyfile(source_fn_abs,target_fn_abs)
-
- print('Skipped {} files ({} non-image files)'.format(len(skipped_images_relative),
-                                                      len(skipped_non_images)))
-
-
- #%% Validate the folder of confirmed blanks
-
- from md_utils.path_utils import find_images
- # all_confirmed_blanks = recursive_file_list(confirmed_blanks_base,return_relative_paths=True)
- all_confirmed_blanks = find_images(confirmed_blanks_base,return_relative_paths=True,recursive=True)
- assert len(all_confirmed_blanks) < len(all_candidate_blanks)
- print('Found {} confirmed blanks'.format(len(all_confirmed_blanks)))
-
-
- #%% Manually review a few of the images we skipped
-
- # ...to make sure they're non-blank
- i_image = random.randint(0, len(skipped_images_relative))
- fn_relative = skipped_images_relative[i_image]
- fn_abs = os.path.join(candidate_blanks_base,fn_relative)
- assert os.path.isfile(fn_abs)
- import clipboard
- clipboard.copy('feh --scale-down "{}"'.format(fn_abs))
-
-
- #%% Record location information for each confirmed file
-
- # Map every URL's path to the corresponding location
- #
- # This is *all empty URLs*, not just the ones we downloaded
- all_fn_relative_to_location = {}
-
- # location = next(iter(location_to_blank_image_urls.keys()))
- for location in tqdm(location_to_blank_image_urls):
-     urls_this_location = location_to_blank_image_urls[location]
-
-     # url = urls_this_location[0]
-     for url in urls_this_location:
-         # Turn:
-         #
-         # https://lilablobssc.blob.core.windows.net/caltech-unzipped/cct_images/5968c0f9-23d2-11e8-a6a3-ec086b02610b.jpg'
-         #
-         # ...into:
-         #
-         # caltech-unzipped/cct_images/5968c0f9-23d2-11e8-a6a3-ec086b02610b.jpg'
-         p = urlparse(url)
-         fn_relative = str(p.path)[1:]
-         all_fn_relative_to_location[fn_relative] = location
-
- # Build a much smaller mapping of just the confirmed blanks
- confirmed_fn_relative_to_location = {}
- for i_fn,fn_relative in tqdm(enumerate(all_confirmed_blanks),total=len(all_confirmed_blanks)):
-     confirmed_fn_relative_to_location[fn_relative] = all_fn_relative_to_location[fn_relative]
-
- with open(all_fn_relative_to_location_file,'w') as f:
-     json.dump(all_fn_relative_to_location,f,indent=1)
-
- with open(confirmed_fn_relative_to_location_file,'w') as f:
-     json.dump(confirmed_fn_relative_to_location,f,indent=1)
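
The hunk above is the deleted data_management/lila/create_lila_blank_set.py. Its core algorithm is the location-balanced sampler in the "#%% Sample blanks" cell: shuffle the locations once, then repeatedly take one random image from every location that still has images left, so rare locations are oversampled relative to their share of the data, exactly as the docstring promises. A self-contained sketch of that loop (the function name and toy data below are illustrative, not part of the package):

import random

def sample_across_locations(location_to_urls, n_samples, seed=0):
    """Round-robin over locations: one random image per location per pass."""
    rng = random.Random(seed)
    # Work on copies so the caller's lists aren't mutated
    unsampled = {loc: list(urls) for loc, urls in location_to_urls.items()}
    locations = list(unsampled)
    rng.shuffle(locations)
    sampled = []
    while len(sampled) < n_samples:
        found = False
        for loc in locations:
            if not unsampled[loc]:
                continue
            # Remove one random image from this location and keep it
            sampled.append(unsampled[loc].pop(rng.randrange(len(unsampled[loc]))))
            found = True
            if len(sampled) == n_samples:
                break
        if not found:
            break  # every location ran dry before we reached n_samples
    return sampled

# 'tiny' contributes its one image even though 'big' has four times as many
print(sample_across_locations({'tiny': ['t1'], 'big': ['b1', 'b2', 'b3', 'b4']}, 3))

On the toy input, 'tiny' still contributes an image despite holding a fifth of the candidates, which is the diversity property the script is after.
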
data_management/lila/create_lila_test_set.py
@@ -1,151 +0,0 @@
- """
-
- create_lila_test_set.py
-
- Create a test set of camera trap images, containing N empty and N non-empty
- images from each LILA data set.
-
- """
-
- #%% Constants and imports
-
- import json
- import os
- import random
-
- from data_management.lila.lila_common import read_lila_metadata, read_metadata_file_for_dataset
-
- from md_utils.url_utils import download_url
-
- n_empty_images_per_dataset = 1
- n_non_empty_images_per_dataset = 1
-
- # We'll write images, metadata downloads, and temporary files here
- lila_local_base = os.path.expanduser('~/lila')
-
- output_dir = os.path.join(lila_local_base,'lila_test_set')
- os.makedirs(output_dir,exist_ok=True)
-
- metadata_dir = os.path.join(lila_local_base,'metadata')
- os.makedirs(metadata_dir,exist_ok=True)
-
- random.seed(0)
-
-
- #%% Download and parse the metadata file
-
- metadata_table = read_lila_metadata(metadata_dir)
-
-
- #%% Download and extract metadata for every dataset
-
- for ds_name in metadata_table.keys():
-     metadata_table[ds_name]['metadata_filename'] = read_metadata_file_for_dataset(ds_name=ds_name,
-                                                                                   metadata_dir=metadata_dir,
-                                                                                   metadata_table=metadata_table)
-
-
- #%% Choose images from each dataset
-
- # ds_name = (list(metadata_table.keys()))[0]
- for ds_name in metadata_table.keys():
-
-     print('Choosing images for {}'.format(ds_name))
-
-     json_filename = metadata_table[ds_name]['metadata_filename']
-
-     with open(json_filename,'r') as f:
-         d = json.load(f)
-
-     category_id_to_name = {c['id']:c['name'] for c in d['categories']}
-     category_name_to_id = {c['name']:c['id'] for c in d['categories']}
-
-     ## Find empty images
-
-     if 'empty' not in category_name_to_id:
-         empty_annotations_to_download = []
-     else:
-         empty_category_id = category_name_to_id['empty']
-         empty_annotations = [ann for ann in d['annotations'] if ann['category_id'] == empty_category_id]
-         try:
-             empty_annotations_to_download = random.sample(empty_annotations,n_empty_images_per_dataset)
-         except ValueError:
-             print('No empty images available for dataset {}'.format(ds_name))
-             empty_annotations_to_download = []
-
-     ## Find non-empty images
-
-     non_empty_annotations = [ann for ann in d['annotations'] if ann['category_id'] != empty_category_id]
-     try:
-         non_empty_annotations_to_download = random.sample(non_empty_annotations,n_non_empty_images_per_dataset)
-     except ValueError:
-         print('No non-empty images available for dataset {}'.format(ds_name))
-         non_empty_annotations_to_download = []
-
-
-     annotations_to_download = empty_annotations_to_download + non_empty_annotations_to_download
-
-     image_ids_to_download = set([ann['image_id'] for ann in annotations_to_download])
-     assert len(image_ids_to_download) == len(set(image_ids_to_download))
-
-     images_to_download = []
-     for im in d['images']:
-         if im['id'] in image_ids_to_download:
-             images_to_download.append(im)
-     assert len(images_to_download) == len(image_ids_to_download)
-
-     metadata_table[ds_name]['images_to_download'] = images_to_download
-
- # ...for each dataset
-
-
- #%% Convert to URLs
-
- # ds_name = (list(metadata_table.keys()))[0]
- for ds_name in metadata_table.keys():
-
-     base_url = metadata_table[ds_name]['image_base_url']
-     assert not base_url.endswith('/')
-
-     # Retrieve image file names
-     filenames = [im['file_name'] for im in metadata_table[ds_name]['images_to_download']]
-
-     urls_to_download = []
-
-     # Convert to URLs
-     for fn in filenames:
-         url = base_url + '/' + fn
-         urls_to_download.append(url)
-
-     metadata_table[ds_name]['urls_to_download'] = urls_to_download
-
- # ...for each dataset
-
-
- #%% Download those image files
-
- # TODO: trivially parallelizable
- #
- # ds_name = (list(metadata_table.keys()))[0]
- for ds_name in metadata_table.keys():
-
-     base_url = metadata_table[ds_name]['image_base_url']
-     assert not base_url.endswith('/')
-     base_url += '/'
-
-     urls_to_download = metadata_table[ds_name]['urls_to_download']
-
-     # url = urls_to_download[0]
-     for url in urls_to_download:
-
-         assert base_url in url
-         output_file_relative = ds_name.lower().replace(' ','_') + '_' + url.replace(base_url,'').replace('/','_').replace('\\','_')
-         output_file_absolute = os.path.join(output_dir,output_file_relative)
-         try:
-             download_url(url, destination_filename=output_file_absolute, force_download=False, verbose=True)
-         except Exception as e:
-             print('\n*** Error downloading {} ***\n{}'.format(url,str(e)))
-
-     # ...for each url
-
- # ...for each dataset
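
The second hunk is the deleted data_management/lila/create_lila_test_set.py, which reduces to one step per dataset: parse the COCO Camera Traps metadata, sample N 'empty' and N non-empty annotations, and download the matching images. A condensed sketch of the sampling step (the helper and the toy metadata dict are illustrative; the real script reads per-dataset metadata via read_metadata_file_for_dataset and catches ValueError from random.sample rather than clamping with min):

import random

def sample_empty_and_nonempty(d, n_empty=1, n_non_empty=1, seed=0):
    """Pick up to n_empty 'empty' and n_non_empty non-empty annotations
    from a COCO Camera Traps dict; a missing 'empty' category means
    every annotation counts as non-empty."""
    rng = random.Random(seed)
    name_to_id = {c['name']: c['id'] for c in d['categories']}
    empty_id = name_to_id.get('empty')
    empty = [a for a in d['annotations'] if a['category_id'] == empty_id]
    non_empty = [a for a in d['annotations'] if a['category_id'] != empty_id]
    picked = rng.sample(empty, min(n_empty, len(empty)))
    picked += rng.sample(non_empty, min(n_non_empty, len(non_empty)))
    return picked

# Toy two-image dataset: one 'empty' annotation, one 'deer' annotation
toy = {'categories': [{'id': 0, 'name': 'empty'}, {'id': 1, 'name': 'deer'}],
       'annotations': [{'image_id': 'im0', 'category_id': 0},
                       {'image_id': 'im1', 'category_id': 1}]}
print(sample_empty_and_nonempty(toy))  # one empty + one non-empty annotation
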