megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (191)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
data_management/lila/download_lila_subset.py +92 -90

@@ -1,17 +1,11 @@
-########
-#
-# download_lila_subset.py
-#
-# Example of how to download a list of files from LILA, e.g. all the files
-# in a data set corresponding to a particular species.
-#
-# Organizes the downloaded images by dataset. How you actually want to organize files,
-# what you want to query for, etc., is very application-specific; this is just meant as a
-# demo.
-#
-# Can download from either Azure or GCP.
-#
-########
+"""
+
+download_lila_subset.py
+
+Example of how to download a list of files from LILA, e.g. all the files
+in a data set corresponding to a particular species.
+
+"""
 
 #%% Constants and imports
 
@@ -19,16 +13,15 @@ import os
 import random
 
 from tqdm import tqdm
-from multiprocessing.pool import ThreadPool
-from urllib.parse import urlparse
 from collections import defaultdict
 
-from data_management.lila.lila_common import \
-    read_lila_all_images_file, is_empty, azure_url_to_gcp_http_url
-from md_utils.url_utils import download_url
+from data_management.lila.lila_common import read_lila_all_images_file, is_empty, lila_base_urls
+
+for s in lila_base_urls.values():
+    assert s.endswith('/')
 
 # If any of these strings appear in the common name of a species, we'll download that image
-species_of_interest = ['grey fox','red fox','leopard cat','kiwi']
+species_of_interest = ['grey fox','gray fox','cape fox','red fox','kit fox']
 
 # We'll write images, metadata downloads, and temporary files here
 lila_local_base = os.path.expanduser('~/lila')
@@ -44,24 +37,22 @@ n_download_threads = 20
 
 max_images_per_dataset = 10 # None
 
-# This impacts the data download, but not the metadata download
-#
-# "Azure" really means "Azure if available"; recent datasets are only available
-# on GCP.
-image_download_source = 'azure' # 'azure' or 'gcp'
+preferred_provider = 'gcp' # 'azure', 'gcp', 'aws'
 
 random.seed(0)
 
 
 #%% Download and open the giant table of image URLs and labels
 
-# ~60 seconds to download, unzip, and open
+# Takes ~60 seconds to download, unzip, and open
 df = read_lila_all_images_file(metadata_dir)
 
 
 #%% Find all the images we want to download
 
-# ~2 minutes
+# Takes ~2 minutes
+
+common_name_to_count = defaultdict(int)
 
 ds_name_to_urls = defaultdict(list)
 
@@ -72,26 +63,33 @@ def find_items(row):
 
     match = False
 
+    # This is the only bit of this file that's specific to a particular query. In this case
+    # we're checking whether each row is on a list of species of interest, but you do you.
     for species_name in species_of_interest:
         if species_name in row['common_name']:
             match = True
+            common_name_to_count[species_name] += 1
             break
 
     if match:
-        ds_name_to_urls[row['dataset_name']].append(row['url'])
+        ds_name_to_urls[row['dataset_name']].append(row['url_' + preferred_provider])
 
 tqdm.pandas()
 _ = df.progress_apply(find_items,axis=1)
 
+# We have a list of URLs for each dataset, flatten them all into a list of URLs
 all_urls = list(ds_name_to_urls.values())
 all_urls = [item for sublist in all_urls for item in sublist]
 print('Found {} matching URLs across {} datasets'.format(len(all_urls),len(ds_name_to_urls)))
 
+for common_name in common_name_to_count:
+    print('{}: {}'.format(common_name,common_name_to_count[common_name]))
+
 from copy import deepcopy
 ds_name_to_urls_raw = deepcopy(ds_name_to_urls)
 
 
-#%% Trim to a fixed number of URLs per dataset
+#%% Optionally trim to a fixed number of URLs per dataset
 
 if max_images_per_dataset is None:
     pass
@@ -102,74 +100,78 @@ else:
        ds_name_to_urls[ds_name] = random.sample(ds_name_to_urls[ds_name],max_images_per_dataset)
 
 
-#%% Download those image files
+#%% Choose target files for each URL
 
-container_to_url_base = {
-    'lilablobssc.blob.core.windows.net':'/',
-    'storage.googleapis.com':'/public-datasets-lila/'
-    }
+from data_management.lila.lila_common import lila_base_urls
 
-def download_relative_filename(url, output_base, verbose=False, url_base=None, overwrite=False):
-    """
-    Download a URL to output_base, preserving relative path
-    """
-
-    result = {'status':'unknown','url':url,'destination_filename':None}
-
-    if url_base is None:
-        assert url.startswith('https://')
-        container = url.split('/')[2]
-        assert container in container_to_url_base
-        url_base = container_to_url_base[container]
-
-    assert url_base.startswith('/') and url_base.endswith('/')
-
-    p = urlparse(url)
-    relative_filename = str(p.path)
-    # remove the leading '/'
-    assert relative_filename.startswith(url_base)
-    relative_filename = relative_filename.replace(url_base,'',1)
-
-    destination_filename = os.path.join(output_base,relative_filename)
-    result['destination_filename'] = destination_filename
-
-    if ((os.path.isfile(destination_filename)) and (not overwrite)):
-        result['status'] = 'skipped'
-        return result
-    try:
-        download_url(url, destination_filename, verbose=verbose)
-    except Exception as e:
-        print('Warning: error downloading URL {}: {}'.format(
-            url,str(e)))
-        result['status'] = 'error: {}'.format(str(e))
-        return result
-
-    result['status'] = 'success'
-    return result
+# We have a list of URLs per dataset, flatten that into a single list of URLs
+urls_to_download = set()
+for ds_name in ds_name_to_urls:
+    for url in ds_name_to_urls[ds_name]:
+        urls_to_download.add(url)
+urls_to_download = sorted(list(urls_to_download))
 
+# A URL might look like this:
+#
+# https://storage.googleapis.com/public-datasets-lila/wcs-unzipped/animals/0667/0302.jpg
+#
+# We'll write that to an output file that looks like this (relative to output_dir):
+#
+# wcs-unzipped/animals/0667/0302.jpg
+#
+# ...so we need to remove the base URL to get the target file.
+base_url = lila_base_urls[preferred_provider]
+assert base_url.endswith('/')
 
-# ds_name_to_urls maps dataset names to lists of URLs; flatten to a single list of URLs
-all_urls = list(ds_name_to_urls.values())
-all_urls = [item for sublist in all_urls for item in sublist]
+url_to_target_file = {}
 
-# Convert Azure URLs to GCP URLs if necessary
-if image_download_source != 'azure':
-    assert image_download_source == 'gcp'
-    all_urls = [azure_url_to_gcp_http_url(url) for url in all_urls]
+for url in urls_to_download:
+    assert url.startswith(base_url)
+    target_fn_relative = url.replace(base_url,'')
+    target_fn_abs = os.path.join(output_dir,target_fn_relative)
+    url_to_target_file[url] = target_fn_abs
 
-print('Downloading {} images on {} workers'.format(len(all_urls),n_download_threads))
 
-if n_download_threads <= 1:
+#%% Download image files
 
-    results = []
-
-    # url = all_urls[0]
-    for url in tqdm(all_urls):
-        results.append(download_relative_filename(url,output_dir,url_base=None))
+from md_utils.url_utils import parallel_download_urls
+
+download_results = parallel_download_urls(url_to_target_file=url_to_target_file,
+                                          verbose=False,
+                                          overwrite=False,
+                                          n_workers=n_download_threads,
+                                          pool_type='thread')
+
+
+#%% Scrap
+
+if False:
 
-else:
+    pass
+
+    #%% Find all the reptiles on LILA
 
-    pool = ThreadPool(n_download_threads)
-    results = list(tqdm(pool.imap(lambda s: download_relative_filename(
-        s,output_dir,url_base=None),
-        all_urls), total=len(all_urls)))
+    reptile_rows = df.loc[df['class'] == 'reptilia']
+
+    # i_row = 0; row = reptile_rows.iloc[i_row]
+
+    common_name_to_count = defaultdict(int)
+    dataset_to_count = defaultdict(int)
+    for i_row,row in reptile_rows.iterrows():
+        common_name_to_count[row['common_name']] += 1
+        dataset_to_count[row['dataset_name']] += 1
+
+    from md_utils.ct_utils import sort_dictionary_by_value
+
+    print('Found {} reptiles\n'.format(len(reptile_rows)))
+
+    common_name_to_count = sort_dictionary_by_value(common_name_to_count,reverse=True)
+    dataset_to_count = sort_dictionary_by_value(dataset_to_count,reverse=True)
+
+    print('Common names by count:\n')
+    for k in common_name_to_count:
+        print('{} ({})'.format(k,common_name_to_count[k]))
+
+    print('\nDatasets by count:\n')
+    for k in dataset_to_count:
+        print('{} ({})'.format(k,dataset_to_count[k]))
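The rewrite above drops the bespoke download_relative_filename() helper and the Azure-or-GCP switch in favor of provider-keyed URL columns (url_gcp, url_aws, url_azure) plus the parallel_download_urls() utility from md_utils.url_utils. The following is a minimal end-to-end sketch of the new flow, using only the names and the parallel_download_urls() signature that appear in the diff; the metadata/output directories and the 'fox' filter are placeholders, not part of the package.

import os
from data_management.lila.lila_common import read_lila_all_images_file, lila_base_urls
from md_utils.url_utils import parallel_download_urls

metadata_dir = os.path.expanduser('~/lila/metadata')   # placeholder
output_dir = os.path.expanduser('~/lila/downloads')    # placeholder
provider = 'gcp'                                       # 'gcp', 'aws', or 'azure'

# One row per image, with one URL column per provider
df = read_lila_all_images_file(metadata_dir)

# Pick the rows you care about; this filter is just an example
matches = df[df['common_name'].str.contains('fox', na=False)]

# Map each URL to a local path that preserves the path relative to the provider's base URL
base_url = lila_base_urls[provider]
url_to_target_file = {
    url: os.path.join(output_dir, url.replace(base_url, ''))
    for url in matches['url_' + provider]
}

download_results = parallel_download_urls(url_to_target_file=url_to_target_file,
                                          verbose=False, overwrite=False,
                                          n_workers=20, pool_type='thread')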
data_management/lila/generate_lila_per_image_labels.py +56 -43

@@ -1,19 +1,19 @@
-########
-#
-# generate_lila_per_image_labels.py
-#
-# Generate a .csv file with one row per annotation, containing full URLs to every
-# camera trap image on LILA, with taxonomically expanded labels.
-#
-# Typically there will be one row per image, though images with multiple annotations
-# will have multiple rows.
-#
-# Some images may not physically exist, particularly images that are labeled as "human".
-# This script does not validate image URLs.
-#
-# Does not include bounding box annotations.
-#
-########
+"""
+
+generate_lila_per_image_labels.py
+
+Generate a .csv file with one row per annotation, containing full URLs to every
+camera trap image on LILA, with taxonomically expanded labels.
+
+Typically there will be one row per image, though images with multiple annotations
+will have multiple rows.
+
+Some images may not physically exist, particularly images that are labeled as "human".
+This script does not validate image URLs.
+
+Does not include bounding box annotations.
+
+"""
 
 #%% Constants and imports
 
@@ -23,8 +23,6 @@ import pandas as pd
 import numpy as np
 import dateparser
 import csv
-import urllib
-import urllib.request
 
 from collections import defaultdict
 from tqdm import tqdm
@@ -36,7 +34,6 @@ from data_management.lila.lila_common import read_lila_metadata, \
 from md_utils import write_html_image_list
 from md_utils.path_utils import zip_file
 from md_utils.path_utils import open_file
-from md_utils.url_utils import download_url
 
 # We'll write images, metadata downloads, and temporary files here
 lila_local_base = os.path.expanduser('~/lila')
@@ -107,12 +104,15 @@ for i_row,row in taxonomy_df.iterrows():
 
 # Takes several hours
 
-header = ['dataset_name','url','image_id','sequence_id','location_id','frame_num','original_label',\
-          'scientific_name','common_name','datetime','annotation_level']
+# The order of these headers needs to match the order in which fields are added later in this cell;
+# don't mess with this order.
+header = ['dataset_name','url_gcp','url_aws','url_azure',
+          'image_id','sequence_id','location_id','frame_num',
+          'original_label','scientific_name','common_name','datetime','annotation_level']
 
 taxonomy_levels_to_include = \
     ['kingdom','phylum','subphylum','superclass','class','subclass','infraclass','superorder','order',
-     'suborder','infraorder','superfamily','family','subfamily','tribe','genus','species','subspecies',\
+     'suborder','infraorder','superfamily','family','subfamily','tribe','genus','species','subspecies',
      'variety']
 
 header.extend(taxonomy_levels_to_include)
@@ -179,10 +179,17 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
                break
 
            file_name = im['file_name'].replace('\\','/')
-           base_url = metadata_table[ds_name]['image_base_url']
-           assert not base_url.endswith('/')
-           url = base_url + '/' + file_name
+           base_url_gcp = metadata_table[ds_name]['image_base_url_gcp']
+           base_url_aws = metadata_table[ds_name]['image_base_url_aws']
+           base_url_azure = metadata_table[ds_name]['image_base_url_azure']
+           assert not base_url_gcp.endswith('/')
+           assert not base_url_aws.endswith('/')
+           assert not base_url_azure.endswith('/')
 
+           url_gcp = base_url_gcp + '/' + file_name
+           url_aws = base_url_aws + '/' + file_name
+           url_azure = base_url_azure + '/' + file_name
+
           for k in im.keys():
               if ('date' in k or 'time' in k) and (k not in ['datetime','date_captured']):
                   raise ValueError('Unrecognized datetime field')
@@ -297,7 +304,9 @@
 
           row = []
           row.append(ds_name)
-          row.append(url)
+          row.append(url_gcp)
+          row.append(url_aws)
+          row.append(url_azure)
          row.append(image_id)
          row.append(sequence_id)
          row.append(location_id)
@@ -338,7 +347,7 @@
 
 # ...with open()
 
-print('Processed {} datsets'.format(len(metadata_table)))
+print('Processed {} datasets'.format(len(metadata_table)))
 
 
 #%% Read the .csv back
@@ -365,7 +374,8 @@ dataset_name_to_locations = defaultdict(set)
 def check_row(row):
 
     assert row['dataset_name'] in metadata_table.keys()
-    assert row['url'].startswith('https://')
+    for url_column in ['url_gcp','url_aws','url_azure']:
+        assert row[url_column].startswith('https://') or row[url_column].startswith('http://')
    assert ' : ' in row['image_id']
    assert 'seq' not in row['location_id'].lower()
    assert row['annotation_level'] in valid_annotation_levels
@@ -446,28 +456,31 @@ for ds_name in metadata_table.keys():
 print('Selected {} total images'.format(len(images_to_download)))
 
 
-#%% Download images
+#%% Download images (prep)
 
 # Expect a few errors for images with human or vehicle labels (or things like "ignore" that *could* be humans)
 
-# TODO: trivially parallelizable
-#
+preferred_cloud = 'aws'
+
+url_to_target_file = {}
+
 # i_image = 10; image = images_to_download[i_image]
 for i_image,image in tqdm(enumerate(images_to_download),total=len(images_to_download)):
 
-    url = image['url']
+    url = image['url_' + preferred_cloud]
     ext = os.path.splitext(url)[1]
-    image_file = os.path.join(preview_folder,'image_{}'.format(str(i_image).zfill(4)) + ext)
-    relative_file = os.path.relpath(image_file,preview_folder)
-    try:
-        download_url(url,image_file,verbose=False)
-        image['relative_file'] = relative_file
-    except urllib.error.HTTPError:
-        print('Image {} does not exist ({}:{})'.format(
-            i_image,image['dataset_name'],image['original_label']))
-        image['relative_file'] = None
+    fn_relative = 'image_{}'.format(str(i_image).zfill(4)) + ext
+    fn_abs = os.path.join(preview_folder,fn_relative)
+    image['relative_file'] = fn_relative
+    image['url'] = url
+    url_to_target_file[url] = fn_abs
+
+
+#%% Download images (execution)
 
-# ...for each image we need to download
+from md_utils.url_utils import parallel_download_urls
+download_results = parallel_download_urls(url_to_target_file,verbose=False,overwrite=True,
+                                          n_workers=20,pool_type='thread')
 
 
 #%% Write preview HTML
@@ -499,4 +512,4 @@ open_file(html_filename)
 
 zipped_output_file = zip_file(output_file,verbose=True)
 
-print('Zipped {} to {}'.format(output_file,zipped_output_file))
+print('Zipped {} to {}'.format(output_file,zipped_output_file))
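With this change, the per-image .csv written by generate_lila_per_image_labels.py carries three provider-specific URL columns (url_gcp, url_aws, url_azure) instead of a single url column. A small consumer-side sketch follows; the .csv filename is a placeholder, and the column names are the ones defined in the header list above.

import pandas as pd

# Placeholder path; use whatever output_file the script actually wrote
df = pd.read_csv('lila_image_urls_and_labels.csv')

preferred_cloud = 'aws'   # 'gcp', 'aws', or 'azure'

# Recreate the old single-URL view by selecting one provider's column
df['url'] = df['url_' + preferred_cloud]

print(df[['dataset_name', 'common_name', 'url']].head())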
data_management/lila/get_lila_annotation_counts.py +18 -15

@@ -1,16 +1,16 @@
-########
-#
-# get_lila_annotation_counts.py
-#
-# Generates a .json-formatted dictionary mapping each LILA dataset to all categories
-# that exist for that dataset, with counts for the number of occurrences of each category
-# (the number of *annotations* for each category, not the number of *images*).
-#
-# Also loads the taxonomy mapping file, to include scientific names for each category.
-#
-# get_lila_image_counts.py counts the number of *images* for each category in each dataset.
-#
-########
+"""
+
+get_lila_annotation_counts.py
+
+Generates a .json-formatted dictionary mapping each LILA dataset to all categories
+that exist for that dataset, with counts for the number of occurrences of each category
+(the number of *annotations* for each category, not the number of *images*).
+
+Also loads the taxonomy mapping file, to include scientific names for each category.
+
+get_lila_image_counts.py counts the number of *images* for each category in each dataset.
+
+"""
 
 #%% Constants and imports
 
@@ -20,6 +20,9 @@ import os
 from data_management.lila.lila_common import read_lila_metadata,\
     read_metadata_file_for_dataset, read_lila_taxonomy_mapping
 
+# cloud provider to use for downloading images; options are 'gcp', 'azure', or 'aws'
+preferred_cloud = 'gcp'
+
 # array to fill for output
 category_list = []
 
@@ -96,9 +99,9 @@ for ds_name in metadata_table.keys():
        print('Warning: taxonomy mapping not available for {}'.format(ds_name))
 
    print('Finding categories in {}'.format(ds_name))
-
+
    json_filename = metadata_table[ds_name]['json_filename']
-   base_url = metadata_table[ds_name]['image_base_url']
+   base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
    assert not base_url.endswith('/')
 
    # Open the metadata file
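The only functional change here is that the per-dataset base URL is now looked up per provider. A short sketch of that lookup, assuming (as the diff indicates) that read_lila_metadata() returns a per-dataset dict whose entries include image_base_url_gcp, image_base_url_aws, and image_base_url_azure keys; metadata_dir is a placeholder.

import os
from data_management.lila.lila_common import read_lila_metadata

preferred_cloud = 'gcp'   # 'gcp', 'azure', or 'aws'
metadata_dir = os.path.expanduser('~/lila/metadata')   # placeholder

metadata_table = read_lila_metadata(metadata_dir)

# Print the chosen provider's image base URL for each dataset
for ds_name in metadata_table.keys():
    base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
    assert not base_url.endswith('/')
    print('{}: {}'.format(ds_name, base_url))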
data_management/lila/get_lila_image_counts.py +11 -11

@@ -1,14 +1,14 @@
-########
-#
-# get_lila_image_counts.py
-#
-# Count the number of images and bounding boxes with each label in one or more LILA datasets.
-#
-# This script doesn't write these counts out anywhere other than the console, it's just intended
-# as a template for doing operations like this on LILA data. get_lila_annotation_counts.py writes
-# information out to a .json file, but it counts *annotations*, not *images*, for each category.
-#
-########
+"""
+
+get_lila_image_counts.py
+
+Count the number of images and bounding boxes with each label in one or more LILA datasets.
+
+This script doesn't write these counts out anywhere other than the console, it's just intended
+as a template for doing operations like this on LILA data. get_lila_annotation_counts.py writes
+information out to a .json file, but it counts *annotations*, not *images*, for each category.
+
+"""
 
 #%% Constants and imports
 