megadetector-5.0.28-py3-none-any.whl → megadetector-10.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/efficientnet/model.py +8 -8
- megadetector/classification/efficientnet/utils.py +6 -5
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +26 -26
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -2
- megadetector/data_management/camtrap_dp_to_coco.py +79 -46
- megadetector/data_management/cct_json_utils.py +103 -103
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +210 -193
- megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
- megadetector/data_management/databases/integrity_check_json_db.py +228 -200
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +88 -39
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +133 -125
- megadetector/data_management/labelme_to_yolo.py +159 -73
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
- megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +73 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
- megadetector/data_management/mewc_to_md.py +344 -340
- megadetector/data_management/ocr_tools.py +262 -255
- megadetector/data_management/read_exif.py +249 -227
- megadetector/data_management/remap_coco_categories.py +90 -28
- megadetector/data_management/remove_exif.py +81 -21
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +588 -120
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +248 -122
- megadetector/data_management/yolo_to_coco.py +333 -191
- megadetector/detection/change_detection.py +832 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +358 -278
- megadetector/detection/run_detector.py +399 -186
- megadetector/detection/run_detector_batch.py +404 -377
- megadetector/detection/run_inference_with_yolov5_val.py +340 -327
- megadetector/detection/run_tiled_inference.py +257 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +332 -295
- megadetector/postprocessing/add_max_conf.py +19 -11
- megadetector/postprocessing/categorize_detections_by_size.py +45 -45
- megadetector/postprocessing/classification_postprocessing.py +468 -433
- megadetector/postprocessing/combine_batch_outputs.py +23 -23
- megadetector/postprocessing/compare_batch_results.py +590 -525
- megadetector/postprocessing/convert_output_format.py +106 -102
- megadetector/postprocessing/create_crop_folder.py +347 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +48 -27
- megadetector/postprocessing/md_to_coco.py +133 -102
- megadetector/postprocessing/md_to_labelme.py +107 -90
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +92 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -301
- megadetector/postprocessing/remap_detection_categories.py +91 -38
- megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +156 -74
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/ct_utils.py +1049 -211
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +632 -529
- megadetector/utils/path_utils.py +1520 -431
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/split_locations_into_train_val.py +62 -62
- megadetector/utils/string_utils.py +148 -27
- megadetector/utils/url_utils.py +489 -176
- megadetector/utils/wi_utils.py +2658 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +34 -30
- megadetector/visualization/render_images_with_thumbnails.py +39 -74
- megadetector/visualization/visualization_utils.py +487 -435
- megadetector/visualization/visualize_db.py +232 -198
- megadetector/visualization/visualize_detector_output.py +82 -76
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
- megadetector-10.0.0.dist-info/RECORD +139 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
- megadetector/api/batch_processing/api_core/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
- megadetector/api/batch_processing/api_core/server.py +0 -294
- megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
- megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
- megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
- megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
- megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
- megadetector/api/batch_processing/api_core/server_utils.py +0 -88
- megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
- megadetector/api/batch_processing/api_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
- megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
- megadetector/api/synchronous/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
- megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
- megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
- megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
- megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector/utils/azure_utils.py +0 -178
- megadetector/utils/sas_blob_utils.py +0 -509
- megadetector-5.0.28.dist-info/RECORD +0 -209
- /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
megadetector/data_management/lila/get_lila_annotation_counts.py

@@ -3,7 +3,7 @@
 get_lila_annotation_counts.py

 Generates a .json-formatted dictionary mapping each LILA dataset to all categories
-that exist for that dataset, with counts for the number of occurrences of each category
+that exist for that dataset, with counts for the number of occurrences of each category
 (the number of *annotations* for each category, not the number of *images*).

 Also loads the taxonomy mapping file, to include scientific names for each category.

@@ -17,8 +17,11 @@ get_lila_image_counts.py counts the number of *images* for each category in each
 import json
 import os

+from collections import defaultdict
+
 from megadetector.data_management.lila.lila_common import \
     read_lila_metadata, read_metadata_file_for_dataset, read_lila_taxonomy_mapping
+from megadetector.utils import ct_utils

 # cloud provider to use for downloading images; options are 'gcp', 'azure', or 'aws'
 preferred_cloud = 'gcp'

@@ -53,21 +56,21 @@ datasets_with_taxonomy_mapping = set()

 # i_row = 1; row = taxonomy_df.iloc[i_row]; row
 for i_row,row in taxonomy_df.iterrows():
-
+
     datasets_with_taxonomy_mapping.add(row['dataset_name'])
-
+
     ds_query = row['dataset_name'] + ':' + row['query']
     ds_query = ds_query.lower()
-
+
     if not isinstance(row['scientific_name'],str):
         unmapped_queries.add(ds_query)
         ds_query_to_scientific_name[ds_query] = 'unmapped'
         continue
-
+
     ds_query_to_scientific_name[ds_query] = row['scientific_name']
-
+
 print('Loaded taxonomy mappings for {} datasets'.format(len(datasets_with_taxonomy_mapping)))
-
+


 #%% Download and parse the metadata file


@@ -78,55 +81,55 @@ print('Loaded metadata URLs for {} datasets'.format(len(metadata_table)))

 #%% Download and extract metadata for each dataset

-for ds_name in metadata_table.keys():
-    metadata_table[ds_name]['json_filename'] =
-
-
+for ds_name in metadata_table.keys():
+    metadata_table[ds_name]['json_filename'] = \
+        read_metadata_file_for_dataset(ds_name=ds_name,
+                                       metadata_dir=metadata_dir,
+                                       metadata_table=metadata_table,
+                                       preferred_cloud=preferred_cloud)


 #%% Get category names and counts for each dataset

 # Takes ~5 minutes

-from collections import defaultdict
-
 dataset_to_categories = {}

 # ds_name = 'NACTI'
 for ds_name in metadata_table.keys():
-
+
     taxonomy_mapping_available = (ds_name in datasets_with_taxonomy_mapping)
-
+
     if not taxonomy_mapping_available:
         print('Warning: taxonomy mapping not available for {}'.format(ds_name))
-
+
     print('Finding categories in {}'.format(ds_name))

     json_filename = metadata_table[ds_name]['json_filename']
     base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
     assert not base_url.endswith('/')
-
-    # Open the metadata file
+
+    # Open the metadata file
     with open(json_filename, 'r') as f:
         data = json.load(f)
-
+
     # Collect list of categories and mappings to category name
     categories = data['categories']
-
+
     category_id_to_count = defaultdict(int)
-    annotations = data['annotations']
-
+    annotations = data['annotations']
+
     # ann = annotations[0]
     for ann in annotations:
         category_id_to_count[ann['category_id']] = category_id_to_count[ann['category_id']] + 1
-
+
     # c = categories[0]
     for c in categories:
-        count = category_id_to_count[c['id']]
+        count = category_id_to_count[c['id']]
         if 'count' in c:
-            assert 'bbox' in ds_name or c['count'] == count
+            assert 'bbox' in ds_name or c['count'] == count
         c['count'] = count
-
+
     # Don't do taxonomy mapping for bbox data sets, which are sometimes just binary and are
     # always redundant with the class-level data sets.
     if 'bbox' in ds_name:

@@ -144,7 +147,7 @@ for ds_name in metadata_table.keys():
             sn = ds_query_to_scientific_name[taxonomy_query_string]
             assert sn is not None and len(sn) > 0
             c['scientific_name_from_taxonomy_mapping'] = sn
-
+
     dataset_to_categories[ds_name] = categories

 # ...for each dataset

@@ -154,19 +157,18 @@ for ds_name in metadata_table.keys():

 # ds_name = list(dataset_to_categories.keys())[0]
 for ds_name in dataset_to_categories:
-
+
     print('\n** Category counts for {} **\n'.format(ds_name))
-
+
     categories = dataset_to_categories[ds_name]
     categories = sorted(categories, key=lambda x: x['count'], reverse=True)
-
+
     for c in categories:
         print('{} ({}): {}'.format(c['name'],c['scientific_name_from_taxonomy_mapping'],c['count']))
-
+
 # ...for each dataset


 #%% Save the results

-
-    json.dump(dataset_to_categories,f,indent=1)
+ct_utils.write_json(output_file, dataset_to_categories)
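The last hunk above swaps the script's manual json.dump(...) call for the ct_utils.write_json helper that 10.0.0 now imports at the top of the file. A minimal sketch of the new call as it appears in the diff; the dictionary literal and the output path value below are illustrative only (the real script builds dataset_to_categories from LILA metadata and defines output_file elsewhere):

    from megadetector.utils import ct_utils

    # Illustrative values; the real script populates these from LILA metadata
    output_file = 'lila_dataset_to_categories.json'
    dataset_to_categories = {'Example Dataset': [{'name': 'deer', 'count': 10}]}

    # 10.0.0 writes the results via the ct_utils helper instead of json.dump(...)
    ct_utils.write_json(output_file, dataset_to_categories)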
megadetector/data_management/lila/get_lila_image_counts.py

@@ -5,7 +5,7 @@ get_lila_image_counts.py
 Count the number of images and bounding boxes with each label in one or more LILA datasets.

 This script doesn't write these counts out anywhere other than the console, it's just intended
-as a template for doing operations like this on LILA data. get_lila_annotation_counts.py writes
+as a template for doing operations like this on LILA data. get_lila_annotation_counts.py writes
 information out to a .json file, but it counts *annotations*, not *images*, for each category.

 """

@@ -40,53 +40,53 @@ metadata_table = read_lila_metadata(metadata_dir)
 if datasets_of_interest is None:
     datasets_of_interest = list(metadata_table.keys())

-for ds_name in datasets_of_interest:
+for ds_name in datasets_of_interest:
     metadata_table[ds_name]['json_filename'] = read_metadata_file_for_dataset(ds_name=ds_name,
                                                                               metadata_dir=metadata_dir,
                                                                               metadata_table=metadata_table)
-
-
+
+
 #%% Count categories

 ds_name_to_category_counts = {}

 # ds_name = datasets_of_interest[0]
 for ds_name in datasets_of_interest:
-
+
     category_to_image_count = {}
     category_to_bbox_count = {}
-
+
     print('Counting categories in: ' + ds_name)
-
+
     json_filename = metadata_table[ds_name]['json_filename']
     with open(json_filename, 'r') as f:
         data = json.load(f)
-
+
     categories = data['categories']
     category_ids = [c['id'] for c in categories]
     for c in categories:
         category_id_to_name = {c['id']:c['name'] for c in categories}
     annotations = data['annotations']
     images = data['images']
-
-    for category_id in category_ids:
-        category_name = category_id_to_name[category_id]
+
+    for category_id in category_ids:
+        category_name = category_id_to_name[category_id]
         category_to_image_count[category_name] = 0
         category_to_bbox_count[category_name] = 0
-
+
     image_id_to_category_names = defaultdict(set)
-
+
     # Go through annotations, marking each image with the categories that are present
     #
     # ann = annotations[0]
     for ann in annotations:
-
+
         category_name = category_id_to_name[ann['category_id']]
         image_id_to_category_names[ann['image_id']].add(category_name)

     # Now go through images and count categories
     category_to_count = defaultdict(int)
-
+
     # im = images[0]
     for im in images:
         categories_this_image = image_id_to_category_names[im['id']]

@@ -94,19 +94,19 @@ for ds_name in datasets_of_interest:
             category_to_count[category_name] += 1

     ds_name_to_category_counts[ds_name] = category_to_count
-
+
 # ...for each dataset
-
+

 #%% Print the results

 for ds_name in ds_name_to_category_counts:
-
+
     print('\n** Category counts for {} **\n'.format(ds_name))
-
+
     category_to_count = ds_name_to_category_counts[ds_name]
-    category_to_count = {k: v for k, v in sorted(category_to_count.items(), reverse=True,
+    category_to_count = {k: v for k, v in sorted(category_to_count.items(), reverse=True,
                                                  key=lambda item: item[1])}
-
-    for category_name in category_to_count.keys():
+
+    for category_name in category_to_count.keys():
         print('{}: {}'.format(category_name,category_to_count[category_name]))
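The counting idiom in the hunks above (unchanged here apart from whitespace) first marks each image with the set of categories present in its annotations, then counts images rather than annotations per category. A self-contained sketch of that idiom, with toy stand-ins for data['annotations'], data['images'], and the category mapping:

    from collections import defaultdict

    # Toy stand-ins for the fields read from a COCO Camera Traps metadata file
    annotations = [{'image_id': 'img1', 'category_id': 1},
                   {'image_id': 'img1', 'category_id': 1},
                   {'image_id': 'img2', 'category_id': 2}]
    images = [{'id': 'img1'}, {'id': 'img2'}]
    category_id_to_name = {1: 'deer', 2: 'empty'}

    # Mark each image with the categories that are present in its annotations
    image_id_to_category_names = defaultdict(set)
    for ann in annotations:
        image_id_to_category_names[ann['image_id']].add(category_id_to_name[ann['category_id']])

    # Count *images* (not annotations) per category
    category_to_count = defaultdict(int)
    for im in images:
        for category_name in image_id_to_category_names[im['id']]:
            category_to_count[category_name] += 1

    print(dict(category_to_count))  # {'deer': 1, 'empty': 1}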
megadetector/data_management/lila/lila_common.py

@@ -53,30 +53,30 @@ for url in lila_base_urls.values():
 def read_wildlife_insights_taxonomy_mapping(metadata_dir, force_download=False):
     """
     Reads the WI taxonomy mapping file, downloading the .json data (and writing to .csv) if necessary.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the taxonomy mapping file
+        force_download (bool, optional): download the taxonomy mapping file
             even if the local file exists.
-
+
     Returns:
         pd.dataframe: A DataFrame with taxonomy information
     """
-
+
     wi_taxonomy_csv_path = os.path.join(metadata_dir,wildlife_insights_taxonomy_local_csv_filename)
-
+
     if os.path.exists(wi_taxonomy_csv_path):
         df = pd.read_csv(wi_taxonomy_csv_path)
     else:
         wi_taxonomy_json_path = os.path.join(metadata_dir,wildlife_insights_taxonomy_local_json_filename)
-        download_url(wildlife_insights_taxonomy_url, wi_taxonomy_json_path,
+        download_url(wildlife_insights_taxonomy_url, wi_taxonomy_json_path,
                      force_download=force_download)
         with open(wi_taxonomy_json_path,'r') as f:
             d = json.load(f)
-
+
         # We haven't implemented paging, make sure that's not an issue
         assert d['meta']['totalItems'] < wildlife_insights_page_size
-
+
         # d['data'] is a list of items that look like:
         """
         {'id': 2000003,

@@ -92,46 +92,46 @@ def read_wildlife_insights_taxonomy_mapping(metadata_dir, force_download=False):
         """
         df = pd.DataFrame(d['data'])
         df.to_csv(wi_taxonomy_csv_path,index=False)
-
+
     return df

-
+
 def read_lila_taxonomy_mapping(metadata_dir, force_download=False):
     """
     Reads the LILA taxonomy mapping file, downloading the .csv file if necessary.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the taxonomy mapping file
-            even if the local file exists.
-
+        force_download (bool, optional): download the taxonomy mapping file
+            even if the local file exists.
+
     Returns:
         pd.DataFrame: a DataFrame with one row per identification
     """
-
+
     p = urlparse(lila_taxonomy_mapping_url)
     taxonomy_filename = os.path.join(metadata_dir,os.path.basename(p.path))
-    download_url(lila_taxonomy_mapping_url, taxonomy_filename,
+    download_url(lila_taxonomy_mapping_url, taxonomy_filename,
                  force_download=force_download)
-
+
     df = pd.read_csv(lila_taxonomy_mapping_url)
-
+
     return df

-
+
 def read_lila_metadata(metadata_dir, force_download=False):
     """
     Reads LILA metadata (URLs to each dataset), downloading the .csv file if necessary.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the metadata file even if
+        force_download (bool, optional): download the metadata file even if
             the local file exists.
-
+
     Returns:
         dict: a dict mapping dataset names (e.g. "Caltech Camera Traps") to dicts
             with keys corresponding to the headers in the .csv file, currently:
-
+
             - name
            - short_name
            - continent

@@ -153,65 +153,65 @@ def read_lila_metadata(metadata_dir, force_download=False):
            - md_results_with_rde
            - json_filename
     """
-
+
     # Put the master metadata file in the same folder where we're putting images
     p = urlparse(lila_metadata_url)
     metadata_filename = os.path.join(metadata_dir,os.path.basename(p.path))
     download_url(lila_metadata_url, metadata_filename, force_download=force_download)
-
+
     df = pd.read_csv(metadata_filename)
-
+
     records = df.to_dict('records')
-
+
     # Parse into a table keyed by dataset name
     metadata_table = {}
-
+
     # r = records[0]
     for r in records:
         if is_empty(r['name']):
             continue
-
+
         # Convert NaN's to None
         for k in r.keys():
             if is_empty(r[k]):
                 r[k] = None
-
+
         metadata_table[r['name']] = r
-
-    return metadata_table
-
+
+    return metadata_table
+

 def read_lila_all_images_file(metadata_dir, force_download=False):
     """
     Downloads if necessary - then unzips if necessary - the .csv file with label mappings for
     all LILA files, and opens the resulting .csv file as a Pandas DataFrame.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the metadata file even if
+        force_download (bool, optional): download the metadata file even if
            the local file exists.
-
+
     Returns:
         pd.DataFrame: a DataFrame containing one row per identification in a LILA camera trap image
     """
-
+
     p = urlparse(lila_all_images_url)
     lila_all_images_zip_filename = os.path.join(metadata_dir,os.path.basename(p.path))
     download_url(lila_all_images_url, lila_all_images_zip_filename,
                  force_download=force_download)
-
+
     with zipfile.ZipFile(lila_all_images_zip_filename,'r') as z:
         files = z.namelist()
         assert len(files) == 1
-
+
     unzipped_csv_filename = os.path.join(metadata_dir,files[0])
     if not os.path.isfile(unzipped_csv_filename):
         unzip_file(lila_all_images_zip_filename,metadata_dir)
     else:
-        print('{} already unzipped'.format(unzipped_csv_filename))
-
+        print('{} already unzipped'.format(unzipped_csv_filename))
+
     df = pd.read_csv(unzipped_csv_filename)
-
+
     return df


@@ -223,94 +223,97 @@ def read_metadata_file_for_dataset(ds_name,
                                    force_download=False):
     """
     Downloads if necessary - then unzips if necessary - the .json file for a specific dataset.
-
+
     Args:
         ds_name (str): the name of the dataset for which you want to retrieve metadata (e.g.
-            "Caltech Camera Traps")
+            "Caltech Camera Traps")
         metadata_dir (str): folder to use for temporary LILA metadata files
         metadata_table (dict, optional): an optional dictionary already loaded via
             read_lila_metadata()
         json_url (str, optional): the URL of the metadata file, if None will be retrieved
            via read_lila_metadata()
         preferred_cloud (str, optional): 'gcp' (default), 'azure', or 'aws'
-        force_download (bool, optional): download the metadata file even if
+        force_download (bool, optional): download the metadata file even if
            the local file exists.
-
+
     Returns:
         str: the .json filename on the local disk
-
+
     """
-
+
+    if preferred_cloud is None:
+        preferred_cloud = 'gcp'
+
     assert preferred_cloud in lila_base_urls.keys()
-
+
     if json_url is None:
-
+
         if metadata_table is None:
             metadata_table = read_lila_metadata(metadata_dir)
-
+
         json_url = metadata_table[ds_name]['metadata_url_' + preferred_cloud]
-
+
     p = urlparse(json_url)
     json_filename = os.path.join(metadata_dir,os.path.basename(p.path))
     download_url(json_url, json_filename, force_download=force_download)
-
+
     # Unzip if necessary
     if json_filename.endswith('.zip'):
-
+
         with zipfile.ZipFile(json_filename,'r') as z:
             files = z.namelist()
             assert len(files) == 1
         unzipped_json_filename = os.path.join(metadata_dir,files[0])
         if not os.path.isfile(unzipped_json_filename):
-            unzip_file(json_filename,metadata_dir)
+            unzip_file(json_filename,metadata_dir)
         else:
             print('{} already unzipped'.format(unzipped_json_filename))
         json_filename = unzipped_json_filename
-
+
     return json_filename


 #%% Interactive test driver

 if False:
-
+
     pass

     #%% Verify that all base URLs exist
-
+
     # LILA camera trap primary metadata file
     urls = (lila_metadata_url,
             lila_taxonomy_mapping_url,
             lila_all_images_url,
             wildlife_insights_taxonomy_url)
-
+
     from megadetector.utils import url_utils
-
+
     status_codes = url_utils.test_urls(urls,timeout=2.0)
     assert all([code == 200 for code in status_codes])
-
-
+
+
     #%% Verify that the metadata URLs exist for individual datasets
-
+
     metadata_dir = os.path.expanduser('~/lila/metadata')
-
+
     dataset_metadata = read_lila_metadata(metadata_dir)
-
+
     urls_to_test = []
-
+
     # ds_name = next(iter(dataset_metadata.keys()))
     for ds_name in dataset_metadata.keys():
-
+
         ds_info = dataset_metadata[ds_name]
         for cloud_name in lila_base_urls.keys():
             urls_to_test.append(ds_info['metadata_url_' + cloud_name])
-            if ds_info['bbox_url_relative']
+            if ds_info['bbox_url_relative'] is not None:
                 urls_to_test.append(ds_info['bbox_url_' + cloud_name])
-
+
     status_codes = url_utils.test_urls(urls_to_test,
                                        error_on_failure=True,
                                        n_workers=10,
                                        pool_type='process',
                                        timeout=2.0)
     assert all([code == 200 for code in status_codes])
-
+
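The substantive change to read_metadata_file_for_dataset() above is that preferred_cloud=None now falls back to 'gcp' before the lila_base_urls assertion runs, matching the "'gcp' (default)" wording in its docstring. A minimal usage sketch based on the calls visible in these diffs; the metadata directory is just a scratch folder (the path below is taken from the test driver), and the dataset name comes from the docstring's example:

    import os
    from megadetector.data_management.lila.lila_common import (
        read_lila_metadata, read_metadata_file_for_dataset)

    metadata_dir = os.path.expanduser('~/lila/metadata')  # any writable scratch folder
    metadata_table = read_lila_metadata(metadata_dir)

    # Passing preferred_cloud=None now resolves to 'gcp' inside the function
    json_filename = read_metadata_file_for_dataset(ds_name='Caltech Camera Traps',
                                                   metadata_dir=metadata_dir,
                                                   metadata_table=metadata_table,
                                                   preferred_cloud=None)
    print(json_filename)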