megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (197)
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
megadetector/data_management/importers/zamba_results_to_md_results.py
@@ -1,180 +0,0 @@
- """
-
- zamba_results_to_md_results.py
-
- Convert a labels.csv file produced by Zamba Cloud to a MD results file suitable
- for import into Timelapse.
-
- Columns are expected to be:
-
- video_uuid (not used)
- original_filename (assumed to be a relative path name)
- top_k_label,top_k_probability, for k = 1..N
- [category name 1],[category name 2],...
- corrected_label
-
- Because the MD results file fundamentally stores detections, what we'll
- actually do is create bogus detections that fill the entire image.
-
- There is no special handling of empty/blank categories; because these results are
- based on a classifier, rather than a detector (where "blank" would be the absence of
- all other categories), "blank" can be queried in Timelapse just like any other class.
-
- """
-
- #%% Imports and constants
-
- import pandas as pd
- import json
-
-
- #%% Main function
-
- def zamba_results_to_md_results(input_file,output_file=None):
-     """
-     Converts the .csv file [input_file] to the MD-formatted .json file [output_file].
-
-     If [output_file] is None, '.json' will be appended to the input file.
-     """
-
-     if output_file is None:
-         output_file = input_file + '.json'
-
-     df = pd.read_csv(input_file)
-
-     expected_columns = ('video_uuid','corrected_label','original_filename')
-     for s in expected_columns:
-         assert s in df.columns,\
-             'Expected column {} not found, are you sure this is a Zamba results .csv file?'.format(
-                 s)
-
-     # How many results are included per file?
-     assert 'top_1_probability' in df.columns and 'top_1_label' in df.columns
-     top_k = 2
-     while(True):
-         p_string = 'top_' + str(top_k) + '_probability'
-         label_string = 'top_' + str(top_k) + '_label'
-
-         if p_string in df.columns:
-             assert label_string in df.columns,\
-                 'Oops, {} is a column but {} is not'.format(
-                     p_string,label_string)
-             top_k += 1
-             continue
-         else:
-             assert label_string not in df.columns,\
-                 'Oops, {} is a column but {} is not'.format(
-                     label_string,p_string)
-             top_k -= 1
-             break
-
-     print('Found {} probability column pairs'.format(top_k))
-
-     # Category names start after the fixed columns and the probability columns
-     category_names = []
-     column_names = list(df.columns)
-     first_category_name_index = 0
-     while('top_' in column_names[first_category_name_index] or \
-           column_names[first_category_name_index] in expected_columns):
-         first_category_name_index += 1
-
-     i_column = first_category_name_index
-     while( (i_column < len(column_names)) and (column_names[i_column] != 'corrected_label') ):
-         category_names.append(column_names[i_column])
-         i_column += 1
-
-     print('Found {} categories:\n'.format(len(category_names)))
-
-     for s in category_names:
-         print(s)
-
-     info = {}
-     info['format_version'] = '1.3'
-     info['detector'] = 'Zamba Cloud'
-     info['classifier'] = 'Zamba Cloud'
-
-     detection_category_id_to_name = {}
-     for category_id,category_name in enumerate(category_names):
-         detection_category_id_to_name[str(category_id)] = category_name
-     detection_category_name_to_id = {v: k for k, v in detection_category_id_to_name.items()}
-
-     images = []
-
-     # i_row = 0; row = df.iloc[i_row]
-     for i_row,row in df.iterrows():
-
-         im = {}
-         images.append(im)
-         im['file'] = row['original_filename']
-
-         detections = []
-
-         # k = 1
-         for k in range(1,top_k+1):
-             label = row['top_{}_label'.format(k)]
-             confidence = row['top_{}_probability'.format(k)]
-             det = {}
-             det['category'] = detection_category_name_to_id[label]
-             det['conf'] = confidence
-             det['bbox'] = [0,0,1.0,1.0]
-             detections.append(det)
-
-         im['detections'] = detections
-
-     # ...for each row
-
-     results = {}
-     results['info'] = info
-     results['detection_categories'] = detection_category_id_to_name
-     results['images'] = images
-
-     with open(output_file,'w') as f:
-         json.dump(results,f,indent=1)
-
- # ...zamba_results_to_md_results(...)
-
-
- #%% Interactive driver
-
- if False:
-
-     pass
-
-     #%%
-
-     input_file = r"G:\temp\labels-job-b95a4b76-e332-4e17-ab40-03469392d36a-2023-11-04_16-28-50.060130.csv"
-     output_file = None
-     zamba_results_to_md_results(input_file,output_file)
-
-
- #%% Command-line driver
-
- import sys,argparse
-
- def main():
-
-     parser = argparse.ArgumentParser(
-         description='Convert a Zamba-formatted .csv results file to a MD-formatted .json results file')
-
-     parser.add_argument(
-         'input_file',
-         type=str,
-         help='input .csv file')
-
-     parser.add_argument(
-         '--output_file',
-         type=str,
-         default=None,
-         help='output .json file (defaults to input file appended with ".json")')
-
-     if len(sys.argv[1:]) == 0:
-         parser.print_help()
-         parser.exit()
-
-     args = parser.parse_args()
-
-     zamba_results_to_md_results(args.input_file,args.output_file)
-
- if __name__ == '__main__':
-     main()
-
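For context, the converter above emits a standard MD-format (format_version 1.3) results file in which every classifier label becomes a full-frame pseudo-detection. A minimal sketch of the structure it writes, using hypothetical filenames and category names:

import json

# Categories as they would be read from the CSV header (hypothetical names)
category_names = ['blank', 'elephant']
detection_categories = {str(i): name for i, name in enumerate(category_names)}
# Map each category name back to its string ID, as the converter does
name_to_id = {name: i for i, name in detection_categories.items()}

results = {
    'info': {'format_version': '1.3',
             'detector': 'Zamba Cloud',
             'classifier': 'Zamba Cloud'},
    'detection_categories': detection_categories,
    'images': [{
        'file': 'videos/clip_0001.mp4',  # hypothetical relative path
        'detections': [
            # Full-frame pseudo-detection: bbox is [x, y, width, height],
            # normalized to [0, 1]
            {'category': name_to_id['elephant'], 'conf': 0.97, 'bbox': [0, 0, 1.0, 1.0]}
        ]
    }]
}

with open('labels.csv.json', 'w') as f:
    json.dump(results, f, indent=1)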
megadetector/data_management/lila/add_locations_to_island_camera_traps.py
@@ -1,101 +0,0 @@
- """
-
- add_locations_to_island_camera_traps.py
-
- The Island Conservation Camera Traps dataset had unique camera identifiers embedded
- in filenames, but not in the proper metadata fields. This script copies that information
- to metadata.
-
- """
-
- #%% Imports and constants
-
- import os
- import json
- from tqdm import tqdm
-
- input_fn = os.path.expanduser('~/lila/metadata/island_conservation.json')
- output_fn = os.path.expanduser('~/tmp/island_conservation.json')
- preview_folder = os.path.expanduser('~/tmp/island_conservation_preview')
- image_directory = os.path.expanduser('~/data/icct/public/')
-
-
- #%% Prevent imports during testing
-
- if False:
-
-     #%% Read input file
-
-     with open(input_fn,'r') as f:
-         d = json.load(f)
-
-     d['info']
-     d['info']['version'] = '1.01'
-
-
-     #%% Find locations
-
-     images = d['images']
-
-     locations = set()
-
-     for i_image,im in tqdm(enumerate(images),total=len(images)):
-         tokens_fn = im['file_name'].split('/')
-         tokens_id = im['id'].split('_')
-         assert tokens_fn[0] == tokens_id[0]
-         assert tokens_fn[1] == tokens_id[1]
-         location = tokens_fn[0] + '_' + tokens_fn[1]
-         im['location'] = location
-         locations.add(location)
-
-     locations = sorted(list(locations))
-
-     for s in locations:
-         print(s)
-
-
-     #%% Write output file
-
-     with open(output_fn,'w') as f:
-         json.dump(d,f,indent=1)
-
-
-     #%% Validate .json files
-
-     from megadetector.data_management.databases import integrity_check_json_db
-
-     options = integrity_check_json_db.IntegrityCheckOptions()
-     options.baseDir = image_directory
-     options.bCheckImageSizes = False
-     options.bCheckImageExistence = True
-     options.bFindUnusedImages = True
-
-     sorted_categories, data, error_info = integrity_check_json_db.integrity_check_json_db(output_fn, options)
-
-
-     #%% Preview labels
-
-     from megadetector.visualization import visualize_db
-
-     viz_options = visualize_db.DbVizOptions()
-     viz_options.num_to_visualize = 2000
-     viz_options.trim_to_images_with_bboxes = False
-     viz_options.add_search_links = False
-     viz_options.sort_by_filename = False
-     viz_options.parallelize_rendering = True
-     viz_options.classes_to_exclude = ['test']
-     html_output_file, image_db = visualize_db.visualize_db(db_path=output_fn,
-                                                            output_dir=preview_folder,
-                                                            image_base_dir=image_directory,
-                                                            options=viz_options)
-
-     from megadetector.utils import path_utils
-     path_utils.open_file(html_output_file)
-
-
-     #%% Zip output file
-
-     from megadetector.utils.path_utils import zip_file
-
-     zip_file(output_fn, verbose=True)
-     assert os.path.isfile(output_fn + '.zip')
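The location derivation above is a consistency check between the first two tokens of the filename and of the image ID, which are then joined into a location string. A minimal illustration with a hypothetical record:

# Hypothetical record; real filenames follow the same island/camera token layout
im = {'file_name': 'island1/camera03/IMG_0001.JPG',
      'id': 'island1_camera03_IMG_0001'}

tokens_fn = im['file_name'].split('/')
tokens_id = im['id'].split('_')
assert tokens_fn[0] == tokens_id[0] and tokens_fn[1] == tokens_id[1]

im['location'] = tokens_fn[0] + '_' + tokens_fn[1]
print(im['location'])  # island1_camera03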
megadetector/data_management/lila/add_locations_to_nacti.py
@@ -1,151 +0,0 @@
- """
-
- add_locations_to_nacti.py
-
- As of 10.2023, NACTI metadata only has very coarse location information (e.g. "Florida"),
- but camera IDs are embedded in filenames. This script pulls that information from filenames
- and adds it to metadata.
-
- """
-
- #%% Imports and constants
-
- import os
- import json
- import shutil
-
- from tqdm import tqdm
- from collections import defaultdict
-
- input_file = r'd:\lila\nacti\nacti_metadata.json.1.13\nacti_metadata.json'
- output_file = r'g:\temp\nacti_metadata.1.14.json'
-
-
- #%% Prevent execution during testing
-
- if False:
-
-     #%% Read metadata
-
-     with open(input_file,'r') as f:
-         d = json.load(f)
-
-     assert d['info']['version'] == 1.13
-
-
-     #%% Map images to locations (according to the metadata)
-
-     file_name_to_original_location = {}
-
-     # im = dataset_labels['images'][0]
-     for im in tqdm(d['images']):
-         file_name_to_original_location[im['file_name']] = im['location']
-
-     original_locations = set(file_name_to_original_location.values())
-
-     print('Found {} locations in the original metadata:'.format(len(original_locations)))
-     for loc in original_locations:
-         print('[{}]'.format(loc))
-
-
-     #%% Map images to new locations
-
-     def path_to_location(relative_path):
-
-         relative_path = relative_path.replace('\\','/')
-         if relative_path in file_name_to_original_location:
-             location_name = file_name_to_original_location[relative_path]
-             if location_name == 'San Juan Mntns, Colorado':
-                 # "part0/sub000/2010_Unit150_Ivan097_img0003.jpg"
-                 tokens = relative_path.split('/')[-1].split('_')
-                 assert tokens[1].startswith('Unit')
-                 location_name = 'sanjuan_{}_{}_{}'.format(tokens[0],tokens[1],tokens[2])
-             elif location_name == 'Lebec, California':
-                 # "part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg"
-                 tokens = relative_path.split('/')[-1].split('_')
-                 assert tokens[0].startswith('CA-') or tokens[0].startswith('TAG-')
-                 location_name = 'lebec_{}'.format(tokens[0])
-             elif location_name == 'Archbold, FL':
-                 # "part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg"
-                 tokens = relative_path.split('/')[-1].split('_')
-                 assert tokens[0].startswith('FL-')
-                 location_name = 'archbold_{}'.format(tokens[0])
-             else:
-                 assert location_name == ''
-                 tokens = relative_path.split('/')[-1].split('_')
-                 if tokens[0].startswith('CA-') or tokens[0].startswith('TAG-') or tokens[0].startswith('FL-'):
-                     location_name = '{}'.format(tokens[0])
-
-         else:
-
-             location_name = 'unknown'
-
-         # print('Returning location {} for file {}'.format(location_name,relative_path))
-
-         return location_name
-
-     file_name_to_updated_location = {}
-     updated_location_to_count = defaultdict(int)
-     for im in tqdm(d['images']):
-
-         updated_location = path_to_location(im['file_name'])
-         file_name_to_updated_location[im['file_name']] = updated_location
-         updated_location_to_count[updated_location] += 1
-
-     updated_location_to_count = {k: v for k, v in sorted(updated_location_to_count.items(),
-                                                          key=lambda item: item[1],
-                                                          reverse=True)}
-
-     updated_locations = set(file_name_to_updated_location.values())
-
-     print('Found {} updated locations in the original metadata:'.format(len(updated_locations)))
-     for loc in updated_location_to_count:
-         print('{}: {}'.format(loc,updated_location_to_count[loc]))
-
-
-     #%% Re-write metadata
-
-     for im in d['images']:
-         im['location'] = file_name_to_updated_location[im['file_name']]
-     d['info']['version'] = 1.14
-
-     with open(output_file,'w') as f:
-         json.dump(d,f,indent=1)
-
-
-     #%% For each location, sample some random images to make sure they look consistent
-
-     input_base = r'd:\lila\nacti-unzipped'
-     assert os.path.isdir(input_base)
-
-     location_to_images = defaultdict(list)
-
-     for im in d['images']:
-         location_to_images[im['location']].append(im)
-
-     n_to_sample = 10
-     import random
-     random.seed(0)
-     sampling_folder_base = r'g:\temp\nacti_samples'
-
-     for location in tqdm(location_to_images):
-
-         images_this_location = location_to_images[location]
-         if len(images_this_location) > n_to_sample:
-             images_this_location = random.sample(images_this_location,n_to_sample)
-
-         for i_image,im in enumerate(images_this_location):
-
-             fn_relative = im['file_name']
-             source_fn_abs = os.path.join(input_base,fn_relative)
-             assert os.path.isfile(source_fn_abs)
-             ext = os.path.splitext(fn_relative)[1]
-             target_fn_abs = os.path.join(sampling_folder_base,'{}/{}'.format(
-                 location,'image_{}{}'.format(str(i_image).zfill(2),ext)))
-             os.makedirs(os.path.dirname(target_fn_abs),exist_ok=True)
-             shutil.copyfile(source_fn_abs,target_fn_abs)
-
-         # ...for each image
-
-     # ...for each location
-
megadetector/utils/azure_utils.py
@@ -1,178 +0,0 @@
- """
-
- azure_utils.py
-
- Miscellaneous Azure Blob Storage utilities
-
- Requires azure-storage-blob>=12.4.0
-
- """
-
- #%% Imports
-
- import json
-
- from typing import Any, Iterable, List, Optional, Tuple, Union
- from azure.storage.blob import BlobPrefix, ContainerClient
-
- from megadetector.utils import path_utils
- from megadetector.utils import sas_blob_utils
-
-
- #%% Functions
-
- def walk_container(container_client: ContainerClient,
-                    max_depth: int = -1,
-                    prefix: str = '',
-                    store_folders: bool = True,
-                    store_blobs: bool = True,
-                    debug_max_items: int = -1) -> Tuple[List[str], List[str]]:
-     """
-     Recursively walk folders a Azure Blob Storage container.
-
-     Based on:
-     https://github.com/Azure/azure-sdk-for-python/blob/master/sdk/storage/azure-storage-blob/samples/blob_samples_walk_blob_hierarchy.py
-     """
-
-     depth = 1
-
-     def walk_blob_hierarchy(prefix: str,
-                             folders: Optional[List[str]] = None,
-                             blobs: Optional[List[str]] = None
-                             ) -> Tuple[List[str], List[str]]:
-         if folders is None:
-             folders = []
-         if blobs is None:
-             blobs = []
-
-         nonlocal depth
-
-         if 0 < max_depth < depth:
-             return folders, blobs
-
-         for item in container_client.walk_blobs(name_starts_with=prefix):
-             short_name = item.name[len(prefix):]
-             if isinstance(item, BlobPrefix):
-                 # print('F: ' + prefix + short_name)
-                 if store_folders:
-                     folders.append(prefix + short_name)
-                 depth += 1
-                 walk_blob_hierarchy(item.name, folders=folders, blobs=blobs)
-                 if (debug_max_items > 0
-                         and len(folders) + len(blobs) > debug_max_items):
-                     return folders, blobs
-                 depth -= 1
-             else:
-                 if store_blobs:
-                     blobs.append(prefix + short_name)
-
-         return folders, blobs
-
-     folders, blobs = walk_blob_hierarchy(prefix=prefix)
-
-     assert all(s.endswith('/') for s in folders)
-     folders = [s.strip('/') for s in folders]
-
-     return folders, blobs
-
-
- def list_top_level_blob_folders(container_client: ContainerClient) -> List[str]:
-     """
-     List all top-level folders in a container.
-     """
-
-     top_level_folders, _ = walk_container(
-         container_client, max_depth=1, store_blobs=False)
-     return top_level_folders
-
-
- def concatenate_json_lists(input_files: Iterable[str],
-                            output_file: Optional[str] = None
-                            ) -> List[Any]:
-     """
-     Given a list of JSON files that contain lists (typically string
-     filenames), concatenates the lists into a single list and optionally
-     writes out this list to a new output JSON file.
-     """
-
-     output_list = []
-     for fn in input_files:
-         with open(fn, 'r') as f:
-             file_list = json.load(f)
-         output_list.extend(file_list)
-     if output_file is not None:
-         with open(output_file, 'w') as f:
-             json.dump(output_list, f, indent=1)
-     return output_list
-
-
- def upload_file_to_blob(account_name: str,
-                         container_name: str,
-                         local_path: str,
-                         blob_name: str,
-                         sas_token: str,
-                         overwrite: bool=False) -> str:
-     """
-     Uploads a local file to Azure Blob Storage and returns the uploaded
-     blob URI with SAS token.
-     """
-
-     container_uri = sas_blob_utils.build_azure_storage_uri(
-         account=account_name, container=container_name, sas_token=sas_token)
-     with open(local_path, 'rb') as data:
-         return sas_blob_utils.upload_blob(
-             container_uri=container_uri, blob_name=blob_name, data=data,
-             overwrite=overwrite)
-
-
- def enumerate_blobs_to_file(
-         output_file: str,
-         account_name: str,
-         container_name: str,
-         sas_token: Optional[str] = None,
-         blob_prefix: Optional[str] = None,
-         blob_suffix: Optional[Union[str, Tuple[str]]] = None,
-         rsearch: Optional[str] = None,
-         limit: Optional[int] = None,
-         verbose: Optional[bool] = True
-         ) -> List[str]:
-     """
-     Enumerates blobs in a container, and writes the blob names to an output
-     file.
-
-     Args:
-         output_file: str, path to save list of files in container
-             If ends in '.json', writes a JSON string. Otherwise, writes a
-             newline-delimited list. Can be None, in which case this is just a
-             convenient wrapper for blob enumeration.
-         account_name: str, Azure Storage account name
-         container_name: str, Azure Blob Storage container name
-         sas_token: optional str, container SAS token, leading ? will be removed if present.
-         blob_prefix: optional str, returned results will only contain blob names
-             to with this prefix
-         blob_suffix: optional str or tuple of str, returned results will only
-             contain blob names with this/these suffix(es). The blob names will
-             be lowercased first before comparing with the suffix(es).
-         rsearch: optional str, returned results will only contain blob names
-             that match this regex. Can also be a list of regexes, in which case
-             blobs matching *any* of the regex's will be returned.
-         limit: int, maximum # of blob names to list
-             if None, then returns all blob names
-
-     Returns: list of str, sorted blob names, of length limit or shorter.
-     """
-
-     if sas_token is not None and len(sas_token) > 9 and sas_token[0] == '?':
-         sas_token = sas_token[1:]
-
-     container_uri = sas_blob_utils.build_azure_storage_uri(
-         account=account_name, container=container_name, sas_token=sas_token)
-
-     matched_blobs = sas_blob_utils.list_blobs_in_container(
-         container_uri=container_uri, blob_prefix=blob_prefix,
-         blob_suffix=blob_suffix, rsearch=rsearch, limit=limit, verbose=verbose)
-
-     if output_file is not None:
-         path_utils.write_list_to_file(output_file, matched_blobs)
-
-     return matched_blobs
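A hypothetical usage sketch for the removed helpers above (the account name, container name, and SAS token are placeholders; requires azure-storage-blob>=12.4.0):

from azure.storage.blob import ContainerClient

# Placeholder container URL with SAS token
container_url = 'https://myaccount.blob.core.windows.net/mycontainer?sv=2021-08-06&sig=...'
container_client = ContainerClient.from_container_url(container_url)

# Top-level "folders" (blob-name prefixes) in the container
folders = list_top_level_blob_folders(container_client)

# Enumerate .jpg blobs under a prefix and write the sorted names to a file
matched = enumerate_blobs_to_file(
    output_file='blobs.json',
    account_name='myaccount',
    container_name='mycontainer',
    sas_token='sv=2021-08-06&sig=...',
    blob_prefix='images/',
    blob_suffix='.jpg')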