megadetector 5.0.10__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (226)
  1. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.10.dist-info/RECORD +0 -224
  214. megadetector-5.0.10.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
md_utils/split_locations_into_train_val.py DELETED
@@ -1,228 +0,0 @@
- """
-
- split_locations_into_train_val.py
-
- Splits a list of location IDs into training and validation, targeting a specific
- train/val split for each category, but allowing some categories to be tighter or looser
- than others. Does nothing particularly clever, just randomly splits locations into
- train/val lots of times using the target val fraction, and picks the one that meets the
- specified constraints and minimizes weighted error, where "error" is defined as the
- sum of each class's absolute divergence from the target val fraction.
-
- """
-
- #%% Imports/constants
-
- import random
- import numpy as np
-
- from collections import defaultdict
- from md_utils.ct_utils import sort_dictionary_by_value
- from tqdm import tqdm
-
-
- #%% Main function
-
- def split_locations_into_train_val(location_to_category_counts,
-                                    n_random_seeds=10000,
-                                    target_val_fraction=0.15,
-                                    category_to_max_allowable_error=None,
-                                    category_to_error_weight=None,
-                                    default_max_allowable_error=0.1):
-     """
-     Splits a list of location IDs into training and validation, targeting a specific
-     train/val split for each category, but allowing some categories to be tighter or looser
-     than others. Does nothing particularly clever, just randomly splits locations into
-     train/val lots of times using the target val fraction, and picks the one that meets the
-     specified constraints and minimizes weighted error, where "error" is defined as the
-     sum of each class's absolute divergence from the target val fraction.
-
-     Args:
-         location_to_category_counts (dict): a dict mapping location IDs to dicts,
-             with each dict mapping a category name to a count. Any categories not present
-             in a particular dict are assumed to have a count of zero for that location.
-
-             For example:
-
-             .. code-block:: none
-
-                 {'location-000': {'bear':4,'wolf':10},
-                  'location-001': {'bear':12,'elk':20}}
-
-         n_random_seeds (int, optional): number of random seeds to try, always starting from zero
-         target_val_fraction (float, optional): fraction of images containing each species we'd
-             like to put in the val split
-         category_to_max_allowable_error (dict, optional): a dict mapping category names
-             to maximum allowable errors. These are hard constraints (i.e., we will error
-             if we can't meet them). Does not need to include all categories; categories not
-             included will be assigned a maximum error according to [default_max_allowable_error].
-             If this is None, no hard constraints are applied.
-         category_to_error_weight (dict, optional): a dict mapping category names to
-             error weights. You can specify a subset of categories; categories not included here
-             have a weight of 1.0. If None, all categories have the same weight.
-         default_max_allowable_error (float, optional): the maximum allowable error for categories not
-             present in [category_to_max_allowable_error]. Set to None (or >= 1.0) to disable hard
-             constraints for categories not present in [category_to_max_allowable_error]
-
-     Returns:
-         tuple: A two-element tuple:
-             - list of location IDs in the val split
-             - a dict mapping category names to the fraction of images in the val split
-     """
-
-     location_ids = list(location_to_category_counts.keys())
-
-     n_val_locations = int(target_val_fraction*len(location_ids))
-
-     if category_to_max_allowable_error is None:
-         category_to_max_allowable_error = {}
-
-     if category_to_error_weight is None:
-         category_to_error_weight = {}
-
-     # Category ID to total count; the total count is used only for printouts
-     category_id_to_count = {}
-     for location_id in location_to_category_counts:
-         for category_id in location_to_category_counts[location_id].keys():
-             if category_id not in category_id_to_count:
-                 category_id_to_count[category_id] = 0
-             category_id_to_count[category_id] += \
-                 location_to_category_counts[location_id][category_id]
-
-     category_ids = set(category_id_to_count.keys())
-
-     print('Splitting {} categories over {} locations'.format(
-         len(category_ids),len(location_ids)))
-
-     # random_seed = 0
-     def compute_seed_errors(random_seed):
-         """
-         Computes the per-category error for a specific random seed.
-
-         returns weighted_average_error,weighted_category_errors,category_to_val_fraction
-         """
-
-         # Randomly split into train/val
-         random.seed(random_seed)
-         val_locations = random.sample(location_ids,k=n_val_locations)
-         val_locations_set = set(val_locations)
-
-         # For each category, measure the % of images that went into the val set
-         category_to_val_fraction = defaultdict(float)
-
-         for category_id in category_ids:
-             category_val_count = 0
-             category_train_count = 0
-             for location_id in location_to_category_counts:
-                 if category_id not in location_to_category_counts[location_id]:
-                     location_category_count = 0
-                 else:
-                     location_category_count = location_to_category_counts[location_id][category_id]
-                 if location_id in val_locations_set:
-                     category_val_count += location_category_count
-                 else:
-                     category_train_count += location_category_count
-             category_val_fraction = category_val_count / (category_val_count + category_train_count)
-             category_to_val_fraction[category_id] = category_val_fraction
-
-         # Absolute deviation from the target val fraction for each category
-         category_errors = {}
-         weighted_category_errors = {}
-
-         # category = next(iter(category_to_val_fraction))
-         for category in category_to_val_fraction:
-
-             category_val_fraction = category_to_val_fraction[category]
-
-             category_error = abs(category_val_fraction-target_val_fraction)
-             category_errors[category] = category_error
-
-             category_weight = 1.0
-             if category in category_to_error_weight:
-                 category_weight = category_to_error_weight[category]
-             weighted_category_error = category_error * category_weight
-             weighted_category_errors[category] = weighted_category_error
-
-         weighted_average_error = np.mean(list(weighted_category_errors.values()))
-
-         return weighted_average_error,weighted_category_errors,category_to_val_fraction
-
-     # ... def compute_seed_errors(...)
-
-     # This will only include random seeds that satisfy the hard constraints
-     random_seed_to_weighted_average_error = {}
-
-     # random_seed = 0
-     for random_seed in tqdm(range(0,n_random_seeds)):
-
-         weighted_average_error,weighted_category_errors,category_to_val_fraction = \
-             compute_seed_errors(random_seed)
-
-         seed_satisfies_hard_constraints = True
-
-         for category in category_to_val_fraction:
-             if category in category_to_max_allowable_error:
-                 max_allowable_error = category_to_max_allowable_error[category]
-             else:
-                 if default_max_allowable_error is None:
-                     continue
-                 max_allowable_error = default_max_allowable_error
-             val_fraction = category_to_val_fraction[category]
-             category_error = abs(val_fraction - target_val_fraction)
-             if category_error > max_allowable_error:
-                 seed_satisfies_hard_constraints = False
-                 break
-
-         if seed_satisfies_hard_constraints:
-             random_seed_to_weighted_average_error[random_seed] = weighted_average_error
-
-     # ...for each random seed
-
-     assert len(random_seed_to_weighted_average_error) > 0, \
-         'No random seed met all the hard constraints'
-
-     print('\n{} of {} random seeds satisfied hard constraints'.format(
-         len(random_seed_to_weighted_average_error),n_random_seeds))
-
-     min_error = None
-     min_error_seed = None
-
-     for random_seed in random_seed_to_weighted_average_error.keys():
-         error_metric = random_seed_to_weighted_average_error[random_seed]
-         if min_error is None or error_metric < min_error:
-             min_error = error_metric
-             min_error_seed = random_seed
-
-     random.seed(min_error_seed)
-     val_locations = random.sample(location_ids,k=n_val_locations)
-     train_locations = []
-     for location_id in location_ids:
-         if location_id not in val_locations:
-             train_locations.append(location_id)
-
-     print('\nVal locations:\n')
-     for loc in val_locations:
-         print('{}'.format(loc))
-     print('')
-
-     weighted_average_error,weighted_category_errors,category_to_val_fraction = \
-         compute_seed_errors(min_error_seed)
-
-     random_seed = min_error_seed
-
-     category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,reverse=True)
-     category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,
-                                                         sort_values=category_id_to_count,
-                                                         reverse=True)
-
-
-     print('Val fractions by category:\n')
-
-     for category in category_to_val_fraction:
-         print('{} ({}) {:.2f}'.format(
-             category,category_id_to_count[category],
-             category_to_val_fraction[category]))
-
-     return val_locations,category_to_val_fraction
-
- # ...def split_locations_into_train_val(...)
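For reference, here is a minimal usage sketch for the deleted module. This is a sketch only: it assumes the 5.0.10 wheel's top-level md_utils package is importable, and the location/category counts are synthetic values invented for illustration.

import random

from md_utils.split_locations_into_train_val import split_locations_into_train_val

# Synthetic example data: 40 locations with random per-category image counts
random.seed(0)
location_to_category_counts = {
    'location-{:03d}'.format(i): {'bear': random.randint(0, 10),
                                  'wolf': random.randint(0, 10),
                                  'elk': random.randint(0, 10)}
    for i in range(40)
}

val_locations, category_to_val_fraction = split_locations_into_train_val(
    location_to_category_counts,
    n_random_seeds=1000,
    target_val_fraction=0.15,
    # Hard constraint: the 'bear' val fraction must land within 0.05 of the target
    category_to_max_allowable_error={'bear': 0.05},
    # Soft preference: 'wolf' errors count double in the weighted average
    category_to_error_weight={'wolf': 2.0})

Because the function re-seeds Python's random module for every candidate split, the chosen split is deterministic for a given input and n_random_seeds.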
md_utils/string_utils.py DELETED
@@ -1,92 +0,0 @@
- """
-
- string_utils.py
-
- Miscellaneous string utilities.
-
- """
-
- #%% Imports
-
- import re
-
-
- #%% Functions
-
- def is_float(s):
-     """
-     Checks whether [s] is an object (typically a string) that can be cast to a float
-
-     Args:
-         s (object): object to evaluate
-
-     Returns:
-         bool: True if s successfully casts to a float, otherwise False
-     """
-
-     try:
-         _ = float(s)
-     except ValueError:
-         return False
-     return True
-
-
- def human_readable_to_bytes(size):
-     """
-     Given a human-readable byte string (e.g. 2G, 10GB, 30MB, 20KB),
-     returns the number of bytes. Will return 0 if the argument has
-     unexpected form.
-
-     https://gist.github.com/beugley/ccd69945346759eb6142272a6d69b4e0
-
-     Args:
-         size (str): string representing a size
-
-     Returns:
-         int: the corresponding size in bytes
-     """
-
-     size = re.sub(r'\s+', '', size)
-
-     if (size[-1] == 'B'):
-         size = size[:-1]
-
-     if (size.isdigit()):
-         bytes = int(size)
-     elif (is_float(size)):
-         bytes = float(size)
-     else:
-         bytes = size[:-1]
-         unit = size[-1]
-         try:
-             bytes = float(bytes)
-             if (unit == 'T'):
-                 bytes *= 1024*1024*1024*1024
-             elif (unit == 'G'):
-                 bytes *= 1024*1024*1024
-             elif (unit == 'M'):
-                 bytes *= 1024*1024
-             elif (unit == 'K'):
-                 bytes *= 1024
-             else:
-                 bytes = 0
-         except ValueError:
-             bytes = 0
-
-     return bytes
-
-
- def remove_ansi_codes(s):
-     """
-     Removes ANSI escape codes from a string.
-
-     https://stackoverflow.com/questions/14693701/how-can-i-remove-the-ansi-escape-sequences-from-a-string-in-python#14693789
-
-     Args:
-         s (str): the string to de-ANSI-fy
-
-     Returns:
-         str: A copy of [s] without ANSI codes
-     """
-     ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
-     return ansi_escape.sub('', s)
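A few behaviors of the deleted module are easy to miss in the code above, so here is a short usage sketch (again assuming the 5.0.10 md_utils package is importable): a trailing 'B' is stripped before the unit is parsed, units are interpreted as powers of 1024, and an unparseable size returns 0 rather than raising.

from md_utils.string_utils import is_float, human_readable_to_bytes, remove_ansi_codes

assert is_float('3.14')
assert not is_float('pi')

# '20KB' and '20K' are equivalent; units are powers of 1024; values may be fractional
assert human_readable_to_bytes('20KB') == 20 * 1024
assert human_readable_to_bytes('2.5M') == 2.5 * 1024 * 1024

# Unexpected form -> 0, not an exception
assert human_readable_to_bytes('oops') == 0

# Strips color codes and other ANSI escape sequences
assert remove_ansi_codes('\x1b[31mred\x1b[0m') == 'red'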
md_utils/url_utils.py DELETED
@@ -1,323 +0,0 @@
- """
-
- url_utils.py
-
- Frequently-used functions for downloading or manipulating URLs
-
- """
-
- #%% Imports and constants
-
- import os
- import re
- import urllib
- import tempfile
- import requests
-
- from functools import partial
- from tqdm import tqdm
- from urllib.parse import urlparse
- from multiprocessing.pool import ThreadPool
- from multiprocessing.pool import Pool
-
- url_utils_temp_dir = None
- max_path_len = 255
-
-
- #%% Download functions
-
- class DownloadProgressBar():
-     """
-     Progress updater based on the progressbar2 package.
-
-     https://stackoverflow.com/questions/37748105/how-to-use-progressbar-module-with-urlretrieve
-     """
-
-     def __init__(self):
-         self.pbar = None
-
-     def __call__(self, block_num, block_size, total_size):
-         if not self.pbar:
-             # This is a pretty random import I'd rather not depend on outside of the
-             # rare case where it's used, so importing locally
-             # pip install progressbar2
-             import progressbar
-             self.pbar = progressbar.ProgressBar(max_value=total_size)
-             self.pbar.start()
-
-         downloaded = block_num * block_size
-         if downloaded < total_size:
-             self.pbar.update(downloaded)
-         else:
-             self.pbar.finish()
-
-
- def get_temp_folder(preferred_name='url_utils'):
-     """
-     Gets a temporary folder for use within this module.
-
-     Args:
-         preferred_name (str, optional): subfolder to use within the system temp folder
-
-     Returns:
-         str: the full path to the temporary subfolder
-     """
-     global url_utils_temp_dir
-
-     if url_utils_temp_dir is None:
-         url_utils_temp_dir = os.path.join(tempfile.gettempdir(),preferred_name)
-         os.makedirs(url_utils_temp_dir,exist_ok=True)
-
-     return url_utils_temp_dir
-
-
- def download_url(url,
-                  destination_filename=None,
-                  progress_updater=None,
-                  force_download=False,
-                  verbose=True):
-     """
-     Downloads a URL to a file. If no file is specified, creates a temporary file,
-     making a best effort to avoid filename collisions.
-
-     Prints some diagnostic information and makes sure to omit SAS tokens from printouts.
-
-     Args:
-         url (str): the URL to download
-         destination_filename (str, optional): the target filename; if None, will create
-             a file in system temp space
-         progress_updater (object or bool, optional): can be None, False, True, or a
-             specific callable object. If None or False, no progress update will be
-             displayed. If True, a default progress bar will be created.
-         force_download (bool, optional): download this file even if [destination_filename]
-             exists
-         verbose (bool, optional): enable additional debug console output
-
-     Returns:
-         str: the filename to which [url] was downloaded; the same as [destination_filename]
-             if [destination_filename] was not None
-     """
-
-     if progress_updater is not None and isinstance(progress_updater,bool):
-         if not progress_updater:
-             progress_updater = None
-         else:
-             progress_updater = DownloadProgressBar()
-
-     url_no_sas = url.split('?')[0]
-
-     if destination_filename is None:
-         target_folder = get_temp_folder()
-         url_without_sas = url.split('?', 1)[0]
-
-         # This does not guarantee uniqueness, hence "semi-best-effort"
-         url_as_filename = re.sub(r'\W+', '', url_without_sas)
-         n_folder_chars = len(url_utils_temp_dir)
-         if len(url_as_filename) + n_folder_chars > max_path_len:
-             print('Warning: truncating filename target to {} characters'.format(max_path_len))
-             url_as_filename = url_as_filename[-1*(max_path_len-n_folder_chars):]
-         destination_filename = \
-             os.path.join(target_folder,url_as_filename)
-
-     if (not force_download) and (os.path.isfile(destination_filename)):
-         if verbose:
-             print('Bypassing download of already-downloaded file {}'.format(os.path.basename(url_no_sas)))
-     else:
-         if verbose:
-             print('Downloading file {} to {}'.format(os.path.basename(url_no_sas),destination_filename),end='')
-         target_dir = os.path.dirname(destination_filename)
-         os.makedirs(target_dir,exist_ok=True)
-         urllib.request.urlretrieve(url, destination_filename, progress_updater)
-         assert(os.path.isfile(destination_filename))
-         nBytes = os.path.getsize(destination_filename)
-         if verbose:
-             print('...done, {} bytes.'.format(nBytes))
-
-     return destination_filename
-
-
- def download_relative_filename(url, output_base, verbose=False):
-     """
-     Downloads a URL to output_base, preserving the relative path. The path is relative to
-     the site, so:
-
-     https://abc.com/xyz/123.txt
-
-     ...will get downloaded to:
-
-     output_base/xyz/123.txt
-
-     Args:
-         url (str): the URL to download
-         output_base (str): the base folder to which we should download this file
-         verbose (bool, optional): enable additional debug console output
-
-     Returns:
-         str: the local destination filename
-     """
-
-     p = urlparse(url)
-     # Remove the leading '/'
-     assert p.path.startswith('/'); relative_filename = p.path[1:]
-     destination_filename = os.path.join(output_base,relative_filename)
-     return download_url(url, destination_filename, verbose=verbose)
-
-
- def _do_parallelized_download(download_info,overwrite=False,verbose=False):
-     """
-     Internal function for download parallelization.
-     """
-
-     url = download_info['url']
-     target_file = download_info['target_file']
-     result = {'status':'unknown','url':url,'target_file':target_file}
-
-     if ((os.path.isfile(target_file)) and (not overwrite)):
-         if verbose:
-             print('Skipping existing file {}'.format(target_file))
-         result['status'] = 'skipped'
-         return result
-     try:
-         download_url(url=url,
-                      destination_filename=target_file,
-                      verbose=verbose,
-                      force_download=overwrite)
-     except Exception as e:
-         print('Warning: error downloading URL {}: {}'.format(
-             url,str(e)))
-         result['status'] = 'error: {}'.format(str(e))
-         return result
-
-     result['status'] = 'success'
-     return result
-
-
- def parallel_download_urls(url_to_target_file,verbose=False,overwrite=False,
-                            n_workers=20,pool_type='thread'):
-     """
-     Downloads a list of URLs to local files.
-
-     Catches exceptions and reports them in the returned "results" array.
-
-     Args:
-         url_to_target_file (dict): a dict mapping URLs to local filenames
-         verbose (bool, optional): enable additional debug console output
-         overwrite (bool, optional): whether to overwrite existing local files
-         n_workers (int, optional): number of concurrent workers, set to <=1 to disable
-             parallelization
-         pool_type (str, optional): worker type to use; should be 'thread' or 'process'
-
-     Returns:
-         list: list of dicts with keys:
-             - 'url': the URL this item refers to
-             - 'status': 'skipped', 'success', or a string starting with 'error'
-             - 'target_file': the local filename to which we downloaded (or tried to
-               download) this URL
-     """
-
-     all_download_info = []
-
-     print('Preparing download list')
-     for url in tqdm(url_to_target_file):
-         download_info = {}
-         download_info['url'] = url
-         download_info['target_file'] = url_to_target_file[url]
-         all_download_info.append(download_info)
-
-     print('Downloading {} images on {} workers'.format(
-         len(all_download_info),n_workers))
-
-     if n_workers <= 1:
-
-         results = []
-
-         for download_info in tqdm(all_download_info):
-             result = _do_parallelized_download(download_info,overwrite=overwrite,verbose=verbose)
-             results.append(result)
-
-     else:
-
-         if pool_type == 'thread':
-             pool = ThreadPool(n_workers)
-         else:
-             assert pool_type == 'process', 'Unsupported pool type {}'.format(pool_type)
-             pool = Pool(n_workers)
-
-         print('Starting a {} pool with {} workers'.format(pool_type,n_workers))
-
-         results = list(tqdm(pool.imap(
-             partial(_do_parallelized_download,overwrite=overwrite,verbose=verbose),
-             all_download_info), total=len(all_download_info)))
-
-     return results
-
-
- def test_url(url, error_on_failure=True, timeout=None):
-     """
-     Tests the availability of [url], returning an HTTP status code.
-
-     Args:
-         url (str): URL to test
-         error_on_failure (bool, optional): whether to error (vs. just returning an
-             error code) if accessing this URL fails
-         timeout (int, optional): timeout in seconds to wait before considering this
-             access attempt to be a failure; see requests.head() for precise documentation
-
-     Returns:
-         int: HTTP status code (200 for success)
-     """
-
-     # r = requests.get(url, stream=True, verify=True, timeout=timeout)
-     r = requests.head(url, stream=True, verify=True, timeout=timeout)
-
-     if error_on_failure and r.status_code != 200:
-         raise ValueError('Could not access {}: error {}'.format(url,r.status_code))
-     return r.status_code
-
-
- def test_urls(urls, error_on_failure=True, n_workers=1, pool_type='thread', timeout=None):
-     """
-     Verifies that URLs are available (i.e., that they return status 200). By default,
-     errors if any URL is unavailable.
-
-     Args:
-         urls (list): list of URLs to test
-         error_on_failure (bool, optional): whether to error (vs. just returning an
-             error code) if accessing a URL fails
-         n_workers (int, optional): number of concurrent workers, set to <=1 to disable
-             parallelization
-         pool_type (str, optional): worker type to use; should be 'thread' or 'process'
-         timeout (int, optional): timeout in seconds to wait before considering this
-             access attempt to be a failure; see requests.head() for precise documentation
-
-     Returns:
-         list: a list of HTTP status codes, the same length and order as [urls]
-     """
-
-     if n_workers <= 1:
-
-         status_codes = []
-
-         for url in tqdm(urls):
-
-             r = requests.get(url, timeout=timeout)
-
-             if error_on_failure and r.status_code != 200:
-                 raise ValueError('Could not access {}: error {}'.format(url,r.status_code))
-             status_codes.append(r.status_code)
-
-     else:
-
-         if pool_type == 'thread':
-             pool = ThreadPool(n_workers)
-         else:
-             assert pool_type == 'process', 'Unsupported pool type {}'.format(pool_type)
-             pool = Pool(n_workers)
-
-         print('Starting a {} pool with {} workers'.format(pool_type,n_workers))
-
-         status_codes = list(tqdm(pool.imap(
-             partial(test_url,error_on_failure=error_on_failure,timeout=timeout),
-             urls), total=len(urls)))
-
-     return status_codes
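Finally, a usage sketch for the deleted download helpers, again assuming the 5.0.10 md_utils package. The URLs and target paths below are placeholders; a verbatim run would report per-URL failures rather than crash, since parallel_download_urls catches download exceptions and test_urls is called here with error_on_failure=False.

from md_utils.url_utils import parallel_download_urls, test_urls

# Placeholder URLs and destinations; substitute real values
url_to_target_file = {
    'https://example.com/images/cam01.jpg': '/tmp/downloads/cam01.jpg',
    'https://example.com/images/cam02.jpg': '/tmp/downloads/cam02.jpg',
}

# Availability check: returns one HTTP status code per URL (200 = OK)
status_codes = test_urls(list(url_to_target_file.keys()),
                         error_on_failure=False, n_workers=2)

# Download over a thread pool; each result dict carries 'url', 'target_file',
# and a 'status' of 'success', 'skipped', or 'error: ...'
results = parallel_download_urls(url_to_target_file, n_workers=2, pool_type='thread')
failed = [r for r in results if r['status'].startswith('error')]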