PyPI - megadetector - Versions diffs - 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl - Mend

megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (191)

api/__init__.py +0 -0
api/batch_processing/__init__.py +0 -0
api/batch_processing/api_core/__init__.py +0 -0
api/batch_processing/api_core/batch_service/__init__.py +0 -0
api/batch_processing/api_core/batch_service/score.py +0 -1
api/batch_processing/api_core/server_job_status_table.py +0 -1
api/batch_processing/api_core_support/__init__.py +0 -0
api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
api/batch_processing/api_support/__init__.py +0 -0
api/batch_processing/api_support/summarize_daily_activity.py +0 -1
api/batch_processing/data_preparation/__init__.py +0 -0
api/batch_processing/data_preparation/manage_local_batch.py +93 -79
api/batch_processing/data_preparation/manage_video_batch.py +8 -8
api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
api/batch_processing/postprocessing/__init__.py +0 -0
api/batch_processing/postprocessing/add_max_conf.py +12 -12
api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
api/batch_processing/postprocessing/compare_batch_results.py +114 -44
api/batch_processing/postprocessing/convert_output_format.py +62 -19
api/batch_processing/postprocessing/load_api_results.py +17 -20
api/batch_processing/postprocessing/md_to_coco.py +31 -21
api/batch_processing/postprocessing/md_to_labelme.py +165 -68
api/batch_processing/postprocessing/merge_detections.py +40 -15
api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
api/synchronous/__init__.py +0 -0
api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
api/synchronous/api_core/animal_detection_api/config.py +35 -35
api/synchronous/api_core/tests/__init__.py +0 -0
api/synchronous/api_core/tests/load_test.py +109 -109
classification/__init__.py +0 -0
classification/aggregate_classifier_probs.py +21 -24
classification/analyze_failed_images.py +11 -13
classification/cache_batchapi_outputs.py +51 -51
classification/create_classification_dataset.py +69 -68
classification/crop_detections.py +54 -53
classification/csv_to_json.py +97 -100
classification/detect_and_crop.py +105 -105
classification/evaluate_model.py +43 -42
classification/identify_mislabeled_candidates.py +47 -46
classification/json_to_azcopy_list.py +10 -10
classification/json_validator.py +72 -71
classification/map_classification_categories.py +44 -43
classification/merge_classification_detection_output.py +68 -68
classification/prepare_classification_script.py +157 -154
classification/prepare_classification_script_mc.py +228 -228
classification/run_classifier.py +27 -26
classification/save_mislabeled.py +30 -30
classification/train_classifier.py +20 -20
classification/train_classifier_tf.py +21 -22
classification/train_utils.py +10 -10
data_management/__init__.py +0 -0
data_management/annotations/__init__.py +0 -0
data_management/annotations/annotation_constants.py +18 -31
data_management/camtrap_dp_to_coco.py +238 -0
data_management/cct_json_utils.py +107 -59
data_management/cct_to_md.py +176 -158
data_management/cct_to_wi.py +247 -219
data_management/coco_to_labelme.py +272 -0
data_management/coco_to_yolo.py +86 -62
data_management/databases/__init__.py +0 -0
data_management/databases/add_width_and_height_to_db.py +20 -16
data_management/databases/combine_coco_camera_traps_files.py +35 -31
data_management/databases/integrity_check_json_db.py +130 -83
data_management/databases/subset_json_db.py +25 -16
data_management/generate_crops_from_cct.py +27 -45
data_management/get_image_sizes.py +188 -144
data_management/importers/add_nacti_sizes.py +8 -8
data_management/importers/add_timestamps_to_icct.py +78 -78
data_management/importers/animl_results_to_md_results.py +158 -160
data_management/importers/auckland_doc_test_to_json.py +9 -9
data_management/importers/auckland_doc_to_json.py +8 -8
data_management/importers/awc_to_json.py +7 -7
data_management/importers/bellevue_to_json.py +15 -15
data_management/importers/cacophony-thermal-importer.py +13 -13
data_management/importers/carrizo_shrubfree_2018.py +8 -8
data_management/importers/carrizo_trail_cam_2017.py +8 -8
data_management/importers/cct_field_adjustments.py +9 -9
data_management/importers/channel_islands_to_cct.py +10 -10
data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
data_management/importers/ena24_to_json.py +7 -7
data_management/importers/filenames_to_json.py +8 -8
data_management/importers/helena_to_cct.py +7 -7
data_management/importers/idaho-camera-traps.py +7 -7
data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
data_management/importers/jb_csv_to_json.py +9 -9
data_management/importers/mcgill_to_json.py +8 -8
data_management/importers/missouri_to_json.py +18 -18
data_management/importers/nacti_fieldname_adjustments.py +10 -10
data_management/importers/noaa_seals_2019.py +8 -8
data_management/importers/pc_to_json.py +7 -7
data_management/importers/plot_wni_giraffes.py +7 -7
data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
data_management/importers/prepare_zsl_imerit.py +7 -7
data_management/importers/rspb_to_json.py +8 -8
data_management/importers/save_the_elephants_survey_A.py +8 -8
data_management/importers/save_the_elephants_survey_B.py +9 -9
data_management/importers/snapshot_safari_importer.py +26 -26
data_management/importers/snapshot_safari_importer_reprise.py +665 -665
data_management/importers/snapshot_serengeti_lila.py +14 -14
data_management/importers/sulross_get_exif.py +8 -9
data_management/importers/timelapse_csv_set_to_json.py +11 -11
data_management/importers/ubc_to_json.py +13 -13
data_management/importers/umn_to_json.py +7 -7
data_management/importers/wellington_to_json.py +8 -8
data_management/importers/wi_to_json.py +9 -9
data_management/importers/zamba_results_to_md_results.py +181 -181
data_management/labelme_to_coco.py +309 -159
data_management/labelme_to_yolo.py +103 -60
data_management/lila/__init__.py +0 -0
data_management/lila/add_locations_to_island_camera_traps.py +9 -9
data_management/lila/add_locations_to_nacti.py +147 -147
data_management/lila/create_lila_blank_set.py +114 -31
data_management/lila/create_lila_test_set.py +8 -8
data_management/lila/create_links_to_md_results_files.py +106 -106
data_management/lila/download_lila_subset.py +92 -90
data_management/lila/generate_lila_per_image_labels.py +56 -43
data_management/lila/get_lila_annotation_counts.py +18 -15
data_management/lila/get_lila_image_counts.py +11 -11
data_management/lila/lila_common.py +103 -70
data_management/lila/test_lila_metadata_urls.py +132 -116
data_management/ocr_tools.py +173 -128
data_management/read_exif.py +161 -99
data_management/remap_coco_categories.py +84 -0
data_management/remove_exif.py +58 -62
data_management/resize_coco_dataset.py +32 -44
data_management/wi_download_csv_to_coco.py +246 -0
data_management/yolo_output_to_md_output.py +86 -73
data_management/yolo_to_coco.py +535 -95
detection/__init__.py +0 -0
detection/detector_training/__init__.py +0 -0
detection/process_video.py +85 -33
detection/pytorch_detector.py +43 -25
detection/run_detector.py +157 -72
detection/run_detector_batch.py +189 -114
detection/run_inference_with_yolov5_val.py +118 -51
detection/run_tiled_inference.py +113 -42
detection/tf_detector.py +51 -28
detection/video_utils.py +606 -521
docs/source/conf.py +43 -0
md_utils/__init__.py +0 -0
md_utils/azure_utils.py +9 -9
md_utils/ct_utils.py +249 -70
md_utils/directory_listing.py +59 -64
md_utils/md_tests.py +968 -862
md_utils/path_utils.py +655 -155
md_utils/process_utils.py +157 -133
md_utils/sas_blob_utils.py +20 -20
md_utils/split_locations_into_train_val.py +45 -32
md_utils/string_utils.py +33 -10
md_utils/url_utils.py +208 -27
md_utils/write_html_image_list.py +51 -35
md_visualization/__init__.py +0 -0
md_visualization/plot_utils.py +102 -109
md_visualization/render_images_with_thumbnails.py +34 -34
md_visualization/visualization_utils.py +908 -311
md_visualization/visualize_db.py +109 -58
md_visualization/visualize_detector_output.py +61 -42
{megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
megadetector-5.0.9.dist-info/RECORD +224 -0
{megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
{megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
taxonomy_mapping/__init__.py +0 -0
taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
taxonomy_mapping/map_new_lila_datasets.py +154 -154
taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
taxonomy_mapping/preview_lila_taxonomy.py +591 -591
taxonomy_mapping/retrieve_sample_image.py +12 -12
taxonomy_mapping/simple_image_download.py +11 -11
taxonomy_mapping/species_lookup.py +10 -10
taxonomy_mapping/taxonomy_csv_checker.py +18 -18
taxonomy_mapping/taxonomy_graph.py +47 -47
taxonomy_mapping/validate_lila_category_mappings.py +83 -76
data_management/cct_json_to_filename_json.py +0 -89
data_management/cct_to_csv.py +0 -140
data_management/databases/remove_corrupted_images_from_db.py +0 -191
detection/detector_training/copy_checkpoints.py +0 -43
md_visualization/visualize_megadb.py +0 -183
megadetector-5.0.7.dist-info/RECORD +0 -202
{megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0

md_utils/process_utils.py CHANGED Viewed

@@ -1,133 +1,157 @@
-########
-#
-# process_utils.py
-#
-# Run something at the command line and capture the output, based on:
-#
-# https://stackoverflow.com/questions/4417546/constantly-print-subprocess-output-while-process-is-running
-#
-# Includes handy example code for doing this on multiple processes/threads.
-#
-########
-#%% Constants, imports, and environment
-import os
-import subprocess
-os.environ["PYTHONUNBUFFERED"] = "1"
-def execute(cmd,encoding=None,errors=None,env=None,verbose=False):
-    """
-    Run [cmd] (a single string) in a shell, yielding each line of output to the caller.
-    The "encoding", "errors", and "env" parameters are passed directly to subprocess.Popen().
-    "verbose" only impacts output about process management, it is not related to printing
-    output from the child process.
-    """
-    if verbose:
-        if encoding is not None:
-            print('Launching child process with non-default encoding {}'.format(encoding))
-        if errors is not None:
-            print('Launching child process with non-default text error handling {}'.format(errors))
-        if env is not None:
-            print('Launching child process with non-default environment {}'.format(str(env)))
-    # https://stackoverflow.com/questions/4417546/constantly-print-subprocess-output-while-process-is-running
-    popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-                             shell=True, universal_newlines=True, encoding=encoding,
-                             errors=errors, env=env)
-    for stdout_line in iter(popen.stdout.readline, ""):
-        yield stdout_line
-    popen.stdout.close()
-    return_code = popen.wait()
-    if return_code:
-        raise subprocess.CalledProcessError(return_code, cmd)
-def execute_and_print(cmd,print_output=True,encoding=None,errors=None,env=None,verbose=False):
-    """
-    Run [cmd] (a single string) in a shell, capturing and printing output.  Returns
-    a dictionary with fields "status" and "output".
-    The "encoding", "errors", and "env" parameters are passed directly to subprocess.Popen().
-    "verbose" only impacts output about process management, it is not related to printing
-    output from the child process.
-    """
-    to_return = {'status':'unknown','output':''}
-    output = []
-    try:
-        for s in execute(cmd,encoding=encoding,errors=errors,env=env,verbose=verbose):
-            output.append(s)
-            if print_output:
-                print(s,end='',flush=True)
-        to_return['status'] = 0
-    except subprocess.CalledProcessError as cpe:
-        print('execute_and_print caught error: {} ({})'.format(cpe.output,str(cpe)))
-        to_return['status'] = cpe.returncode
-    to_return['output'] = output
-    return to_return
-#%% Single-threaded test driver for execute_and_print
-if False:
-    pass
-    #%%
-    if os.name == 'nt':
-        execute_and_print('echo hello && ping -n 5 127.0.0.1 && echo goodbye')
-    else:
-        execute_and_print('echo hello && sleep 1 && echo goodbye')
-#%% Parallel test driver for execute_and_print
-if False:
-    pass
-    #%%
-    from functools import partial
-    from multiprocessing.pool import ThreadPool as ThreadPool
-    from multiprocessing.pool import Pool as Pool
-    n_workers = 10
-    # Should we use threads (vs. processes) for parallelization?
-    use_threads = True
-    test_data = ['a','b','c','d']
-    def process_sample(s):
-        return execute_and_print('echo ' + s,True)
-    if n_workers == 1:
-        results = []
-        for i_sample,sample in enumerate(test_data):
-            results.append(process_sample(sample))
-    else:
-        n_threads = min(n_workers,len(test_data))
-        if use_threads:
-            print('Starting parallel thread pool with {} workers'.format(n_threads))
-            pool = ThreadPool(n_threads)
-        else:
-            print('Starting parallel process pool with {} workers'.format(n_threads))
-            pool = Pool(n_threads)
-        results = list(pool.map(partial(process_sample),test_data))
-        for r in results:
-            print(r)
+"""
+process_utils.py
+Run something at the command line and capture the output, based on:
+https://stackoverflow.com/questions/4417546/constantly-print-subprocess-output-while-process-is-running
+Includes handy example code for doing this on multiple processes/threads.
+"""
+#%% Constants, imports, and environment
+import os
+import subprocess
+os.environ["PYTHONUNBUFFERED"] = "1"
+def execute(cmd,encoding=None,errors=None,env=None,verbose=False):
+    """
+    Run [cmd] (a single string) in a shell, yielding each line of output to the caller.
+    The "encoding", "errors", and "env" parameters are passed directly to subprocess.Popen().
+    "verbose" only impacts output about process management, it is not related to printing
+    output from the child process.
+    Args:
+        cmd (str): command to run
+        encoding (str, optional): stdout encoding, see Popen() documentation
+        errors (str, optional): error handling, see Popen() documentation
+        env (dict, optional): environment variables, see Popen() documentation
+        verbose (bool, optional): enable additional debug console output
+    Returns:
+        int: the command's return code, always zero, otherwise a CalledProcessError is raised
+    """
+    if verbose:
+        if encoding is not None:
+            print('Launching child process with non-default encoding {}'.format(encoding))
+        if errors is not None:
+            print('Launching child process with non-default text error handling {}'.format(errors))
+        if env is not None:
+            print('Launching child process with non-default environment {}'.format(str(env)))
+    # https://stackoverflow.com/questions/4417546/constantly-print-subprocess-output-while-process-is-running
+    popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                             shell=True, universal_newlines=True, encoding=encoding,
+                             errors=errors, env=env)
+    for stdout_line in iter(popen.stdout.readline, ""):
+        yield stdout_line
+    popen.stdout.close()
+    return_code = popen.wait()
+    if return_code:
+        raise subprocess.CalledProcessError(return_code, cmd)
+    return return_code
+def execute_and_print(cmd,print_output=True,encoding=None,errors=None,env=None,verbose=False):
+    """
+    Run [cmd] (a single string) in a shell, capturing and printing output.  Returns
+    a dictionary with fields "status" and "output".
+    The "encoding", "errors", and "env" parameters are passed directly to subprocess.Popen().
+    "verbose" only impacts output about process management, it is not related to printing
+    output from the child process.
+    Args:
+        cmd (str): command to run
+        print_output (bool, optional): whether to print output from [cmd]
+        encoding (str, optional): stdout encoding, see Popen() documentation
+        errors (str, optional): error handling, see Popen() documentation
+        env (dict, optional): environment variables, see Popen() documentation
+        verbose (bool, optional): enable additional debug console output
+    Returns:
+        dict: a dictionary with fields "status" (the process return code) and "output"
+        (the content of stdout)
+    """
+    to_return = {'status':'unknown','output':''}
+    output = []
+    try:
+        for s in execute(cmd,encoding=encoding,errors=errors,env=env,verbose=verbose):
+            output.append(s)
+            if print_output:
+                print(s,end='',flush=True)
+        to_return['status'] = 0
+    except subprocess.CalledProcessError as cpe:
+        print('execute_and_print caught error: {} ({})'.format(cpe.output,str(cpe)))
+        to_return['status'] = cpe.returncode
+    to_return['output'] = output
+    return to_return
+#%% Single-threaded test driver for execute_and_print
+if False:
+    pass
+    #%%
+    if os.name == 'nt':
+        execute_and_print('echo hello && ping -n 5 127.0.0.1 && echo goodbye')
+    else:
+        execute_and_print('echo hello && sleep 1 && echo goodbye')
+#%% Parallel test driver for execute_and_print
+if False:
+    pass
+    #%%
+    from functools import partial
+    from multiprocessing.pool import ThreadPool as ThreadPool
+    from multiprocessing.pool import Pool as Pool
+    n_workers = 10
+    # Should we use threads (vs. processes) for parallelization?
+    use_threads = True
+    test_data = ['a','b','c','d']
+    def process_sample(s):
+        return execute_and_print('echo ' + s,True)
+    if n_workers == 1:
+        results = []
+        for i_sample,sample in enumerate(test_data):
+            results.append(process_sample(sample))
+    else:
+        n_threads = min(n_workers,len(test_data))
+        if use_threads:
+            print('Starting parallel thread pool with {} workers'.format(n_threads))
+            pool = ThreadPool(n_threads)
+        else:
+            print('Starting parallel process pool with {} workers'.format(n_threads))
+            pool = Pool(n_threads)
+        results = list(pool.map(partial(process_sample),test_data))
+        for r in results:
+            print(r)

md_utils/sas_blob_utils.py CHANGED Viewed

@@ -1,23 +1,23 @@
-########
-#
-# sas_blob_utils.py
-#
-# This module contains helper functions for dealing with Shared Access Signatures
-# (SAS) tokens for Azure Blob Storage.
-#
-# The default Azure Storage SAS URI format is:
-#
-# https://<account>.blob.core.windows.net/<container>/<blob>?<sas_token>
-#
-# This module assumes azure-storage-blob version 12.5.
-#
-# Documentation for Azure Blob Storage:
-# docs.microsoft.com/en-us/azure/developer/python/sdk/storage/storage-blob-readme
-#
-# Documentation for SAS:
-# docs.microsoft.com/en-us/azure/storage/common/storage-sas-overview
-#
-########
+"""
+sas_blob_utils.py
+This module contains helper functions for dealing with Shared Access Signatures
+(SAS) tokens for Azure Blob Storage.
+The default Azure Storage SAS URI format is:
+https://<account>.blob.core.windows.net/<container>/<blob>?<sas_token>
+This module assumes azure-storage-blob version 12.5.
+Documentation for Azure Blob Storage:
+docs.microsoft.com/en-us/azure/developer/python/sdk/storage/storage-blob-readme
+Documentation for SAS:
+docs.microsoft.com/en-us/azure/storage/common/storage-sas-overview
+"""
 #%% Imports

md_utils/split_locations_into_train_val.py CHANGED Viewed

@@ -1,15 +1,15 @@
-########
-#
-# split_locations_into_train_val.py
-#
-# Split a list of location IDs into training and validation, targeting a specific
-# train/val split for each category, but allowing some categories to be tighter or looser
-# than others.  Does nothing particularly clever, just randomly splits locations into
-# train/val lots of times using the target val fraction, and picks the one that meets the
-# specified constraints and minimizes weighted error, where "error" is defined as the
-# sum of each class's absolute divergence from the target val fraction.
-#
-########
+"""
+split_locations_into_train_val.py
+Splits a list of location IDs into training and validation, targeting a specific
+train/val split for each category, but allowing some categories to be tighter or looser
+than others.  Does nothing particularly clever, just randomly splits locations into
+train/val lots of times using the target val fraction, and picks the one that meets the
+specified constraints and minimizes weighted error, where "error" is defined as the
+sum of each class's absolute divergence from the target val fraction.
+"""
 #%% Imports/constants
@@ -30,31 +30,44 @@ def split_locations_into_train_val(location_to_category_counts,
                                    category_to_error_weight=None,
                                    default_max_allowable_error=0.1):
     """
-    Split a list of location IDs into training and validation, targeting a specific
+    Splits a list of location IDs into training and validation, targeting a specific
     train/val split for each category, but allowing some categories to be tighter or looser
     than others.  Does nothing particularly clever, just randomly splits locations into
     train/val lots of times using the target val fraction, and picks the one that meets the
     specified constraints and minimizes weighted error, where "error" is defined as the
     sum of each class's absolute divergence from the target val fraction.
-    location_to_category_counts should be a dict mapping location IDs to dicts,
-    with each dict mapping a category name to a count.  Any categories not present in a
-    particular dict are assumed to have a count of zero for that location.
-    If not None, category_to_max_allowable_error should be a dict mapping category names
-    to maximum allowable errors.  These are hard constraints, but you can specify a subset
-    of categories.  Categories not included here have a maximum error of Inf.
-    If not None, category_to_error_weight should be a dict mapping category names to
-    error weights.  You can specify a subset of categories.  Categories not included here
-    have a weight of 1.0.
-    default_max_allowable_error is the maximum allowable error for categories not present in
-    category_to_max_allowable_error.  Set to None (or >= 1.0) to disable hard constraints for
-    categories not present in category_to_max_allowable_error
-    returns val_locations,category_to_val_fraction
+    Args:
+        location_to_category_counts (dict): a dict mapping location IDs to dicts,
+            with each dict mapping a category name to a count.  Any categories not present
+            in a particular dict are assumed to have a count of zero for that location.
+            For example:
+            .. code-block:: none
+                {'location-000': {'bear':4,'wolf':10},
+                 'location-001': {'bear':12,'elk':20}}
+        n_random_seeds (int, optional): number of random seeds to try, always starting from zero
+        target_val_fraction (float, optional): fraction of images containing each species we'd
+            like to put in the val split
+        category_to_max_allowable_error (dict, optional): a dict mapping category names
+            to maximum allowable errors.  These are hard constraints (i.e., we will error
+            if we can't meet them).  Does not need to include all categories; categories not
+            included will be assigned a maximum error according to [default_max_allowable_error].
+            If this is None, no hard constraints are applied.
+        category_to_error_weight (dict, optional): a dict mapping category names to
+            error weights.  You can specify a subset of categories; categories not included here
+            have a weight of 1.0.  If None, all categories have the same weight.
+        default_max_allowable_error (float, optional): the maximum allowable error for categories not
+            present in [category_to_max_allowable_error].  Set to None (or >= 1.0) to disable hard
+            constraints for categories not present in [category_to_max_allowable_error]
+    Returns:
+        tuple: A two-element tuple:
+            - list of location IDs in the val split
+            - a dict mapping category names to the fraction of images in the val split
     """
     location_ids = list(location_to_category_counts.keys())
@@ -84,7 +97,7 @@ def split_locations_into_train_val(location_to_category_counts,
     # random_seed = 0
     def compute_seed_errors(random_seed):
         """
-        Compute the per-category error for a specific random seed.
+        Computes the per-category error for a specific random seed.
         returns weighted_average_error,category_to_val_fraction
         """

md_utils/string_utils.py CHANGED Viewed

@@ -1,16 +1,27 @@
-########
-#
-# string_utils.py
-#
-# Miscellaneous string utilities
-#
-########
+"""
+string_utils.py
+Miscellaneous string utilities.
+"""
+#%% Imports
 import re
+#%% Functions
 def is_float(s):
     """
-    Checks whether a string represents a valid float
+    Checks whether [s] is an object (typically a string) that can be cast to a float
+    Args:
+        s (object): object to evaluate
+    Returns:
+        bool: True if s successfully casts to a float, otherwise False
     """
     try:
@@ -23,10 +34,16 @@ def is_float(s):
 def human_readable_to_bytes(size):
     """
     Given a human-readable byte string (e.g. 2G, 10GB, 30MB, 20KB),
-    return the number of bytes.  Will return 0 if the argument has
+    returns the number of bytes.  Will return 0 if the argument has
     unexpected form.
     https://gist.github.com/beugley/ccd69945346759eb6142272a6d69b4e0
+    Args:
+        size (str): string representing a size
+    Returns:
+        int: the corresponding size in bytes
     """
     size = re.sub(r'\s+', '', size)
@@ -61,9 +78,15 @@ def human_readable_to_bytes(size):
 def remove_ansi_codes(s):
     """
-    Remove ANSI escape codes from a string.
+    Removes ANSI escape codes from a string.
     https://stackoverflow.com/questions/14693701/how-can-i-remove-the-ansi-escape-sequences-from-a-string-in-python#14693789
+    Args:
+        s (str): the string to de-ANSI-i-fy
+    Returns:
+        str: A copy of [s] without ANSI codes
     """
     ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
     return ansi_escape.sub('', s)

megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl