megadetector 5.0.8-py3-none-any.whl → 5.0.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +65 -65
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
- api/batch_processing/postprocessing/compare_batch_results.py +113 -43
- api/batch_processing/postprocessing/convert_output_format.py +41 -16
- api/batch_processing/postprocessing/load_api_results.py +16 -17
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +52 -22
- api/batch_processing/postprocessing/merge_detections.py +14 -14
- api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
- api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +102 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -263
- data_management/coco_to_yolo.py +79 -58
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +62 -24
- data_management/databases/subset_json_db.py +24 -15
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -162
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -158
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +7 -7
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +65 -24
- data_management/labelme_to_yolo.py +8 -8
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +13 -13
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +44 -110
- data_management/lila/generate_lila_per_image_labels.py +55 -42
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +96 -33
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +110 -97
- data_management/remap_coco_categories.py +83 -83
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +30 -23
- data_management/wi_download_csv_to_coco.py +246 -239
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +300 -60
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +179 -113
- detection/run_inference_with_yolov5_val.py +108 -48
- detection/run_tiled_inference.py +111 -40
- detection/tf_detector.py +51 -29
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +228 -68
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -871
- md_utils/path_utils.py +460 -134
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +176 -60
- md_utils/write_html_image_list.py +40 -33
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +597 -291
- md_visualization/visualize_db.py +76 -48
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- megadetector-5.0.8.dist-info/RECORD +0 -205
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0
md_utils/url_utils.py
CHANGED

@@ -1,10 +1,10 @@
-
-
-
-
-
-
-
+"""
+
+url_utils.py
+
+Frequently-used functions for downloading or manipulating URLs
+
+"""
 
 #%% Imports and constants
 
@@ -14,9 +14,11 @@ import urllib
 import tempfile
 import requests
 
+from functools import partial
 from tqdm import tqdm
 from urllib.parse import urlparse
 from multiprocessing.pool import ThreadPool
+from multiprocessing.pool import Pool
 
 url_utils_temp_dir = None
 max_path_len = 255
@@ -26,6 +28,8 @@ max_path_len = 255
 
 class DownloadProgressBar():
     """
+    Progress updater based on the progressbar2 package.
+
     https://stackoverflow.com/questions/37748105/how-to-use-progressbar-module-with-urlretrieve
     """
 
@@ -49,7 +53,15 @@ class DownloadProgressBar():
 
 
 def get_temp_folder(preferred_name='url_utils'):
-
+    """
+    Gets a temporary folder for use within this module.
+
+    Args:
+        preferred_name (str, optional): subfolder to use within the system temp folder
+
+    Returns:
+        str: the full path to the temporary subfolder
+    """
     global url_utils_temp_dir
 
     if url_utils_temp_dir is None:
@@ -59,15 +71,31 @@ def get_temp_folder(preferred_name='url_utils'):
     return url_utils_temp_dir
 
 
-def download_url(url,
-
+def download_url(url,
+                 destination_filename=None,
+                 progress_updater=None,
+                 force_download=False,
+                 verbose=True):
     """
-
-
+    Downloads a URL to a file. If no file is specified, creates a temporary file,
+    making a best effort to avoid filename collisions.
 
     Prints some diagnostic information and makes sure to omit SAS tokens from printouts.
 
-
+    Args:
+        url (str): the URL to download
+        destination_filename (str, optional): the target filename; if None, will create
+            a file in system temp space
+        progress_updater (object or bool, optional): can be "None", "False", "True", or a
+            specific callable object. If None or False, no progress updated will be
+            displayed. If True, a default progress bar will be created.
+        force_download (bool, optional): download this file even if [destination_filename]
+            exists.
+        verbose (bool, optional): enable additional debug console output
+
+    Returns:
+        str: the filename to which [url] was downloaded, the same as [destination_filename]
+            if [destination_filename] was not None
     """
 
     if progress_updater is not None and isinstance(progress_updater,bool):
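
The new docstring above pins down download_url's contract. A minimal usage sketch, assuming megadetector 5.0.9 is installed and the function is importable from md_utils.url_utils (per this file's path); the URL is hypothetical:

# Sketch based on the download_url docstring above; the URL is hypothetical.
from md_utils.url_utils import download_url

# With destination_filename=None, the file lands in system temp space and the
# function returns the path it chose; per the docstring, progress_updater=True
# requests a default progress bar, and force_download=True re-downloads over
# an existing file.
local_path = download_url('https://example.com/images/sample.jpg',
                          destination_filename=None,
                          progress_updater=None,
                          force_download=False,
                          verbose=True)
print('Downloaded to {}'.format(local_path))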
@@ -118,46 +146,79 @@ def download_relative_filename(url, output_base, verbose=False):
     ...will get downloaded to:
 
     output_base/xyz/123.txt
+
+    Args:
+        url (str): the URL to download
+        output_base (str): the base folder to which we should download this file
+        verbose (bool, optional): enable additional debug console output
+
+    Returns:
+        str: the local destination filename
     """
 
     p = urlparse(url)
     # remove the leading '/'
     assert p.path.startswith('/'); relative_filename = p.path[1:]
     destination_filename = os.path.join(output_base,relative_filename)
-    download_url(url, destination_filename, verbose=verbose)
+    return download_url(url, destination_filename, verbose=verbose)
 
 
-def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        result['status'] = 'error: {}'.format(str(e))
-        return result
-
-    result['status'] = 'success'
+def _do_parallelized_download(download_info,overwrite=False,verbose=False):
+    """
+    Internal function for download parallelization.
+    """
+
+    url = download_info['url']
+    target_file = download_info['target_file']
+    result = {'status':'unknown','url':url,'target_file':target_file}
+
+    if ((os.path.isfile(target_file)) and (not overwrite)):
+        if verbose:
+            print('Skipping existing file {}'.format(target_file))
+        result['status'] = 'skipped'
+        return result
+    try:
+        download_url(url=url,
+                     destination_filename=target_file,
+                     verbose=verbose,
+                     force_download=overwrite)
+    except Exception as e:
+        print('Warning: error downloading URL {}: {}'.format(
+            url,str(e)))
+        result['status'] = 'error: {}'.format(str(e))
         return result
+
+    result['status'] = 'success'
+    return result
+
 
+def parallel_download_urls(url_to_target_file,verbose=False,overwrite=False,
+                           n_workers=20,pool_type='thread'):
+    """
+    Downloads a list of URLs to local files.
+
+    Catches exceptions and reports them in the returned "results" array.
+
+    Args:
+        url_to_target_file: a dict mapping URLs to local filenames.
+        verbose (bool, optional): enable additional debug console output
+        overwrite (bool, optional): whether to overwrite existing local files
+        n_workers (int, optional): number of concurrent workers, set to <=1 to disable
+            parallelization
+        pool_type (str, optional): worker type to use; should be 'thread' or 'process'
+
+    Returns:
+        list: list of dicts with keys:
+            - 'url': the url this item refers to
+            - 'status': 'skipped', 'success', or a string starting with 'error'
+            - 'target_file': the local filename to which we downloaded (or tried to
+              download) this URL
+    """
+
     all_download_info = []
-
+
+    print('Preparing download list')
+    for url in tqdm(url_to_target_file):
         download_info = {}
         download_info['url'] = url
         download_info['target_file'] = url_to_target_file[url]
@@ -171,37 +232,92 @@ def parallel_download_urls(url_to_target_file,verbose=False,overwrite=False,
         results = []
 
         for download_info in tqdm(all_download_info):
-            result = _do_parallelized_download(download_info,overwrite=overwrite)
+            result = _do_parallelized_download(download_info,overwrite=overwrite,verbose=verbose)
             results.append(result)
 
     else:
 
-
-
-
-
-
+        if pool_type == 'thread':
+            pool = ThreadPool(n_workers)
+        else:
+            assert pool_type == 'process', 'Unsupported pool type {}'.format(pool_type)
+            pool = Pool(n_workers)
+
+        print('Starting a {} pool with {} workers'.format(pool_type,n_workers))
+
+        results = list(tqdm(pool.imap(
+            partial(_do_parallelized_download,overwrite=overwrite,verbose=verbose),
+            all_download_info), total=len(all_download_info)))
+
     return results
 
 
-def
+def test_url(url, error_on_failure=True, timeout=None):
+    """
+    Tests the availability of [url], returning an http status code.
+
+    Args:
+        url (str): URL to test
+        error_on_failure (bool, optional): whether to error (vs. just returning an
+            error code) if accessing this URL fails
+        timeout (int, optional): timeout in seconds to wait before considering this
+            access attempt to be a failure; see requests.head() for precise documentation
+
+    Returns:
+        int: http status code (200 for success)
     """
-    Verify that a list of URLs is available (returns status 200). By default,
-    errors if any URL is unavailable. If error_on_failure is False, returns
-    status codes for each URL.
 
-
+    # r = requests.get(url, stream=True, verify=True, timeout=timeout)
+    r = requests.head(url, stream=True, verify=True, timeout=timeout)
+
+    if error_on_failure and r.status_code != 200:
+        raise ValueError('Could not access {}: error {}'.format(url,r.status_code))
+    return r.status_code
+
+
+def test_urls(urls, error_on_failure=True, n_workers=1, pool_type='thread', timeout=None):
     """
+    Verify that URLs are available (i.e., returns status 200). By default,
+    errors if any URL is unavailable.
 
-
+    Args:
+        urls (list): list of URLs to test
+        error_on_failure (bool, optional): whether to error (vs. just returning an
+            error code) if accessing this URL fails
+        n_workers (int, optional): number of concurrent workers, set to <=1 to disable
+            parallelization
+        pool_type (str, optional): worker type to use; should be 'thread' or 'process'
+        timeout (int, optional): timeout in seconds to wait before considering this
+            access attempt to be a failure; see requests.head() for precise documentation
 
-
+    Returns:
+        list: a list of http status codes, the same length and order as [urls]
+    """
+
+    if n_workers <= 1:
+
+        status_codes = []
 
-
+        for url in tqdm(urls):
+
+            r = requests.get(url, timeout=timeout)
+
+            if error_on_failure and r.status_code != 200:
+                raise ValueError('Could not access {}: error {}'.format(url,r.status_code))
+            status_codes.append(r.status_code)
+
+    else:
+
+        if pool_type == 'thread':
+            pool = ThreadPool(n_workers)
+        else:
+            assert pool_type == 'process', 'Unsupported pool type {}'.format(pool_type)
+            pool = Pool(n_workers)
 
-
-        raise ValueError('Could not access {}: error {}'.format(url,r.status_code))
-        status_codes.append(r.status_code)
+        print('Starting a {} pool with {} workers'.format(pool_type,n_workers))
 
+        status_codes = list(tqdm(pool.imap(
+            partial(test_url,error_on_failure=error_on_failure,timeout=timeout),
+            urls), total=len(urls)))
+
     return status_codes
-
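
Taken together, the additions above give this module a parallel download path and URL health checks. A sketch of how they might be combined, based only on the docstrings above; the URLs and target paths are hypothetical, and the import path follows this file's location in the wheel:

# Sketch based on the docstrings above; URLs and local paths are hypothetical.
from md_utils.url_utils import parallel_download_urls, test_urls

url_to_target_file = {
    'https://example.com/images/0001.jpg': '/tmp/images/0001.jpg',
    'https://example.com/images/0002.jpg': '/tmp/images/0002.jpg'
}

# With error_on_failure=False, this returns one http status code per URL
# instead of raising on the first failure.
status_codes = test_urls(list(url_to_target_file.keys()),
                         error_on_failure=False,
                         n_workers=2,
                         pool_type='thread',
                         timeout=10)

# Each result dict carries 'url', 'target_file', and a 'status' that is
# 'skipped', 'success', or a string starting with 'error'.
results = parallel_download_urls(url_to_target_file,
                                 verbose=False,
                                 overwrite=False,
                                 n_workers=2,
                                 pool_type='thread')
failures = [r for r in results if r['status'].startswith('error')]
print('{} of {} downloads failed'.format(len(failures), len(results)))

A thread pool is the natural default here since downloads are I/O-bound; the 'process' option would mainly matter if per-file work became CPU-bound.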

md_utils/write_html_image_list.py
CHANGED

@@ -1,14 +1,14 @@
-
-
-
-
-
-
-
-
-
-
-
+"""
+
+write_html_image_list.py
+
+Given a list of image file names, writes an HTML file that
+shows all those images, with optional one-line headers above each.
+
+Each "filename" can also be a dict with elements 'filename','title',
+'imageStyle','textStyle', 'linkTarget'
+
+"""
 
 #%% Constants and imports
 
@@ -23,27 +23,34 @@ from md_utils import path_utils
 
 def write_html_image_list(filename=None,images=None,options=None):
     """
-
-
-
-
-    filename
-
-
-
-
-
-
-
-
-
-
-
-
+    Given a list of image file names, writes an HTML file that shows all those images,
+    with optional one-line headers above each.
+
+    Args:
+        filename (str, optional): the .html output file; if None, just returns a valid
+            options dict
+        images (list, optional): the images to write to the .html file; if None, just returns
+            a valid options dict. This can be a flat list of image filenames, or this can
+            be a list of dictionaries with one or more of the following fields:
+
+            - filename (image filename) (required, all other fields are optional)
+            - imageStyle (css style for this image)
+            - textStyle (css style for the title associated with this image)
+            - title (text label for this image)
+            - linkTarget (URL to which this image should link on click)
+
+        options (dict, optional): a dict with one or more of the following fields:
+
+            - fHtml (file pointer to write to, used for splitting write operations over multiple calls)
+            - headerHtml (html text to include before the image list)
+            - trailerHtml (html text to include after the image list)
+            - defaultImageStyle (default css style for images)
+            - defaultTextStyle (default css style for image titles)
+            - maxFiguresPerHtmlFile (max figures for a single HTML file; overflow will be handled by creating
+              multiple files and a TOC with links)
+            - urlEncodeFilenames (default True, e.g. '#' will be replaced by '%23')
+            - urlEncodeLinkTargets (default True, e.g. '#' will be replaced by '%23')
+
     """
 
     # returns an options struct
@@ -78,7 +85,7 @@ def write_html_image_list(filename=None,images=None,options=None):
     if 'maxFiguresPerHtmlFile' not in options or options['maxFiguresPerHtmlFile'] is None:
         options['maxFiguresPerHtmlFile'] = math.inf
 
-    if filename is None:
+    if filename is None or images is None:
         return options
 
     # images may be a list of images or a list of image/style/title dictionaries,
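
A sketch of the calling pattern the new docstring describes, with hypothetical filenames and titles; note that, per the changed line above, calling the function with filename or images left as None just returns a populated options dict:

# Sketch based on the docstring above; filenames and titles are hypothetical.
from md_utils.write_html_image_list import write_html_image_list

# With no arguments, the function returns a valid default options dict
options = write_html_image_list()
options['headerHtml'] = '<h1>Detection samples</h1>'
options['maxFiguresPerHtmlFile'] = 1000

images = [
    {'filename': 'images/0001.jpg', 'title': 'camera A, day'},
    {'filename': 'images/0002.jpg', 'title': 'camera B, night',
     'imageStyle': 'width:600px;'}
]

write_html_image_list(filename='preview.html', images=images, options=options)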