megadetector-5.0.9-py3-none-any.whl → megadetector-5.0.11-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (226)
  1. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.9.dist-info/RECORD +0 -224
  214. megadetector-5.0.9.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0

api/synchronous/api_core/tests/load_test.py
@@ -1,110 +0,0 @@
-
- import os
- import json
- import io
- import random
- import requests
-
- from PIL import Image
- from multiprocessing import Pool
- from datetime import datetime
- from requests_toolbelt import MultipartEncoder
- from requests_toolbelt.multipart import decoder
-
-
- ip_address = '100.100.200.200'
- port = 5050
-
- base_url = 'http://{}:{}/v1/camera-trap/sync/'.format(ip_address, port)
-
-
- def call_api(args):
-     start = datetime.now()
-
-     index, url, params, data, headers = args['index'],args['url'], args['params'], args['data'], args['headers']
-     print('calling api: {} starttime: {}'.format(index, start))
-
-     response = requests.post(url, params=params, data=data, headers=headers)
-     elapsed_time = datetime.now() - start
-     print('\napi {} status code: {}, elapsed time in seconds {}'.format(index, response.status_code, elapsed_time.total_seconds()))
-
-     get_detections(response)
-     return response
-
- def get_detections(response):
-     results = decoder.MultipartDecoder.from_response(response)
-     text_results = {}
-     images = {}
-     for part in results.parts:
-         # part is a BodyPart object with b'Content-Type', and b'Content-Disposition', the later includes 'name' and 'filename' info
-         headers = {}
-         for k, v in part.headers.items():
-             headers[k.decode(part.encoding)] = v.decode(part.encoding)
-
-         if headers.get('Content-Type', None) == 'application/json':
-             text_result = json.loads(part.content.decode())
-
-             print(text_result)
-
-
- def test_load(num_requests, params, max_images=1):
-     requests = []
-
-     # read the images anew for each request
-     index = 0
-     for i in range(num_requests):
-         index += 1
-         files = {}
-         sample_input_dir = '../../../api/synchronous/sample_input/test_images'
-
-         image_files = os.listdir(sample_input_dir)
-         random.shuffle(image_files)
-
-         num_images = 0
-         for i, image_name in enumerate(image_files):
-             if not image_name.lower().endswith('.jpg'):
-                 continue
-
-             if num_images >= max_images:
-                 break
-             else:
-                 num_images += 1
-
-             img_path = os.path.join(sample_input_dir, image_name)
-             with open(img_path, 'rb') as f:
-                 content = f.read()
-             files[image_name] = (image_name, content, 'image/jpeg')
-
-         m = MultipartEncoder(fields=files)
-         args = {
-             'index': index,
-             'url': base_url + 'detect',
-             'params': params,
-             'data': m,
-             'headers': {'Content-Type': m.content_type}
-         }
-         requests.append(args)
-
-     print('starting', num_requests, 'threads...')
-     # images are read and in each request by the time we call the API in map()
-     with Pool(num_requests) as pool:
-         results = pool.map(call_api, requests)
-
-     return results
-
-
- if __name__ == "__main__":
-     params = {
-         'min_confidence': 0.05,
-         'min_rendering_confidence': 0.2,
-         'render': True
-     }
-
-     num_requests = 10
-     max_images = 1
-
-     start = datetime.now()
-     responses = test_load(num_requests, params, max_images=max_images)
-     end = datetime.now()
-     total_time = end - start
-     print('Total time for {} requests: {}'.format(num_requests, total_time))

classification/aggregate_classifier_probs.py
@@ -1,108 +0,0 @@
- """
-
- aggregate_classifier_probs.py
-
- Aggregate probabilities from a classifier's outputs according to a mapping
- from the desired (target) categories to the classifier's categories.
-
- Using the mapping, create a new version of the classifier output CSV with
- probabilities summed within each target category. Also output a new
- "index-to-name" JSON file which identifies the sequential order of the target
- categories.
-
- """
-
- #%% Imports
-
- from __future__ import annotations
-
- import argparse
- import json
-
- import pandas as pd
- from tqdm import tqdm
-
- #%% Example usage
-
- """
- python aggregate_classifier_probs.py \
-     classifier_output.csv.gz \
-     --target-mapping target_to_classifier_labels.json \
-     --output-csv classifier_output_remapped.csv.gz \
-     --output-label-index label_index_remapped.json
- """
-
- #%% Main function
-
- def main(classifier_results_csv_path: str,
-          target_mapping_json_path: str,
-          output_csv_path: str,
-          output_label_index_json_path: str) -> None:
-     """
-     Main function.
-
-     Because the output CSV is often very large, we process it in chunks of 1000
-     rows at a time.
-     """
-
-     chunked_df_iterator = pd.read_csv(
-         classifier_results_csv_path, chunksize=1000, float_precision='high',
-         index_col='path')
-
-     with open(target_mapping_json_path, 'r') as f:
-         target_mapping = json.load(f)
-     target_names = sorted(target_mapping.keys())
-
-     all_classifier_labels: set[str] = set()
-     for classifier_labels in target_mapping.values():
-         assert all_classifier_labels.isdisjoint(classifier_labels)
-         all_classifier_labels.update(classifier_labels)
-
-     for i, chunk_df in tqdm(enumerate(chunked_df_iterator)):
-         if i == 0:
-             assert set(chunk_df.columns) == all_classifier_labels
-             header, mode = True, 'w'
-         else:
-             header, mode = False, 'a'
-
-         agg_df = pd.DataFrame(
-             data=0., index=chunk_df.index, columns=target_names)
-         for target in target_names:
-             classifier_labels = target_mapping[target]
-             agg_df[target] = chunk_df[classifier_labels].sum(axis=1)
-
-         agg_df.to_csv(output_csv_path, index=True, header=header, mode=mode)
-
-     with open(output_label_index_json_path, 'w') as f:
-         json.dump(dict(enumerate(target_names)), f, indent=1)
-
-
- #%% Command-line driver
-
- def _parse_args() -> argparse.Namespace:
-
-     parser = argparse.ArgumentParser(
-         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-         description='Aggregate classifier probabilities to target classes.')
-     parser.add_argument(
-         'classifier_results_csv',
-         help='path to CSV with classifier probabilities')
-     parser.add_argument(
-         '-t', '--target-mapping', required=True,
-         help='path to JSON file mapping target categories to classifier labels')
-     parser.add_argument(
-         '-o', '--output-csv', required=True,
-         help='path to save output CSV with aggregated probabilities')
-     parser.add_argument(
-         '-i', '--output-label-index', required=True,
-         help='path to save output label index JSON')
-     return parser.parse_args()
-
-
- if __name__ == '__main__':
-
-     args = _parse_args()
-     main(classifier_results_csv_path=args.classifier_results_csv,
-          target_mapping_json_path=args.target_mapping,
-          output_csv_path=args.output_csv,
-          output_label_index_json_path=args.output_label_index)
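
Editor's note: the removed script above references a target-mapping JSON but never shows one. As a minimal sketch (the category and label names here are hypothetical), a mapping consistent with the assertions in main() — label lists disjoint across targets and jointly matching the classifier CSV's columns — could be produced like this:

```python
# Hypothetical target-to-classifier mapping for aggregate_classifier_probs.py.
# Keys are the desired (target) categories; values are lists of the
# classifier's original column names. The script asserts that the label lists
# are disjoint and that together they equal the set of classifier CSV columns.
import json

target_mapping = {
    'mammal': ['deer', 'elk', 'black_bear'],
    'bird': ['raven', 'sparrow'],
    'empty': ['empty'],
}

with open('target_to_classifier_labels.json', 'w') as f:
    json.dump(target_mapping, f, indent=1)
```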

classification/analyze_failed_images.py
@@ -1,227 +0,0 @@
- """
-
- analyze_failed_images.py
-
- """
-
- #%% Imports and constants
-
- import argparse
- from collections.abc import Mapping, Sequence
- from concurrent import futures
- import json
- from pprint import pprint
- import threading
- from typing import Any, Optional
-
- from PIL import Image, ImageFile
- import requests
- from tqdm import tqdm
-
- from data_management.megadb.megadb_utils import MegadbUtils
- from md_utils import path_utils
- from md_utils import sas_blob_utils
-
-
- #%% Example usage
-
- """
- python analyze_failed_images.py failed.json \
-     -a ACCOUNT -c CONTAINER -s SAS_TOKEN
- """
-
- ImageFile.LOAD_TRUNCATED_IMAGES = False
-
-
- #%% Support functions
-
- def check_image_condition(img_path: str,
-                           truncated_images_lock: threading.Lock,
-                           account: Optional[str] = None,
-                           container: Optional[str] = None,
-                           sas_token: Optional[str] = None,
-                           datasets_table: Optional[Mapping[str, Any]] = None
-                           ) -> tuple[str, str]:
-     """
-     Args:
-         img_path: str, either <blob_name> if datasets_table is None, or
-             <dataset>/<blob_name> if datasets_table is given
-         account: str, name of Azure Blob Storage account
-         container: str, name of Azure Blob Storage container
-         sas_token: str, optional SAS token (without leading '?') if the
-             container is not publicly accessible
-         datasets_table: dict, maps dataset name to dict of information
-
-     Returns: (img_file, status) tuple, where status is one of
-         'nonexistent': blob does not exist in the container
-         'non_image': img_file does not have valid file extension
-         'good': image exists and is able to be opened without setting
-             ImageFile.LOAD_TRUNCATED_IMAGES=True
-         'truncated': image exists but can only be opened by setting
-             ImageFile.LOAD_TRUNCATED_IMAGES=True
-         'bad': image exists, but cannot be opened even when setting
-             ImageFile.LOAD_TRUNCATED_IMAGES=True
-     """
-
-     if (account is None) or (container is None) or (datasets_table is not None):
-         assert account is None
-         assert container is None
-         assert sas_token is None
-         assert datasets_table is not None
-
-         dataset, img_file = img_path.split('/', maxsplit=1)
-         account = datasets_table[dataset]['storage_account']
-         container = datasets_table[dataset]['container']
-         sas_token = datasets_table[dataset]['container_sas_key']
-         if sas_token[0] == '?': # strip leading '?' from SAS token
-             sas_token = sas_token[1:]
-     else:
-         img_file = img_path
-
-     if not path_utils.is_image_file(img_file):
-         return img_file, 'non_image'
-
-     blob_url = sas_blob_utils.build_azure_storage_uri(
-         account=account, container=container, sas_token=sas_token,
-         blob=img_file)
-     blob_exists = sas_blob_utils.check_blob_exists(blob_url)
-     if not blob_exists:
-         return img_file, 'nonexistent'
-
-     stream, _ = sas_blob_utils.download_blob_to_stream(blob_url)
-     stream.seek(0)
-     try:
-         with truncated_images_lock:
-             ImageFile.LOAD_TRUNCATED_IMAGES = False
-             with Image.open(stream) as img:
-                 img.load()
-         return img_file, 'good'
-     except OSError: # PIL.UnidentifiedImageError is a subclass of OSError
-         try:
-             stream.seek(0)
-             with truncated_images_lock:
-                 ImageFile.LOAD_TRUNCATED_IMAGES = True
-                 with Image.open(stream) as img:
-                     img.load()
-             return img_file, 'truncated'
-         except Exception as e: # pylint: disable=broad-except
-             exception_type = type(e).__name__
-             tqdm.write(f'Unable to load {img_file}. {exception_type}: {e}.')
-             return img_file, 'bad'
-
-
- #%% Main function
-
- def analyze_images(url_or_path: str, json_keys: Optional[Sequence[str]] = None,
-                    account: Optional[str] = None,
-                    container: Optional[str] = None,
-                    sas_token: Optional[str] = None) -> None:
-     """
-     Args:
-         url_or_path: str, URL or local path to a file containing a list
-             of image paths. Each image path is either <blob_name> if account and
-             container are given, or <dataset>/<blob_name> if account and
-             container are None. File can either be a list of image paths, or a
-             JSON file containing image paths.
-         json_keys: optional list of str, only relevant if url_or_path is a JSON
-             file. If json_keys=None, then the JSON file at url_or_path is
-             assumed to be a JSON list of image paths. If json_keys is not None,
-             then the JSON file should be a dict, whose values corresponding to
-             json_keys are lists of image paths.
-         account: str, name of Azure Blob Storage account
-         container: str, name of Azure Blob Storage container
-         sas_token: str, optional SAS token (without leading '?') if the
-             container is not publicly accessible
-     """
-
-     datasets_table = None
-     if (account is None) or (container is None):
-         assert account is None
-         assert container is None
-         assert sas_token is None
-         datasets_table = MegadbUtils().get_datasets_table()
-
-     is_json = ('.json' in url_or_path)
-     if url_or_path.startswith(('http://', 'https://')):
-         r = requests.get(url_or_path)
-         if is_json:
-             img_paths = r.json()
-         else:
-             img_paths = r.text.splitlines()
-     else:
-         with open(url_or_path, 'r') as f:
-             if is_json:
-                 img_paths = json.load(f)
-             else:
-                 img_paths = f.readlines()
-
-     if is_json and json_keys is not None:
-         img_paths_json = img_paths
-         img_paths = []
-         for k in json_keys:
-             img_paths += img_paths_json[k]
-
-     mapping: dict[str, list[str]] = {
-         status: []
-         for status in ['good', 'nonexistent', 'non_image', 'truncated', 'bad']
-     }
-
-     pool = futures.ThreadPoolExecutor(max_workers=100)
-
-     # lock before changing ImageFile.LOAD_TRUNCATED_IMAGES
-     truncated_images_lock = threading.Lock()
-
-     futures_list = []
-     for img_path in tqdm(img_paths):
-         future = pool.submit(
-             check_image_condition, img_path, truncated_images_lock, account,
-             container, sas_token, datasets_table)
-         futures_list.append(future)
-
-     total = len(futures_list)
-     for future in tqdm(futures.as_completed(futures_list), total=total):
-         img_file, status = future.result()
-         mapping[status].append(img_file)
-
-     for status, img_list in mapping.items():
-         print(f'{status}: {len(img_list)}')
-         pprint(sorted(img_list))
-
-
- #%% Command-line driver
-
- def _parse_args() -> argparse.Namespace:
-
-     parser = argparse.ArgumentParser(
-         description='Analyze a list of images that failed to download or crop.')
-     parser.add_argument(
-         'failed_images', metavar='URL_OR_PATH',
-         help='URL or path to text or JSON file containing list of image paths')
-     parser.add_argument(
-         '-k', '--json-keys', nargs='*',
-         help='list of keys in JSON file containing image paths')
-     parser.add_argument(
-         '-a', '--account',
-         help='name of Azure Blob Storage account. If not given, then image '
-              'paths are assumed to start with the dataset name, so we can look '
-              'up the account from MegaDB.')
-     parser.add_argument(
-         '-c', '--container',
-         help='name of Azure Blob Storage container. If not given, then image '
-              'paths are assumed to start with the dataset name, so we can look '
-              'up the container from MegaDB.')
-     parser.add_argument(
-         '-s', '--sas-token',
-         help='optional SAS token (without leading "?") if the container is not '
-              'publicly accessible. If account and container not given, then '
-              'image paths are assumed to start with the dataset name, so we '
-              'can look up the SAS Token from MegaDB.')
-     return parser.parse_args()
-
-
- if __name__ == '__main__':
-
-     args = _parse_args()
-     analyze_images(url_or_path=args.failed_images, json_keys=args.json_keys,
-                    account=args.account, container=args.container,
-                    sas_token=args.sas_token)
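
Editor's note: for readers unfamiliar with the PIL pattern used in check_image_condition() above, here is a minimal local-file sketch of the good/truncated/bad distinction; the Azure blob download, existence check, and threading lock are omitted, and 'sample.jpg' is a placeholder path.

```python
# Minimal sketch of the good/truncated/bad check from check_image_condition(),
# applied to a local file. An image that only opens after setting
# ImageFile.LOAD_TRUNCATED_IMAGES=True is reported as 'truncated'.
from PIL import Image, ImageFile

def classify_local_image(path):
    try:
        ImageFile.LOAD_TRUNCATED_IMAGES = False
        with Image.open(path) as img:
            img.load()
        return 'good'
    except OSError:  # PIL.UnidentifiedImageError is a subclass of OSError
        try:
            ImageFile.LOAD_TRUNCATED_IMAGES = True
            with Image.open(path) as img:
                img.load()
            return 'truncated'
        except Exception:
            return 'bad'
        finally:
            ImageFile.LOAD_TRUNCATED_IMAGES = False

if __name__ == '__main__':
    print(classify_local_image('sample.jpg'))  # placeholder path
```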

classification/cache_batchapi_outputs.py
@@ -1,198 +0,0 @@
- """
-
- cache_batchapi_outputs.py
-
- Script to cache Batch Detection API outputs.
-
- This script can handle either the Batch Detection API JSON Response or the
- detections JSON.
-
- Batch Detection API Response format:
-
- {
-     "Status": {
-         "request_status": "completed",
-         "message": {
-             "num_failed_shards": 0,
-             "output_file_urls": {
-                 "detections": "https://url/to/detections.json",
-                 "failed_images": "https://url/to/failed_images.json",
-                 "images": https://url/to/images.json",
-             }
-         },
-     },
-     "Endpoint": "/v3/camera-trap/detection-batch/request_detections",
-     "TaskId": "ea26326e-7e0d-4524-a9ea-f57a5799d4ba"
- }
-
- Detections JSON format:
-
- {
-     "info": {...}
-     "detection_categories": {...}
-     "classification_categories": {...}
-     "images": [
-         {
-             "file": "path/from/base/dir/image1.jpg",
-             "max_detection_conf": 0.926,
-             "detections": [{
-                 "category": "1",
-                 "conf": 0.061,
-                 "bbox": [0.0451, 0.1849, 0.3642, 0.4636]
-             }]
-         }
-     ]
- }
-
- Batch Detection API Output Format:
-
- github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#api-outputs
-
- """
-
- #%% Imports
-
- from __future__ import annotations
-
- import argparse
- from collections.abc import Mapping
- import json
- import os
- from typing import Any, Optional
-
- import requests
-
- from api.batch_processing.data_preparation.prepare_api_submission import (
-     TaskStatus, Task)
- from api.batch_processing.postprocessing.combine_api_outputs import (
-     combine_api_output_dictionaries)
-
-
- #%% Support functions
-
- def cache_json(json_path: str,
-                is_detections: bool,
-                dataset: str,
-                detector_output_cache_base_dir: str,
-                detector_version: Optional[str]) -> None:
-     """
-     Args:
-         json_path: str, path to JSON file
-         is_detections: bool, True if <json_path> is a detections JSON file,
-             False if <json_path> is a API response JSON file
-         dataset: str
-         detector_output_cache_base_dir: str
-         detector_version: str
-     """
-
-     with open(json_path, 'r') as f:
-         js = json.load(f)
-
-     if is_detections:
-         detections = js
-
-     else:
-         response = js
-
-         # task finished successfully
-         status = TaskStatus(response['Status']['request_status'])
-         assert status == TaskStatus.COMPLETED
-
-         # parse the task ID
-         task_id = response['TaskId']
-
-         message = response['Status']['message']
-         detections_url = message['output_file_urls']['detections']
-         assert detections_url.split('/')[-2] == task_id
-
-         # print info about missing and failed images
-         task = Task(name=task_id, task_id=task_id)
-         task.response = response
-         task.status = status
-         task.get_missing_images(verbose=True)
-
-         # get the detections
-         detections = requests.get(detections_url).json()
-
-     # add detections to the detections cache
-     api_det_version = detections['info']['detector'].rsplit('v', maxsplit=1)[1]
-     if detector_version is not None:
-         assert api_det_version == detector_version
-     detector_output_cache_dir = os.path.join(
-         detector_output_cache_base_dir, f'v{api_det_version}')
-     msg = cache_detections(
-         detections=detections, dataset=dataset,
-         detector_output_cache_dir=detector_output_cache_dir)
-     print(msg)
-
-
- def cache_detections(detections: Mapping[str, Any], dataset: str,
-                      detector_output_cache_dir: str) -> str:
-     """
-     Args:
-         detections: dict, represents JSON output of detector
-         dataset: str, name of dataset
-         detector_output_cache_dir: str, path to folder where detector outputs
-             are cached, stored as 1 JSON file per dataset, directory must
-             already exist
-
-     Returns: str, message
-     """
-
-     # combine detections with cache
-     dataset_cache_path = os.path.join(
-         detector_output_cache_dir, f'{dataset}.json')
-     merged_dataset_cache: Mapping[str, Any]
-     if os.path.exists(dataset_cache_path):
-         with open(dataset_cache_path, 'r') as f:
-             dataset_cache = json.load(f)
-         merged_dataset_cache = combine_api_output_dictionaries(
-             input_dicts=[dataset_cache, detections], require_uniqueness=False)
-         msg = f'Merging detection output with {dataset_cache_path}'
-     else:
-         merged_dataset_cache = detections
-         msg = ('No cached detection outputs found. Saving detection output to '
-                f'{dataset_cache_path}')
-
-     # write combined detections back out to cache
-     with open(dataset_cache_path, 'w') as f:
-         json.dump(merged_dataset_cache, f, indent=1)
-     return msg
-
-
- #%% Command-line driver
-
- def _parse_args() -> argparse.Namespace:
-
-     parser = argparse.ArgumentParser(
-         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-         description='Caches detector outputs.')
-     parser.add_argument(
-         'json_file',
-         help='path to JSON file containing response of Batch Detection API')
-     parser.add_argument(
-         '-f', '--format', choices=['response', 'detections'], required=True,
-         help='(required) whether <json_file> is a Batch API response or a '
-              'detections JSON file')
-     parser.add_argument(
-         '-d', '--dataset', required=True,
-         help='(required) name of dataset corresponding to the API task')
-     parser.add_argument(
-         '-c', '--detector-output-cache-dir', required=True,
-         help='(required) path to directory where detector outputs are cached')
-     parser.add_argument(
-         '-v', '--detector-version',
-         help='detector version string, e.g., "4.1", inferred from detections '
-              'file if not given')
-     return parser.parse_args()
-
-
- if __name__ == '__main__':
-
-     args = _parse_args()
-     cache_json(
-         json_path=args.json_file,
-         is_detections=(args.format == 'detections'),
-         dataset=args.dataset,
-         detector_output_cache_base_dir=args.detector_output_cache_dir,
-         detector_version=args.detector_version)
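
Editor's note: cache_detections() above delegates the actual merge to combine_api_output_dictionaries(). As a simplified sketch only (not that function's real behavior), the core idea of unioning two MegaDetector-format results dicts on each image's 'file' field looks roughly like this:

```python
# Simplified illustration of merging a new detections dict into a cached one,
# keyed on each image's 'file' path. The real combine_api_output_dictionaries()
# also reconciles 'info' and category maps and can enforce per-image
# uniqueness; this sketch just keeps the first copy of any duplicated entry.
def merge_md_results(cached, new):
    merged = dict(cached)
    seen = {im['file'] for im in cached.get('images', [])}
    merged['images'] = cached.get('images', []) + [
        im for im in new.get('images', []) if im['file'] not in seen
    ]
    return merged

# Example: a previously cached result plus one new image.
cached = {'images': [{'file': 'a.jpg', 'detections': []}]}
new = {'images': [{'file': 'a.jpg', 'detections': []},
                  {'file': 'b.jpg', 'detections': []}]}
assert len(merge_md_results(cached, new)['images']) == 2
```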