megadetector-5.0.10-py3-none-any.whl → megadetector-5.0.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (226)
  1. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.10.dist-info/RECORD +0 -224
  214. megadetector-5.0.10.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
api/batch_processing/api_core/server_job_status_table.py
@@ -1,152 +0,0 @@
- # Copyright (c) Microsoft Corporation. All rights reserved.
- # Licensed under the MIT License.
-
- """
- A class to manage updating the status of an API request / Azure Batch Job using
- the Cosmos DB table "batch_api_jobs".
- """
-
- import logging
- import os
- import unittest
- import uuid
- from typing import Union, Optional
-
- from azure.cosmos.cosmos_client import CosmosClient
- from azure.cosmos.exceptions import CosmosResourceNotFoundError
-
- from server_api_config import API_INSTANCE_NAME, COSMOS_ENDPOINT, COSMOS_WRITE_KEY
- from server_utils import get_utc_time
-
-
- log = logging.getLogger(os.environ['FLASK_APP'])
-
-
- class JobStatusTable:
-     """
-     A wrapper around the Cosmos DB client. Each item in the table "batch_api_jobs" represents
-     a request/Batch Job, and should have the following fields:
-     - id: this is the job_id
-     - api_instance
-     - status
-     - last_updated
-     - call_params: the dict representing the body of the POST request from the user
-     The 'status' field is a dict with the following fields:
-     - request_status
-     - message
-     - num_tasks (present after Batch Job created)
-     - num_images (present after Batch Job created)
-     """
-     # a job moves from created to running/problem after the Batch Job has been submitted
-     allowed_statuses = ['created', 'running', 'failed', 'problem', 'completed', 'canceled']
-
-     def __init__(self, api_instance=None):
-         self.api_instance = api_instance if api_instance is not None else API_INSTANCE_NAME
-         cosmos_client = CosmosClient(COSMOS_ENDPOINT, credential=COSMOS_WRITE_KEY)
-         db_client = cosmos_client.get_database_client('camera-trap')
-         self.db_jobs_client = db_client.get_container_client('batch_api_jobs')
-
-     def create_job_status(self, job_id: str, status: Union[dict, str], call_params: dict) -> dict:
-         assert 'request_status' in status and 'message' in status
-         assert status['request_status'] in JobStatusTable.allowed_statuses
-
-         # job_id should be unique across all instances, and is also the partition key
-         cur_time = get_utc_time()
-         item = {
-             'id': job_id,
-             'api_instance': self.api_instance,
-             'status': status,
-             'job_submission_time': cur_time,
-             'last_updated': cur_time,
-             'call_params': call_params
-         }
-         created_item = self.db_jobs_client.create_item(item)
-         return created_item
-
-     def update_job_status(self, job_id: str, status: Union[dict, str]) -> dict:
-         assert 'request_status' in status and 'message' in status
-         assert status['request_status'] in JobStatusTable.allowed_statuses
-
-         # TODO do not read the entry first to get the call_params when the Cosmos SDK add a
-         # patching functionality:
-         # https://feedback.azure.com/forums/263030-azure-cosmos-db/suggestions/6693091-be-able-to-do-partial-updates-on-document
-         item_old = self.read_job_status(job_id)
-         if item_old is None:
-             raise ValueError
-
-         # need to retain other fields in 'status' to be able to restart monitoring thread
-         if 'status' in item_old and isinstance(item_old['status'], dict):
-             # retain existing fields; update as needed
-             for k, v in item_old['status'].items():
-                 if k not in status:
-                     status[k] = v
-         item = {
-             'id': job_id,
-             'api_instance': self.api_instance,
-             'status': status,
-             'job_submission_time': item_old['job_submission_time'],
-             'last_updated': get_utc_time(),
-             'call_params': item_old['call_params']
-         }
-         replaced_item = self.db_jobs_client.replace_item(job_id, item)
-         return replaced_item
-
-     def read_job_status(self, job_id) -> Optional[dict]:
-         """
-         Read the status of the job from the Cosmos DB table of job status.
-         Note that it does not check the actual status of the job on Batch, and just returns what
-         the monitoring thread wrote to the database.
-         job_id is also the partition key
-         """
-         try:
-             read_item = self.db_jobs_client.read_item(job_id, partition_key=job_id)
-             assert read_item['api_instance'] == self.api_instance, 'Job does not belong to this API instance'
-         except CosmosResourceNotFoundError:
-             return None # job_id not a key
-         except Exception as e:
-             logging.error(f'server_job_status_table, read_job_status, exception: {e}')
-             raise
-         else:
-             item = {k: v for k, v in read_item.items() if not k.startswith('_')}
-             return item
-
-
- class TestJobStatusTable(unittest.TestCase):
-     api_instance = 'api_test'
-
-     def test_insert(self):
-         table = JobStatusTable(TestJobStatusTable.api_instance)
-         status = {
-             'request_status': 'running',
-             'message': 'this is a test'
-         }
-         job_id = uuid.uuid4().hex
-         item = table.create_job_status(job_id, status, {'container_sas': 'random_string'})
-         self.assertTrue(job_id == item['id'], 'Expect job_id to be the id of the item')
-         self.assertTrue(item['status']['request_status'] == 'running', 'Expect fields to be inserted correctly')
-
-     def test_update_and_read(self):
-         table = JobStatusTable(TestJobStatusTable.api_instance)
-         status = {
-             'request_status': 'running',
-             'message': 'this is a test'
-         }
-         job_id = uuid.uuid4().hex
-         res = table.create_job_status(job_id, status, {'container_sas': 'random_string'})
-
-         status = {
-             'request_status': 'completed',
-             'message': 'this is a test again'
-         }
-         res = table.update_job_status(job_id, status)
-         item_read = table.read_job_status(job_id)
-         self.assertTrue(item_read['status']['request_status'] == 'completed', 'Expect field to have updated')
-
-     def test_read_invalid_id(self):
-         table = JobStatusTable(TestJobStatusTable.api_instance)
-         job_id = uuid.uuid4().hex # should not be in the database
-         item_read = table.read_job_status(job_id)
-         self.assertIsNone(item_read)
-
- if __name__ == '__main__':
-     unittest.main()
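The deleted server_job_status_table.py above wrapped a Cosmos DB container that tracks one item per batch job. As a minimal, dependency-free sketch of the status payload that wrapper expected — the keys and allowed states come from the class docstring and assertions above, while the concrete values here are hypothetical:

# Sketch only: illustrates the 'status' dict stored per job; values are made up.
allowed_statuses = ['created', 'running', 'failed', 'problem', 'completed', 'canceled']

status = {
    'request_status': 'running',   # must be one of allowed_statuses
    'message': '1000 images listed; submitting the job...',
    'num_tasks': 5,                # added by the orchestrator once the Batch job exists
    'num_images': 1000             # likewise, recorded for reporting
}

# The same sanity checks create_job_status() and update_job_status() perform:
assert 'request_status' in status and 'message' in status
assert status['request_status'] in allowed_statuses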
api/batch_processing/api_core/server_orchestration.py
@@ -1,360 +0,0 @@
- # Copyright (c) Microsoft Corporation. All rights reserved.
- # Licensed under the MIT License.
-
- """
- Functions to submit images to the Azure Batch node pool for processing, monitor
- the Job and fetch results when completed.
- """
-
- import io
- import json
- import threading
- import time
- import logging
- import os
- import urllib.parse
- from datetime import timedelta
- from random import shuffle
-
- import sas_blob_utils
- import requests
- from azure.storage.blob import ContainerClient, BlobSasPermissions, generate_blob_sas
- from tqdm import tqdm
-
- from server_utils import *
- import server_api_config as api_config
- from server_batch_job_manager import BatchJobManager
- from server_job_status_table import JobStatusTable
-
-
- # Gunicorn logger handler will get attached if needed in server.py
- log = logging.getLogger(os.environ['FLASK_APP'])
-
-
- def create_batch_job(job_id: str, body: dict):
-     """
-     This is the target to be run in a thread to submit a batch processing job and monitor progress
-     """
-     job_status_table = JobStatusTable()
-     try:
-         log.info(f'server_job, create_batch_job, job_id {job_id}, {body}')
-
-         input_container_sas = body.get('input_container_sas', None)
-
-         use_url = body.get('use_url', False)
-
-         images_requested_json_sas = body.get('images_requested_json_sas', None)
-
-         image_path_prefix = body.get('image_path_prefix', None)
-
-         first_n = body.get('first_n', None)
-         first_n = int(first_n) if first_n else None
-
-         sample_n = body.get('sample_n', None)
-         sample_n = int(sample_n) if sample_n else None
-
-         model_version = body.get('model_version', '')
-         if model_version == '':
-             model_version = api_config.DEFAULT_MD_VERSION
-
-         # request_name and request_submission_timestamp are for appending to
-         # output file names
-         job_name = body.get('request_name', '') # in earlier versions we used "request" to mean a "job"
-         job_submission_timestamp = get_utc_time()
-
-         # image_paths can be a list of strings (Azure blob names or public URLs)
-         # or a list of length-2 lists where each is a [image_id, metadata] pair
-
-         # Case 1: listing all images in the container
-         # - not possible to have attached metadata if listing images in a blob
-         if images_requested_json_sas is None:
-             log.info('server_job, create_batch_job, listing all images to process.')
-
-             # list all images to process
-             image_paths = sas_blob_utils.list_blobs_in_container(
-                 container_uri=input_container_sas,
-                 blob_prefix=image_path_prefix, # check will be case-sensitive
-                 blob_suffix=api_config.IMAGE_SUFFIXES_ACCEPTED, # check will be case-insensitive
-                 limit=api_config.MAX_NUMBER_IMAGES_ACCEPTED_PER_JOB + 1
-                 # + 1 so if the number of images listed > MAX_NUMBER_IMAGES_ACCEPTED_PER_JOB
-                 # we will know and not proceed
-             )
-
-         # Case 2: user supplied a list of images to process; can include metadata
-         else:
-             log.info('server_job, create_batch_job, using provided list of images.')
-
-             response = requests.get(images_requested_json_sas) # could be a file hosted anywhere
-             image_paths = response.json()
-
-             log.info('server_job, create_batch_job, length of image_paths provided by the user: {}'.format(
-                 len(image_paths)))
-             if len(image_paths) == 0:
-                 job_status = get_job_status(
-                     'completed', '0 images found in provided list of images.')
-                 job_status_table.update_job_status(job_id, job_status)
-                 return
-
-             error, metadata_available = validate_provided_image_paths(image_paths)
-             if error is not None:
-                 msg = 'image paths provided in the json are not valid: {}'.format(error)
-                 raise ValueError(msg)
-
-             # filter down to those conforming to the provided prefix and accepted suffixes (image file types)
-             valid_image_paths = []
-             for p in image_paths:
-                 locator = p[0] if metadata_available else p
-
-                 # prefix is case-sensitive; suffix is not
-                 if image_path_prefix is not None and not locator.startswith(image_path_prefix):
-                     continue
-
-                 # Although urlparse(p).path preserves the extension on local paths, it will not work for
-                 # blob file names that contains "#", which will be treated as indication of a query.
-                 # If the URL is generated via Azure Blob Storage, the "#" char will be properly encoded
-                 path = urllib.parse.urlparse(locator).path if use_url else locator
-
-                 if path.lower().endswith(api_config.IMAGE_SUFFIXES_ACCEPTED):
-                     valid_image_paths.append(p)
-             image_paths = valid_image_paths
-             log.info(('server_job, create_batch_job, length of image_paths provided by user, '
-                       f'after filtering to jpg: {len(image_paths)}'))
-
-         # apply the first_n and sample_n filters
-         if first_n:
-             assert first_n > 0, 'parameter first_n is 0.'
-             # OK if first_n > total number of images
-             image_paths = image_paths[:first_n]
-
-         if sample_n:
-             assert sample_n > 0, 'parameter sample_n is 0.'
-             if sample_n > len(image_paths):
-                 msg = ('parameter sample_n specifies more images than '
-                        'available (after filtering by other provided params).')
-                 raise ValueError(msg)
-
-             # sample by shuffling image paths and take the first sample_n images
-             log.info('First path before shuffling:', image_paths[0])
-             shuffle(image_paths)
-             log.info('First path after shuffling:', image_paths[0])
-             image_paths = image_paths[:sample_n]
-
-         num_images = len(image_paths)
-         log.info(f'server_job, create_batch_job, num_images after applying all filters: {num_images}')
-
-         if num_images < 1:
-             job_status = get_job_status('completed', (
-                 'Zero images found in container or in provided list of images '
-                 'after filtering with the provided parameters.'))
-             job_status_table.update_job_status(job_id, job_status)
-             return
-         if num_images > api_config.MAX_NUMBER_IMAGES_ACCEPTED_PER_JOB:
-             job_status = get_job_status(
-                 'failed',
-                 (f'The number of images ({num_images}) requested for processing exceeds the maximum '
-                  f'accepted {api_config.MAX_NUMBER_IMAGES_ACCEPTED_PER_JOB} in one call'))
-             job_status_table.update_job_status(job_id, job_status)
-             return
-
-         # upload the image list to the container, which is also mounted on all nodes
-         # all sharding and scoring use the uploaded list
-         images_list_str_as_bytes = bytes(json.dumps(image_paths, ensure_ascii=False), encoding='utf-8')
-
-         container_url = sas_blob_utils.build_azure_storage_uri(account=api_config.STORAGE_ACCOUNT_NAME,
-                                                                container=api_config.STORAGE_CONTAINER_API)
-         with ContainerClient.from_container_url(container_url,
-                                                 credential=api_config.STORAGE_ACCOUNT_KEY) as api_container_client:
-             _ = api_container_client.upload_blob(
-                 name=f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/{job_id}_images.json',
-                 data=images_list_str_as_bytes)
-
-         job_status = get_job_status('created', f'{num_images} images listed; submitting the job...')
-         job_status_table.update_job_status(job_id, job_status)
-
-     except Exception as e:
-         job_status = get_job_status('failed', f'Error occurred while preparing the Batch job: {e}')
-         job_status_table.update_job_status(job_id, job_status)
-         log.error(f'server_job, create_batch_job, Error occurred while preparing the Batch job: {e}')
-         return # do not start monitoring
-
-     try:
-         batch_job_manager = BatchJobManager()
-
-         model_rel_path = api_config.MD_VERSIONS_TO_REL_PATH[model_version]
-         batch_job_manager.create_job(job_id,
-                                      model_rel_path,
-                                      input_container_sas,
-                                      use_url)
-
-         num_tasks, task_ids_failed_to_submit = batch_job_manager.submit_tasks(job_id, num_images)
-
-         # now request_status moves from created to running
-         job_status = get_job_status('running',
-                                     (f'Submitted {num_images} images to cluster in {num_tasks} shards. '
-                                      f'Number of shards failed to be submitted: {len(task_ids_failed_to_submit)}'))
-
-         # an extra field to allow the monitoring thread to restart after an API restart: total number of tasks
-         job_status['num_tasks'] = num_tasks
-         # also record the number of images to process for reporting
-         job_status['num_images'] = num_images
-
-         job_status_table.update_job_status(job_id, job_status)
-     except Exception as e:
-         job_status = get_job_status('problem', f'Please contact us. Error occurred while submitting the Batch job: {e}')
-         job_status_table.update_job_status(job_id, job_status)
-         log.error(f'server_job, create_batch_job, Error occurred while submitting the Batch job: {e}')
-         return
-
-     # start the monitor thread with the same name
-     try:
-         thread = threading.Thread(
-             target=monitor_batch_job,
-             name=f'job_{job_id}',
-             kwargs={
-                 'job_id': job_id,
-                 'num_tasks': num_tasks,
-                 'model_version': model_version,
-                 'job_name': job_name,
-                 'job_submission_timestamp': job_submission_timestamp
-             }
-         )
-         thread.start()
-     except Exception as e:
-         job_status = get_job_status('problem', f'Error occurred while starting the monitoring thread: {e}')
-         job_status_table.update_job_status(job_id, job_status)
-         log.error(f'server_job, create_batch_job, Error occurred while starting the monitoring thread: {e}')
-         return
-
-
- def monitor_batch_job(job_id: str,
-                       num_tasks: int,
-                       model_version: str,
-                       job_name: str,
-                       job_submission_timestamp: str):
-
-     job_status_table = JobStatusTable()
-     batch_job_manager = BatchJobManager()
-
-     try:
-         num_checks = 0
-
-         while True:
-             time.sleep(api_config.MONITOR_PERIOD_MINUTES * 60)
-             num_checks += 1
-
-             # both succeeded and failed tasks are marked "completed" on Batch
-             num_tasks_succeeded, num_tasks_failed = batch_job_manager.get_num_completed_tasks(job_id)
-             job_status = get_job_status('running',
-                                         (f'Check number {num_checks}, '
-                                          f'{num_tasks_succeeded} out of {num_tasks} shards have completed '
-                                          f'successfully, {num_tasks_failed} shards have failed.'))
-             job_status_table.update_job_status(job_id, job_status)
-             log.info(f'job_id {job_id}. '
-                      f'Check number {num_checks}, {num_tasks_succeeded} out of {num_tasks} shards completed, '
-                      f'{num_tasks_failed} shards failed.')
-
-             if (num_tasks_succeeded + num_tasks_failed) >= num_tasks:
-                 break
-
-             if num_checks > api_config.MAX_MONITOR_CYCLES:
-                 job_status = get_job_status('problem',
-                                             (
-                     f'Job unfinished after {num_checks} x {api_config.MONITOR_PERIOD_MINUTES} minutes, '
-                     f'please contact us to retrieve the results. Number of succeeded shards: {num_tasks_succeeded}')
-                 )
-                 job_status_table.update_job_status(job_id, job_status)
-                 log.warning(f'server_job, create_batch_job, MAX_MONITOR_CYCLES reached, ending thread')
-                 break # still aggregate the Tasks' outputs
-
-     except Exception as e:
-         job_status = get_job_status('problem', f'Error occurred while monitoring the Batch job: {e}')
-         job_status_table.update_job_status(job_id, job_status)
-         log.error(f'server_job, create_batch_job, Error occurred while monitoring the Batch job: {e}')
-         return
-
-     try:
-         output_sas_url = aggregate_results(job_id, model_version, job_name, job_submission_timestamp)
-         # preserving format from before, but SAS URL to 'failed_images' and 'images' are no longer provided
-         # failures should be contained in the output entries, indicated by an 'error' field
-         msg = {
-             'num_failed_shards': num_tasks_failed,
-             'output_file_urls': {
-                 'detections': output_sas_url
-             }
-         }
-         job_status = get_job_status('completed', msg)
-         job_status_table.update_job_status(job_id, job_status)
-
-     except Exception as e:
-         job_status = get_job_status('problem',
-                                     f'Please contact us to retrieve the results. Error occurred while aggregating results: {e}')
-         job_status_table.update_job_status(job_id, job_status)
-         log.error(f'server_job, create_batch_job, Error occurred while aggregating results: {e}')
-         return
-
-
- def aggregate_results(job_id: str,
-                       model_version: str,
-                       job_name: str,
-                       job_submission_timestamp: str) -> str:
-     log.info(f'server_job, aggregate_results starting, job_id: {job_id}')
-
-     container_url = sas_blob_utils.build_azure_storage_uri(account=api_config.STORAGE_ACCOUNT_NAME,
-                                                            container=api_config.STORAGE_CONTAINER_API)
-     # when people download this, the timestamp will have : replaced by _
-     output_file_path = f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/{job_id}_detections_{job_name}_{job_submission_timestamp}.json'
-
-     with ContainerClient.from_container_url(container_url,
-                                             credential=api_config.STORAGE_ACCOUNT_KEY) as container_client:
-         # check if the result blob has already been written (could be another instance of the API / worker thread)
-         # and if so, skip aggregating and uploading the results, and just generate the SAS URL, which
-         # could be needed still if the previous request_status was `problem`.
-         blob_client = container_client.get_blob_client(output_file_path)
-         if blob_client.exists():
-             log.warning(f'The output file already exists, likely because another monitoring thread already wrote it.')
-         else:
-             task_outputs_dir = f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/task_outputs/'
-             generator = container_client.list_blobs(name_starts_with=task_outputs_dir)
-
-             blobs = [i for i in generator if i.name.endswith('.json')]
-
-             all_results = []
-             for blob_props in tqdm(blobs):
-                 with container_client.get_blob_client(blob_props) as blob_client:
-                     stream = io.BytesIO()
-                     blob_client.download_blob().readinto(stream)
-                     stream.seek(0)
-                     task_results = json.load(stream)
-                     all_results.extend(task_results)
-
-             api_output = {
-                 'info': {
-                     'detector': f'megadetector_v{model_version}',
-                     'detection_completion_time': get_utc_time(),
-                     'format_version': api_config.OUTPUT_FORMAT_VERSION
-                 },
-                 'detection_categories': api_config.DETECTOR_LABEL_MAP,
-                 'images': all_results
-             }
-
-             # upload the output JSON to the Job folder
-             api_output_as_bytes = bytes(json.dumps(api_output, ensure_ascii=False, indent=1), encoding='utf-8')
-             _ = container_client.upload_blob(name=output_file_path, data=api_output_as_bytes)
-
-     output_sas = generate_blob_sas(
-         account_name=api_config.STORAGE_ACCOUNT_NAME,
-         container_name=api_config.STORAGE_CONTAINER_API,
-         blob_name=output_file_path,
-         account_key=api_config.STORAGE_ACCOUNT_KEY,
-         permission=BlobSasPermissions(read=True, write=False),
-         expiry=datetime.utcnow() + timedelta(days=api_config.OUTPUT_SAS_EXPIRATION_DAYS)
-     )
-     output_sas_url = sas_blob_utils.build_azure_storage_uri(
-         account=api_config.STORAGE_ACCOUNT_NAME,
-         container=api_config.STORAGE_CONTAINER_API,
-         blob=output_file_path,
-         sas_token=output_sas
-     )
-     log.info(f'server_job, aggregate_results done, job_id: {job_id}')
-     log.info(f'output_sas_url: {output_sas_url}')
-     return output_sas_url
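For orientation, aggregate_results() above stitches the per-shard task outputs into a single MegaDetector batch output file. A hedged sketch of that structure, mirroring the api_output dict in the deleted code — the concrete values, the category map, and the format version are illustrative stand-ins for whatever api_config supplies:

# Sketch only: shape of the aggregated output JSON; all values below are hypothetical.
example_output = {
    'info': {
        'detector': 'megadetector_v4.1',                       # f'megadetector_v{model_version}'
        'detection_completion_time': '2021-02-08T20:02:05.699689Z',
        'format_version': '1.1'                                # api_config.OUTPUT_FORMAT_VERSION
    },
    'detection_categories': {'1': 'animal', '2': 'person', '3': 'vehicle'},  # api_config.DETECTOR_LABEL_MAP
    'images': [
        {
            'file': 'camera01/IMG_0001.JPG',
            'detections': [
                {'category': '1', 'conf': 0.92, 'bbox': [0.1, 0.2, 0.3, 0.4]}
            ]
        },
        # per the comment in monitor_batch_job(), a failed image carries an 'error' field
        {'file': 'camera01/IMG_0002.JPG', 'error': 'image could not be read'}
    ]
}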
api/batch_processing/api_core/server_utils.py
@@ -1,92 +0,0 @@
- # Copyright (c) Microsoft Corporation. All rights reserved.
- # Licensed under the MIT License.
-
- """
- Helper functions for the batch processing API.
- """
-
- import logging
- import os
- from datetime import datetime
- from typing import Tuple, Any, Sequence, Optional
-
- import sas_blob_utils
-
-
- log = logging.getLogger(os.environ['FLASK_APP'])
-
-
- #%% helper classes and functions
-
- def make_error(error_code: int, error_message: str) -> Tuple[dict, int]:
-     # TODO log exception when we have more telemetry
-     log.error(f'Error {error_code} - {error_message}')
-     return {'error': error_message}, error_code
-
-
- def check_data_container_sas(input_container_sas: str) -> Optional[Tuple[int, str]]:
-     """
-     Returns a tuple (error_code, msg) if not a usable SAS URL, else returns None
-     """
-     # TODO check that the expiry date of input_container_sas is at least a month
-     # into the future
-     permissions = sas_blob_utils.get_permissions_from_uri(input_container_sas)
-     data = sas_blob_utils.get_all_query_parts(input_container_sas)
-
-     msg = ('input_container_sas provided does not have both read and list '
-            'permissions.')
-     if 'read' not in permissions or 'list' not in permissions:
-         if 'si' in data:
-             # if no permission specified explicitly but has an access policy, assumes okay
-             # TODO - check based on access policy as well
-             return None
-
-         return 400, msg
-
-     return None
-
-
- def get_utc_time() -> str:
-     # return current UTC time as a string in the ISO 8601 format (so we can query by
-     # timestamp in the Cosmos DB job status table.
-     # example: '2021-02-08T20:02:05.699689Z'
-     return datetime.utcnow().isoformat(timespec='microseconds') + 'Z'
-
-
- def get_job_status(request_status: str, message: Any) -> dict:
-     return {
-         'request_status': request_status,
-         'message': message
-     }
-
-
- def validate_provided_image_paths(image_paths: Sequence[Any]) -> Tuple[Optional[str], bool]:
-     """Given a list of image_paths (list length at least 1), validate them and
-     determine if metadata is available.
-     Args:
-         image_paths: a list of string (image_id) or a list of 2-item lists
-             ([image_id, image_metadata])
-     Returns:
-         error: None if checks passed, otherwise a string error message
-         metadata_available: bool, True if available
-     """
-     # image_paths will have length at least 1, otherwise would have ended before this step
-     first_item = image_paths[0]
-     metadata_available = False
-     if isinstance(first_item, str):
-         for i in image_paths:
-             if not isinstance(i, str):
-                 error = 'Not all items in image_paths are of type string.'
-                 return error, metadata_available
-         return None, metadata_available
-     elif isinstance(first_item, list):
-         metadata_available = True
-         for i in image_paths:
-             if len(i) != 2: # i should be [image_id, metadata_string]
-                 error = ('Items in image_paths are lists, but not all lists '
-                          'are of length 2 [image locator, metadata].')
-                 return error, metadata_available
-         return None, metadata_available
-     else:
-         error = 'image_paths contain items that are not strings nor lists.'
-         return error, metadata_available
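As a quick illustration of the two input shapes validate_provided_image_paths() above accepts (per its docstring), with hypothetical entries and the expected results noted in comments:

# Sketch only: the two accepted image_paths shapes; entries below are made up.

# Shape 1: plain blob names or URLs -> returns (None, metadata_available=False)
image_paths_plain = ['cam01/IMG_0001.JPG', 'cam01/IMG_0002.JPG']

# Shape 2: [locator, metadata] pairs -> returns (None, metadata_available=True)
image_paths_with_metadata = [
    ['cam01/IMG_0001.JPG', 'station A'],
    ['cam01/IMG_0002.JPG', 'station B'],
]

# Mixed shapes, or pairs that are not exactly length 2, yield an error message
# as the first element of the returned tuple instead of None.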
File without changes
api/batch_processing/api_core_support/aggregate_results_manually.py
@@ -1,46 +0,0 @@
- #
- # If a request has been sent to AML for batch scoring but the monitoring thread of the API was
- # interrupted (uncaught exception or having to re-start the API container), we could manually
- # aggregate results from each shard using this script, assuming all jobs submitted to AML have finished.
- #
- # Need to have set environment variables STORAGE_ACCOUNT_NAME and STORAGE_ACCOUNT_KEY to those of the
- # storage account backing the API. Also need to adjust the INTERNAL_CONTAINER, AML_CONTAINER and
- # AML_CONFIG fields in api_core/orchestrator_api/api_config.py to match the instance of the API that this
- # request was submitted to.
- #
- # May need to change the import statement in api_core/orchestrator_api/orchestrator.py
- # "from sas_blob_utils import SasBlob" to
- # "from .sas_blob_utils import SasBlob" to not confuse with the module in AI4Eutils;
- # and change "import api_config" to
- # "from api.batch_processing.api_core.orchestrator_api import api_config"
-
- # Execute this script from the root of the repository. You may need to add the repository to PYTHONPATH.
-
- import argparse
- import json
-
- from api.batch_processing.api_core.orchestrator_api.orchestrator import AMLMonitor
-
-
- def main():
-     parser = argparse.ArgumentParser()
-     parser.add_argument('shortened_request_id', type=str,
-                         help='the request ID to restart monitoring')
-     parser.add_argument('model_version', type=str, help='version of megadetector used; this is used to fill in the meta info section of the output file')
-     parser.add_argument('request_name', type=str, help='easy to remember name for that job, optional', default='')
-     args = parser.parse_args()
-
-
-     # list_jobs_submitted cannot be serialized ("can't pickle _thread.RLock objects "), but
-     # do not need it for aggregating results
-     aml_monitor = AMLMonitor(request_id=args.request_id,
-                              list_jobs_submitted=None,
-                              request_name=args.request_name,
-                              request_submission_timestamp='',
-                              model_version=args.model_version)
-     output_file_urls = aml_monitor.aggregate_results()
-     output_file_urls_str = json.dumps(output_file_urls)
-     print(output_file_urls_str)
-
- if __name__ == '__main__':
-     main()
File without changes