PyPI - megadetector - Versions diffs - 5.0.10__py3-none-any.whl → 5.0.11__py3-none-any.whl - Mend

megadetector 5.0.10__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (226)

{megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
{megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
megadetector-5.0.11.dist-info/RECORD +5 -0
megadetector-5.0.11.dist-info/top_level.txt +1 -0
api/__init__.py +0 -0
api/batch_processing/__init__.py +0 -0
api/batch_processing/api_core/__init__.py +0 -0
api/batch_processing/api_core/batch_service/__init__.py +0 -0
api/batch_processing/api_core/batch_service/score.py +0 -439
api/batch_processing/api_core/server.py +0 -294
api/batch_processing/api_core/server_api_config.py +0 -98
api/batch_processing/api_core/server_app_config.py +0 -55
api/batch_processing/api_core/server_batch_job_manager.py +0 -220
api/batch_processing/api_core/server_job_status_table.py +0 -152
api/batch_processing/api_core/server_orchestration.py +0 -360
api/batch_processing/api_core/server_utils.py +0 -92
api/batch_processing/api_core_support/__init__.py +0 -0
api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
api/batch_processing/api_support/__init__.py +0 -0
api/batch_processing/api_support/summarize_daily_activity.py +0 -152
api/batch_processing/data_preparation/__init__.py +0 -0
api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
api/batch_processing/data_preparation/manage_video_batch.py +0 -327
api/batch_processing/integration/digiKam/setup.py +0 -6
api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
api/batch_processing/postprocessing/__init__.py +0 -0
api/batch_processing/postprocessing/add_max_conf.py +0 -64
api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
api/batch_processing/postprocessing/compare_batch_results.py +0 -958
api/batch_processing/postprocessing/convert_output_format.py +0 -397
api/batch_processing/postprocessing/load_api_results.py +0 -195
api/batch_processing/postprocessing/md_to_coco.py +0 -310
api/batch_processing/postprocessing/md_to_labelme.py +0 -330
api/batch_processing/postprocessing/merge_detections.py +0 -401
api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
api/synchronous/__init__.py +0 -0
api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
api/synchronous/api_core/animal_detection_api/config.py +0 -35
api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
api/synchronous/api_core/tests/__init__.py +0 -0
api/synchronous/api_core/tests/load_test.py +0 -110
classification/__init__.py +0 -0
classification/aggregate_classifier_probs.py +0 -108
classification/analyze_failed_images.py +0 -227
classification/cache_batchapi_outputs.py +0 -198
classification/create_classification_dataset.py +0 -627
classification/crop_detections.py +0 -516
classification/csv_to_json.py +0 -226
classification/detect_and_crop.py +0 -855
classification/efficientnet/__init__.py +0 -9
classification/efficientnet/model.py +0 -415
classification/efficientnet/utils.py +0 -610
classification/evaluate_model.py +0 -520
classification/identify_mislabeled_candidates.py +0 -152
classification/json_to_azcopy_list.py +0 -63
classification/json_validator.py +0 -695
classification/map_classification_categories.py +0 -276
classification/merge_classification_detection_output.py +0 -506
classification/prepare_classification_script.py +0 -194
classification/prepare_classification_script_mc.py +0 -228
classification/run_classifier.py +0 -286
classification/save_mislabeled.py +0 -110
classification/train_classifier.py +0 -825
classification/train_classifier_tf.py +0 -724
classification/train_utils.py +0 -322
data_management/__init__.py +0 -0
data_management/annotations/__init__.py +0 -0
data_management/annotations/annotation_constants.py +0 -34
data_management/camtrap_dp_to_coco.py +0 -238
data_management/cct_json_utils.py +0 -395
data_management/cct_to_md.py +0 -176
data_management/cct_to_wi.py +0 -289
data_management/coco_to_labelme.py +0 -272
data_management/coco_to_yolo.py +0 -662
data_management/databases/__init__.py +0 -0
data_management/databases/add_width_and_height_to_db.py +0 -33
data_management/databases/combine_coco_camera_traps_files.py +0 -206
data_management/databases/integrity_check_json_db.py +0 -477
data_management/databases/subset_json_db.py +0 -115
data_management/generate_crops_from_cct.py +0 -149
data_management/get_image_sizes.py +0 -188
data_management/importers/add_nacti_sizes.py +0 -52
data_management/importers/add_timestamps_to_icct.py +0 -79
data_management/importers/animl_results_to_md_results.py +0 -158
data_management/importers/auckland_doc_test_to_json.py +0 -372
data_management/importers/auckland_doc_to_json.py +0 -200
data_management/importers/awc_to_json.py +0 -189
data_management/importers/bellevue_to_json.py +0 -273
data_management/importers/cacophony-thermal-importer.py +0 -796
data_management/importers/carrizo_shrubfree_2018.py +0 -268
data_management/importers/carrizo_trail_cam_2017.py +0 -287
data_management/importers/cct_field_adjustments.py +0 -57
data_management/importers/channel_islands_to_cct.py +0 -913
data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
data_management/importers/eMammal/eMammal_helpers.py +0 -249
data_management/importers/eMammal/make_eMammal_json.py +0 -223
data_management/importers/ena24_to_json.py +0 -275
data_management/importers/filenames_to_json.py +0 -385
data_management/importers/helena_to_cct.py +0 -282
data_management/importers/idaho-camera-traps.py +0 -1407
data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
data_management/importers/jb_csv_to_json.py +0 -150
data_management/importers/mcgill_to_json.py +0 -250
data_management/importers/missouri_to_json.py +0 -489
data_management/importers/nacti_fieldname_adjustments.py +0 -79
data_management/importers/noaa_seals_2019.py +0 -181
data_management/importers/pc_to_json.py +0 -365
data_management/importers/plot_wni_giraffes.py +0 -123
data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
data_management/importers/prepare_zsl_imerit.py +0 -131
data_management/importers/rspb_to_json.py +0 -356
data_management/importers/save_the_elephants_survey_A.py +0 -320
data_management/importers/save_the_elephants_survey_B.py +0 -332
data_management/importers/snapshot_safari_importer.py +0 -758
data_management/importers/snapshot_safari_importer_reprise.py +0 -665
data_management/importers/snapshot_serengeti_lila.py +0 -1067
data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
data_management/importers/sulross_get_exif.py +0 -65
data_management/importers/timelapse_csv_set_to_json.py +0 -490
data_management/importers/ubc_to_json.py +0 -399
data_management/importers/umn_to_json.py +0 -507
data_management/importers/wellington_to_json.py +0 -263
data_management/importers/wi_to_json.py +0 -441
data_management/importers/zamba_results_to_md_results.py +0 -181
data_management/labelme_to_coco.py +0 -548
data_management/labelme_to_yolo.py +0 -272
data_management/lila/__init__.py +0 -0
data_management/lila/add_locations_to_island_camera_traps.py +0 -97
data_management/lila/add_locations_to_nacti.py +0 -147
data_management/lila/create_lila_blank_set.py +0 -557
data_management/lila/create_lila_test_set.py +0 -151
data_management/lila/create_links_to_md_results_files.py +0 -106
data_management/lila/download_lila_subset.py +0 -177
data_management/lila/generate_lila_per_image_labels.py +0 -515
data_management/lila/get_lila_annotation_counts.py +0 -170
data_management/lila/get_lila_image_counts.py +0 -111
data_management/lila/lila_common.py +0 -300
data_management/lila/test_lila_metadata_urls.py +0 -132
data_management/ocr_tools.py +0 -874
data_management/read_exif.py +0 -681
data_management/remap_coco_categories.py +0 -84
data_management/remove_exif.py +0 -66
data_management/resize_coco_dataset.py +0 -189
data_management/wi_download_csv_to_coco.py +0 -246
data_management/yolo_output_to_md_output.py +0 -441
data_management/yolo_to_coco.py +0 -676
detection/__init__.py +0 -0
detection/detector_training/__init__.py +0 -0
detection/detector_training/model_main_tf2.py +0 -114
detection/process_video.py +0 -703
detection/pytorch_detector.py +0 -337
detection/run_detector.py +0 -779
detection/run_detector_batch.py +0 -1219
detection/run_inference_with_yolov5_val.py +0 -917
detection/run_tiled_inference.py +0 -935
detection/tf_detector.py +0 -188
detection/video_utils.py +0 -606
docs/source/conf.py +0 -43
md_utils/__init__.py +0 -0
md_utils/azure_utils.py +0 -174
md_utils/ct_utils.py +0 -612
md_utils/directory_listing.py +0 -246
md_utils/md_tests.py +0 -968
md_utils/path_utils.py +0 -1044
md_utils/process_utils.py +0 -157
md_utils/sas_blob_utils.py +0 -509
md_utils/split_locations_into_train_val.py +0 -228
md_utils/string_utils.py +0 -92
md_utils/url_utils.py +0 -323
md_utils/write_html_image_list.py +0 -225
md_visualization/__init__.py +0 -0
md_visualization/plot_utils.py +0 -293
md_visualization/render_images_with_thumbnails.py +0 -275
md_visualization/visualization_utils.py +0 -1537
md_visualization/visualize_db.py +0 -551
md_visualization/visualize_detector_output.py +0 -406
megadetector-5.0.10.dist-info/RECORD +0 -224
megadetector-5.0.10.dist-info/top_level.txt +0 -8
taxonomy_mapping/__init__.py +0 -0
taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
taxonomy_mapping/map_new_lila_datasets.py +0 -154
taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
taxonomy_mapping/preview_lila_taxonomy.py +0 -591
taxonomy_mapping/retrieve_sample_image.py +0 -71
taxonomy_mapping/simple_image_download.py +0 -218
taxonomy_mapping/species_lookup.py +0 -834
taxonomy_mapping/taxonomy_csv_checker.py +0 -159
taxonomy_mapping/taxonomy_graph.py +0 -346
taxonomy_mapping/validate_lila_category_mappings.py +0 -83
{megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0

data_management/databases/combine_coco_camera_traps_files.py DELETED Viewed

@@ -1,206 +0,0 @@
-"""
-combine_coco_camera_traps_files.py
-Merges two or more .json files in COCO Camera Traps format, optionally
-writing the results to another .json file.
-- Concatenates image lists, erroring if images are not unique.
-- Errors on unrecognized fields.
-- Checks compatibility in info structs, within reason.
-*Example command-line invocation*
-combine_coco_camera_traps_files input1.json input2.json ... inputN.json output.json
-"""
-#%% Constants and imports
-import argparse
-import json
-import sys
-#%% Merge functions
-def combine_cct_files(input_files, output_file=None, require_uniqueness=True,
-                      filename_prefixes=None):
-    """
-    Merges the list of COCO Camera Traps files [input_files] into a single
-    dictionary, optionally writing the result to [output_file].
-    Args:
-        input_files (list): paths to CCT .json files
-        output_file (str, optional): path to write merged .json file; nothing is
-            written if this is None
-        require_uniqueness (bool): whether to require that the images in
-            each input_dict be unique
-        filename_prefixes (dict, optional): maps each path in [input_files] to a
-            string that is prepended to every image file name loaded from that file
-    Returns:
-        dict: the merged COCO-formatted .json dict
-    Raises:
-        AssertionError: if [filename_prefixes] is supplied but has no entry for
-            one of the input files
-    """
-    input_dicts = []
-    print('Loading input files')
-    for fn in input_files:
-        # Only hold the file open for as long as it takes to parse it
-        with open(fn, 'r', encoding='utf-8') as f:
-            d = json.load(f)
-        if filename_prefixes is not None:
-            assert fn in filename_prefixes, \
-                'No filename prefix supplied for {}'.format(fn)
-            # combine_cct_dictionaries() reads the prefix from this key
-            d['filename_prefix'] = filename_prefixes[fn]
-        input_dicts.append(d)
-    print('Merging results')
-    merged_dict = combine_cct_dictionaries(
-        input_dicts, require_uniqueness=require_uniqueness)
-    if output_file is not None:
-        # Only announce the write when one actually happens; write with the
-        # same encoding used for reading the inputs.
-        print('Writing output')
-        with open(output_file, 'w', encoding='utf-8') as f:
-            json.dump(merged_dict, f, indent=1)
-    return merged_dict
-def combine_cct_dictionaries(input_dicts, require_uniqueness=True):
-    """
-    Merges the list of COCO Camera Traps dictionaries [input_dicts].
-
-    Merge rules:
-    - Categories are matched by name across inputs and renumbered in order of first
-      appearance; the name 'empty' is always present and always has ID 0.
-    - Image IDs, annotation IDs, annotation image IDs, sequence IDs, and locations are
-      prefixed with 'dsNNN_', where NNN is the zero-padded index of the input dict.
-      Non-string values are converted to strings before prefixing.
-    - If an input dict has a 'filename_prefix' field, it is prepended to every image
-      file name from that dict.
-    - The merged 'info' is a deep copy of the first input's 'info', with an added
-      'original_info' list holding the 'info' of every input.
-
-    The image and annotation dicts inside [input_dicts] are modified in place and
-    shared with the returned dict.
-    Args:
-        input_dicts: list of CCT dicts
-        require_uniqueness: bool, whether to require that the images in
-            each input_dict be unique
-    Returns:
-        dict: the merged COCO-formatted .json dict, with images sorted by file name
-    Raises:
-        ValueError: if an input dict contains an unrecognized top-level field
-        AssertionError: if [require_uniqueness] is set and a file name occurs twice,
-            if an annotation refers to a category ID missing from its dict's category
-            list, or if one category ID is listed under two different names
-    """
-    # Imported once per call rather than once per input dict
-    import copy
-    filename_to_image = {}
-    all_annotations = []
-    info = None
-    # 'empty' is pinned to ID 0 regardless of the IDs used in the inputs
-    category_name_to_id = {'empty': 0}
-    next_category_id = 1
-    known_fields = ['info', 'categories', 'annotations','images','filename_prefix']
-    # i_input_dict = 0; input_dict = input_dicts[i_input_dict]
-    for i_input_dict,input_dict in enumerate(input_dicts):
-        filename_prefix = input_dict.get('filename_prefix', '')
-        for k in input_dict:
-            if k not in known_fields:
-                raise ValueError(f'Unrecognized CCT field: {k}')
-        # We will prepend an index to every ID to guarantee uniqueness
-        index_string = 'ds' + str(i_input_dict).zfill(3) + '_'
-        old_cat_id_to_new_cat_id = {}
-        # Map detection categories from the original data set into the merged data set
-        for original_category in input_dict['categories']:
-            original_cat_id = original_category['id']
-            cat_name = original_category['name']
-            if cat_name in category_name_to_id:
-                new_cat_id = category_name_to_id[cat_name]
-            else:
-                new_cat_id = next_category_id
-                next_category_id += 1
-                category_name_to_id[cat_name] = new_cat_id
-            if original_cat_id in old_cat_id_to_new_cat_id:
-                # The same original ID must not be listed under two names
-                assert old_cat_id_to_new_cat_id[original_cat_id] == new_cat_id
-            else:
-                old_cat_id_to_new_cat_id[original_cat_id] = new_cat_id
-        # ...for each category
-        # Merge original image list into the merged data set
-        for im in input_dict['images']:
-            # str() keeps string values unchanged and lets integer values (which
-            # would otherwise raise TypeError on concatenation) be prefixed too
-            if 'seq_id' in im:
-                im['seq_id'] = index_string + str(im['seq_id'])
-            if 'location' in im:
-                im['location'] = index_string + str(im['location'])
-            im_file = filename_prefix + im['file_name']
-            im['file_name'] = im_file
-            if require_uniqueness:
-                assert im_file not in filename_to_image, f'Duplicate image: {im_file}'
-            else:
-                if im_file in filename_to_image:
-                    print('Redundant image {}'.format(im_file))
-            # Create a unique ID
-            im['id'] = index_string + str(im['id'])
-            # When uniqueness is not required, a later image replaces an earlier
-            # one that has the same file name
-            filename_to_image[im_file] = im
-        # ...for each image
-        # Same for annotations
-        for ann in input_dict['annotations']:
-            ann['image_id'] = index_string + str(ann['image_id'])
-            ann['id'] = index_string + str(ann['id'])
-            assert ann['category_id'] in old_cat_id_to_new_cat_id
-            ann['category_id'] = old_cat_id_to_new_cat_id[ann['category_id']]
-        # ...for each annotation
-        all_annotations.extend(input_dict['annotations'])
-        # Merge info dicts, don't check completion time fields
-        if info is None:
-            info = copy.deepcopy(input_dict['info'])
-            info['original_info'] = [input_dict['info']]
-        else:
-            info['original_info'].append(input_dict['info'])
-    # ...for each dictionary
-    # Convert merged image dictionaries to a sorted list
-    sorted_images = sorted(filename_to_image.values(), key=lambda im: im['file_name'])
-    all_categories = [{'id':cat_id,'name':cat_name} for
-                      cat_name,cat_id in category_name_to_id.items()]
-    merged_dict = {'info': info,
-                   'categories': all_categories,
-                   'images': sorted_images,
-                   'annotations': all_annotations}
-    return merged_dict
-# ...combine_cct_dictionaries(...)
-#%% Command-line driver
-def main():
-    """
-    Command-line entry point: merges the .json files named on the command line
-    and writes the result to the last path given.
-    """
-    arg_parser = argparse.ArgumentParser()
-    arg_parser.add_argument('input_paths', nargs='+', help='List of input .json files')
-    arg_parser.add_argument('output_path', help='Output .json file')
-    # With no arguments at all, show the usage text and quit
-    if not sys.argv[1:]:
-        arg_parser.print_help()
-        arg_parser.exit()
-    parsed = arg_parser.parse_args()
-    combine_cct_files(parsed.input_paths, parsed.output_path)
-if __name__ == '__main__':
-    main()

data_management/databases/integrity_check_json_db.py DELETED Viewed

@@ -1,477 +0,0 @@
-"""
-integrity_check_json_db.py
-Does some integrity-checking and computes basic statistics on a COCO Camera Traps .json file, specifically:
-* Verifies that required fields are present and have the right types
-* Verifies that annotations refer to valid images
-* Verifies that annotations refer to valid categories
-* Verifies that image, category, and annotation IDs are unique
-* Optionally checks file existence
-* Finds un-annotated images
-* Finds unused categories
-* Prints a list of categories sorted by count
-"""
-#%% Constants and environment
-import argparse
-import json
-import os
-import sys
-from multiprocessing.pool import ThreadPool
-from operator import itemgetter
-from tqdm import tqdm
-from md_visualization.visualization_utils import open_image
-from md_utils import ct_utils
-#%% Classes and environment
-class IntegrityCheckOptions:
-    """
-    Options for integrity_check_json_db()
-
-    Every option is declared as a class-level default; callers create an instance
-    and override individual attributes on it.
-    """
-    #: Image path; the filenames in the .json file should be relative to this folder.
-    #: None is treated as an empty string by integrity_check_json_db().
-    baseDir = ''
-    #: Should we validate the image sizes?  Setting this also turns on
-    #: bCheckImageExistence inside integrity_check_json_db().
-    bCheckImageSizes = False
-    #: Should we check that all the images in the .json file exist on disk?
-    bCheckImageExistence = False
-    #: Should we search [baseDir] for images that are not used in the .json file?
-    #: Only honored when [baseDir] is non-empty; only .jpeg/.jpg/.png files are considered.
-    bFindUnusedImages = False
-    #: Should we require that all images in the .json file have a 'location' field?
-    bRequireLocation = True
-    #: For debugging, limit the number of images we'll process; values <= 0 mean no limit
-    iMaxNumImages = -1
-    #: Number of threads to use for parallelization, set to <= 1 to disable parallelization.
-    #: Only used for the image existence/size checks.
-    nThreads = 10
-    #: Enable additional debug output
-    verbose = True
-# This is used in a medium-hacky way to share modified options across threads:
-# _check_image_existence_and_size() falls back to this instance when it is called
-# without an explicit options argument.
-defaultOptions = IntegrityCheckOptions()
-#%% Functions
-def _check_image_existence_and_size(image,options=None):
-    """
-    Checks that the file behind the CCT image dict [image] exists and, if size checking
-    is enabled, that its actual dimensions match the recorded ones.  [image] is expected
-    to have the fields:
-    * file_name
-    * width (only read when checking sizes)
-    * height (only read when checking sizes)
-    Args:
-        image (dict): image to validate
-        options (IntegrityCheckOptions): parameters impacting validation; the module-level
-            defaultOptions is used if this is None
-    Returns:
-        bool: whether this image passes validation
-    """
-    opts = defaultOptions if options is None else options
-    # This function should only be reached when existence checking is on
-    assert opts.bCheckImageExistence
-    image_path = os.path.join(opts.baseDir,image['file_name'])
-    if not os.path.isfile(image_path):
-        # print('Image path {} does not exist'.format(image_path))
-        return False
-    # Existence is all that was requested
-    if not opts.bCheckImageSizes:
-        return True
-    if 'height' not in image or 'width' not in image:
-        print('Missing image size in {}'.format(image_path))
-        return False
-    actual_width,actual_height = open_image(image_path).size
-    if actual_width != image['width'] or actual_height != image['height']:
-        print('Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
-                image['id'], image_path, image['width'], image['height'],
-                actual_width, actual_height))
-        return False
-    return True
-def integrity_check_json_db(jsonFile, options=None):
-    """
-    Does some integrity-checking and computes basic statistics on a COCO Camera Traps .json file; see
-    module header comment for a list of the validation steps.
-
-    Structural problems (missing fields, wrong types, duplicate IDs, dangling references)
-    stop the check with a failed assert.  Only images that fail the existence/size check
-    and unused image files are collected and returned in [errorInfo].
-
-    The loaded data is modified in place: every image and category dict gets a '_count'
-    field holding the number of annotations that refer to it, and every image file name
-    is passed through os.path.normpath().
-    Args:
-        jsonFile (str or dict): filename to validate, or an already-loaded dict
-        options (IntegrityCheckOptions, optional): validation parameters; a default
-            IntegrityCheckOptions instance is used if this is None.  The object may be
-            modified (bCheckImageExistence is forced on when bCheckImageSizes is set, and
-            a baseDir of None is replaced by '').
-    Returns:
-        tuple: tuple containing:
-            - sortedCategories (list): the category dicts from [jsonFile], sorted by
-              descending annotation count
-            - data (dict): the data loaded from [jsonFile]
-            - errorInfo (dict): specific validation errors: 'unusedFiles' (image paths
-              found under baseDir but absent from the .json file) and 'validationErrors'
-              (paths of images that failed the existence/size check)
-    Raises:
-        ValueError: if [jsonFile] is neither a str nor a dict
-        AssertionError: if any structural check fails
-    """
-    if options is None:
-        options = IntegrityCheckOptions()
-    if options.bCheckImageSizes:
-        options.bCheckImageExistence = True
-    if options.verbose:
-        print(options.__dict__)
-    if options.baseDir is None:
-        options.baseDir = ''
-    baseDir = options.baseDir
-    ##%% Read .json file if necessary, integrity-check fields
-    if isinstance(jsonFile,dict):
-        data = jsonFile
-    elif isinstance(jsonFile,str):
-        assert os.path.isfile(jsonFile), '.json file {} does not exist'.format(jsonFile)
-        if options.verbose:
-            print('Reading .json {} with base dir [{}]...'.format(
-                    jsonFile,baseDir))
-        with open(jsonFile,'r') as f:
-            data = json.load(f)
-    else:
-        raise ValueError('Illegal value for jsonFile')
-    images = data['images']
-    annotations = data['annotations']
-    categories = data['categories']
-    # info = data['info']
-    assert 'info' in data, 'No info struct in database'
-    if len(baseDir) > 0:
-        assert os.path.isdir(baseDir), 'Base directory {} does not exist'.format(baseDir)
-    ##%% Build dictionaries, checking ID uniqueness and internal validity as we go
-    imageIdToImage = {}
-    annIdToAnn = {}
-    catIdToCat = {}
-    catNameToCat = {}
-    imageLocationSet = set()
-    if options.verbose:
-        print('Checking categories...')
-    for cat in tqdm(categories):
-        # Confirm that required fields are present
-        assert 'name' in cat
-        assert 'id' in cat
-        assert isinstance(cat['id'],int), 'Illegal category ID type: [{}]'.format(str(cat['id']))
-        assert isinstance(cat['name'],str), 'Illegal category name type [{}]'.format(str(cat['name']))
-        catId = cat['id']
-        catName = cat['name']
-        # Confirm ID uniqueness
-        assert catId not in catIdToCat, 'Category ID {} is used more than once'.format(catId)
-        catIdToCat[catId] = cat
-        # Annotation counter, incremented in the annotation loop below
-        cat['_count'] = 0
-        assert catName not in catNameToCat, 'Category name {} is used more than once'.format(catName)
-        catNameToCat[catName] = cat
-    # ...for each category
-    if options.verbose:
-        print('\nChecking images...')
-    if options.iMaxNumImages > 0 and len(images) > options.iMaxNumImages:
-        if options.verbose:
-            print('Trimming image list to {}'.format(options.iMaxNumImages))
-        images = images[0:options.iMaxNumImages]
-    imagePathsInJson = set()
-    sequences = set()
-    # image = images[0]
-    for image in tqdm(images):
-        # Annotation counter, incremented in the annotation loop below
-        image['_count'] = 0
-        # Confirm that required fields are present
-        assert 'file_name' in image
-        assert 'id' in image
-        # Check types before normalizing the path, so a bad file name is reported
-        # by the assert rather than by an error inside os.path.normpath()
-        assert isinstance(image['file_name'],str), 'Illegal image filename type'
-        assert isinstance(image['id'],str), 'Illegal image ID type'
-        image['file_name'] = os.path.normpath(image['file_name'])
-        imagePathsInJson.add(image['file_name'])
-        imageId = image['id']
-        # Confirm ID uniqueness
-        assert imageId not in imageIdToImage, 'Duplicate image ID {}'.format(imageId)
-        imageIdToImage[imageId] = image
-        if 'height' in image:
-            assert 'width' in image, 'Image with height but no width: {}'.format(image['id'])
-        if 'width' in image:
-            assert 'height' in image, 'Image with width but no height: {}'.format(image['id'])
-        if options.bRequireLocation:
-            assert 'location' in image, 'No location available for: {}'.format(image['id'])
-        if 'location' in image:
-            # We previously supported ints here; this should be strings now
-            # assert isinstance(image['location'], str) or isinstance(image['location'], int), \
-            #  'Illegal image location type'
-            assert isinstance(image['location'], str)
-            imageLocationSet.add(image['location'])
-        if 'seq_id' in image:
-            sequences.add(image['seq_id'])
-        assert not ('sequence_id' in image or 'sequence' in image), 'Illegal sequence identifier'
-    # ...for each image
-    unusedFiles = []
-    # Are we checking for unused images?
-    if (len(baseDir) > 0) and options.bFindUnusedImages:
-        if options.verbose:
-            print('\nEnumerating images...')
-        # Recursively enumerate images
-        imagePaths = []
-        for root, dirs, files in os.walk(baseDir):
-            for file in files:
-                if file.lower().endswith(('.jpeg', '.jpg', '.png')):
-                    relDir = os.path.relpath(root, baseDir)
-                    relFile = os.path.join(relDir,file)
-                    relFile = os.path.normpath(relFile)
-                    if len(relFile) > 2 and \
-                        (relFile[0:2] == './' or relFile[0:2] == '.\\'):
-                            relFile = relFile[2:]
-                    imagePaths.append(relFile)
-        for p in imagePaths:
-            if p not in imagePathsInJson:
-                # print('Image {} is unused'.format(p))
-                unusedFiles.append(p)
-    validationErrors = []
-    # Are we checking file existence and/or image size?
-    if options.bCheckImageSizes or options.bCheckImageExistence:
-        if len(baseDir) == 0:
-            print('Warning: checking image sizes without a base directory, assuming "."')
-        if options.verbose:
-            print('Checking image existence and/or image sizes...')
-        if options.nThreads is not None and options.nThreads > 1:
-            # Hand [options] to the workers through a closure rather than through a
-            # shared module-level object, so concurrent checks cannot interfere.
-            # Leaving the "with" block shuts the pool down, so the results are
-            # collected into a list before that happens.
-            with ThreadPool(options.nThreads) as pool:
-                results = list(tqdm(
-                    pool.imap(lambda im: _check_image_existence_and_size(im,options), images),
-                    total=len(images)))
-        else:
-            results = []
-            for im in tqdm(images):
-                results.append(_check_image_existence_and_size(im,options))
-        # Results are in the same order as [images]
-        for iImage,r in enumerate(results):
-            if not r:
-                validationErrors.append(os.path.join(options.baseDir,images[iImage]['file_name']))
-    # ...if we're checking existence and/or size
-    if options.verbose:
-        print('{} validation errors (of {})'.format(len(validationErrors),len(images)))
-        print('Checking annotations...')
-    nBoxes = 0
-    for ann in tqdm(annotations):
-        # Confirm that required fields are present
-        assert 'image_id' in ann
-        assert 'id' in ann
-        assert 'category_id' in ann
-        assert isinstance(ann['id'],str), 'Illegal annotation ID type'
-        assert isinstance(ann['category_id'],int), 'Illegal annotation category ID type'
-        assert isinstance(ann['image_id'],str), 'Illegal annotation image ID type'
-        if 'bbox' in ann:
-            nBoxes += 1
-        annId = ann['id']
-        # Confirm ID uniqueness
-        assert annId not in annIdToAnn
-        annIdToAnn[annId] = ann
-        # Confirm validity
-        assert ann['category_id'] in catIdToCat, \
-            'Category {} not found in category list'.format(ann['category_id'])
-        assert ann['image_id'] in imageIdToImage, \
-          'Image ID {} referred to by annotation {}, not available'.format(
-            ann['image_id'],ann['id'])
-        imageIdToImage[ann['image_id']]['_count'] += 1
-        catIdToCat[ann['category_id']]['_count'] +=1
-    # ...for each annotation
-    sortedCategories = sorted(categories, key=itemgetter('_count'), reverse=True)
-    ##%% Print statistics
-    if options.verbose:
-        # Find un-annotated images and multi-annotation images
-        nUnannotated = 0
-        nMultiAnnotated = 0
-        for image in images:
-            if image['_count'] == 0:
-                nUnannotated += 1
-            elif image['_count'] > 1:
-                nMultiAnnotated += 1
-        print('Found {} unannotated images, {} images with multiple annotations'.format(
-                nUnannotated,nMultiAnnotated))
-        if (len(baseDir) > 0) and options.bFindUnusedImages:
-            print('Found {} unused image files'.format(len(unusedFiles)))
-        nUnusedCategories = 0
-        # Find unused categories
-        for cat in categories:
-            if cat['_count'] == 0:
-                print('Unused category: {}'.format(cat['name']))
-                nUnusedCategories += 1
-        print('Found {} unused categories'.format(nUnusedCategories))
-        sequenceString = 'no sequence info'
-        if len(sequences) > 0:
-            sequenceString = '{} sequences'.format(len(sequences))
-        print('\nDB contains {} images, {} annotations, {} bboxes, {} categories, {}\n'.format(
-                len(images),len(annotations),nBoxes,len(categories),sequenceString))
-        if len(imageLocationSet) > 0:
-            print('DB contains images from {} locations\n'.format(len(imageLocationSet)))
-        print('Categories and annotation (not image) counts:\n')
-        for cat in sortedCategories:
-            print('{:6} {}'.format(cat['_count'],cat['name']))
-        print('')
-    errorInfo = {}
-    errorInfo['unusedFiles'] = unusedFiles
-    errorInfo['validationErrors'] = validationErrors
-    return sortedCategories, data, errorInfo
-# ...def integrity_check_json_db()
-#%% Command-line driver
-def main():
-    """
-    Command-line entry point: parses the arguments into an IntegrityCheckOptions
-    object and runs integrity_check_json_db() on the requested .json file.
-    """
-    cli = argparse.ArgumentParser()
-    cli.add_argument('jsonFile')
-    cli.add_argument(
-        '--bCheckImageSizes', action='store_true',
-        help='Validate image size, requires baseDir to be specified. '
-             'Implies existence checking.')
-    cli.add_argument(
-        '--bCheckImageExistence', action='store_true',
-        help='Validate image existence, requires baseDir to be specified')
-    cli.add_argument(
-        '--bFindUnusedImages', action='store_true',
-        help='Check for images in baseDir that aren\'t in the database, '
-             'requires baseDir to be specified')
-    cli.add_argument(
-        '--baseDir', action='store', type=str, default='',
-        help='Base directory for images')
-    cli.add_argument(
-        '--bAllowNoLocation', action='store_true',
-        help='Disable errors when no location is specified for an image')
-    cli.add_argument(
-        '--iMaxNumImages', action='store', type=int, default=-1,
-        help='Cap on total number of images to check')
-    cli.add_argument(
-        '--nThreads', action='store', type=int, default=10,
-        help='Number of threads (only relevant when verifying image '
-             'sizes and/or existence)')
-    # With no arguments at all, show the usage text and quit
-    if not sys.argv[1:]:
-        cli.print_help()
-        cli.exit()
-    args = cli.parse_args()
-    # The command line exposes the negated form of the bRequireLocation option
-    args.bRequireLocation = (not args.bAllowNoLocation)
-    check_options = IntegrityCheckOptions()
-    ct_utils.args_to_object(args, check_options)
-    integrity_check_json_db(args.jsonFile,check_options)
-if __name__ == '__main__':
-    main()
-#%% Interactive driver(s)
-# Scratch code for interactive use; the "if False" guard means it never runs when
-# the module is imported or executed as a script.
-if False:
-    #%%
-    # Equivalent command-line invocation, kept here for reference:
-    """
-    python integrity_check_json_db.py ~/data/ena24.json --baseDir ~/data/ENA24 --bAllowNoLocation
-    """
-    # Integrity-check .json files for LILA
-    json_files = [os.path.expanduser('~/data/ena24.json')]
-    options = IntegrityCheckOptions()
-    options.baseDir = os.path.expanduser('~/data/ENA24')
-    options.bCheckImageSizes = False
-    # Also report image files under baseDir that the .json file does not mention
-    options.bFindUnusedImages = True
-    options.bRequireLocation = False
-    # options.iMaxNumImages = 10
-    for json_file in json_files:
-        # The third return value (errorInfo) is not used here
-        sortedCategories,data,_ = integrity_check_json_db(json_file, options)

megadetector 5.0.10__py3-none-any.whl → 5.0.11__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.10__py3-none-any.whl → 5.0.11__py3-none-any.whl