megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (197) hide show
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
@@ -7,7 +7,7 @@ Does some integrity-checking and computes basic statistics on a COCO Camera Trap
7
7
  * Verifies that required fields are present and have the right types
8
8
  * Verifies that annotations refer to valid images
9
9
  * Verifies that annotations refer to valid categories
10
- * Verifies that image, category, and annotation IDs are unique
10
+ * Verifies that image, category, and annotation IDs are unique
11
11
  * Optionally checks file existence
12
12
  * Finds un-annotated images
13
13
  * Finds unused categories
@@ -22,7 +22,8 @@ import json
22
22
  import os
23
23
  import sys
24
24
 
25
- from multiprocessing.pool import ThreadPool
25
+ from functools import partial
26
+ from multiprocessing.pool import Pool, ThreadPool
26
27
  from operator import itemgetter
27
28
  from tqdm import tqdm
28
29
 
@@ -37,38 +38,41 @@ class IntegrityCheckOptions:
37
38
  """
38
39
  Options for integrity_check_json_db()
39
40
  """
40
-
41
+
41
42
  def __init__(self):
42
-
43
+
43
44
  #: Image path; the filenames in the .json file should be relative to this folder
44
45
  self.baseDir = ''
45
-
46
+
46
47
  #: Should we validate the image sizes?
47
48
  self.bCheckImageSizes = False
48
-
49
+
49
50
  #: Should we check that all the images in the .json file exist on disk?
50
51
  self.bCheckImageExistence = False
51
-
52
+
52
53
  #: Should we search [baseDir] for images that are not used in the .json file?
53
54
  self.bFindUnusedImages = False
54
-
55
+
55
56
  #: Should we require that all images in the .json file have a 'location' field?
56
57
  self.bRequireLocation = True
57
-
58
+
58
59
  #: For debugging, limit the number of images we'll process
59
60
  self.iMaxNumImages = -1
60
-
61
+
61
62
  #: Number of threads to use for parallelization, set to <= 1 to disable parallelization
62
63
  self.nThreads = 10
63
-
64
+
65
+ #: Whether to use threads (rather than processes for parallelization)
66
+ self.parallelizeWithThreads = True
67
+
64
68
  #: Enable additional debug output
65
69
  self.verbose = True
66
-
70
+
67
71
  #: Allow integer-valued image and annotation IDs (COCO uses this, CCT files use strings)
68
72
  self.allowIntIDs = False
69
-
70
- # This is used in a medium-hacky way to share modified options across threads
71
- default_options = IntegrityCheckOptions()
73
+
74
+ #: If True, error if the 'info' field is not present
75
+ self.requireInfo = False
72
76
 
73
77
 
74
78
  #%% Functions
@@ -76,7 +80,7 @@ default_options = IntegrityCheckOptions()
76
80
  def _check_image_existence_and_size(image,options=None):
77
81
  """
78
82
  Validate the image represented in the CCT image dict [image], which should have fields:
79
-
83
+
80
84
  * file_name
81
85
  * width
82
86
  * height
@@ -84,266 +88,290 @@ def _check_image_existence_and_size(image,options=None):
84
88
  Args:
85
89
  image (dict): image to validate
86
90
  options (IntegrityCheckOptions): parameters impacting validation
87
-
91
+
88
92
  Returns:
89
93
  str: None if this image passes validation, otherwise an error string
90
94
  """
91
95
 
92
- if options is None:
93
- options = default_options
94
-
96
+ if options is None:
97
+ options = IntegrityCheckOptions()
98
+
95
99
  assert options.bCheckImageExistence
96
-
97
- filePath = os.path.join(options.baseDir,image['file_name'])
98
- if not os.path.isfile(filePath):
99
- s = 'Image path {} does not exist'.format(filePath)
100
+
101
+ file_path = os.path.join(options.baseDir,image['file_name'])
102
+ if not os.path.isfile(file_path):
103
+ s = 'Image path {} does not exist'.format(file_path)
100
104
  return s
101
-
105
+
102
106
  if options.bCheckImageSizes:
103
107
  if not ('height' in image and 'width' in image):
104
- s = 'Missing image size in {}'.format(filePath)
108
+ s = 'Missing image size in {}'.format(file_path)
109
+ return s
110
+
111
+ # width, height = Image.open(file_path).size
112
+ try:
113
+ pil_im = open_image(file_path)
114
+ except Exception as e:
115
+ s = 'Error opening {}: {}'.format(file_path,str(e))
105
116
  return s
106
117
 
107
- # width, height = Image.open(filePath).size
108
- pil_im = open_image(filePath)
109
118
  width,height = pil_im.size
110
119
  if (not (width == image['width'] and height == image['height'])):
111
120
  s = 'Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
112
- image['id'], filePath, image['width'], image['height'], width, height)
121
+ image['id'], file_path, image['width'], image['height'], width, height)
113
122
  return s
114
-
123
+
115
124
  return None
116
125
 
117
-
118
- def integrity_check_json_db(jsonFile, options=None):
126
+
127
+ def integrity_check_json_db(json_file, options=None):
119
128
  """
120
129
  Does some integrity-checking and computes basic statistics on a COCO Camera Traps .json file; see
121
130
  module header comment for a list of the validation steps.
122
-
131
+
123
132
  Args:
124
- jsonFile (str): filename to validate, or an already-loaded dict
125
-
133
+ json_file (str): filename to validate, or an already-loaded dict
134
+ options (IntegrityCheckOptions, optional): see IntegrityCheckOptions
135
+
126
136
  Returns:
127
137
  tuple: tuple containing:
128
- - sorted_categories (dict): list of categories used in [jsonFile], sorted by frequency
129
- - data (dict): the data loaded from [jsonFile]
138
+ - sorted_categories (dict): list of categories used in [json_file], sorted by frequency
139
+ - data (dict): the data loaded from [json_file]
130
140
  - error_info (dict): specific validation errors
131
141
  """
132
-
133
- if options is None:
142
+
143
+ if options is None:
134
144
  options = IntegrityCheckOptions()
135
-
136
- if options.bCheckImageSizes:
145
+
146
+ if options.bCheckImageSizes:
137
147
  options.bCheckImageExistence = True
138
-
148
+
139
149
  if options.verbose:
140
150
  print(options.__dict__)
141
-
151
+
142
152
  if options.baseDir is None:
143
153
  options.baseDir = ''
144
-
154
+
145
155
  base_dir = options.baseDir
146
-
147
-
156
+
157
+
148
158
  ##%% Read .json file if necessary, integrity-check fields
149
-
150
- if isinstance(jsonFile,dict):
151
-
152
- data = jsonFile
153
-
154
- elif isinstance(jsonFile,str):
155
-
156
- assert os.path.isfile(jsonFile), '.json file {} does not exist'.format(jsonFile)
157
-
159
+
160
+ if isinstance(json_file,dict):
161
+
162
+ data = json_file
163
+
164
+ elif isinstance(json_file,str):
165
+
166
+ assert os.path.isfile(json_file), '.json file {} does not exist'.format(json_file)
167
+
158
168
  if options.verbose:
159
169
  print('Reading .json {} with base dir [{}]...'.format(
160
- jsonFile,base_dir))
161
-
162
- with open(jsonFile,'r') as f:
163
- data = json.load(f)
164
-
170
+ json_file,base_dir))
171
+
172
+ with open(json_file,'r') as f:
173
+ data = json.load(f)
174
+
165
175
  else:
166
-
167
- raise ValueError('Illegal value for jsonFile')
168
-
176
+
177
+ raise ValueError('Illegal value for json_file')
178
+
169
179
  images = data['images']
170
180
  annotations = data['annotations']
171
181
  categories = data['categories']
172
- # info = data['info']
173
- assert 'info' in data, 'No info struct in database'
174
-
175
- if len(base_dir) > 0:
176
- assert os.path.isdir(base_dir), 'Base directory {} does not exist'.format(base_dir)
177
-
178
-
182
+
183
+ if options.requireInfo:
184
+ assert 'info' in data, 'No info struct in database'
185
+
186
+ if len(base_dir) > 0:
187
+ assert os.path.isdir(base_dir), \
188
+ 'Base directory {} does not exist'.format(base_dir)
189
+
190
+
179
191
  ##%% Build dictionaries, checking ID uniqueness and internal validity as we go
180
-
192
+
181
193
  image_id_to_image = {}
182
194
  ann_id_to_ann = {}
183
195
  category_id_to_category = {}
184
196
  category_name_to_category = {}
185
197
  image_location_set = set()
186
-
198
+
187
199
  if options.verbose:
188
200
  print('Checking categories...')
189
-
201
+
190
202
  for cat in tqdm(categories):
191
-
203
+
192
204
  # Confirm that required fields are present
193
205
  assert 'name' in cat
194
206
  assert 'id' in cat
195
-
196
- assert isinstance(cat['id'],int), 'Illegal category ID type: [{}]'.format(str(cat['id']))
197
- assert isinstance(cat['name'],str), 'Illegal category name type [{}]'.format(str(cat['name']))
198
-
207
+
208
+ assert isinstance(cat['id'],int), \
209
+ 'Illegal category ID type: [{}]'.format(str(cat['id']))
210
+ assert isinstance(cat['name'],str), \
211
+ 'Illegal category name type [{}]'.format(str(cat['name']))
212
+
199
213
  category_id = cat['id']
200
214
  category_name = cat['name']
201
-
215
+
202
216
  # Confirm ID uniqueness
203
- assert category_id not in category_id_to_category, 'Category ID {} is used more than once'.format(category_id)
217
+ assert category_id not in category_id_to_category, \
218
+ 'Category ID {} is used more than once'.format(category_id)
204
219
  category_id_to_category[category_id] = cat
205
220
  cat['_count'] = 0
206
-
207
- assert category_name not in category_name_to_category, 'Category name {} is used more than once'.format(category_name)
208
- category_name_to_category[category_name] = cat
209
-
221
+
222
+ assert category_name not in category_name_to_category, \
223
+ 'Category name {} is used more than once'.format(category_name)
224
+ category_name_to_category[category_name] = cat
225
+
210
226
  # ...for each category
211
-
227
+
212
228
  if options.verbose:
213
- print('\nChecking images...')
214
-
229
+ print('\nChecking image records...')
230
+
215
231
  if options.iMaxNumImages > 0 and len(images) > options.iMaxNumImages:
216
-
232
+
217
233
  if options.verbose:
218
234
  print('Trimming image list to {}'.format(options.iMaxNumImages))
219
235
  images = images[0:options.iMaxNumImages]
220
-
236
+
221
237
  image_paths_in_json = set()
222
-
238
+
223
239
  sequences = set()
224
-
240
+
225
241
  # image = images[0]
226
242
  for image in tqdm(images):
227
-
243
+
228
244
  image['_count'] = 0
229
-
245
+
230
246
  # Confirm that required fields are present
231
247
  assert 'file_name' in image
232
248
  assert 'id' in image
233
249
 
234
250
  image['file_name'] = image['file_name'].replace('\\','/')
235
-
251
+
236
252
  image_paths_in_json.add(image['file_name'])
237
-
253
+
238
254
  assert isinstance(image['file_name'],str), 'Illegal image filename type'
239
-
255
+
240
256
  if options.allowIntIDs:
241
257
  assert isinstance(image['id'],str) or isinstance(image['id'],int), \
242
258
  'Illegal image ID type'
243
259
  else:
244
260
  assert isinstance(image['id'],str), 'Illegal image ID type'
245
-
246
- image_id = image['id']
247
-
261
+
262
+ image_id = image['id']
263
+
248
264
  # Confirm ID uniqueness
249
265
  assert image_id not in image_id_to_image, 'Duplicate image ID {}'.format(image_id)
250
-
266
+
251
267
  image_id_to_image[image_id] = image
252
-
268
+
253
269
  if 'height' in image:
254
270
  assert 'width' in image, 'Image with height but no width: {}'.format(image['id'])
255
-
271
+
256
272
  if 'width' in image:
257
273
  assert 'height' in image, 'Image with width but no height: {}'.format(image['id'])
258
274
 
259
275
  if options.bRequireLocation:
260
276
  assert 'location' in image, 'No location available for: {}'.format(image['id'])
261
-
277
+
262
278
  if 'location' in image:
263
279
  # We previously supported ints here; this should be strings now
264
280
  # assert isinstance(image['location'], str) or isinstance(image['location'], int), \
265
281
  # 'Illegal image location type'
266
282
  assert isinstance(image['location'], str)
267
283
  image_location_set.add(image['location'])
268
-
284
+
269
285
  if 'seq_id' in image:
270
286
  sequences.add(image['seq_id'])
271
-
287
+
272
288
  assert not ('sequence_id' in image or 'sequence' in image), 'Illegal sequence identifier'
273
-
289
+
274
290
  unused_files = []
275
-
291
+
276
292
  image_paths_relative = None
277
-
293
+
278
294
  # Are we checking for unused images?
279
- if (len(base_dir) > 0) and options.bFindUnusedImages:
280
-
295
+ if (len(base_dir) > 0) and options.bFindUnusedImages:
296
+
281
297
  if options.verbose:
282
298
  print('\nEnumerating images...')
283
-
299
+
284
300
  image_paths_relative = find_images(base_dir,return_relative_paths=True,recursive=True)
285
-
301
+
286
302
  for fn_relative in image_paths_relative:
287
303
  if fn_relative not in image_paths_in_json:
288
304
  unused_files.append(fn_relative)
289
-
305
+
290
306
  # List of (filename,error_string) tuples
291
307
  validation_errors = []
292
-
308
+
293
309
  # If we're checking image existence but not image size, we don't need to read the images
294
310
  if options.bCheckImageExistence and not options.bCheckImageSizes:
295
-
311
+
296
312
  if image_paths_relative is None:
297
313
  image_paths_relative = find_images(base_dir,return_relative_paths=True,recursive=True)
298
-
314
+
299
315
  image_paths_relative_set = set(image_paths_relative)
300
-
316
+
301
317
  for im in images:
302
- if im['file_name'] not in image_paths_relative_set:
318
+ if im['file_name'] not in image_paths_relative_set:
303
319
  validation_errors.append((im['file_name'],'not found in relative path list'))
304
-
320
+
305
321
  # If we're checking image size, we need to read the images
306
322
  if options.bCheckImageSizes:
307
-
323
+
308
324
  if len(base_dir) == 0:
309
325
  print('Warning: checking image sizes without a base directory, assuming "."')
310
-
326
+
311
327
  if options.verbose:
312
328
  print('Checking image existence and/or image sizes...')
313
-
329
+
314
330
  if options.nThreads is not None and options.nThreads > 1:
331
+
332
+ if options.parallelizeWithThreads:
333
+ worker_string = 'threads'
334
+ else:
335
+ worker_string = 'processes'
336
+
315
337
  if options.verbose:
316
- print('Starting a pool of {} workers'.format(options.nThreads))
317
- pool = ThreadPool(options.nThreads)
318
- # results = pool.imap_unordered(lambda x: fetch_url(x,nImages), indexedUrlList)
319
- default_options.baseDir = options.baseDir
320
- default_options.bCheckImageSizes = options.bCheckImageSizes
321
- default_options.bCheckImageExistence = options.bCheckImageExistence
322
- results = tqdm(pool.imap(_check_image_existence_and_size, images), total=len(images))
338
+ print('Starting a pool of {} {}'.format(options.nThreads,worker_string))
339
+ if options.parallelizeWithThreads:
340
+ pool = ThreadPool(options.nThreads)
341
+ else:
342
+ pool = Pool(options.nThreads)
343
+ try:
344
+ results = list(tqdm(pool.imap(
345
+ partial(_check_image_existence_and_size,options=options), images),
346
+ total=len(images)))
347
+ finally:
348
+ pool.close()
349
+ pool.join()
350
+ print("Pool closed and joined for image size checks")
323
351
  else:
324
352
  results = []
325
- for im in tqdm(images):
353
+ for im in tqdm(images):
326
354
  results.append(_check_image_existence_and_size(im,options))
327
-
355
+
328
356
  for i_image,result in enumerate(results):
329
357
  if result is not None:
330
358
  validation_errors.append((images[i_image]['file_name'],result))
331
-
359
+
332
360
  # ...for each image
333
-
361
+
334
362
  if options.verbose:
335
363
  print('{} validation errors (of {})'.format(len(validation_errors),len(images)))
336
364
  print('Checking annotations...')
337
-
338
- nBoxes = 0
339
-
365
+
366
+ n_boxes = 0
367
+
340
368
  for ann in tqdm(annotations):
341
-
369
+
342
370
  # Confirm that required fields are present
343
371
  assert 'image_id' in ann
344
372
  assert 'id' in ann
345
373
  assert 'category_id' in ann
346
-
374
+
347
375
  if options.allowIntIDs:
348
376
  assert isinstance(ann['id'],str) or isinstance(ann['id'],int), \
349
377
  'Illegal annotation ID type'
@@ -352,149 +380,149 @@ def integrity_check_json_db(jsonFile, options=None):
352
380
  else:
353
381
  assert isinstance(ann['id'],str), 'Illegal annotation ID type'
354
382
  assert isinstance(ann['image_id'],str), 'Illegal annotation image ID type'
355
-
383
+
356
384
  assert isinstance(ann['category_id'],int), 'Illegal annotation category ID type'
357
-
385
+
358
386
  if 'bbox' in ann:
359
- nBoxes += 1
360
-
361
- annId = ann['id']
362
-
387
+ n_boxes += 1
388
+
389
+ ann_id = ann['id']
390
+
363
391
  # Confirm ID uniqueness
364
- assert annId not in ann_id_to_ann
365
- ann_id_to_ann[annId] = ann
366
-
392
+ assert ann_id not in ann_id_to_ann
393
+ ann_id_to_ann[ann_id] = ann
394
+
367
395
  # Confirm validity
368
396
  assert ann['category_id'] in category_id_to_category, \
369
397
  'Category {} not found in category list'.format(ann['category_id'])
370
398
  assert ann['image_id'] in image_id_to_image, \
371
399
  'Image ID {} referred to by annotation {}, not available'.format(
372
400
  ann['image_id'],ann['id'])
373
-
401
+
374
402
  image_id_to_image[ann['image_id']]['_count'] += 1
375
- category_id_to_category[ann['category_id']]['_count'] +=1
376
-
403
+ category_id_to_category[ann['category_id']]['_count'] +=1
404
+
377
405
  # ...for each annotation
378
-
406
+
379
407
  sorted_categories = sorted(categories, key=itemgetter('_count'), reverse=True)
380
-
381
-
408
+
409
+
382
410
  ##%% Print statistics
383
-
411
+
384
412
  if options.verbose:
385
-
413
+
386
414
  # Find un-annotated images and multi-annotation images
387
- nUnannotated = 0
388
- nMultiAnnotated = 0
389
-
415
+ n_unannotated = 0
416
+ n_multi_annotated = 0
417
+
390
418
  for image in images:
391
419
  if image['_count'] == 0:
392
- nUnannotated += 1
420
+ n_unannotated += 1
393
421
  elif image['_count'] > 1:
394
- nMultiAnnotated += 1
395
-
422
+ n_multi_annotated += 1
423
+
396
424
  print('\nFound {} unannotated images, {} images with multiple annotations'.format(
397
- nUnannotated,nMultiAnnotated))
398
-
425
+ n_unannotated,n_multi_annotated))
426
+
399
427
  if (len(base_dir) > 0) and options.bFindUnusedImages:
400
428
  print('Found {} unused image files'.format(len(unused_files)))
401
-
429
+
402
430
  n_unused_categories = 0
403
-
431
+
404
432
  # Find unused categories
405
433
  for cat in categories:
406
434
  if cat['_count'] == 0:
407
435
  print('Unused category: {}'.format(cat['name']))
408
436
  n_unused_categories += 1
409
-
437
+
410
438
  print('Found {} unused categories'.format(n_unused_categories))
411
-
412
- sequenceString = 'no sequence info'
439
+
440
+ sequence_string = 'no sequence info'
413
441
  if len(sequences) > 0:
414
- sequenceString = '{} sequences'.format(len(sequences))
415
-
442
+ sequence_string = '{} sequences'.format(len(sequences))
443
+
416
444
  print('\nDB contains {} images, {} annotations, {} bboxes, {} categories, {}\n'.format(
417
- len(images),len(annotations),nBoxes,len(categories),sequenceString))
418
-
445
+ len(images),len(annotations),n_boxes,len(categories),sequence_string))
446
+
419
447
  if len(image_location_set) > 0:
420
448
  print('DB contains images from {} locations\n'.format(len(image_location_set)))
421
-
449
+
422
450
  print('Categories and annotation (not image) counts:\n')
423
-
451
+
424
452
  for cat in sorted_categories:
425
453
  print('{:6} {}'.format(cat['_count'],cat['name']))
426
-
454
+
427
455
  print('')
428
-
456
+
429
457
  error_info = {}
430
458
  error_info['unused_files'] = unused_files
431
459
  error_info['validation_errors'] = validation_errors
432
-
460
+
433
461
  return sorted_categories, data, error_info
434
462
 
435
463
  # ...def integrity_check_json_db()
436
-
464
+
437
465
 
438
466
  #%% Command-line driver
def main():
    """
    Command-line driver for integrity_check_json_db(): parses arguments into
    an IntegrityCheckOptions object and runs the integrity check on the
    specified COCO-formatted .json file.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument('json_file',type=str,
                        help='COCO-formatted .json file to validate')
    parser.add_argument('--bCheckImageSizes', action='store_true',
                        help='Validate image size, requires baseDir to be specified. ' + \
                        'Implies existence checking.')
    parser.add_argument('--bCheckImageExistence', action='store_true',
                        help='Validate image existence, requires baseDir to be specified')
    parser.add_argument('--bFindUnusedImages', action='store_true',
                        help='Check for images in baseDir that aren\'t in the database, ' + \
                        'requires baseDir to be specified')
    parser.add_argument('--baseDir', action='store', type=str, default='',
                        help='Base directory for images')
    parser.add_argument('--bAllowNoLocation', action='store_true',
                        help='Disable errors when no location is specified for an image')
    parser.add_argument('--iMaxNumImages', action='store', type=int, default=-1,
                        help='Cap on total number of images to check')
    parser.add_argument('--nThreads', action='store', type=int, default=10,
                        help='Number of threads (only relevant when verifying image ' + \
                        'sizes and/or existence)')

    # With no arguments, print help rather than an argparse error
    if len(sys.argv[1:])==0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # The library option is "require location"; the CLI exposes the inverse
    # ("allow no location"), so flip it here before copying args over.
    args.bRequireLocation = (not args.bAllowNoLocation)
    options = IntegrityCheckOptions()
    ct_utils.args_to_object(args, options)
    integrity_check_json_db(args.json_file,options)
 
# Run the command-line driver only when executed as a script (not on import)
if __name__ == '__main__':
    main()
475
503
 
476
504
 
#%% Interactive driver(s)

# This block never executes at import or script time; it holds snippets
# intended to be run interactively (e.g. cell-by-cell in an IDE/REPL).
if False:

    #%%

    """
    python integrity_check_json_db.py ~/data/ena24.json --baseDir ~/data/ENA24 --bAllowNoLocation
    """

    # Integrity-check .json files for LILA; extend this list to check more files
    json_files = [os.path.expanduser('~/data/ena24.json')]

    # Check for unused images but skip size validation; these files don't
    # carry location info, so don't require it.
    options = IntegrityCheckOptions()
    options.baseDir = os.path.expanduser('~/data/ENA24')
    options.bCheckImageSizes = False
    options.bFindUnusedImages = True
    options.bRequireLocation = False

    # options.iMaxNumImages = 10

    for json_file in json_files:

        sorted_categories,data,_ = integrity_check_json_db(json_file, options)