megadetector 5.0.28-py3-none-any.whl → 10.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/efficientnet/model.py +8 -8
- megadetector/classification/efficientnet/utils.py +6 -5
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +26 -26
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -2
- megadetector/data_management/camtrap_dp_to_coco.py +79 -46
- megadetector/data_management/cct_json_utils.py +103 -103
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +210 -193
- megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
- megadetector/data_management/databases/integrity_check_json_db.py +228 -200
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +88 -39
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +133 -125
- megadetector/data_management/labelme_to_yolo.py +159 -73
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
- megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +73 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
- megadetector/data_management/mewc_to_md.py +344 -340
- megadetector/data_management/ocr_tools.py +262 -255
- megadetector/data_management/read_exif.py +249 -227
- megadetector/data_management/remap_coco_categories.py +90 -28
- megadetector/data_management/remove_exif.py +81 -21
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +588 -120
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +248 -122
- megadetector/data_management/yolo_to_coco.py +333 -191
- megadetector/detection/change_detection.py +832 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +358 -278
- megadetector/detection/run_detector.py +399 -186
- megadetector/detection/run_detector_batch.py +404 -377
- megadetector/detection/run_inference_with_yolov5_val.py +340 -327
- megadetector/detection/run_tiled_inference.py +257 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +332 -295
- megadetector/postprocessing/add_max_conf.py +19 -11
- megadetector/postprocessing/categorize_detections_by_size.py +45 -45
- megadetector/postprocessing/classification_postprocessing.py +468 -433
- megadetector/postprocessing/combine_batch_outputs.py +23 -23
- megadetector/postprocessing/compare_batch_results.py +590 -525
- megadetector/postprocessing/convert_output_format.py +106 -102
- megadetector/postprocessing/create_crop_folder.py +347 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +48 -27
- megadetector/postprocessing/md_to_coco.py +133 -102
- megadetector/postprocessing/md_to_labelme.py +107 -90
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +92 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -301
- megadetector/postprocessing/remap_detection_categories.py +91 -38
- megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +156 -74
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/ct_utils.py +1049 -211
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +632 -529
- megadetector/utils/path_utils.py +1520 -431
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/split_locations_into_train_val.py +62 -62
- megadetector/utils/string_utils.py +148 -27
- megadetector/utils/url_utils.py +489 -176
- megadetector/utils/wi_utils.py +2658 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +34 -30
- megadetector/visualization/render_images_with_thumbnails.py +39 -74
- megadetector/visualization/visualization_utils.py +487 -435
- megadetector/visualization/visualize_db.py +232 -198
- megadetector/visualization/visualize_detector_output.py +82 -76
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
- megadetector-10.0.0.dist-info/RECORD +139 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
- megadetector/api/batch_processing/api_core/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
- megadetector/api/batch_processing/api_core/server.py +0 -294
- megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
- megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
- megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
- megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
- megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
- megadetector/api/batch_processing/api_core/server_utils.py +0 -88
- megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
- megadetector/api/batch_processing/api_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
- megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
- megadetector/api/synchronous/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
- megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
- megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
- megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
- megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector/utils/azure_utils.py +0 -178
- megadetector/utils/sas_blob_utils.py +0 -509
- megadetector-5.0.28.dist-info/RECORD +0 -209
- /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
megadetector/data_management/lila/create_lila_blank_set.py

@@ -4,7 +4,7 @@ create_lila_blank_set.py
 
 Create a folder of blank images sampled from LILA. We'll aim for diversity, so less-common
 locations will be oversampled relative to more common locations. We'll also run MegaDetector
-(with manual review) to remove some incorrectly-labeled, not-actually-empty images from our
+(with manual review) to remove some incorrectly-labeled, not-actually-empty images from our
 blank set.
 
 We'll store location information for each image in a .json file, so we can split locations
@@ -27,8 +27,15 @@ from collections import defaultdict
 
 from megadetector.data_management.lila.lila_common import read_lila_all_images_file
 from megadetector.utils.url_utils import download_url
+from megadetector.utils.ct_utils import sort_dictionary_by_value
+from megadetector.utils.path_utils import is_image_file
+from megadetector.utils.path_utils import find_images
 from megadetector.visualization import visualization_utils as vis_utils
 from megadetector.utils.path_utils import recursive_file_list
+from megadetector.utils import ct_utils
+
+
+#%% Environment
 
 # We'll write images, metadata downloads, and temporary files here
 lila_local_base = os.path.expanduser('~/lila')
@@ -48,7 +55,7 @@ md_possible_non_blanks_folder = os.path.join(project_base,'candidate_non_blanks')
 os.makedirs(md_possible_non_blanks_folder,exist_ok=True)
 
 location_to_blank_image_urls_cache_file = os.path.join(project_base,
-    'location_to_blank_image_urls.json')
+    'location_to_blank_image_urls.json')
 
 md_results_file = os.path.join(project_base,'lila_blanks_md_results.json')
 
@@ -90,10 +97,10 @@ other_labels_without_common_names = (
     'car', 'motorcycle', 'vehicle'
 )
 
-common_names = sorted(list(df['common_name'].unique()),
-    key=lambda x:str(x) if isinstance(x,float) else x)
+common_names = sorted(list(df['common_name'].unique()),
+    key=lambda x:str(x) if isinstance(x,float) else x)
 original_labels = sorted(list(df['original_label'].unique()),
-    key=lambda x:str(x) if isinstance(x,float) else x)
+    key=lambda x:str(x) if isinstance(x,float) else x)
 
 # Blanks are represented as NaN in the "common_name" column (though not all NaN's are blanks)
 assert '' not in common_names
@@ -118,16 +125,16 @@ original_label_to_count = defaultdict(int)
 
 # This loop takes ~10 mins
 for i_row,row in tqdm(df.iterrows(),total=len(df)):
-
+
     common_name = row['common_name']
     original_label = row['original_label']
-
+
     if isinstance(common_name,float):
         assert np.isnan(common_name)
         original_labels_with_nan_common_names.add(original_label)
-
+
     common_name = str(common_name)
-
+
     assert isinstance(original_label,str)
     if original_label in blank_original_labels:
        common_names_with_empty_original_labels.add(common_name)
@@ -137,7 +144,6 @@ for i_row,row in tqdm(df.iterrows(),total=len(df)):
 
 #%% Look at the most common labels and common names
 
-from megadetector.utils.ct_utils import sort_dictionary_by_value
 common_name_to_count = sort_dictionary_by_value(common_name_to_count,reverse=True)
 original_label_to_count = sort_dictionary_by_value(original_label_to_count,reverse=True)
 
@@ -185,32 +191,31 @@ force_map_locations = False
 
 # Load from .json if available
 if (not force_map_locations) and (os.path.isfile(location_to_blank_image_urls_cache_file)):
-
+
     with open(location_to_blank_image_urls_cache_file,'r') as f:
         location_to_blank_image_urls = json.load(f)
 
 else:
-
+
     location_to_blank_image_urls = defaultdict(list)
-
+
     # i_row = 0; row = df.iloc[i_row]
    for i_row,row in tqdm(df.iterrows(),total=len(df)):
-
+
         location_id = row['location_id']
         url = row['url']
-
+
         original_label = row['original_label']
         if original_label in blank_original_labels:
             assert np.isnan(row['common_name'])
             location_to_blank_image_urls[location_id].append(url)
 
-    with open(location_to_blank_image_urls_cache_file,'w') as f:
-        json.dump(location_to_blank_image_urls,f,indent=1)
+    ct_utils.write_json(location_to_blank_image_urls_cache_file, location_to_blank_image_urls)
 
 n_locations_with_blanks = len(location_to_blank_image_urls)
 print('Found {} locations with blank images'.format(n_locations_with_blanks))
 
-
+
 #%% Sample blanks
 
 random.seed(0)
@@ -223,7 +228,7 @@ for location in location_to_blank_image_urls:
     blank_image_urls_this_location = location_to_blank_image_urls[location]
     unsampled_blank_image_urls_this_location = blank_image_urls_this_location.copy()
     location_to_unsampled_blank_image_urls[location] = unsampled_blank_image_urls_this_location
-
+
 # Put locations in a random order
 location_ids = list(location_to_unsampled_blank_image_urls.keys())
 random.shuffle(location_ids)
@@ -234,32 +239,32 @@ fully_sampled_locations = set()
 
 # Pick from each location until we hit our limit or have no blanks left
 while(True):
-
+
     found_sample = False
-
+
     # location = location_ids[0]
     for location in location_ids:
-
+
         unsampled_images_this_location = location_to_unsampled_blank_image_urls[location]
         if len(unsampled_images_this_location) == 0:
             fully_sampled_locations.add(location)
             continue
-
+
         url = random.choice(unsampled_images_this_location)
-        blank_urls.append(url)
+        blank_urls.append(url)
         location_to_unsampled_blank_image_urls[location].remove(url)
         location_to_sampled_blanks[location].append(url)
         found_sample = True
-
+
         if len(blank_urls) == n_blanks:
             break
-
+
     # ...for each location
-
+
     if not found_sample:
         print('Terminating after {} blanks, we ran out before hitting {}'.format(
             len(blank_urls),n_blanks))
-
+
     if len(blank_urls) == n_blanks:
         break
 
@@ -278,39 +283,39 @@ for location in location_to_sampled_blanks:
 print('Choose {} blanks from {} locations'.format(n_blanks,len(location_ids)))
 print('Fully sampled {} locations'.format(len(fully_sampled_locations)))
 print('Max samples per location: {}'.format(max_blanks_per_location))
-
+
 
 #%% Download those image files (prep)
 
 container_to_url_base = {
-
-
-
+    'lilawildlife.blob.core.windows.net':'/lila-wildlide/',
+    'storage.googleapis.com':'/public-datasets-lila/'
+}
 
 def download_relative_filename(url, output_base, verbose=False, url_base=None, overwrite=False):
     """
     Download a URL to output_base, preserving relative path
     """
-
+
     result = {'status':'unknown','url':url,'destination_filename':None}
-
+
     if url_base is None:
         assert url.startswith('https://')
         container = url.split('/')[2]
         assert container in container_to_url_base
         url_base = container_to_url_base[container]
-
+
     assert url_base.startswith('/') and url_base.endswith('/')
-
+
     p = urlparse(url)
     relative_filename = str(p.path)
     # remove the leading '/'
     assert relative_filename.startswith(url_base)
-    relative_filename = relative_filename.replace(url_base,'',1)
-
+    relative_filename = relative_filename.replace(url_base,'',1)
+
     destination_filename = os.path.join(output_base,relative_filename)
     result['destination_filename'] = destination_filename
-
+
     if ((os.path.isfile(destination_filename)) and (not overwrite)):
         result['status'] = 'skipped'
         return result
@@ -318,10 +323,10 @@ def download_relative_filename(url, output_base, verbose=False, url_base=None, overwrite=False):
         download_url(url, destination_filename, verbose=verbose)
     except Exception as e:
         print('Warning: error downloading URL {}: {}'.format(
-            url,str(e)))
+            url,str(e)))
         result['status'] = 'error: {}'.format(str(e))
         return result
-
+
     result['status'] = 'success'
     return result
 
@@ -331,11 +336,11 @@ def azure_url_to_gcp_http_url(url,error_if_not_azure_url=True):
     Most URLs point to Azure by default, but most files are available on both Azure and GCP.
     This function converts an Azure URL to the corresponding GCP http:// url.
     """
-
+
     lila_azure_storage_account = 'https://lilawildlife.blob.core.windows.net'
     gcp_bucket_api_url = 'https://storage.googleapis.com/public-datasets-lila'
     error_if_not_azure_url = False
-
+
     if error_if_not_azure_url:
         assert url.startswith(lila_azure_storage_account)
     gcp_url = url.replace(lila_azure_storage_account,gcp_bucket_api_url,1)
@@ -344,7 +349,7 @@ def azure_url_to_gcp_http_url(url,error_if_not_azure_url=True):
 # Convert Azure URLs to GCP URLs if necessary
 if preferred_image_download_source != 'azure':
     assert preferred_image_download_source == 'gcp'
-    blank_urls = [azure_url_to_gcp_http_url(url) for url in blank_urls]
+    blank_urls = [azure_url_to_gcp_http_url(url) for url in blank_urls]
 
 
 #%% Download those image files (execution)
@@ -354,16 +359,16 @@ print('Downloading {} images on {} workers'.format(len(blank_urls),n_download_threads))
 if n_download_threads <= 1:
 
     results = []
-
+
     # url = all_urls[0]
-    for url in tqdm(blank_urls):
+    for url in tqdm(blank_urls):
         results.append(download_relative_filename(url,candidate_blanks_base,url_base=None))
-
+
 else:
 
-    pool = ThreadPool(n_download_threads)
+    pool = ThreadPool(n_download_threads)
     results = list(tqdm(pool.imap(lambda s: download_relative_filename(
-        s,candidate_blanks_base,url_base=None),
+        s,candidate_blanks_base,url_base=None),
         blank_urls), total=len(blank_urls)))
 
 # pool.terminate()
@@ -385,7 +390,7 @@ cmd = 'python run_detector_batch.py MDV5A "{}" "{}"'.format(
     candidate_blanks_base,md_results_file)
 cmd += ' --recursive --output_relative_filenames'
 
-import clipboard; clipboard.copy(cmd); print(cmd)
+# import clipboard; clipboard.copy(cmd); print(cmd)
 
 
 #%% Review MD results that suggests images are non-empty
@@ -406,11 +411,11 @@ for category_id in md_results['detection_categories']:
 
 # im = md_results['images'][0]
 for im in md_results['images']:
-
+
     if 'detections' not in im:
         continue
-
-    found_object = False
+
+    found_object = False
     for det in im['detections']:
         threshold = category_id_to_threshold[det['category']]
         if det['conf'] >= threshold:
@@ -425,8 +430,8 @@ output_file_to_source_file = {}
 
 # i_fn = 0; source_file_relative = images_to_review[i_fn]
 for i_fn,source_file_relative in tqdm(enumerate(images_to_review_to_detections),
-    total=len(images_to_review_to_detections)):
-
+    total=len(images_to_review_to_detections)):
+
     source_file_abs = os.path.join(candidate_blanks_base,source_file_relative)
     assert os.path.isfile(source_file_abs)
     ext = os.path.splitext(source_file_abs)[1]
@@ -435,16 +440,15 @@ for i_fn,source_file_relative in tqdm(enumerate(images_to_review_to_detections),
     output_file_to_source_file[target_file_relative] = source_file_relative
     # shutil.copyfile(source_file_abs,target_file_abs)
     vis_utils.draw_bounding_boxes_on_file(input_file=source_file_abs,
-
-
-
-
+        output_file=target_file_abs,
+        detections=images_to_review_to_detections[source_file_relative],
+        confidence_threshold=min_threshold,
+        target_size=(1280,-1))
 
 # This is a temporary file I just used during debugging
-
-
-
-
+ct_utils.write_json(os.path.join(project_base,'output_file_to_source_file.json'), output_file_to_source_file)
+
+
 #%% Manual review
 
 # Delete images that are *not* empty
@@ -463,15 +467,13 @@ for output_file in tqdm(output_file_to_source_file.keys()):
     if output_file not in remaining_images:
         source_file_relative = output_file_to_source_file[output_file]
         removed_blank_images_relative.append(source_file_relative)
-
+
 removed_blank_images_relative_set = set(removed_blank_images_relative)
 assert len(removed_blank_images_relative) + len(remaining_images) == len(output_file_to_source_file)
 
 
 #%% Copy only the confirmed blanks to the confirmed folder
 
-from megadetector.utils.path_utils import is_image_file
-
 all_candidate_blanks = recursive_file_list(candidate_blanks_base,return_relative_paths=True)
 print('Found {} candidate blanks'.format(len(all_candidate_blanks)))
 
@@ -479,19 +481,19 @@ skipped_images_relative = []
 skipped_non_images = []
 
 for source_fn_relative in tqdm(all_candidate_blanks):
-
+
     # Skip anything we removed from the "candidate non-blanks" folder; these weren't really
     # blank.
     if source_fn_relative in removed_blank_images_relative_set:
         skipped_images_relative.append(source_fn_relative)
         continue
-
+
     if not is_image_file(source_fn_relative):
         # Not a typo; "skipped images" really means "skipped files"
         skipped_images_relative.append(source_fn_relative)
         skipped_non_images.append(source_fn_relative)
-
-
+
+
     source_fn_abs = os.path.join(candidate_blanks_base,source_fn_relative)
     assert os.path.isfile(source_fn_abs)
     target_fn_abs = os.path.join(confirmed_blanks_base,source_fn_relative)
@@ -499,12 +501,11 @@ for source_fn_relative in tqdm(all_candidate_blanks):
     # shutil.copyfile(source_fn_abs,target_fn_abs)
 
 print('Skipped {} files ({} non-image files)'.format(len(skipped_images_relative),
-    len(skipped_non_images)))
+    len(skipped_non_images)))
 
 
 #%% Validate the folder of confirmed blanks
 
-from megadetector.utils.path_utils import find_images
 # all_confirmed_blanks = recursive_file_list(confirmed_blanks_base,return_relative_paths=True)
 all_confirmed_blanks = find_images(confirmed_blanks_base,return_relative_paths=True,recursive=True)
 assert len(all_confirmed_blanks) < len(all_candidate_blanks)
@@ -518,8 +519,8 @@ i_image = random.randint(0, len(skipped_images_relative))
 fn_relative = skipped_images_relative[i_image]
 fn_abs = os.path.join(candidate_blanks_base,fn_relative)
 assert os.path.isfile(fn_abs)
-
-clipboard.copy('feh --scale-down "{}"'.format(fn_abs))
+
+# import clipboard; clipboard.copy('feh --scale-down "{}"'.format(fn_abs))
 
 
 #%% Record location information for each confirmed file
@@ -532,27 +533,24 @@ all_fn_relative_to_location = {}
 # location = next(iter(location_to_blank_image_urls.keys()))
 for location in tqdm(location_to_blank_image_urls):
     urls_this_location = location_to_blank_image_urls[location]
-
+
     # url = urls_this_location[0]
     for url in urls_this_location:
         # Turn:
-        #
+        #
         # https://lilablobssc.blob.core.windows.net/caltech-unzipped/cct_images/5968c0f9-23d2-11e8-a6a3-ec086b02610b.jpg'
         #
         # ...into:
         #
-        # caltech-unzipped/cct_images/5968c0f9-23d2-11e8-a6a3-ec086b02610b.jpg'
+        # caltech-unzipped/cct_images/5968c0f9-23d2-11e8-a6a3-ec086b02610b.jpg'
         p = urlparse(url)
         fn_relative = str(p.path)[1:]
         all_fn_relative_to_location[fn_relative] = location
 
 # Build a much smaller mapping of just the confirmed blanks
-confirmed_fn_relative_to_location = {}
+confirmed_fn_relative_to_location = {}
 for i_fn,fn_relative in tqdm(enumerate(all_confirmed_blanks),total=len(all_confirmed_blanks)):
     confirmed_fn_relative_to_location[fn_relative] = all_fn_relative_to_location[fn_relative]
 
-
-
-
-with open(confirmed_fn_relative_to_location_file,'w') as f:
-    json.dump(confirmed_fn_relative_to_location,f,indent=1)
+ct_utils.write_json(all_fn_relative_to_location_file, all_fn_relative_to_location)
+ct_utils.write_json(confirmed_fn_relative_to_location_file, confirmed_fn_relative_to_location)
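A change that recurs throughout the create_lila_blank_set.py hunks above: inline JSON-writing blocks (open a file, call json.dump) are replaced by the new ct_utils.write_json helper, and ad-hoc mid-file imports are hoisted to the top of the module. A minimal sketch of the substitution, using hypothetical data and a hypothetical path, and assuming only the positional (output_path, object) usage visible in the diff; write_json's defaults (e.g. indentation) are not specified here:

    import json
    from megadetector.utils import ct_utils

    location_to_urls = {'loc_01': ['https://example.org/img_0001.jpg']}  # hypothetical data
    cache_file = 'location_to_blank_image_urls.json'                     # hypothetical path

    # 5.0.28-era pattern, as removed in the hunks above
    with open(cache_file, 'w') as f:
        json.dump(location_to_urls, f, indent=1)

    # 10.0.0 pattern, as added in the hunks above
    ct_utils.write_json(cache_file, location_to_urls)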
megadetector/data_management/lila/create_lila_test_set.py

@@ -2,7 +2,7 @@
 
 create_lila_test_set.py
 
-Create a test set of camera trap images, containing N empty and N non-empty
+Create a test set of camera trap images, containing N empty and N non-empty
 images from each LILA data set.
 
 """
@@ -15,6 +15,7 @@ import random
 
 from megadetector.data_management.lila.lila_common import \
     read_lila_metadata, read_metadata_file_for_dataset
+from megadetector.utils.url_utils import parallel_download_urls
 
 n_empty_images_per_dataset = 1
 n_non_empty_images_per_dataset = 1
@@ -39,9 +40,10 @@ metadata_table = read_lila_metadata(metadata_dir)
 #%% Download and extract metadata for every dataset
 
 for ds_name in metadata_table.keys():
-    metadata_table[ds_name]['metadata_filename'] =
-
-
+    metadata_table[ds_name]['metadata_filename'] = \
+        read_metadata_file_for_dataset(ds_name=ds_name,
+            metadata_dir=metadata_dir,
+            metadata_table=metadata_table)
 
 
 #%% Choose images from each dataset
@@ -52,49 +54,49 @@ for ds_name in metadata_table.keys():
 for ds_name in metadata_table.keys():
 
     print('Choosing images for {}'.format(ds_name))
-
+
     json_filename = metadata_table[ds_name]['metadata_filename']
-
+
     with open(json_filename,'r') as f:
         d = json.load(f)
-
+
     category_id_to_name = {c['id']:c['name'] for c in d['categories']}
     category_name_to_id = {c['name']:c['id'] for c in d['categories']}
-
+
     ## Find empty images
-
+
     if 'empty' not in category_name_to_id:
         empty_annotations_to_download = []
     else:
-        empty_category_id = category_name_to_id['empty']
+        empty_category_id = category_name_to_id['empty']
         empty_annotations = [ann for ann in d['annotations'] if ann['category_id'] == empty_category_id]
         try:
-            empty_annotations_to_download = random.sample(empty_annotations,n_empty_images_per_dataset)
+            empty_annotations_to_download = random.sample(empty_annotations,n_empty_images_per_dataset)
         except ValueError:
             print('No empty images available for dataset {}'.format(ds_name))
             empty_annotations_to_download = []
-
+
     ## Find non-empty images
-
-    non_empty_annotations = [ann for ann in d['annotations'] if ann['category_id'] != empty_category_id]
+
+    non_empty_annotations = [ann for ann in d['annotations'] if ann['category_id'] != empty_category_id]
     try:
         non_empty_annotations_to_download = random.sample(non_empty_annotations,n_non_empty_images_per_dataset)
     except ValueError:
         print('No non-empty images available for dataset {}'.format(ds_name))
         non_empty_annotations_to_download = []
 
-
+
     annotations_to_download = empty_annotations_to_download + non_empty_annotations_to_download
-
+
     image_ids_to_download = set([ann['image_id'] for ann in annotations_to_download])
     assert len(image_ids_to_download) == len(set(image_ids_to_download))
-
+
     images_to_download = []
     for im in d['images']:
         if im['id'] in image_ids_to_download:
             images_to_download.append(im)
     assert len(images_to_download) == len(image_ids_to_download)
-
+
     metadata_table[ds_name]['images_to_download'] = images_to_download
 
 # ...for each dataset
@@ -109,19 +111,19 @@ for ds_name in metadata_table.keys():
 
     base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
     assert not base_url.endswith('/')
-
+
     # Retrieve image file names
     filenames = [im['file_name'] for im in metadata_table[ds_name]['images_to_download']]
-
+
     urls_to_download = []
-
+
     # Convert to URLs
-    for fn in filenames:
+    for fn in filenames:
         url = base_url + '/' + fn
         urls_to_download.append(url)
 
     metadata_table[ds_name]['urls_to_download'] = urls_to_download
-
+
 # ...for each dataset
 
 
@@ -135,26 +137,25 @@ for ds_name in metadata_table.keys():
     base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
     assert not base_url.endswith('/')
     base_url += '/'
-
+
     urls_to_download = metadata_table[ds_name]['urls_to_download']
-
+
     # url = urls_to_download[0]
     for url in urls_to_download:
-
+
         assert base_url in url
-        output_file_relative = ds_name.lower().replace(' ','_') +
+        output_file_relative = ds_name.lower().replace(' ','_') + \
+            '_' + url.replace(base_url,'').replace('/','_').replace('\\','_')
         output_file_absolute = os.path.join(output_dir,output_file_relative)
         url_to_target_file[url] = output_file_absolute
-
+
     # ...for each url
-
+
 # ...for each dataset
 
 
 #%% Download image files (execution)
 
-from megadetector.utils.url_utils import parallel_download_urls
-
 download_results = parallel_download_urls(url_to_target_file,
     verbose=False,
     overwrite=False,
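In the create_lila_test_set.py hunks above, the parallel_download_urls import moves to the top of the module, and the script builds a URL-to-target-file mapping before downloading. A minimal usage sketch with a hypothetical URL and output path, using only the arguments visible in the (truncated) call shown in the diff and assuming the remaining parameters have defaults:

    import os
    from megadetector.utils.url_utils import parallel_download_urls

    output_dir = os.path.expanduser('~/lila/lila_test_set')  # hypothetical output folder
    os.makedirs(output_dir, exist_ok=True)

    url_to_target_file = {
        # Hypothetical URL, in the flattened-filename style the script constructs;
        # substitute real LILA image URLs before running.
        'https://storage.googleapis.com/public-datasets-lila/some-dataset/example.jpg':
            os.path.join(output_dir, 'some_dataset_example.jpg')
    }

    download_results = parallel_download_urls(url_to_target_file,
                                               verbose=False,
                                               overwrite=False)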
megadetector/data_management/lila/create_links_to_md_results_files.py

@@ -19,7 +19,7 @@ md_results_local_folder = r'g:\temp\lila-md-results'
 md_base_url = 'https://lila.science/public/lila-md-results/'
 assert md_base_url.endswith('/')
 
-# No RDE files for datasets with no location information
+# No RDE files for datasets with no location information
 datasets_without_location_info = ('ena24','missouri-camera-traps')
 
 md_results_column_names = ['mdv4_results_raw','mdv5a_results_raw','mdv5b_results_raw','md_results_with_rde']
@@ -32,8 +32,8 @@ validate_urls = False
 df = pd.read_csv(input_csv_file)
 for s in md_results_column_names:
     df[s] = ''
-
-
+
+
 #%% Find matching files locally, and create URLs
 
 local_files = os.listdir(md_results_local_folder)
@@ -41,14 +41,14 @@ local_files = [fn for fn in local_files if fn.endswith('.zip')]
 
 # i_row = 0; row = df.iloc[i_row]
 for i_row,row in df.iterrows():
-
+
     if not isinstance(row['name'],str):
         continue
-
+
     dataset_shortname = row['short_name']
     matching_files = [fn for fn in local_files if dataset_shortname in fn]
-
-    # No RDE files for datasets with no location information
+
+    # No RDE files for datasets with no location information
     if dataset_shortname in datasets_without_location_info:
         assert len(matching_files) == 2
         mdv5a_files = [fn for fn in matching_files if 'mdv5a' in fn]
@@ -57,10 +57,10 @@ for i_row,row in df.iterrows():
         df.loc[i_row,'mdv5a_results_raw'] = md_base_url + mdv5a_files[0]
         df.loc[i_row,'mdv5b_results_raw'] = md_base_url + mdv5b_files[0]
     else:
-        # Exclude single-season files for snapshot-serengeti
+        # Exclude single-season files for snapshot-serengeti
         if dataset_shortname == 'snapshot-serengeti':
             matching_files = [fn for fn in matching_files if '_S' not in fn]
-        assert len(matching_files) == 2
+        assert len(matching_files) == 2
         assert all(['mdv4' in fn for fn in matching_files])
         rde_files = [fn for fn in matching_files if 'rde' in fn]
         raw_files = [fn for fn in matching_files if 'rde' not in fn]
@@ -76,28 +76,28 @@ for i_row,row in df.iterrows():
         df.loc[i_row,'mdv5a_results_raw'] = md_base_url + mdv5a_files[0]
         df.loc[i_row,'mdv5b_results_raw'] = md_base_url + mdv5b_files[0]
         df.loc[i_row,'md_results_with_rde'] = md_base_url + rde_files[0]
-
+
     print('Found {} matching files for {}'.format(len(matching_files),dataset_shortname))
 
-# ...for each row
+# ...for each row
 
 
 #%% Validate URLs
 
 if validate_urls:
-
+
     from megadetector.utils.url_utils import test_urls
-
+
     urls = set()
-
+
     for i_row,row in df.iterrows():
         for column_name in md_results_column_names:
             if len(row[column_name]) > 0:
-                assert row[column_name] not in urls
+                assert row[column_name] not in urls
                 urls.add(row[column_name])
-
-    test_urls(urls,error_on_failure=True)
-
+
+    test_urls(urls,error_on_failure=True)
+
     print('Validated {} URLs'.format(len(urls)))
 
 
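The create_links_to_md_results_files.py hunks above keep the optional URL-validation block intact aside from whitespace. A minimal sketch of that check with hypothetical URLs; test_urls and its error_on_failure flag are taken from the hunk, everything else here is illustrative:

    from megadetector.utils.url_utils import test_urls

    # Hypothetical links standing in for the MD-results URLs the script assembles;
    # with error_on_failure=True the call raises unless these are real, reachable URLs.
    urls = {
        'https://lila.science/public/lila-md-results/example-dataset_mdv5a.json.zip',
        'https://lila.science/public/lila-md-results/example-dataset_mdv5b.json.zip'
    }

    test_urls(urls, error_on_failure=True)
    print('Validated {} URLs'.format(len(urls)))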