megadetector-5.0.28-py3-none-any.whl → megadetector-5.0.29-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector has been flagged as potentially problematic.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +231 -224
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +304 -262
- megadetector/detection/run_detector.py +177 -164
- megadetector/detection/run_detector_batch.py +364 -363
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +256 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +290 -282
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +415 -415
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +219 -146
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +23 -20
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +313 -298
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1018 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1457 -398
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +61 -61
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +401 -397
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +79 -73
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.28.dist-info/RECORD +0 -209
megadetector/data_management/lila/download_lila_subset.py (+21 -24)

@@ -14,9 +14,12 @@ import random
 
 from tqdm import tqdm
 from collections import defaultdict
+from copy import deepcopy
 
 from megadetector.data_management.lila.lila_common import \
     read_lila_all_images_file, is_empty, lila_base_urls
+from megadetector.utils.url_utils import parallel_download_urls
+from megadetector.utils.path_utils import open_file
 
 for s in lila_base_urls.values():
     assert s.endswith('/')
@@ -58,13 +61,13 @@ common_name_to_count = defaultdict(int)
 
 ds_name_to_urls = defaultdict(list)
 
-def find_items(row):
-
+def find_items(row): # noqa
+
     if is_empty(row['common_name']):
         return
-
+
     match = False
-
+
     # This is the only bit of this file that's specific to a particular query. In this case
     # we're checking whether each row is on a list of species of interest, but you do you.
     for species_name in species_of_interest:
@@ -72,7 +75,7 @@ def find_items(row):
             match = True
             common_name_to_count[species_name] += 1
             break
-
+
     if match:
         ds_name_to_urls[row['dataset_name']].append(row['url_' + preferred_provider])
 
@@ -86,8 +89,7 @@ print('Found {} matching URLs across {} datasets'.format(len(all_urls),len(ds_na
 
 for common_name in common_name_to_count:
     print('{}: {}'.format(common_name,common_name_to_count[common_name]))
-
-from copy import deepcopy
+
 ds_name_to_urls_raw = deepcopy(ds_name_to_urls)
 
 
@@ -104,19 +106,17 @@ else:
 
 #%% Choose target files for each URL
 
-from megadetector.data_management.lila.lila_common import lila_base_urls
-
 # We have a list of URLs per dataset, flatten that into a single list of URLs
 urls_to_download = set()
 for ds_name in ds_name_to_urls:
     for url in ds_name_to_urls[ds_name]:
         urls_to_download.add(url)
-urls_to_download = sorted(list(urls_to_download))
+urls_to_download = sorted(list(urls_to_download))
 
 # A URL might look like this:
 #
 # https://storage.googleapis.com/public-datasets-lila/wcs-unzipped/animals/0667/0302.jpg
-#
+#
 # We'll write that to an output file that looks like this (relative to output_dir):
 #
 # wcs-unzipped/animals/0667/0302.jpg
@@ -128,7 +128,7 @@ assert base_url.endswith('/')
 url_to_target_file = {}
 
 for url in urls_to_download:
-    assert url.startswith(base_url)
+    assert url.startswith(base_url)
     target_fn_relative = url.replace(base_url,'')
     target_fn_abs = os.path.join(output_dir,target_fn_relative)
     url_to_target_file[url] = target_fn_abs
@@ -136,8 +136,6 @@ for url in urls_to_download:
 
 #%% Download image files
 
-from megadetector.utils.url_utils import parallel_download_urls
-
 download_results = parallel_download_urls(url_to_target_file=url_to_target_file,
                                           verbose=False,
                                           overwrite=False,
@@ -147,39 +145,38 @@ download_results = parallel_download_urls(url_to_target_file=url_to_target_file,
 
 #%% Open output folder
 
-from megadetector.utils.path_utils import open_file
 open_file(output_dir)
 
 
 #%% Scrap
 
 if False:
-
+
     pass
 
     #%% Find all the reptiles on LILA
 
     reptile_rows = df.loc[df['class'] == 'reptilia']
-
+
     # i_row = 0; row = reptile_rows.iloc[i_row]
-
+
     common_name_to_count = defaultdict(int)
     dataset_to_count = defaultdict(int)
    for i_row,row in reptile_rows.iterrows():
        common_name_to_count[row['common_name']] += 1
        dataset_to_count[row['dataset_name']] += 1
-
+
    from megadetector.utils.ct_utils import sort_dictionary_by_value
-
+
    print('Found {} reptiles\n'.format(len(reptile_rows)))
-
+
    common_name_to_count = sort_dictionary_by_value(common_name_to_count,reverse=True)
    dataset_to_count = sort_dictionary_by_value(dataset_to_count,reverse=True)
-
+
    print('Common names by count:\n')
    for k in common_name_to_count:
        print('{} ({})'.format(k,common_name_to_count[k]))
-
-    print('\nDatasets by count:\n')
+
+    print('\nDatasets by count:\n')
    for k in dataset_to_count:
        print('{} ({})'.format(k,dataset_to_count[k]))

megadetector/data_management/lila/generate_lila_per_image_labels.py (+91 -91)

@@ -35,6 +35,7 @@ from megadetector.data_management.lila.lila_common import \
 from megadetector.utils import write_html_image_list
 from megadetector.utils.path_utils import zip_file
 from megadetector.utils.path_utils import open_file
+from megadetector.utils.url_utils import parallel_download_urls
 
 # We'll write images, metadata downloads, and temporary files here
 lila_local_base = os.path.expanduser('~/lila')
@@ -47,7 +48,7 @@ os.makedirs(metadata_dir,exist_ok=True)
 
 output_file = os.path.join(lila_local_base,'lila_image_urls_and_labels.csv')
 
-# Some datasets don't have "sequence_level_annotation" fields populated, but we know their
+# Some datasets don't have "sequence_level_annotation" fields populated, but we know their
 # annotation level
 ds_name_to_annotation_level = {}
 ds_name_to_annotation_level['Caltech Camera Traps'] = 'image'
@@ -79,11 +80,11 @@ if False:
 
 #%% Download and extract metadata for each dataset
 
-for ds_name in metadata_table.keys():
+for ds_name in metadata_table.keys():
     metadata_table[ds_name]['metadata_filename'] = read_metadata_file_for_dataset(ds_name=ds_name,
                                                                                   metadata_dir=metadata_dir,
                                                                                   metadata_table=metadata_table)
-
+
 #%% Load taxonomy data
 
 taxonomy_df = read_lila_taxonomy_mapping(metadata_dir)
@@ -95,12 +96,12 @@ ds_label_to_taxonomy = {}
 
 # i_row = 0; row = taxonomy_df.iloc[i_row]
 for i_row,row in taxonomy_df.iterrows():
-
+
     ds_label = row['dataset_name'] + ':' + row['query']
     assert ds_label.strip() == ds_label
     assert ds_label not in ds_label_to_taxonomy
     ds_label_to_taxonomy[ds_label] = row.to_dict()
-
+
 
 #%% Process annotations for each dataset
 
@@ -116,12 +117,12 @@ taxonomy_levels_to_include = \
     ['kingdom','phylum','subphylum','superclass','class','subclass','infraclass','superorder','order',
      'suborder','infraorder','superfamily','family','subfamily','tribe','genus','species','subspecies',
      'variety']
-
+
 header.extend(taxonomy_levels_to_include)
 
 missing_annotations = set()
 
-def clearnan(v):
+def _clearnan(v):
     if isinstance(v,float):
         assert np.isnan(v)
         v = ''
@@ -129,57 +130,57 @@ def clearnan(v):
     return v
 
 with open(output_file,'w',encoding='utf-8',newline='') as f:
-
+
     csv_writer = csv.writer(f)
     csv_writer.writerow(header)
-
+
     # ds_name = list(metadata_table.keys())[0]
     for ds_name in metadata_table.keys():
-
+
         if 'bbox' in ds_name:
             print('Skipping bbox dataset {}'.format(ds_name))
             continue
-
+
         print('Processing dataset {}'.format(ds_name))
-
+
        json_filename = metadata_table[ds_name]['metadata_filename']
        with open(json_filename, 'r') as f:
            data = json.load(f)
-
+
        categories = data['categories']
        category_ids = [c['id'] for c in categories]
        for c in categories:
            category_id_to_name = {c['id']:c['name'] for c in categories}
-
+
        annotations = data['annotations']
        images = data['images']
-
+
        image_id_to_annotations = defaultdict(list)
-
+
        # Go through annotations, marking each image with the categories that are present
        #
        # ann = annotations[0]
-        for ann in annotations:
+        for ann in annotations:
            image_id_to_annotations[ann['image_id']].append(ann)
-
+
        unannotated_images = []
-
+
        found_date = False
        found_location = False
        found_annotation_level = False
-
+
        if ds_name in ds_name_to_annotation_level:
            expected_annotation_level = ds_name_to_annotation_level[ds_name]
        else:
            expected_annotation_level = None
-
+
        # im = images[10]
        for i_image,im in tqdm(enumerate(images),total=len(images)):
-
+
            if (debug_max_images_per_dataset is not None) and (debug_max_images_per_dataset > 0) \
                and (i_image >= debug_max_images_per_dataset):
                break
-
+
            file_name = im['file_name'].replace('\\','/')
            base_url_gcp = metadata_table[ds_name]['image_base_url_gcp']
            base_url_aws = metadata_table[ds_name]['image_base_url_aws']
@@ -187,21 +188,21 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
            assert not base_url_gcp.endswith('/')
            assert not base_url_aws.endswith('/')
            assert not base_url_azure.endswith('/')
-
+
            url_gcp = base_url_gcp + '/' + file_name
            url_aws = base_url_aws + '/' + file_name
            url_azure = base_url_azure + '/' + file_name
-
+
            for k in im.keys():
                if ('date' in k or 'time' in k) and (k not in ['datetime','date_captured']):
                    raise ValueError('Unrecognized datetime field')
-
+
            # This field name was only used for Caltech Camera Traps
            if 'date_captured' in im:
                assert ds_name == 'Caltech Camera Traps'
                im['datetime'] = im['date_captured']
-
-            def has_valid_datetime(im):
+
+            def _has_valid_datetime(im):
                if 'datetime' not in im:
                    return False
                v = im['datetime']
@@ -212,29 +213,29 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
                else:
                    assert isinstance(v,float) and np.isnan(v)
                    return False
-
-            dt_string = ''
-            if (has_valid_datetime(im)):
-
+
+            dt_string = ''
+            if (_has_valid_datetime(im)):
+
                dt = dateparser.parse(im['datetime'])
-
+
                if dt is None or dt.year < 1990 or dt.year > 2025:
-
+
                    # raise ValueError('Suspicious date parsing result')
-
-                    # Special case we don't want to print a warning about... this is
+
+                    # Special case we don't want to print a warning about... this is
                    # in invalid date that very likely originates on the camera, not at
                    # some intermediate processing step.
                    #
                    # print('Suspicious date for image {}: {} ({})'.format(
                    #    im['id'], im['datetime'], ds_name))
-                    pass
-
+                    pass
+
                else:
-
+
                    found_date = True
                    dt_string = dt.strftime("%m-%d-%Y %H:%M:%S")
-
+
            # Location, sequence, and image IDs are only guaranteed to be unique within
            # a dataset, so for the output .csv file, include both
            if 'location' in im:
@@ -242,25 +243,25 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
                location_id = ds_name + ' : ' + str(im['location'])
            else:
                location_id = ds_name
-
+
            image_id = ds_name + ' : ' + str(im['id'])
-
+
            if 'seq_id' in im:
                sequence_id = ds_name + ' : ' + str(im['seq_id'])
            else:
                sequence_id = ds_name + ' : ' + 'unknown'
-
+
            if 'frame_num' in im:
                frame_num = im['frame_num']
            else:
                frame_num = -1
-
+
            annotations_this_image = image_id_to_annotations[im['id']]
-
+
            categories_this_image = set()
-
+
            annotation_level = 'unknown'
-
+
            for ann in annotations_this_image:
                assert ann['image_id'] == im['id']
                categories_this_image.add(category_id_to_name[ann['category_id']])
@@ -275,35 +276,35 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
                    'Unexpected annotation level'
            elif expected_annotation_level is not None:
                annotation_level = expected_annotation_level
-
+
            if len(categories_this_image) == 0:
                unannotated_images.append(im)
                continue
-
+
            # category_name = list(categories_this_image)[0]
            for category_name in categories_this_image:
-
+
                ds_label = ds_name + ':' + category_name.lower()
-
+
                if ds_label not in ds_label_to_taxonomy:
-
+
                    assert ds_label in known_unmapped_labels
-
+
                    # Only print a warning the first time we see an unmapped label
                    if ds_label not in missing_annotations:
                        print('Warning: {} not in taxonomy file'.format(ds_label))
                        missing_annotations.add(ds_label)
                    continue
-
+
                taxonomy_labels = ds_label_to_taxonomy[ds_label]
-
+
                """
-                header =
+                header =
                ['dataset_name','url','image_id','sequence_id','location_id',
                 'frame_num','original_label','scientific_name','common_name',
                 'datetime','annotation_level']
                """
-
+
                row = []
                row.append(ds_name)
                row.append(url_gcp)
@@ -314,37 +315,37 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
                row.append(location_id)
                row.append(frame_num)
                row.append(taxonomy_labels['query'])
-                row.append(clearnan(taxonomy_labels['scientific_name']))
-                row.append(clearnan(taxonomy_labels['common_name']))
+                row.append(_clearnan(taxonomy_labels['scientific_name']))
+                row.append(_clearnan(taxonomy_labels['common_name']))
                row.append(dt_string)
                row.append(annotation_level)
-
+
                for s in taxonomy_levels_to_include:
-                    row.append(clearnan(taxonomy_labels[s]))
-
+                    row.append(_clearnan(taxonomy_labels[s]))
+
                assert len(row) == len(header)
-
+
                csv_writer.writerow(row)
-
+
            # ...for each category that was applied at least once to this image
-
+
        # ...for each image in this dataset
-
+
        if not found_date:
            pass
            # print('Warning: no date information available for this dataset')
-
+
        if not found_location:
            pass
            # print('Warning: no location information available for this dataset')
-
+
        if not found_annotation_level and (ds_name not in ds_name_to_annotation_level):
            print('Warning: no annotation level information available for this dataset')
-
+
        if len(unannotated_images) > 0:
            print('Warning: {} of {} images are un-annotated\n'.\
                  format(len(unannotated_images),len(images)))
-
+
    # ...for each dataset
 
 # ...with open()
@@ -364,7 +365,7 @@ print('Read {} rows from {}'.format(len(df),output_file))
 
 tqdm.pandas()
 
-def isint(v):
+def _isint(v):
     return isinstance(v,int) or isinstance(v,np.int64)
 
 valid_annotation_levels = set(['sequence','image','unknown'])
@@ -373,8 +374,8 @@ valid_annotation_levels = set(['sequence','image','unknown'])
 # in the next cell to look for datasets that only have a single location
 dataset_name_to_locations = defaultdict(set)
 
-def check_row(row):
-
+def _check_row(row):
+
     assert row['dataset_name'] in metadata_table.keys()
     for url_column in ['url_gcp','url_aws','url_azure']:
         assert row[url_column].startswith('https://') or row[url_column].startswith('http://')
@@ -387,21 +388,21 @@ def check_row(row):
         assert np.isnan(row['frame_num'])
     else:
         # -1 is sometimes used for sequences of unknown length
-        assert isint(row['frame_num']) and row['frame_num'] >= -1
+        assert _isint(row['frame_num']) and row['frame_num'] >= -1
 
     ds_name = row['dataset_name']
     dataset_name_to_locations[ds_name].add(row['location_id'])
-
+
 # Faster, but more annoying to debug
 if True:
-
-    df.progress_apply(check_row, axis=1)
+
+    df.progress_apply(_check_row, axis=1)
 
 else:
-
+
     # i_row = 0; row = df.iloc[i_row]
     for i_row,row in tqdm(df.iterrows(),total=len(df)):
-        check_row(row)
+        _check_row(row)
 
 
 #%% Check for datasets that have only one location string (typically "unknown")
@@ -428,19 +429,19 @@ images_to_download = []
 
 # ds_name = list(metadata_table.keys())[2]
 for ds_name in metadata_table.keys():
-
+
     if 'bbox' in ds_name:
         continue
-
+
     # Find all rows for this dataset
     ds_rows = df.loc[df['dataset_name'] == ds_name]
-
+
     print('{} rows available for {}'.format(len(ds_rows),ds_name))
     assert len(ds_rows) > 0
-
+
     empty_rows = ds_rows[ds_rows['scientific_name'].isnull()]
     non_empty_rows = ds_rows[~ds_rows['scientific_name'].isnull()]
-
+
     if len(empty_rows) == 0:
         print('No empty images available for {}'.format(ds_name))
     elif len(empty_rows) > n_empty_images_per_dataset:
@@ -452,7 +453,7 @@ for ds_name in metadata_table.keys():
     elif len(non_empty_rows) > n_non_empty_images_per_dataset:
         non_empty_rows = non_empty_rows.sample(n=n_non_empty_images_per_dataset)
     images_to_download.extend(non_empty_rows.to_dict('records'))
-
+
 # ...for each dataset
 
 print('Selected {} total images'.format(len(images_to_download)))
@@ -468,7 +469,7 @@ url_to_target_file = {}
 
 # i_image = 10; image = images_to_download[i_image]
 for i_image,image in tqdm(enumerate(images_to_download),total=len(images_to_download)):
-
+
     url = image['url_' + preferred_cloud]
     ext = os.path.splitext(url)[1]
     fn_relative = 'image_{}'.format(str(i_image).zfill(4)) + ext
@@ -476,11 +477,10 @@ for i_image,image in tqdm(enumerate(images_to_download),total=len(images_to_down
     image['relative_file'] = fn_relative
     image['url'] = url
     url_to_target_file[url] = fn_abs
-
+
 
 #%% Download images (execution)
 
-from megadetector.utils.url_utils import parallel_download_urls
 download_results = parallel_download_urls(url_to_target_file,verbose=False,overwrite=True,
                                           n_workers=20,pool_type='thread')
 
@@ -493,10 +493,10 @@ html_images = []
 
 # im = images_to_download[0]
 for im in images_to_download:
-
+
     if im['relative_file'] is None:
         continue
-
+
     output_im = {}
     output_im['filename'] = im['relative_file']
     output_im['linkTarget'] = im['url']
@@ -504,7 +504,7 @@ for im in images_to_download:
     output_im['imageStyle'] = 'width:600px;'
     output_im['textStyle'] = 'font-weight:normal;font-size:100%;'
     html_images.append(output_im)
-
+
 write_html_image_list.write_html_image_list(html_filename,html_images)
 
 open_file(html_filename)
|