megadetector 5.0.11__py3-none-any.whl → 5.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (203) hide show
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +97 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +149 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +88 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +263 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +607 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +237 -0
  58. megadetector/data_management/cct_json_utils.py +404 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +283 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +493 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +793 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +870 -0
  129. megadetector/data_management/read_exif.py +809 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/rename_images.py +187 -0
  133. megadetector/data_management/resize_coco_dataset.py +189 -0
  134. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  135. megadetector/data_management/yolo_output_to_md_output.py +446 -0
  136. megadetector/data_management/yolo_to_coco.py +676 -0
  137. megadetector/detection/__init__.py +0 -0
  138. megadetector/detection/detector_training/__init__.py +0 -0
  139. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  140. megadetector/detection/process_video.py +846 -0
  141. megadetector/detection/pytorch_detector.py +355 -0
  142. megadetector/detection/run_detector.py +779 -0
  143. megadetector/detection/run_detector_batch.py +1219 -0
  144. megadetector/detection/run_inference_with_yolov5_val.py +1087 -0
  145. megadetector/detection/run_tiled_inference.py +934 -0
  146. megadetector/detection/tf_detector.py +192 -0
  147. megadetector/detection/video_utils.py +698 -0
  148. megadetector/postprocessing/__init__.py +0 -0
  149. megadetector/postprocessing/add_max_conf.py +64 -0
  150. megadetector/postprocessing/categorize_detections_by_size.py +165 -0
  151. megadetector/postprocessing/classification_postprocessing.py +716 -0
  152. megadetector/postprocessing/combine_api_outputs.py +249 -0
  153. megadetector/postprocessing/compare_batch_results.py +966 -0
  154. megadetector/postprocessing/convert_output_format.py +396 -0
  155. megadetector/postprocessing/load_api_results.py +195 -0
  156. megadetector/postprocessing/md_to_coco.py +310 -0
  157. megadetector/postprocessing/md_to_labelme.py +330 -0
  158. megadetector/postprocessing/merge_detections.py +412 -0
  159. megadetector/postprocessing/postprocess_batch_results.py +1908 -0
  160. megadetector/postprocessing/remap_detection_categories.py +170 -0
  161. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  162. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  163. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  164. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1635 -0
  165. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  166. megadetector/postprocessing/subset_json_detector_output.py +700 -0
  167. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  168. megadetector/taxonomy_mapping/__init__.py +0 -0
  169. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  170. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  171. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  172. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +588 -0
  173. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  174. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  175. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  176. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  177. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  178. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  179. megadetector/utils/__init__.py +0 -0
  180. megadetector/utils/azure_utils.py +178 -0
  181. megadetector/utils/ct_utils.py +613 -0
  182. megadetector/utils/directory_listing.py +246 -0
  183. megadetector/utils/md_tests.py +1164 -0
  184. megadetector/utils/path_utils.py +1045 -0
  185. megadetector/utils/process_utils.py +160 -0
  186. megadetector/utils/sas_blob_utils.py +509 -0
  187. megadetector/utils/split_locations_into_train_val.py +228 -0
  188. megadetector/utils/string_utils.py +92 -0
  189. megadetector/utils/url_utils.py +323 -0
  190. megadetector/utils/write_html_image_list.py +225 -0
  191. megadetector/visualization/__init__.py +0 -0
  192. megadetector/visualization/plot_utils.py +293 -0
  193. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  194. megadetector/visualization/visualization_utils.py +1536 -0
  195. megadetector/visualization/visualize_db.py +552 -0
  196. megadetector/visualization/visualize_detector_output.py +405 -0
  197. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/LICENSE +0 -0
  198. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/METADATA +2 -2
  199. megadetector-5.0.13.dist-info/RECORD +201 -0
  200. megadetector-5.0.13.dist-info/top_level.txt +1 -0
  201. megadetector-5.0.11.dist-info/RECORD +0 -5
  202. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  203. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/WHEEL +0 -0
@@ -0,0 +1,106 @@
1
+ """
2
+
3
+ create_links_to_md_results_files.py
4
+
5
+ One-off script to populate the columns in the camera trap data .csv file that point to MD results.
6
+
7
+ """
8
+
9
+ #%% Imports and constants
10
+
11
+ import os
12
+
13
+ import pandas as pd
14
+
# Input table (without MD results columns) and the output table this script writes
input_csv_file = r'g:\temp\lila_camera_trap_datasets_no_md_results.csv'
output_csv_file = r'g:\temp\lila_camera_trap_datasets.csv'

# Local folder that is scanned for MD results .zip files, and the base URL
# that is prepended to each matching filename to form a link
md_results_local_folder = r'g:\temp\lila-md-results'
md_base_url = 'https://lila.science/public/lila-md-results/'
assert md_base_url.endswith('/')

# No RDE files for datasets with no location information
datasets_without_location_info = ('ena24','missouri-camera-traps')

# Columns that will hold the links to MD results files
md_results_column_names = [
    'mdv4_results_raw',
    'mdv5a_results_raw',
    'mdv5b_results_raw',
    'md_results_with_rde',
]

# When True, every generated URL is checked before the output file is written
validate_urls = False
28
+
29
+
#%% Read input data

# Load the dataset table, then give it one empty-string column per MD results
# link; rows that never receive a link keep the empty string.
df = pd.read_csv(input_csv_file)
for results_column in md_results_column_names:
    df[results_column] = ''
35
+
36
+
#%% Find matching files locally, and create URLs

# Only .zip files in the local results folder are candidates
local_files = [fn for fn in os.listdir(md_results_local_folder) if fn.endswith('.zip')]

# i_row = 0; row = df.iloc[i_row]
for i_row,row in df.iterrows():

    # Skip rows whose 'name' cell is not a string (presumably blank rows)
    if not isinstance(row['name'],str):
        continue

    # A results file belongs to a dataset if the dataset's short name appears
    # anywhere in the filename
    dataset_shortname = row['short_name']
    matching_files = [fn for fn in local_files if dataset_shortname in fn]

    if dataset_shortname in datasets_without_location_info:

        # No RDE files for datasets with no location information, so we
        # expect exactly one MDv5a file and one MDv5b file
        assert len(matching_files) == 2
        mdv5a_files = [fn for fn in matching_files if 'mdv5a' in fn]
        mdv5b_files = [fn for fn in matching_files if 'mdv5b' in fn]
        assert len(mdv5a_files) == 1
        assert len(mdv5b_files) == 1
        column_to_file = {
            'mdv5a_results_raw': mdv5a_files[0],
            'mdv5b_results_raw': mdv5b_files[0],
        }

    elif dataset_shortname == 'snapshot-serengeti':

        # Exclude single-season files for snapshot-serengeti; what remains
        # should be one raw MDv4 file and one RDE file
        matching_files = [fn for fn in matching_files if '_S' not in fn]
        assert len(matching_files) == 2
        assert all('mdv4' in fn for fn in matching_files)
        rde_files = [fn for fn in matching_files if 'rde' in fn]
        raw_files = [fn for fn in matching_files if 'rde' not in fn]
        assert len(rde_files) == 1
        assert len(raw_files) == 1
        column_to_file = {
            'mdv4_results_raw': raw_files[0],
            'md_results_with_rde': rde_files[0],
        }

    else:

        # Everything else: one raw MDv5a file, one raw MDv5b file, one RDE file
        assert len(matching_files) == 3
        mdv5a_files = [fn for fn in matching_files if 'mdv5a' in fn and 'rde' not in fn]
        mdv5b_files = [fn for fn in matching_files if 'mdv5b' in fn and 'rde' not in fn]
        rde_files = [fn for fn in matching_files if 'rde' in fn]
        assert len(mdv5a_files) == 1
        assert len(mdv5b_files) == 1
        assert len(rde_files) == 1
        column_to_file = {
            'mdv5a_results_raw': mdv5a_files[0],
            'mdv5b_results_raw': mdv5b_files[0],
            'md_results_with_rde': rde_files[0],
        }

    # Each link is the base URL followed by the bare filename
    for results_column,results_file in column_to_file.items():
        df.loc[i_row,results_column] = md_base_url + results_file

    print('Found {} matching files for {}'.format(len(matching_files),dataset_shortname))

# ...for each row
83
+
84
+
#%% Validate URLs

if validate_urls:

    from megadetector.utils.url_utils import test_urls

    # Gather every non-empty link from the results columns; a link that shows
    # up a second time anywhere in the table trips the assertion.
    urls = set()

    for i_row,row in df.iterrows():
        for column_name in md_results_column_names:
            url = row[column_name]
            if len(url) == 0:
                continue
            assert url not in urls
            urls.add(url)

    test_urls(urls,error_on_failure=True)

    print('Validated {} URLs'.format(len(urls)))


#%% Write new .csv file

# Write the table with a header row and without the index column
df.to_csv(output_csv_file,index=False,header=True)
@@ -0,0 +1,178 @@
1
+ """
2
+
3
+ download_lila_subset.py
4
+
5
+ Example of how to download a list of files from LILA, e.g. all the files
6
+ in a data set corresponding to a particular species.
7
+
8
+ """
9
+
10
+ #%% Constants and imports
11
+
12
+ import os
13
+ import random
14
+
15
+ from tqdm import tqdm
16
+ from collections import defaultdict
17
+
18
+ from megadetector.data_management.lila.lila_common import \
19
+ read_lila_all_images_file, is_empty, lila_base_urls
20
+
# Every provider's base URL is expected to end with a slash
for provider_base_url in lila_base_urls.values():
    assert provider_base_url.endswith('/')

# If any of these strings appear in the common name of a species, we'll download that image
species_of_interest = ['grey fox','gray fox','cape fox','red fox','kit fox']

# We'll write images, metadata downloads, and temporary files here
lila_local_base = os.path.expanduser('~/lila')

# Metadata and downloaded images live in separate subfolders; create both now
metadata_dir = os.path.join(lila_local_base,'metadata')
output_dir = os.path.join(lila_local_base,'lila_downloads_by_dataset')
for working_folder in (metadata_dir,output_dir):
    os.makedirs(working_folder,exist_ok=True)

# Number of concurrent download threads
n_download_threads = 20

# Maximum number of images to keep per dataset, or None to keep them all
max_images_per_dataset = 10 # None

# Selects which 'url_[provider]' column and which base URL are used
preferred_provider = 'gcp' # 'azure', 'gcp', 'aws'

# Fixed seed so the per-dataset random sampling is repeatable
random.seed(0)
44
+
45
+
#%% Download and open the giant table of image URLs and labels

# Takes ~60 seconds to download, unzip, and open
df = read_lila_all_images_file(metadata_dir)


#%% Find all the images we want to download

# Takes ~2 minutes

# Number of matching rows for each entry in species_of_interest
common_name_to_count = defaultdict(int)

# Matching image URLs (preferred provider only), grouped by dataset name
ds_name_to_urls = defaultdict(list)
59
+
def find_items(row):
    """
    Record [row] if its common name contains any of the species of interest.

    On a match, increments the count for the first matching entry in
    species_of_interest and appends the row's URL for the preferred provider
    to the URL list for the row's dataset. Rows with an empty common name,
    or with no match, are ignored.

    Args:
        row: a table row supporting lookup of 'common_name', 'dataset_name',
            and 'url_[preferred_provider]'

    Returns:
        None; results accumulate in common_name_to_count and ds_name_to_urls
    """

    common_name = row['common_name']
    if is_empty(common_name):
        return

    # This is the only bit of this file that's specific to a particular query. In this case
    # we're checking whether each row is on a list of species of interest, but you do you.
    matched_species = next(
        (species_name for species_name in species_of_interest if species_name in common_name),
        None)

    if matched_species is None:
        return

    common_name_to_count[matched_species] += 1
    ds_name_to_urls[row['dataset_name']].append(row['url_' + preferred_provider])
77
+
# Run find_items over every row of the table, with a progress bar
tqdm.pandas()
_ = df.progress_apply(find_items,axis=1)

# We have a list of URLs for each dataset, flatten them all into a list of URLs
all_urls = [url for dataset_urls in ds_name_to_urls.values() for url in dataset_urls]
print('Found {} matching URLs across {} datasets'.format(len(all_urls),len(ds_name_to_urls)))

for common_name,match_count in common_name_to_count.items():
    print('{}: {}'.format(common_name,match_count))

# Keep an untouched copy of the per-dataset URL lists
from copy import deepcopy
ds_name_to_urls_raw = deepcopy(ds_name_to_urls)
92
+
#%% Optionally trim to a fixed number of URLs per dataset

# When a cap is set, any dataset with more URLs than the cap is replaced by a
# random sample of exactly max_images_per_dataset URLs; a cap of None leaves
# every list untouched.
if max_images_per_dataset is not None:
    # ds_name = next(iter(ds_name_to_urls.keys()))
    for ds_name,dataset_urls in ds_name_to_urls.items():
        if len(dataset_urls) > max_images_per_dataset:
            ds_name_to_urls[ds_name] = random.sample(dataset_urls,max_images_per_dataset)
103
+
#%% Choose target files for each URL

from megadetector.data_management.lila.lila_common import lila_base_urls

# We have a list of URLs per dataset, flatten that into a single sorted list
# of unique URLs
urls_to_download = sorted(
    {url for dataset_urls in ds_name_to_urls.values() for url in dataset_urls})

# A URL might look like this:
#
# https://storage.googleapis.com/public-datasets-lila/wcs-unzipped/animals/0667/0302.jpg
#
# We'll write that to an output file that looks like this (relative to output_dir):
#
# wcs-unzipped/animals/0667/0302.jpg
#
# ...so we need to remove the base URL to get the target file.
base_url = lila_base_urls[preferred_provider]
assert base_url.endswith('/')

# Maps each URL to the absolute local path it will be downloaded to
url_to_target_file = {}

for url in urls_to_download:
    assert url.startswith(base_url)
    # Strip only the leading base URL; str.replace() would also remove any
    # later occurrence of the same text from the relative path.
    target_fn_relative = url[len(base_url):]
    target_fn_abs = os.path.join(output_dir,target_fn_relative)
    url_to_target_file[url] = target_fn_abs
135
+
#%% Download image files

from megadetector.utils.url_utils import parallel_download_urls

# Download every URL to its target file using n_download_threads workers.
# NOTE(review): overwrite=False presumably leaves files that already exist
# untouched - confirm against parallel_download_urls.
download_results = parallel_download_urls(
    url_to_target_file=url_to_target_file,
    n_workers=n_download_threads,
    pool_type='thread',
    overwrite=False,
    verbose=False)
146
+
#%% Scrap

# Everything under this guard is scratch code that never runs as part of the script
if False:

    pass

    #%% Find all the reptiles on LILA

    reptile_rows = df.loc[df['class'] == 'reptilia']

    # i_row = 0; row = reptile_rows.iloc[i_row]

    # Tally reptile rows by common name and by dataset
    common_name_to_count = defaultdict(int)
    dataset_to_count = defaultdict(int)
    for _,row in reptile_rows.iterrows():
        common_name_to_count[row['common_name']] += 1
        dataset_to_count[row['dataset_name']] += 1

    from megadetector.utils.ct_utils import sort_dictionary_by_value

    print('Found {} reptiles\n'.format(len(reptile_rows)))

    # Re-order both tallies by count before printing
    common_name_to_count = sort_dictionary_by_value(common_name_to_count,reverse=True)
    dataset_to_count = sort_dictionary_by_value(dataset_to_count,reverse=True)

    print('Common names by count:\n')
    for common_name,n_rows in common_name_to_count.items():
        print('{} ({})'.format(common_name,n_rows))

    print('\nDatasets by count:\n')
    for dataset_name,n_rows in dataset_to_count.items():
        print('{} ({})'.format(dataset_name,n_rows))