megadetector 5.0.10__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (226) hide show
  1. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.10.dist-info/RECORD +0 -224
  214. megadetector-5.0.10.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
@@ -1,106 +0,0 @@
1
- """
2
-
3
- create_links_to_md_results_files.py
4
-
5
- One-off script to populate the columns in the camera trap data .csv file that point to MD results.
6
-
7
- """
8
-
9
- #%% Imports and constants
10
-
11
- import os
12
-
13
- import pandas as pd
14
-
15
- input_csv_file = r'g:\temp\lila_camera_trap_datasets_no_md_results.csv'
16
- output_csv_file = r'g:\temp\lila_camera_trap_datasets.csv'
17
-
18
- md_results_local_folder = r'g:\temp\lila-md-results'
19
- md_base_url = 'https://lila.science/public/lila-md-results/'
20
- assert md_base_url.endswith('/')
21
-
22
- # No RDE files for datasets with no location information
23
- datasets_without_location_info = ('ena24','missouri-camera-traps')
24
-
25
- md_results_column_names = ['mdv4_results_raw','mdv5a_results_raw','mdv5b_results_raw','md_results_with_rde']
26
-
27
- validate_urls = False
28
-
29
-
30
- #%% Read input data
31
-
32
- df = pd.read_csv(input_csv_file)
33
- for s in md_results_column_names:
34
- df[s] = ''
35
-
36
-
37
- #%% Find matching files locally, and create URLs
38
-
39
- local_files = os.listdir(md_results_local_folder)
40
- local_files = [fn for fn in local_files if fn.endswith('.zip')]
41
-
42
- # i_row = 0; row = df.iloc[i_row]
43
- for i_row,row in df.iterrows():
44
-
45
- if not isinstance(row['name'],str):
46
- continue
47
-
48
- dataset_shortname = row['short_name']
49
- matching_files = [fn for fn in local_files if dataset_shortname in fn]
50
-
51
- # No RDE files for datasets with no location information
52
- if dataset_shortname in datasets_without_location_info:
53
- assert len(matching_files) == 2
54
- mdv5a_files = [fn for fn in matching_files if 'mdv5a' in fn]
55
- mdv5b_files = [fn for fn in matching_files if 'mdv5b' in fn]
56
- assert len(mdv5a_files) == 1 and len(mdv5b_files) == 1
57
- df.loc[i_row,'mdv5a_results_raw'] = md_base_url + mdv5a_files[0]
58
- df.loc[i_row,'mdv5b_results_raw'] = md_base_url + mdv5b_files[0]
59
- else:
60
- # Exclude single-season files for snapshot-serengeti
61
- if dataset_shortname == 'snapshot-serengeti':
62
- matching_files = [fn for fn in matching_files if '_S' not in fn]
63
- assert len(matching_files) == 2
64
- assert all(['mdv4' in fn for fn in matching_files])
65
- rde_files = [fn for fn in matching_files if 'rde' in fn]
66
- raw_files = [fn for fn in matching_files if 'rde' not in fn]
67
- assert len(rde_files) == 1 and len(raw_files) == 1
68
- df.loc[i_row,'mdv4_results_raw'] = md_base_url + raw_files[0]
69
- df.loc[i_row,'md_results_with_rde'] = md_base_url + rde_files[0]
70
- else:
71
- assert len(matching_files) == 3
72
- mdv5a_files = [fn for fn in matching_files if 'mdv5a' in fn and 'rde' not in fn]
73
- mdv5b_files = [fn for fn in matching_files if 'mdv5b' in fn and 'rde' not in fn]
74
- rde_files = [fn for fn in matching_files if 'rde' in fn]
75
- assert len(mdv5a_files) == 1 and len(mdv5b_files) == 1 and len(rde_files) == 1
76
- df.loc[i_row,'mdv5a_results_raw'] = md_base_url + mdv5a_files[0]
77
- df.loc[i_row,'mdv5b_results_raw'] = md_base_url + mdv5b_files[0]
78
- df.loc[i_row,'md_results_with_rde'] = md_base_url + rde_files[0]
79
-
80
- print('Found {} matching files for {}'.format(len(matching_files),dataset_shortname))
81
-
82
- # ...for each row
83
-
84
-
85
- #%% Validate URLs
86
-
87
- if validate_urls:
88
-
89
- from md_utils.url_utils import test_urls
90
-
91
- urls = set()
92
-
93
- for i_row,row in df.iterrows():
94
- for column_name in md_results_column_names:
95
- if len(row[column_name]) > 0:
96
- assert row[column_name] not in urls
97
- urls.add(row[column_name])
98
-
99
- test_urls(urls,error_on_failure=True)
100
-
101
- print('Validated {} URLs'.format(len(urls)))
102
-
103
-
104
- #%% Write new .csv file
105
-
106
- df.to_csv(output_csv_file,header=True,index=False)
@@ -1,177 +0,0 @@
1
- """
2
-
3
- download_lila_subset.py
4
-
5
- Example of how to download a list of files from LILA, e.g. all the files
6
- in a data set corresponding to a particular species.
7
-
8
- """
9
-
10
- #%% Constants and imports
11
-
12
- import os
13
- import random
14
-
15
- from tqdm import tqdm
16
- from collections import defaultdict
17
-
18
- from data_management.lila.lila_common import read_lila_all_images_file, is_empty, lila_base_urls
19
-
20
- for s in lila_base_urls.values():
21
- assert s.endswith('/')
22
-
23
- # If any of these strings appear in the common name of a species, we'll download that image
24
- species_of_interest = ['grey fox','gray fox','cape fox','red fox','kit fox']
25
-
26
- # We'll write images, metadata downloads, and temporary files here
27
- lila_local_base = os.path.expanduser('~/lila')
28
-
29
- metadata_dir = os.path.join(lila_local_base,'metadata')
30
- os.makedirs(metadata_dir,exist_ok=True)
31
-
32
- output_dir = os.path.join(lila_local_base,'lila_downloads_by_dataset')
33
- os.makedirs(output_dir,exist_ok=True)
34
-
35
- # Number of concurrent download threads
36
- n_download_threads = 20
37
-
38
- max_images_per_dataset = 10 # None
39
-
40
- preferred_provider = 'gcp' # 'azure', 'gcp', 'aws'
41
-
42
- random.seed(0)
43
-
44
-
45
- #%% Download and open the giant table of image URLs and labels
46
-
47
- # Takes ~60 seconds to download, unzip, and open
48
- df = read_lila_all_images_file(metadata_dir)
49
-
50
-
51
- #%% Find all the images we want to download
52
-
53
- # Takes ~2 minutes
54
-
55
- common_name_to_count = defaultdict(int)
56
-
57
- ds_name_to_urls = defaultdict(list)
58
-
59
- def find_items(row):
60
-
61
- if is_empty(row['common_name']):
62
- return
63
-
64
- match = False
65
-
66
- # This is the only bit of this file that's specific to a particular query. In this case
67
- # we're checking whether each row is on a list of species of interest, but you do you.
68
- for species_name in species_of_interest:
69
- if species_name in row['common_name']:
70
- match = True
71
- common_name_to_count[species_name] += 1
72
- break
73
-
74
- if match:
75
- ds_name_to_urls[row['dataset_name']].append(row['url_' + preferred_provider])
76
-
77
- tqdm.pandas()
78
- _ = df.progress_apply(find_items,axis=1)
79
-
80
- # We have a list of URLs for each dataset, flatten them all into a list of URLs
81
- all_urls = list(ds_name_to_urls.values())
82
- all_urls = [item for sublist in all_urls for item in sublist]
83
- print('Found {} matching URLs across {} datasets'.format(len(all_urls),len(ds_name_to_urls)))
84
-
85
- for common_name in common_name_to_count:
86
- print('{}: {}'.format(common_name,common_name_to_count[common_name]))
87
-
88
- from copy import deepcopy
89
- ds_name_to_urls_raw = deepcopy(ds_name_to_urls)
90
-
91
-
92
- #%% Optionally trim to a fixed number of URLs per dataset
93
-
94
- if max_images_per_dataset is None:
95
- pass
96
- else:
97
- # ds_name = next(iter(ds_name_to_urls.keys()))
98
- for ds_name in ds_name_to_urls:
99
- if len(ds_name_to_urls[ds_name]) > max_images_per_dataset:
100
- ds_name_to_urls[ds_name] = random.sample(ds_name_to_urls[ds_name],max_images_per_dataset)
101
-
102
-
103
- #%% Choose target files for each URL
104
-
105
- from data_management.lila.lila_common import lila_base_urls
106
-
107
- # We have a list of URLs per dataset, flatten that into a single list of URLs
108
- urls_to_download = set()
109
- for ds_name in ds_name_to_urls:
110
- for url in ds_name_to_urls[ds_name]:
111
- urls_to_download.add(url)
112
- urls_to_download = sorted(list(urls_to_download))
113
-
114
- # A URL might look like this:
115
- #
116
- # https://storage.googleapis.com/public-datasets-lila/wcs-unzipped/animals/0667/0302.jpg
117
- #
118
- # We'll write that to an output file that looks like this (relative to output_dir):
119
- #
120
- # wcs-unzipped/animals/0667/0302.jpg
121
- #
122
- # ...so we need to remove the base URL to get the target file.
123
- base_url = lila_base_urls[preferred_provider]
124
- assert base_url.endswith('/')
125
-
126
- url_to_target_file = {}
127
-
128
- for url in urls_to_download:
129
- assert url.startswith(base_url)
130
- target_fn_relative = url.replace(base_url,'')
131
- target_fn_abs = os.path.join(output_dir,target_fn_relative)
132
- url_to_target_file[url] = target_fn_abs
133
-
134
-
135
- #%% Download image files
136
-
137
- from md_utils.url_utils import parallel_download_urls
138
-
139
- download_results = parallel_download_urls(url_to_target_file=url_to_target_file,
140
- verbose=False,
141
- overwrite=False,
142
- n_workers=n_download_threads,
143
- pool_type='thread')
144
-
145
-
146
- #%% Scrap
147
-
148
- if False:
149
-
150
- pass
151
-
152
- #%% Find all the reptiles on LILA
153
-
154
- reptile_rows = df.loc[df['class'] == 'reptilia']
155
-
156
- # i_row = 0; row = reptile_rows.iloc[i_row]
157
-
158
- common_name_to_count = defaultdict(int)
159
- dataset_to_count = defaultdict(int)
160
- for i_row,row in reptile_rows.iterrows():
161
- common_name_to_count[row['common_name']] += 1
162
- dataset_to_count[row['dataset_name']] += 1
163
-
164
- from md_utils.ct_utils import sort_dictionary_by_value
165
-
166
- print('Found {} reptiles\n'.format(len(reptile_rows)))
167
-
168
- common_name_to_count = sort_dictionary_by_value(common_name_to_count,reverse=True)
169
- dataset_to_count = sort_dictionary_by_value(dataset_to_count,reverse=True)
170
-
171
- print('Common names by count:\n')
172
- for k in common_name_to_count:
173
- print('{} ({})'.format(k,common_name_to_count[k]))
174
-
175
- print('\nDatasets by count:\n')
176
- for k in dataset_to_count:
177
- print('{} ({})'.format(k,dataset_to_count[k]))