megadetector 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (75) hide show
  1. api/batch_processing/data_preparation/manage_local_batch.py +297 -202
  2. api/batch_processing/data_preparation/manage_video_batch.py +7 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
  5. api/batch_processing/postprocessing/compare_batch_results.py +111 -61
  6. api/batch_processing/postprocessing/convert_output_format.py +24 -6
  7. api/batch_processing/postprocessing/load_api_results.py +56 -72
  8. api/batch_processing/postprocessing/md_to_labelme.py +119 -51
  9. api/batch_processing/postprocessing/merge_detections.py +30 -5
  10. api/batch_processing/postprocessing/postprocess_batch_results.py +175 -55
  11. api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
  12. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +628 -0
  13. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  14. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  15. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +224 -76
  16. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  17. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  18. classification/prepare_classification_script.py +191 -191
  19. data_management/cct_json_utils.py +7 -2
  20. data_management/coco_to_labelme.py +263 -0
  21. data_management/coco_to_yolo.py +72 -48
  22. data_management/databases/integrity_check_json_db.py +75 -64
  23. data_management/databases/subset_json_db.py +1 -1
  24. data_management/generate_crops_from_cct.py +1 -1
  25. data_management/get_image_sizes.py +44 -26
  26. data_management/importers/animl_results_to_md_results.py +3 -5
  27. data_management/importers/noaa_seals_2019.py +2 -2
  28. data_management/importers/zamba_results_to_md_results.py +2 -2
  29. data_management/labelme_to_coco.py +264 -127
  30. data_management/labelme_to_yolo.py +96 -53
  31. data_management/lila/create_lila_blank_set.py +557 -0
  32. data_management/lila/create_lila_test_set.py +2 -1
  33. data_management/lila/create_links_to_md_results_files.py +1 -1
  34. data_management/lila/download_lila_subset.py +138 -45
  35. data_management/lila/generate_lila_per_image_labels.py +23 -14
  36. data_management/lila/get_lila_annotation_counts.py +16 -10
  37. data_management/lila/lila_common.py +15 -42
  38. data_management/lila/test_lila_metadata_urls.py +116 -0
  39. data_management/read_exif.py +65 -16
  40. data_management/remap_coco_categories.py +84 -0
  41. data_management/resize_coco_dataset.py +14 -31
  42. data_management/wi_download_csv_to_coco.py +239 -0
  43. data_management/yolo_output_to_md_output.py +40 -13
  44. data_management/yolo_to_coco.py +313 -100
  45. detection/process_video.py +36 -14
  46. detection/pytorch_detector.py +1 -1
  47. detection/run_detector.py +73 -18
  48. detection/run_detector_batch.py +116 -27
  49. detection/run_inference_with_yolov5_val.py +135 -27
  50. detection/run_tiled_inference.py +153 -43
  51. detection/tf_detector.py +2 -1
  52. detection/video_utils.py +4 -2
  53. md_utils/ct_utils.py +101 -6
  54. md_utils/md_tests.py +264 -17
  55. md_utils/path_utils.py +326 -47
  56. md_utils/process_utils.py +26 -7
  57. md_utils/split_locations_into_train_val.py +215 -0
  58. md_utils/string_utils.py +10 -0
  59. md_utils/url_utils.py +66 -3
  60. md_utils/write_html_image_list.py +12 -2
  61. md_visualization/visualization_utils.py +380 -74
  62. md_visualization/visualize_db.py +41 -10
  63. md_visualization/visualize_detector_output.py +185 -104
  64. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/METADATA +11 -13
  65. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/RECORD +74 -67
  66. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
  67. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  68. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  69. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  70. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  71. taxonomy_mapping/species_lookup.py +33 -13
  72. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  73. md_visualization/visualize_megadb.py +0 -183
  74. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
  75. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
@@ -8,29 +8,31 @@ api/batch_processing/api_core/server_utils.py,sha256=oFusP1E29op5DN1nEaR-jQZgREx
8
8
  api/batch_processing/api_core/batch_service/score.py,sha256=ZuPQV7O1u9QNPhWVSYxQqvYgXo9p15e-XhnUyuz0vLE,17347
9
9
  api/batch_processing/api_core_support/aggregate_results_manually.py,sha256=8yDXbw12G8Y6SOv09tY-0hPXMNG_iRPv6mzxBiccsaU,2275
10
10
  api/batch_processing/api_support/summarize_daily_activity.py,sha256=SmRGAMWTKXf9bDXUPsTySMiIg8K1LDkAC8KVBVH_mPg,5383
11
- api/batch_processing/data_preparation/manage_local_batch.py,sha256=nNpQWPkdYP5Z122fg8otvr7bHJFkVIoQANh6f09yTlw,85371
12
- api/batch_processing/data_preparation/manage_video_batch.py,sha256=ISpM0bO5RJ_byWhuJqddGZuaE_rxntidhxJz6teFH5E,9592
11
+ api/batch_processing/data_preparation/manage_local_batch.py,sha256=z1smCyfw1Pet1AuXALPhGhpt53ClM4qW1OypMre31ac,89186
12
+ api/batch_processing/data_preparation/manage_video_batch.py,sha256=fobPIMmfvdqa1OzxsurEYCFVnUTHGrtrGBiCq3xnYHs,9668
13
13
  api/batch_processing/integration/digiKam/setup.py,sha256=7P1X3JYrBDXmLUeLRrzxNfDkL5lo-pY8nXsp9Cz8rOI,203
14
14
  api/batch_processing/integration/digiKam/xmp_integration.py,sha256=AbGPTe9RjjOkKdiZDSElai61QyfeiLQQqJR2fiJpymA,17775
15
15
  api/batch_processing/integration/eMammal/test_scripts/config_template.py,sha256=UnvrgaFRBu59MuVUJa2WpG8ebcOJWcNeZEx6GWuYLzc,73
16
16
  api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py,sha256=eIzEKiwzCfifCOCGf-jf8G4dMuzyxQMWlrFzt-Z-nVk,3608
17
17
  api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py,sha256=OYMu97p8vprSv03QcnS6aSxPBocn9sgaozfUqq_JpyM,1369
18
- api/batch_processing/postprocessing/add_max_conf.py,sha256=pqD-I-e0vqtf49Jm-SNhh9u7jhnVlyi4Lf5prNksldU,1529
18
+ api/batch_processing/postprocessing/add_max_conf.py,sha256=y4Xr_OHRxcop3vsLWQJ56eYIemm2HHKqVfvKJonTcQA,1530
19
19
  api/batch_processing/postprocessing/categorize_detections_by_size.py,sha256=b_O2OM44zIXewR4RjzeS2ue-32k5jE6KgjiPn8JRxAA,4877
20
- api/batch_processing/postprocessing/combine_api_outputs.py,sha256=WsOV4EsK9JUCMka1_-u-vinmWc6Ko8B0PD5fmwoXHh0,8233
21
- api/batch_processing/postprocessing/compare_batch_results.py,sha256=puzEA02yrCjtfjdhur0RSnqyIjG09hvxJQdEMS-srgU,32705
22
- api/batch_processing/postprocessing/convert_output_format.py,sha256=aLboVVZdlGUNYOZhme0w8LtYrd04i15oK0apOgZaYWk,12947
23
- api/batch_processing/postprocessing/load_api_results.py,sha256=VcqVjNzDjLFQCCn-gtFEHR19rulm7stLiXsWcz_XlUg,7416
20
+ api/batch_processing/postprocessing/combine_api_outputs.py,sha256=7NR_QbozC6xLTwXpUrw7c1WIL2guAHiHIg4hj1XTbeU,8237
21
+ api/batch_processing/postprocessing/compare_batch_results.py,sha256=8oJGwQ3ykNSBjrQNleuEhl2j_z74YynwnW19r0dmBG0,34652
22
+ api/batch_processing/postprocessing/convert_output_format.py,sha256=b4hEQOaXSyH6GZQfHr_PLSM85r5aPrriwUOdgH1RkOo,13600
23
+ api/batch_processing/postprocessing/load_api_results.py,sha256=aqmsWpqzDIcmompRwzF4oFkV7QPAazFyIGwEWRVd8Ng,6895
24
24
  api/batch_processing/postprocessing/md_to_coco.py,sha256=dRAkCGWtcNy_vsSTkX1h_0DZAsW6zNO7F-8XSkR8wAo,10139
25
- api/batch_processing/postprocessing/md_to_labelme.py,sha256=abtUHJA0WMt3QMpEvFVrYaKsAJhkaSetl51RwP_DMyI,7026
26
- api/batch_processing/postprocessing/merge_detections.py,sha256=B4QnqW9nvcEJpXzAK20TVB0t6L8c7PR5OjPy8FX-5Z8,15930
27
- api/batch_processing/postprocessing/postprocess_batch_results.py,sha256=WqMSWQUpJLt48IytGKLpAcfm9NX92zokL7CIdMsHAtM,68706
25
+ api/batch_processing/postprocessing/md_to_labelme.py,sha256=0evnkCFC7TucHgFJ1i12rp_C1pEflxFD44Jp98AbGrw,9779
26
+ api/batch_processing/postprocessing/merge_detections.py,sha256=O9fjLTZz7PJN19L19BYHVMGr20-gB9M3sIaBFY1_w_8,17192
27
+ api/batch_processing/postprocessing/postprocess_batch_results.py,sha256=4lDpYqpA_wcwMWVP5xKpQH6_JfkBFV8N93j_CjkvPh8,74165
28
+ api/batch_processing/postprocessing/remap_detection_categories.py,sha256=hcX2-Thk59X0df6H6x1rH1v15IwJrm3NQJK94yOt95w,6042
29
+ api/batch_processing/postprocessing/render_detection_confusion_matrix.py,sha256=kIFBgu_JDagm8FhEd3GislSMoBHAz2Fq4ySeYHPKTd0,25047
28
30
  api/batch_processing/postprocessing/separate_detections_into_folders.py,sha256=l5NKxDDxROc2EXt8EslrswXAZkQXgWTy5FSqCqa09Ug,28720
29
- api/batch_processing/postprocessing/subset_json_detector_output.py,sha256=Q4V7RWSjrBCt7VEsakx0c_haHlRdrLSzy0uqD5ELjp4,20809
30
- api/batch_processing/postprocessing/top_folders_to_bottom.py,sha256=crm_UX8jE6JurUHmS2yI-awYn3_MXqvo2xOlFZ_qig8,5475
31
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py,sha256=-XQ5IKY33y4CR1ELj-DOFbvtH51d-CtB6Dc-wiDOXoE,6899
32
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py,sha256=Q0dRNGRO6l83pTbqkfiF0qOFsy2wfbpher3fd23Y_TU,2184
33
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py,sha256=USbK5LKNXu6MTbfznK2su3nV085ivC39-a9AhEKVOWc,54850
31
+ api/batch_processing/postprocessing/subset_json_detector_output.py,sha256=EROwcj4K-abAwzyZjPCQocuayIVma85lV-D6WvvRMuc,26368
32
+ api/batch_processing/postprocessing/top_folders_to_bottom.py,sha256=etJK9DmHppMe3WqGXypuilW-n-7hOjOO9w_k1khlaVU,5476
33
+ api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py,sha256=fYqPZhaL-6cbpKHz96O3Ch65Y8xux2LQ2-ZlMGhOlM0,9053
34
+ api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py,sha256=YdMvM814TX0ZRTnP7BfowE62PoMoCOYcJOFl69DlKhQ,2189
35
+ api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py,sha256=79F6sLwGWOSBNZ5GbZMeuZEH3kfRsjYUTl4ouLhwnA8,62880
34
36
  api/synchronous/api_core/animal_detection_api/api_backend.py,sha256=PJXV0RFb6FoPBmdRug5W_5nbFwY2C_8CvDpFHjDs9w4,4934
35
37
  api/synchronous/api_core/animal_detection_api/api_frontend.py,sha256=_FGLf5C2tXQABFEGaA2Kzq05hj_D60BaIfWLCI-Os_4,10690
36
38
  api/synchronous/api_core/animal_detection_api/config.py,sha256=yEf7JZwRJCtHEV80kYvnNnUFJNds_AYLhomffwfFQi0,1017
@@ -70,7 +72,7 @@ classification/json_to_azcopy_list.py,sha256=o57wLHJPDrP9OPSY-3x81WI3mmcH1DyIOUh
70
72
  classification/json_validator.py,sha256=ZizcEPpW1J26p-oGyfvcffBy2voNRKCNXKF8NtxIt5A,26618
71
73
  classification/map_classification_categories.py,sha256=2B4K-TdE77VNw8XG1h8X7CuUvw0JSIrALdy6a1FvkXw,10738
72
74
  classification/merge_classification_detection_output.py,sha256=2FDTauvkbMZ3putJH837Ux67HTGsCAnGCOXhnnqjt6g,20123
73
- classification/prepare_classification_script.py,sha256=6BRxiEPSIl3nTVrZHE5qqUGDe_AxAEqnUg7sxMZSFMg,6141
75
+ classification/prepare_classification_script.py,sha256=7xwqws57Fkn0rmH5sADlem9trOJkiriSRwPredeIXho,5952
74
76
  classification/prepare_classification_script_mc.py,sha256=IMCsLyGL70cViVTH0eow0sYDM9E81AsBGrctNveXP10,7440
75
77
  classification/run_classifier.py,sha256=eBpkZzP7TtrnwOIlc99fTpe1QocmDuERaIw9mXqwAWI,9363
76
78
  classification/save_mislabeled.py,sha256=WmOKNXgrvvIkUdehiiWHNiKc5M7q0UM2If0vea0_7K8,3466
@@ -81,30 +83,33 @@ classification/efficientnet/__init__.py,sha256=e-jfknjzCc5a0CSW-TaZ2vi0SPU1OMIsa
81
83
  classification/efficientnet/model.py,sha256=qJHWV9-rYKa4E_TIee5N_OjRhqDdM-icPpoMap3Q5HM,17040
82
84
  classification/efficientnet/utils.py,sha256=TF5S2cn2lvlCO2dFhdFPdzjbBBs-SyZGZmEx_dsJMbo,24846
83
85
  data_management/cct_json_to_filename_json.py,sha256=AxYeOzZHkHF1UxHCUBtofXNKVdUK-5xTfZp3_iE7hFc,2416
84
- data_management/cct_json_utils.py,sha256=JX5pJh5QLyKDhcXheUYgbRSd99uB9Ui-EfsRZ_Fhw_I,12570
86
+ data_management/cct_json_utils.py,sha256=tZD2SftV4Vw9dlZQRnOumVoOYIsIkWU_iOAiim8fRMs,12767
85
87
  data_management/cct_to_csv.py,sha256=urIL8IUByhKZ4FLLa9TKlzT6mu8upLzAPN1WNnDZdIY,3859
86
88
  data_management/cct_to_md.py,sha256=0QtqUdUkrema2BSNTeJqHYkDuwOLc7tOQwq1KxTbtPE,4485
87
89
  data_management/cct_to_wi.py,sha256=nJKUhLXcZXKE5tAR9UxbqCjeikfaABfB746wpv-1BmI,8336
88
- data_management/coco_to_yolo.py,sha256=mfjv7ELP8AgrdRRbql_7YRaIQPRtfLWppJXsRnX2aSo,25050
89
- data_management/generate_crops_from_cct.py,sha256=6N4pj51gOVTXOi8REfrR5-X56NlBI_p-MHZd4hxBT_Q,4282
90
- data_management/get_image_sizes.py,sha256=yUCI5zQmA9G_GDaQiApwoafmO37cUi97dw-Ekh5leOE,4220
91
- data_management/labelme_to_coco.py,sha256=E9EdXsHkEBZ6KLZzYNGA3tQMJdwDpzJzJtF5kZvRKWA,12923
92
- data_management/labelme_to_yolo.py,sha256=H0bLty1yLm4Cz7cGuecwxUIaRk72PVokNLZTYB8hkCI,8479
90
+ data_management/coco_to_labelme.py,sha256=YmztZdYHGenPj6lRe-jB1R5J4jG0kX7Ni9SIAGZTK7s,8077
91
+ data_management/coco_to_yolo.py,sha256=IgW51lxyr_j9Y4ZlzIsW8NsgtWgvIkDeNKICVeYDx3s,26042
92
+ data_management/generate_crops_from_cct.py,sha256=m6HJ8bB4N50HYV4SXAUV43k1XJl71QZmmWZ4L-9T45Y,4283
93
+ data_management/get_image_sizes.py,sha256=34o5LTwanc0XfC7XqKnBJiX8mPiEq_JiiqqVMHglQqI,4961
94
+ data_management/labelme_to_coco.py,sha256=yoGr7o-qwogTbvLLJQ6NjCmaYqY6-kHyn2C2860phkQ,18344
95
+ data_management/labelme_to_yolo.py,sha256=7oHS-wV9jXqq1c03agsKnc9-YWSN_BeDjJ4pbX_lDW0,10041
93
96
  data_management/ocr_tools.py,sha256=8cVJMQvzvv_6HXV8zMR4nJH72-L_f3Dy9IjIb3E32Js,29920
94
- data_management/read_exif.py,sha256=HGCp5gvD8MHQ0xGGTX2_PD5JHFTdMtqjZ8ql2pt5Fn0,19036
97
+ data_management/read_exif.py,sha256=9zdMkn4L_Do-4R4yenZw9ffFgb1TEBZuXhiCewVnP1A,21446
98
+ data_management/remap_coco_categories.py,sha256=pQSJKHxYY2p3HwDB1JXLchvNIyPDbg7TXBLZdBzAw_g,3002
95
99
  data_management/remove_exif.py,sha256=_WDrKfRwK0UWCkj4SiLutGCd7-WRaKYoTgLfBWPDhGU,1555
96
- data_management/resize_coco_dataset.py,sha256=whm-XN7btC914duBZBJDfphVujosJKzJdH1XHM5XPDU,7414
97
- data_management/yolo_output_to_md_output.py,sha256=PznmANlgfMNzou7OI1JH0e76j5pS9VcYMxuXau-Ys30,14974
98
- data_management/yolo_to_coco.py,sha256=ybNqFlfasMfxzPx9u3A-GH1TCbMlnO5s2gX2CmyzXoA,7448
100
+ data_management/resize_coco_dataset.py,sha256=3PSrV5T0FzUiHnZFdX7F9jKXLEqZSsNJN0jk-Yf9qh0,6100
101
+ data_management/wi_download_csv_to_coco.py,sha256=cQs7b3j-6QcSTcVKocUK0Nl0sV9jepy3Bqi9wM1dzok,7704
102
+ data_management/yolo_output_to_md_output.py,sha256=vxUdn4bqg0S67ozvxtnlX77X6S7nCvyZbKAyvCh_Suc,16313
103
+ data_management/yolo_to_coco.py,sha256=E0Z0O6O2vyNDks9cC2INJjezstwujjdkz4QZ3PuPMf8,15196
99
104
  data_management/annotations/annotation_constants.py,sha256=P2CZCbAE0ImLLfaNRb1SMlP3q1fULWAIjgrYOrF9L0g,1566
100
105
  data_management/databases/add_width_and_height_to_db.py,sha256=71mOEK3xo9gbxK1TVZzBA9nNi-1ElmBZbIPKrUm9XG0,619
101
106
  data_management/databases/combine_coco_camera_traps_files.py,sha256=cwu_REQXdHWfVLtCvTvFEIvM7z8GwHVoawVuHcWv2aw,6888
102
- data_management/databases/integrity_check_json_db.py,sha256=x9BO7fDXCfdyHc-oiwVoeesIp_M9Q2bUOdh9DsohTbc,14377
107
+ data_management/databases/integrity_check_json_db.py,sha256=mZxwc42uL_e4VqmuzRwh0lxvDN7WrwtKsDVPObp9hWc,14887
103
108
  data_management/databases/remove_corrupted_images_from_db.py,sha256=Dod8UQDFveAUJlrH9Svcp_HezdILRHp74TbAl3YGf84,6138
104
- data_management/databases/subset_json_db.py,sha256=UeVn3jlcpEw-K9E-QyRwxdzl7zaV80iv_u4v6kHUd_E,2749
109
+ data_management/databases/subset_json_db.py,sha256=7oHGuiyoPyv8JBh5IrgP4Qez8xs86z94UAPss2_FlfQ,2749
105
110
  data_management/importers/add_nacti_sizes.py,sha256=qsBHPyJ7MPzl0vgJX5iErZxWkTJ6QRcyLJ8GM2YBu2U,1172
106
111
  data_management/importers/add_timestamps_to_icct.py,sha256=8XhQAIt_qw63qTMPobCKGl4O9RQZvZmhbmiSetOyNvA,2459
107
- data_management/importers/animl_results_to_md_results.py,sha256=muhwUQxMSoMSeNKEYNJu2E2d4h0tv_RrKoSgUWX1hiQ,4896
112
+ data_management/importers/animl_results_to_md_results.py,sha256=Q43z0TQ5JTRVNbbCrb4E6lZB0kvWbngZMPrRmr43rp0,4884
108
113
  data_management/importers/auckland_doc_test_to_json.py,sha256=9Fg-n_Kj2jK5iZVaPrioNkhlLNxGnrU5GS_44lsadKo,12910
109
114
  data_management/importers/auckland_doc_to_json.py,sha256=qSjBcR7FTd5_J2LO6WOoIFxSnE2IiIIqRkhbydULV7s,5952
110
115
  data_management/importers/awc_to_json.py,sha256=jLXmwGaq81wgH7HcpbAJoNMQP2CqkdfI1mvShdTGeqw,5307
@@ -123,7 +128,7 @@ data_management/importers/jb_csv_to_json.py,sha256=u3IZwDboObYlxtUSa35G8P3t_L48m
123
128
  data_management/importers/mcgill_to_json.py,sha256=ZxsNW9qFi6Kyu8SJ0BB5uK7AMuBW92QOOKXHPbIgPwY,6718
124
129
  data_management/importers/missouri_to_json.py,sha256=y9lbLaD8bGM4m9iqGHIicyZOByeJGfZOF51RikHMSFU,14840
125
130
  data_management/importers/nacti_fieldname_adjustments.py,sha256=57PyfOft2Ws-1AcG4_9mzOcB3uW4IFxaZ3z0LsItUUU,2045
126
- data_management/importers/noaa_seals_2019.py,sha256=WHXqDmOetJYcrBkyNww4ZFQbh4VpCjQ7V4U3RpbBt94,5144
131
+ data_management/importers/noaa_seals_2019.py,sha256=2_XJl-jxSiN1IOzGmRvsUR-bI4Pclo_jO4qSW2ytqks,5147
127
132
  data_management/importers/pc_to_json.py,sha256=9Nin7R47aaE5bjXjvq7A2trv2vFdJVYzhLHwLFji5Tg,10718
128
133
  data_management/importers/plot_wni_giraffes.py,sha256=V_kAzbYjtXEBUCdSwSGsEEemLN9aVyZuKhoSZQEvkCI,3787
129
134
  data_management/importers/prepare-noaa-fish-data-for-lila.py,sha256=WIkuR4ozEeHwzQPs54jIDIbAgKf1I4taZNgpHHzh-Rc,12774
@@ -140,7 +145,7 @@ data_management/importers/ubc_to_json.py,sha256=MP_whIR-CVhNPCE3vQF_tk-6_EpmxWwR
140
145
  data_management/importers/umn_to_json.py,sha256=emUVCtNfbJmgHS22fBL8GaAMiblaJen52-IuqiFiWyI,16177
141
146
  data_management/importers/wellington_to_json.py,sha256=QFcVfAxflUVHTMuGhXGxe3z3iMKJ0B8Nziwpx6XcoLE,7671
142
147
  data_management/importers/wi_to_json.py,sha256=xuHFXE6tuaUnZmiFik18_3UCSCQx9abaG_2TnaRn0Xg,13656
143
- data_management/importers/zamba_results_to_md_results.py,sha256=acU63-F4NVPNj7h0WW53mfUrvSWf9dn14-WzZxrog94,5591
148
+ data_management/importers/zamba_results_to_md_results.py,sha256=UV0Iczxf_ghR3yL8D8KUAEg1j81_BavdzWhAFtg6wHQ,5594
144
149
  data_management/importers/eMammal/copy_and_unzip_emammal.py,sha256=gVB0drYUeCghWXFDpaJkCL0qdmFjMW75YAEEhFe38js,6080
145
150
  data_management/importers/eMammal/eMammal_helpers.py,sha256=Sv6PBAMDdlgwiek6Q3R6Rjio2RjtA-JpfgBr_Fmr9kA,6838
146
151
  data_management/importers/eMammal/make_eMammal_json.py,sha256=6C_-6Qk-Xhz_87DEPHA-txw90AvXrybJy1PbQXQbqwo,6987
@@ -148,51 +153,53 @@ data_management/importers/snapshotserengeti/make_full_SS_json.py,sha256=khE3W0pO
148
153
  data_management/importers/snapshotserengeti/make_per_season_SS_json.py,sha256=sAwvcR2siwblgY3LfTsbH4mXOXvJZCA246QIsQWuQBA,4316
149
154
  data_management/lila/add_locations_to_island_camera_traps.py,sha256=nsIJXyw2IhOwwM9A0SCn108Fg297fRUdADXGUAN8Y34,2561
150
155
  data_management/lila/add_locations_to_nacti.py,sha256=KVMWwSJx-gYI_J6J8y-AqsWnOTgidtebotJjYPfsj00,5017
151
- data_management/lila/create_lila_test_set.py,sha256=oYgOsUJjjbpfCs2Gx1CM6l2UK4xXmJPgRlwW4DOs1yY,4791
152
- data_management/lila/create_links_to_md_results_files.py,sha256=19YyXy4smcbjGbLYJOx0usbu5Rq2GQ1iDG4tLgOj1UM,3916
153
- data_management/lila/download_lila_subset.py,sha256=b25Pnql93Gtn-ZZmxQ0y30LiJMqxv9rB14z6Hn5T1TA,4382
154
- data_management/lila/generate_lila_per_image_labels.py,sha256=2dIQaVYTnyb5X_zfqQj1DpSqUh8XU0I8SgZkBAHQJiA,16856
155
- data_management/lila/get_lila_annotation_counts.py,sha256=m_tiXkpz3_KxVH4adDay_anUQ4OSdCkmOg7cUSP0tbI,5326
156
+ data_management/lila/create_lila_blank_set.py,sha256=09b2tvMPG9jThyJoLQ3tHiaoQJ_kBEJ4Yn2dACh_McY,19764
157
+ data_management/lila/create_lila_test_set.py,sha256=WxM-LuhtNiee3CLeVPxUEWsfbybgZd7oluZu6epl69A,4825
158
+ data_management/lila/create_links_to_md_results_files.py,sha256=f0pXY2Lyj9DtRlgFpmLqNBs2qWd--B8N6UAt8E26tcM,3916
159
+ data_management/lila/download_lila_subset.py,sha256=yDTv_TApQWCi3XzGe8-i4VLku2-gSayd4vRUP-W2kMI,7531
160
+ data_management/lila/generate_lila_per_image_labels.py,sha256=pZ6WJJmIRvFrypbjE4LP2-jdwGhEiBYdA2FlL7HK5eA,17352
161
+ data_management/lila/get_lila_annotation_counts.py,sha256=QVSKCmeLzliFZimjzi8AClS0Gz94pDMYckjw2cOm-7E,5490
156
162
  data_management/lila/get_lila_image_counts.py,sha256=r5p2wPL5vuKKO8DWia3Tll-EZZWFNUvax6ljaYtrKsk,3625
157
- data_management/lila/lila_common.py,sha256=olg_eR6Ul2kUQ7tIsndzVIJpils5oXNANYSGBmS455E,8667
158
- detection/process_video.py,sha256=tV7MTaS5mlSaWhAdDP4tvaMN_VCG27s-qr9yzf9Cwow,25915
159
- detection/pytorch_detector.py,sha256=K2mk-zlO81naypKzqdYYxlzX9xcaletDuq0MnNCCc10,11993
160
- detection/run_detector.py,sha256=TwkR0BSNeUvUmg5y-DJ6TtTnIq8Lv547JL4a9aiWpwI,24225
161
- detection/run_detector_batch.py,sha256=-8URucPBn0DVTZ7_U1bYA8fUYmiehTHWVb_ObZ7YVTo,43443
162
- detection/run_inference_with_yolov5_val.py,sha256=N77PkRM4dyOQCVik_BxPqNtIqbYb7jHphByGtB4XOf0,28323
163
- detection/run_tiled_inference.py,sha256=cs1IehE2DXj8Nr3CbnYMXqwcFM1vUBT1Rm5We5nlcSM,28785
164
- detection/tf_detector.py,sha256=xOO8kzd-Um2X_sAZyop524LM53nipv5pNx8YueGTJrc,6760
165
- detection/video_utils.py,sha256=w3Ym9YxHCb-sFHTZyapKT0FaxTH-coILOJLpnguTEN0,19390
163
+ data_management/lila/lila_common.py,sha256=lR_kW6qz5bqOFiu_Io5Ax4CLbyOH4Rikev6cJXQHob8,7691
164
+ data_management/lila/test_lila_metadata_urls.py,sha256=jDInoM5WD_EoahR_b5yTjrj6pkiitvj_Kz_1U0uSDzE,3966
165
+ detection/process_video.py,sha256=wuMoV-DJde_QlTiNAxsRjlDttiLl2e2BiJuyTQBINIE,26825
166
+ detection/pytorch_detector.py,sha256=WG6Q4KueBoA8lCZCdR2PrgbQAHs3HCO6MF01Ful4tfc,11992
167
+ detection/run_detector.py,sha256=XmQ4s-B7IlkxJye56y6uvx2vx6Ml3IBTo3Wx0SalO1Q,26036
168
+ detection/run_detector_batch.py,sha256=NC_tqgnshlmPijyM61WE1kyfA6SLfNNPQfAYqjhlxz0,47426
169
+ detection/run_inference_with_yolov5_val.py,sha256=ny1aM94U-sjgJWb-cY1y9HG8r7Qx-RJsi9zCWYH_z1w,33975
170
+ detection/run_tiled_inference.py,sha256=SMK6aaeOYZu5Yr3wezW3Pf0Q2TXyf6tHC-19p7854Rc,33931
171
+ detection/tf_detector.py,sha256=p3P5ippLo79jdneXilqEuywUbOjUN79VqksL7ayPWy4,6769
172
+ detection/video_utils.py,sha256=Rp5H7yl2vPgYKctztPGJFNSsrI3m021oh_uuT8SBmvI,19501
166
173
  detection/detector_training/copy_checkpoints.py,sha256=t2c3Q4Pf82tAp_OjSG-veIjRPrX0tJYi-bSQmGL2m4c,1091
167
174
  detection/detector_training/model_main_tf2.py,sha256=YwNsZ7hkIFaEuwKU0rHG_VyqiR_0E01BbdlD0Yx4Smo,4936
168
175
  md_utils/azure_utils.py,sha256=SVoQNSknYlBcpZeGrH2v3Qgm5kXxBrqM5Sx2L_Lax-I,6243
169
- md_utils/ct_utils.py,sha256=ywcKF1Rg8E_9XduxmVOSzcW_fgLFnDjI5IILjoZWiuY,10402
176
+ md_utils/ct_utils.py,sha256=NkxEH5S2qCBcQlqVMgdY07jYU6sOUXPbjDv0GWapca0,13311
170
177
  md_utils/directory_listing.py,sha256=dgxMczSOEH352YXdWRuNo_ujsonRrPJXFeuS7ocpe9k,9615
171
- md_utils/md_tests.py,sha256=DL8xhTm_np3B_2I8wdneh6f9PhprVdViZsckrrhKLLI,21688
172
- md_utils/path_utils.py,sha256=ZK2YScgWgogx-oj9WJxHR5xUhDawcClYyWHZvqyI3WY,13202
173
- md_utils/process_utils.py,sha256=ullaq8AIZNbUjG9ftB1kK3q8zJmfgbZdUNo1v3oXmvA,3191
178
+ md_utils/md_tests.py,sha256=7NQ_c-a4LfDceeHKU3gVr7e94fh7R1pZMLev-925fx8,33520
179
+ md_utils/path_utils.py,sha256=Hsk-Rtx3Y_LRR4rAPNtAP9GBgfWmRkt7tVkLnOhgAFg,23176
180
+ md_utils/process_utils.py,sha256=YkD38KLgceuqvMvDXIcVyzY51npUuUT3tOAjjF5Mvf8,4316
174
181
  md_utils/sas_blob_utils.py,sha256=GpjHn33N2b-XeBAtU3xhGbTIYcBs4YrXHtbQDmlGFvY,16955
175
- md_utils/string_utils.py,sha256=tFTC9TarPFGa7_UkKF0_t6Q2naH9IEsDVM2DOxwkFTQ,1277
176
- md_utils/url_utils.py,sha256=aFN7_WvzMJqYHL9t-SGCP2A0-mlYu1-P0HkZwgzUsKg,4598
177
- md_utils/write_html_image_list.py,sha256=9s9Y20MdDIWoLugyyNUAQNsM8-m46mN46EiIUSpOeu8,6986
182
+ md_utils/split_locations_into_train_val.py,sha256=psiWoXkYYLLOfjVHUyOhaa3fh9mmlm7HGFthklWbMaA,9241
183
+ md_utils/string_utils.py,sha256=Edwa07IWu7-QTNoMmvQYNnYgpwxxNh9VhXQ8AXMX3Qg,1600
184
+ md_utils/url_utils.py,sha256=4PLqoRLVVN-W_ovcGEEicQ4zb7-Nc5u8sTCtm3qR8CM,6824
185
+ md_utils/write_html_image_list.py,sha256=U4JantMnHe7_aEL5oLOFyz2XvbK34M5ZZ4QVbiA5H98,7532
178
186
  md_visualization/plot_utils.py,sha256=eppaGgI0k73jhvOCruNrNO-VLH3EEFpFP2la_rZo57E,10712
179
187
  md_visualization/render_images_with_thumbnails.py,sha256=XJcL5qxu5pe2LQ4MqnD-O6dM_cPxGGNoqk5U_rZzFUQ,10391
180
- md_visualization/visualization_utils.py,sha256=EmZFOeYbhv1la53T3eFqJ_QKs7rGgcKkAOGfDcilYfg,34811
181
- md_visualization/visualize_db.py,sha256=QdPbxEmiPqOTQt8oa32-ax61bTw8N9aDx_pOJn6DF90,18665
182
- md_visualization/visualize_detector_output.py,sha256=C-UJZhwz1v4EccTm1Z4CldG1NSDCRQ-TaJ__G6ouM88,11595
183
- md_visualization/visualize_megadb.py,sha256=ZHFMgQv-zjXwvyT6gEfLe2BzodvBNfQYEh0b6P_59TE,6188
184
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py,sha256=zDercLXW2tdAdLSsyO7mJYzQD93Ie3hRvW8zHPz_N0k,16516
185
- taxonomy_mapping/map_new_lila_datasets.py,sha256=tPM-uv5nDhCtUaCLV4UtHwN3AS-4GggxLOKfuPVR7iY,4032
186
- taxonomy_mapping/prepare_lila_taxonomy_release.py,sha256=1bZ7QkRjDLkRapytQsWbLvt21_sgDPQAr5Juj2fvde0,4266
187
- taxonomy_mapping/preview_lila_taxonomy.py,sha256=OfiQeVUWBesWE52ZuAMsd53Iv_ZOzqqZhS2Emxs-bac,20079
188
+ md_visualization/visualization_utils.py,sha256=AZyetIxwnAa5eLPPkMvArVCX2ORf6UFJ3Uw39D5mXsY,46496
189
+ md_visualization/visualize_db.py,sha256=fleCCGRBoKkdfBjgt277_EjbidnGI4RcunZosVN5_Ms,19616
190
+ md_visualization/visualize_detector_output.py,sha256=aeg8DbwfddW5CDe84V2dt07eWMyxr4QdFcwTYbC_Lnk,15814
191
+ taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py,sha256=kFDp6r25LhYVkyrm-35TgBc2vgXXh6SmoARqO4aE9PU,16517
192
+ taxonomy_mapping/map_new_lila_datasets.py,sha256=rJlj-HuP9wNN2RvIlcWfqnW5N7cyiGLWglbe3FsFG0Q,4324
193
+ taxonomy_mapping/prepare_lila_taxonomy_release.py,sha256=Ser_lwpbYR75fMMcE49uT90t7o02dRZ7wY0GUyhzK9c,4357
194
+ taxonomy_mapping/preview_lila_taxonomy.py,sha256=iCcvf8dDsh1WZIZWq2ppua5wloAst6F6lphsS9vRTKQ,20144
188
195
  taxonomy_mapping/retrieve_sample_image.py,sha256=BySUy69DeGwPiIs9Ws5vIILJCTXeVImE5AVoawOiECM,1992
189
196
  taxonomy_mapping/simple_image_download.py,sha256=dXXVhhaR_bI-Elmk4Tmjt2ftdYzHbkuJCTzIMOJfLKs,6874
190
- taxonomy_mapping/species_lookup.py,sha256=wuyrZdFjIGmMdOoRgbON1AHcGEVZ1LjyOhfPE1jTJjY,27402
191
- taxonomy_mapping/taxonomy_csv_checker.py,sha256=db6Biubc0vLxIYe8fhkEW-GqYulT6tfPe8HOyDf3ksc,4795
197
+ taxonomy_mapping/species_lookup.py,sha256=oRqaUbiH_xULH7z5mkrtaFhacxlyM8KT-V-c4FnNq4w,28303
198
+ taxonomy_mapping/taxonomy_csv_checker.py,sha256=xmV2SBOfQEuZBMGmXyUzbuNxvd_oXKysXkxU6-IhKJg,4874
192
199
  taxonomy_mapping/taxonomy_graph.py,sha256=ZDm2enGanBlm8KXWvCndqmeerOp9LREaetSl-Lxy07s,12361
193
200
  taxonomy_mapping/validate_lila_category_mappings.py,sha256=CApYVWIZ8TTJ3vvQTgfjIvWDGHpPo-Zn9jqJFaw3DNw,2314
194
- megadetector-5.0.6.dist-info/LICENSE,sha256=RMa3qq-7Cyk7DdtqRj_bP1oInGFgjyHn9-PZ3PcrqIs,1100
195
- megadetector-5.0.6.dist-info/METADATA,sha256=srthdZm95WW0GdGlpAqRRBSp3Uinsdwr3iGacKR3PKk,7512
196
- megadetector-5.0.6.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
197
- megadetector-5.0.6.dist-info/top_level.txt,sha256=-mFGpqnmviVz0Vyr2GxZ_kTo_PBPNoK6h4JtqIMjZGQ,88
198
- megadetector-5.0.6.dist-info/RECORD,,
201
+ megadetector-5.0.8.dist-info/LICENSE,sha256=RMa3qq-7Cyk7DdtqRj_bP1oInGFgjyHn9-PZ3PcrqIs,1100
202
+ megadetector-5.0.8.dist-info/METADATA,sha256=tqdjGFYteHST4x9vGmougFqF76DH8J0Hr-wcyaAEaXg,7383
203
+ megadetector-5.0.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
204
+ megadetector-5.0.8.dist-info/top_level.txt,sha256=-mFGpqnmviVz0Vyr2GxZ_kTo_PBPNoK6h4JtqIMjZGQ,88
205
+ megadetector-5.0.8.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.3)
2
+ Generator: bdist_wheel (0.43.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -481,4 +481,4 @@ with open(wi_mapping_table_file,'w') as f:
481
481
 
482
482
  # ...for each dataset
483
483
 
484
- # ...with open()
484
+ # ...with open()
@@ -15,15 +15,25 @@ import json
15
15
  # Created by get_lila_category_list.py
16
16
  input_lila_category_list_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
17
17
 
18
- output_file = os.path.expanduser('~/lila/lila_additions_2022.08.22.csv')
18
+ output_file = os.path.expanduser('~/lila/lila_additions_2023.12.29.csv')
19
19
 
20
20
  datasets_to_map = [
21
- # 'NACTI'
22
- # 'Channel Islands Camera Traps'
23
- 'ENA24'
21
+ 'Trail Camera Images of New Zealand Animals'
24
22
  ]
25
23
 
26
24
 
25
+ #%% Initialize taxonomic lookup
26
+
27
+ from taxonomy_mapping.species_lookup import (
28
+ initialize_taxonomy_lookup,
29
+ get_preferred_taxonomic_match)
30
+
31
+ # from taxonomy_mapping.species_lookup import (
32
+ # get_taxonomic_info, print_taxonomy_matche)
33
+
34
+ initialize_taxonomy_lookup(force_init=False)
35
+
36
+
27
37
  #%% Read the list of datasets
28
38
 
29
39
  with open(input_lila_category_list_file,'r') as f:
@@ -57,46 +67,14 @@ for dataset_name in datasets_to_map:
57
67
  print('Need to create {} mappings'.format(len(category_mappings)))
58
68
 
59
69
 
60
- #%% Initialize taxonomic lookup
61
-
62
- from taxonomy_mapping.species_lookup import (
63
- initialize_taxonomy_lookup,
64
- get_preferred_taxonomic_match)
65
-
66
- # from taxonomy_mapping.species_lookup import (
67
- # get_taxonomic_info, print_taxonomy_matche)
68
-
69
- initialize_taxonomy_lookup()
70
-
71
-
72
- #%% Manual lookup
73
-
74
- if False:
75
-
76
- #%%
77
-
78
- # q = 'white-throated monkey'
79
- q = 'cingulata'
80
- taxonomy_preference = 'inat'
81
- m = get_preferred_taxonomic_match(q,taxonomy_preference)
82
-
83
- if m is None:
84
- print('No match')
85
- else:
86
- if m.source != taxonomy_preference:
87
- print('\n*** non-preferred match ***\n')
88
- # raise ValueError('')
89
- print(m.source)
90
- print(m.taxonomy_string)
91
- import clipboard; clipboard.copy(m.taxonomy_string)
92
-
93
-
94
70
  #%% Match every query against our taxonomies
95
71
 
96
72
  output_rows = []
97
73
 
98
74
  taxonomy_preference = 'inat'
99
75
 
76
+ allow_non_preferred_matches = True
77
+
100
78
  # mapping_string = category_mappings[1]; print(mapping_string)
101
79
  for mapping_string in category_mappings:
102
80
 
@@ -108,7 +86,7 @@ for mapping_string in category_mappings:
108
86
 
109
87
  taxonomic_match = get_preferred_taxonomic_match(query,taxonomy_preference=taxonomy_preference)
110
88
 
111
- if taxonomic_match.source == taxonomy_preference:
89
+ if (taxonomic_match.source == taxonomy_preference) or allow_non_preferred_matches:
112
90
 
113
91
  output_row = {
114
92
  'dataset_name': dataset_name,
@@ -148,3 +126,29 @@ output_df = pd.DataFrame(data=output_rows, columns=[
148
126
  'dataset_name', 'query', 'source', 'taxonomy_level',
149
127
  'scientific_name', 'common_name', 'taxonomy_string'])
150
128
  output_df.to_csv(output_file, index=None, header=True)
129
+
130
+
131
+ #%% Manual lookup
132
+
133
+ if False:
134
+
135
+ #%%
136
+
137
+ # q = 'white-throated monkey'
138
+ # q = 'cingulata'
139
+ # q = 'notamacropus'
140
+ q = 'porzana'
141
+ taxonomy_preference = 'inat'
142
+ m = get_preferred_taxonomic_match(q,taxonomy_preference)
143
+ # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
144
+
145
+ if m is None:
146
+ print('No match')
147
+ else:
148
+ if m.source != taxonomy_preference:
149
+ print('\n*** non-preferred match ***\n')
150
+ # raise ValueError('')
151
+ print(m.source)
152
+ print(m.taxonomy_string)
153
+ # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
154
+ import clipboard; clipboard.copy(m.taxonomy_string)
@@ -13,8 +13,9 @@ import os
13
13
  import json
14
14
  import pandas as pd
15
15
 
16
- lila_taxonomy_file = os.path.expanduser('~/git/agentmorrisprivate/lila-taxonomy/lila-taxonomy-mapping.csv')
17
- release_taxonomy_file = os.path.expanduser('~/lila/lila-taxonomy-mapping_release.22.08.22.0000.csv')
16
+ lila_taxonomy_file = 'c:/git/agentmorrisprivate/lila-taxonomy/lila-taxonomy-mapping.csv'
17
+ release_taxonomy_file = os.path.expanduser('~/lila/lila-taxonomy-mapping_release.csv')
18
+ # import clipboard; clipboard.copy(release_taxonomy_file)
18
19
 
19
20
  # Created by get_lila_category_list.py... contains counts for each category
20
21
  lila_dataset_to_categories_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
@@ -129,3 +130,5 @@ for i_row,row in df.iterrows():
129
130
 
130
131
  df = df.drop('source',axis=1)
131
132
  df.to_csv(release_taxonomy_file,header=True,index=False)
133
+
134
+ print('Wrote final output to {}'.format(release_taxonomy_file))
@@ -15,11 +15,10 @@ from tqdm import tqdm
15
15
  import os
16
16
  import pandas as pd
17
17
 
18
- # lila_taxonomy_file = r"G:\git\agentmorrisprivate\lila-taxonomy\lila-taxonomy-mapping.csv"
19
- lila_taxonomy_file = r"G:\temp\lila\lila-taxonomy-mapping_release.22.07.03.1608.csv"
20
- # lila_taxonomy_file = r"G:\temp\lila\lila_additions_2022.06.29.csv"
18
+ # lila_taxonomy_file = r"c:\git\agentmorrisprivate\lila-taxonomy\lila-taxonomy-mapping.csv"
19
+ lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2023.12.29.csv')
21
20
 
22
- preview_base = r'g:\temp\lila\lila_taxonomy_preview'
21
+ preview_base = os.path.expanduser('~/lila/lila_taxonomy_preview')
23
22
  os.makedirs(preview_base,exist_ok=True)
24
23
  html_output_file = os.path.join(preview_base,'index.html')
25
24
 
@@ -172,15 +171,14 @@ for i_row,row in tqdm(df.iterrows(),total=len(df)):
172
171
 
173
172
  print('\nMade {} taxonomy changes'.format(n_taxonomy_changes))
174
173
 
174
+ # Optionally re-write
175
175
  if False:
176
176
  df.to_csv(lila_taxonomy_file,header=True,index=False)
177
177
 
178
178
 
179
179
  #%% List null mappings
180
180
 
181
- #
182
- # These should all be things like "unidentified" and "fire"
183
- #
181
+ # These should all be things like "empty", "unidentified", "fire", "car", etc.
184
182
 
185
183
  # i_row = 0; row = df.iloc[i_row]
186
184
  for i_row,row in df.iterrows():
@@ -393,20 +391,20 @@ remapped_queries = {'papio':'papio+baboon',
393
391
 
394
392
  import os
395
393
  from taxonomy_mapping import retrieve_sample_image
394
+
396
395
  scientific_name_to_paths = {}
397
396
  image_base = os.path.join(preview_base,'images')
398
397
  images_per_query = 15
399
398
  min_valid_images_per_query = 3
400
399
  min_valid_image_size = 3000
401
400
 
401
+ # TODO: trivially parallelizable
402
+ #
402
403
  # i_row = 0; row = df.iloc[i_row]
403
404
  for i_row,row in df.iterrows():
404
405
 
405
406
  s = row['scientific_name']
406
407
 
407
- # if s != 'mirafra':
408
- # continue
409
-
410
408
  if (not isinstance(s,str)) or (len(s)==0):
411
409
  continue
412
410
 
@@ -416,17 +414,17 @@ for i_row,row in df.iterrows():
416
414
  query = remapped_queries[query]
417
415
 
418
416
  query_folder = os.path.join(image_base,query)
417
+ os.makedirs(query_folder,exist_ok=True)
419
418
 
420
419
  # Check whether we already have enough images for this query
421
- if os.path.isdir(query_folder):
422
- image_files = os.listdir(query_folder)
423
- image_fullpaths = [os.path.join(query_folder,fn) for fn in image_files]
424
- sizes = [os.path.getsize(p) for p in image_fullpaths]
425
- sizes_above_threshold = [x for x in sizes if x > min_valid_image_size]
426
- if len(sizes_above_threshold) > min_valid_images_per_query:
427
- # print('Skipping query {}, already have {} images'.format(s,len(sizes_above_threshold)))
428
- continue
429
-
420
+ image_files = os.listdir(query_folder)
421
+ image_fullpaths = [os.path.join(query_folder,fn) for fn in image_files]
422
+ sizes = [os.path.getsize(p) for p in image_fullpaths]
423
+ sizes_above_threshold = [x for x in sizes if x > min_valid_image_size]
424
+ if len(sizes_above_threshold) > min_valid_images_per_query:
425
+ print('Skipping query {}, already have {} images'.format(s,len(sizes_above_threshold)))
426
+ continue
427
+
430
428
  # Check whether we've already run this query for a previous row
431
429
  if query in scientific_name_to_paths:
432
430
  continue
@@ -448,14 +446,16 @@ from md_utils import path_utils
448
446
  all_images = path_utils.recursive_file_list(image_base,False)
449
447
 
450
448
  for fn in tqdm(all_images):
451
- if fn.endswith('.jpeg'):
449
+ if fn.lower().endswith('.jpeg'):
452
450
  new_fn = fn[0:-5] + '.jpg'
453
- # print('Renaming {} to {}'.format(fn,new_fn))
454
451
  os.rename(fn, new_fn)
455
452
 
456
453
 
457
454
  #%% Choose representative images for each scientific name
458
455
 
456
+ # Specifically, sort by size, and take the largest unique sizes. Very small files tend
457
+ # to be bogus thumbnails, etc.
458
+
459
459
  max_images_per_query = 4
460
460
  scientific_name_to_preferred_images = {}
461
461
 
@@ -506,7 +506,7 @@ for images in scientific_name_to_preferred_images.values():
506
506
  print('Using a total of {} images'.format(len(used_images)))
507
507
  used_images_set = set(used_images)
508
508
 
509
- import path_utils
509
+ from md_utils import path_utils
510
510
  all_images = path_utils.recursive_file_list(image_base,False)
511
511
 
512
512
  unused_images = []
@@ -523,7 +523,7 @@ for fn in tqdm(unused_images):
523
523
 
524
524
  #%% Produce HTML preview
525
525
 
526
- with open(html_output_file, 'w') as f:
526
+ with open(html_output_file, 'w', encoding='utf-8') as f:
527
527
 
528
528
  f.write('<html><head></head><body>\n')
529
529
 
@@ -555,10 +555,11 @@ with open(html_output_file, 'w') as f:
555
555
  f.write('<p class="speciesinfo_p" style="font-weight:bold;font-size:130%">')
556
556
 
557
557
  if isinstance(row.scientific_name,str):
558
- f.write('{}: <b><u>{}</u></b> mapped to {} {} ({}) ({})</p>\n'.format(
558
+ output_string = '{}: <b><u>{}</u></b> mapped to {} {} ({}) ({})</p>\n'.format(
559
559
  row.dataset_name, row.query,
560
560
  row.taxonomy_level, row.scientific_name, common_name_string,
561
- row.common_name))
561
+ row.common_name)
562
+ f.write(output_string)
562
563
  else:
563
564
  f.write('{}: <b><u>{}</u></b> unmapped'.format(row.dataset_name,row.query))
564
565
 
@@ -586,6 +587,5 @@ with open(html_output_file, 'w') as f:
586
587
 
587
588
  #%% Open HTML preview
588
589
 
589
- from md_utils.path_utils import open_file # from ai4eutils
590
+ from md_utils.path_utils import open_file
590
591
  open_file(html_output_file)
591
-