megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (197)
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@
2
2
 
3
3
  map_lila_taxonomy_to_wi_taxonomy.py
4
4
 
5
- Loads the LILA category mapping (in which taxonomy information comes from an
5
+ Loads the LILA category mapping (in which taxonomy information comes from an
6
6
  iNat taxonomy snapshot) and tries to map each class to the Wildlife Insights taxonomy.
7
7
 
8
8
  """
@@ -22,9 +22,9 @@ from megadetector.data_management.lila.lila_common import \
22
22
  #%% Prevent execution during infrastructural imports
23
23
 
24
24
  if False:
25
-
25
+
26
26
  #%%
27
-
27
+
28
28
  lila_local_base = os.path.expanduser('~/lila')
29
29
 
30
30
  metadata_dir = os.path.join(lila_local_base, 'metadata')
@@ -65,9 +65,9 @@ if False:
65
65
 
66
66
  #%% Cache WI taxonomy lookups
67
67
 
68
- def is_empty_wi_item(v):
68
+ def _is_empty_wi_item(v):
69
69
  if isinstance(v, str):
70
- return len(v) == 0
70
+ return len(v) == 0
71
71
  elif v is None:
72
72
  return True
73
73
  else:
@@ -75,7 +75,7 @@ if False:
75
75
  return True
76
76
 
77
77
 
78
- def taxonomy_items_equal(a, b):
78
+ def _taxonomy_items_equal(a, b):
79
79
  if isinstance(a, str) and (not isinstance(b, str)):
80
80
  return False
81
81
  if isinstance(b, str) and (not isinstance(a, str)):
@@ -121,7 +121,7 @@ if False:
121
121
  v = taxon[k]
122
122
  if isinstance(v,str):
123
123
  taxon[k] = v.strip()
124
-
124
+
125
125
  if taxon['commonNameEnglish'] in ignore_taxa:
126
126
  continue
127
127
 
@@ -152,37 +152,37 @@ if False:
152
152
  continue
153
153
 
154
154
  # Do we have a species name?
155
- if not is_empty_wi_item(taxon['species']):
155
+ if not _is_empty_wi_item(taxon['species']):
156
156
 
157
157
  # If 'species' is populated, 'genus' should always be populated; one item currently breaks
158
158
  # this rule.
159
- assert not is_empty_wi_item(taxon['genus'])
160
-
159
+ assert not _is_empty_wi_item(taxon['genus'])
160
+
161
161
  taxon_name = (taxon['genus'].strip() + ' ' +
162
162
  taxon['species'].strip()).strip().lower()
163
- assert not is_empty_wi_item(taxon['class']) and \
164
- not is_empty_wi_item(taxon['order']) and \
165
- not is_empty_wi_item(taxon['family'])
163
+ assert not _is_empty_wi_item(taxon['class']) and \
164
+ not _is_empty_wi_item(taxon['order']) and \
165
+ not _is_empty_wi_item(taxon['family'])
166
166
 
167
- elif not is_empty_wi_item(taxon['genus']):
167
+ elif not _is_empty_wi_item(taxon['genus']):
168
168
 
169
- assert not is_empty_wi_item(taxon['class']) and \
170
- not is_empty_wi_item(taxon['order']) and \
171
- not is_empty_wi_item(taxon['family'])
169
+ assert not _is_empty_wi_item(taxon['class']) and \
170
+ not _is_empty_wi_item(taxon['order']) and \
171
+ not _is_empty_wi_item(taxon['family'])
172
172
  taxon_name = taxon['genus'].strip().lower()
173
173
 
174
- elif not is_empty_wi_item(taxon['family']):
174
+ elif not _is_empty_wi_item(taxon['family']):
175
175
 
176
- assert not is_empty_wi_item(taxon['class']) and \
177
- not is_empty_wi_item(taxon['order'])
176
+ assert not _is_empty_wi_item(taxon['class']) and \
177
+ not _is_empty_wi_item(taxon['order'])
178
178
  taxon_name = taxon['family'].strip().lower()
179
179
 
180
- elif not is_empty_wi_item(taxon['order']):
180
+ elif not _is_empty_wi_item(taxon['order']):
181
181
 
182
- assert not is_empty_wi_item(taxon['class'])
182
+ assert not _is_empty_wi_item(taxon['class'])
183
183
  taxon_name = taxon['order'].strip().lower()
184
184
 
185
- elif not is_empty_wi_item(taxon['class']):
185
+ elif not _is_empty_wi_item(taxon['class']):
186
186
 
187
187
  taxon_name = taxon['class'].strip().lower()
188
188
 
@@ -204,8 +204,8 @@ if False:
204
204
  level,previous_taxon[level],
205
205
  previous_taxon['taxon_name'],
206
206
  level,taxon[level])
207
- assert taxonomy_items_equal(previous_taxon[level], taxon[level]), error_string
208
-
207
+ assert _taxonomy_items_equal(previous_taxon[level], taxon[level]), error_string
208
+
209
209
  taxon['taxon_name'] = taxon_name
210
210
  if taxon_name == 'homo sapiens':
211
211
  human_taxa.append(taxon)
@@ -234,7 +234,7 @@ if False:
234
234
  pass
235
235
 
236
236
  #%% Manual review of redundant taxa
237
-
237
+
238
238
  s = taxon_names_with_multiple_entries[15]
239
239
  taxa = wi_taxon_name_to_taxa[s]
240
240
  for t in taxa:
@@ -249,19 +249,19 @@ if False:
249
249
  taxon_name_to_preferred_taxon_id = {}
250
250
 
251
251
  # "helmeted guineafowl" vs "domestic guineafowl"
252
- taxon_name_to_preferred_taxon_id['numida meleagris'] = '83133617-8358-4910-82ee-4c23e40ba3dc' # 2005826
252
+ taxon_name_to_preferred_taxon_id['numida meleagris'] = '83133617-8358-4910-82ee-4c23e40ba3dc' # 2005826
253
253
 
254
254
  # "domestic turkey" vs. "wild turkey"
255
- taxon_name_to_preferred_taxon_id['meleagris gallopavo'] = 'c10547c3-1748-48bf-a451-8066c820f22f' # 2021598
255
+ taxon_name_to_preferred_taxon_id['meleagris gallopavo'] = 'c10547c3-1748-48bf-a451-8066c820f22f' # 2021598
256
256
 
257
257
  # multiple sensible human entries
258
- taxon_name_to_preferred_taxon_id['homo sapiens'] = '990ae9dd-7a59-4344-afcb-1b7b21368000' # 2002045
258
+ taxon_name_to_preferred_taxon_id['homo sapiens'] = '990ae9dd-7a59-4344-afcb-1b7b21368000' # 2002045
259
259
 
260
260
  # "domestic dog" and "dog-on-leash"
261
- taxon_name_to_preferred_taxon_id['canis familiaris'] = '3d80f1d6-b1df-4966-9ff4-94053c7a902a' # 2021548
261
+ taxon_name_to_preferred_taxon_id['canis familiaris'] = '3d80f1d6-b1df-4966-9ff4-94053c7a902a' # 2021548
262
262
 
263
263
  # "small mammal" vs. "mammal"
264
- taxon_name_to_preferred_taxon_id['mammalia'] = 'f2d233e3-80e3-433d-9687-e29ecc7a467a' # 2021108
264
+ taxon_name_to_preferred_taxon_id['mammalia'] = 'f2d233e3-80e3-433d-9687-e29ecc7a467a' # 2021108
265
265
 
266
266
  # "Hispaniolan Mango" vs. NaN
267
267
  taxon_name_to_preferred_taxon_id['anthracothorax dominicus'] = 'f94e6d97-59cf-4d38-a05a-a75efdd2863b'
@@ -276,19 +276,19 @@ if False:
276
276
  taxon_name_to_preferred_taxon_id['stagonopleura bella'] = '7fec8e7e-fd3b-4d7f-99fd-3ade6f3bbaa5' # 2021939
277
277
 
278
278
  # "yellow wagtail" vs. "yellow crowned-wagtail"
279
- taxon_name_to_preferred_taxon_id['motacilla flava'] = 'ac6669bc-9f9e-4473-b609-b9082f9bf50c' # 2016194
279
+ taxon_name_to_preferred_taxon_id['motacilla flava'] = 'ac6669bc-9f9e-4473-b609-b9082f9bf50c' # 2016194
280
280
 
281
281
  # "dremomys species" vs. "dremomys genus"
282
282
  taxon_name_to_preferred_taxon_id['dremomys'] = '1507d153-af11-46f1-bfb8-77918d035ab3' # 2019370
283
283
 
284
284
  # "elk" vs. "domestic elk"
285
- taxon_name_to_preferred_taxon_id['cervus canadensis'] = 'c5ce946f-8f0d-4379-992b-cc0982381f5e'
285
+ taxon_name_to_preferred_taxon_id['cervus canadensis'] = 'c5ce946f-8f0d-4379-992b-cc0982381f5e'
286
286
 
287
287
  # "American bison" vs. "domestic bison"
288
- taxon_name_to_preferred_taxon_id['bison bison'] = '539ebd55-081b-429a-9ae6-5a6a0f6999d4' # 2021593
288
+ taxon_name_to_preferred_taxon_id['bison bison'] = '539ebd55-081b-429a-9ae6-5a6a0f6999d4' # 2021593
289
289
 
290
290
  # "woodrat or rat or mouse species" vs. "mouse species"
291
- taxon_name_to_preferred_taxon_id['muridae'] = 'e7503287-468c-45af-a1bd-a17821bb62f2' # 2021642
291
+ taxon_name_to_preferred_taxon_id['muridae'] = 'e7503287-468c-45af-a1bd-a17821bb62f2' # 2021642
292
292
 
293
293
  # both "southern sand frog"
294
294
  taxon_name_to_preferred_taxon_id['tomopterna adiastola'] = 'a5dc63cb-41be-4090-84a7-b944b16dcee4' # 2021834
@@ -296,18 +296,18 @@ if False:
296
296
  # sericornis species vs. scrubwren species
297
297
  taxon_name_to_preferred_taxon_id['sericornis'] = 'ad82c0ac-df48-4028-bf71-d2b2f4bc4129' # 2021776
298
298
 
299
-
299
+
300
300
  # taxon_name = list(taxon_name_to_preferred_taxon_id.keys())[0]
301
301
  for taxon_name in taxon_name_to_preferred_taxon_id.keys():
302
-
302
+
303
303
  candidate_taxa = wi_taxon_name_to_taxa[taxon_name]
304
-
304
+
305
305
  # If we've gotten this far, we should be choosing from multiple taxa.
306
306
  #
307
307
  # This will become untrue if any of these are resolved later, at which point we should
308
308
  # remove them from taxon_name_to_preferred_id
309
309
  assert len(candidate_taxa) > 1, 'Only one taxon available for {}'.format(taxon_name)
310
-
310
+
311
311
  # Choose the preferred taxa
312
312
  selected_taxa = [t for t in candidate_taxa if t[id_column] == \
313
313
  taxon_name_to_preferred_taxon_id[taxon_name]]
@@ -365,7 +365,7 @@ if False:
365
365
  query = None
366
366
 
367
367
  lila_dataset_category = lila_taxon['dataset_name'] + ':' + lila_taxon['query']
368
-
368
+
369
369
  # Go from kingdom --> species, choosing the lowest-level description as the query
370
370
  for level in lila_taxonomy_levels:
371
371
  if isinstance(lila_taxon[level], str):
@@ -455,37 +455,37 @@ if False:
455
455
  #%% Map LILA datasets to WI taxa, and count the number of each taxon available in each dataset
456
456
 
457
457
  with open(wi_mapping_table_file,'w') as f:
458
-
458
+
459
459
  f.write('lila_dataset_name,lila_category_name,wi_guid,wi_taxon_name,wi_common,count\n')
460
-
460
+
461
461
  # dataset_name = list(lila_dataset_to_categories.keys())[0]
462
462
  for dataset_name in lila_dataset_to_categories.keys():
463
-
463
+
464
464
  if '_bbox' in dataset_name:
465
465
  continue
466
-
466
+
467
467
  dataset_categories = lila_dataset_to_categories[dataset_name]
468
-
468
+
469
469
  # dataset_category = dataset_categories[0]
470
470
  for category in dataset_categories:
471
-
471
+
472
472
  lila_dataset_category = dataset_name + ':' + category['name'].strip().lower()
473
473
  if '#' in lila_dataset_category:
474
474
  continue
475
475
  assert lila_dataset_category in lila_dataset_category_to_lila_taxon
476
476
  assert lila_dataset_category in lila_dataset_category_to_wi_taxon
477
477
  assert 'count' in category
478
-
478
+
479
479
  wi_taxon = lila_dataset_category_to_wi_taxon[lila_dataset_category]
480
-
481
- # Write out the dataset name, category name, WI GUID, WI scientific name, WI common name,
480
+
481
+ # Write out the dataset name, category name, WI GUID, WI scientific name, WI common name,
482
482
  # and count
483
483
  s = f"{dataset_name},{category['name']},{wi_taxon['uniqueIdentifier']},"+\
484
484
  f"{wi_taxon['taxon_name']},{wi_taxon['commonNameEnglish']},{category['count']}\n"
485
485
  f.write(s)
486
-
486
+
487
487
  # ...for each category in this dataset
488
-
489
- # ...for each dataset
488
+
489
+ # ...for each dataset
490
490
 
491
491
  # ...with open()
@@ -15,10 +15,10 @@ import json
15
15
  # Created by get_lila_category_list.py
16
16
  input_lila_category_list_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
17
17
 
18
- output_file = os.path.expanduser('~/lila/lila_additions_2025.03.24.csv')
18
+ output_file = os.path.expanduser('~/lila/lila_additions_2025.06.23.csv')
19
19
 
20
20
  datasets_to_map = [
21
- 'UNSW Predators'
21
+ 'Nkhotakota Camera Traps'
22
22
  ]
23
23
 
24
24
 
@@ -48,7 +48,7 @@ for s in datasets_to_map:
48
48
  assert s in lila_datasets
49
49
 
50
50
 
51
- #%% Find all categories
51
+ #%% Find all categories
52
52
 
53
53
  category_mappings = []
54
54
 
@@ -75,17 +75,17 @@ allow_non_preferred_matches = True
75
75
 
76
76
  # mapping_string = category_mappings[1]; print(mapping_string)
77
77
  for mapping_string in category_mappings:
78
-
78
+
79
79
  tokens = mapping_string.split(':')
80
- assert len(tokens) == 2
80
+ assert len(tokens) == 2
81
81
 
82
82
  dataset_name = tokens[0]
83
83
  query = tokens[1]
84
84
 
85
85
  taxonomic_match = get_preferred_taxonomic_match(query,taxonomy_preference=taxonomy_preference)
86
-
86
+
87
87
  if (taxonomic_match.source == taxonomy_preference) or allow_non_preferred_matches:
88
-
88
+
89
89
  output_row = {
90
90
  'dataset_name': dataset_name,
91
91
  'query': query,
@@ -95,9 +95,9 @@ for mapping_string in category_mappings:
95
95
  'common_name': taxonomic_match.common_name,
96
96
  'taxonomy_string': taxonomic_match.taxonomy_string
97
97
  }
98
-
98
+
99
99
  else:
100
-
100
+
101
101
  output_row = {
102
102
  'dataset_name': dataset_name,
103
103
  'query': query,
@@ -107,10 +107,10 @@ for mapping_string in category_mappings:
107
107
  'common_name': '',
108
108
  'taxonomy_string': ''
109
109
  }
110
-
110
+
111
111
  output_rows.append(output_row)
112
-
113
- # ...for each mapping
112
+
113
+ # ...for each mapping
114
114
 
115
115
 
116
116
  #%% Write output rows
@@ -133,19 +133,19 @@ output_df.to_csv(output_file, index=None, header=True)
133
133
  if False:
134
134
 
135
135
  #%% You probably want to open the .csv file first
136
-
136
+
137
137
  from megadetector.utils.path_utils import open_file
138
138
  open_file(output_file)
139
139
 
140
-
140
+
141
141
  #%%
142
-
143
- q = 'dasyurus maculatus'
144
-
142
+
143
+ q = 'animalia'
144
+
145
145
  taxonomy_preference = 'inat'
146
146
  m = get_preferred_taxonomic_match(q,taxonomy_preference)
147
147
  # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
148
-
148
+
149
149
  if (m is None) or (len(m.taxonomy_string) == 0):
150
150
  print('No match')
151
151
  else:
@@ -154,5 +154,4 @@ if False:
154
154
  # raise ValueError('')
155
155
  print(m.source)
156
156
  print(m.taxonomy_string)
157
- # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
158
157
  import clipboard; clipboard.copy(m.taxonomy_string)
@@ -2,7 +2,7 @@
2
2
 
3
3
  prepare_lila_taxonomy_release.py
4
4
 
5
- Given the private intermediate taxonomy mapping (produced by map_new_lila_datasets.py),
5
+ Given the private intermediate taxonomy mapping (produced by map_new_lila_datasets.py),
6
6
  prepare the public (release) taxonomy mapping file.
7
7
 
8
8
  """
@@ -17,9 +17,9 @@ import pandas as pd
17
17
  #%% Prevent execution during infrastructural imports
18
18
 
19
19
  if False:
20
-
20
+
21
21
  #%% Filenames
22
-
22
+
23
23
  lila_taxonomy_file = 'c:/git/agentmorrisprivate/lila-taxonomy/lila-taxonomy-mapping.csv'
24
24
  release_taxonomy_file = os.path.expanduser('~/lila/lila-taxonomy-mapping_release.csv')
25
25
  # import clipboard; clipboard.copy(release_taxonomy_file)
@@ -42,7 +42,7 @@ if False:
42
42
 
43
43
  # dataset_name = datasets_to_map[0]
44
44
  for dataset_name in lila_dataset_to_categories.keys():
45
-
45
+
46
46
  ds_categories = lila_dataset_to_categories[dataset_name]
47
47
  for category in ds_categories:
48
48
  category_name = category['name'].lower()
@@ -52,6 +52,8 @@ if False:
52
52
 
53
53
  df['used'] = False
54
54
 
55
+ n_dropped = 0
56
+
55
57
  # i_row = 0; row = df.iloc[i_row]; row
56
58
  for i_row,row in df.iterrows():
57
59
  ds_name = row['dataset_name']
@@ -60,8 +62,11 @@ if False:
60
62
  if mapping_name in used_category_mappings:
61
63
  df.loc[i_row,'used'] = True
62
64
  else:
65
+ n_dropped += 1
63
66
  print('Dropping unused mapping {}'.format(mapping_name))
64
67
 
68
+ print('Dropping {} of {} mappings'.format(n_dropped,len(df)))
69
+
65
70
  df = df[df.used]
66
71
  df = df.drop('used',axis=1)
67
72
 
@@ -71,66 +76,82 @@ if False:
71
76
  assert not os.path.isfile(release_taxonomy_file), \
72
77
  'File {} exists, delete it manually before proceeding'.format(release_taxonomy_file)
73
78
 
74
- known_levels = ['stateofmatter', #noqa
75
- 'kingdom',
76
- 'phylum','subphylum',
77
- 'superclass','class','subclass','infraclass',
78
- 'superorder','order','parvorder','suborder','infraorder',
79
- 'zoosection',
80
- 'superfamily','family','subfamily','tribe',
81
- 'genus',
82
- 'species','subspecies','variety']
83
-
84
79
  levels_to_include = ['kingdom',
85
- 'phylum','subphylum',
86
- 'superclass','class','subclass','infraclass',
87
- 'superorder','order','suborder','infraorder',
88
- 'superfamily','family','subfamily','tribe',
89
- 'genus',
90
- 'species','subspecies','variety']
91
-
92
- levels_to_exclude = ['stateofmatter','zoosection','parvorder','complex','epifamily']
80
+ 'phylum',
81
+ 'subphylum',
82
+ 'superclass',
83
+ 'class',
84
+ 'subclass',
85
+ 'infraclass',
86
+ 'superorder',
87
+ 'order',
88
+ 'suborder',
89
+ 'infraorder',
90
+ 'superfamily',
91
+ 'family',
92
+ 'subfamily',
93
+ 'tribe',
94
+ 'genus',
95
+ 'subgenus',
96
+ 'species',
97
+ 'subspecies',
98
+ 'variety']
99
+
100
+ levels_to_exclude = ['stateofmatter',
101
+ 'zoosection',
102
+ 'parvorder',
103
+ 'complex',
104
+ 'epifamily']
105
+
106
+ for x in [levels_to_include,levels_to_exclude]:
107
+ assert len(x) == len(set(x))
93
108
 
94
109
  for s in levels_to_exclude:
95
110
  assert s not in levels_to_include
96
-
111
+
112
+ known_levels = levels_to_include + levels_to_exclude
113
+
97
114
  levels_used = set()
98
115
 
99
116
  # i_row = 0; row = df.iloc[i_row]; row
100
117
  for i_row,row in df.iterrows():
101
-
118
+
102
119
  if not isinstance(row['scientific_name'],str):
103
120
  assert not isinstance(row['taxonomy_string'],str)
104
121
  continue
105
-
122
+
123
+ # This is a list of length-4 tuples that each look like:
124
+ #
125
+ # (41789, 'species', 'taxidea taxus', ['american badger'])
106
126
  taxonomic_match = eval(row['taxonomy_string'])
107
-
127
+
108
128
  # match_at_level = taxonomic_match[0]
109
129
  for match_at_level in taxonomic_match:
110
130
  assert len(match_at_level) == 4
131
+ # E.g. "species"
111
132
  levels_used.add(match_at_level[1])
112
-
133
+
113
134
  levels_used = [s for s in levels_used if isinstance(s,str)]
114
135
 
115
136
  for s in levels_used:
116
- assert s in levels_to_exclude or s in levels_to_include, 'Unrecognized level {}'.format(s)
137
+ assert s in known_levels, 'Unrecognized level {}'.format(s)
117
138
 
118
139
  for s in levels_to_include:
119
140
  assert s in levels_used
120
-
141
+
121
142
  for s in levels_to_include:
122
143
  df[s] = ''
123
-
144
+
124
145
  # i_row = 0; row = df.iloc[i_row]; row
125
146
  for i_row,row in df.iterrows():
126
-
147
+
127
148
  if not isinstance(row['scientific_name'],str):
128
149
  assert not isinstance(row['taxonomy_string'],str)
129
150
  continue
130
-
151
+
131
152
  # E.g.: (43117, 'genus', 'lepus', ['hares and jackrabbits']
132
153
  taxonomic_match = eval(row['taxonomy_string'])
133
-
154
+
134
155
  for match_at_level in taxonomic_match:
135
156
  level = match_at_level[1]
136
157
  if level in levels_to_include: