megadetector 10.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. megadetector/__init__.py +0 -0
  2. megadetector/api/__init__.py +0 -0
  3. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  7. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  8. megadetector/classification/__init__.py +0 -0
  9. megadetector/classification/aggregate_classifier_probs.py +108 -0
  10. megadetector/classification/analyze_failed_images.py +227 -0
  11. megadetector/classification/cache_batchapi_outputs.py +198 -0
  12. megadetector/classification/create_classification_dataset.py +626 -0
  13. megadetector/classification/crop_detections.py +516 -0
  14. megadetector/classification/csv_to_json.py +226 -0
  15. megadetector/classification/detect_and_crop.py +853 -0
  16. megadetector/classification/efficientnet/__init__.py +9 -0
  17. megadetector/classification/efficientnet/model.py +415 -0
  18. megadetector/classification/efficientnet/utils.py +608 -0
  19. megadetector/classification/evaluate_model.py +520 -0
  20. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  21. megadetector/classification/json_to_azcopy_list.py +63 -0
  22. megadetector/classification/json_validator.py +696 -0
  23. megadetector/classification/map_classification_categories.py +276 -0
  24. megadetector/classification/merge_classification_detection_output.py +509 -0
  25. megadetector/classification/prepare_classification_script.py +194 -0
  26. megadetector/classification/prepare_classification_script_mc.py +228 -0
  27. megadetector/classification/run_classifier.py +287 -0
  28. megadetector/classification/save_mislabeled.py +110 -0
  29. megadetector/classification/train_classifier.py +827 -0
  30. megadetector/classification/train_classifier_tf.py +725 -0
  31. megadetector/classification/train_utils.py +323 -0
  32. megadetector/data_management/__init__.py +0 -0
  33. megadetector/data_management/animl_to_md.py +161 -0
  34. megadetector/data_management/annotations/__init__.py +0 -0
  35. megadetector/data_management/annotations/annotation_constants.py +33 -0
  36. megadetector/data_management/camtrap_dp_to_coco.py +270 -0
  37. megadetector/data_management/cct_json_utils.py +566 -0
  38. megadetector/data_management/cct_to_md.py +184 -0
  39. megadetector/data_management/cct_to_wi.py +293 -0
  40. megadetector/data_management/coco_to_labelme.py +284 -0
  41. megadetector/data_management/coco_to_yolo.py +701 -0
  42. megadetector/data_management/databases/__init__.py +0 -0
  43. megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
  44. megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
  45. megadetector/data_management/databases/integrity_check_json_db.py +563 -0
  46. megadetector/data_management/databases/subset_json_db.py +195 -0
  47. megadetector/data_management/generate_crops_from_cct.py +200 -0
  48. megadetector/data_management/get_image_sizes.py +164 -0
  49. megadetector/data_management/labelme_to_coco.py +559 -0
  50. megadetector/data_management/labelme_to_yolo.py +349 -0
  51. megadetector/data_management/lila/__init__.py +0 -0
  52. megadetector/data_management/lila/create_lila_blank_set.py +556 -0
  53. megadetector/data_management/lila/create_lila_test_set.py +192 -0
  54. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  55. megadetector/data_management/lila/download_lila_subset.py +182 -0
  56. megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
  57. megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
  58. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  59. megadetector/data_management/lila/lila_common.py +319 -0
  60. megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
  61. megadetector/data_management/mewc_to_md.py +344 -0
  62. megadetector/data_management/ocr_tools.py +873 -0
  63. megadetector/data_management/read_exif.py +964 -0
  64. megadetector/data_management/remap_coco_categories.py +195 -0
  65. megadetector/data_management/remove_exif.py +156 -0
  66. megadetector/data_management/rename_images.py +194 -0
  67. megadetector/data_management/resize_coco_dataset.py +665 -0
  68. megadetector/data_management/speciesnet_to_md.py +41 -0
  69. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  70. megadetector/data_management/yolo_output_to_md_output.py +594 -0
  71. megadetector/data_management/yolo_to_coco.py +984 -0
  72. megadetector/data_management/zamba_to_md.py +188 -0
  73. megadetector/detection/__init__.py +0 -0
  74. megadetector/detection/change_detection.py +840 -0
  75. megadetector/detection/process_video.py +479 -0
  76. megadetector/detection/pytorch_detector.py +1451 -0
  77. megadetector/detection/run_detector.py +1267 -0
  78. megadetector/detection/run_detector_batch.py +2172 -0
  79. megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
  80. megadetector/detection/run_md_and_speciesnet.py +1604 -0
  81. megadetector/detection/run_tiled_inference.py +1044 -0
  82. megadetector/detection/tf_detector.py +209 -0
  83. megadetector/detection/video_utils.py +1379 -0
  84. megadetector/postprocessing/__init__.py +0 -0
  85. megadetector/postprocessing/add_max_conf.py +72 -0
  86. megadetector/postprocessing/categorize_detections_by_size.py +166 -0
  87. megadetector/postprocessing/classification_postprocessing.py +1943 -0
  88. megadetector/postprocessing/combine_batch_outputs.py +249 -0
  89. megadetector/postprocessing/compare_batch_results.py +2110 -0
  90. megadetector/postprocessing/convert_output_format.py +403 -0
  91. megadetector/postprocessing/create_crop_folder.py +629 -0
  92. megadetector/postprocessing/detector_calibration.py +570 -0
  93. megadetector/postprocessing/generate_csv_report.py +522 -0
  94. megadetector/postprocessing/load_api_results.py +223 -0
  95. megadetector/postprocessing/md_to_coco.py +428 -0
  96. megadetector/postprocessing/md_to_labelme.py +351 -0
  97. megadetector/postprocessing/md_to_wi.py +41 -0
  98. megadetector/postprocessing/merge_detections.py +392 -0
  99. megadetector/postprocessing/postprocess_batch_results.py +2140 -0
  100. megadetector/postprocessing/remap_detection_categories.py +226 -0
  101. megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
  102. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
  103. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
  104. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
  105. megadetector/postprocessing/separate_detections_into_folders.py +795 -0
  106. megadetector/postprocessing/subset_json_detector_output.py +964 -0
  107. megadetector/postprocessing/top_folders_to_bottom.py +238 -0
  108. megadetector/postprocessing/validate_batch_results.py +332 -0
  109. megadetector/taxonomy_mapping/__init__.py +0 -0
  110. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  111. megadetector/taxonomy_mapping/map_new_lila_datasets.py +211 -0
  112. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
  113. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
  114. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  115. megadetector/taxonomy_mapping/simple_image_download.py +231 -0
  116. megadetector/taxonomy_mapping/species_lookup.py +1008 -0
  117. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  118. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  119. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  120. megadetector/tests/__init__.py +0 -0
  121. megadetector/tests/test_nms_synthetic.py +335 -0
  122. megadetector/utils/__init__.py +0 -0
  123. megadetector/utils/ct_utils.py +1857 -0
  124. megadetector/utils/directory_listing.py +199 -0
  125. megadetector/utils/extract_frames_from_video.py +307 -0
  126. megadetector/utils/gpu_test.py +125 -0
  127. megadetector/utils/md_tests.py +2072 -0
  128. megadetector/utils/path_utils.py +2872 -0
  129. megadetector/utils/process_utils.py +172 -0
  130. megadetector/utils/split_locations_into_train_val.py +237 -0
  131. megadetector/utils/string_utils.py +234 -0
  132. megadetector/utils/url_utils.py +825 -0
  133. megadetector/utils/wi_platform_utils.py +968 -0
  134. megadetector/utils/wi_taxonomy_utils.py +1766 -0
  135. megadetector/utils/write_html_image_list.py +239 -0
  136. megadetector/visualization/__init__.py +0 -0
  137. megadetector/visualization/plot_utils.py +309 -0
  138. megadetector/visualization/render_images_with_thumbnails.py +243 -0
  139. megadetector/visualization/visualization_utils.py +1973 -0
  140. megadetector/visualization/visualize_db.py +630 -0
  141. megadetector/visualization/visualize_detector_output.py +498 -0
  142. megadetector/visualization/visualize_video_output.py +705 -0
  143. megadetector-10.0.15.dist-info/METADATA +115 -0
  144. megadetector-10.0.15.dist-info/RECORD +147 -0
  145. megadetector-10.0.15.dist-info/WHEEL +5 -0
  146. megadetector-10.0.15.dist-info/licenses/LICENSE +19 -0
  147. megadetector-10.0.15.dist-info/top_level.txt +1 -0
megadetector/utils/wi_taxonomy_utils.py
@@ -0,0 +1,1766 @@
"""

wi_taxonomy_utils.py

Functions related to working with the SpeciesNet / Wildlife Insights taxonomy.

"""

#%% Imports and constants

import os
import json

import pandas as pd

from copy import deepcopy
from collections import defaultdict
from tqdm import tqdm

from megadetector.utils.path_utils import \
    insert_before_extension, find_images

from megadetector.utils.ct_utils import (
    split_list_into_n_chunks,
    round_floats_in_nested_dict,
    is_list_sorted,
    invert_dictionary,
    sort_list_of_dicts_by_key,
    sort_dictionary_by_value,
)

from megadetector.postprocessing.validate_batch_results import \
    validate_batch_results, ValidateBatchResultsOptions

from megadetector.detection.run_detector import DEFAULT_DETECTOR_LABEL_MAP

md_category_id_to_name = DEFAULT_DETECTOR_LABEL_MAP
md_category_name_to_id = invert_dictionary(md_category_id_to_name)

blank_prediction_string = \
    'f1856211-cfb7-4a5b-9158-c0f72fd09ee6;;;;;;blank'
no_cv_result_prediction_string = \
    'f2efdae9-efb8-48fb-8a91-eccf79ab4ffb;no cv result;no cv result;no cv result;no cv result;no cv result;no cv result'
animal_prediction_string = \
    '1f689929-883d-4dae-958c-3d57ab5b6c16;;;;;;animal'
human_prediction_string = \
    '990ae9dd-7a59-4344-afcb-1b7b21368000;mammalia;primates;hominidae;homo;sapiens;human'
vehicle_prediction_string = \
    'e2895ed5-780b-48f6-8a11-9e27cb594511;;;;;;vehicle'

non_taxonomic_prediction_strings = [blank_prediction_string,
                                    no_cv_result_prediction_string,
                                    animal_prediction_string,
                                    vehicle_prediction_string]

non_taxonomic_prediction_short_strings = [';'.join(s.split(';')[1:-1]) for s in \
                                          non_taxonomic_prediction_strings]

# Ignore some files when generating instances.json from a folder
default_tokens_to_ignore = ['$RECYCLE.BIN']


#%% Miscellaneous taxonomy support functions

def is_valid_prediction_string(s):
    """
    Determine whether [s] is a valid WI prediction string. Prediction strings look like:

    '90d950db-2106-4bd9-a4c1-777604c3eada;mammalia;rodentia;;;;rodent'

    Args:
        s (str): the string to be tested for validity

    Returns:
        bool: True if this looks more or less like a WI prediction string
    """

    # Note to self... don't get tempted to remove spaces here; spaces are used
    # to indicate subspecies.
    return isinstance(s,str) and (len(s.split(';')) == 7) and (s == s.lower())
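For orientation, a minimal sketch (illustrative only, not part of the package) of how this validator treats the constants defined above:

    from megadetector.utils.wi_taxonomy_utils import \
        is_valid_prediction_string, human_prediction_string

    # A 7-token, all-lowercase string passes
    assert is_valid_prediction_string(human_prediction_string)
    assert is_valid_prediction_string(
        '90d950db-2106-4bd9-a4c1-777604c3eada;mammalia;rodentia;;;;rodent')

    # Trimming the GUID leaves only six tokens, which fails
    assert not is_valid_prediction_string('mammalia;rodentia;;;;rodent')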

def is_valid_taxonomy_string(s):
    """
    Determine whether [s] is a valid 5-token WI taxonomy string. Taxonomy strings
    look like:

    'mammalia;rodentia;;;'
    'mammalia;carnivora;canidae;canis;lupus dingo'

    Args:
        s (str): the string to be tested for validity

    Returns:
        bool: True if this looks more or less like a WI taxonomy string
    """

    return isinstance(s,str) and (len(s.split(';')) == 5) and (s == s.lower())


def clean_taxonomy_string(s, truncate_multiple_description_strings=True):
    """
    If [s] is a seven-token prediction string, trim the GUID and common name to produce
    a "clean" taxonomy string. Else if [s] is a five-token string, return it. Else error.

    Args:
        s (str): the seven- or five-token taxonomy/prediction string to clean
        truncate_multiple_description_strings (bool, optional): we use | to delimit
            multiple descriptions in the same string; if this is True, clean and
            return just the first, else error.

    Returns:
        str: the five-token taxonomy string
    """

    if truncate_multiple_description_strings:
        tokens = s.split('|')
        s = tokens[0]

    if is_valid_taxonomy_string(s):
        return s
    elif is_valid_prediction_string(s):
        tokens = s.split(';')
        assert len(tokens) == 7
        return ';'.join(tokens[1:-1])
    else:
        raise ValueError('Invalid taxonomy string')
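A quick sketch (illustrative only) of the trimming behavior, using the rodent prediction string from the docstring above:

    from megadetector.utils.wi_taxonomy_utils import clean_taxonomy_string

    s = '90d950db-2106-4bd9-a4c1-777604c3eada;mammalia;rodentia;;;;rodent'

    # The GUID and common name are trimmed, leaving the five latin-name tokens
    assert clean_taxonomy_string(s) == 'mammalia;rodentia;;;'

    # Five-token strings pass through unchanged
    assert clean_taxonomy_string('mammalia;rodentia;;;') == 'mammalia;rodentia;;;'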

taxonomy_level_names = \
    ['non-taxonomic','kingdom','phylum','class','order','family','genus','species','subspecies']


def taxonomy_level_to_string(k):
    """
    Maps taxonomy level indices (1 for kingdom, 2 for phylum, etc.) to strings.

    Args:
        k (int): taxonomy level index

    Returns:
        str: taxonomy level string
    """

    assert k >= 0 and k < len(taxonomy_level_names), \
        'Illegal taxonomy level index {}'.format(k)

    return taxonomy_level_names[k]


def taxonomy_level_string_to_index(s):
    """
    Maps strings ('kingdom', 'species', etc.) to level indices.

    Args:
        s (str): taxonomy level string

    Returns:
        int: taxonomy level index
    """

    assert s in taxonomy_level_names, 'Unrecognized taxonomy level string {}'.format(s)
    return taxonomy_level_names.index(s)


def taxonomy_level_index(s):
    """
    Returns the taxonomy level up to which [s] is defined (0 for non-taxonomic, 1 for kingdom,
    2 for phylum, etc.). Empty strings and non-taxonomic strings are treated as level 0. 1 and 2
    will never be returned; "animal" doesn't look like other taxonomic strings, so here we treat
    it as non-taxonomic.

    Args:
        s (str): 5-token or 7-token taxonomy string

    Returns:
        int: taxonomy level
    """

    if s in non_taxonomic_prediction_strings or s in non_taxonomic_prediction_short_strings:
        return 0

    tokens = s.split(';')
    assert len(tokens) in (5,7)

    if len(tokens) == 7:
        tokens = tokens[1:-1]

    # Anything without a class is considered non-taxonomic
    if len(tokens[0]) == 0:
        return 0
    # WI taxonomy strings start at class, so we'll never return 1 (kingdom) or 2 (phylum)
    elif len(tokens[1]) == 0:
        return 3
    elif len(tokens[2]) == 0:
        return 4
    elif len(tokens[3]) == 0:
        return 5
    elif len(tokens[4]) == 0:
        return 6
    # Subspecies are delimited with a space
    elif ' ' not in tokens[4]:
        return 7
    else:
        return 8
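The level arithmetic is easiest to see with examples; a minimal sketch (illustrative only):

    from megadetector.utils.wi_taxonomy_utils import \
        taxonomy_level_index, taxonomy_level_to_string

    # Defined through species (level 7)
    assert taxonomy_level_index('mammalia;rodentia;sciuridae;sciurus;carolinensis') == 7
    assert taxonomy_level_to_string(7) == 'species'

    # Family, genus, and species empty: defined through order (level 4)
    assert taxonomy_level_index('mammalia;rodentia;;;') == 4

    # No class: treated as non-taxonomic (level 0)
    assert taxonomy_level_index(';;;;') == 0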

def is_taxonomic_prediction_string(s):
    """
    Determines whether [s] is a classification string that has taxonomic properties; this
    does not include, e.g., blanks/vehicles/no cv result. It also excludes "animal".

    Args:
        s (str): a five- or seven-token taxonomic string

    Returns:
        bool: whether [s] is a taxonomic category
    """

    return (taxonomy_level_index(s) > 0)


def get_kingdom(prediction_string):
    """
    Return the kingdom field from a WI prediction string

    Args:
        prediction_string (str): a string in the semicolon-delimited prediction string format

    Returns:
        str: the kingdom field from the input string
    """

    tokens = prediction_string.split(';')
    assert is_valid_prediction_string(prediction_string)
    return tokens[1]


def is_human_classification(prediction_string):
    """
    Determines whether the input string represents a human classification, which includes a variety
    of common names (hiker, person, etc.)

    Args:
        prediction_string (str): a string in the semicolon-delimited prediction string format

    Returns:
        bool: whether this string corresponds to a human category
    """

    return prediction_string == human_prediction_string or 'homo;sapiens' in prediction_string


def is_vehicle_classification(prediction_string):
    """
    Determines whether the input string represents a vehicle classification.

    Args:
        prediction_string (str): a string in the semicolon-delimited prediction string format

    Returns:
        bool: whether this string corresponds to the vehicle category
    """

    return prediction_string == vehicle_prediction_string


def is_animal_classification(prediction_string):
    """
    Determines whether the input string represents an animal classification, which excludes, e.g.,
    humans, blanks, vehicles, unknowns.

    Args:
        prediction_string (str): a string in the semicolon-delimited prediction string format

    Returns:
        bool: whether this string corresponds to an animal category
    """

    if prediction_string == animal_prediction_string:
        return True
    if prediction_string == human_prediction_string or 'homo;sapiens' in prediction_string:
        return False
    if prediction_string == blank_prediction_string:
        return False
    if prediction_string == no_cv_result_prediction_string:
        return False
    if len(get_kingdom(prediction_string)) == 0:
        return False
    return True


def taxonomy_info_to_taxonomy_string(taxonomy_info, include_taxon_id_and_common_name=False):
    """
    Convert a taxonomy record in dict format to a five- or seven-token semicolon-delimited string

    Args:
        taxonomy_info (dict): dict in the format stored in, e.g., taxonomy_string_to_taxonomy_info
        include_taxon_id_and_common_name (bool, optional): by default, this function returns a
            five-token string of latin names; if this argument is True, it includes the leading
            (GUID) and trailing (common name) tokens

    Returns:
        str: string in the format used as keys in, e.g., taxonomy_string_to_taxonomy_info
    """

    s = taxonomy_info['class'] + ';' + \
        taxonomy_info['order'] + ';' + \
        taxonomy_info['family'] + ';' + \
        taxonomy_info['genus'] + ';' + \
        taxonomy_info['species']

    if include_taxon_id_and_common_name:
        s = taxonomy_info['taxon_id'] + ';' + s + ';' + taxonomy_info['common_name']

    return s
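A short sketch (illustrative only) tying the predicate functions and the dict-to-string conversion together:

    from megadetector.utils.wi_taxonomy_utils import \
        is_animal_classification, is_human_classification, \
        taxonomy_info_to_taxonomy_string, human_prediction_string

    # Humans are deliberately excluded from the "animal" bucket
    assert is_human_classification(human_prediction_string)
    assert not is_animal_classification(human_prediction_string)

    # Only the five latin-name fields are required for the five-token form
    deer_info = {'class':'mammalia', 'order':'cetartiodactyla', 'family':'cervidae',
                 'genus':'odocoileus', 'species':'virginianus'}
    assert taxonomy_info_to_taxonomy_string(deer_info) == \
        'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus'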

#%% Functions used to manipulate results files

def generate_whole_image_detections_for_classifications(classifications_json_file,
                                                        detections_json_file,
                                                        ensemble_json_file=None,
                                                        ignore_blank_classifications=True,
                                                        verbose=True):
    """
    Given a set of classification results in SpeciesNet format that were likely run on
    already-cropped images, generate a file of [fake] detections in SpeciesNet format in which each
    image is covered in a single whole-image detection.

    Args:
        classifications_json_file (str): SpeciesNet-formatted file containing classifications
        detections_json_file (str): SpeciesNet-formatted file to write with detections
        ensemble_json_file (str, optional): SpeciesNet-formatted file to write with detections
            and classifications
        ignore_blank_classifications (bool, optional): use non-top classifications when
            the top classification is "blank" or "no CV result"
        verbose (bool, optional): enable additional debug output

    Returns:
        dict: the contents of [detections_json_file]
    """

    with open(classifications_json_file,'r') as f:
        classification_results = json.load(f)
    predictions = classification_results['predictions']

    output_predictions = []
    ensemble_predictions = []

    # i_prediction = 0; prediction = predictions[i_prediction]
    for i_prediction,prediction in enumerate(predictions):

        output_prediction = {}
        output_prediction['filepath'] = prediction['filepath']
        i_score = 0

        if ignore_blank_classifications:

            while (prediction['classifications']['classes'][i_score] in \
                   (blank_prediction_string,no_cv_result_prediction_string)):

                i_score += 1
                if (i_score >= len(prediction['classifications']['classes'])):

                    if verbose:
                        print('Ignoring blank classifications, but ' + \
                              'image {} has no non-blank values'.format(i_prediction))

                    # Just use the first one
                    i_score = 0
                    break

                # ...if we passed the last prediction

            # ...iterate over classes within this prediction

        # ...if we're supposed to ignore blank classifications

        top_classification = prediction['classifications']['classes'][i_score]
        top_classification_score = prediction['classifications']['scores'][i_score]

        if is_animal_classification(top_classification):
            category_name = 'animal'
        elif is_human_classification(top_classification):
            category_name = 'human'
        else:
            category_name = 'vehicle'

        if category_name == 'human':
            md_category_name = 'person'
        else:
            md_category_name = category_name

        output_detection = {}
        output_detection['label'] = category_name
        output_detection['category'] = md_category_name_to_id[md_category_name]
        output_detection['conf'] = 1.0
        output_detection['bbox'] = [0.0, 0.0, 1.0, 1.0]
        output_prediction['detections'] = [output_detection]
        output_predictions.append(output_prediction)

        ensemble_prediction = {}
        ensemble_prediction['filepath'] = prediction['filepath']
        ensemble_prediction['detections'] = [output_detection]
        ensemble_prediction['prediction'] = top_classification
        ensemble_prediction['prediction_score'] = top_classification_score
        ensemble_prediction['prediction_source'] = 'fake_ensemble_file_utility'
        ensemble_prediction['classifications'] = prediction['classifications']
        ensemble_predictions.append(ensemble_prediction)

    # ...for each image

    ## Write output

    if ensemble_json_file is not None:

        ensemble_output_data = {'predictions':ensemble_predictions}
        with open(ensemble_json_file,'w') as f:
            json.dump(ensemble_output_data,f,indent=1)
        _ = validate_predictions_file(ensemble_json_file)

    output_data = {'predictions':output_predictions}
    with open(detections_json_file,'w') as f:
        json.dump(output_data,f,indent=1)
    return validate_predictions_file(detections_json_file)

# ...def generate_whole_image_detections_for_classifications(...)
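A hypothetical usage sketch (file names are placeholders, not from this package): wrapping classifier-only SpeciesNet output in whole-image detections so it can flow through tools that expect a detections list:

    from megadetector.utils.wi_taxonomy_utils import \
        generate_whole_image_detections_for_classifications

    detections = generate_whole_image_detections_for_classifications(
        classifications_json_file='crops-classifications.json',
        detections_json_file='crops-detections.json',
        ensemble_json_file='crops-ensemble.json')
    print('Wrote {} whole-image detections'.format(len(detections['predictions'])))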

def generate_md_results_from_predictions_json(predictions_json_file,
                                              md_results_file=None,
                                              base_folder=None,
                                              max_decimals=5,
                                              convert_human_to_person=True,
                                              convert_homo_species_to_human=True,
                                              verbose=False):
    """
    Generate an MD-formatted .json file from a predictions.json file, generated by the
    SpeciesNet ensemble. Typically, MD results files use relative paths, and predictions.json
    files use absolute paths, so this function optionally removes the leading string
    [base_folder] from all file names.

    Uses the classification from the "prediction" field if it's available, otherwise
    uses the "classifications" field.

    When using the "prediction" field, records the top class in the "classifications" field to
    a field in each image called "top_classification_common_name". This is often different
    from the value of the "prediction" field.

    speciesnet_to_md.py is a command-line driver for this function.

    Args:
        predictions_json_file (str): path to a predictions.json file, or a dict
        md_results_file (str, optional): path to which we should write an MD-formatted .json file
        base_folder (str, optional): leading string to remove from each path in the
            predictions.json file
        max_decimals (int, optional): number of decimal places to which we should round
            all values
        convert_human_to_person (bool, optional): WI predictions.json files sometimes use the
            detection category "human"; MD files usually use "person". If True, switches "human"
            to "person".
        convert_homo_species_to_human (bool, optional): the ensemble often rolls human predictions
            up to "homo species", which isn't wrong, but looks odd. This forces these back to
            "homo sapiens".
        verbose (bool, optional): enable additional debug output

    Returns:
        dict: results in MD format
    """

    # Read predictions file
    if isinstance(predictions_json_file,str):
        with open(predictions_json_file,'r') as f:
            predictions = json.load(f)
    else:
        assert isinstance(predictions_json_file,dict)
        predictions = predictions_json_file

    # Round floating-point values (confidence scores, coordinates) to a
    # reasonable number of decimal places
    if (max_decimals is not None) and (max_decimals > 0):
        round_floats_in_nested_dict(predictions, decimal_places=max_decimals)

    predictions = predictions['predictions']
    assert isinstance(predictions,list)

    # Convert backslashes to forward slashes in both filenames and the base folder string
    for im in predictions:
        im['filepath'] = im['filepath'].replace('\\','/')
    if base_folder is not None:
        base_folder = base_folder.replace('\\','/')

    detection_category_id_to_name = {}
    classification_category_name_to_id = {}

    # Keep track of detections that don't have an assigned detection category; these
    # are fake detections we create for non-blank images with non-empty detection lists.
    # We need to go back later and give them a legitimate detection category ID.
    all_unknown_detections = []

    # Create the output images list
    images_out = []

    base_folder_replacements = 0

    # im_in = predictions[0]
    for im_in in predictions:

        im_out = {}

        fn = im_in['filepath']
        if base_folder is not None:
            if fn.startswith(base_folder):
                base_folder_replacements += 1
                fn = fn.replace(base_folder,'',1)

        im_out['file'] = fn

        if 'failures' in im_in:

            im_out['failure'] = str(im_in['failures'])
            im_out['detections'] = None

        else:

            im_out['detections'] = []

            if 'detections' in im_in:

                if len(im_in['detections']) == 0:
                    im_out['detections'] = []
                else:
                    # det_in = im_in['detections'][0]
                    for det_in in im_in['detections']:
                        if det_in['category'] in detection_category_id_to_name:
                            assert detection_category_id_to_name[det_in['category']] == det_in['label']
                        else:
                            detection_category_id_to_name[det_in['category']] = det_in['label']
                        det_out = {}
                        for s in ['category','conf','bbox']:
                            det_out[s] = det_in[s]
                        im_out['detections'].append(det_out)

            # ...if detections are present

            class_to_assign = None
            class_confidence = None
            top_classification_common_name = None

            if 'classifications' in im_in:

                classifications = im_in['classifications']
                assert len(classifications['scores']) == len(classifications['classes'])
                assert is_list_sorted(classifications['scores'],reverse=True)
                class_to_assign = classifications['classes'][0]
                class_confidence = classifications['scores'][0]

                tokens = class_to_assign.split(';')
                assert len(tokens) == 7
                top_classification_common_name = tokens[-1]
                if len(top_classification_common_name) == 0:
                    top_classification_common_name = 'undefined'

            if 'prediction' in im_in:

                im_out['top_classification_common_name'] = top_classification_common_name
                class_to_assign = im_in['prediction']
                if convert_homo_species_to_human and class_to_assign.endswith('homo species'):
                    class_to_assign = human_prediction_string
                class_confidence = im_in['prediction_score']

            if class_to_assign is not None:

                if class_to_assign == blank_prediction_string:

                    # We have a blank prediction with detections present. For now, don't do anything
                    # special here, just making a note of this, in case I want to handle this differently
                    # later.
                    if len(im_out['detections']) > 0:
                        pass

                else:

                    assert not class_to_assign.endswith('blank')

                    # This is a scenario that's not captured well by the MD format: no detections present,
                    # but a non-blank prediction. For now, create a fake detection to handle this prediction.
                    if len(im_out['detections']) == 0:

                        if verbose:
                            print('Warning: creating fake detection for non-blank whole-image classification' + \
                                  ' in {}'.format(im_in['filepath']))
                        det_out = {}
                        all_unknown_detections.append(det_out)

                        # We will change this to a string-int later
                        det_out['category'] = 'unknown'
                        det_out['conf'] = class_confidence
                        det_out['bbox'] = [0,0,1,1]
                        im_out['detections'].append(det_out)

                # ...if this is/isn't a blank classification

                # Attach that classification to each detection

                # Create a new category ID if necessary
                if class_to_assign in classification_category_name_to_id:
                    classification_category_id = classification_category_name_to_id[class_to_assign]
                else:
                    classification_category_id = str(len(classification_category_name_to_id))
                    classification_category_name_to_id[class_to_assign] = classification_category_id

                for det in im_out['detections']:
                    det['classifications'] = []
                    det['classifications'].append([classification_category_id,class_confidence])

            # ...if we have some type of classification for this image

        # ...if this is/isn't a failure

        images_out.append(im_out)

    # ...for each image

    if base_folder is not None:
        if base_folder_replacements == 0:
            print('Warning: you supplied {} as the base folder, but I made zero replacements'.format(
                base_folder))

    # Fix the 'unknown' category
    if len(all_unknown_detections) > 0:

        if len(detection_category_id_to_name) == 0:
            max_detection_category_id = -1
        else:
            max_detection_category_id = max([int(x) for x in detection_category_id_to_name.keys()])
        unknown_category_id = str(max_detection_category_id + 1)
        detection_category_id_to_name[unknown_category_id] = 'unknown'

        for det in all_unknown_detections:
            assert det['category'] == 'unknown'
            det['category'] = unknown_category_id

    # Sort by filename
    images_out = sort_list_of_dicts_by_key(images_out,'file')

    # Prepare friendly classification names
    classification_category_descriptions = \
        invert_dictionary(classification_category_name_to_id)
    classification_categories_out = {}
    for category_id in classification_category_descriptions.keys():
        category_name = classification_category_descriptions[category_id].split(';')[-1]
        classification_categories_out[category_id] = category_name

    # Prepare the output dict
    detection_categories_out = detection_category_id_to_name
    info = {}
    info['format_version'] = 1.4
    info['detector'] = 'converted_from_predictions_json'

    if convert_human_to_person:
        for k in detection_categories_out.keys():
            if detection_categories_out[k] == 'human':
                detection_categories_out[k] = 'person'

    output_dict = {}
    output_dict['info'] = info
    output_dict['detection_categories'] = detection_categories_out
    output_dict['classification_categories'] = classification_categories_out
    output_dict['classification_category_descriptions'] = classification_category_descriptions
    output_dict['images'] = images_out

    if md_results_file is not None:
        with open(md_results_file,'w') as f:
            json.dump(output_dict,f,indent=1)

        validation_options = ValidateBatchResultsOptions()
        validation_options.raise_errors = True
        _ = validate_batch_results(md_results_file, options=validation_options)

    return output_dict

# ...def generate_md_results_from_predictions_json(...)
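A hypothetical usage sketch (paths are placeholders): converting SpeciesNet ensemble output to MD format, trimming an absolute-path prefix along the way:

    from megadetector.utils.wi_taxonomy_utils import \
        generate_md_results_from_predictions_json

    md_results = generate_md_results_from_predictions_json(
        predictions_json_file='ensemble-predictions.json',
        md_results_file='md-results.json',
        base_folder='/data/camera-traps/')
    print('Converted results for {} images'.format(len(md_results['images'])))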

def generate_predictions_json_from_md_results(md_results_file,
                                              predictions_json_file,
                                              base_folder=None):
    """
    Generate a predictions.json file from the MD-formatted .json file [md_results_file]. Typically,
    MD results files use relative paths, and predictions.json files use absolute paths, so
    this function optionally prepends [base_folder]. Does not handle classification results in
    MD format, since this is intended to prepare data for passing through the WI classifier.

    md_to_wi.py is a command-line driver for this function.

    Args:
        md_results_file (str): path to an MD-formatted .json file
        predictions_json_file (str): path to which we should write a predictions.json file
        base_folder (str, optional): folder name to prepend to each path in md_results_file,
            to convert relative paths to absolute paths.
    """

    # Validate the input file
    validation_options = ValidateBatchResultsOptions()
    validation_options.raise_errors = True
    validation_options.return_data = True
    md_results = validate_batch_results(md_results_file, options=validation_options)
    category_id_to_name = md_results['detection_categories']

    output_dict = {}
    output_dict['predictions'] = []

    # im = md_results['images'][0]
    for im in md_results['images']:

        prediction = {}
        fn = im['file']
        if base_folder is not None:
            fn = os.path.join(base_folder,fn)
        fn = fn.replace('\\','/')
        prediction['filepath'] = fn

        if 'failure' in im and im['failure'] is not None:
            prediction['failures'] = ['DETECTOR']
        else:
            assert 'detections' in im and im['detections'] is not None
            detections = []
            for det in im['detections']:
                output_det = deepcopy(det)
                output_det['label'] = category_id_to_name[det['category']]
                detections.append(output_det)

            # detections *must* be sorted in descending order by confidence
            detections = sort_list_of_dicts_by_key(detections,'conf', reverse=True)
            prediction['detections'] = detections

        assert len(prediction.keys()) >= 2
        output_dict['predictions'].append(prediction)

    # ...for each image

    output_dir = os.path.dirname(predictions_json_file)
    if len(output_dir) > 0:
        os.makedirs(output_dir,exist_ok=True)
    with open(predictions_json_file,'w') as f:
        json.dump(output_dict,f,indent=1)

# ...def generate_predictions_json_from_md_results(...)
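And the inverse direction, as a hypothetical sketch (paths are placeholders): preparing MegaDetector output for the SpeciesNet classifier:

    from megadetector.utils.wi_taxonomy_utils import \
        generate_predictions_json_from_md_results

    generate_predictions_json_from_md_results(
        md_results_file='md-results.json',
        predictions_json_file='detector-predictions.json',
        base_folder='/data/camera-traps')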

def generate_instances_json_from_folder(folder,
                                        country=None,
                                        admin1_region=None,
                                        lat=None,
                                        lon=None,
                                        output_file=None,
                                        filename_replacements=None,
                                        tokens_to_ignore=default_tokens_to_ignore):
    """
    Generate an instances.json record that contains all images in [folder], optionally
    including location information, in a format suitable for run_model.py. Optionally writes
    the results to [output_file].

    Args:
        folder (str): the folder to recursively search for images
        country (str, optional): a three-letter country code
        admin1_region (str, optional): an administrative region code, typically a two-letter
            US state code
        lat (float, optional): latitude to associate with all images
        lon (float, optional): longitude to associate with all images
        output_file (str, optional): .json file to which we should write instance records
        filename_replacements (dict, optional): str --> str dict indicating filename substrings
            that should be replaced with other strings. Replacement occurs *after* converting
            backslashes to forward slashes.
        tokens_to_ignore (list, optional): ignore any images with these tokens in their
            names, typically used to avoid $RECYCLE.BIN. Can be None.

    Returns:
        dict: dict with at least the field "instances"
    """

    assert os.path.isdir(folder)

    print('Enumerating images in {}'.format(folder))
    image_files_abs = find_images(folder,recursive=True,return_relative_paths=False)

    if tokens_to_ignore is not None:
        n_images_before_ignore_tokens = len(image_files_abs)
        for token in tokens_to_ignore:
            image_files_abs = [fn for fn in image_files_abs if token not in fn]
        print('After ignoring {} tokens, kept {} of {} images'.format(
            len(tokens_to_ignore),len(image_files_abs),n_images_before_ignore_tokens))

    instances = []

    # image_fn_abs = image_files_abs[0]
    for image_fn_abs in image_files_abs:
        instance = {}
        instance['filepath'] = image_fn_abs.replace('\\','/')
        if filename_replacements is not None:
            for s in filename_replacements:
                instance['filepath'] = instance['filepath'].replace(s,filename_replacements[s])
        if country is not None:
            instance['country'] = country
        if admin1_region is not None:
            instance['admin1_region'] = admin1_region
        if lat is not None:
            assert lon is not None, 'Latitude provided without longitude'
            instance['latitude'] = lat
        if lon is not None:
            assert lat is not None, 'Longitude provided without latitude'
            instance['longitude'] = lon
        instances.append(instance)

    to_return = {'instances':instances}

    if output_file is not None:
        output_dir = os.path.dirname(output_file)
        if len(output_dir) > 0:
            os.makedirs(output_dir,exist_ok=True)
        with open(output_file,'w') as f:
            json.dump(to_return,f,indent=1)

    return to_return

# ...def generate_instances_json_from_folder(...)
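A hypothetical usage sketch (folder and region codes are placeholders): building an instances.json file with per-image location hints, which downstream geofencing can use:

    from megadetector.utils.wi_taxonomy_utils import \
        generate_instances_json_from_folder

    instances = generate_instances_json_from_folder(
        folder='/data/camera-traps',
        country='USA',
        admin1_region='WA',
        output_file='instances.json')
    print('Found {} images'.format(len(instances['instances'])))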

def split_instances_into_n_batches(instances_json,n_batches,output_files=None):
    """
    Given an instances.json file, split it into batches of approximately equal size.

    Args:
        instances_json (str): input .json file, in instances.json format
        n_batches (int): number of new files to generate
        output_files (list, optional): output .json files for each
            batch. If supplied, should have length [n_batches]. If not
            supplied, filenames will be generated based on [instances_json].

    Returns:
        list: list of output files that were written; identical to [output_files]
            if it was supplied as input.
    """

    with open(instances_json,'r') as f:
        instances = json.load(f)
    assert isinstance(instances,dict) and 'instances' in instances
    instances = instances['instances']

    if output_files is not None:
        assert len(output_files) == n_batches, \
            'Expected {} output files, received {}'.format(
                n_batches,len(output_files))
    else:
        output_files = []
        for i_batch in range(0,n_batches):
            batch_string = 'batch_{}'.format(str(i_batch).zfill(3))
            output_files.append(insert_before_extension(instances_json,batch_string))

    batches = split_list_into_n_chunks(instances, n_batches)

    for i_batch,batch in enumerate(batches):
        batch_dict = {'instances':batch}
        with open(output_files[i_batch],'w') as f:
            json.dump(batch_dict,f,indent=1)

    print('Wrote {} batches to file'.format(n_batches))

    return output_files

# ...def split_instances_into_n_batches(...)

def merge_prediction_json_files(input_prediction_files,output_prediction_file):
    """
    Merge all predictions.json files in [input_prediction_files] into a single .json file.

    Args:
        input_prediction_files (list): list of predictions.json files to merge
        output_prediction_file (str): output .json file
    """

    predictions = []
    image_filenames_processed = set()

    # input_json_fn = input_prediction_files[0]
    for input_json_fn in tqdm(input_prediction_files):

        assert os.path.isfile(input_json_fn), \
            'Could not find prediction file {}'.format(input_json_fn)
        with open(input_json_fn,'r') as f:
            results_this_file = json.load(f)
        assert isinstance(results_this_file,dict)
        predictions_this_file = results_this_file['predictions']
        for prediction in predictions_this_file:
            image_fn = prediction['filepath']
            assert image_fn not in image_filenames_processed
            image_filenames_processed.add(image_fn)
        predictions.extend(predictions_this_file)

    output_dict = {'predictions':predictions}

    output_dir = os.path.dirname(output_prediction_file)
    if len(output_dir) > 0:
        os.makedirs(output_dir,exist_ok=True)
    with open(output_prediction_file,'w') as f:
        json.dump(output_dict,f,indent=1)

# ...def merge_prediction_json_files(...)
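Together with split_instances_into_n_batches above, this supports a simple sharded workflow; a hypothetical sketch (paths are placeholders, and the per-batch model runs are assumed to happen between the two calls):

    from megadetector.utils.wi_taxonomy_utils import \
        split_instances_into_n_batches, merge_prediction_json_files

    batch_files = split_instances_into_n_batches('instances.json', n_batches=4)

    # ...run SpeciesNet on each batch, producing one predictions file per batch...

    merge_prediction_json_files(
        input_prediction_files=['predictions_{}.json'.format(str(i).zfill(3))
                                for i in range(4)],
        output_prediction_file='predictions-merged.json')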

def load_md_or_speciesnet_file(fn,verbose=True):
    """
    Load a .json file that may be in MD or SpeciesNet format. Typically used so
    SpeciesNet files can be supplied to functions originally written to support MD
    format.

    Args:
        fn (str): a .json file in predictions.json (MD or SpeciesNet) format
        verbose (bool, optional): enable additional debug output

    Returns:
        dict: the contents of [fn], in MD format.
    """

    with open(fn,'r') as f:
        detector_output = json.load(f)

    # If this is a SpeciesNet file, convert to MD format
    if 'predictions' in detector_output:

        if verbose:
            print('This appears to be a SpeciesNet output file, converting to MD format')
        detector_output = generate_md_results_from_predictions_json(predictions_json_file=fn,
                                                                    md_results_file=None,
                                                                    base_folder=None)

    # ...if this is a SpeciesNet file

    assert 'images' in detector_output, \
        'Detector output file should be a json file with an "images" field.'

    return detector_output

# ...def load_md_or_speciesnet_file(...)


def validate_predictions_file(fn,instances=None,verbose=True):
    """
    Validate the predictions.json file [fn].

    Args:
        fn (str): a .json file in predictions.json (SpeciesNet) format
        instances (str or dict, optional): a folder, an instances.json file,
            or a dict loaded from an instances.json file. If supplied, this
            function will verify that [fn] contains the same number of
            images as [instances].
        verbose (bool, optional): enable additional debug output

    Returns:
        dict: the contents of [fn]
    """

    with open(fn,'r') as f:
        d = json.load(f)
    predictions = d['predictions']

    failures = []

    for im in predictions:
        if 'failures' in im:
            failures.append(im)

    if verbose:
        print('Read predictions for {} images, with {} failure(s)'.format(
            len(d['predictions']),len(failures)))

    if instances is not None:

        if isinstance(instances,str):
            if os.path.isdir(instances):
                instances = generate_instances_json_from_folder(folder=instances)
            elif os.path.isfile(instances):
                with open(instances,'r') as f:
                    instances = json.load(f)
            else:
                raise ValueError('Could not find instances file/folder {}'.format(
                    instances))
        assert isinstance(instances,dict)
        assert 'instances' in instances
        instances = instances['instances']
        if verbose:
            print('Expected results for {} files'.format(len(instances)))
        assert len(instances) == len(predictions), \
            '{} instances expected, {} found'.format(
                len(instances),len(predictions))

        expected_files = set([instance['filepath'] for instance in instances])
        found_files = set([prediction['filepath'] for prediction in predictions])
        assert expected_files == found_files

    # ...if a list of instances was supplied

    return d

# ...def validate_predictions_file(...)
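A hypothetical sketch (paths are placeholders): confirming that a merged predictions file covers exactly the images in the original folder:

    from megadetector.utils.wi_taxonomy_utils import validate_predictions_file

    results = validate_predictions_file('predictions-merged.json',
                                        instances='/data/camera-traps')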

#%% Functions related to geofencing

def find_geofence_adjustments(ensemble_json_file,use_latin_names=False):
    """
    Count the number of instances of each unique change made by the geofence.

    Args:
        ensemble_json_file (str): SpeciesNet-formatted .json file produced
            by the full ensemble.
        use_latin_names (bool, optional): return a mapping using binomial names
            rather than common names.

    Returns:
        dict: maps strings that look like "puma,felidae family" to integers,
            where that entry would indicate the number of times that "puma" was
            predicted, but mapped to family level by the geofence. Sorted in
            descending order by count.
    """

    # Load and validate ensemble results
    ensemble_results = validate_predictions_file(ensemble_json_file)

    assert isinstance(ensemble_results,dict)
    predictions = ensemble_results['predictions']

    # Maps comma-separated pairs of common names (or binomial names) to
    # the number of times that transition (first --> second) happened
    rollup_pair_to_count = defaultdict(int)

    # prediction = predictions[0]
    for prediction in tqdm(predictions):

        if 'failures' in prediction and \
           prediction['failures'] is not None and \
           len(prediction['failures']) > 0:
            continue

        assert 'prediction_source' in prediction, \
            'Prediction present without [prediction_source] field, are you sure this ' + \
            'is an ensemble output file?'

        if 'geofence' in prediction['prediction_source']:

            classification_taxonomy_string = \
                prediction['classifications']['classes'][0]
            prediction_taxonomy_string = prediction['prediction']
            assert is_valid_prediction_string(classification_taxonomy_string)
            assert is_valid_prediction_string(prediction_taxonomy_string)

            # Typical examples:
            # '86f5b978-4f30-40cc-bd08-be9e3fba27a0;mammalia;rodentia;sciuridae;sciurus;carolinensis;eastern gray squirrel'
            # 'e4d1e892-0e4b-475a-a8ac-b5c3502e0d55;mammalia;rodentia;sciuridae;;;sciuridae family'
            classification_common_name = classification_taxonomy_string.split(';')[-1]
            prediction_common_name = prediction_taxonomy_string.split(';')[-1]
            classification_binomial_name = classification_taxonomy_string.split(';')[-2]
            prediction_binomial_name = prediction_taxonomy_string.split(';')[-2]

            input_name = classification_binomial_name if use_latin_names else \
                classification_common_name
            output_name = prediction_binomial_name if use_latin_names else \
                prediction_common_name

            rollup_pair = input_name.strip() + ',' + output_name.strip()
            rollup_pair_to_count[rollup_pair] += 1

        # ...if we made a geofencing change

    # ...for each prediction

    rollup_pair_to_count = sort_dictionary_by_value(rollup_pair_to_count,reverse=True)

    return rollup_pair_to_count

# ...def find_geofence_adjustments(...)


def generate_geofence_adjustment_html_summary(rollup_pair_to_count,min_count=10):
    """
    Given a list of geofence rollups, likely generated by find_geofence_adjustments,
    generate an HTML summary of the changes made by geofencing. The resulting HTML
    is wrapped in <div>, but not, for example, in <html> or <body>.

    Args:
        rollup_pair_to_count (dict): list of changes made by geofencing, see
            find_geofence_adjustments for details
        min_count (int, optional): minimum number of changes a pair needs in order
            to be included in the report.

    Returns:
        str: HTML summary of geofence changes, possibly empty
    """

    geofence_footer = ''

    # Restrict to the list of taxa that were impacted by geofencing
    rollup_pair_to_count = \
        {key: value for key, value in rollup_pair_to_count.items() if value >= min_count}

    # rollup_pair_to_count is sorted in descending order by count
    assert is_list_sorted(list(rollup_pair_to_count.values()),reverse=True)

    if len(rollup_pair_to_count) > 0:

        geofence_footer = \
            '<h3>Geofence changes that occurred more than {} times</h3>\n'.format(min_count)
        geofence_footer += '<div class="contentdiv">\n'

        print('\nRollup changes with count > {}:'.format(min_count))
        for rollup_pair in rollup_pair_to_count.keys():
            count = rollup_pair_to_count[rollup_pair]
            rollup_pair_s = rollup_pair.replace(',',' --> ')
            print('{}: {}'.format(rollup_pair_s,count))
            rollup_pair_html = rollup_pair.replace(',',' &rarr; ')
            geofence_footer += '{} ({})<br/>\n'.format(rollup_pair_html,count)

        geofence_footer += '</div>\n'

    return geofence_footer

# ...def generate_geofence_adjustment_html_summary(...)
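A hypothetical sketch (path is a placeholder): summarizing what geofencing changed in an ensemble output file, then rendering the HTML fragment:

    from megadetector.utils.wi_taxonomy_utils import \
        find_geofence_adjustments, generate_geofence_adjustment_html_summary

    rollup_pair_to_count = find_geofence_adjustments('ensemble-predictions.json')
    html_fragment = generate_geofence_adjustment_html_summary(
        rollup_pair_to_count, min_count=10)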
1131
+
1132
+ #%% TaxonomyHandler class
1133
+
1134
+ class TaxonomyHandler:
1135
+ """
1136
+ Handler for taxonomy mapping and geofencing operations.
1137
+ """
1138
+
1139
+ def __init__(self, taxonomy_file, geofencing_file, country_code_file):
1140
+ """
1141
+ Initialize TaxonomyHandler with taxonomy information.
1142
+
1143
+ Args:
1144
+ taxonomy_file (str): .csv file containing the SpeciesNet (or WI) taxonomy,
1145
+ as seven-token taxonomic specifiers. Distributed with the SpeciesNet model.
1146
+ geofencing_file (str): .json file containing the SpeciesNet geofencing rules.
1147
+ Distributed with the SpeciesNet model.
1148
+ country_code_file: .csv file mapping country codes to names. Should include columns
1149
+ called "name" and "alpha-3". A compatible file is available at
1150
+ https://github.com/lukes/ISO-3166-Countries-with-Regional-Codes
1151
+ """
1152
+
1153
+ #: Maps a taxonomy string (e.g. mammalia;cetartiodactyla;cervidae;odocoileus;virginianus) to
1154
+ #: a dict with keys taxon_id, common_name, kingdom, phylum, class, order, family, genus, species
1155
+ self.taxonomy_string_to_taxonomy_info = None
1156
+
1157
+ #: Maps a binomial name (one, two, or three ws-delimited tokens) to the same dict described above.
1158
+ self.binomial_name_to_taxonomy_info = None
1159
+
1160
+ #: Maps a common name to the same dict described above
1161
+ self.common_name_to_taxonomy_info = None
1162
+
1163
+ #: Dict mapping 5-token semicolon-delimited taxonomy strings to geofencing rules
1164
+ self.taxonomy_string_to_geofencing_rules = None
1165
+
1166
+ #: Maps lower-case country names to upper-case country codes
1167
+ self.country_to_country_code = None
1168
+
1169
+ #: Maps upper-case country codes to lower-case country names
1170
+ self.country_code_to_country = None
1171
+
1172
+ self._load_taxonomy_info(taxonomy_file=taxonomy_file)
1173
+ self._initialize_geofencing(geofencing_file=geofencing_file,
1174
+ country_code_file=country_code_file)
1175
+
1176
+
1177
+ def _load_taxonomy_info(self, taxonomy_file):
1178
+ """
1179
+        Load WI/SpeciesNet taxonomy information from a .csv file. Stores information in the
+        instance dicts [taxonomy_string_to_taxonomy_info], [binomial_name_to_taxonomy_info],
+        and [common_name_to_taxonomy_info].
+
+        Args:
+            taxonomy_file (str): .csv file containing the SpeciesNet (or WI) taxonomy,
+                as seven-token taxonomic specifiers. Distributed with the SpeciesNet model.
+        """
+
+        """
+        Taxonomy keys are five-token taxonomy strings, e.g.:
+
+        'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus'
+
+        Taxonomy values are seven-token strings with taxon IDs and common names, e.g.:
+
+        '5c7ce479-8a45-40b3-ae21-7c97dfae22f5;mammalia;cetartiodactyla;cervidae;odocoileus;virginianus;white-tailed deer'
+        """
+
+        with open(taxonomy_file,'r') as f:
+            taxonomy_lines = f.readlines()
+        taxonomy_lines = [s.strip() for s in taxonomy_lines]
+
+        self.taxonomy_string_to_taxonomy_info = {}
+        self.binomial_name_to_taxonomy_info = {}
+        self.common_name_to_taxonomy_info = {}
+
+        five_token_string_to_seven_token_string = {}
+
+        # Map each five-token taxonomy string to its full seven-token line
+        for line in taxonomy_lines:
+            tokens = line.split(';')
+            assert len(tokens) == 7, 'Illegal line {} in taxonomy file {}'.format(
+                line,taxonomy_file)
+            five_token_string = ';'.join(tokens[1:-1])
+            assert len(five_token_string.split(';')) == 5
+            five_token_string_to_seven_token_string[five_token_string] = line
+
+        for taxonomy_string in five_token_string_to_seven_token_string.keys():
+
+            taxonomy_string = taxonomy_string.lower()
+
+            taxon_info = {}
+            extended_string = five_token_string_to_seven_token_string[taxonomy_string]
+            tokens = extended_string.split(';')
+            assert len(tokens) == 7
+            taxon_info['taxon_id'] = tokens[0]
+            assert len(taxon_info['taxon_id']) == 36
+            taxon_info['kingdom'] = 'animal'
+            taxon_info['phylum'] = 'chordata'
+            taxon_info['class'] = tokens[1]
+            taxon_info['order'] = tokens[2]
+            taxon_info['family'] = tokens[3]
+            taxon_info['genus'] = tokens[4]
+            taxon_info['species'] = tokens[5]
+            taxon_info['common_name'] = tokens[6]
+
+            if taxon_info['common_name'] != '':
+                self.common_name_to_taxonomy_info[taxon_info['common_name']] = taxon_info
+
+            self.taxonomy_string_to_taxonomy_info[taxonomy_string] = taxon_info
+
+            # If genus and/or species are missing, fall back to the lowest
+            # available taxonomic rank.
+            binomial_name = None
+            if len(tokens[4]) > 0 and len(tokens[5]) > 0:
+                # strip(), but don't remove spaces from the species name;
+                # subspecies are separated with a space, e.g. canis;lupus dingo
+                binomial_name = tokens[4].strip() + ' ' + tokens[5].strip()
+            elif len(tokens[4]) > 0:
+                binomial_name = tokens[4].strip()
+            elif len(tokens[3]) > 0:
+                binomial_name = tokens[3].strip()
+            elif len(tokens[2]) > 0:
+                binomial_name = tokens[2].strip()
+            elif len(tokens[1]) > 0:
+                binomial_name = tokens[1].strip()
+            if binomial_name is None:
+                # print('Warning: no binomial name for {}'.format(taxonomy_string))
+                pass
+            else:
+                self.binomial_name_to_taxonomy_info[binomial_name] = taxon_info
+
+            taxon_info['binomial_name'] = binomial_name
+
+        # ...for each taxonomy string in the file
+
+        print('Created {} records in taxonomy_string_to_taxonomy_info'.format(len(self.taxonomy_string_to_taxonomy_info)))
+        print('Created {} records in common_name_to_taxonomy_info'.format(len(self.common_name_to_taxonomy_info)))
+
+    # ...def _load_taxonomy_info(...)
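+
+    # Example (editor's sketch, not part of the original source): after loading,
+    # the instance dicts support lookups like the following, assuming an
+    # initialized TaxonomyHandler instance called 'handler':
+    #
+    #   info = handler.taxonomy_string_to_taxonomy_info[
+    #       'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus']
+    #   info['common_name']    -> 'white-tailed deer'
+    #   info['binomial_name']  -> 'odocoileus virginianus'
+    #
+    #   # The same record is reachable via common name or binomial name
+    #   handler.common_name_to_taxonomy_info['white-tailed deer'] is info        -> True
+    #   handler.binomial_name_to_taxonomy_info['odocoileus virginianus'] is info -> True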
+
+
+    def _initialize_geofencing(self, geofencing_file, country_code_file):
+        """
+        Load geofencing information from a .json file, and country code mappings from
+        a .csv file. Stores results in the instance tables [taxonomy_string_to_geofencing_rules],
+        [country_to_country_code], and [country_code_to_country].
+
+        Args:
+            geofencing_file (str): .json file with geofencing rules
+            country_code_file (str): .csv file with country code mappings, in columns
+                called "name" and "alpha-3", e.g. from
+                https://github.com/lukes/ISO-3166-Countries-with-Regional-Codes/blob/master/all/all.csv
+        """
+
+        # Read country code information
+        country_code_df = pd.read_csv(country_code_file)
+        self.country_to_country_code = {}
+        self.country_code_to_country = {}
+        for i_row,row in country_code_df.iterrows():
+            self.country_to_country_code[row['name'].lower()] = row['alpha-3'].upper()
+            self.country_code_to_country[row['alpha-3'].upper()] = row['name'].lower()
+
+        # Read geofencing information
+        with open(geofencing_file,'r',encoding='utf-8') as f:
+            self.taxonomy_string_to_geofencing_rules = json.load(f)
+
+        """
+        Geofencing keys are five-token taxonomy strings, e.g.:
+
+        'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus'
+
+        Geofencing values are tables mapping allow/block to country codes, optionally including region/state codes, e.g.:
+
+        {'allow': {
+            'ALA': [],
+            'ARG': [],
+            ...
+            'SUR': [],
+            'TTO': [],
+            'USA': ['AL',
+                    'AR',
+                    'AZ',
+                    ...
+        }
+        """
+
+        # Validate
+
+        # species_string = next(iter(taxonomy_string_to_geofencing_rules.keys()))
+        for species_string in self.taxonomy_string_to_geofencing_rules.keys():
+
+            species_rules = self.taxonomy_string_to_geofencing_rules[species_string]
+
+            if len(species_rules.keys()) > 1:
+                print('Warning: taxon {} has both allow and block rules'.format(species_string))
+
+            for rule_type in species_rules.keys():
+
+                assert rule_type in ('allow','block')
+                all_country_rules_this_species = species_rules[rule_type]
+
+                for country_code in all_country_rules_this_species.keys():
+
+                    assert country_code in self.country_code_to_country
+                    region_rules = all_country_rules_this_species[country_code]
+                    # Right now we only have regional rules for the USA; these may be part of
+                    # allow or block rules.
+                    if len(region_rules) > 0:
+                        assert country_code == 'USA'
+
+                # ...for each country code in this rule set
+
+            # ...for each rule set for this species
+
+        # ...for each species
+
+    # ...def _initialize_geofencing(...)
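+
+    # Example (editor's sketch, not part of the original source): after
+    # initialization, the loaded tables can be inspected directly, e.g.:
+    #
+    #   handler.country_to_country_code['suriname']  -> 'SUR'
+    #   handler.country_code_to_country['SUR']       -> 'suriname'
+    #   rules = handler.taxonomy_string_to_geofencing_rules[
+    #       'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus']
+    #   list(rules.keys())  -> ['allow'] (per the example structure above)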
+
+
+    def _parse_region_code_list(self, codes):
+        """
+        Turn a single country/state code, a comma-delimited string of codes, or a list
+        of codes into a list of uppercase codes. Also does basic validity checking.
+        """
+
+        if not isinstance(codes,list):
+
+            assert isinstance(codes,str)
+
+            codes = codes.strip()
+
+            # This is just a single code
+            if ',' not in codes:
+                codes = [codes]
+            else:
+                codes = codes.split(',')
+                codes = [c.strip() for c in codes]
+
+        assert isinstance(codes,list)
+
+        codes = [c.upper().strip() for c in codes]
+
+        # Country codes are three letters, US state codes are two
+        for c in codes:
+            assert len(c) in (2,3)
+
+        return codes
+
+    # ...def _parse_region_code_list(...)
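+
+    # Example (editor's sketch, not part of the original source) of the three
+    # accepted input formats:
+    #
+    #   handler._parse_region_code_list('usa')            -> ['USA']
+    #   handler._parse_region_code_list('usa, can, mex')  -> ['USA', 'CAN', 'MEX']
+    #   handler._parse_region_code_list(['al', 'az'])     -> ['AL', 'AZ']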
+
+
+    def generate_csv_rows_to_block_all_countries_except(self, species_string, block_except_list):
+        """
+        Generate rows in the format expected by geofence_fixes.csv, representing a list of
+        allow and block rules that block all countries currently allowed for this species
+        except those in [block_except_list], and add allow rules for those countries.
+
+        Args:
+            species_string (str): five-token taxonomy string
+            block_except_list (list): list of country codes not to block
+
+        Returns:
+            list of str: strings compatible with geofence_fixes.csv
+        """
+
+        assert is_valid_taxonomy_string(species_string), \
+            '{} is not a valid taxonomy string'.format(species_string)
+
+        assert self.taxonomy_string_to_geofencing_rules is not None, \
+            'Initialize geofencing prior to species lookup'
+        assert self.taxonomy_string_to_taxonomy_info is not None, \
+            'Initialize taxonomy lookup prior to species lookup'
+
+        geofencing_rules_this_species = \
+            self.taxonomy_string_to_geofencing_rules[species_string]
+
+        allowed_countries = []
+        if 'allow' in geofencing_rules_this_species:
+            allowed_countries.extend(geofencing_rules_this_species['allow'])
+
+        blocked_countries = []
+        if 'block' in geofencing_rules_this_species:
+            blocked_countries.extend(geofencing_rules_this_species['block'])
+
+        block_except_list = self._parse_region_code_list(block_except_list)
+
+        countries_to_block = []
+        countries_to_allow = []
+
+        # country = allowed_countries[0]
+        for country in allowed_countries:
+            if country not in block_except_list and country not in blocked_countries:
+                countries_to_block.append(country)
+
+        for country in block_except_list:
+            if country in blocked_countries:
+                raise ValueError("I can't allow a country that has already been blocked")
+            if country not in allowed_countries:
+                countries_to_allow.append(country)
+
+        rows = self.generate_csv_rows_for_species(species_string,
+                                                  allow_countries=countries_to_allow,
+                                                  block_countries=countries_to_block)
+
+        return rows
+
+    # ...def generate_csv_rows_to_block_all_countries_except(...)
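+
+    # Example (editor's sketch, not part of the original source), assuming
+    # taxonomy and geofencing lookups have both been initialized:
+    #
+    #   rows = handler.generate_csv_rows_to_block_all_countries_except(
+    #       'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus', 'USA,CAN')
+    #
+    # ...returns block rows for every currently-allowed country other than USA
+    # and CAN, plus allow rows for USA/CAN if they are not already allowed.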
+
+
+    def generate_csv_rows_for_species(self, species_string,
+                                      allow_countries=None,
+                                      block_countries=None,
+                                      allow_states=None,
+                                      block_states=None):
+        """
+        Generate rows in the format expected by geofence_fixes.csv, representing a list of
+        allow and/or block rules for the specified species and countries/states. Does not check
+        that the rules make sense; e.g., nothing in this function will stop you from both
+        allowing and blocking a country.
+
+        Args:
+            species_string (str): five-token string in semicolon-delimited WI taxonomy format
+            allow_countries (list or str, optional): single three-letter country code, list
+                of country codes, or comma-separated string of country codes to allow
+            block_countries (list or str, optional): single three-letter country code, list
+                of country codes, or comma-separated string of country codes to block
+            allow_states (list or str, optional): single two-letter state code, list of
+                state codes, or comma-separated string of state codes to allow
+            block_states (list or str, optional): single two-letter state code, list of
+                state codes, or comma-separated string of state codes to block
+
+        Returns:
+            list of str: lines ready to be pasted into geofence_fixes.csv
+        """
+
+        assert is_valid_taxonomy_string(species_string), \
+            '{} is not a valid taxonomy string'.format(species_string)
+
+        lines = []
+
+        if allow_countries is not None:
+            allow_countries = self._parse_region_code_list(allow_countries)
+            for country in allow_countries:
+                lines.append(species_string + ',allow,' + country + ',')
+
+        if block_countries is not None:
+            block_countries = self._parse_region_code_list(block_countries)
+            for country in block_countries:
+                lines.append(species_string + ',block,' + country + ',')
+
+        if allow_states is not None:
+            allow_states = self._parse_region_code_list(allow_states)
+            for state in allow_states:
+                lines.append(species_string + ',allow,USA,' + state)
+
+        if block_states is not None:
+            block_states = self._parse_region_code_list(block_states)
+            for state in block_states:
+                lines.append(species_string + ',block,USA,' + state)
+
+        return lines
+
+    # ...def generate_csv_rows_for_species(...)
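+
+    # Example (editor's sketch, not part of the original source) of the output
+    # row format:
+    #
+    #   handler.generate_csv_rows_for_species(
+    #       'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus',
+    #       allow_countries='USA', block_states='HI')
+    #   ->
+    #   ['mammalia;cetartiodactyla;cervidae;odocoileus;virginianus,allow,USA,',
+    #    'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus,block,USA,HI']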
+
+
+    def species_string_to_canonical_species_string(self, species):
+        """
+        Convert a string that may be a 5-token species string, a binomial name,
+        or a common name into a 5-token species string, using taxonomic lookup.
+
+        Args:
+            species (str): 5-token species string, binomial name, or common name
+
+        Returns:
+            str: 5-token species string
+
+        Raises:
+            ValueError: if [species] is not in our dictionary
+        """
+
+        species = species.lower().strip()
+
+        # Turn "species" into a taxonomy string
+
+        # If this is already a taxonomy string...
+        if len(species.split(';')) == 5:
+            taxonomy_string = species
+        # If this is a common name...
+        elif species in self.common_name_to_taxonomy_info:
+            taxonomy_info = self.common_name_to_taxonomy_info[species]
+            taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
+        # If this is a binomial name...
+        elif species in self.binomial_name_to_taxonomy_info:
+            taxonomy_info = self.binomial_name_to_taxonomy_info[species]
+            taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
+        else:
+            raise ValueError('Could not find taxonomic information for {}'.format(species))
+
+        return taxonomy_string
+
+    # ...def species_string_to_canonical_species_string(...)
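+
+    # Example (editor's sketch, not part of the original source): all three
+    # input forms resolve to the same canonical string:
+    #
+    #   handler.species_string_to_canonical_species_string('white-tailed deer')
+    #   handler.species_string_to_canonical_species_string('odocoileus virginianus')
+    #   ...both -> 'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus'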
+
+
+    def species_string_to_taxonomy_info(self, species):
+        """
+        Convert a string that may be a 5-token species string, a binomial name,
+        or a common name into a taxonomic info dictionary, using taxonomic lookup.
+
+        Args:
+            species (str): 5-token species string, binomial name, or common name
+
+        Returns:
+            dict: taxonomy information
+
+        Raises:
+            ValueError: if [species] is not in our dictionary
+        """
+
+        species = species.lower().strip()
+        canonical_string = self.species_string_to_canonical_species_string(species)
+        return self.taxonomy_string_to_taxonomy_info[canonical_string]
+
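+    # Example (editor's sketch, not part of the original source):
+    #
+    #   handler.species_string_to_taxonomy_info('white-tailed deer')['taxon_id']
+    #   -> '5c7ce479-8a45-40b3-ae21-7c97dfae22f5' (per the example taxonomy line above)
+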
+    def species_allowed_in_country(self, species, country, state=None, return_status=False):
+        """
+        Determines whether [species] is allowed in [country], according to
+        already-initialized geofencing rules.
+
+        Args:
+            species (str): can be a common name, a binomial name, or a species string
+            country (str): country name or three-letter code
+            state (str, optional): two-letter US state code
+            return_status (bool, optional): by default, this function returns a bool;
+                if you want to know *why* [species] is allowed/not allowed, setting
+                return_status to True will return additional information.
+
+        Returns:
+            bool or str: typically returns True if [species] is allowed in [country], else
+            False. Returns a more detailed status string if return_status is set.
+        """
+
+        assert self.taxonomy_string_to_geofencing_rules is not None, \
+            'Initialize geofencing prior to species lookup'
+        assert self.taxonomy_string_to_taxonomy_info is not None, \
+            'Initialize taxonomy lookup prior to species lookup'
+
+        taxonomy_string = self.species_string_to_canonical_species_string(species)
+
+        # Normalize [state]
+
+        if state is not None:
+            state = state.upper()
+            assert len(state) == 2
+
+        # Turn "country" into a country code
+
+        if len(country) == 3:
+            assert country.upper() in self.country_code_to_country
+            country = country.upper()
+        else:
+            assert country.lower() in self.country_to_country_code
+            country = self.country_to_country_code[country.lower()]
+
+        country_code = country.upper()
+
+        # Species with no rules are allowed everywhere
+        if taxonomy_string not in self.taxonomy_string_to_geofencing_rules:
+            status = 'allow_by_default'
+            if return_status:
+                return status
+            else:
+                return True
+
+        geofencing_rules_this_species = self.taxonomy_string_to_geofencing_rules[taxonomy_string]
+        allowed_countries = []
+        blocked_countries = []
+
+        rule_types_this_species = list(geofencing_rules_this_species.keys())
+        for rule_type in rule_types_this_species:
+            assert rule_type in ('allow','block')
+
+        if 'block' in rule_types_this_species:
+            blocked_countries = list(geofencing_rules_this_species['block'])
+        if 'allow' in rule_types_this_species:
+            allowed_countries = list(geofencing_rules_this_species['allow'])
+
+        status = None
+
+        # The convention is that block rules win over allow rules
+        if country_code in blocked_countries:
+            if country_code in allowed_countries:
+                status = 'blocked_over_allow'
+            else:
+                status = 'blocked'
+        elif country_code in allowed_countries:
+            status = 'allowed'
+        elif len(allowed_countries) > 0:
+            # The convention is that if allow rules exist, any country not on that list
+            # is blocked.
+            status = 'block_not_on_country_allow_list'
+        else:
+            # Only block rules exist for this species, and they don't include this country
+            assert len(blocked_countries) > 0
+            status = 'allow_not_on_block_list'
+
+        # Now let's see whether we have to deal with any regional rules.
+        #
+        # Right now regional rules only exist for the USA. Note that [rule_type]
+        # here holds the last rule type examined in the loop above.
+        if (country_code == 'USA') and ('USA' in geofencing_rules_this_species[rule_type]):
+
+            if state is None:
+
+                state_list = geofencing_rules_this_species[rule_type][country_code]
+                if len(state_list) > 0:
+                    assert status.startswith('allow')
+                    status = 'allow_no_state'
+
+            else:
+
+                state_list = geofencing_rules_this_species[rule_type][country_code]
+
+                if state in state_list:
+                    # If the state is on the list, do what the list says
+                    if rule_type == 'allow':
+                        status = 'allow_on_state_allow_list'
+                    else:
+                        status = 'block_on_state_block_list'
+                else:
+                    # If the state is not on the list, do the opposite of what the list says
+                    if rule_type == 'allow':
+                        status = 'block_not_on_state_allow_list'
+                    else:
+                        status = 'allow_not_on_state_block_list'
+
+        if return_status:
+            return status
+        else:
+            if status.startswith('allow'):
+                return True
+            else:
+                assert status.startswith('block')
+                return False
+
+    # ...def species_allowed_in_country(...)
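+
+    # Example (editor's sketch, not part of the original source) illustrating
+    # the precedence conventions, assuming the example rules shown earlier:
+    #
+    #   handler.species_allowed_in_country('white-tailed deer', 'suriname')
+    #   -> True ('SUR' is on the allow list)
+    #
+    #   handler.species_allowed_in_country('white-tailed deer', 'france',
+    #                                      return_status=True)
+    #   -> 'block_not_on_country_allow_list' (allow rules exist for this
+    #      species, and FRA is not among them)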
+
+
+    def export_geofence_data_to_csv(self, csv_fn=None, include_common_names=True):
+        """
+        Converts the geofence .json representation into an equivalent .csv representation,
+        with one taxon per row and one region per column. Empty values indicate non-allowed
+        combinations, positive numbers indicate allowed combinations. Negative values
+        are reserved for specific non-allowed combinations.
+
+        Args:
+            csv_fn (str, optional): output .csv file; if None, no file is written
+            include_common_names (bool, optional): include a column for common names
+
+        Returns:
+            dataframe: the pandas representation of the .csv output file
+        """
+
+        all_taxa = sorted(list(self.taxonomy_string_to_geofencing_rules.keys()))
+        print('Preparing geofencing export for {} taxa'.format(len(all_taxa)))
+
+        all_regions = set()
+
+        # taxon = all_taxa[0]
+        for taxon in all_taxa:
+
+            taxon_rules = self.taxonomy_string_to_geofencing_rules[taxon]
+            for rule_type in taxon_rules.keys():
+
+                assert rule_type in ('allow','block')
+                all_country_rules_this_species = taxon_rules[rule_type]
+
+                for country_code in all_country_rules_this_species.keys():
+                    all_regions.add(country_code)
+                    assert country_code in self.country_code_to_country
+                    assert len(country_code) == 3
+                    region_rules = all_country_rules_this_species[country_code]
+                    if len(region_rules) > 0:
+                        assert country_code == 'USA'
+                        for region_name in region_rules:
+                            assert len(region_name) == 2
+                            assert isinstance(region_name,str)
+                            all_regions.add(country_code + ':' + region_name)
+
+        all_regions = sorted(list(all_regions))
+
+        print('Found {} regions'.format(len(all_regions)))
+
+        n_allowed = 0
+        df = pd.DataFrame(index=all_taxa,columns=all_regions)
+        # df = df.fillna(np.nan)
+
+        for taxon in tqdm(all_taxa):
+            for region in all_regions:
+                tokens = region.split(':')
+                country_code = tokens[0]
+                state_code = None
+                if len(tokens) > 1:
+                    state_code = tokens[1]
+                allowed = self.species_allowed_in_country(species=taxon,
+                                                          country=country_code,
+                                                          state=state_code,
+                                                          return_status=False)
+                if allowed:
+                    n_allowed += 1
+                    df.loc[taxon,region] = 1
+
+            # ...for each region
+
+        # ...for each taxon
+
+        print('Allowed {} of {} combinations'.format(n_allowed,len(all_taxa)*len(all_regions)))
+
+        # Before saving, convert columns with numeric values to integers
+        for col in df.columns:
+            # Check whether the column contains any values that can be parsed as numbers
+            if df[col].notna().any() and pd.to_numeric(df[col], errors='coerce').notna().any():
+                # Convert the column to Int64 (pandas' nullable integer type)
+                df[col] = pd.to_numeric(df[col], errors='coerce').astype('Int64')
+
+        if include_common_names:
+            df.insert(loc=0,column='common_name',value='')
+            for taxon in all_taxa:
+                if taxon in self.taxonomy_string_to_taxonomy_info:
+                    taxonomy_info = self.taxonomy_string_to_taxonomy_info[taxon]
+                    common_name = taxonomy_info['common_name']
+                    assert isinstance(common_name,str) and len(common_name) < 50
+                    df.loc[taxon,'common_name'] = common_name
+
+        if csv_fn is not None:
+            df.to_csv(csv_fn,index=True,header=True)
+
+        return df
+
+    # ...def export_geofence_data_to_csv(...)
+
+# ...class TaxonomyHandler
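+
+# Example (editor's sketch, not part of the original source) of a typical
+# export, assuming an initialized TaxonomyHandler instance called 'handler':
+#
+#   df = handler.export_geofence_data_to_csv(csv_fn='geofence_rules.csv')
+#   df.loc['mammalia;cetartiodactyla;cervidae;odocoileus;virginianus', 'USA:AZ']
+#   -> 1 if the species is allowed in Arizona, else <NA>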