megadetector 10.0.4__py3-none-any.whl → 10.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -1,2674 +0,0 @@
- """
-
- wi_utils.py
-
- Functions related to working with the WI insights platform, specifically for:
-
- * Retrieving images based on .csv downloads
- * Pushing results to the ProcessCVResponse() API (requires an API key)
- * Working with WI taxonomy records and geofencing data
-
- """
-
- #%% Imports and constants
-
- import os
- import requests
- import json
- import tempfile
- import uuid
-
- import numpy as np
- import pandas as pd
-
- from copy import deepcopy
- from collections import defaultdict
- from multiprocessing.pool import Pool, ThreadPool
- from functools import partial
- from tqdm import tqdm
-
- from megadetector.utils.path_utils import insert_before_extension
- from megadetector.utils.path_utils import find_images
-
- from megadetector.utils.ct_utils import split_list_into_n_chunks
- from megadetector.utils.ct_utils import round_floats_in_nested_dict
- from megadetector.utils.ct_utils import is_list_sorted
- from megadetector.utils.ct_utils import invert_dictionary
- from megadetector.utils.ct_utils import sort_list_of_dicts_by_key
- from megadetector.utils.ct_utils import sort_dictionary_by_value
-
- from megadetector.postprocessing.validate_batch_results import \
-     validate_batch_results, ValidateBatchResultsOptions
-
- md_category_id_to_name = {'1':'animal','2':'person','3':'vehicle'}
- md_category_name_to_id = invert_dictionary(md_category_id_to_name)
-
- # Only used when pushing results directly to the platform via the API; any detections we want
- # to show in the UI should have at least this confidence value.
- min_md_output_confidence = 0.25
-
- # Fields expected to be present in a valid WI result
- wi_result_fields = ['wi_taxon_id','class','order','family','genus','species','common_name']
-
-
- #%% Miscellaneous WI support functions
-
- def is_valid_prediction_string(s):
-     """
-     Determine whether [s] is a valid WI prediction string. Prediction strings look like:
-
-     '90d950db-2106-4bd9-a4c1-777604c3eada;mammalia;rodentia;;;;rodent'
-
-     Args:
-         s (str): the string to be tested for validity
-
-     Returns:
-         bool: True if this looks more or less like a WI prediction string
-     """
-
-     # Note to self... don't get tempted to remove spaces here; spaces are used
-     # to indicate subspecies.
-     return isinstance(s,str) and (len(s.split(';')) == 7) and (s == s.lower())
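
For orientation, a minimal sketch of how the two string formats relate, using the example values above:

    # Token counts distinguish the two formats
    p = '90d950db-2106-4bd9-a4c1-777604c3eada;mammalia;rodentia;;;;rodent'
    assert len(p.split(';')) == 7      # GUID + five taxonomy levels + common name
    t = ';'.join(p.split(';')[1:-1])   # 'mammalia;rodentia;;;'
    assert len(t.split(';')) == 5      # class;order;family;genus;species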
-
-
- def is_valid_taxonomy_string(s):
-     """
-     Determine whether [s] is a valid 5-token WI taxonomy string. Taxonomy strings
-     look like:
-
-     'mammalia;rodentia;;;'
-     'mammalia;carnivora;canidae;canis;lupus dingo'
-
-     Args:
-         s (str): the string to be tested for validity
-
-     Returns:
-         bool: True if this looks more or less like a WI taxonomy string
-     """
-     return isinstance(s,str) and (len(s.split(';')) == 5) and (s == s.lower())
-
-
- def clean_taxonomy_string(s):
-     """
-     If [s] is a seven-token prediction string, trim the GUID and common name to produce
-     a "clean" taxonomy string. Else if [s] is a five-token string, return it. Else error.
-
-     Args:
-         s (str): the seven- or five-token taxonomy/prediction string to clean
-
-     Returns:
-         str: the five-token taxonomy string
-     """
-
-     if is_valid_taxonomy_string(s):
-         return s
-     elif is_valid_prediction_string(s):
-         tokens = s.split(';')
-         assert len(tokens) == 7
-         return ';'.join(tokens[1:-1])
-     else:
-         raise ValueError('Invalid taxonomy string')
-
-
- taxonomy_level_names = \
-     ['non-taxonomic','kingdom','phylum','class','order','family','genus','species','subspecies']
-
-
- def taxonomy_level_to_string(k):
-     """
-     Maps taxonomy level indices (0 for non-taxonomic, 1 for kingdom, 2 for phylum, etc.) to strings.
-
-     Args:
-         k (int): taxonomy level index
-
-     Returns:
-         str: taxonomy level string
-     """
-
-     assert k >= 0 and k < len(taxonomy_level_names), \
-         'Illegal taxonomy level index {}'.format(k)
-
-     return taxonomy_level_names[k]
-
-
- def taxonomy_level_string_to_index(s):
-     """
-     Maps strings ('kingdom', 'species', etc.) to level indices.
-
-     Args:
-         s (str): taxonomy level string
-
-     Returns:
-         int: taxonomy level index
-     """
-
-     assert s in taxonomy_level_names, 'Unrecognized taxonomy level string {}'.format(s)
-     return taxonomy_level_names.index(s)
-
-
- def taxonomy_level_index(s):
-     """
-     Returns the taxonomy level up to which [s] is defined (0 for non-taxonomic, 1 for kingdom,
-     2 for phylum, etc.). Empty strings and non-taxonomic strings are treated as level 0. 1 and 2
-     will never be returned; "animal" doesn't look like other taxonomic strings, so here we treat
-     it as non-taxonomic.
-
-     Args:
-         s (str): 5-token or 7-token taxonomy string
-
-     Returns:
-         int: taxonomy level
-     """
-
-     if s in non_taxonomic_prediction_strings or s in non_taxonomic_prediction_short_strings:
-         return 0
-
-     tokens = s.split(';')
-     assert len(tokens) in (5,7)
-
-     if len(tokens) == 7:
-         tokens = tokens[1:-1]
-
-     if len(tokens[0]) == 0:
-         return 0
-     # WI taxonomy strings start at class, so we'll never return 1 (kingdom) or 2 (phylum)
-     elif len(tokens[1]) == 0:
-         return 3
-     elif len(tokens[2]) == 0:
-         return 4
-     elif len(tokens[3]) == 0:
-         return 5
-     elif len(tokens[4]) == 0:
-         return 6
-     # Subspecies are delimited with a space
-     elif ' ' not in tokens[4]:
-         return 7
-     else:
-         return 8
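
A quick illustration of the level arithmetic (a sketch; the values follow from the function above):

    # 'mammalia;rodentia;;;' is defined through order, so the level is 4
    assert taxonomy_level_index('mammalia;rodentia;;;') == 4
    assert taxonomy_level_to_string(4) == 'order'
    # A space-delimited subspecies resolves to level 8
    assert taxonomy_level_index('mammalia;carnivora;canidae;canis;lupus dingo') == 8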
-
-
- def wi_result_to_prediction_string(r):
-     """
-     Convert the dict [r] - typically loaded from a row in a downloaded .csv file - to
-     a valid prediction string, e.g.:
-
-     1f689929-883d-4dae-958c-3d57ab5b6c16;;;;;;animal
-     90d950db-2106-4bd9-a4c1-777604c3eada;mammalia;rodentia;;;;rodent
-
-     Args:
-         r (dict): dict containing WI prediction information, with at least the fields
-             specified in wi_result_fields.
-
-     Returns:
-         str: the result in [r], as a semicolon-delimited prediction string
-     """
-
-     values = []
-     for field in wi_result_fields:
-         if isinstance(r[field],str):
-             values.append(r[field].lower())
-         else:
-             assert isinstance(r[field],float) and np.isnan(r[field])
-             values.append('')
-     s = ';'.join(values)
-     assert is_valid_prediction_string(s)
-     return s
-
-
- def compare_values(v0,v1):
-     """
-     Utility function for comparing two values when we want to return True if both
-     values are NaN.
-
-     Args:
-         v0 (object): the first value to compare
-         v1 (object): the second value to compare
-
-     Returns:
-         bool: True if v0 == v1, or if both v0 and v1 are NaN
-     """
-
-     if isinstance(v0,float) and isinstance(v1,float) and np.isnan(v0) and np.isnan(v1):
-         return True
-     return v0 == v1
-
-
- def record_is_unidentified(record):
-     """
-     A record is considered "unidentified" if the "identified_by" field is either NaN or
-     "Computer vision".
-
-     Args:
-         record (dict): dict representing a WI result loaded from a .csv file, with at least the
-             field "identified_by"
-
-     Returns:
-         bool: True if the "identified_by" field is either NaN or a string indicating that this
-             record has not yet been human-reviewed.
-     """
-
-     identified_by = record['identified_by']
-     assert isinstance(identified_by,float) or isinstance(identified_by,str)
-     if isinstance(identified_by,float):
-         assert np.isnan(identified_by)
-         return True
-     else:
-         return identified_by == 'Computer vision'
-
-
- def record_lists_are_identical(records_0,records_1,verbose=False):
-     """
-     Takes two lists of records in the form returned by read_images_from_download_bundle and
-     determines whether they are the same.
-
-     Args:
-         records_0 (list of dict): the first list of records to compare
-         records_1 (list of dict): the second list of records to compare
-         verbose (bool, optional): enable additional debug output
-
-     Returns:
-         bool: True if the two lists are identical
-     """
-
-     if len(records_0) != len(records_1):
-         return False
-
-     # i_record = 0; record_0 = records_0[i_record]
-     for i_record,record_0 in enumerate(records_0):
-         record_1 = records_1[i_record]
-         assert set(record_0.keys()) == set(record_1.keys())
-         for k in record_0.keys():
-             if not compare_values(record_0[k],record_1[k]):
-                 if verbose:
-                     print('Image ID: {} ({})\nRecord 0/{}: {}\nRecord 1/{}: {}'.format(
-                         record_0['image_id'],record_1['image_id'],
-                         k,record_0[k],k,record_1[k]))
-                 return False
-
-     return True
-
-
- #%% Functions for managing WI downloads
-
- def read_sequences_from_download_bundle(download_folder):
-     """
-     Reads sequences.csv from [download_folder], returning a list of dicts. This is a
-     thin wrapper around pd.read_csv; it's just here for future-proofing.
-
-     Args:
-         download_folder (str): a folder containing exactly one file called sequences.csv, typically
-             representing a Wildlife Insights download bundle.
-
-     Returns:
-         list of dict: a direct conversion of the .csv file to a list of dicts
-     """
-
-     print('Reading sequences from {}'.format(download_folder))
-
-     sequence_list_files = os.listdir(download_folder)
-     sequence_list_files = \
-         [fn for fn in sequence_list_files if fn == 'sequences.csv']
-     assert len(sequence_list_files) == 1, \
-         'Could not find sequences.csv in {}'.format(download_folder)
-
-     sequence_list_file = os.path.join(download_folder,sequence_list_files[0])
-
-     df = pd.read_csv(sequence_list_file)
-     sequence_records = df.to_dict('records')
-     return sequence_records
-
-
- def read_images_from_download_bundle(download_folder):
-     """
-     Reads all images_*.csv files from [download_folder], returns a dict mapping image IDs
-     to a list of dicts that describe each image. It's a list of dicts rather than a single dict
-     because images may appear more than once.
-
-     Args:
-         download_folder (str): a folder containing one or more images_*.csv files, typically
-             representing a Wildlife Insights download bundle.
-
-     Returns:
-         dict: Maps image GUIDs to dicts with at least the following fields:
-             * project_id (int)
-             * deployment_id (str)
-             * image_id (str, should match the key)
-             * filename (str, the filename without path at the time of upload)
-             * location (str, starting with gs://)
-
-             May also contain classification fields: wi_taxon_id (str), species, etc.
-     """
-
-     print('Reading images from {}'.format(download_folder))
-
-     ##%% Find lists of images
-
-     image_list_files = os.listdir(download_folder)
-     image_list_files = \
-         [fn for fn in image_list_files if fn.startswith('images_') and fn.endswith('.csv')]
-     image_list_files = \
-         [os.path.join(download_folder,fn) for fn in image_list_files]
-     print('Found {} image list files'.format(len(image_list_files)))
-
-
-     ##%% Read lists of images by deployment
-
-     image_id_to_image_records = defaultdict(list)
-
-     # image_list_file = image_list_files[0]
-     for image_list_file in image_list_files:
-
-         print('Reading images from list file {}'.format(
-             os.path.basename(image_list_file)))
-
-         df = pd.read_csv(image_list_file)
-
-         # i_row = 0; row = df.iloc[i_row]
-         for i_row,row in tqdm(df.iterrows(),total=len(df)):
-
-             row_dict = row.to_dict()
-             image_id = row_dict['image_id']
-             image_id_to_image_records[image_id].append(row_dict)
-
-         # ...for each image
-
-     # ...for each list file
-
-     deployment_ids = set()
-     for image_id in image_id_to_image_records:
-         image_records = image_id_to_image_records[image_id]
-         for image_record in image_records:
-             deployment_ids.add(image_record['deployment_id'])
-
-     print('Found {} rows in {} deployments'.format(
-         len(image_id_to_image_records),
-         len(deployment_ids)))
-
-     return image_id_to_image_records
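
A minimal usage sketch (the bundle path is hypothetical):

    # Count unique images and deployments in an extracted download bundle
    image_id_to_records = read_images_from_download_bundle('./wi-download')
    deployment_ids = {r['deployment_id'] for records in image_id_to_records.values() for r in records}
    print('{} images in {} deployments'.format(len(image_id_to_records),len(deployment_ids)))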
-
-
- def find_images_in_identify_tab(download_folder_with_identify,download_folder_excluding_identify):
-     """
-     Based on extracted download packages with and without the "exclude images in 'identify' tab"
-     checkbox checked, figure out which images are in the identify tab. Returns a list of dicts (one
-     per image).
-
-     Args:
-         download_folder_with_identify (str): the folder containing the download bundle that
-             includes images from the "identify" tab
-         download_folder_excluding_identify (str): the folder containing the download bundle that
-             excludes images from the "identify" tab
-
-     Returns:
-         list of dict: list of image records that are present in the identify tab
-     """
-
-     ##%% Read data (~30 seconds)
-
-     image_id_to_image_records_with_identify = \
-         read_images_from_download_bundle(download_folder_with_identify)
-     image_id_to_image_records_excluding_identify = \
-         read_images_from_download_bundle(download_folder_excluding_identify)
-
-
-     ##%% Find images that have not been identified
-
-     all_image_ids_with_identify = set(image_id_to_image_records_with_identify.keys())
-     all_image_ids_excluding_identify = set(image_id_to_image_records_excluding_identify.keys())
-
-     image_ids_in_identify_tab = all_image_ids_with_identify.difference(all_image_ids_excluding_identify)
-
-     assert len(image_ids_in_identify_tab) == \
-         len(all_image_ids_with_identify) - len(all_image_ids_excluding_identify)
-
-     print('Found {} images with identify, {} in identify tab, {} excluding'.format(
-         len(all_image_ids_with_identify),
-         len(image_ids_in_identify_tab),
-         len(all_image_ids_excluding_identify)))
-
-     image_records_in_identify_tab = []
-     deployment_ids_for_downloaded_images = set()
-
-     for image_id in image_ids_in_identify_tab:
-         image_records_this_image = image_id_to_image_records_with_identify[image_id]
-         assert len(image_records_this_image) > 0
-         image_records_in_identify_tab.extend(image_records_this_image)
-         for image_record in image_records_this_image:
-             deployment_ids_for_downloaded_images.add(image_record['deployment_id'])
-
-     print('Found {} records for {} unique images in {} deployments'.format(
-         len(image_records_in_identify_tab),
-         len(image_ids_in_identify_tab),
-         len(deployment_ids_for_downloaded_images)))
-
-     return image_records_in_identify_tab
-
- # ...def find_images_in_identify_tab(...)
-
-
- def write_download_commands(image_records_to_download,
-                             download_dir_base,
-                             force_download=False,
-                             n_download_workers=25,
-                             download_command_file_base=None):
-     """
-     Given a list of dicts with at least the field 'location' (a gs:// URL), prepare a set of
-     "gcloud storage" commands to download images, and write those to a series of .sh scripts,
-     along with one .sh script that runs all the others and blocks.
-
-     gcloud commands will use relative paths.
-
-     image_records_to_download can also be a dict mapping IDs to lists of records.
-
-     Args:
-         image_records_to_download (list of dict): list of dicts with at least the field 'location'
-         download_dir_base (str): local destination folder
-         force_download (bool, optional): include download commands even if the target file exists
-         n_download_workers (int, optional): number of scripts to write (that's our hacky way
-             of controlling parallelization)
-         download_command_file_base (str, optional): path of the .sh script we should write, defaults
-             to "download_wi_images.sh" in the destination folder. Individual worker scripts will
-             have a number added, e.g. download_wi_images_00.sh.
-     """
-
-     if isinstance(image_records_to_download,dict):
-
-         all_image_records = []
-         for k in image_records_to_download:
-             records_this_image = image_records_to_download[k]
-             all_image_records.extend(records_this_image)
-         return write_download_commands(all_image_records,
-                                        download_dir_base=download_dir_base,
-                                        force_download=force_download,
-                                        n_download_workers=n_download_workers,
-                                        download_command_file_base=download_command_file_base)
-
-     ##%% Make list of gcloud storage commands
-
-     if download_command_file_base is None:
-         download_command_file_base = os.path.join(download_dir_base,'download_wi_images.sh')
-
-     commands = []
-     skipped_urls = []
-     downloaded_urls = set()
-
-     # image_record = image_records_to_download[0]
-     for image_record in tqdm(image_records_to_download):
-
-         url = image_record['location']
-         if url in downloaded_urls:
-             continue
-
-         assert url.startswith('gs://')
-
-         relative_path = url.replace('gs://','')
-         abs_path = os.path.join(download_dir_base,relative_path)
-
-         # Skip files that already exist
-         if (not force_download) and (os.path.isfile(abs_path)):
-             skipped_urls.append(url)
-             continue
-
-         # command = 'gsutil cp "{}" "./{}"'.format(url,relative_path)
-         command = 'gcloud storage cp --no-clobber "{}" "./{}"'.format(url,relative_path)
-         commands.append(command)
-
-     print('Generated {} commands for {} image records'.format(
-         len(commands),len(image_records_to_download)))
-
-     print('Skipped {} URLs'.format(len(skipped_urls)))
-
-
-     ##%% Write those commands out to n .sh files
-
-     commands_by_script = split_list_into_n_chunks(commands,n_download_workers)
-
-     local_download_commands = []
-
-     output_dir = os.path.dirname(download_command_file_base)
-     os.makedirs(output_dir,exist_ok=True)
-
-     # Write out the download script for each chunk
-     # i_script = 0
-     for i_script in range(0,n_download_workers):
-         download_command_file = insert_before_extension(download_command_file_base,str(i_script).zfill(2))
-         local_download_commands.append(os.path.basename(download_command_file))
-         with open(download_command_file,'w',newline='\n') as f:
-             for command in commands_by_script[i_script]:
-                 f.write(command + '\n')
-
-     # Write out the main download script
-     with open(download_command_file_base,'w',newline='\n') as f:
-         for local_download_command in local_download_commands:
-             f.write('./' + local_download_command + ' &\n')
-         f.write('wait\n')
-         f.write('echo done\n')
-
- # ...def write_download_commands(...)
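
A sketch of the intended workflow (hypothetical folders; the generated scripts assume an authenticated gcloud installation):

    # Find identify-tab images, then write parallel download scripts
    records = find_images_in_identify_tab('./bundle_with_identify','./bundle_excluding_identify')
    write_download_commands(records,download_dir_base='./images',n_download_workers=8)
    # ...then run download_wi_images.sh from ./images in a shell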
-
-
- #%% Functions and constants related to pushing results to the DB
-
- # Sample payload for validation
- sample_update_payload = {
-
-     "predictions": [
-         {
-             "project_id": "1234",
-             "ignore_data_file_checks": True,
-             "prediction": "f1856211-cfb7-4a5b-9158-c0f72fd09ee6;;;;;;blank",
-             "prediction_score": 0.81218224763870239,
-             "classifications": {
-                 "classes": [
-                     "f1856211-cfb7-4a5b-9158-c0f72fd09ee6;;;;;;blank",
-                     "b1352069-a39c-4a84-a949-60044271c0c1;aves;;;;;bird",
-                     "90d950db-2106-4bd9-a4c1-777604c3eada;mammalia;rodentia;;;;rodent",
-                     "f2d233e3-80e3-433d-9687-e29ecc7a467a;mammalia;;;;;mammal",
-                     "ac068717-6079-4aec-a5ab-99e8d14da40b;mammalia;rodentia;sciuridae;dremomys;rufigenis;red-cheeked squirrel"
-                 ],
-                 "scores": [
-                     0.81218224763870239,
-                     0.1096673980355263,
-                     0.02707692421972752,
-                     0.00771023565903306,
-                     0.0049269795417785636
-                 ]
-             },
-             "detections": [
-                 {
-                     "category": "1",
-                     "label": "animal",
-                     "conf": 0.181,
-                     "bbox": [
-                         0.02421,
-                         0.35823999999999989,
-                         0.051560000000000009,
-                         0.070826666666666746
-                     ]
-                 }
-             ],
-             "model_version": "3.1.2",
-             "prediction_source": "manual_update",
-             "data_file_id": "2ea1d2b2-7f84-43f9-af1f-8be0e69c7015"
-         }
-     ]
- }
-
- blank_prediction_string = 'f1856211-cfb7-4a5b-9158-c0f72fd09ee6;;;;;;blank'
- no_cv_result_prediction_string = 'f2efdae9-efb8-48fb-8a91-eccf79ab4ffb;no cv result;no cv result;no cv result;no cv result;no cv result;no cv result'
- animal_prediction_string = '1f689929-883d-4dae-958c-3d57ab5b6c16;;;;;;animal'
- human_prediction_string = '990ae9dd-7a59-4344-afcb-1b7b21368000;mammalia;primates;hominidae;homo;sapiens;human'
- vehicle_prediction_string = 'e2895ed5-780b-48f6-8a11-9e27cb594511;;;;;;vehicle'
-
- non_taxonomic_prediction_strings = [blank_prediction_string,
-                                     no_cv_result_prediction_string,
-                                     animal_prediction_string,
-                                     vehicle_prediction_string]
-
- non_taxonomic_prediction_short_strings = [';'.join(s.split(';')[1:-1]) for s in \
-                                           non_taxonomic_prediction_strings]
-
-
- process_cv_response_url = 'https://placeholder'
-
-
- def prepare_data_update_auth_headers(auth_token_file):
-     """
-     Read the authorization token from a text file and prepare http headers.
-
-     Args:
-         auth_token_file (str): a single-line text file containing a write-enabled
-             API token.
-
-     Returns:
-         dict: http headers, with fields 'Authorization' and 'Content-Type'
-     """
-
-     with open(auth_token_file,'r') as f:
-         auth_token = f.read()
-
-     headers = {
-         'Authorization': 'Bearer ' + auth_token,
-         'Content-Type': 'application/json'
-     }
-
-     return headers
-
-
- def push_results_for_images(payload,
-                             headers,
-                             url=process_cv_response_url,
-                             verbose=False):
-     """
-     Push results for one or more images represented in [payload] to the
-     process_cv_response API, to write to the WI DB.
-
-     Args:
-         payload (dict): payload to upload to the API
-         headers (dict): authorization headers, see prepare_data_update_auth_headers
-         url (str, optional): API URL
-         verbose (bool, optional): enable additional debug output
-
-     Returns:
-         int: response status code
-     """
-
-     if verbose:
-         print('Sending header {} to URL {}'.format(
-             headers,url))
-
-     response = requests.post(url, headers=headers, json=payload)
-
-     # Check the response status code
-     if response.status_code in (200,201):
-         if verbose:
-             print('Successfully pushed results for {} images'.format(len(payload['predictions'])))
-             print(response.headers)
-             print(str(response))
-     else:
-         print(f'Error: {response.status_code} {response.text}')
-
-     return response.status_code
-
-
- def parallel_push_results_for_images(payloads,
-                                      headers,
-                                      url=process_cv_response_url,
-                                      verbose=False,
-                                      pool_type='thread',
-                                      n_workers=10):
-     """
-     Push results for the list of payloads in [payloads] to the process_cv_response API,
-     parallelized over multiple workers.
-
-     Args:
-         payloads (list of dict): payloads to upload to the API
-         headers (dict): authorization headers, see prepare_data_update_auth_headers
-         url (str, optional): API URL
-         verbose (bool, optional): enable additional debug output
-         pool_type (str, optional): 'thread' or 'process'
-         n_workers (int, optional): number of parallel workers
-
-     Returns:
-         list of int: list of http response codes, one per payload
-     """
-
-     if n_workers == 1:
-
-         results = []
-         for payload in payloads:
-             results.append(push_results_for_images(payload,
-                                                    headers=headers,
-                                                    url=url,
-                                                    verbose=verbose))
-         return results
-
-     else:
-
-         assert pool_type in ('thread','process')
-
-         try:
-             if pool_type == 'thread':
-                 pool_string = 'thread'
-                 pool = ThreadPool(n_workers)
-             else:
-                 pool_string = 'process'
-                 pool = Pool(n_workers)
-
-             print('Created a {} pool of {} workers'.format(
-                 pool_string,n_workers))
-
-             results = list(tqdm(pool.imap(
-                 partial(push_results_for_images,headers=headers,url=url,verbose=verbose),payloads),
-                 total=len(payloads)))
-         finally:
-             pool.close()
-             pool.join()
-             print("Pool closed and joined for WI result uploads")
-
-         assert len(results) == len(payloads)
-         return results
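
A sketch of the push workflow (the token file and URL are hypothetical; process_cv_response_url above is only a placeholder):

    headers = prepare_data_update_auth_headers('wi_auth_token.txt')
    status_codes = parallel_push_results_for_images(payloads,
                                                    headers=headers,
                                                    url='https://example.org/process_cv_response',
                                                    pool_type='thread',
                                                    n_workers=10)
    assert all(code in (200,201) for code in status_codes)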
-
-
- def generate_payload_with_replacement_detections(wi_result,
-                                                  detections,
-                                                  prediction_score=0.9,
-                                                  model_version='3.1.2',
-                                                  prediction_source='manual_update'):
-     """
-     Generate a payload for a single image that keeps the classifications from
-     [wi_result], but replaces the detections with the MD-formatted list [detections].
-
-     Args:
-         wi_result (dict): dict representing a WI prediction result, with at least the
-             fields in the constant wi_result_fields
-         detections (list): list of MD-formatted detection dicts (with fields 'conf' and 'category')
-         prediction_score (float, optional): confidence value to use for the combined prediction
-         model_version (str, optional): model version string to include in the payload
-         prediction_source (str, optional): prediction source string to include in the payload
-
-     Returns:
-         dict: dictionary suitable for uploading via push_results_for_images
-     """
-
-     payload_detections = []
-
-     # detection = detections[0]
-     for detection in detections:
-         detection_out = detection.copy()
-         detection_out['label'] = md_category_id_to_name[detection['category']]
-         if detection_out['conf'] < min_md_output_confidence:
-             detection_out['conf'] = min_md_output_confidence
-         payload_detections.append(detection_out)
-
-     prediction_string = wi_result_to_prediction_string(wi_result)
-
-     prediction = {}
-     prediction['ignore_data_file_checks'] = True
-     prediction['prediction'] = prediction_string
-     prediction['prediction_score'] = prediction_score
-
-     classifications = {}
-     classifications['classes'] = [prediction_string]
-     classifications['scores'] = [prediction_score]
-
-     prediction['classifications'] = classifications
-     prediction['detections'] = payload_detections
-     prediction['model_version'] = model_version
-     prediction['prediction_source'] = prediction_source
-     prediction['data_file_id'] = wi_result['image_id']
-     prediction['project_id'] = str(wi_result['project_id'])
-     payload = {}
-     payload['predictions'] = [prediction]
-
-     return payload
-
-
- def generate_blank_prediction_payload(data_file_id,
-                                       project_id,
-                                       blank_confidence=0.9,
-                                       model_version='3.1.2',
-                                       prediction_source='manual_update'):
-     """
-     Generate a payload that will set a single image to the blank classification, with
-     no detections. Suitable for upload via push_results_for_images.
-
-     Args:
-         data_file_id (str): unique identifier for this image used in the WI DB
-         project_id (int): WI project ID
-         blank_confidence (float, optional): confidence value to associate with this
-             prediction
-         model_version (str, optional): model version string to include in the payload
-         prediction_source (str, optional): prediction source string to include in the payload
-
-     Returns:
-         dict: dictionary suitable for uploading via push_results_for_images
-     """
-
-     prediction = {}
-     prediction['ignore_data_file_checks'] = True
-     prediction['prediction'] = blank_prediction_string
-     prediction['prediction_score'] = blank_confidence
-     prediction['classifications'] = {}
-     prediction['classifications']['classes'] = [blank_prediction_string]
-     prediction['classifications']['scores'] = [blank_confidence]
-     prediction['detections'] = []
-     prediction['model_version'] = model_version
-     prediction['prediction_source'] = prediction_source
-     prediction['data_file_id'] = data_file_id
-     prediction['project_id'] = project_id
-     payload = {}
-     payload['predictions'] = [prediction]
-
-     return payload
-
-
- def generate_no_cv_result_payload(data_file_id,
-                                   project_id,
-                                   no_cv_confidence=0.9,
-                                   model_version='3.1.2',
-                                   prediction_source='manual_update'):
-     """
-     Generate a payload that will set a single image to the "no CV result" classification,
-     with no detections. Suitable for uploading via push_results_for_images.
-
-     Args:
-         data_file_id (str): unique identifier for this image used in the WI DB
-         project_id (int): WI project ID
-         no_cv_confidence (float, optional): confidence value to associate with this
-             prediction
-         model_version (str, optional): model version string to include in the payload
-         prediction_source (str, optional): prediction source string to include in the payload
-
-     Returns:
-         dict: dictionary suitable for uploading via push_results_for_images
-     """
-
-     prediction = {}
-     prediction['ignore_data_file_checks'] = True
-     prediction['prediction'] = no_cv_result_prediction_string
-     prediction['prediction_score'] = no_cv_confidence
-     prediction['classifications'] = {}
-     prediction['classifications']['classes'] = [no_cv_result_prediction_string]
-     prediction['classifications']['scores'] = [no_cv_confidence]
-     prediction['detections'] = []
-     prediction['model_version'] = model_version
-     prediction['prediction_source'] = prediction_source
-     prediction['data_file_id'] = data_file_id
-     prediction['project_id'] = project_id
-     payload = {}
-     payload['predictions'] = [prediction]
-
-     return payload
-
-
- def generate_payload_for_prediction_string(data_file_id,
-                                            project_id,
-                                            prediction_string,
-                                            prediction_confidence=0.8,
-                                            detections=None,
-                                            model_version='3.1.2',
-                                            prediction_source='manual_update'):
-     """
-     Generate a payload that will set a single image to a particular prediction, optionally
-     including detections. Suitable for uploading via push_results_for_images.
-
-     Args:
-         data_file_id (str): unique identifier for this image used in the WI DB
-         project_id (int): WI project ID
-         prediction_string (str): WI-formatted prediction string to include in the payload
-         prediction_confidence (float, optional): confidence value to associate with this
-             prediction
-         detections (list, optional): list of MD-formatted detection dicts, with fields
-             'category' and 'conf'
-         model_version (str, optional): model version string to include in the payload
-         prediction_source (str, optional): prediction source string to include in the payload
-
-     Returns:
-         dict: dictionary suitable for uploading via push_results_for_images
-     """
-
-     assert is_valid_prediction_string(prediction_string), \
-         'Invalid prediction string: {}'.format(prediction_string)
-
-     payload_detections = []
-
-     if detections is not None:
-         # detection = detections[0]
-         for detection in detections:
-             detection_out = detection.copy()
-             detection_out['label'] = md_category_id_to_name[detection['category']]
-             if detection_out['conf'] < min_md_output_confidence:
-                 detection_out['conf'] = min_md_output_confidence
-             payload_detections.append(detection_out)
-
-     prediction = {}
-     prediction['ignore_data_file_checks'] = True
-     prediction['prediction'] = prediction_string
-     prediction['prediction_score'] = prediction_confidence
-     prediction['classifications'] = {}
-     prediction['classifications']['classes'] = [prediction_string]
-     prediction['classifications']['scores'] = [prediction_confidence]
-     prediction['detections'] = payload_detections
-     prediction['model_version'] = model_version
-     prediction['prediction_source'] = prediction_source
-     prediction['data_file_id'] = data_file_id
-     prediction['project_id'] = project_id
-
-     payload = {}
-     payload['predictions'] = [prediction]
-
-     return payload
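
For example, the payload generators and the validator below compose like this (a sketch; the data_file_id is borrowed from the sample payload above):

    payload = generate_payload_for_prediction_string(
        data_file_id='2ea1d2b2-7f84-43f9-af1f-8be0e69c7015',
        project_id='1234',
        prediction_string=animal_prediction_string,
        prediction_confidence=0.9,
        detections=[{'category':'1','conf':0.6,'bbox':[0.1,0.1,0.4,0.4]}])
    validate_payload(payload)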
-
-
- def validate_payload(payload):
-     """
-     Verifies that the dict [payload] is compatible with the ProcessCVResponse() API. Throws an
-     error if [payload] is invalid.
-
-     Args:
-         payload (dict): payload in the format expected by push_results_for_images.
-
-     Returns:
-         bool: successful validation; this is just future-proofing, currently never returns False
-     """
-
-     assert isinstance(payload,dict)
-     assert len(payload.keys()) == 1 and 'predictions' in payload
-
-     # prediction = payload['predictions'][0]
-     for prediction in payload['predictions']:
-
-         assert 'project_id' in prediction
-         if not isinstance(prediction['project_id'],int):
-             _ = int(prediction['project_id'])
-         assert 'ignore_data_file_checks' in prediction and \
-             isinstance(prediction['ignore_data_file_checks'],bool)
-         assert 'prediction' in prediction and \
-             isinstance(prediction['prediction'],str) and \
-             len(prediction['prediction'].split(';')) == 7
-         assert 'prediction_score' in prediction and \
-             isinstance(prediction['prediction_score'],float)
-         assert 'model_version' in prediction and \
-             isinstance(prediction['model_version'],str)
-         assert 'data_file_id' in prediction and \
-             isinstance(prediction['data_file_id'],str) and \
-             len(prediction['data_file_id']) == 36
-         assert 'classifications' in prediction and \
-             isinstance(prediction['classifications'],dict)
-         classifications = prediction['classifications']
-         assert 'classes' in classifications and isinstance(classifications['classes'],list)
-         assert 'scores' in classifications and isinstance(classifications['scores'],list)
-         assert len(classifications['classes']) == len(classifications['scores'])
-         for c in classifications['classes']:
-             assert is_valid_prediction_string(c)
-         for score in classifications['scores']:
-             assert isinstance(score,float) and score >= 0 and score <= 1.0
-         assert 'detections' in prediction and isinstance(prediction['detections'],list)
-
-         for detection in prediction['detections']:
-
-             assert isinstance(detection,dict)
-             assert 'category' in detection and detection['category'] in ('1','2','3')
-             assert 'label' in detection and detection['label'] in ('animal','person','vehicle')
-             assert 'conf' in detection and \
-                 isinstance(detection['conf'],float) and \
-                 detection['conf'] >= 0 and detection['conf'] <= 1.0
-             assert 'bbox' in detection and \
-                 isinstance(detection['bbox'],list) and \
-                 len(detection['bbox']) == 4
-
-         # ...for each detection
-
-     # ...for each prediction
-
-     return True
-
- # ...def validate_payload(...)
-
-
- #%% Validate constants
-
- # This is executed at the time this module gets imported.
-
- blank_payload = generate_blank_prediction_payload('70ede9c6-d056-4dd1-9a0b-3098d8113e0e','1234')
- validate_payload(sample_update_payload)
- validate_payload(blank_payload)
-
-
- #%% Functions and constants related to working with batch predictions
-
- def get_kingdom(prediction_string):
-     """
-     Return the first taxonomy field from a WI prediction string. This is nominally the
-     kingdom, but in practice WI prediction strings start at the class level.
-
-     Args:
-         prediction_string (str): a string in the semicolon-delimited prediction string format
-
-     Returns:
-         str: the first taxonomy field from the input string
-     """
-     tokens = prediction_string.split(';')
-     assert is_valid_prediction_string(prediction_string)
-     return tokens[1]
-
-
- def is_human_classification(prediction_string):
-     """
-     Determines whether the input string represents a human classification, which includes a variety
-     of common names (hiker, person, etc.)
-
-     Args:
-         prediction_string (str): a string in the semicolon-delimited prediction string format
-
-     Returns:
-         bool: whether this string corresponds to a human category
-     """
-     return prediction_string == human_prediction_string or 'homo;sapiens' in prediction_string
-
-
- def is_vehicle_classification(prediction_string):
-     """
-     Determines whether the input string represents a vehicle classification.
-
-     Args:
-         prediction_string (str): a string in the semicolon-delimited prediction string format
-
-     Returns:
-         bool: whether this string corresponds to the vehicle category
-     """
-     return prediction_string == vehicle_prediction_string
-
-
- def is_animal_classification(prediction_string):
-     """
-     Determines whether the input string represents an animal classification, which excludes, e.g.,
-     humans, blanks, vehicles, unknowns.
-
-     Args:
-         prediction_string (str): a string in the semicolon-delimited prediction string format
-
-     Returns:
-         bool: whether this string corresponds to an animal category
-     """
-
-     if prediction_string == animal_prediction_string:
-         return True
-     if prediction_string == human_prediction_string or 'homo;sapiens' in prediction_string:
-         return False
-     if prediction_string == blank_prediction_string:
-         return False
-     if prediction_string == no_cv_result_prediction_string:
-         return False
-     if len(get_kingdom(prediction_string)) == 0:
-         return False
-     return True
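
Concretely (a sketch using the constants defined above):

    assert is_animal_classification(animal_prediction_string)
    assert is_animal_classification('90d950db-2106-4bd9-a4c1-777604c3eada;mammalia;rodentia;;;;rodent')
    assert not is_animal_classification(human_prediction_string)
    assert not is_animal_classification(blank_prediction_string)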
-
-
- def generate_whole_image_detections_for_classifications(classifications_json_file,
-                                                         detections_json_file,
-                                                         ensemble_json_file=None,
-                                                         ignore_blank_classifications=True):
-     """
-     Given a set of classification results in SpeciesNet format that were likely run on
-     already-cropped images, generate a file of [fake] detections in SpeciesNet format in which each
-     image is covered in a single whole-image detection.
-
-     Args:
-         classifications_json_file (str): SpeciesNet-formatted file containing classifications
-         detections_json_file (str): SpeciesNet-formatted file to write with detections
-         ensemble_json_file (str, optional): SpeciesNet-formatted file to write with detections
-             and classifications
-         ignore_blank_classifications (bool, optional): use non-top classifications when
-             the top classification is "blank" or "no CV result"
-
-     Returns:
-         dict: the contents of [detections_json_file]
-     """
-
-     with open(classifications_json_file,'r') as f:
-         classification_results = json.load(f)
-     predictions = classification_results['predictions']
-
-     output_predictions = []
-     ensemble_predictions = []
-
-     # prediction = predictions[0]
-     for prediction in predictions:
-
-         output_prediction = {}
-         output_prediction['filepath'] = prediction['filepath']
-         i_score = 0
-         if ignore_blank_classifications:
-             while (prediction['classifications']['classes'][i_score] in \
-                    (blank_prediction_string,no_cv_result_prediction_string)):
-                 i_score += 1
-         top_classification = prediction['classifications']['classes'][i_score]
-         top_classification_score = prediction['classifications']['scores'][i_score]
-         if is_animal_classification(top_classification):
-             category_name = 'animal'
-         elif is_human_classification(top_classification):
-             category_name = 'human'
-         else:
-             category_name = 'vehicle'
-
-         if category_name == 'human':
-             md_category_name = 'person'
-         else:
-             md_category_name = category_name
-
-         output_detection = {}
-         output_detection['label'] = category_name
-         output_detection['category'] = md_category_name_to_id[md_category_name]
-         output_detection['conf'] = 1.0
-         output_detection['bbox'] = [0.0, 0.0, 1.0, 1.0]
-         output_prediction['detections'] = [output_detection]
-         output_predictions.append(output_prediction)
-
-         ensemble_prediction = {}
-         ensemble_prediction['filepath'] = prediction['filepath']
-         ensemble_prediction['detections'] = [output_detection]
-         ensemble_prediction['prediction'] = top_classification
-         ensemble_prediction['prediction_score'] = top_classification_score
-         ensemble_prediction['prediction_source'] = 'fake_ensemble_file_utility'
-         ensemble_prediction['classifications'] = prediction['classifications']
-         ensemble_predictions.append(ensemble_prediction)
-
-     # ...for each image
-
-     ## Write output
-
-     if ensemble_json_file is not None:
-
-         ensemble_output_data = {'predictions':ensemble_predictions}
-         with open(ensemble_json_file,'w') as f:
-             json.dump(ensemble_output_data,f,indent=1)
-         _ = validate_predictions_file(ensemble_json_file)
-
-     output_data = {'predictions':output_predictions}
-     with open(detections_json_file,'w') as f:
-         json.dump(output_data,f,indent=1)
-     return validate_predictions_file(detections_json_file)
-
- # ...def generate_whole_image_detections_for_classifications(...)
-
-
- def generate_md_results_from_predictions_json(predictions_json_file,
-                                               md_results_file,
-                                               base_folder=None,
-                                               max_decimals=5,
-                                               convert_human_to_person=True,
-                                               convert_homo_species_to_human=True):
-     """
-     Generate an MD-formatted .json file from a predictions.json file, generated by the
-     SpeciesNet ensemble. Typically, MD results files use relative paths, and predictions.json
-     files use absolute paths, so this function optionally removes the leading string
-     [base_folder] from all file names.
-
-     Currently just applies the top classification category to every detection. If the top
-     classification is "blank", writes an empty detection list.
-
-     Uses the classification from the "prediction" field if it's available, otherwise
-     uses the "classifications" field.
-
-     When using the "prediction" field, records the top class in the "classifications" field to
-     a field in each image called "top_classification_common_name". This is often different
-     from the value of the "prediction" field.
-
-     speciesnet_to_md.py is a command-line driver for this function.
-
-     Args:
-         predictions_json_file (str): path to a predictions.json file, or a dict
-         md_results_file (str): path to which we should write an MD-formatted .json file
-         base_folder (str, optional): leading string to remove from each path in the
-             predictions.json file
-         max_decimals (int, optional): number of decimal places to which we should round
-             all values
-         convert_human_to_person (bool, optional): WI predictions.json files sometimes use the
-             detection category "human"; MD files usually use "person". If True, switches "human"
-             to "person".
-         convert_homo_species_to_human (bool, optional): the ensemble often rolls human predictions
-             up to "homo species", which isn't wrong, but looks odd. This forces these back to
-             "homo sapiens".
-     """
-
-     # Read predictions file
-     if isinstance(predictions_json_file,str):
-         with open(predictions_json_file,'r') as f:
-             predictions = json.load(f)
-     else:
-         assert isinstance(predictions_json_file,dict)
-         predictions = predictions_json_file
-
-     # Round floating-point values (confidence scores, coordinates) to a
-     # reasonable number of decimal places
-     if max_decimals is not None and max_decimals > 0:
-         round_floats_in_nested_dict(predictions)
-
-     predictions = predictions['predictions']
-     assert isinstance(predictions,list)
-
-     # Convert backslashes to forward slashes in both filenames and the base folder string
-     for im in predictions:
-         im['filepath'] = im['filepath'].replace('\\','/')
-     if base_folder is not None:
-         base_folder = base_folder.replace('\\','/')
-
-     detection_category_id_to_name = {}
-     classification_category_name_to_id = {}
-
-     # Keep track of detections that don't have an assigned detection category; these
-     # are fake detections we create for non-blank images with empty detection lists.
-     # We need to go back later and give them a legitimate detection category ID.
-     all_unknown_detections = []
-
-     # Create the output images list
-     images_out = []
-
-     base_folder_replacements = 0
-
-     # im_in = predictions[0]
-     for im_in in predictions:
-
-         im_out = {}
-
-         fn = im_in['filepath']
-         if base_folder is not None:
-             if fn.startswith(base_folder):
-                 base_folder_replacements += 1
-                 fn = fn.replace(base_folder,'',1)
-
-         im_out['file'] = fn
-
-         if 'failures' in im_in:
-
-             im_out['failure'] = str(im_in['failures'])
-             im_out['detections'] = None
-
-         else:
-
-             im_out['detections'] = []
-
-             if 'detections' in im_in:
-
-                 if len(im_in['detections']) == 0:
-                     im_out['detections'] = []
-                 else:
-                     # det_in = im_in['detections'][0]
-                     for det_in in im_in['detections']:
-                         det_out = {}
-                         if det_in['category'] in detection_category_id_to_name:
-                             assert detection_category_id_to_name[det_in['category']] == det_in['label']
-                         else:
-                             detection_category_id_to_name[det_in['category']] = det_in['label']
-                         det_out = {}
-                         for s in ['category','conf','bbox']:
-                             det_out[s] = det_in[s]
-                         im_out['detections'].append(det_out)
-
-             # ...if detections are present
-
-             class_to_assign = None
-             class_confidence = None
-             top_classification_common_name = None
-
-             if 'classifications' in im_in:
-
-                 classifications = im_in['classifications']
-                 assert len(classifications['scores']) == len(classifications['classes'])
-                 assert is_list_sorted(classifications['scores'],reverse=True)
-                 class_to_assign = classifications['classes'][0]
-                 class_confidence = classifications['scores'][0]
-
-                 tokens = class_to_assign.split(';')
-                 assert len(tokens) == 7
-                 top_classification_common_name = tokens[-1]
-                 if len(top_classification_common_name) == 0:
-                     top_classification_common_name = 'undefined'
-
-             if 'prediction' in im_in:
-
-                 class_to_assign = None
-                 im_out['top_classification_common_name'] = top_classification_common_name
-                 class_to_assign = im_in['prediction']
-                 if convert_homo_species_to_human and class_to_assign.endswith('homo species'):
-                     class_to_assign = human_prediction_string
-                 class_confidence = im_in['prediction_score']
-
-             if class_to_assign is not None:
-
-                 if class_to_assign == blank_prediction_string:
-
-                     # This is a scenario that's not captured well by the MD format: a blank prediction
-                     # with detections present. But, for now, don't do anything special here, just making
-                     # a note of this.
-                     if len(im_out['detections']) > 0:
-                         pass
-
-                 else:
-
-                     assert not class_to_assign.endswith('blank')
-
-                     # This is a scenario that's not captured well by the MD format: no detections present,
-                     # but a non-blank prediction. For now, create a fake detection to handle this prediction.
-                     if len(im_out['detections']) == 0:
-
-                         print('Warning: creating fake detection for non-blank whole-image classification')
-                         det_out = {}
-                         all_unknown_detections.append(det_out)
-
-                         # We will change this to a string-int later
-                         det_out['category'] = 'unknown'
-                         det_out['conf'] = class_confidence
-                         det_out['bbox'] = [0,0,1,1]
-                         im_out['detections'].append(det_out)
-
-                 # ...if this is/isn't a blank classification
-
-                 # Attach that classification to each detection
-
-                 # Create a new category ID if necessary
-                 if class_to_assign in classification_category_name_to_id:
-                     classification_category_id = classification_category_name_to_id[class_to_assign]
-                 else:
-                     classification_category_id = str(len(classification_category_name_to_id))
-                     classification_category_name_to_id[class_to_assign] = classification_category_id
-
-                 for det in im_out['detections']:
-                     det['classifications'] = []
-                     det['classifications'].append([classification_category_id,class_confidence])
-
-             # ...if we have some type of classification for this image
-
-         # ...if this is/isn't a failure
-
-         images_out.append(im_out)
-
-     # ...for each image
-
-     if base_folder is not None:
-         if base_folder_replacements == 0:
-             print('Warning: you supplied {} as the base folder, but I made zero replacements'.format(
-                 base_folder))
-
-     # Fix the 'unknown' category
-     if len(all_unknown_detections) > 0:
-
-         max_detection_category_id = max([int(x) for x in detection_category_id_to_name.keys()])
-         unknown_category_id = str(max_detection_category_id + 1)
-         detection_category_id_to_name[unknown_category_id] = 'unknown'
-
-         for det in all_unknown_detections:
-             assert det['category'] == 'unknown'
-             det['category'] = unknown_category_id
-
-
-     # Sort by filename
-
-     images_out = sort_list_of_dicts_by_key(images_out,'file')
-
-     # Prepare friendly classification names
-
-     classification_category_descriptions = \
-         invert_dictionary(classification_category_name_to_id)
-     classification_categories_out = {}
-     for category_id in classification_category_descriptions.keys():
-         category_name = classification_category_descriptions[category_id].split(';')[-1]
-         classification_categories_out[category_id] = category_name
-
-     # Prepare the output dict
-
-     detection_categories_out = detection_category_id_to_name
-     info = {}
-     info['format_version'] = 1.4
-     info['detector'] = 'converted_from_predictions_json'
-
-     if convert_human_to_person:
-         for k in detection_categories_out.keys():
-             if detection_categories_out[k] == 'human':
-                 detection_categories_out[k] = 'person'
-
-     output_dict = {}
-     output_dict['info'] = info
-     output_dict['detection_categories'] = detection_categories_out
-     output_dict['classification_categories'] = classification_categories_out
-     output_dict['classification_category_descriptions'] = classification_category_descriptions
-     output_dict['images'] = images_out
-
-     with open(md_results_file,'w') as f:
-         json.dump(output_dict,f,indent=1)
-
-     validation_options = ValidateBatchResultsOptions()
-     validation_options.raise_errors = True
-     _ = validate_batch_results(md_results_file, options=validation_options)
-
- # ...def generate_md_results_from_predictions_json(...)
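
A sketch of the typical conversion (hypothetical paths; speciesnet_to_md.py wraps this call):

    generate_md_results_from_predictions_json('predictions.json',
                                              'md_results.json',
                                              base_folder='/data/images/')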
-
-
- def generate_predictions_json_from_md_results(md_results_file,
-                                               predictions_json_file,
-                                               base_folder=None):
-     """
-     Generate a predictions.json file from the MD-formatted .json file [md_results_file]. Typically,
-     MD results files use relative paths, and predictions.json files use absolute paths, so
-     this function optionally prepends [base_folder]. Does not handle classification results in
-     MD format, since this is intended to prepare data for passing through the WI classifier.
-
-     md_to_wi.py is a command-line driver for this function.
-
-     Args:
-         md_results_file (str): path to an MD-formatted .json file
-         predictions_json_file (str): path to which we should write a predictions.json file
-         base_folder (str, optional): folder name to prepend to each path in md_results_file,
-             to convert relative paths to absolute paths.
-     """
-
-     # Validate the input file
-     validation_options = ValidateBatchResultsOptions()
-     validation_options.raise_errors = True
-     validation_options.return_data = True
-     md_results = validate_batch_results(md_results_file, options=validation_options)
-     category_id_to_name = md_results['detection_categories']
-
-     output_dict = {}
-     output_dict['predictions'] = []
-
-     # im = md_results['images'][0]
-     for im in md_results['images']:
-
-         prediction = {}
-         fn = im['file']
-         if base_folder is not None:
-             fn = os.path.join(base_folder,fn)
-         fn = fn.replace('\\','/')
-         prediction['filepath'] = fn
-         if 'failure' in im and im['failure'] is not None:
-             prediction['failures'] = ['DETECTOR']
-         else:
-             assert 'detections' in im and im['detections'] is not None
-             detections = []
-             for det in im['detections']:
-                 output_det = deepcopy(det)
-                 output_det['label'] = category_id_to_name[det['category']]
-                 detections.append(output_det)
-
-             # detections *must* be sorted in descending order by confidence
-             detections = sort_list_of_dicts_by_key(detections,'conf', reverse=True)
-             prediction['detections'] = detections
-
-         assert len(prediction.keys()) >= 2
-         output_dict['predictions'].append(prediction)
-
-     # ...for each image
-
-     os.makedirs(os.path.dirname(predictions_json_file),exist_ok=True)
-     with open(predictions_json_file,'w') as f:
-         json.dump(output_dict,f,indent=1)
-
- # ...def generate_predictions_json_from_md_results(...)
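
And the reverse direction, e.g. to prepare MD output for the WI classifier (hypothetical paths; md_to_wi.py wraps this call):

    generate_predictions_json_from_md_results('md_results.json',
                                              'predictions_from_md.json',
                                              base_folder='/data/images')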
1469
-
1470
-

default_tokens_to_ignore = ['$RECYCLE.BIN']


def generate_instances_json_from_folder(folder,
                                        country=None,
                                        admin1_region=None,
                                        lat=None,
                                        lon=None,
                                        output_file=None,
                                        filename_replacements=None,
                                        tokens_to_ignore=default_tokens_to_ignore):
    """
    Generate an instances.json record that contains all images in [folder], optionally
    including location information, in a format suitable for run_model.py. Optionally writes
    the results to [output_file].

    Args:
        folder (str): the folder to recursively search for images
        country (str, optional): a three-letter country code
        admin1_region (str, optional): an administrative region code, typically a two-letter
            US state code
        lat (float, optional): latitude to associate with all images
        lon (float, optional): longitude to associate with all images
        output_file (str, optional): .json file to which we should write instance records
        filename_replacements (dict, optional): str --> str dict indicating filename substrings
            that should be replaced with other strings. Replacement occurs *after* converting
            backslashes to forward slashes.
        tokens_to_ignore (list, optional): ignore any images with these tokens in their
            names, typically used to avoid $RECYCLE.BIN. Can be None.

    Returns:
        dict: dict with at least the field "instances"
    """

    assert os.path.isdir(folder)

    image_files_abs = find_images(folder,recursive=True,return_relative_paths=False)

    if tokens_to_ignore is not None:
        n_images_before_ignore_tokens = len(image_files_abs)
        for token in tokens_to_ignore:
            image_files_abs = [fn for fn in image_files_abs if token not in fn]
        print('After ignoring {} tokens, kept {} of {} images'.format(
            len(tokens_to_ignore),len(image_files_abs),n_images_before_ignore_tokens))

    instances = []

    # image_fn_abs = image_files_abs[0]
    for image_fn_abs in image_files_abs:
        instance = {}
        instance['filepath'] = image_fn_abs.replace('\\','/')
        if filename_replacements is not None:
            for s in filename_replacements:
                instance['filepath'] = instance['filepath'].replace(s,filename_replacements[s])
        if country is not None:
            instance['country'] = country
        if admin1_region is not None:
            instance['admin1_region'] = admin1_region
        if lat is not None:
            assert lon is not None, 'Latitude provided without longitude'
            instance['latitude'] = lat
        if lon is not None:
            assert lat is not None, 'Longitude provided without latitude'
            instance['longitude'] = lon
        instances.append(instance)

    to_return = {'instances':instances}

    if output_file is not None:
        os.makedirs(os.path.dirname(output_file),exist_ok=True)
        with open(output_file,'w') as f:
            json.dump(to_return,f,indent=1)

    return to_return

# ...def generate_instances_json_from_folder(...)
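
# A short usage sketch for generate_instances_json_from_folder(); the folder and
# location values are hypothetical. Per the code above, each resulting instance
# record looks like:
#
# {'filepath': '...', 'country': 'USA', 'admin1_region': 'WA',
#  'latitude': 47.6, 'longitude': -122.3}

if False:

    #%% Build an instances.json file for a folder of images

    instances_dict = generate_instances_json_from_folder(
        folder='/data/camera-trap-images',
        country='USA',
        admin1_region='WA',
        lat=47.6,
        lon=-122.3,
        output_file='instances.json')
    print('Generated {} instances'.format(len(instances_dict['instances'])))
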

def split_instances_into_n_batches(instances_json,n_batches,output_files=None):
    """
    Given an instances.json file, split it into batches of approximately equal size.

    Args:
        instances_json (str): input .json file in instances.json format
        n_batches (int): number of new files to generate
        output_files (list, optional): output .json files for each
            batch. If supplied, should have length [n_batches]. If not
            supplied, filenames will be generated based on [instances_json].

    Returns:
        list: list of output files that were written; identical to [output_files]
        if it was supplied as input.
    """

    with open(instances_json,'r') as f:
        instances = json.load(f)
    assert isinstance(instances,dict) and 'instances' in instances
    instances = instances['instances']

    if output_files is not None:
        assert len(output_files) == n_batches, \
            'Expected {} output files, received {}'.format(
                n_batches,len(output_files))
    else:
        output_files = []
        for i_batch in range(0,n_batches):
            batch_string = 'batch_{}'.format(str(i_batch).zfill(3))
            output_files.append(insert_before_extension(instances_json,batch_string))

    batches = split_list_into_n_chunks(instances, n_batches)

    for i_batch,batch in enumerate(batches):
        batch_dict = {'instances':batch}
        with open(output_files[i_batch],'w') as f:
            json.dump(batch_dict,f,indent=1)

    print('Wrote {} batches to file'.format(n_batches))

    return output_files


def merge_prediction_json_files(input_prediction_files,output_prediction_file):
    """
    Merge all predictions.json files in [input_prediction_files] into a single
    .json file.

    Args:
        input_prediction_files (list): list of predictions.json files to merge
        output_prediction_file (str): output .json file
    """

    predictions = []
    image_filenames_processed = set()

    # input_json_fn = input_prediction_files[0]
    for input_json_fn in tqdm(input_prediction_files):

        assert os.path.isfile(input_json_fn), \
            'Could not find prediction file {}'.format(input_json_fn)
        with open(input_json_fn,'r') as f:
            results_this_file = json.load(f)
        assert isinstance(results_this_file,dict)
        predictions_this_file = results_this_file['predictions']
        for prediction in predictions_this_file:
            image_fn = prediction['filepath']
            assert image_fn not in image_filenames_processed, \
                'Duplicate image {}'.format(image_fn)
            image_filenames_processed.add(image_fn)
        predictions.extend(predictions_this_file)

    output_dict = {'predictions':predictions}

    os.makedirs(os.path.dirname(output_prediction_file),exist_ok=True)
    with open(output_prediction_file,'w') as f:
        json.dump(output_dict,f,indent=1)

# ...def merge_prediction_json_files(...)
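
# A sketch of the intended round trip for the two functions above: split a large
# instances.json file into batches (e.g. to run on multiple GPUs), then merge the
# per-batch predictions.json files. Filenames here are hypothetical.

if False:

    #%% Split instances, run the model per batch, then merge predictions

    batch_instance_files = split_instances_into_n_batches('instances.json',n_batches=4)

    # ...run the model on each batch, producing one predictions file per batch...

    batch_prediction_files = ['predictions_batch_{}.json'.format(str(i).zfill(3))
                              for i in range(4)]
    merge_prediction_json_files(batch_prediction_files,'predictions_merged.json')
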

def load_md_or_speciesnet_file(fn,verbose=True):
    """
    Load a .json file that may be in MD or SpeciesNet format. Typically used so
    SpeciesNet files can be supplied to functions originally written to support MD
    format.

    Args:
        fn (str): a .json file in predictions.json (MD or SpeciesNet) format
        verbose (bool, optional): enable additional debug output

    Returns:
        dict: the contents of [fn], in MD format.
    """

    with open(fn,'r') as f:
        detector_output = json.load(f)

    # Convert to MD format if necessary
    if 'predictions' in detector_output:
        if verbose:
            print('This appears to be a SpeciesNet output file, converting to MD format')
        md_temp_dir = os.path.join(tempfile.gettempdir(), 'megadetector_temp_files')
        os.makedirs(md_temp_dir,exist_ok=True)
        temp_results_file = os.path.join(md_temp_dir,str(uuid.uuid1()) + '.json')
        if verbose:
            print('Writing temporary results to {}'.format(temp_results_file))
        generate_md_results_from_predictions_json(predictions_json_file=fn,
                                                  md_results_file=temp_results_file,
                                                  base_folder=None)
        with open(temp_results_file,'r') as f:
            detector_output = json.load(f)
        try:
            os.remove(temp_results_file)
        except Exception:
            if verbose:
                print('Warning: error removing temporary .json {}'.format(temp_results_file))

    assert 'images' in detector_output, \
        'Detector output file should be a json file with an "images" field.'

    return detector_output

# ...def load_md_or_speciesnet_file(...)
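
# A usage sketch for load_md_or_speciesnet_file(); the filename is hypothetical.
# Either an MD-formatted or SpeciesNet-formatted file can be supplied; the return
# value is always in MD format.

if False:

    #%% Load results regardless of format

    detector_output = load_md_or_speciesnet_file('results.json')
    print('Loaded results for {} images'.format(len(detector_output['images'])))
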

def validate_predictions_file(fn,instances=None,verbose=True):
    """
    Validate the predictions.json file [fn].

    Args:
        fn (str): a .json file in predictions.json (SpeciesNet) format
        instances (str or dict, optional): a folder, instances.json file,
            or dict loaded from an instances.json file. If supplied, this
            function will verify that [fn] contains the same number of
            images as [instances].
        verbose (bool, optional): enable additional debug output

    Returns:
        dict: the contents of [fn]
    """

    with open(fn,'r') as f:
        d = json.load(f)
    predictions = d['predictions']

    failures = []

    for im in predictions:
        if 'failures' in im:
            failures.append(im)

    if verbose:
        print('Read predictions for {} images, with {} failure(s)'.format(
            len(d['predictions']),len(failures)))

    if instances is not None:

        if isinstance(instances,str):
            if os.path.isdir(instances):
                instances = generate_instances_json_from_folder(folder=instances)
            elif os.path.isfile(instances):
                with open(instances,'r') as f:
                    instances = json.load(f)
            else:
                raise ValueError('Could not find instances file/folder {}'.format(
                    instances))
        assert isinstance(instances,dict)
        assert 'instances' in instances
        instances = instances['instances']
        if verbose:
            print('Expected results for {} files'.format(len(instances)))
        assert len(instances) == len(predictions), \
            '{} instances expected, {} found'.format(
                len(instances),len(predictions))

        expected_files = set([instance['filepath'] for instance in instances])
        found_files = set([prediction['filepath'] for prediction in predictions])
        assert expected_files == found_files

    # ...if a list of instances was supplied

    return d

# ...def validate_predictions_file(...)
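
# A usage sketch for validate_predictions_file(); filenames are hypothetical.
# Supplying [instances] additionally verifies that the predictions cover exactly
# the expected set of images.

if False:

    #%% Validate a predictions file against the instances it was generated from

    predictions_dict = validate_predictions_file('predictions.json',
                                                 instances='instances.json')
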

def find_geofence_adjustments(ensemble_json_file,use_latin_names=False):
    """
    Count the number of instances of each unique change made by the geofence.

    Args:
        ensemble_json_file (str): SpeciesNet-formatted .json file produced
            by the full ensemble.
        use_latin_names (bool, optional): return a mapping using binomial names
            rather than common names.

    Returns:
        dict: maps strings that look like "puma,felidae family" to integers,
        where that entry would indicate the number of times that "puma" was
        predicted, but mapped to family level by the geofence. Sorted in
        descending order by count.
    """

    # Load and validate ensemble results
    ensemble_results = validate_predictions_file(ensemble_json_file)

    assert isinstance(ensemble_results,dict)
    predictions = ensemble_results['predictions']

    # Maps comma-separated pairs of common names (or binomial names) to
    # the number of times that transition (first --> second) happened
    rollup_pair_to_count = defaultdict(int)

    # prediction = predictions[0]
    for prediction in tqdm(predictions):

        if 'failures' in prediction and \
           prediction['failures'] is not None and \
           len(prediction['failures']) > 0:
            continue

        assert 'prediction_source' in prediction, \
            'Prediction present without [prediction_source] field, are you sure this ' + \
            'is an ensemble output file?'

        if 'geofence' in prediction['prediction_source']:

            classification_taxonomy_string = \
                prediction['classifications']['classes'][0]
            prediction_taxonomy_string = prediction['prediction']
            assert is_valid_prediction_string(classification_taxonomy_string)
            assert is_valid_prediction_string(prediction_taxonomy_string)

            # Typical examples:
            # '86f5b978-4f30-40cc-bd08-be9e3fba27a0;mammalia;rodentia;sciuridae;sciurus;carolinensis;eastern gray squirrel'
            # 'e4d1e892-0e4b-475a-a8ac-b5c3502e0d55;mammalia;rodentia;sciuridae;;;sciuridae family'
            classification_common_name = classification_taxonomy_string.split(';')[-1]
            prediction_common_name = prediction_taxonomy_string.split(';')[-1]
            classification_binomial_name = classification_taxonomy_string.split(';')[-2]
            prediction_binomial_name = prediction_taxonomy_string.split(';')[-2]

            input_name = classification_binomial_name if use_latin_names else \
                classification_common_name
            output_name = prediction_binomial_name if use_latin_names else \
                prediction_common_name

            rollup_pair = input_name.strip() + ',' + output_name.strip()
            rollup_pair_to_count[rollup_pair] += 1

        # ...if we made a geofencing change

    # ...for each prediction

    rollup_pair_to_count = sort_dictionary_by_value(rollup_pair_to_count,reverse=True)

    return rollup_pair_to_count

# ...def find_geofence_adjustments(...)
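
# A usage sketch for find_geofence_adjustments(); the filename is hypothetical,
# and the counts below are made up for illustration. The returned dict might look
# like, e.g.:
#
# {'puma,felidae family': 122, 'eastern gray squirrel,sciuridae family': 40}

if False:

    #%% Count geofence rollups in an ensemble output file

    rollup_pair_to_count = find_geofence_adjustments('ensemble_predictions.json')
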

def generate_geofence_adjustment_html_summary(rollup_pair_to_count,min_count=10):
    """
    Given a dict of geofence rollup counts, likely generated by find_geofence_adjustments,
    generate an HTML summary of the changes made by geofencing. The resulting HTML
    is wrapped in <div>, but not, for example, in <html> or <body>.

    Args:
        rollup_pair_to_count (dict): changes made by geofencing, mapping
            comma-separated transition pairs to counts, see
            find_geofence_adjustments for details
        min_count (int, optional): minimum number of changes a pair needs in order
            to be included in the report.

    Returns:
        str: HTML summary of the changes made by geofencing
    """

    geofence_footer = ''

    # Restrict to the transitions that occurred at least [min_count] times
    rollup_pair_to_count = \
        {key: value for key, value in rollup_pair_to_count.items() if value >= min_count}

    # rollup_pair_to_count is sorted in descending order by count
    assert is_list_sorted(list(rollup_pair_to_count.values()),reverse=True)

    if len(rollup_pair_to_count) > 0:

        geofence_footer = \
            '<h3>Geofence changes that occurred at least {} times</h3>\n'.format(min_count)
        geofence_footer += '<div class="contentdiv">\n'

        print('\nRollup changes with count >= {}:'.format(min_count))
        for rollup_pair in rollup_pair_to_count.keys():
            count = rollup_pair_to_count[rollup_pair]
            rollup_pair_s = rollup_pair.replace(',',' --> ')
            print('{}: {}'.format(rollup_pair_s,count))
            rollup_pair_html = rollup_pair.replace(',',' &rarr; ')
            geofence_footer += '{} ({})<br/>\n'.format(rollup_pair_html,count)

        geofence_footer += '</div>\n'

    return geofence_footer

# ...def generate_geofence_adjustment_html_summary(...)
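
# A sketch chaining the two functions above: count geofence rollups, then render
# them as an HTML fragment. The filename is hypothetical.

if False:

    #%% Summarize geofence adjustments as HTML

    rollup_pair_to_count = find_geofence_adjustments('ensemble_predictions.json')
    html_fragment = generate_geofence_adjustment_html_summary(rollup_pair_to_count,
                                                              min_count=10)
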

#%% Module-level globals related to taxonomy mapping and geofencing

# This maps a taxonomy string (e.g. mammalia;cetartiodactyla;cervidae;odocoileus;virginianus) to
# a dict with keys taxon_id, common_name, kingdom, phylum, class, order, family, genus, species
taxonomy_string_to_taxonomy_info = None

# Maps a binomial name (one, two, or three whitespace-delimited tokens) to the same dict
# described above.
binomial_name_to_taxonomy_info = None

# Maps a common name to the same dict described above
common_name_to_taxonomy_info = None

# Dict mapping 5-token semicolon-delimited taxonomy strings to geofencing rules
taxonomy_string_to_geofencing_rules = None

# Maps lower-case country names to upper-case country codes
country_to_country_code = None

# Maps upper-case country codes to lower-case country names
country_code_to_country = None

#%% Functions related to geofencing and taxonomy mapping

def taxonomy_info_to_taxonomy_string(taxonomy_info, include_taxon_id_and_common_name=False):
    """
    Convert a taxonomy record in dict format to a five- or seven-token semicolon-delimited
    string.

    Args:
        taxonomy_info (dict): dict in the format stored in, e.g., taxonomy_string_to_taxonomy_info
        include_taxon_id_and_common_name (bool, optional): by default, this function returns a
            five-token string of latin names; if this argument is True, it includes the leading
            (GUID) and trailing (common name) tokens

    Returns:
        str: string in the format used as keys in, e.g., taxonomy_string_to_taxonomy_info
    """

    s = taxonomy_info['class'] + ';' + \
        taxonomy_info['order'] + ';' + \
        taxonomy_info['family'] + ';' + \
        taxonomy_info['genus'] + ';' + \
        taxonomy_info['species']

    if include_taxon_id_and_common_name:
        s = taxonomy_info['taxon_id'] + ';' + s + ';' + taxonomy_info['common_name']

    return s

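# An illustrative record for taxonomy_info_to_taxonomy_string(), based on the
# white-tailed deer example used elsewhere in this module:

if False:

    #%% Convert a taxonomy record to a taxonomy string

    taxonomy_info = {'taxon_id': '5c7ce479-8a45-40b3-ae21-7c97dfae22f5',
                     'kingdom': 'animal', 'phylum': 'chordata',
                     'class': 'mammalia', 'order': 'cetartiodactyla',
                     'family': 'cervidae', 'genus': 'odocoileus',
                     'species': 'virginianus', 'common_name': 'white-tailed deer'}

    s = taxonomy_info_to_taxonomy_string(taxonomy_info)
    assert s == 'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus'
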

def initialize_taxonomy_info(taxonomy_file,force_init=False,encoding='cp1252'):
    """
    Load WI taxonomy information from a .json file. Stores information in the global
    dicts [taxonomy_string_to_taxonomy_info], [binomial_name_to_taxonomy_info], and
    [common_name_to_taxonomy_info].

    Args:
        taxonomy_file (str): .json file containing mappings from the short taxonomy strings
            to the longer strings with GUID and common name, see example below.
        force_init (bool, optional): if the output dicts already exist, should we
            re-initialize anyway?
        encoding (str, optional): character encoding to use when opening the .json file
    """

    if encoding is None:
        encoding = 'cp1252'

    global taxonomy_string_to_taxonomy_info
    global binomial_name_to_taxonomy_info
    global common_name_to_taxonomy_info

    if (taxonomy_string_to_taxonomy_info is not None) and (not force_init):
        return

    """
    Taxonomy keys are taxonomy strings, e.g.:

    'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus'

    Taxonomy values are extended strings w/Taxon IDs and common names, e.g.:

    '5c7ce479-8a45-40b3-ae21-7c97dfae22f5;mammalia;cetartiodactyla;cervidae;odocoileus;virginianus;white-tailed deer'
    """

    with open(taxonomy_file,encoding=encoding,errors='ignore') as f:
        taxonomy_table = json.load(f,strict=False)

    # Right now I'm punting on some unusual-character issues, but here is some scrap that
    # might help address this in the future
    if False:
        import codecs
        with codecs.open(taxonomy_file,'r',encoding=encoding,errors='ignore') as f:
            s = f.read()
        import unicodedata
        s = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore')
        taxonomy_table = json.loads(s,strict=False)

    taxonomy_string_to_taxonomy_info = {}
    binomial_name_to_taxonomy_info = {}
    common_name_to_taxonomy_info = {}

    # taxonomy_string = next(iter(taxonomy_table.keys()))
    for taxonomy_string in taxonomy_table.keys():

        taxonomy_string = taxonomy_string.lower()

        taxon_info = {}
        extended_string = taxonomy_table[taxonomy_string]
        tokens = extended_string.split(';')
        assert len(tokens) == 7
        taxon_info['taxon_id'] = tokens[0]
        assert len(taxon_info['taxon_id']) == 36
        taxon_info['kingdom'] = 'animal'
        taxon_info['phylum'] = 'chordata'
        taxon_info['class'] = tokens[1]
        taxon_info['order'] = tokens[2]
        taxon_info['family'] = tokens[3]
        taxon_info['genus'] = tokens[4]
        taxon_info['species'] = tokens[5]
        taxon_info['common_name'] = tokens[6]

        if taxon_info['common_name'] != '':
            common_name_to_taxonomy_info[taxon_info['common_name']] = taxon_info

        taxonomy_string_to_taxonomy_info[taxonomy_string] = taxon_info

        binomial_name = None
        if len(tokens[4]) > 0 and len(tokens[5]) > 0:
            # strip(), but don't remove spaces from the species name;
            # subspecies are separated with a space, e.g. canis;lupus dingo
            binomial_name = tokens[4].strip() + ' ' + tokens[5].strip()
        elif len(tokens[4]) > 0:
            binomial_name = tokens[4].strip()
        elif len(tokens[3]) > 0:
            binomial_name = tokens[3].strip()
        elif len(tokens[2]) > 0:
            binomial_name = tokens[2].strip()
        elif len(tokens[1]) > 0:
            binomial_name = tokens[1].strip()
        if binomial_name is None:
            # print('Warning: no binomial name for {}'.format(taxonomy_string))
            pass
        else:
            binomial_name_to_taxonomy_info[binomial_name] = taxon_info

    print('Created {} records in taxonomy_string_to_taxonomy_info'.format(len(taxonomy_string_to_taxonomy_info)))
    print('Created {} records in common_name_to_taxonomy_info'.format(len(common_name_to_taxonomy_info)))

# ...def initialize_taxonomy_info(...)

def _parse_code_list(codes):
    """
    Turn a list of country or state codes in string, delimited string, or list format
    into a list. Also does basic validity checking.
    """

    if not isinstance(codes,list):

        assert isinstance(codes,str)

        codes = codes.strip()

        # This is just a single code
        if ',' not in codes:
            codes = [codes]
        else:
            codes = codes.split(',')
            codes = [c.strip() for c in codes]

    assert isinstance(codes,list)

    codes = [c.upper().strip() for c in codes]

    for c in codes:
        assert len(c) in (2,3)

    return codes


def _generate_csv_rows_to_block_all_countries_except(
        species_string,
        block_except_list):
    """
    Generate rows in the format expected by geofence_fixes.csv, representing a list of
    allow and block rules that blocks all countries currently allowed for this species
    except those in [block_except_list], and adds allow rules for those countries.
    """

    assert is_valid_taxonomy_string(species_string), \
        '{} is not a valid taxonomy string'.format(species_string)

    global taxonomy_string_to_taxonomy_info
    global binomial_name_to_taxonomy_info
    global common_name_to_taxonomy_info

    assert taxonomy_string_to_geofencing_rules is not None, \
        'Initialize geofencing prior to species lookup'
    assert taxonomy_string_to_taxonomy_info is not None, \
        'Initialize taxonomy lookup prior to species lookup'

    geofencing_rules_this_species = \
        taxonomy_string_to_geofencing_rules[species_string]

    allowed_countries = []
    if 'allow' in geofencing_rules_this_species:
        allowed_countries.extend(geofencing_rules_this_species['allow'])

    blocked_countries = []
    if 'block' in geofencing_rules_this_species:
        blocked_countries.extend(geofencing_rules_this_species['block'])

    block_except_list = _parse_code_list(block_except_list)

    countries_to_block = []
    countries_to_allow = []

    # country = allowed_countries[0]
    for country in allowed_countries:
        if country not in block_except_list and country not in blocked_countries:
            countries_to_block.append(country)

    for country in block_except_list:
        if country in blocked_countries:
            raise ValueError("I can't allow a country that has already been blocked")
        if country not in allowed_countries:
            countries_to_allow.append(country)

    rows = generate_csv_rows_for_species(species_string,
                                         allow_countries=countries_to_allow,
                                         block_countries=countries_to_block)

    return rows

# ...def _generate_csv_rows_to_block_all_countries_except(...)

def generate_csv_rows_for_species(species_string,
                                  allow_countries=None,
                                  block_countries=None,
                                  allow_states=None,
                                  block_states=None):
    """
    Generate rows in the format expected by geofence_fixes.csv, representing a list of
    allow and/or block rules for the specified species and countries/states. Does not check
    that the rules make sense; e.g. nothing will stop you in this function from both allowing
    and blocking a country.

    Args:
        species_string (str): five-token string in semicolon-delimited WI taxonomy format
        allow_countries (list or str, optional): three-letter country code, list of
            country codes, or comma-separated list of country codes to allow
        block_countries (list or str, optional): three-letter country code, list of
            country codes, or comma-separated list of country codes to block
        allow_states (list or str, optional): two-letter state code, list of
            state codes, or comma-separated list of state codes to allow
        block_states (list or str, optional): two-letter state code, list of
            state codes, or comma-separated list of state codes to block

    Returns:
        list of str: lines ready to be pasted into geofence_fixes.csv
    """

    assert is_valid_taxonomy_string(species_string), \
        '{} is not a valid taxonomy string'.format(species_string)

    lines = []

    if allow_countries is not None:
        allow_countries = _parse_code_list(allow_countries)
        for country in allow_countries:
            lines.append(species_string + ',allow,' + country + ',')

    if block_countries is not None:
        block_countries = _parse_code_list(block_countries)
        for country in block_countries:
            lines.append(species_string + ',block,' + country + ',')

    if allow_states is not None:
        allow_states = _parse_code_list(allow_states)
        for state in allow_states:
            lines.append(species_string + ',allow,USA,' + state)

    if block_states is not None:
        block_states = _parse_code_list(block_states)
        for state in block_states:
            lines.append(species_string + ',block,USA,' + state)

    return lines

# ...def generate_csv_rows_for_species(...)
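
# A usage sketch for generate_csv_rows_for_species(), using the white-tailed deer
# taxonomy string from earlier in this module. Per the code above, a state row
# looks like 'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus,allow,USA,WA'.

if False:

    #%% Generate geofence_fixes.csv rows

    species_string = 'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus'
    rows = generate_csv_rows_for_species(species_string,
                                         allow_countries='USA,CAN',
                                         allow_states='WA,OR')
    print('\n'.join(rows))
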

def initialize_geofencing(geofencing_file,country_code_file,force_init=False):
    """
    Load geofencing information from a .json file, and country code mappings from
    a .csv file. Stores results in the global tables [taxonomy_string_to_geofencing_rules],
    [country_to_country_code], and [country_code_to_country].

    Args:
        geofencing_file (str): .json file with geofencing rules
        country_code_file (str): .csv file with country code mappings, in columns
            called "name" and "alpha-3", e.g. from
            https://github.com/lukes/ISO-3166-Countries-with-Regional-Codes/blob/master/all/all.csv
        force_init (bool, optional): if the output dicts already exist, should we
            re-initialize anyway?
    """

    global taxonomy_string_to_geofencing_rules
    global country_to_country_code
    global country_code_to_country

    if (country_to_country_code is not None) and \
       (country_code_to_country is not None) and \
       (taxonomy_string_to_geofencing_rules is not None) and \
       (not force_init):
        return

    # Read country code information
    country_code_df = pd.read_csv(country_code_file)
    country_to_country_code = {}
    country_code_to_country = {}
    for i_row,row in country_code_df.iterrows():
        country_to_country_code[row['name'].lower()] = row['alpha-3'].upper()
        country_code_to_country[row['alpha-3'].upper()] = row['name'].lower()

    # Read geofencing information
    with open(geofencing_file,'r',encoding='utf-8') as f:
        taxonomy_string_to_geofencing_rules = json.load(f)

    """
    Geofencing keys are taxonomy strings, e.g.:

    'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus'

    Geofencing values are tables mapping allow/block to country codes, optionally
    including region/state codes, e.g.:

    {'allow': {
        'ALA': [],
        'ARG': [],
        ...
        'SUR': [],
        'TTO': [],
        'USA': ['AL',
                'AR',
                'AZ',
                ...
    }
    """

    # Validate

    # species_string = next(iter(taxonomy_string_to_geofencing_rules.keys()))
    for species_string in taxonomy_string_to_geofencing_rules.keys():

        species_rules = taxonomy_string_to_geofencing_rules[species_string]

        if len(species_rules.keys()) > 1:
            print('Warning: taxon {} has both allow and block rules'.format(species_string))

        for rule_type in species_rules.keys():

            assert rule_type in ('allow','block')
            all_country_rules_this_species = species_rules[rule_type]

            for country_code in all_country_rules_this_species.keys():
                assert country_code in country_code_to_country
                region_rules = all_country_rules_this_species[country_code]
                # Right now we only have regional rules for the USA; these may be part of
                # allow or block rules.
                if len(region_rules) > 0:
                    assert country_code == 'USA'

    # ...for each species

# ...def initialize_geofencing(...)

def _species_string_to_canonical_species_string(species):
    """
    Convert a string that may be a 5-token species string, a binomial name,
    or a common name into a 5-token species string, using taxonomic lookup.
    """

    global taxonomy_string_to_taxonomy_info
    global binomial_name_to_taxonomy_info
    global common_name_to_taxonomy_info

    assert taxonomy_string_to_geofencing_rules is not None, \
        'Initialize geofencing prior to species lookup'
    assert taxonomy_string_to_taxonomy_info is not None, \
        'Initialize taxonomy lookup prior to species lookup'

    species = species.lower()

    # Turn "species" into a taxonomy string

    # If this is already a taxonomy string...
    if len(species.split(';')) == 5:
        taxonomy_string = species
    # If this is a common name...
    elif species in common_name_to_taxonomy_info:
        taxonomy_info = common_name_to_taxonomy_info[species]
        taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
    # If this is a binomial name...
    elif (species in binomial_name_to_taxonomy_info):
        taxonomy_info = binomial_name_to_taxonomy_info[species]
        taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
    else:
        raise ValueError('Could not find taxonomic information for {}'.format(species))

    return taxonomy_string


def species_allowed_in_country(species,country,state=None,return_status=False):
    """
    Determines whether [species] is allowed in [country], according to
    already-initialized geofencing rules.

    Args:
        species (str): can be a common name, a binomial name, or a species string
        country (str): country name or three-letter code
        state (str, optional): two-letter US state code
        return_status (bool, optional): by default, this function returns a bool;
            if you want to know *why* [species] is allowed/not allowed, setting
            return_status to True will return additional information.

    Returns:
        bool or str: typically returns True if [species] is allowed in [country], else
        False. Returns a more detailed string if return_status is set.
    """

    global taxonomy_string_to_taxonomy_info
    global binomial_name_to_taxonomy_info
    global common_name_to_taxonomy_info

    assert taxonomy_string_to_geofencing_rules is not None, \
        'Initialize geofencing prior to species lookup'
    assert taxonomy_string_to_taxonomy_info is not None, \
        'Initialize taxonomy lookup prior to species lookup'

    taxonomy_string = _species_string_to_canonical_species_string(species)

    # Normalize [state]

    if state is not None:
        state = state.upper()
        assert len(state) == 2

    # Turn "country" into a country code

    if len(country) == 3:
        assert country.upper() in country_code_to_country
        country = country.upper()
    else:
        assert country.lower() in country_to_country_code
        country = country_to_country_code[country.lower()]

    country_code = country.upper()

    # Species with no rules are allowed everywhere
    if taxonomy_string not in taxonomy_string_to_geofencing_rules:
        status = 'allow_by_default'
        if return_status:
            return status
        else:
            return True

    geofencing_rules_this_species = taxonomy_string_to_geofencing_rules[taxonomy_string]
    allowed_countries = []
    blocked_countries = []

    rule_types_this_species = list(geofencing_rules_this_species.keys())
    for rule_type in rule_types_this_species:
        assert rule_type in ('allow','block')

    if 'block' in rule_types_this_species:
        blocked_countries = list(geofencing_rules_this_species['block'])
    if 'allow' in rule_types_this_species:
        allowed_countries = list(geofencing_rules_this_species['allow'])

    status = None

    # The convention is that block rules win over allow rules
    if country_code in blocked_countries:
        if country_code in allowed_countries:
            status = 'blocked_over_allow'
        else:
            status = 'blocked'
    elif country_code in allowed_countries:
        status = 'allowed'
    elif len(allowed_countries) > 0:
        # The convention is that if allow rules exist, any country not on that list
        # is blocked.
        status = 'block_not_on_country_allow_list'
    else:
        # Only block rules exist for this species, and they don't include this country
        assert len(blocked_countries) > 0
        status = 'allow_not_on_block_list'

    # Now let's see whether we have to deal with any regional rules.
    #
    # Right now regional rules only exist for the US.
    #
    # Note that [rule_type] here is the last rule type iterated in the loop above;
    # species with both allow and block rules for the USA may not be handled correctly.
    if (country_code == 'USA') and ('USA' in geofencing_rules_this_species[rule_type]):

        if state is None:

            state_list = geofencing_rules_this_species[rule_type][country_code]
            if len(state_list) > 0:
                assert status.startswith('allow')
                status = 'allow_no_state'

        else:

            state_list = geofencing_rules_this_species[rule_type][country_code]

            if state in state_list:
                # If the state is on the list, do what the list says
                if rule_type == 'allow':
                    status = 'allow_on_state_allow_list'
                else:
                    status = 'block_on_state_block_list'
            else:
                # If the state is not on the list, do the opposite of what the list says
                if rule_type == 'allow':
                    status = 'block_not_on_state_allow_list'
                else:
                    status = 'allow_not_on_state_block_list'

    if return_status:
        return status
    else:
        if status.startswith('allow'):
            return True
        else:
            assert status.startswith('block')
            return False

# ...def species_allowed_in_country(...)
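
# A usage sketch for species_allowed_in_country(); initialize_geofencing() and
# initialize_taxonomy_info() must be called first (see the interactive driver
# cells at the end of this module for typical initialization).

if False:

    #%% Check whether a species is allowed in a country/state

    print(species_allowed_in_country('white-tailed deer', 'USA', state='WA'))
    print(species_allowed_in_country('white-tailed deer', 'USA', state='WA',
                                     return_status=True))
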

def export_geofence_data_to_csv(csv_fn, include_common_names=True):
    """
    Converts the geofence .json representation into an equivalent .csv representation,
    with one taxon per row and one region per column. Empty values indicate non-allowed
    combinations, positive numbers indicate allowed combinations. Negative values
    are reserved for specific non-allowed combinations.

    Module-global geofence data should already have been initialized with
    initialize_geofencing().

    Args:
        csv_fn (str): output .csv file
        include_common_names (bool, optional): include a column for common names

    Returns:
        dataframe: the pandas representation of the csv output file
    """

    global taxonomy_string_to_geofencing_rules
    global taxonomy_string_to_taxonomy_info

    all_taxa = sorted(list(taxonomy_string_to_geofencing_rules.keys()))
    print('Preparing geofencing export for {} taxa'.format(len(all_taxa)))

    all_regions = set()

    # taxon = all_taxa[0]
    for taxon in all_taxa:

        taxon_rules = taxonomy_string_to_geofencing_rules[taxon]
        for rule_type in taxon_rules.keys():

            assert rule_type in ('allow','block')
            all_country_rules_this_species = taxon_rules[rule_type]

            for country_code in all_country_rules_this_species.keys():
                all_regions.add(country_code)
                assert country_code in country_code_to_country
                assert len(country_code) == 3
                region_rules = all_country_rules_this_species[country_code]
                if len(region_rules) > 0:
                    assert country_code == 'USA'
                    for region_name in region_rules:
                        assert len(region_name) == 2
                        assert isinstance(region_name,str)
                        all_regions.add(country_code + ':' + region_name)

    all_regions = sorted(list(all_regions))

    print('Found {} regions'.format(len(all_regions)))

    n_allowed = 0
    df = pd.DataFrame(index=all_taxa,columns=all_regions)
    # df = df.fillna(np.nan)

    for taxon in tqdm(all_taxa):
        for region in all_regions:
            tokens = region.split(':')
            country_code = tokens[0]
            state_code = None
            if len(tokens) > 1:
                state_code = tokens[1]
            allowed = species_allowed_in_country(species=taxon,
                                                 country=country_code,
                                                 state=state_code,
                                                 return_status=False)
            if allowed:
                n_allowed += 1
                df.loc[taxon,region] = 1

        # ...for each region

    # ...for each taxon

    print('Allowed {} of {} combinations'.format(n_allowed,len(all_taxa)*len(all_regions)))

    # Before saving, convert columns with numeric values to integers
    for col in df.columns:
        # Check whether each column has any non-NaN values that could be integers
        if df[col].notna().any() and pd.to_numeric(df[col], errors='coerce').notna().any():
            # Convert column to Int64 type (pandas nullable integer type)
            df[col] = pd.to_numeric(df[col], errors='coerce').astype('Int64')

    if include_common_names:
        df.insert(loc=0,column='common_name',value='')
        for taxon in all_taxa:
            if taxon in taxonomy_string_to_taxonomy_info:
                taxonomy_info = taxonomy_string_to_taxonomy_info[taxon]
                common_name = taxonomy_info['common_name']
                assert isinstance(common_name,str) and len(common_name) < 50
                df.loc[taxon,'common_name'] = common_name

    df.to_csv(csv_fn,index=True,header=True)

    return df

# ...def export_geofence_data_to_csv(...)

#%% Interactive driver(s)

if False:

    pass

    #%% Shared cell to initialize geofencing and taxonomy information

    from megadetector.utils.wi_utils import species_allowed_in_country # noqa
    from megadetector.utils.wi_utils import initialize_geofencing, initialize_taxonomy_info # noqa
    from megadetector.utils.wi_utils import _species_string_to_canonical_species_string # noqa
    from megadetector.utils.wi_utils import generate_csv_rows_for_species # noqa
    from megadetector.utils.wi_utils import _generate_csv_rows_to_block_all_countries_except # noqa

    from megadetector.utils.wi_utils import taxonomy_string_to_geofencing_rules # noqa
    from megadetector.utils.wi_utils import taxonomy_string_to_taxonomy_info # noqa
    from megadetector.utils.wi_utils import common_name_to_taxonomy_info # noqa
    from megadetector.utils.wi_utils import binomial_name_to_taxonomy_info # noqa
    from megadetector.utils.wi_utils import country_to_country_code # noqa
    from megadetector.utils.wi_utils import country_code_to_country # noqa

    model_base = os.path.expanduser('~/models/speciesnet')
    geofencing_file = os.path.join(model_base,'crop','geofence_release.2025.02.27.0702.json')
    country_code_file = os.path.join(model_base,'country-codes.csv')
    # encoding = 'cp1252'; taxonomy_file = r'g:\temp\taxonomy_mapping-' + encoding + '.json'
    encoding = None; taxonomy_file = os.path.join(model_base,'taxonomy_mapping.json')

    initialize_geofencing(geofencing_file, country_code_file, force_init=True)
    initialize_taxonomy_info(taxonomy_file, force_init=True, encoding=encoding)

    # from megadetector.utils.path_utils import open_file; open_file(geofencing_file)


    #%% Generate a block list

    taxon_name = 'cercopithecidae'
    taxonomy_info = binomial_name_to_taxonomy_info[taxon_name]
    taxonomy_string_short = taxonomy_info_to_taxonomy_string(taxonomy_info)
    assert len(taxonomy_string_short.split(';')) == 5

    block_list = 'ATG,BHS,BRB,BLZ,CAN,CRI,CUB,DMA,DOM,SLV,GRD,GTM,HTI,HND,JAM,' + \
        'MEX,NIC,PAN,KNA,LCA,VCT,TTO,USA,ARG,BOL,BRA,CHL,COL,ECU,GUY,PRY,PER,' + \
        'SUR,URY,VEN,ALB,AND,ARM,AUT,AZE,BLR,BEL,BIH,BGR,HRV,CYP,CZE,DNK,EST,FIN,' + \
        'FRA,GEO,DEU,GRC,HUN,ISL,IRL,ITA,KAZ,XKX,LVA,LIE,LTU,LUX,MLT,MDA,MCO,MNE,' + \
        'NLD,MKD,NOR,POL,PRT,ROU,RUS,SMR,SRB,SVK,SVN,ESP,SWE,CHE,TUR,UKR,GBR,VAT,AUS'

    rows = generate_csv_rows_for_species(species_string=taxonomy_string_short,
                                         allow_countries=None,
                                         block_countries=block_list,
                                         allow_states=None,
                                         block_states=None)

    # import clipboard; clipboard.copy('\n'.join(rows))
    print(rows)


    #%% Look up taxonomy info for a common name

    common_name = 'domestic horse'
    info = common_name_to_taxonomy_info[common_name]
    s = taxonomy_info_to_taxonomy_string(info,include_taxon_id_and_common_name=True)
    print(s)


    #%% Generate a block-except list

    block_except_list = 'ALB,AND,ARM,AUT,AZE,BEL,BGR,BIH,BLR,CHE,CYP,CZE,DEU,DNK,ESP,EST,FIN,FRA,GBR,GEO,GRC,HRV,HUN,IRL,IRN,IRQ,ISL,ISR,ITA,KAZ,LIE,LTU,LUX,LVA,MDA,MKD,MLT,MNE,NLD,NOR,POL,PRT,ROU,RUS,SMR,SRB,SVK,SVN,SWE,TUR,UKR,UZB'
    species = 'eurasian badger'
    species_string = _species_string_to_canonical_species_string(species)
    rows = _generate_csv_rows_to_block_all_countries_except(species_string,block_except_list)

    # import clipboard; clipboard.copy('\n'.join(rows))
    print(rows)


    #%% Generate an allow-list

    taxon_name = 'potoroidae'
    taxonomy_info = binomial_name_to_taxonomy_info[taxon_name]
    taxonomy_string_short = taxonomy_info_to_taxonomy_string(taxonomy_info)
    assert len(taxonomy_string_short.split(';')) == 5

    rows = generate_csv_rows_for_species(species_string=taxonomy_string_short,
                                         allow_countries=['AUS'],
                                         block_countries=None,
                                         allow_states=None,
                                         block_states=None)

    # import clipboard; clipboard.copy('\n'.join(rows))
    print(rows)


    #%% Test the effects of geofence changes

    species = 'canis lupus dingo'
    country = 'guatemala'
    species_allowed_in_country(species,country,state=None,return_status=False)


    #%% Geofencing lookups

    # This can be a latin or common name
    taxon = 'potoroidae'
    # print(common_name_to_taxonomy_info[taxon])

    # This can be a name or country code
    country = 'AUS'
    print(species_allowed_in_country(taxon, country))


    #%% Bulk geofence lookups

    if True:

        # Make sure some Guatemalan species are allowed in Guatemala
        all_species = [
            'didelphis marsupialis',
            'didelphis virginiana',
            'dasypus novemcinctus',
            'urocyon cinereoargenteus',
            'nasua narica',
            'eira barbara',
            'conepatus semistriatus',
            'leopardus wiedii',
            'leopardus pardalis',
            'puma concolor',
            'panthera onca',
            'tapirus bairdii',
            'pecari tajacu',
            'tayassu pecari',
            'mazama temama',
            'mazama pandora',
            'odocoileus virginianus',
            'dasyprocta punctata',
            'tinamus major',
            'crax rubra',
            'meleagris ocellata',
            'gulo gulo' # Consistency check; this species should be blocked
        ]

        country = 'guatemala'
        state = None

    if True:

        # Make sure some PNW species are allowed in the right states
        all_species = \
            ['Taxidea taxus',
             'Martes americana',
             'Ovis canadensis',
             'Ursus americanus',
             'Lynx rufus',
             'Lynx canadensis',
             'Puma concolor',
             'Canis latrans',
             'Cervus canadensis',
             'Canis lupus',
             'Ursus arctos',
             'Marmota caligata',
             'Alces alces',
             'Oreamnos americanus',
             'Odocoileus hemionus',
             'Vulpes vulpes',
             'Lepus americanus',
             'Mephitis mephitis',
             'Odocoileus virginianus',
             'Marmota flaviventris',
             'tapirus bairdii' # Consistency check; this species should be blocked
            ]

        all_species = [s.lower() for s in all_species]

        country = 'USA'
        state = 'WA'
        # state = 'MT'

    if True:

        all_species = ['ammospermophilus harrisii']
        country = 'USA'
        state = 'CA'

    for species in all_species:

        taxonomy_info = binomial_name_to_taxonomy_info[species]
        allowed = species_allowed_in_country(species, country, state=state, return_status=True)
        state_string = ''
        if state is not None:
            state_string = ' ({})'.format(state)
        print('{} ({}) for {}{}: {}'.format(taxonomy_info['common_name'],species,country,state_string,allowed))