megadetector 5.0.26__py3-none-any.whl → 5.0.28__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only.
Potentially problematic release: this version of megadetector might be problematic.
- megadetector/data_management/mewc_to_md.py +1 -1
- megadetector/data_management/read_exif.py +2 -0
- megadetector/detection/process_video.py +1 -1
- megadetector/detection/pytorch_detector.py +4 -4
- megadetector/detection/run_detector.py +10 -3
- megadetector/detection/run_detector_batch.py +4 -3
- megadetector/detection/run_tiled_inference.py +65 -13
- megadetector/detection/video_utils.py +2 -2
- megadetector/postprocessing/classification_postprocessing.py +517 -20
- megadetector/postprocessing/create_crop_folder.py +1 -1
- megadetector/postprocessing/generate_csv_report.py +499 -0
- megadetector/postprocessing/load_api_results.py +4 -4
- megadetector/postprocessing/postprocess_batch_results.py +6 -4
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +0 -3
- megadetector/taxonomy_mapping/taxonomy_graph.py +1 -1
- megadetector/utils/ct_utils.py +3 -2
- megadetector/utils/path_utils.py +75 -29
- megadetector/utils/split_locations_into_train_val.py +16 -3
- megadetector/utils/wi_utils.py +68 -410
- megadetector/visualization/visualization_utils.py +25 -9
- megadetector/visualization/visualize_detector_output.py +50 -28
- {megadetector-5.0.26.dist-info → megadetector-5.0.28.dist-info}/METADATA +132 -132
- {megadetector-5.0.26.dist-info → megadetector-5.0.28.dist-info}/RECORD +26 -25
- {megadetector-5.0.26.dist-info → megadetector-5.0.28.dist-info}/WHEEL +1 -1
- {megadetector-5.0.26.dist-info → megadetector-5.0.28.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.26.dist-info → megadetector-5.0.28.dist-info}/top_level.txt +0 -0
megadetector/utils/wi_utils.py
CHANGED
@@ -15,6 +15,8 @@ Functions related to working with the WI insights platform, specifically for:
 import os
 import requests
 import json
+import tempfile
+import uuid

 import numpy as np
 import pandas as pd
@@ -26,14 +28,15 @@ from functools import partial
 from tqdm import tqdm

 from megadetector.utils.path_utils import insert_before_extension
+from megadetector.utils.path_utils import find_images
+
 from megadetector.utils.ct_utils import split_list_into_n_chunks
 from megadetector.utils.ct_utils import round_floats_in_nested_dict
 from megadetector.utils.ct_utils import is_list_sorted
 from megadetector.utils.ct_utils import invert_dictionary
 from megadetector.utils.ct_utils import sort_list_of_dicts_by_key
 from megadetector.utils.ct_utils import sort_dictionary_by_value
-
-from megadetector.utils.path_utils import find_images
+
 from megadetector.postprocessing.validate_batch_results import \
     validate_batch_results, ValidateBatchResultsOptions

@@ -70,7 +73,8 @@ def is_valid_prediction_string(s):

 def is_valid_taxonomy_string(s):
     """
-    Determine whether [s] is a valid 5-token WI taxonomy string.
+    Determine whether [s] is a valid 5-token WI taxonomy string. Taxonomy strings
+    look like:

     'mammalia;rodentia;;;;rodent'
     'mammalia;chordata;canidae;canis;lupus dingo'
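The expanded docstring now shows what WI taxonomy strings look like. As a minimal, hedged illustration of the documented format (assuming only that is_valid_taxonomy_string is importable from megadetector.utils.wi_utils, where this diff places it):

    # Sketch: exercise the documented taxonomy-string format; not from the diff.
    from megadetector.utils.wi_utils import is_valid_taxonomy_string

    # Semicolon-delimited taxonomy strings, per the docstring examples
    print(is_valid_taxonomy_string('mammalia;chordata;canidae;canis;lupus dingo'))
    print(is_valid_taxonomy_string('mammalia;rodentia;;;;rodent'))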
@@ -331,7 +335,7 @@ def read_images_from_download_bundle(download_folder):
         * filename (str, the filename without path at the time of upload)
         * location (str, starting with gs://)

-    May also contain
+    May also contain classification fields: wi_taxon_id (str), species, etc.
     """

     print('Reading images from {}'.format(download_folder))
@@ -1147,7 +1151,8 @@ def generate_md_results_from_predictions_json(predictions_json_file,
                                               md_results_file,
                                               base_folder=None,
                                               max_decimals=5,
-                                              convert_human_to_person=True):
+                                              convert_human_to_person=True,
+                                              convert_homo_species_to_human=True):
     """
     Generate an MD-formatted .json file from a predictions.json file, generated by the
     SpeciesNet ensemble. Typically, MD results files use relative paths, and predictions.json
@@ -1176,6 +1181,9 @@ def generate_md_results_from_predictions_json(predictions_json_file,
         convert_human_to_person (bool, optional): WI predictions.json files sometimes use the
             detection category "human"; MD files usually use "person". If True, switches "human"
             to "person".
+        convert_homo_species_to_human (bool, optional): the ensemble often rolls human predictions
+            up to "homo species", which isn't wrong, but looks odd. This forces these back to
+            "homo sapiens".
     """

     # Read predictions file
@@ -1216,7 +1224,6 @@ def generate_md_results_from_predictions_json(predictions_json_file,
     # im_in = predictions[0]
     for im_in in predictions:

-        # blank_prediction_string
         im_out = {}

         fn = im_in['filepath']
@@ -1275,8 +1282,11 @@ def generate_md_results_from_predictions_json(predictions_json_file,

         if 'prediction' in im_in:

+            class_to_assign = None
             im_out['top_classification_common_name'] = top_classification_common_name
             class_to_assign = im_in['prediction']
+            if convert_homo_species_to_human and class_to_assign.endswith('homo species'):
+                class_to_assign = human_prediction_string
             class_confidence = im_in['prediction_score']

         if class_to_assign is not None:
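This hunk is where the new convert_homo_species_to_human flag from the signature change above takes effect: predictions the ensemble rolled up to "homo species" are mapped back to the standard human prediction string before a class is assigned. A hedged sketch of a conversion call using the updated signature (paths are illustrative placeholders, not from the diff):

    # Sketch: convert SpeciesNet ensemble output to MD format with the new flag;
    # both conversion flags default to True in the new signature.
    from megadetector.utils.wi_utils import generate_md_results_from_predictions_json

    generate_md_results_from_predictions_json(
        predictions_json_file='speciesnet-predictions.json',  # placeholder path
        md_results_file='md-results.json',                    # placeholder path
        base_folder=None,
        convert_human_to_person=True,
        convert_homo_species_to_human=True)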
@@ -1605,6 +1615,50 @@ def merge_prediction_json_files(input_prediction_files,output_prediction_file):
 # ...def merge_prediction_json_files(...)


+def load_md_or_speciesnet_file(fn,verbose=True):
+    """
+    Load a .json file that may be in MD or SpeciesNet format. Typically used so
+    SpeciesNet files can be supplied to functions originally written to support MD
+    format.
+
+    Args:
+        fn (str): a .json file in predictions.json (MD or SpeciesNet) format
+        verbose (bool, optional): enable additional debug output
+
+    Returns:
+        dict: the contents of [fn], in MD format.
+    """
+
+    with open(fn,'r') as f:
+        detector_output = json.load(f)
+
+    # Convert to MD format if necessary
+    if 'predictions' in detector_output:
+        if verbose:
+            print('This appears to be a SpeciesNet output file, converting to MD format')
+        md_temp_dir = os.path.join(tempfile.gettempdir(), 'megadetector_temp_files')
+        os.makedirs(md_temp_dir,exist_ok=True)
+        temp_results_file = os.path.join(md_temp_dir,str(uuid.uuid1()) + '.json')
+        print('Writing temporary results to {}'.format(temp_results_file))
+        generate_md_results_from_predictions_json(predictions_json_file=fn,
+                                                  md_results_file=temp_results_file,
+                                                  base_folder=None)
+        with open(temp_results_file,'r') as f:
+            detector_output = json.load(f)
+        try:
+            os.remove(temp_results_file)
+        except Exception:
+            if verbose:
+                print('Warning: error removing temporary .json {}'.format(temp_results_file))
+
+    assert 'images' in detector_output, \
+        'Detector output file should be a json file with an "images" field.'
+
+    return detector_output
+
+# ...def load_md_or_speciesnet_file(...)
+
+
 def validate_predictions_file(fn,instances=None,verbose=True):
     """
     Validate the predictions.json file [fn].
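The new load_md_or_speciesnet_file helper lets downstream code accept either results format: SpeciesNet files are converted through a temporary MD-format file and returned as an MD-format dict. A minimal usage sketch (the filename is an illustrative placeholder):

    # Sketch: load a results file without knowing which format it's in;
    # the return value always has an MD-style 'images' field.
    from megadetector.utils.wi_utils import load_md_or_speciesnet_file

    results = load_md_or_speciesnet_file('results.json', verbose=True)
    print('Loaded {} images'.format(len(results['images'])))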
@@ -2315,402 +2369,6 @@ def species_allowed_in_country(species,country,state=None,return_status=False):
 # ...def species_allowed_in_country(...)


-def restrict_to_taxa_list(taxa_list,
-                          speciesnet_taxonomy_file,
-                          input_file,
-                          output_file,
-                          allow_walk_down=False):
-    """
-    Given a prediction file in MD .json format, likely without having had
-    a geofence applied, apply a custom taxa list.
-
-    Args:
-        taxa_list (str or list): list of latin names, or a text file containing
-            a list of latin names. Optionally may contain a second (comma-delimited)
-            column containing common names, used only for debugging. Latin names
-            must exist in the SpeciesNet taxonomy.
-        taxonomy_file (str): taxonomy filename, in the same format used for model
-            release (with 7-token taxonomy entries)
-        output_file (str): .json file to write, in MD format
-        allow_walk_down (bool, optional): should we walk down the taxonomy tree
-            when making mappings if a parent has only a single allowable child?
-            For example, if only a single felid species is allowed, should other
-            felid predictions be mapped to that species, as opposed to being mapped
-            to the family?
-    """
-
-    ##%% Read target taxa list
-
-    if isinstance(taxa_list,str):
-        assert os.path.isfile(taxa_list), \
-            'Could not find taxa list file {}'.format(taxa_list)
-        with open(taxa_list,'r') as f:
-            taxa_list = f.readlines()
-
-    taxa_list = [s.strip().lower() for s in taxa_list]
-    taxa_list = [s for s in taxa_list if len(s) > 0]
-
-    target_latin_to_common = {}
-    for s in taxa_list:
-        if s.strip().startswith('#'):
-            continue
-        tokens = s.split(',')
-        assert len(tokens) <= 2
-        binomial_name = tokens[0]
-        assert len(binomial_name.split(' ')) in (1,2,3), \
-            'Illegal binomial name in species list: {}'.format(binomial_name)
-        if len(tokens) > 0:
-            common_name = tokens[1].strip().lower()
-        else:
-            common_name = None
-        assert binomial_name not in target_latin_to_common
-        target_latin_to_common[binomial_name] = common_name
-
-
-    ##%% Read taxonomy file
-
-    with open(speciesnet_taxonomy_file,'r') as f:
-        speciesnet_taxonomy_list = f.readlines()
-    speciesnet_taxonomy_list = [s.strip() for s in \
-        speciesnet_taxonomy_list if len(s.strip()) > 0]
-
-    # Maps the latin name of every taxon to the corresponding full taxon string
-    #
-    # For species, the key is a binomial name
-    speciesnet_latin_name_to_taxon_string = {}
-    speciesnet_common_name_to_taxon_string = {}
-
-    def _insert_taxonomy_string(s):
-
-        tokens = s.split(';')
-        assert len(tokens) == 7
-
-        guid = tokens[0] # noqa
-        class_name = tokens[1]
-        order = tokens[2]
-        family = tokens[3]
-        genus = tokens[4]
-        species = tokens[5]
-        common_name = tokens[6]
-
-        if len(class_name) == 0:
-            assert common_name in ('animal','vehicle','blank')
-            return
-
-        if len(species) > 0:
-            assert all([len(s) > 0 for s in [genus,family,order]])
-            binomial_name = genus + ' ' + species
-            if binomial_name not in speciesnet_latin_name_to_taxon_string:
-                speciesnet_latin_name_to_taxon_string[binomial_name] = s
-        elif len(genus) > 0:
-            assert all([len(s) > 0 for s in [family,order]])
-            if genus not in speciesnet_latin_name_to_taxon_string:
-                speciesnet_latin_name_to_taxon_string[genus] = s
-        elif len(family) > 0:
-            assert len(order) > 0
-            if family not in speciesnet_latin_name_to_taxon_string:
-                speciesnet_latin_name_to_taxon_string[family] = s
-        elif len(order) > 0:
-            if order not in speciesnet_latin_name_to_taxon_string:
-                speciesnet_latin_name_to_taxon_string[order] = s
-        else:
-            if class_name not in speciesnet_latin_name_to_taxon_string:
-                speciesnet_latin_name_to_taxon_string[class_name] = s
-
-        if len(common_name) > 0:
-            if common_name not in speciesnet_common_name_to_taxon_string:
-                speciesnet_common_name_to_taxon_string[common_name] = s
-
-    for s in speciesnet_taxonomy_list:
-
-        _insert_taxonomy_string(s)
-
-
-    ##%% Make sure all parent taxa are represented in the taxonomy
-
-    # In theory any taxon that appears as the parent of another taxon should
-    # also be in the taxonomy, but this isn't always true, so we fix it here.
-
-    new_taxon_string_to_missing_tokens = defaultdict(list)
-
-    # latin_name = next(iter(speciesnet_latin_name_to_taxon_string.keys()))
-    for latin_name in speciesnet_latin_name_to_taxon_string.keys():
-
-        if 'no cv result' in latin_name:
-            continue
-
-        taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
-        tokens = taxon_string.split(';')
-
-        # Don't process GUID, species, or common name
-        # i_token = 6
-        for i_token in range(1,len(tokens)-2):
-
-            test_token = tokens[i_token]
-            if len(test_token) == 0:
-                continue
-
-            # Do we need to make up a taxon for this token?
-            if test_token not in speciesnet_latin_name_to_taxon_string:
-
-                new_tokens = [''] * 7
-                new_tokens[0] = 'fake_guid'
-                for i_copy_token in range(1,i_token+1):
-                    new_tokens[i_copy_token] = tokens[i_copy_token]
-                new_tokens[-1] = test_token + ' species'
-                assert new_tokens[-2] == ''
-                new_taxon_string = ';'.join(new_tokens)
-                # assert new_taxon_string not in new_taxon_strings
-                new_taxon_string_to_missing_tokens[new_taxon_string].append(test_token)
-
-        # ...for each token
-
-    # ...for each taxon
-
-    print('Found {} taxa that need to be inserted to make the taxonomy valid:\n'.format(
-        len(new_taxon_string_to_missing_tokens)))
-
-    new_taxon_string_to_missing_tokens = \
-        sort_dictionary_by_key(new_taxon_string_to_missing_tokens)
-    for taxon_string in new_taxon_string_to_missing_tokens:
-        missing_taxa = ','.join(new_taxon_string_to_missing_tokens[taxon_string])
-        print('{} ({})'.format(taxon_string,missing_taxa))
-
-    for new_taxon_string in new_taxon_string_to_missing_tokens:
-        _insert_taxonomy_string(new_taxon_string)
-
-
-    ##%% Make sure all species on the allow-list are in the taxonomy
-
-    n_failed_mappings = 0
-
-    for target_taxon_latin_name in target_latin_to_common.keys():
-        if target_taxon_latin_name not in speciesnet_latin_name_to_taxon_string:
-            common_name = target_latin_to_common[target_taxon_latin_name]
-            s = '{} ({}) not in speciesnet taxonomy'.format(
-                target_taxon_latin_name,common_name)
-            if common_name in speciesnet_common_name_to_taxon_string:
-                s += ' (common name maps to {})'.format(
-                    speciesnet_common_name_to_taxon_string[common_name])
-            print(s)
-            n_failed_mappings += 1
-
-    if n_failed_mappings > 0:
-        raise ValueError('Cannot continue with geofence generation')
-
-
-    ##%% For the allow-list, map each parent taxon to a set of allowable child taxa
-
-    # Maps parent names to all allowed child names, or None if this is the
-    # lowest-level allowable taxon on this path
-    allowed_parent_taxon_to_child_taxa = defaultdict(set)
-
-    # latin_name = next(iter(target_latin_to_common.keys()))
-    for latin_name in target_latin_to_common:
-
-        taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
-        tokens = taxon_string.split(';')
-        assert len(tokens) == 7
-
-        # Remove GUID and common mame
-        #
-        # This is now always class/order/family/genus/species
-        tokens = tokens[1:-1]
-
-        child_taxon = None
-
-        # If this is a species
-        if len(tokens[-1]) > 0:
-            binomial_name = tokens[-2] + ' ' + tokens[-1]
-            assert binomial_name == latin_name
-            allowed_parent_taxon_to_child_taxa[binomial_name].add(None)
-            child_taxon = binomial_name
-
-        # The first candidate parent is the genus
-        parent_token_index = len(tokens) - 2
-
-        while(parent_token_index >= 0):
-
-            parent_taxon = tokens[parent_token_index]
-            allowed_parent_taxon_to_child_taxa[parent_taxon].add(child_taxon)
-            child_taxon = parent_taxon
-            parent_token_index -= 1
-
-    # ...for each allowed latin name
-
-    allowed_parent_taxon_to_child_taxa = \
-        sort_dictionary_by_key(allowed_parent_taxon_to_child_taxa)
-
-
-    ##%% Map all predictions that exist in this dataset...
-
-    # ...to the prediction we should generate.
-
-    with open(input_file,'r') as f:
-        input_data = json.load(f)
-
-    input_category_id_to_common_name = input_data['classification_categories'] #noqa
-    input_category_id_to_taxonomy_string = \
-        input_data['classification_category_descriptions']
-
-    input_category_id_to_output_taxon_string = {}
-
-    # input_category_id = next(iter(input_category_id_to_taxonomy_string.keys()))
-    for input_category_id in input_category_id_to_taxonomy_string.keys():
-
-        input_taxon_string = input_category_id_to_taxonomy_string[input_category_id]
-        input_taxon_tokens = input_taxon_string.split(';')
-        assert len(input_taxon_tokens) == 7
-
-        # Don't mess with blank/no-cv-result/animal/human
-        if (input_taxon_string in non_taxonomic_prediction_strings) or \
-           (input_taxon_string == human_prediction_string):
-            input_category_id_to_output_taxon_string[input_category_id] = \
-                input_taxon_string
-            continue
-
-        # Remove GUID and common mame
-        #
-        # This is now always class/order/family/genus/species
-        input_taxon_tokens = input_taxon_tokens[1:-1]
-
-        test_index = len(input_taxon_tokens) - 1
-        target_taxon = None
-
-        # Start at the species level, and see whether each taxon is allowed
-        while((test_index >= 0) and (target_taxon is None)):
-
-            # Species are represented as binomial names
-            if (test_index == (len(input_taxon_tokens) - 1)) and \
-                (len(input_taxon_tokens[-1]) > 0):
-                test_taxon_name = \
-                    input_taxon_tokens[-2] + ' ' + input_taxon_tokens[-1]
-            else:
-                test_taxon_name = input_taxon_tokens[test_index]
-
-            # If we haven't yet found the level at which this taxon is non-empty,
-            # keep going up
-            if len(test_taxon_name) == 0:
-                test_index -= 1
-                continue
-
-            assert test_taxon_name in speciesnet_latin_name_to_taxon_string
-
-            # Is this taxon allowed according to the custom species list?
-            if test_taxon_name in allowed_parent_taxon_to_child_taxa:
-
-                allowed_child_taxa = allowed_parent_taxon_to_child_taxa[test_taxon_name]
-                assert allowed_child_taxa is not None
-
-                # If this is the lowest-level allowable token or there is not a
-                # unique child, don't walk any further, even if walking down
-                # is enabled.
-                if (None in allowed_child_taxa):
-                    assert len(allowed_child_taxa) == 1
-
-                if (None in allowed_child_taxa) or (len(allowed_child_taxa) > 1):
-                    target_taxon = test_taxon_name
-                elif not allow_walk_down:
-                    target_taxon = test_taxon_name
-                else:
-                    # If there's a unique child, walk back *down* the allowable
-                    # taxa until we run out of unique children
-                    while ((next(iter(allowed_child_taxa)) is not None) and \
-                           (len(allowed_child_taxa) == 1)):
-                        candidate_taxon = next(iter(allowed_child_taxa))
-                        assert candidate_taxon in allowed_parent_taxon_to_child_taxa
-                        assert candidate_taxon in speciesnet_latin_name_to_taxon_string
-                        allowed_child_taxa = \
-                            allowed_parent_taxon_to_child_taxa[candidate_taxon]
-                        target_taxon = candidate_taxon
-
-            # ...if this is an allowed taxon
-
-            test_index -= 1
-
-        # ...for each token
-
-        if target_taxon is None:
-            output_taxon_string = animal_prediction_string
-        else:
-            output_taxon_string = speciesnet_latin_name_to_taxon_string[target_taxon]
-        input_category_id_to_output_taxon_string[input_category_id] = output_taxon_string
-
-    # ...for each category
-
-
-    ##%% Build the new tables
-
-    input_category_id_to_output_category_id = {}
-    output_taxon_string_to_category_id = {}
-    output_category_id_to_common_name = {}
-
-    for input_category_id in input_category_id_to_output_taxon_string:
-
-        original_common_name = \
-            input_category_id_to_common_name[input_category_id]
-        original_taxon_string = \
-            input_category_id_to_taxonomy_string[input_category_id]
-        output_taxon_string = \
-            input_category_id_to_output_taxon_string[input_category_id]
-
-        output_common_name = output_taxon_string.split(';')[-1]
-
-        # Do we need to create a new output category?
-        if output_taxon_string not in output_taxon_string_to_category_id:
-            output_category_id = str(len(output_taxon_string_to_category_id))
-            output_taxon_string_to_category_id[output_taxon_string] = \
-                output_category_id
-            output_category_id_to_common_name[output_category_id] = \
-                output_common_name
-        else:
-            output_category_id = \
-                output_taxon_string_to_category_id[output_taxon_string]
-
-        input_category_id_to_output_category_id[input_category_id] = \
-            output_category_id
-
-        if False:
-            print('Mapping {} ({}) to:\n{} ({})\n'.format(
-                original_common_name,original_taxon_string,
-                output_common_name,output_taxon_string))
-        if False:
-            print('Mapping {} to {}'.format(
-                original_common_name,output_common_name,))
-
-    # ...for each category
-
-
-    ##%% Remap all category labels
-
-    assert len(set(output_taxon_string_to_category_id.keys())) == \
-        len(set(output_taxon_string_to_category_id.values()))
-
-    output_category_id_to_taxon_string = \
-        invert_dictionary(output_taxon_string_to_category_id)
-
-    with open(input_file,'r') as f:
-        output_data = json.load(f)
-
-    for im in tqdm(output_data['images']):
-        if 'detections' in im and im['detections'] is not None:
-            for det in im['detections']:
-                if 'classifications' in det:
-                    for classification in det['classifications']:
-                        classification[0] = \
-                            input_category_id_to_output_category_id[classification[0]]
-
-    output_data['classification_categories'] = output_category_id_to_common_name
-    output_data['classification_category_descriptions'] = \
-        output_category_id_to_taxon_string
-
-
-    ##%% Write output
-
-    with open(output_file,'w') as f:
-        json.dump(output_data,f,indent=1)
-
-
 #%% Interactive driver(s)

 if False:
@@ -2729,7 +2387,9 @@ if False:
     from megadetector.utils.wi_utils import taxonomy_string_to_taxonomy_info # noqa
     from megadetector.utils.wi_utils import common_name_to_taxonomy_info # noqa
     from megadetector.utils.wi_utils import binomial_name_to_taxonomy_info # noqa
-
+    from megadetector.utils.wi_utils import country_to_country_code # noqa
+    from megadetector.utils.wi_utils import country_code_to_country # noqa
+
     model_base = os.path.expanduser('~/models/speciesnet')
     geofencing_file = os.path.join(model_base,'crop','geofence_release.2025.02.27.0702.json')
     country_code_file = os.path.join(model_base,'country-codes.csv')
@@ -2740,10 +2400,10 @@
     initialize_taxonomy_info(taxonomy_file, force_init=True, encoding=encoding)


-    #%%
+    #%% Generate a block-except list

-    block_except_list = ''
-    species = ''
+    block_except_list = 'ALB,AND,ARM,AUT,AZE,BEL,BGR,BIH,BLR,CHE,CYP,CZE,DEU,DNK,ESP,EST,FIN,FRA,GBR,GEO,GRC,HRV,HUN,IRL,IRN,IRQ,ISL,ISR,ITA,KAZ,LIE,LTU,LUX,LVA,MDA,MKD,MLT,MNE,NLD,NOR,POL,PRT,ROU,RUS,SMR,SRB,SVK,SVN,SWE,TUR,UKR,UZB'
+    species = 'eurasian badger'
     species_string = _species_string_to_canonical_species_string(species)
     rows = _generate_csv_rows_to_block_all_countries_except(species_string,block_except_list)

@@ -2751,7 +2411,7 @@
     print(rows)


-    #%%
+    #%% Generate an allow-list

     taxon_name = 'hippopotamus amphibius'
     taxonomy_info = binomial_name_to_taxonomy_info[taxon_name]
@@ -2763,9 +2423,7 @@
         block_countries=None,
         allow_states=None,
         block_states=None,
-        blockexcept_countries=None)
-
-    # _generate_csv_rows_to_block_all_countries_except(species_string,'AUS')
+        blockexcept_countries=None)


     #%% Test the effects of geofence changes
megadetector/visualization/visualization_utils.py
CHANGED
@@ -183,7 +183,7 @@ def exif_preserving_save(pil_image,output_file,quality='keep',default_quality=85
     ...for more ways to preserve jpeg quality if quality='keep' doesn't do the trick.

     Args:
-        pil_image (Image): the PIL Image
+        pil_image (Image): the PIL Image object to save
         output_file (str): the destination file
         quality (str or int, optional): can be "keep" (default), or an integer from 0 to 100.
            This is only used if PIL thinks the the source image is a JPEG. If you load a JPEG
@@ -1285,8 +1285,14 @@ def gray_scale_fraction(image,crop_size=(0.1,0.1)):


 def _resize_relative_image(fn_relative,
-                           input_folder,
-                           output_folder,target_width,target_height,no_enlarge_width,verbose,quality):
+                           input_folder,
+                           output_folder,
+                           target_width,
+                           target_height,
+                           no_enlarge_width,
+                           verbose,
+                           quality,
+                           overwrite=True):
     """
     Internal function for resizing an image from one folder to another,
     maintaining relative path.
@@ -1294,6 +1300,12 @@ def _resize_relative_image(fn_relative,

     input_fn_abs = os.path.join(input_folder,fn_relative)
     output_fn_abs = os.path.join(output_folder,fn_relative)
+
+    if (not overwrite) and (os.path.isfile(output_fn_abs)):
+        status = 'skipped'
+        error = None
+        return {'fn_relative':fn_relative,'status':status,'error':error}
+
     os.makedirs(os.path.dirname(output_fn_abs),exist_ok=True)
     try:
         _ = resize_image(input_fn_abs,
@@ -1435,7 +1447,8 @@ def resize_image_folder(input_folder,
                        pool_type='process',
                        n_workers=10,
                        recursive=True,
-                       image_files_relative=None):
+                       image_files_relative=None,
+                       overwrite=True):
     """
     Resize all images in a folder (defaults to recursive).

@@ -1461,12 +1474,13 @@
             to disable parallelization
         recursive (bool, optional): whether to search [input_folder] recursively for images.
         image_files_relative (list, optional): if not None, skips any relative paths not
-            in this list
+            in this list
+        overwrite (bool, optional): whether to overwrite existing target images

     Returns:
         list: a list of dicts with keys 'input_fn', 'output_fn', 'status', and 'error'.
-            'status' will be 'success' or 'error'; 'error' will be None for successful
-            otherwise will contain the image-specific error.
+            'status' will be 'success', 'skipped', or 'error'; 'error' will be None for successful
+            cases, otherwise will contain the image-specific error.
     """

     assert os.path.isdir(input_folder), '{} is not a folder'.format(input_folder)
@@ -1502,7 +1516,8 @@
                                   target_height=target_height,
                                   no_enlarge_width=no_enlarge_width,
                                   verbose=verbose,
-                                  quality=quality))
+                                  quality=quality,
+                                  overwrite=overwrite))

         else:

@@ -1522,7 +1537,8 @@
                     target_height=target_height,
                     no_enlarge_width=no_enlarge_width,
                     verbose=verbose,
-                    quality=quality)
+                    quality=quality,
+                    overwrite=overwrite)

         results = list(tqdm(pool.imap(p, image_files_relative),total=len(image_files_relative)))

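Across these last four hunks, the new overwrite parameter threads from resize_image_folder down to _resize_relative_image, so an interrupted resize job can be resumed by skipping images that already exist in the output folder. A hedged sketch (paths are placeholders; this assumes resize_image_folder lives in megadetector.visualization.visualization_utils, takes the output folder as its second positional argument to match the input_folder/output_folder pairing in _resize_relative_image, and that its sizing defaults are acceptable):

    # Sketch: resume a resize job; existing outputs come back with
    # status 'skipped' instead of being regenerated.
    from megadetector.visualization.visualization_utils import resize_image_folder

    results = resize_image_folder('/data/images-in',   # placeholder input folder
                                  '/data/images-out',  # placeholder output folder
                                  overwrite=False)
    n_skipped = sum(1 for r in results if r['status'] == 'skipped')
    print('Skipped {} previously resized images'.format(n_skipped))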