megadetector 5.0.27__py3-none-any.whl → 5.0.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)
  1. megadetector/data_management/mewc_to_md.py +1 -1
  2. megadetector/data_management/read_exif.py +2 -0
  3. megadetector/detection/process_video.py +1 -1
  4. megadetector/detection/pytorch_detector.py +4 -4
  5. megadetector/detection/run_detector.py +10 -3
  6. megadetector/detection/run_detector_batch.py +4 -3
  7. megadetector/detection/run_tiled_inference.py +65 -13
  8. megadetector/detection/video_utils.py +2 -2
  9. megadetector/postprocessing/classification_postprocessing.py +517 -20
  10. megadetector/postprocessing/create_crop_folder.py +1 -1
  11. megadetector/postprocessing/generate_csv_report.py +499 -0
  12. megadetector/postprocessing/load_api_results.py +4 -4
  13. megadetector/postprocessing/postprocess_batch_results.py +6 -4
  14. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +0 -3
  15. megadetector/taxonomy_mapping/taxonomy_graph.py +1 -1
  16. megadetector/utils/ct_utils.py +3 -2
  17. megadetector/utils/path_utils.py +75 -29
  18. megadetector/utils/split_locations_into_train_val.py +16 -3
  19. megadetector/utils/wi_utils.py +68 -410
  20. megadetector/visualization/visualization_utils.py +25 -9
  21. megadetector/visualization/visualize_detector_output.py +50 -28
  22. {megadetector-5.0.27.dist-info → megadetector-5.0.28.dist-info}/METADATA +132 -132
  23. {megadetector-5.0.27.dist-info → megadetector-5.0.28.dist-info}/RECORD +26 -25
  24. {megadetector-5.0.27.dist-info → megadetector-5.0.28.dist-info}/WHEEL +1 -1
  25. {megadetector-5.0.27.dist-info → megadetector-5.0.28.dist-info}/licenses/LICENSE +0 -0
  26. {megadetector-5.0.27.dist-info → megadetector-5.0.28.dist-info}/top_level.txt +0 -0
@@ -15,6 +15,8 @@ Functions related to working with the WI insights platform, specifically for:
 import os
 import requests
 import json
+import tempfile
+import uuid

 import numpy as np
 import pandas as pd
@@ -26,14 +28,15 @@ from functools import partial
 from tqdm import tqdm

 from megadetector.utils.path_utils import insert_before_extension
+from megadetector.utils.path_utils import find_images
+
 from megadetector.utils.ct_utils import split_list_into_n_chunks
 from megadetector.utils.ct_utils import round_floats_in_nested_dict
 from megadetector.utils.ct_utils import is_list_sorted
 from megadetector.utils.ct_utils import invert_dictionary
 from megadetector.utils.ct_utils import sort_list_of_dicts_by_key
 from megadetector.utils.ct_utils import sort_dictionary_by_value
-from megadetector.utils.ct_utils import sort_dictionary_by_key
-from megadetector.utils.path_utils import find_images
+
 from megadetector.postprocessing.validate_batch_results import \
     validate_batch_results, ValidateBatchResultsOptions

@@ -70,7 +73,8 @@ def is_valid_prediction_string(s):

 def is_valid_taxonomy_string(s):
     """
-    Determine whether [s] is a valid 5-token WI taxonomy string. Taxonmy strings look like:
+    Determine whether [s] is a valid 5-token WI taxonomy string. Taxonomy strings
+    look like:

     'mammalia;rodentia;;;;rodent'
     'mammalia;chordata;canidae;canis;lupus dingo'
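
A usage sketch (not part of the diff) of the function this hunk documents; the import path follows this file (wi_utils), and the sample string is taken from the docstring above, so it is expected to be accepted:

    from megadetector.utils.wi_utils import is_valid_taxonomy_string

    # A species-level taxonomy string, per the docstring examples
    assert is_valid_taxonomy_string('mammalia;chordata;canidae;canis;lupus dingo')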
@@ -331,7 +335,7 @@ def read_images_from_download_bundle(download_folder):
         * filename (str, the filename without path at the time of upload)
         * location (str, starting with gs://)

-    May also contain clasification fields: wi_taxon_id (str), species, etc.
+    May also contain classification fields: wi_taxon_id (str), species, etc.
     """

     print('Reading images from {}'.format(download_folder))
@@ -1147,7 +1151,8 @@ def generate_md_results_from_predictions_json(predictions_json_file,
                                               md_results_file,
                                               base_folder=None,
                                               max_decimals=5,
-                                              convert_human_to_person=True):
+                                              convert_human_to_person=True,
+                                              convert_homo_species_to_human=True):
     """
     Generate an MD-formatted .json file from a predictions.json file, generated by the
     SpeciesNet ensemble. Typically, MD results files use relative paths, and predictions.json
@@ -1176,6 +1181,9 @@ def generate_md_results_from_predictions_json(predictions_json_file,
         convert_human_to_person (bool, optional): WI predictions.json files sometimes use the
             detection category "human"; MD files usually use "person". If True, switches "human"
             to "person".
+        convert_homo_species_to_human (bool, optional): the ensemble often rolls human predictions
+            up to "homo species", which isn't wrong, but looks odd. This forces these back to
+            "homo sapiens".
     """

     # Read predictions file
@@ -1216,7 +1224,6 @@ def generate_md_results_from_predictions_json(predictions_json_file,
     # im_in = predictions[0]
     for im_in in predictions:

-        # blank_prediction_string
         im_out = {}

         fn = im_in['filepath']
@@ -1275,8 +1282,11 @@ def generate_md_results_from_predictions_json(predictions_json_file,

         if 'prediction' in im_in:

+            class_to_assign = None
             im_out['top_classification_common_name'] = top_classification_common_name
             class_to_assign = im_in['prediction']
+            if convert_homo_species_to_human and class_to_assign.endswith('homo species'):
+                class_to_assign = human_prediction_string
             class_confidence = im_in['prediction_score']

         if class_to_assign is not None:
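
A hedged usage sketch of the updated conversion entry point (filenames are hypothetical; parameter names are as shown in the hunks above):

    from megadetector.utils.wi_utils import generate_md_results_from_predictions_json

    # With the new default, "homo species" rollups in the ensemble output are
    # rewritten to the canonical human prediction before MD results are written
    generate_md_results_from_predictions_json(
        predictions_json_file='predictions.json',  # hypothetical path
        md_results_file='md_results.json',         # hypothetical path
        base_folder=None,
        convert_human_to_person=True,
        convert_homo_species_to_human=True)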
@@ -1605,6 +1615,50 @@ def merge_prediction_json_files(input_prediction_files,output_prediction_file):
 # ...def merge_prediction_json_files(...)


+def load_md_or_speciesnet_file(fn,verbose=True):
+    """
+    Load a .json file that may be in MD or SpeciesNet format. Typically used so
+    SpeciesNet files can be supplied to functions originally written to support MD
+    format.
+
+    Args:
+        fn (str): a .json file in predictions.json (MD or SpeciesNet) format
+        verbose (bool, optional): enable additional debug output
+
+    Returns:
+        dict: the contents of [fn], in MD format.
+    """
+
+    with open(fn,'r') as f:
+        detector_output = json.load(f)
+
+    # Convert to MD format if necessary
+    if 'predictions' in detector_output:
+        if verbose:
+            print('This appears to be a SpeciesNet output file, converting to MD format')
+        md_temp_dir = os.path.join(tempfile.gettempdir(), 'megadetector_temp_files')
+        os.makedirs(md_temp_dir,exist_ok=True)
+        temp_results_file = os.path.join(md_temp_dir,str(uuid.uuid1()) + '.json')
+        print('Writing temporary results to {}'.format(temp_results_file))
+        generate_md_results_from_predictions_json(predictions_json_file=fn,
+                                                  md_results_file=temp_results_file,
+                                                  base_folder=None)
+        with open(temp_results_file,'r') as f:
+            detector_output = json.load(f)
+        try:
+            os.remove(temp_results_file)
+        except Exception:
+            if verbose:
+                print('Warning: error removing temporary .json {}'.format(temp_results_file))
+
+    assert 'images' in detector_output, \
+        'Detector output file should be a json file with an "images" field.'
+
+    return detector_output
+
+# ...def load_md_or_speciesnet_file(...)
+
+
 def validate_predictions_file(fn,instances=None,verbose=True):
     """
     Validate the predictions.json file [fn].
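
A short usage sketch for the new helper (the filename is hypothetical); SpeciesNet-format input is converted through a temporary file, and MD-format input is returned as-is:

    from megadetector.utils.wi_utils import load_md_or_speciesnet_file

    # Either an MD results file or a SpeciesNet predictions.json works here
    detector_output = load_md_or_speciesnet_file('results.json')  # hypothetical path
    assert 'images' in detector_output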
@@ -2315,402 +2369,6 @@ def species_allowed_in_country(species,country,state=None,return_status=False):
 # ...def species_allowed_in_country(...)


-def restrict_to_taxa_list(taxa_list,
-                          speciesnet_taxonomy_file,
-                          input_file,
-                          output_file,
-                          allow_walk_down=False):
-    """
-    Given a prediction file in MD .json format, likely without having had
-    a geofence applied, apply a custom taxa list.
-
-    Args:
-        taxa_list (str or list): list of latin names, or a text file containing
-            a list of latin names. Optionally may contain a second (comma-delimited)
-            column containing common names, used only for debugging. Latin names
-            must exist in the SpeciesNet taxonomy.
-        taxonomy_file (str): taxonomy filename, in the same format used for model
-            release (with 7-token taxonomy entries)
-        output_file (str): .json file to write, in MD format
-        allow_walk_down (bool, optional): should we walk down the taxonomy tree
-            when making mappings if a parent has only a single allowable child?
-            For example, if only a single felid species is allowed, should other
-            felid predictions be mapped to that species, as opposed to being mapped
-            to the family?
-    """
-
-    ##%% Read target taxa list
-
-    if isinstance(taxa_list,str):
-        assert os.path.isfile(taxa_list), \
-            'Could not find taxa list file {}'.format(taxa_list)
-        with open(taxa_list,'r') as f:
-            taxa_list = f.readlines()
-
-    taxa_list = [s.strip().lower() for s in taxa_list]
-    taxa_list = [s for s in taxa_list if len(s) > 0]
-
-    target_latin_to_common = {}
-    for s in taxa_list:
-        if s.strip().startswith('#'):
-            continue
-        tokens = s.split(',')
-        assert len(tokens) <= 2
-        binomial_name = tokens[0]
-        assert len(binomial_name.split(' ')) in (1,2,3), \
-            'Illegal binomial name in species list: {}'.format(binomial_name)
-        if len(tokens) > 0:
-            common_name = tokens[1].strip().lower()
-        else:
-            common_name = None
-        assert binomial_name not in target_latin_to_common
-        target_latin_to_common[binomial_name] = common_name
-
-
-    ##%% Read taxonomy file
-
-    with open(speciesnet_taxonomy_file,'r') as f:
-        speciesnet_taxonomy_list = f.readlines()
-    speciesnet_taxonomy_list = [s.strip() for s in \
-                                speciesnet_taxonomy_list if len(s.strip()) > 0]
-
-    # Maps the latin name of every taxon to the corresponding full taxon string
-    #
-    # For species, the key is a binomial name
-    speciesnet_latin_name_to_taxon_string = {}
-    speciesnet_common_name_to_taxon_string = {}
-
-    def _insert_taxonomy_string(s):
-
-        tokens = s.split(';')
-        assert len(tokens) == 7
-
-        guid = tokens[0] # noqa
-        class_name = tokens[1]
-        order = tokens[2]
-        family = tokens[3]
-        genus = tokens[4]
-        species = tokens[5]
-        common_name = tokens[6]
-
-        if len(class_name) == 0:
-            assert common_name in ('animal','vehicle','blank')
-            return
-
-        if len(species) > 0:
-            assert all([len(s) > 0 for s in [genus,family,order]])
-            binomial_name = genus + ' ' + species
-            if binomial_name not in speciesnet_latin_name_to_taxon_string:
-                speciesnet_latin_name_to_taxon_string[binomial_name] = s
-        elif len(genus) > 0:
-            assert all([len(s) > 0 for s in [family,order]])
-            if genus not in speciesnet_latin_name_to_taxon_string:
-                speciesnet_latin_name_to_taxon_string[genus] = s
-        elif len(family) > 0:
-            assert len(order) > 0
-            if family not in speciesnet_latin_name_to_taxon_string:
-                speciesnet_latin_name_to_taxon_string[family] = s
-        elif len(order) > 0:
-            if order not in speciesnet_latin_name_to_taxon_string:
-                speciesnet_latin_name_to_taxon_string[order] = s
-        else:
-            if class_name not in speciesnet_latin_name_to_taxon_string:
-                speciesnet_latin_name_to_taxon_string[class_name] = s
-
-        if len(common_name) > 0:
-            if common_name not in speciesnet_common_name_to_taxon_string:
-                speciesnet_common_name_to_taxon_string[common_name] = s
-
-    for s in speciesnet_taxonomy_list:
-
-        _insert_taxonomy_string(s)
-
-
-    ##%% Make sure all parent taxa are represented in the taxonomy
-
-    # In theory any taxon that appears as the parent of another taxon should
-    # also be in the taxonomy, but this isn't always true, so we fix it here.
-
-    new_taxon_string_to_missing_tokens = defaultdict(list)
-
-    # latin_name = next(iter(speciesnet_latin_name_to_taxon_string.keys()))
-    for latin_name in speciesnet_latin_name_to_taxon_string.keys():
-
-        if 'no cv result' in latin_name:
-            continue
-
-        taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
-        tokens = taxon_string.split(';')
-
-        # Don't process GUID, species, or common name
-        # i_token = 6
-        for i_token in range(1,len(tokens)-2):
-
-            test_token = tokens[i_token]
-            if len(test_token) == 0:
-                continue
-
-            # Do we need to make up a taxon for this token?
-            if test_token not in speciesnet_latin_name_to_taxon_string:
-
-                new_tokens = [''] * 7
-                new_tokens[0] = 'fake_guid'
-                for i_copy_token in range(1,i_token+1):
-                    new_tokens[i_copy_token] = tokens[i_copy_token]
-                new_tokens[-1] = test_token + ' species'
-                assert new_tokens[-2] == ''
-                new_taxon_string = ';'.join(new_tokens)
-                # assert new_taxon_string not in new_taxon_strings
-                new_taxon_string_to_missing_tokens[new_taxon_string].append(test_token)
-
-        # ...for each token
-
-    # ...for each taxon
-
-    print('Found {} taxa that need to be inserted to make the taxonomy valid:\n'.format(
-        len(new_taxon_string_to_missing_tokens)))
-
-    new_taxon_string_to_missing_tokens = \
-        sort_dictionary_by_key(new_taxon_string_to_missing_tokens)
-    for taxon_string in new_taxon_string_to_missing_tokens:
-        missing_taxa = ','.join(new_taxon_string_to_missing_tokens[taxon_string])
-        print('{} ({})'.format(taxon_string,missing_taxa))
-
-    for new_taxon_string in new_taxon_string_to_missing_tokens:
-        _insert_taxonomy_string(new_taxon_string)
-
-
-    ##%% Make sure all species on the allow-list are in the taxonomy
-
-    n_failed_mappings = 0
-
-    for target_taxon_latin_name in target_latin_to_common.keys():
-        if target_taxon_latin_name not in speciesnet_latin_name_to_taxon_string:
-            common_name = target_latin_to_common[target_taxon_latin_name]
-            s = '{} ({}) not in speciesnet taxonomy'.format(
-                target_taxon_latin_name,common_name)
-            if common_name in speciesnet_common_name_to_taxon_string:
-                s += ' (common name maps to {})'.format(
-                    speciesnet_common_name_to_taxon_string[common_name])
-            print(s)
-            n_failed_mappings += 1
-
-    if n_failed_mappings > 0:
-        raise ValueError('Cannot continue with geofence generation')
-
-
-    ##%% For the allow-list, map each parent taxon to a set of allowable child taxa
-
-    # Maps parent names to all allowed child names, or None if this is the
-    # lowest-level allowable taxon on this path
-    allowed_parent_taxon_to_child_taxa = defaultdict(set)
-
-    # latin_name = next(iter(target_latin_to_common.keys()))
-    for latin_name in target_latin_to_common:
-
-        taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
-        tokens = taxon_string.split(';')
-        assert len(tokens) == 7
-
-        # Remove GUID and common mame
-        #
-        # This is now always class/order/family/genus/species
-        tokens = tokens[1:-1]
-
-        child_taxon = None
-
-        # If this is a species
-        if len(tokens[-1]) > 0:
-            binomial_name = tokens[-2] + ' ' + tokens[-1]
-            assert binomial_name == latin_name
-            allowed_parent_taxon_to_child_taxa[binomial_name].add(None)
-            child_taxon = binomial_name
-
-        # The first candidate parent is the genus
-        parent_token_index = len(tokens) - 2
-
-        while(parent_token_index >= 0):
-
-            parent_taxon = tokens[parent_token_index]
-            allowed_parent_taxon_to_child_taxa[parent_taxon].add(child_taxon)
-            child_taxon = parent_taxon
-            parent_token_index -= 1
-
-    # ...for each allowed latin name
-
-    allowed_parent_taxon_to_child_taxa = \
-        sort_dictionary_by_key(allowed_parent_taxon_to_child_taxa)
-
-
-    ##%% Map all predictions that exist in this dataset...
-
-    # ...to the prediction we should generate.
-
-    with open(input_file,'r') as f:
-        input_data = json.load(f)
-
-    input_category_id_to_common_name = input_data['classification_categories'] #noqa
-    input_category_id_to_taxonomy_string = \
-        input_data['classification_category_descriptions']
-
-    input_category_id_to_output_taxon_string = {}
-
-    # input_category_id = next(iter(input_category_id_to_taxonomy_string.keys()))
-    for input_category_id in input_category_id_to_taxonomy_string.keys():
-
-        input_taxon_string = input_category_id_to_taxonomy_string[input_category_id]
-        input_taxon_tokens = input_taxon_string.split(';')
-        assert len(input_taxon_tokens) == 7
-
-        # Don't mess with blank/no-cv-result/animal/human
-        if (input_taxon_string in non_taxonomic_prediction_strings) or \
-           (input_taxon_string == human_prediction_string):
-            input_category_id_to_output_taxon_string[input_category_id] = \
-                input_taxon_string
-            continue
-
-        # Remove GUID and common mame
-        #
-        # This is now always class/order/family/genus/species
-        input_taxon_tokens = input_taxon_tokens[1:-1]
-
-        test_index = len(input_taxon_tokens) - 1
-        target_taxon = None
-
-        # Start at the species level, and see whether each taxon is allowed
-        while((test_index >= 0) and (target_taxon is None)):
-
-            # Species are represented as binomial names
-            if (test_index == (len(input_taxon_tokens) - 1)) and \
-               (len(input_taxon_tokens[-1]) > 0):
-                test_taxon_name = \
-                    input_taxon_tokens[-2] + ' ' + input_taxon_tokens[-1]
-            else:
-                test_taxon_name = input_taxon_tokens[test_index]
-
-            # If we haven't yet found the level at which this taxon is non-empty,
-            # keep going up
-            if len(test_taxon_name) == 0:
-                test_index -= 1
-                continue
-
-            assert test_taxon_name in speciesnet_latin_name_to_taxon_string
-
-            # Is this taxon allowed according to the custom species list?
-            if test_taxon_name in allowed_parent_taxon_to_child_taxa:
-
-                allowed_child_taxa = allowed_parent_taxon_to_child_taxa[test_taxon_name]
-                assert allowed_child_taxa is not None
-
-                # If this is the lowest-level allowable token or there is not a
-                # unique child, don't walk any further, even if walking down
-                # is enabled.
-                if (None in allowed_child_taxa):
-                    assert len(allowed_child_taxa) == 1
-
-                if (None in allowed_child_taxa) or (len(allowed_child_taxa) > 1):
-                    target_taxon = test_taxon_name
-                elif not allow_walk_down:
-                    target_taxon = test_taxon_name
-                else:
-                    # If there's a unique child, walk back *down* the allowable
-                    # taxa until we run out of unique children
-                    while ((next(iter(allowed_child_taxa)) is not None) and \
-                           (len(allowed_child_taxa) == 1)):
-                        candidate_taxon = next(iter(allowed_child_taxa))
-                        assert candidate_taxon in allowed_parent_taxon_to_child_taxa
-                        assert candidate_taxon in speciesnet_latin_name_to_taxon_string
-                        allowed_child_taxa = \
-                            allowed_parent_taxon_to_child_taxa[candidate_taxon]
-                        target_taxon = candidate_taxon
-
-            # ...if this is an allowed taxon
-
-            test_index -= 1
-
-        # ...for each token
-
-        if target_taxon is None:
-            output_taxon_string = animal_prediction_string
-        else:
-            output_taxon_string = speciesnet_latin_name_to_taxon_string[target_taxon]
-        input_category_id_to_output_taxon_string[input_category_id] = output_taxon_string
-
-    # ...for each category
-
-
-    ##%% Build the new tables
-
-    input_category_id_to_output_category_id = {}
-    output_taxon_string_to_category_id = {}
-    output_category_id_to_common_name = {}
-
-    for input_category_id in input_category_id_to_output_taxon_string:
-
-        original_common_name = \
-            input_category_id_to_common_name[input_category_id]
-        original_taxon_string = \
-            input_category_id_to_taxonomy_string[input_category_id]
-        output_taxon_string = \
-            input_category_id_to_output_taxon_string[input_category_id]
-
-        output_common_name = output_taxon_string.split(';')[-1]
-
-        # Do we need to create a new output category?
-        if output_taxon_string not in output_taxon_string_to_category_id:
-            output_category_id = str(len(output_taxon_string_to_category_id))
-            output_taxon_string_to_category_id[output_taxon_string] = \
-                output_category_id
-            output_category_id_to_common_name[output_category_id] = \
-                output_common_name
-        else:
-            output_category_id = \
-                output_taxon_string_to_category_id[output_taxon_string]
-
-        input_category_id_to_output_category_id[input_category_id] = \
-            output_category_id
-
-        if False:
-            print('Mapping {} ({}) to:\n{} ({})\n'.format(
-                original_common_name,original_taxon_string,
-                output_common_name,output_taxon_string))
-        if False:
-            print('Mapping {} to {}'.format(
-                original_common_name,output_common_name,))
-
-    # ...for each category
-
-
-    ##%% Remap all category labels
-
-    assert len(set(output_taxon_string_to_category_id.keys())) == \
-           len(set(output_taxon_string_to_category_id.values()))
-
-    output_category_id_to_taxon_string = \
-        invert_dictionary(output_taxon_string_to_category_id)
-
-    with open(input_file,'r') as f:
-        output_data = json.load(f)
-
-    for im in tqdm(output_data['images']):
-        if 'detections' in im and im['detections'] is not None:
-            for det in im['detections']:
-                if 'classifications' in det:
-                    for classification in det['classifications']:
-                        classification[0] = \
-                            input_category_id_to_output_category_id[classification[0]]
-
-    output_data['classification_categories'] = output_category_id_to_common_name
-    output_data['classification_category_descriptions'] = \
-        output_category_id_to_taxon_string
-
-
-    ##%% Write output
-
-    with open(output_file,'w') as f:
-        json.dump(output_data,f,indent=1)
-
-
 #%% Interactive driver(s)

 if False:
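
For readers tracking the removal above: restrict_to_taxa_list() is no longer present in wi_utils as of 5.0.28. As a reference, this is how the removed function was invoked (a sketch; all paths are hypothetical, and the taxa-list format is as described in the removed docstring):

    # One latin name per line, with an optional comma-delimited common name;
    # lines starting with '#' are ignored
    restrict_to_taxa_list(taxa_list='custom_species_list.txt',
                          speciesnet_taxonomy_file='taxonomy_release.txt',
                          input_file='md_results.json',
                          output_file='md_results_restricted.json',
                          allow_walk_down=False)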
@@ -2729,7 +2387,9 @@ if False:
     from megadetector.utils.wi_utils import taxonomy_string_to_taxonomy_info # noqa
     from megadetector.utils.wi_utils import common_name_to_taxonomy_info # noqa
     from megadetector.utils.wi_utils import binomial_name_to_taxonomy_info # noqa
-
+    from megadetector.utils.wi_utils import country_to_country_code # noqa
+    from megadetector.utils.wi_utils import country_code_to_country # noqa
+
     model_base = os.path.expanduser('~/models/speciesnet')
     geofencing_file = os.path.join(model_base,'crop','geofence_release.2025.02.27.0702.json')
     country_code_file = os.path.join(model_base,'country-codes.csv')
@@ -2740,10 +2400,10 @@ if False:
     initialize_taxonomy_info(taxonomy_file, force_init=True, encoding=encoding)


-    #%% Test driver for geofence_fixes.csv function
+    #%% Generate a block-except list

-    block_except_list = 'AUS, PNG, THA, IDN, MYS'
-    species = 'dingo'
+    block_except_list = 'ALB,AND,ARM,AUT,AZE,BEL,BGR,BIH,BLR,CHE,CYP,CZE,DEU,DNK,ESP,EST,FIN,FRA,GBR,GEO,GRC,HRV,HUN,IRL,IRN,IRQ,ISL,ISR,ITA,KAZ,LIE,LTU,LUX,LVA,MDA,MKD,MLT,MNE,NLD,NOR,POL,PRT,ROU,RUS,SMR,SRB,SVK,SVN,SWE,TUR,UKR,UZB'
+    species = 'eurasian badger'
     species_string = _species_string_to_canonical_species_string(species)
     rows = _generate_csv_rows_to_block_all_countries_except(species_string,block_except_list)

@@ -2751,7 +2411,7 @@ if False:
     print(rows)


-    #%%
+    #%% Generate an allow-list

     taxon_name = 'hippopotamus amphibius'
     taxonomy_info = binomial_name_to_taxonomy_info[taxon_name]
@@ -2763,9 +2423,7 @@ if False:
                                block_countries=None,
                                allow_states=None,
                                block_states=None,
-                               blockexcept_countries=None)
-
-    # _generate_csv_rows_to_block_all_countries_except(species_string,'AUS')
+                               blockexcept_countries=None)


     #%% Test the effects of geofence changes
@@ -183,7 +183,7 @@ def exif_preserving_save(pil_image,output_file,quality='keep',default_quality=85
     ...for more ways to preserve jpeg quality if quality='keep' doesn't do the trick.

     Args:
-        pil_image (Image): the PIL Image objct to save
+        pil_image (Image): the PIL Image object to save
         output_file (str): the destination file
         quality (str or int, optional): can be "keep" (default), or an integer from 0 to 100.
             This is only used if PIL thinks the the source image is a JPEG. If you load a JPEG
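
A hedged usage sketch of the function whose docstring is corrected above (assuming it lives in visualization_utils, one of the files in this diff; paths are hypothetical):

    from PIL import Image
    from megadetector.visualization.visualization_utils import exif_preserving_save

    pil_image = Image.open('input.jpg')  # hypothetical path
    # quality='keep' asks PIL to reuse the source JPEG's quantization tables
    exif_preserving_save(pil_image, 'output.jpg', quality='keep')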
@@ -1285,8 +1285,14 @@ def gray_scale_fraction(image,crop_size=(0.1,0.1)):


 def _resize_relative_image(fn_relative,
-                           input_folder,output_folder,
-                           target_width,target_height,no_enlarge_width,verbose,quality):
+                           input_folder,
+                           output_folder,
+                           target_width,
+                           target_height,
+                           no_enlarge_width,
+                           verbose,
+                           quality,
+                           overwrite=True):
     """
     Internal function for resizing an image from one folder to another,
     maintaining relative path.
@@ -1294,6 +1300,12 @@ def _resize_relative_image(fn_relative,

     input_fn_abs = os.path.join(input_folder,fn_relative)
     output_fn_abs = os.path.join(output_folder,fn_relative)
+
+    if (not overwrite) and (os.path.isfile(output_fn_abs)):
+        status = 'skipped'
+        error = None
+        return {'fn_relative':fn_relative,'status':status,'error':error}
+
     os.makedirs(os.path.dirname(output_fn_abs),exist_ok=True)
     try:
         _ = resize_image(input_fn_abs,
@@ -1435,7 +1447,8 @@ def resize_image_folder(input_folder,
                         pool_type='process',
                         n_workers=10,
                         recursive=True,
-                        image_files_relative=None):
+                        image_files_relative=None,
+                        overwrite=True):
     """
     Resize all images in a folder (defaults to recursive).

@@ -1461,12 +1474,13 @@ def resize_image_folder(input_folder,
             to disable parallelization
         recursive (bool, optional): whether to search [input_folder] recursively for images.
         image_files_relative (list, optional): if not None, skips any relative paths not
-            in this list.
+            in this list
+        overwrite (bool, optional): whether to overwrite existing target images

     Returns:
         list: a list of dicts with keys 'input_fn', 'output_fn', 'status', and 'error'.
-            'status' will be 'success' or 'error'; 'error' will be None for successful cases,
-            otherwise will contain the image-specific error.
+            'status' will be 'success', 'skipped', or 'error'; 'error' will be None for successful
+            cases, otherwise will contain the image-specific error.
     """

     assert os.path.isdir(input_folder), '{} is not a folder'.format(input_folder)
@@ -1502,7 +1516,8 @@ def resize_image_folder(input_folder,
                                                   target_height=target_height,
                                                   no_enlarge_width=no_enlarge_width,
                                                   verbose=verbose,
-                                                  quality=quality))
+                                                  quality=quality,
+                                                  overwrite=overwrite))

     else:

@@ -1522,7 +1537,8 @@ def resize_image_folder(input_folder,
                     target_height=target_height,
                     no_enlarge_width=no_enlarge_width,
                     verbose=verbose,
-                    quality=quality)
+                    quality=quality,
+                    overwrite=overwrite)

     results = list(tqdm(pool.imap(p, image_files_relative),total=len(image_files_relative)))
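
A usage sketch of the new overwrite behavior (assuming resize_image_folder's first two positional arguments are the input and output folders; paths and the target size are hypothetical):

    from megadetector.visualization.visualization_utils import resize_image_folder

    # Resume an interrupted resize job: with overwrite=False, images that already
    # exist in the output folder are reported with status 'skipped'
    results = resize_image_folder('/data/images', '/data/images-resized',  # hypothetical paths
                                  target_width=1600,
                                  overwrite=False)
    n_skipped = sum(1 for r in results if r['status'] == 'skipped')
    print('Skipped {} previously-resized images'.format(n_skipped))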