megadetector 5.0.29__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (95)
  1. megadetector/classification/efficientnet/model.py +8 -8
  2. megadetector/classification/efficientnet/utils.py +6 -5
  3. megadetector/classification/prepare_classification_script_mc.py +3 -3
  4. megadetector/data_management/annotations/annotation_constants.py +0 -1
  5. megadetector/data_management/camtrap_dp_to_coco.py +34 -1
  6. megadetector/data_management/cct_json_utils.py +2 -2
  7. megadetector/data_management/coco_to_yolo.py +22 -5
  8. megadetector/data_management/databases/add_width_and_height_to_db.py +85 -12
  9. megadetector/data_management/databases/combine_coco_camera_traps_files.py +2 -2
  10. megadetector/data_management/databases/integrity_check_json_db.py +29 -15
  11. megadetector/data_management/generate_crops_from_cct.py +50 -1
  12. megadetector/data_management/labelme_to_coco.py +4 -2
  13. megadetector/data_management/labelme_to_yolo.py +82 -2
  14. megadetector/data_management/lila/generate_lila_per_image_labels.py +276 -18
  15. megadetector/data_management/lila/get_lila_annotation_counts.py +5 -3
  16. megadetector/data_management/lila/lila_common.py +3 -0
  17. megadetector/data_management/lila/test_lila_metadata_urls.py +15 -5
  18. megadetector/data_management/mewc_to_md.py +5 -0
  19. megadetector/data_management/ocr_tools.py +4 -3
  20. megadetector/data_management/read_exif.py +20 -5
  21. megadetector/data_management/remap_coco_categories.py +66 -4
  22. megadetector/data_management/remove_exif.py +50 -1
  23. megadetector/data_management/rename_images.py +3 -3
  24. megadetector/data_management/resize_coco_dataset.py +563 -95
  25. megadetector/data_management/yolo_output_to_md_output.py +131 -2
  26. megadetector/data_management/yolo_to_coco.py +140 -5
  27. megadetector/detection/change_detection.py +4 -3
  28. megadetector/detection/pytorch_detector.py +60 -22
  29. megadetector/detection/run_detector.py +225 -25
  30. megadetector/detection/run_detector_batch.py +42 -16
  31. megadetector/detection/run_inference_with_yolov5_val.py +12 -2
  32. megadetector/detection/run_tiled_inference.py +1 -0
  33. megadetector/detection/video_utils.py +53 -24
  34. megadetector/postprocessing/add_max_conf.py +4 -0
  35. megadetector/postprocessing/categorize_detections_by_size.py +1 -1
  36. megadetector/postprocessing/classification_postprocessing.py +55 -20
  37. megadetector/postprocessing/combine_batch_outputs.py +3 -2
  38. megadetector/postprocessing/compare_batch_results.py +64 -10
  39. megadetector/postprocessing/convert_output_format.py +12 -8
  40. megadetector/postprocessing/create_crop_folder.py +137 -10
  41. megadetector/postprocessing/load_api_results.py +26 -8
  42. megadetector/postprocessing/md_to_coco.py +4 -4
  43. megadetector/postprocessing/md_to_labelme.py +18 -7
  44. megadetector/postprocessing/merge_detections.py +5 -0
  45. megadetector/postprocessing/postprocess_batch_results.py +6 -3
  46. megadetector/postprocessing/remap_detection_categories.py +55 -2
  47. megadetector/postprocessing/render_detection_confusion_matrix.py +9 -6
  48. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
  49. megadetector/taxonomy_mapping/map_new_lila_datasets.py +3 -4
  50. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +40 -19
  51. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +1 -1
  52. megadetector/taxonomy_mapping/species_lookup.py +123 -41
  53. megadetector/utils/ct_utils.py +133 -113
  54. megadetector/utils/md_tests.py +93 -13
  55. megadetector/utils/path_utils.py +137 -107
  56. megadetector/utils/split_locations_into_train_val.py +2 -2
  57. megadetector/utils/string_utils.py +7 -7
  58. megadetector/utils/url_utils.py +81 -58
  59. megadetector/utils/wi_utils.py +46 -17
  60. megadetector/visualization/plot_utils.py +13 -9
  61. megadetector/visualization/render_images_with_thumbnails.py +2 -1
  62. megadetector/visualization/visualization_utils.py +94 -46
  63. megadetector/visualization/visualize_db.py +36 -9
  64. megadetector/visualization/visualize_detector_output.py +4 -4
  65. {megadetector-5.0.29.dist-info → megadetector-10.0.0.dist-info}/METADATA +135 -135
  66. megadetector-10.0.0.dist-info/RECORD +139 -0
  67. {megadetector-5.0.29.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  68. {megadetector-5.0.29.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
  69. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  70. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  71. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -438
  72. megadetector/api/batch_processing/api_core/server.py +0 -294
  73. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  74. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  75. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  76. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  77. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  78. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  79. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  80. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  81. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  82. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  83. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  84. megadetector/api/synchronous/__init__.py +0 -0
  85. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  86. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  87. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  88. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  89. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  90. megadetector/api/synchronous/api_core/tests/load_test.py +0 -109
  91. megadetector/utils/azure_utils.py +0 -178
  92. megadetector/utils/sas_blob_utils.py +0 -513
  93. megadetector-5.0.29.dist-info/RECORD +0 -163
  94. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  95. {megadetector-5.0.29.dist-info → megadetector-10.0.0.dist-info}/WHEEL +0 -0
megadetector/utils/split_locations_into_train_val.py

@@ -64,8 +64,8 @@ def split_locations_into_train_val(location_to_category_counts,
         default_max_allowable_error (float, optional): the maximum allowable error for categories not
             present in [category_to_max_allowable_error]. Set to None (or >= 1.0) to disable hard
             constraints for categories not present in [category_to_max_allowable_error]
-        require_complete_coverage (bool, optional): require that every category appear in both train and
-            val
+        require_complete_coverage (bool, optional): require that every category appear in both train
+            and val
 
     Returns:
         tuple: A two-element tuple:
megadetector/utils/string_utils.py

@@ -26,7 +26,7 @@ def is_float(s):
 
     if s is None:
         return False
-
+
     try:
         _ = float(s)
     except ValueError:
@@ -53,7 +53,7 @@ def human_readable_to_bytes(size):
 
     if not size: # Handle empty string case after stripping spaces
         return 0
-
+
     if (size[-1] == 'B'):
         size = size[:-1]
 
@@ -70,7 +70,7 @@ def human_readable_to_bytes(size):
     # Need to separate numeric part from unit more carefully.
     numeric_part = ''
     unit_part = ''
-
+
     # Iterate from the end to find the unit (K, M, G, T)
     # This handles cases like "10KB" or "2.5GB"
     for i in range(len(size) -1, -1, -1):
@@ -79,7 +79,7 @@ def human_readable_to_bytes(size):
         else:
             numeric_part = size[:i+1]
             break
-
+
     # If no unit found, or numeric part is empty after stripping unit
     if not unit_part or not numeric_part:
         return 0
@@ -97,8 +97,8 @@ def human_readable_to_bytes(size):
             bytes_val *= 1024
         else:
             # If it's a known unit (like 'B' already stripped) but not T/G/M/K,
-            # and it was floatable, it's just bytes. If it's an unknown unit, it's
-            # an error.
+            # and it was floatable, it's just bytes. If it's an unknown unit, it's
+            # an error.
             if unit not in ['B', '']: # 'B' was stripped, '' means just a number
                 bytes_val = 0
     except ValueError:
@@ -165,7 +165,7 @@ class TestStringUtils:
         assert human_readable_to_bytes("1GB") == 1024*1024*1024
         assert human_readable_to_bytes("1T") == 1024*1024*1024*1024
         assert human_readable_to_bytes("1TB") == 1024*1024*1024*1024
-
+
         assert human_readable_to_bytes("2.5K") == 2.5 * 1024
         assert human_readable_to_bytes("0.5MB") == 0.5 * 1024 * 1024
 
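The string_utils changes above are whitespace and comment formatting only; behavior is unchanged. For reference, a minimal usage sketch of the two helpers touched here (the import path megadetector.utils.string_utils is taken from the file list above):

from megadetector.utils.string_utils import is_float, human_readable_to_bytes

assert is_float('3.14') and not is_float('abc')

# These mirror the test assertions in the hunk above
assert human_readable_to_bytes('1GB') == 1024 ** 3
assert human_readable_to_bytes('2.5K') == 2.5 * 1024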
megadetector/utils/url_utils.py

@@ -13,9 +13,8 @@ import re
 import urllib
 import urllib.request
 import urllib.error
-import tempfile
-import requests
-import shutil
+import requests
+import shutil
 import pytest
 
 from functools import partial
@@ -53,7 +52,7 @@ class DownloadProgressBar:
             self.pbar = progressbar.ProgressBar(max_value=total_size)
             self.pbar.start()
         except ImportError:
-            self.pbar = None
+            self.pbar = None
             # print("ProgressBar not available, install 'progressbar2' for visual progress.")
 
         if self.pbar:
@@ -108,9 +107,9 @@ def download_url(url,
 
     # This does not guarantee uniqueness, hence "semi-best-effort"
     url_as_filename = re.sub(r'\W+', '', url_without_sas)
-
+
     n_folder_chars = len(target_folder)
-
+
     if (len(url_as_filename) + n_folder_chars) >= max_path_len:
         print('Warning: truncating filename target to {} characters'.format(max_path_len))
         max_fn_len = max_path_len - (n_folder_chars + 1)
@@ -202,15 +201,18 @@ def _do_parallelized_download(download_info,overwrite=False,verbose=False):
 # ...def _do_parallelized_download(...)
 
 
-def parallel_download_urls(url_to_target_file, verbose=False, overwrite=False,
-                           n_workers=20, pool_type='thread'):
+def parallel_download_urls(url_to_target_file,
+                           verbose=False,
+                           overwrite=False,
+                           n_workers=20,
+                           pool_type='thread'):
     """
     Downloads a list of URLs to local files.
 
     Catches exceptions and reports them in the returned "results" array.
 
     Args:
-        url_to_target_file: a dict mapping URLs to local filenames.
+        url_to_target_file (dict): a dict mapping URLs to local filenames.
         verbose (bool, optional): enable additional debug console output
         overwrite (bool, optional): whether to overwrite existing local files
         n_workers (int, optional): number of concurrent workers, set to <=1 to disable
@@ -229,7 +231,7 @@ def parallel_download_urls(url_to_target_file, verbose=False, overwrite=False,
 
     if verbose:
         print('Preparing download list')
-    for url in tqdm(url_to_target_file, disable=(not verbose)):
+    for url in tqdm(url_to_target_file, disable=(not verbose)):
         download_info = {}
         download_info['url'] = url
         download_info['target_file'] = url_to_target_file[url]
@@ -341,7 +343,7 @@ def test_urls(urls,error_on_failure=True,n_workers=1,pool_type='thread',timeout=
     else:
         assert pool_type == 'process', 'Unsupported pool type {}'.format(pool_type)
         pool = Pool(n_workers)
-
+
     if verbose:
         print('Starting a {} pool with {} workers'.format(pool_type,n_workers))
 
@@ -439,7 +441,7 @@ def get_url_sizes(urls,n_workers=1,pool_type='thread',timeout=None,verbose=False
         file_sizes = list(tqdm(pool.imap(
             partial(get_url_size,verbose=verbose,timeout=timeout),
             urls), total=len(urls), disable=(not verbose)))
-
+
         for i_url,url in enumerate(urls):
             url_to_size[url] = file_sizes[i_url]
     finally:
@@ -456,8 +458,8 @@ def get_url_sizes(urls,n_workers=1,pool_type='thread',timeout=None,verbose=False
 # Constants for tests
 
 SMALL_FILE_URL = "https://www.google.com/images/branding/googlelogo/1x/googlelogo_color_272x92dp.png"
-REDIRECT_SRC_URL = "http://google.com"
-REDIRECT_DEST_URL = "https://www.google.com/"
+REDIRECT_SRC_URL = "http://google.com"
+REDIRECT_DEST_URL = "https://www.google.com/"
 NON_EXISTENT_URL = "https://example.com/non_existent_page_404.html"
 DEFINITELY_NON_EXISTENT_DOMAIN_URL = "https://thisshouldnotexist1234567890.com/file.txt"
 RELATIVE_DOWNLOAD_URL = "https://raw.githubusercontent.com/agentmorris/MegaDetector/main/README.md"
@@ -470,13 +472,12 @@ class TestUrlUtils:
     Tests for url_utils.py
     """
 
-
    def set_up(self):
        """
        Create a temporary directory for testing.
        """
 
-        self.test_dir = make_test_folder(subfolder='url_utils_tests')
+        self.test_dir = make_test_folder(subfolder='url_utils_tests')
         self.download_target_dir = os.path.join(self.test_dir, 'downloads')
         os.makedirs(self.download_target_dir, exist_ok=True)
 
@@ -488,7 +489,7 @@ class TestUrlUtils:
 
         if os.path.exists(self.test_dir):
             shutil.rmtree(self.test_dir)
-
+
 
     def test_download_url_to_specified_file(self):
         """
@@ -496,7 +497,9 @@ class TestUrlUtils:
         """
 
         dest_filename = os.path.join(self.download_target_dir, "downloaded_google_logo.png")
-        returned_filename = download_url(SMALL_FILE_URL, destination_filename=dest_filename, verbose=False)
+        returned_filename = download_url(SMALL_FILE_URL,
+                                         destination_filename=dest_filename,
+                                         verbose=False)
         assert returned_filename == dest_filename
         assert os.path.exists(dest_filename)
         assert os.path.getsize(dest_filename) > 1000
@@ -507,10 +510,12 @@ class TestUrlUtils:
         Test download_url when destination_filename is None.
         """
 
-        returned_filename = download_url(SMALL_FILE_URL, destination_filename=None, verbose=False)
+        returned_filename = download_url(SMALL_FILE_URL,
+                                         destination_filename=None,
+                                         verbose=False)
         assert os.path.exists(returned_filename)
         assert os.path.getsize(returned_filename) > 1000
-
+
 
     def test_download_url_non_existent(self):
         """
@@ -520,17 +525,19 @@ class TestUrlUtils:
         dest_filename = os.path.join(self.download_target_dir, "non_existent.html")
         try:
             download_url(NON_EXISTENT_URL, destination_filename=dest_filename, verbose=False)
-            assert False, "urllib.error.HTTPError not raised for 404"
+            raise AssertionError("urllib.error.HTTPError not raised for 404")
         except urllib.error.HTTPError:
             pass
-
+
         try:
-            download_url(DEFINITELY_NON_EXISTENT_DOMAIN_URL, destination_filename=dest_filename, verbose=False)
-            assert False, \
-                "urllib.error.URLError or requests.exceptions.ConnectionError not raised for DNS failure"
-        except urllib.error.URLError:
+            download_url(DEFINITELY_NON_EXISTENT_DOMAIN_URL,
+                         destination_filename=dest_filename,
+                         verbose=False)
+            raise AssertionError(
+                "urllib.error.URLError or requests.exceptions.ConnectionError not raised for DNS failure")
+        except urllib.error.URLError:
             pass
-        except requests.exceptions.ConnectionError:
+        except requests.exceptions.ConnectionError:
             pass
 
 
@@ -540,15 +547,18 @@ class TestUrlUtils:
         """
 
         dest_filename = os.path.join(self.download_target_dir, "force_test.png")
-
+
         download_url(SMALL_FILE_URL, destination_filename=dest_filename, verbose=False)
         assert os.path.exists(dest_filename)
         initial_mtime = os.path.getmtime(dest_filename)
 
-        download_url(SMALL_FILE_URL, destination_filename=dest_filename, verbose=True)
+        download_url(SMALL_FILE_URL, destination_filename=dest_filename, verbose=True)
         assert os.path.getmtime(dest_filename) == initial_mtime
 
-        download_url(SMALL_FILE_URL, destination_filename=dest_filename, force_download=True, verbose=False)
+        download_url(SMALL_FILE_URL,
+                     destination_filename=dest_filename,
+                     force_download=True,
+                     verbose=False)
         assert os.path.exists(dest_filename)
 
 
@@ -558,7 +568,10 @@ class TestUrlUtils:
         """
 
         dest_filename = os.path.join(self.download_target_dir, "escape_test.png")
-        download_url(SMALL_FILE_URL, destination_filename=dest_filename, escape_spaces=True, verbose=False)
+        download_url(SMALL_FILE_URL,
+                     destination_filename=dest_filename,
+                     escape_spaces=True,
+                     verbose=False)
         assert os.path.exists(dest_filename)
 
 
@@ -567,7 +580,7 @@ class TestUrlUtils:
         Test download_relative_filename.
         """
 
-        output_base = os.path.join(self.download_target_dir, "relative_dl")
+        output_base = os.path.join(self.download_target_dir, "relative_dl")
         returned_filename = download_relative_filename(RELATIVE_DOWNLOAD_URL, output_base, verbose=False)
         assert RELATIVE_DOWNLOAD_CONTAIN_TOKEN in returned_filename
         assert RELATIVE_DOWNLOAD_NOT_CONTAIN_TOKEN not in returned_filename
@@ -582,32 +595,38 @@ class TestUrlUtils:
 
         url1_target = os.path.join(self.download_target_dir, "parallel_dl_1.png")
         url2_target = os.path.join(self.download_target_dir, "parallel_dl_2_nonexistent.html")
-
+
         url_to_target_file = {
             SMALL_FILE_URL: url1_target,
             NON_EXISTENT_URL: url2_target
         }
-
+
         results = parallel_download_urls(url_to_target_file, n_workers=1, verbose=False)
-
+
         assert len(results) == 2
-
+
         status_map = {res['url']: res for res in results}
-
+
         assert status_map[SMALL_FILE_URL]['status'] == 'success'
         assert status_map[SMALL_FILE_URL]['target_file'] == url1_target
         assert os.path.exists(url1_target)
-
+
         assert status_map[NON_EXISTENT_URL]['status'].startswith('error: HTTP Error 404')
         assert status_map[NON_EXISTENT_URL]['target_file'] == url2_target
         assert not os.path.exists(url2_target)
 
         if not os.path.exists(url1_target):
             download_url(SMALL_FILE_URL, url1_target, verbose=False)
-        results_skip = parallel_download_urls({SMALL_FILE_URL: url1_target}, n_workers=1, overwrite=False, verbose=True)
+        results_skip = parallel_download_urls({SMALL_FILE_URL: url1_target},
+                                              n_workers=1,
+                                              overwrite=False,
+                                              verbose=True)
         assert results_skip[0]['status'] == 'skipped'
 
-        results_overwrite = parallel_download_urls({SMALL_FILE_URL: url1_target}, n_workers=1, overwrite=True, verbose=False)
+        results_overwrite = parallel_download_urls({SMALL_FILE_URL: url1_target},
+                                                   n_workers=1,
+                                                   overwrite=True,
+                                                   verbose=False)
         assert results_overwrite[0]['status'] == 'success'
 
 
@@ -620,20 +639,22 @@ class TestUrlUtils:
         assert test_url(REDIRECT_SRC_URL, error_on_failure=False, timeout=10) in (200,301)
 
         status_non_existent = test_url(NON_EXISTENT_URL, error_on_failure=False, timeout=5)
-        assert status_non_existent == 404
-
+        assert status_non_existent == 404
+
         try:
             test_url(NON_EXISTENT_URL, error_on_failure=True, timeout=5)
-            assert False, "ValueError not raised for NON_EXISTENT_URL"
+            raise AssertionError("ValueError not raised for NON_EXISTENT_URL")
         except ValueError:
             pass
 
         try:
-            test_url(DEFINITELY_NON_EXISTENT_DOMAIN_URL, error_on_failure=True, timeout=5)
-            assert False, "requests.exceptions.ConnectionError or urllib.error.URLError not raised"
-        except requests.exceptions.ConnectionError:
+            test_url(DEFINITELY_NON_EXISTENT_DOMAIN_URL,
+                     error_on_failure=True,
+                     timeout=5)
+            raise AssertionError("requests.exceptions.ConnectionError or urllib.error.URLError not raised")
+        except requests.exceptions.ConnectionError:
             pass
-        except urllib.error.URLError:
+        except urllib.error.URLError:
             pass
 
 
@@ -645,10 +666,10 @@ class TestUrlUtils:
 
         try:
             test_urls(urls_to_test, error_on_failure=True, n_workers=1, timeout=5)
-            assert False, "ValueError not raised for urls_to_test"
+            raise AssertionError("ValueError not raised for urls_to_test")
         except ValueError:
             pass
-
+
         good_urls = [SMALL_FILE_URL, REDIRECT_SRC_URL]
         good_status_codes = test_urls(good_urls, error_on_failure=True, n_workers=1, timeout=10)
         assert good_status_codes == [200, 200]
@@ -661,34 +682,36 @@ class TestUrlUtils:
 
         size = get_url_size(SMALL_FILE_URL, timeout=10)
         assert size is not None
-        assert size > 1000
+        assert size > 1000
 
         size_dynamic = get_url_size(REDIRECT_DEST_URL, timeout=10, verbose=True)
         if size_dynamic is not None:
             assert isinstance(size_dynamic, int)
-
+
         size_non_existent = get_url_size(NON_EXISTENT_URL, timeout=5)
         assert size_non_existent is None
-
+
         size_bad_domain = get_url_size(DEFINITELY_NON_EXISTENT_DOMAIN_URL, timeout=5)
         assert size_bad_domain is None
 
         urls_for_size = [SMALL_FILE_URL, NON_EXISTENT_URL, REDIRECT_DEST_URL]
         sizes_map = get_url_sizes(urls_for_size, n_workers=1, timeout=10)
-
+
         assert SMALL_FILE_URL in sizes_map
-        assert sizes_map[SMALL_FILE_URL] == size
-
+        assert sizes_map[SMALL_FILE_URL] == size
+
         assert NON_EXISTENT_URL in sizes_map
         assert sizes_map[NON_EXISTENT_URL] is None
-
+
         assert REDIRECT_DEST_URL in sizes_map
         assert sizes_map[REDIRECT_DEST_URL] == size_dynamic
 
 
-def test_url_utils():
+def _test_url_utils():
     """
-    Runs all tests in the TestUrlUtils class.
+    Runs all tests in the TestUrlUtils class. I generally disable this during testing
+    because it creates irritating nondeterminism, and this is neither a core module nor
+    a module that changes often.
     """
 
     test_instance = TestUrlUtils()
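Most of the url_utils changes above are formatting (one argument per line at call sites, AssertionError instead of assert False in tests), plus the rename of test_url_utils to _test_url_utils. As a reference for the signatures shown in these hunks, a minimal usage sketch; the URLs and local paths are placeholders, and the import path follows the file list above:

from megadetector.utils.url_utils import download_url, parallel_download_urls

# Download a single file; the return value is the local filename
local_file = download_url('https://example.com/camera/image_0001.jpg',
                          destination_filename='/tmp/image_0001.jpg',
                          verbose=True)

# Download several files in parallel; per the tests above, each result dict
# carries 'url', 'target_file', and a 'status' string ('success', 'skipped',
# or 'error: ...')
results = parallel_download_urls({'https://example.com/a.jpg': '/tmp/a.jpg',
                                  'https://example.com/b.jpg': '/tmp/b.jpg'},
                                 n_workers=2,
                                 pool_type='thread',
                                 overwrite=False)
failed = [r for r in results if r['status'].startswith('error')]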
megadetector/utils/wi_utils.py

@@ -465,8 +465,9 @@ def write_download_commands(image_records_to_download,
         force_download (bool, optional): include gs commands even if the target file exists
         n_download_workers (int, optional): number of scripts to write (that's our hacky way
             of controlling parallelization)
-        download_command_file (str, optional): path of the .sh script we should write, defaults
-            to "download_wi_images.sh" in the destination folder
+        download_command_file_base (str, optional): path of the .sh script we should write, defaults
+            to "download_wi_images.sh" in the destination folder. Individual worker scripts will
+            have a number added, e.g. download_wi_images_00.sh.
     """
 
     if isinstance(image_records_to_download,dict):
@@ -1069,7 +1070,7 @@ def generate_whole_image_detections_for_classifications(classifications_json_fil
                                                          ensemble_json_file=None,
                                                          ignore_blank_classifications=True):
     """
-    Given a set of classification results in SpeciesNet format that were likely run on
+    Given a set of classification results in SpeciesNet format that were likely run on
     already-cropped images, generate a file of [fake] detections in SpeciesNet format in which each
     image is covered in a single whole-image detection.
 
@@ -1485,6 +1486,8 @@ def generate_instances_json_from_folder(folder,
     Args:
         folder (str): the folder to recursively search for images
         country (str, optional): a three-letter country code
+        admin1_region (str, optional): an administrative region code, typically a two-letter
+            US state code
         lat (float, optional): latitude to associate with all images
         lon (float, optional): longitude to associate with all images
         output_file (str, optional): .json file to which we should write instance records
@@ -1590,8 +1593,8 @@ def merge_prediction_json_files(input_prediction_files,output_prediction_file):
     Merge all predictions.json files in [files] into a single .json file.
 
     Args:
-        files (list): list of predictions.json files to merge
-        output_file (str): output .json file
+        input_prediction_files (list): list of predictions.json files to merge
+        output_prediction_file (str): output .json file
     """
 
     predictions = []
@@ -2074,8 +2077,7 @@ def generate_csv_rows_for_species(species_string,
                                   allow_countries=None,
                                   block_countries=None,
                                   allow_states=None,
-                                  block_states=None,
-                                  blockexcept_countries=None):
+                                  block_states=None):
     """
     Generate rows in the format expected by geofence_fixes.csv, representing a list of
     allow and/or block rules for the specified species and countries/states. Does not check
@@ -2084,13 +2086,13 @@ def generate_csv_rows_for_species(species_string,
 
     Args:
         species_string (str): five-token string in semicolon-delimited WI taxonomy format
-        allow_countries (optional, list or str): three-letter country codes, list of
+        allow_countries (list or str, optional): three-letter country codes, list of
            country codes, or comma-separated list of country codes to allow
-        block_countries (optional, list or str): three-letter country codes, list of
+        block_countries (list or str, optional): three-letter country codes, list of
            country codes, or comma-separated list of country codes to block
-        allow_states (optional, list or str): two-letter state codes, list of
+        allow_states (list or str, optional): two-letter state codes, list of
            state codes, or comma-separated list of state codes to allow
-        block_states (optional, list or str): two-letter state code, list of
+        block_states (list or str, optional): two-letter state code, list of
            state codes, or comma-separated list of state codes to block
 
     Returns:
@@ -2502,6 +2504,31 @@ if False:
     initialize_geofencing(geofencing_file, country_code_file, force_init=True)
     initialize_taxonomy_info(taxonomy_file, force_init=True, encoding=encoding)
 
+    from megadetector.utils.path_utils import open_file; open_file(geofencing_file)
+
+
+    #%% Generate a block list
+
+    taxon_name = 'cercopithecidae'
+    taxonomy_info = binomial_name_to_taxonomy_info[taxon_name]
+    taxonomy_string_short = taxonomy_info_to_taxonomy_string(taxonomy_info)
+    assert len(taxonomy_string_short.split(';')) == 5
+
+    block_list = 'ATG,BHS,BRB,BLZ,CAN,CRI,CUB,DMA,DOM,SLV,GRD,GTM,HTI,HND,JAM,' + \
+                 'MEX,NIC,PAN,KNA,LCA,VCT,TTO,USA,ARG,BOL,BRA,CHL,COL,ECU,GUY,PRY,PER,' + \
+                 'SUR,URY,VEN,ALB,AND,ARM,AUT,AZE,BLR,BEL,BIH,BGR,HRV,CYP,CZE,DNK,EST,FIN,' + \
+                 'FRA,GEO,DEU,GRC,HUN,ISL,IRL,ITA,KAZ,XKX,LVA,LIE,LTU,LUX,MLT,MDA,MCO,MNE,' + \
+                 'NLD,MKD,NOR,POL,PRT,ROU,RUS,SMR,SRB,SVK,SVN,ESP,SWE,CHE,TUR,UKR,GBR,VAT,AUS'
+
+    rows = generate_csv_rows_for_species(species_string=taxonomy_string_short,
+                                         allow_countries=None,
+                                         block_countries=block_list,
+                                         allow_states=None,
+                                         block_states=None)
+
+    # import clipboard; clipboard.copy('\n'.join(rows))
+    print(rows)
+
 
     #%% Generate a block-except list
 
@@ -2521,12 +2548,14 @@ if False:
     taxonomy_string_short = taxonomy_info_to_taxonomy_string(taxonomy_info)
     assert len(taxonomy_string_short.split(';')) == 5
 
-    generate_csv_rows_for_species(species_string=taxonomy_string_short,
-                                  allow_countries=['AUS'],
-                                  block_countries=None,
-                                  allow_states=None,
-                                  block_states=None,
-                                  blockexcept_countries=None)
+    rows = generate_csv_rows_for_species(species_string=taxonomy_string_short,
+                                         allow_countries=['AUS'],
+                                         block_countries=None,
+                                         allow_states=None,
+                                         block_states=None)
+
+    # import clipboard; clipboard.copy('\n'.join(rows))
+    print(rows)
 
 
     #%% Test the effects of geofence changes
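The wi_utils changes are mostly docstring corrections (the docstring for merge_prediction_json_files now matches its actual parameter names) plus the removal of the unused blockexcept_countries parameter from generate_csv_rows_for_species. A minimal sketch of the corrected merge_prediction_json_files call, with placeholder paths and the import path assumed from the file list above:

from megadetector.utils.wi_utils import merge_prediction_json_files

# Merge several per-chunk SpeciesNet predictions.json files into one file
merge_prediction_json_files(
    input_prediction_files=['chunk_000/predictions.json',
                            'chunk_001/predictions.json'],
    output_prediction_file='predictions_merged.json')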
megadetector/visualization/plot_utils.py

@@ -37,14 +37,14 @@ def plot_confusion_matrix(matrix,
         normalize (bool, optional): whether to perform row-wise normalization;
             by default, assumes values in the confusion matrix are percentages
         title (str, optional): figure title
-        cmap (matplotlib.colors.colormap): colormap for cell backgrounds
-        vmax (float, optional), value corresponding to the largest value of the colormap;
+        cmap (matplotlib.colors.colormap, optional): colormap for cell backgrounds
+        vmax (float, optional): value corresponding to the largest value of the colormap;
            if None, the maximum value in [matrix] will be used
         use_colorbar (bool, optional): whether to show colorbar
         y_label (bool, optional): whether to show class names on the y axis
-        fmt (str): format string for rendering numeric values
-        fig (Figure): existing figure to which we should render, otherwise creates
-            a new figure
+        fmt (str, optional): format string for rendering numeric values
+        fig (Figure, optional): existing figure to which we should render, otherwise
+            creates a new figure
 
     Returns:
         matplotlib.figure.Figure: the figure we rendered to or created
@@ -140,8 +140,12 @@ def plot_precision_recall_curve(precisions,
     return fig
 
 
-def plot_stacked_bar_chart(data, series_labels=None, col_labels=None,
-                           x_label=None, y_label=None, log_scale=False):
+def plot_stacked_bar_chart(data,
+                           series_labels=None,
+                           col_labels=None,
+                           x_label=None,
+                           y_label=None,
+                           log_scale=False):
     """
     Plot a stacked bar chart, for plotting e.g. species distribution across locations.
 
@@ -154,7 +158,7 @@ def plot_stacked_bar_chart(data, series_labels=None, col_labels=None,
         col_labels (list of str, optional): column labels, typically location names
         x_label (str, optional): x-axis label
         y_label (str, optional): y-axis label
-        log_scale (bool, optional) whether to plot the y axis in log-scale
+        log_scale (bool, optional): whether to plot the y axis in log-scale
 
     Returns:
         matplotlib.figure.Figure: the (new) figure
@@ -257,7 +261,7 @@ def plot_calibration_curve(true_scores, pred_scores, num_bins,
         plot_perf (bool, optional): whether to plot y=x line indicating perfect calibration
         plot_hist (bool, optional): whether to plot histogram of counts
         ax (Axes, optional): if given then no legend is drawn, and fig_kwargs are ignored
-        fig_kwargs (dict, optional): only used if [ax] is None
+        fig_kwargs (dict): only used if [ax] is None
 
     Returns:
         matplotlib.figure.Figure: the (new) figure
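plot_stacked_bar_chart now takes one argument per line; its behavior is unchanged. A hedged usage sketch based only on the signature and docstring above; the assumption that data is a 2-D array with one row per series (e.g. species) and one column per location is not spelled out in this diff:

import numpy as np
from megadetector.visualization.plot_utils import plot_stacked_bar_chart

# Assumed layout: rows are series (species), columns are locations
data = np.array([[10, 3, 7],
                 [2, 8, 5]])

fig = plot_stacked_bar_chart(data,
                             series_labels=['deer', 'fox'],
                             col_labels=['site_a', 'site_b', 'site_c'],
                             x_label='location',
                             y_label='count',
                             log_scale=False)
fig.savefig('species_by_location.png')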
megadetector/visualization/render_images_with_thumbnails.py

@@ -131,7 +131,8 @@ def render_images_with_thumbnails(
     # through them, crop them, and save them to a list of cropped_images
     cropped_images = []
     for (name, box) in zip(secondary_image_filename_list,
-                           secondary_image_bounding_box_list):
+                           secondary_image_bounding_box_list,
+                           strict=True):
 
         other_image = vis_utils.load_image(name)
         cropped_image = crop_image_with_normalized_coordinates(
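The strict=True addition above makes render_images_with_thumbnails fail fast when the filename and bounding-box lists have different lengths, rather than silently truncating to the shorter one. A minimal illustration of the underlying Python behavior (zip(..., strict=True) requires Python 3.10+), using placeholder data:

names = ['a.jpg', 'b.jpg']
boxes = [[0.1, 0.1, 0.5, 0.5]]  # one box missing

try:
    for name, box in zip(names, boxes, strict=True):
        print(name, box)
except ValueError as err:
    # Raised because the two iterables have different lengths
    print('length mismatch:', err)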