megadetector 5.0.29__py3-none-any.whl → 10.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic.
- megadetector/classification/efficientnet/model.py +8 -8
- megadetector/classification/efficientnet/utils.py +6 -5
- megadetector/classification/prepare_classification_script_mc.py +3 -3
- megadetector/data_management/annotations/annotation_constants.py +0 -1
- megadetector/data_management/camtrap_dp_to_coco.py +34 -1
- megadetector/data_management/cct_json_utils.py +2 -2
- megadetector/data_management/coco_to_yolo.py +22 -5
- megadetector/data_management/databases/add_width_and_height_to_db.py +85 -12
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +2 -2
- megadetector/data_management/databases/integrity_check_json_db.py +29 -15
- megadetector/data_management/generate_crops_from_cct.py +50 -1
- megadetector/data_management/labelme_to_coco.py +4 -2
- megadetector/data_management/labelme_to_yolo.py +82 -2
- megadetector/data_management/lila/generate_lila_per_image_labels.py +276 -18
- megadetector/data_management/lila/get_lila_annotation_counts.py +5 -3
- megadetector/data_management/lila/lila_common.py +3 -0
- megadetector/data_management/lila/test_lila_metadata_urls.py +15 -5
- megadetector/data_management/mewc_to_md.py +5 -0
- megadetector/data_management/ocr_tools.py +4 -3
- megadetector/data_management/read_exif.py +20 -5
- megadetector/data_management/remap_coco_categories.py +66 -4
- megadetector/data_management/remove_exif.py +50 -1
- megadetector/data_management/rename_images.py +3 -3
- megadetector/data_management/resize_coco_dataset.py +563 -95
- megadetector/data_management/yolo_output_to_md_output.py +131 -2
- megadetector/data_management/yolo_to_coco.py +140 -5
- megadetector/detection/change_detection.py +4 -3
- megadetector/detection/pytorch_detector.py +60 -22
- megadetector/detection/run_detector.py +225 -25
- megadetector/detection/run_detector_batch.py +42 -16
- megadetector/detection/run_inference_with_yolov5_val.py +12 -2
- megadetector/detection/run_tiled_inference.py +1 -0
- megadetector/detection/video_utils.py +53 -24
- megadetector/postprocessing/add_max_conf.py +4 -0
- megadetector/postprocessing/categorize_detections_by_size.py +1 -1
- megadetector/postprocessing/classification_postprocessing.py +55 -20
- megadetector/postprocessing/combine_batch_outputs.py +3 -2
- megadetector/postprocessing/compare_batch_results.py +64 -10
- megadetector/postprocessing/convert_output_format.py +12 -8
- megadetector/postprocessing/create_crop_folder.py +137 -10
- megadetector/postprocessing/load_api_results.py +26 -8
- megadetector/postprocessing/md_to_coco.py +4 -4
- megadetector/postprocessing/md_to_labelme.py +18 -7
- megadetector/postprocessing/merge_detections.py +5 -0
- megadetector/postprocessing/postprocess_batch_results.py +6 -3
- megadetector/postprocessing/remap_detection_categories.py +55 -2
- megadetector/postprocessing/render_detection_confusion_matrix.py +9 -6
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +3 -4
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +40 -19
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +1 -1
- megadetector/taxonomy_mapping/species_lookup.py +123 -41
- megadetector/utils/ct_utils.py +133 -113
- megadetector/utils/md_tests.py +93 -13
- megadetector/utils/path_utils.py +137 -107
- megadetector/utils/split_locations_into_train_val.py +2 -2
- megadetector/utils/string_utils.py +7 -7
- megadetector/utils/url_utils.py +81 -58
- megadetector/utils/wi_utils.py +46 -17
- megadetector/visualization/plot_utils.py +13 -9
- megadetector/visualization/render_images_with_thumbnails.py +2 -1
- megadetector/visualization/visualization_utils.py +94 -46
- megadetector/visualization/visualize_db.py +36 -9
- megadetector/visualization/visualize_detector_output.py +4 -4
- {megadetector-5.0.29.dist-info → megadetector-10.0.1.dist-info}/METADATA +135 -135
- megadetector-10.0.1.dist-info/RECORD +139 -0
- {megadetector-5.0.29.dist-info → megadetector-10.0.1.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.29.dist-info → megadetector-10.0.1.dist-info}/top_level.txt +0 -0
- megadetector/api/batch_processing/api_core/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/score.py +0 -438
- megadetector/api/batch_processing/api_core/server.py +0 -294
- megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
- megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
- megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
- megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
- megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
- megadetector/api/batch_processing/api_core/server_utils.py +0 -88
- megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
- megadetector/api/batch_processing/api_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
- megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
- megadetector/api/synchronous/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
- megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
- megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
- megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
- megadetector/api/synchronous/api_core/tests/load_test.py +0 -109
- megadetector/utils/azure_utils.py +0 -178
- megadetector/utils/sas_blob_utils.py +0 -513
- megadetector-5.0.29.dist-info/RECORD +0 -163
- /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
- {megadetector-5.0.29.dist-info → megadetector-10.0.1.dist-info}/WHEEL +0 -0
megadetector/utils/split_locations_into_train_val.py
CHANGED

@@ -64,8 +64,8 @@ def split_locations_into_train_val(location_to_category_counts,
         default_max_allowable_error (float, optional): the maximum allowable error for categories not
             present in [category_to_max_allowable_error]. Set to None (or >= 1.0) to disable hard
             constraints for categories not present in [category_to_max_allowable_error]
-        require_complete_coverage (bool, optional): require that every category appear in both train
-            val
+        require_complete_coverage (bool, optional): require that every category appear in both train
+            and val

     Returns:
         tuple: A two-element tuple:
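For reference, a minimal usage sketch of the corrected parameter. This assumes location_to_category_counts maps location IDs to per-category counts and that the two-element return value is (train_locations, val_locations); neither detail is confirmed by this diff, so treat both as illustrative.

    from megadetector.utils.split_locations_into_train_val import split_locations_into_train_val

    # Hypothetical per-location category counts
    location_to_category_counts = {
        'loc_000': {'deer': 50, 'bear': 2},
        'loc_001': {'deer': 10, 'bear': 40},
        'loc_002': {'deer': 30, 'bear': 5},
    }

    # Per the corrected docstring, require_complete_coverage=True requires that
    # every category appear in both the train split and the val split
    train_locations, val_locations = split_locations_into_train_val(
        location_to_category_counts,
        require_complete_coverage=True)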
megadetector/utils/string_utils.py
CHANGED

@@ -26,7 +26,7 @@ def is_float(s):

     if s is None:
         return False
-
+
     try:
         _ = float(s)
     except ValueError:

@@ -53,7 +53,7 @@ def human_readable_to_bytes(size):

     if not size: # Handle empty string case after stripping spaces
         return 0
-
+
     if (size[-1] == 'B'):
         size = size[:-1]

@@ -70,7 +70,7 @@ def human_readable_to_bytes(size):
     # Need to separate numeric part from unit more carefully.
     numeric_part = ''
     unit_part = ''
-
+
     # Iterate from the end to find the unit (K, M, G, T)
     # This handles cases like "10KB" or "2.5GB"
     for i in range(len(size) -1, -1, -1):

@@ -79,7 +79,7 @@ def human_readable_to_bytes(size):
         else:
             numeric_part = size[:i+1]
             break
-
+
     # If no unit found, or numeric part is empty after stripping unit
     if not unit_part or not numeric_part:
         return 0

@@ -97,8 +97,8 @@ def human_readable_to_bytes(size):
             bytes_val *= 1024
         else:
             # If it's a known unit (like 'B' already stripped) but not T/G/M/K,
-            # and it was floatable, it's just bytes. If it's an unknown unit, it's
-
+            # and it was floatable, it's just bytes. If it's an unknown unit, it's
+            # an error.
             if unit not in ['B', '']: # 'B' was stripped, '' means just a number
                 bytes_val = 0
     except ValueError:

@@ -165,7 +165,7 @@ class TestStringUtils:
        assert human_readable_to_bytes("1GB") == 1024*1024*1024
        assert human_readable_to_bytes("1T") == 1024*1024*1024*1024
        assert human_readable_to_bytes("1TB") == 1024*1024*1024*1024
-
+
        assert human_readable_to_bytes("2.5K") == 2.5 * 1024
        assert human_readable_to_bytes("0.5MB") == 0.5 * 1024 * 1024
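The tests above pin down the conversion contract for human_readable_to_bytes: units are 1024-based, with or without a trailing 'B', and fractional values are allowed. A quick sketch collecting the behavior asserted in this diff (only assertions visible above; nothing beyond them is assumed):

    from megadetector.utils.string_utils import human_readable_to_bytes

    # 1024-based units; 'K'/'KB', 'M'/'MB', 'G'/'GB', 'T'/'TB' are equivalent
    assert human_readable_to_bytes("1GB") == 1024 * 1024 * 1024
    assert human_readable_to_bytes("1T") == 1024 * 1024 * 1024 * 1024
    assert human_readable_to_bytes("1TB") == 1024 * 1024 * 1024 * 1024

    # Fractional sizes are supported
    assert human_readable_to_bytes("2.5K") == 2.5 * 1024
    assert human_readable_to_bytes("0.5MB") == 0.5 * 1024 * 1024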
megadetector/utils/url_utils.py
CHANGED

@@ -13,9 +13,8 @@ import re
 import urllib
 import urllib.request
 import urllib.error
-import
-import
-import shutil
+import requests
+import shutil
 import pytest

 from functools import partial

@@ -53,7 +52,7 @@ class DownloadProgressBar:
             self.pbar = progressbar.ProgressBar(max_value=total_size)
             self.pbar.start()
         except ImportError:
-            self.pbar = None
+            self.pbar = None
             # print("ProgressBar not available, install 'progressbar2' for visual progress.")

         if self.pbar:

@@ -108,9 +107,9 @@ def download_url(url,

     # This does not guarantee uniqueness, hence "semi-best-effort"
     url_as_filename = re.sub(r'\W+', '', url_without_sas)
-
+
     n_folder_chars = len(target_folder)
-
+
     if (len(url_as_filename) + n_folder_chars) >= max_path_len:
         print('Warning: truncating filename target to {} characters'.format(max_path_len))
         max_fn_len = max_path_len - (n_folder_chars + 1)

@@ -202,15 +201,18 @@ def _do_parallelized_download(download_info,overwrite=False,verbose=False):
 # ...def _do_parallelized_download(...)


-def parallel_download_urls(url_to_target_file,
-
+def parallel_download_urls(url_to_target_file,
+                           verbose=False,
+                           overwrite=False,
+                           n_workers=20,
+                           pool_type='thread'):
     """
     Downloads a list of URLs to local files.

     Catches exceptions and reports them in the returned "results" array.

     Args:
-        url_to_target_file: a dict mapping URLs to local filenames.
+        url_to_target_file (dict): a dict mapping URLs to local filenames.
         verbose (bool, optional): enable additional debug console output
         overwrite (bool, optional): whether to overwrite existing local files
         n_workers (int, optional): number of concurrent workers, set to <=1 to disable

@@ -229,7 +231,7 @@ def parallel_download_urls(url_to_target_file, verbose=False, overwrite=False,

     if verbose:
         print('Preparing download list')
-    for url in tqdm(url_to_target_file, disable=(not verbose)):
+    for url in tqdm(url_to_target_file, disable=(not verbose)):
         download_info = {}
         download_info['url'] = url
         download_info['target_file'] = url_to_target_file[url]

@@ -341,7 +343,7 @@ def test_urls(urls,error_on_failure=True,n_workers=1,pool_type='thread',timeout=
     else:
         assert pool_type == 'process', 'Unsupported pool type {}'.format(pool_type)
         pool = Pool(n_workers)
-
+
     if verbose:
         print('Starting a {} pool with {} workers'.format(pool_type,n_workers))

@@ -439,7 +441,7 @@ def get_url_sizes(urls,n_workers=1,pool_type='thread',timeout=None,verbose=False
         file_sizes = list(tqdm(pool.imap(
             partial(get_url_size,verbose=verbose,timeout=timeout),
             urls), total=len(urls), disable=(not verbose)))
-
+
         for i_url,url in enumerate(urls):
             url_to_size[url] = file_sizes[i_url]
     finally:

@@ -456,8 +458,8 @@ def get_url_sizes(urls,n_workers=1,pool_type='thread',timeout=None,verbose=False
 # Constants for tests

 SMALL_FILE_URL = "https://www.google.com/images/branding/googlelogo/1x/googlelogo_color_272x92dp.png"
-REDIRECT_SRC_URL = "http://google.com"
-REDIRECT_DEST_URL = "https://www.google.com/"
+REDIRECT_SRC_URL = "http://google.com"
+REDIRECT_DEST_URL = "https://www.google.com/"
 NON_EXISTENT_URL = "https://example.com/non_existent_page_404.html"
 DEFINITELY_NON_EXISTENT_DOMAIN_URL = "https://thisshouldnotexist1234567890.com/file.txt"
 RELATIVE_DOWNLOAD_URL = "https://raw.githubusercontent.com/agentmorris/MegaDetector/main/README.md"

@@ -470,13 +472,12 @@ class TestUrlUtils:
     Tests for url_utils.py
     """

-
     def set_up(self):
         """
         Create a temporary directory for testing.
         """

-        self.test_dir = make_test_folder(subfolder='url_utils_tests')
+        self.test_dir = make_test_folder(subfolder='url_utils_tests')
         self.download_target_dir = os.path.join(self.test_dir, 'downloads')
         os.makedirs(self.download_target_dir, exist_ok=True)

@@ -488,7 +489,7 @@ class TestUrlUtils:

         if os.path.exists(self.test_dir):
             shutil.rmtree(self.test_dir)
-
+

     def test_download_url_to_specified_file(self):
         """

@@ -496,7 +497,9 @@ class TestUrlUtils:
         """

         dest_filename = os.path.join(self.download_target_dir, "downloaded_google_logo.png")
-        returned_filename = download_url(SMALL_FILE_URL,
+        returned_filename = download_url(SMALL_FILE_URL,
+                                         destination_filename=dest_filename,
+                                         verbose=False)
         assert returned_filename == dest_filename
         assert os.path.exists(dest_filename)
         assert os.path.getsize(dest_filename) > 1000

@@ -507,10 +510,12 @@
         Test download_url when destination_filename is None.
         """

-        returned_filename = download_url(SMALL_FILE_URL,
+        returned_filename = download_url(SMALL_FILE_URL,
+                                         destination_filename=None,
+                                         verbose=False)
         assert os.path.exists(returned_filename)
         assert os.path.getsize(returned_filename) > 1000
-
+

     def test_download_url_non_existent(self):
         """

@@ -520,17 +525,19 @@
         dest_filename = os.path.join(self.download_target_dir, "non_existent.html")
         try:
             download_url(NON_EXISTENT_URL, destination_filename=dest_filename, verbose=False)
-
+            raise AssertionError("urllib.error.HTTPError not raised for 404")
         except urllib.error.HTTPError:
             pass
-
+
         try:
-            download_url(DEFINITELY_NON_EXISTENT_DOMAIN_URL,
-
-
-
+            download_url(DEFINITELY_NON_EXISTENT_DOMAIN_URL,
+                         destination_filename=dest_filename,
+                         verbose=False)
+            raise AssertionError(
+                "urllib.error.URLError or requests.exceptions.ConnectionError not raised for DNS failure")
+        except urllib.error.URLError:
             pass
-        except requests.exceptions.ConnectionError:
+        except requests.exceptions.ConnectionError:
             pass

@@ -540,15 +547,18 @@
         """

         dest_filename = os.path.join(self.download_target_dir, "force_test.png")
-
+
         download_url(SMALL_FILE_URL, destination_filename=dest_filename, verbose=False)
         assert os.path.exists(dest_filename)
         initial_mtime = os.path.getmtime(dest_filename)

-        download_url(SMALL_FILE_URL, destination_filename=dest_filename, verbose=True)
+        download_url(SMALL_FILE_URL, destination_filename=dest_filename, verbose=True)
         assert os.path.getmtime(dest_filename) == initial_mtime

-        download_url(SMALL_FILE_URL,
+        download_url(SMALL_FILE_URL,
+                     destination_filename=dest_filename,
+                     force_download=True,
+                     verbose=False)
         assert os.path.exists(dest_filename)


@@ -558,7 +568,10 @@
         """

         dest_filename = os.path.join(self.download_target_dir, "escape_test.png")
-        download_url(SMALL_FILE_URL,
+        download_url(SMALL_FILE_URL,
+                     destination_filename=dest_filename,
+                     escape_spaces=True,
+                     verbose=False)
         assert os.path.exists(dest_filename)


@@ -567,7 +580,7 @@
         Test download_relative_filename.
         """

-        output_base = os.path.join(self.download_target_dir, "relative_dl")
+        output_base = os.path.join(self.download_target_dir, "relative_dl")
         returned_filename = download_relative_filename(RELATIVE_DOWNLOAD_URL, output_base, verbose=False)
         assert RELATIVE_DOWNLOAD_CONTAIN_TOKEN in returned_filename
         assert RELATIVE_DOWNLOAD_NOT_CONTAIN_TOKEN not in returned_filename

@@ -582,32 +595,38 @@

         url1_target = os.path.join(self.download_target_dir, "parallel_dl_1.png")
         url2_target = os.path.join(self.download_target_dir, "parallel_dl_2_nonexistent.html")
-
+
         url_to_target_file = {
             SMALL_FILE_URL: url1_target,
             NON_EXISTENT_URL: url2_target
         }
-
+
         results = parallel_download_urls(url_to_target_file, n_workers=1, verbose=False)
-
+
         assert len(results) == 2
-
+
         status_map = {res['url']: res for res in results}
-
+
         assert status_map[SMALL_FILE_URL]['status'] == 'success'
         assert status_map[SMALL_FILE_URL]['target_file'] == url1_target
         assert os.path.exists(url1_target)
-
+
         assert status_map[NON_EXISTENT_URL]['status'].startswith('error: HTTP Error 404')
         assert status_map[NON_EXISTENT_URL]['target_file'] == url2_target
         assert not os.path.exists(url2_target)

         if not os.path.exists(url1_target):
             download_url(SMALL_FILE_URL, url1_target, verbose=False)
-        results_skip = parallel_download_urls({SMALL_FILE_URL: url1_target},
+        results_skip = parallel_download_urls({SMALL_FILE_URL: url1_target},
+                                              n_workers=1,
+                                              overwrite=False,
+                                              verbose=True)
         assert results_skip[0]['status'] == 'skipped'

-        results_overwrite = parallel_download_urls({SMALL_FILE_URL: url1_target},
+        results_overwrite = parallel_download_urls({SMALL_FILE_URL: url1_target},
+                                                   n_workers=1,
+                                                   overwrite=True,
+                                                   verbose=False)
         assert results_overwrite[0]['status'] == 'success'


@@ -620,20 +639,22 @@
         assert test_url(REDIRECT_SRC_URL, error_on_failure=False, timeout=10) in (200,301)

         status_non_existent = test_url(NON_EXISTENT_URL, error_on_failure=False, timeout=5)
-        assert status_non_existent == 404
-
+        assert status_non_existent == 404
+
         try:
             test_url(NON_EXISTENT_URL, error_on_failure=True, timeout=5)
-
+            raise AssertionError("ValueError not raised for NON_EXISTENT_URL")
         except ValueError:
             pass

         try:
-            test_url(DEFINITELY_NON_EXISTENT_DOMAIN_URL,
-
-
+            test_url(DEFINITELY_NON_EXISTENT_DOMAIN_URL,
+                     error_on_failure=True,
+                     timeout=5)
+            raise AssertionError("requests.exceptions.ConnectionError or urllib.error.URLError not raised")
+        except requests.exceptions.ConnectionError:
             pass
-        except urllib.error.URLError:
+        except urllib.error.URLError:
             pass


@@ -645,10 +666,10 @@

         try:
             test_urls(urls_to_test, error_on_failure=True, n_workers=1, timeout=5)
-
+            raise AssertionError("ValueError not raised for urls_to_test")
         except ValueError:
             pass
-
+
         good_urls = [SMALL_FILE_URL, REDIRECT_SRC_URL]
         good_status_codes = test_urls(good_urls, error_on_failure=True, n_workers=1, timeout=10)
         assert good_status_codes == [200, 200]

@@ -661,34 +682,36 @@

         size = get_url_size(SMALL_FILE_URL, timeout=10)
         assert size is not None
-        assert size > 1000
+        assert size > 1000

         size_dynamic = get_url_size(REDIRECT_DEST_URL, timeout=10, verbose=True)
         if size_dynamic is not None:
             assert isinstance(size_dynamic, int)
-
+
         size_non_existent = get_url_size(NON_EXISTENT_URL, timeout=5)
         assert size_non_existent is None
-
+
         size_bad_domain = get_url_size(DEFINITELY_NON_EXISTENT_DOMAIN_URL, timeout=5)
         assert size_bad_domain is None

         urls_for_size = [SMALL_FILE_URL, NON_EXISTENT_URL, REDIRECT_DEST_URL]
         sizes_map = get_url_sizes(urls_for_size, n_workers=1, timeout=10)
-
+
         assert SMALL_FILE_URL in sizes_map
-        assert sizes_map[SMALL_FILE_URL] == size
-
+        assert sizes_map[SMALL_FILE_URL] == size
+
         assert NON_EXISTENT_URL in sizes_map
         assert sizes_map[NON_EXISTENT_URL] is None
-
+
         assert REDIRECT_DEST_URL in sizes_map
         assert sizes_map[REDIRECT_DEST_URL] == size_dynamic


-def
+def _test_url_utils():
     """
-    Runs all tests in the TestUrlUtils class.
+    Runs all tests in the TestUrlUtils class. I generally disable this during testing
+    because it creates irritating nondeterminism, and this is neither a core module nor
+    a module that changes often.
     """

     test_instance = TestUrlUtils()
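The reflowed parallel_download_urls signature above makes the keyword arguments explicit. A usage sketch based on that signature and on the result fields exercised by the tests (URLs and paths here are illustrative):

    from megadetector.utils.url_utils import parallel_download_urls

    # Illustrative URL -> local-filename mapping
    url_to_target_file = {
        'https://example.com/images/a.jpg': '/tmp/downloads/a.jpg',
        'https://example.com/images/b.jpg': '/tmp/downloads/b.jpg'
    }

    results = parallel_download_urls(url_to_target_file,
                                     verbose=True,
                                     overwrite=False,
                                     n_workers=4,
                                     pool_type='thread')

    # Per the test assertions above, each result dict carries 'url',
    # 'target_file', and a 'status' string ('success', 'skipped', or 'error: ...')
    for result in results:
        print(result['url'], result['status'])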
megadetector/utils/wi_utils.py
CHANGED

@@ -465,8 +465,9 @@ def write_download_commands(image_records_to_download,
         force_download (bool, optional): include gs commands even if the target file exists
         n_download_workers (int, optional): number of scripts to write (that's our hacky way
             of controlling parallelization)
-
-            to "download_wi_images.sh" in the destination folder
+        download_command_file_base (str, optional): path of the .sh script we should write, defaults
+            to "download_wi_images.sh" in the destination folder. Individual worker scripts will
+            have a number added, e.g. download_wi_images_00.sh.
     """

     if isinstance(image_records_to_download,dict):

@@ -1069,7 +1070,7 @@ def generate_whole_image_detections_for_classifications(classifications_json_file,
                                                         ensemble_json_file=None,
                                                         ignore_blank_classifications=True):
     """
-    Given a set of classification results in SpeciesNet format that were likely run on
+    Given a set of classification results in SpeciesNet format that were likely run on
     already-cropped images, generate a file of [fake] detections in SpeciesNet format in which each
     image is covered in a single whole-image detection.

@@ -1485,6 +1486,8 @@ def generate_instances_json_from_folder(folder,
     Args:
         folder (str): the folder to recursively search for images
         country (str, optional): a three-letter country code
+        admin1_region (str, optional): an administrative region code, typically a two-letter
+            US state code
         lat (float, optional): latitude to associate with all images
         lon (float, optional): longitude to associate with all images
         output_file (str, optional): .json file to which we should write instance records

@@ -1590,8 +1593,8 @@ def merge_prediction_json_files(input_prediction_files,output_prediction_file):
     Merge all predictions.json files in [files] into a single .json file.

     Args:
-
-
+        input_prediction_files (list): list of predictions.json files to merge
+        output_prediction_file (str): output .json file
     """

     predictions = []

@@ -2074,8 +2077,7 @@ def generate_csv_rows_for_species(species_string,
                                   allow_countries=None,
                                   block_countries=None,
                                   allow_states=None,
-                                  block_states=None
-                                  blockexcept_countries=None):
+                                  block_states=None):
     """
     Generate rows in the format expected by geofence_fixes.csv, representing a list of
     allow and/or block rules for the specified species and countries/states. Does not check

@@ -2084,13 +2086,13 @@ def generate_csv_rows_for_species(species_string,

     Args:
         species_string (str): five-token string in semicolon-delimited WI taxonomy format
-        allow_countries (
+        allow_countries (list or str, optional): three-letter country codes, list of
             country codes, or comma-separated list of country codes to allow
-        block_countries (
+        block_countries (list or str, optional): three-letter country codes, list of
             country codes, or comma-separated list of country codes to block
-        allow_states (
+        allow_states (list or str, optional): two-letter state codes, list of
             state codes, or comma-separated list of state codes to allow
-        block_states (
+        block_states (list or str, optional): two-letter state code, list of
             state codes, or comma-separated list of state codes to block

     Returns:

@@ -2502,6 +2504,31 @@ if False:
     initialize_geofencing(geofencing_file, country_code_file, force_init=True)
     initialize_taxonomy_info(taxonomy_file, force_init=True, encoding=encoding)

+    from megadetector.utils.path_utils import open_file; open_file(geofencing_file)
+
+
+    #%% Generate a block list
+
+    taxon_name = 'cercopithecidae'
+    taxonomy_info = binomial_name_to_taxonomy_info[taxon_name]
+    taxonomy_string_short = taxonomy_info_to_taxonomy_string(taxonomy_info)
+    assert len(taxonomy_string_short.split(';')) == 5
+
+    block_list = 'ATG,BHS,BRB,BLZ,CAN,CRI,CUB,DMA,DOM,SLV,GRD,GTM,HTI,HND,JAM,' + \
+        'MEX,NIC,PAN,KNA,LCA,VCT,TTO,USA,ARG,BOL,BRA,CHL,COL,ECU,GUY,PRY,PER,' + \
+        'SUR,URY,VEN,ALB,AND,ARM,AUT,AZE,BLR,BEL,BIH,BGR,HRV,CYP,CZE,DNK,EST,FIN,' + \
+        'FRA,GEO,DEU,GRC,HUN,ISL,IRL,ITA,KAZ,XKX,LVA,LIE,LTU,LUX,MLT,MDA,MCO,MNE,' + \
+        'NLD,MKD,NOR,POL,PRT,ROU,RUS,SMR,SRB,SVK,SVN,ESP,SWE,CHE,TUR,UKR,GBR,VAT,AUS'
+
+    rows = generate_csv_rows_for_species(species_string=taxonomy_string_short,
+                                         allow_countries=None,
+                                         block_countries=block_list,
+                                         allow_states=None,
+                                         block_states=None)
+
+    # import clipboard; clipboard.copy('\n'.join(rows))
+    print(rows)
+

     #%% Generate a block-except list


@@ -2521,12 +2548,14 @@ if False:
     taxonomy_string_short = taxonomy_info_to_taxonomy_string(taxonomy_info)
     assert len(taxonomy_string_short.split(';')) == 5

-    generate_csv_rows_for_species(species_string=taxonomy_string_short,
-
-
-
-
-
+    rows = generate_csv_rows_for_species(species_string=taxonomy_string_short,
+                                         allow_countries=['AUS'],
+                                         block_countries=None,
+                                         allow_states=None,
+                                         block_states=None)
+
+    # import clipboard; clipboard.copy('\n'.join(rows))
+    print(rows)


     #%% Test the effects of geofence changes
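Note that the blockexcept_countries parameter is removed from generate_csv_rows_for_species in this release; as the "block-except" snippet above shows, that scenario is now expressed with allow_countries. A minimal migration sketch (the taxonomy string here is illustrative, not taken from the WI taxonomy):

    from megadetector.utils.wi_utils import generate_csv_rows_for_species

    # Illustrative five-token, semicolon-delimited WI taxonomy string
    species_string = 'mammalia;primates;cercopithecidae;;'

    # Formerly blockexcept_countries=['AUS']; now expressed as an allow rule,
    # matching the "Generate a block-except list" cell above
    rows = generate_csv_rows_for_species(species_string=species_string,
                                         allow_countries=['AUS'],
                                         block_countries=None,
                                         allow_states=None,
                                         block_states=None)
    print('\n'.join(rows))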
megadetector/visualization/plot_utils.py
CHANGED

@@ -37,14 +37,14 @@ def plot_confusion_matrix(matrix,
         normalize (bool, optional): whether to perform row-wise normalization;
             by default, assumes values in the confusion matrix are percentages
         title (str, optional): figure title
-        cmap (matplotlib.colors.colormap): colormap for cell backgrounds
-        vmax (float, optional)
+        cmap (matplotlib.colors.colormap, optional): colormap for cell backgrounds
+        vmax (float, optional): value corresponding to the largest value of the colormap;
             if None, the maximum value in [matrix] will be used
         use_colorbar (bool, optional): whether to show colorbar
         y_label (bool, optional): whether to show class names on the y axis
-        fmt (str): format string for rendering numeric values
-        fig (Figure): existing figure to which we should render, otherwise
-            a new figure
+        fmt (str, optional): format string for rendering numeric values
+        fig (Figure, optional): existing figure to which we should render, otherwise
+            creates a new figure

     Returns:
         matplotlib.figure.Figure: the figure we rendered to or created

@@ -140,8 +140,12 @@ def plot_precision_recall_curve(precisions,
     return fig


-def plot_stacked_bar_chart(data,
-
+def plot_stacked_bar_chart(data,
+                           series_labels=None,
+                           col_labels=None,
+                           x_label=None,
+                           y_label=None,
+                           log_scale=False):
     """
     Plot a stacked bar chart, for plotting e.g. species distribution across locations.

@@ -154,7 +158,7 @@ def plot_stacked_bar_chart(data, series_labels=None, col_labels=None,
         col_labels (list of str, optional): column labels, typically location names
         x_label (str, optional): x-axis label
         y_label (str, optional): y-axis label
-        log_scale (bool, optional) whether to plot the y axis in log-scale
+        log_scale (bool, optional): whether to plot the y axis in log-scale

     Returns:
         matplotlib.figure.Figure: the (new) figure

@@ -257,7 +261,7 @@ def plot_calibration_curve(true_scores, pred_scores, num_bins,
         plot_perf (bool, optional): whether to plot y=x line indicating perfect calibration
         plot_hist (bool, optional): whether to plot histogram of counts
         ax (Axes, optional): if given then no legend is drawn, and fig_kwargs are ignored
-        fig_kwargs (dict
+        fig_kwargs (dict): only used if [ax] is None

     Returns:
         matplotlib.figure.Figure: the (new) figure
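A usage sketch for the reflowed plot_stacked_bar_chart signature. The exact type of the data parameter isn't shown in this diff; a 2D array-like with one row per series is assumed here, which matches the series_labels/col_labels parameters:

    from megadetector.visualization.plot_utils import plot_stacked_bar_chart

    # Toy data: one row per series (e.g. a species), one column per location,
    # per the docstring's "species distribution across locations" example
    data = [[10, 4, 7],
            [3, 9, 1]]

    fig = plot_stacked_bar_chart(data,
                                 series_labels=['deer', 'bear'],
                                 col_labels=['site A', 'site B', 'site C'],
                                 x_label='location',
                                 y_label='count',
                                 log_scale=False)

    # The function returns a new matplotlib Figure, per the docstring above
    fig.savefig('species_by_location.png')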
megadetector/visualization/render_images_with_thumbnails.py
CHANGED

@@ -131,7 +131,8 @@ def render_images_with_thumbnails(
     # through them, crop them, and save them to a list of cropped_images
     cropped_images = []
     for (name, box) in zip(secondary_image_filename_list,
-                           secondary_image_bounding_box_list
+                           secondary_image_bounding_box_list,
+                           strict=True):

         other_image = vis_utils.load_image(name)
         cropped_image = crop_image_with_normalized_coordinates(
|