megadetector 10.0.9__py3-none-any.whl → 10.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector might be problematic.
- megadetector/data_management/animl_to_md.py +5 -2
- megadetector/data_management/cct_json_utils.py +4 -2
- megadetector/data_management/cct_to_md.py +5 -4
- megadetector/data_management/cct_to_wi.py +5 -1
- megadetector/data_management/coco_to_yolo.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +4 -4
- megadetector/data_management/databases/integrity_check_json_db.py +2 -2
- megadetector/data_management/databases/subset_json_db.py +0 -3
- megadetector/data_management/generate_crops_from_cct.py +6 -4
- megadetector/data_management/get_image_sizes.py +5 -35
- megadetector/data_management/labelme_to_coco.py +10 -6
- megadetector/data_management/labelme_to_yolo.py +19 -28
- megadetector/data_management/lila/create_lila_test_set.py +22 -2
- megadetector/data_management/lila/generate_lila_per_image_labels.py +7 -5
- megadetector/data_management/lila/lila_common.py +2 -2
- megadetector/data_management/lila/test_lila_metadata_urls.py +0 -1
- megadetector/data_management/ocr_tools.py +6 -10
- megadetector/data_management/read_exif.py +69 -13
- megadetector/data_management/remap_coco_categories.py +1 -1
- megadetector/data_management/remove_exif.py +10 -5
- megadetector/data_management/rename_images.py +20 -13
- megadetector/data_management/resize_coco_dataset.py +10 -4
- megadetector/data_management/speciesnet_to_md.py +3 -3
- megadetector/data_management/yolo_output_to_md_output.py +3 -1
- megadetector/data_management/yolo_to_coco.py +28 -19
- megadetector/detection/change_detection.py +26 -18
- megadetector/detection/process_video.py +1 -1
- megadetector/detection/pytorch_detector.py +5 -5
- megadetector/detection/run_detector.py +34 -10
- megadetector/detection/run_detector_batch.py +60 -42
- megadetector/detection/run_inference_with_yolov5_val.py +3 -1
- megadetector/detection/run_md_and_speciesnet.py +282 -110
- megadetector/detection/run_tiled_inference.py +7 -7
- megadetector/detection/tf_detector.py +4 -6
- megadetector/detection/video_utils.py +9 -6
- megadetector/postprocessing/add_max_conf.py +4 -4
- megadetector/postprocessing/categorize_detections_by_size.py +3 -2
- megadetector/postprocessing/classification_postprocessing.py +19 -21
- megadetector/postprocessing/combine_batch_outputs.py +3 -2
- megadetector/postprocessing/compare_batch_results.py +49 -27
- megadetector/postprocessing/convert_output_format.py +8 -6
- megadetector/postprocessing/create_crop_folder.py +13 -4
- megadetector/postprocessing/generate_csv_report.py +22 -8
- megadetector/postprocessing/load_api_results.py +8 -4
- megadetector/postprocessing/md_to_coco.py +2 -3
- megadetector/postprocessing/md_to_labelme.py +12 -8
- megadetector/postprocessing/md_to_wi.py +2 -1
- megadetector/postprocessing/merge_detections.py +4 -6
- megadetector/postprocessing/postprocess_batch_results.py +4 -3
- megadetector/postprocessing/remap_detection_categories.py +6 -3
- megadetector/postprocessing/render_detection_confusion_matrix.py +18 -10
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +5 -3
- megadetector/postprocessing/separate_detections_into_folders.py +10 -4
- megadetector/postprocessing/subset_json_detector_output.py +1 -1
- megadetector/postprocessing/top_folders_to_bottom.py +22 -7
- megadetector/postprocessing/validate_batch_results.py +1 -1
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +59 -3
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +26 -17
- megadetector/taxonomy_mapping/species_lookup.py +51 -2
- megadetector/utils/ct_utils.py +9 -4
- megadetector/utils/directory_listing.py +3 -0
- megadetector/utils/extract_frames_from_video.py +4 -0
- megadetector/utils/gpu_test.py +6 -6
- megadetector/utils/md_tests.py +21 -21
- megadetector/utils/path_utils.py +171 -36
- megadetector/utils/split_locations_into_train_val.py +0 -4
- megadetector/utils/string_utils.py +21 -0
- megadetector/utils/url_utils.py +5 -3
- megadetector/utils/wi_platform_utils.py +168 -24
- megadetector/utils/wi_taxonomy_utils.py +38 -8
- megadetector/utils/write_html_image_list.py +1 -2
- megadetector/visualization/plot_utils.py +31 -19
- megadetector/visualization/render_images_with_thumbnails.py +3 -0
- megadetector/visualization/visualization_utils.py +18 -7
- megadetector/visualization/visualize_db.py +9 -26
- megadetector/visualization/visualize_detector_output.py +1 -0
- megadetector/visualization/visualize_video_output.py +14 -2
- {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/METADATA +1 -1
- {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/RECORD +84 -84
- {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/WHEEL +0 -0
- {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/licenses/LICENSE +0 -0
- {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/top_level.txt +0 -0
megadetector/utils/ct_utils.py
CHANGED

@@ -241,7 +241,10 @@ def write_json(path,
     elif force_str:
         default_handler = str

-
+    # Create the parent directory if necessary
+    parent_dir = os.path.dirname(path)
+    if len(parent_dir) > 0:
+        os.makedirs(parent_dir, exist_ok=True)

     with open(path, 'w', newline='\n', encoding=encoding) as f:
         json.dump(content, f, indent=indent, default=default_handler, ensure_ascii=ensure_ascii)

@@ -562,7 +565,7 @@ def sort_dictionary_by_value(d,sort_values=None,reverse=False):
         reverse (bool, optional): whether to sort in reverse (descending) order

     Returns:
-        dict: sorted copy of [d
+        dict: sorted copy of [d]
     """

     if sort_values is None:

@@ -1022,8 +1025,10 @@ def parse_bool_string(s, strict=False):
     s = str(s).lower().strip()

     if strict:
-
-
+        # Fun fact: ('false') (rather than ('false',)) creates a string,
+        # not a tuple.
+        false_strings = ('false',)
+        true_strings = ('true',)
     else:
         false_strings = ('no', 'false', 'f', 'n', '0')
         true_strings = ('yes', 'true', 't', 'y', '1')
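As a quick illustration of the write_json and parse_bool_string changes above, here is a minimal sketch; the output path and payload are hypothetical, and the exact return values of parse_bool_string are not shown in this diff.

from megadetector.utils.ct_utils import write_json, parse_bool_string

# The parent folder 'results/run_01' is now created automatically if it
# does not exist (hypothetical path and payload)
write_json('results/run_01/detections.json', {'images': []})

# Non-strict parsing accepts common variants ('y', '1', 'yes', ...); strict
# parsing only matches the literal strings 'true' and 'false'
print(parse_bool_string('y'))
print(parse_bool_string('true', strict=True))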
megadetector/utils/directory_listing.py
CHANGED

@@ -129,6 +129,9 @@ def create_html_index(dir,
         recursive (bool, optional): recurse into subfolders
     """

+    if template_fun is None:
+        template_fun = _create_plain_index
+
     print('Traversing {}'.format(dir))

     # Make sure we remove the trailing /
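A minimal usage sketch for the create_html_index change; the folder path is hypothetical, and the call assumes template_fun defaults to None so that the new fallback to _create_plain_index applies.

from megadetector.utils.directory_listing import create_html_index

# With no template function supplied, the plain built-in template is used
create_html_index('/data/camera-trap-report', recursive=True)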
megadetector/utils/gpu_test.py
CHANGED

@@ -34,7 +34,7 @@ def torch_test():
     except Exception as e: #noqa
         print('PyTorch unavailable, not running PyTorch tests. PyTorch import error was:\n{}'.format(
             str(e)))
-        return
+        return 0

     print('Torch version: {}'.format(str(torch.__version__)))
     print('CUDA available (according to PyTorch): {}'.format(torch.cuda.is_available()))

@@ -71,17 +71,17 @@ def tf_test():
     Print diagnostic information about TF/CUDA status.

     Returns:
-        int: The number of CUDA devices reported by
+        int: The number of CUDA devices reported by TensorFlow.
     """

     try:
-        import tensorflow as tf
+        import tensorflow as tf # type: ignore
     except Exception as e: #noqa
         print('TensorFlow unavailable, not running TF tests. TF import error was:\n{}'.format(
             str(e)))
-        return
+        return 0

-    from tensorflow.python.platform import build_info as build
+    from tensorflow.python.platform import build_info as build # type: ignore
     print(f"TF version: {tf.__version__}")

     if 'cuda_version' not in build.build_info:

@@ -94,7 +94,7 @@ def tf_test():
     print(f"CuDNN build version reported by TensorFlow: {build.build_info['cudnn_version']}")

     try:
-        from tensorflow.python.compiler.tensorrt import trt_convert as trt
+        from tensorflow.python.compiler.tensorrt import trt_convert as trt # type: ignore
         print("Linked TensorRT version: {}".format(trt.trt_utils._pywrap_py_utils.get_linked_tensorrt_version()))
     except Exception:
         print('Could not probe TensorRT version')
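Both probes now return 0 rather than None when the corresponding framework cannot be imported, so callers can treat the return value uniformly as a CUDA device count. A short sketch (assuming torch_test also returns a device count, by analogy with tf_test's docstring):

from megadetector.utils import gpu_test

n_torch_devices = gpu_test.torch_test()
n_tf_devices = gpu_test.tf_test()
print('CUDA devices: {} (PyTorch), {} (TensorFlow)'.format(n_torch_devices, n_tf_devices))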
megadetector/utils/md_tests.py
CHANGED

@@ -386,7 +386,7 @@ def output_files_are_identical(fn1,fn2,verbose=False):
     fn2_results['images'] = \
         sorted(fn2_results['images'], key=lambda d: d['file'])

-    if len(fn1_results['images']) != len(
+    if len(fn1_results['images']) != len(fn2_results['images']):
         if verbose:
             print('{} images in {}, {} images in {}'.format(
                 len(fn1_results['images']),fn1,

@@ -1249,8 +1249,8 @@ def run_cli_tests(options):
     cmd_results = execute_and_print(cmd)

     assert output_files_are_identical(fn1=inference_output_file,
-
-
+                                      fn2=inference_output_file_queue,
+                                      verbose=True)


     ## Run again with the image queue and worker-side preprocessing enabled

@@ -1265,24 +1265,24 @@ def run_cli_tests(options):
     cmd_results = execute_and_print(cmd)

     assert output_files_are_identical(fn1=inference_output_file,
-
-
+                                      fn2=inference_output_file_preprocess_queue,
+                                      verbose=True)


-    ## Run again with the image queue
+    ## Run again with the image queue but no worker-side preprocessing

-    print('\n** Running MD on a folder (with image queue
+    print('\n** Running MD on a folder (with image queue but no worker-side preprocessing) (CLI) **\n')

-    cmd = base_cmd + ' --use_image_queue
-
-        insert_before_extension(inference_output_file,'
-    cmd = cmd.replace(inference_output_file,
+    cmd = base_cmd + ' --use_image_queue'
+    inference_output_file_no_preprocess_queue = \
+        insert_before_extension(inference_output_file,'no_preprocess_queue')
+    cmd = cmd.replace(inference_output_file,inference_output_file_no_preprocess_queue)
     cmd += ' --detector_options {}'.format(dict_to_kvp_list(options.detector_options))
     cmd_results = execute_and_print(cmd)

     assert output_files_are_identical(fn1=inference_output_file,
-
-
+                                      fn2=inference_output_file_no_preprocess_queue,
+                                      verbose=True)


     ## Run again with the worker-side preprocessing and an alternative batch size

@@ -1316,8 +1316,8 @@ def run_cli_tests(options):
     cmd_results = execute_and_print(cmd)

     assert output_files_are_identical(fn1=inference_output_file,
-
-
+                                      fn2=inference_output_file_checkpoint,
+                                      verbose=True)


     ## Run again with "modern" postprocessing, make sure the results are *not* the same as classic

@@ -1331,8 +1331,8 @@ def run_cli_tests(options):
     cmd_results = execute_and_print(cmd)

     assert not output_files_are_identical(fn1=inference_output_file,
-
-
+                                          fn2=inference_output_file_modern,
+                                          verbose=True)


     ## Run again with "modern" postprocessing and worker-side preprocessing,

@@ -1348,13 +1348,13 @@ def run_cli_tests(options):

     # This should not be the same as the "classic" results
     assert not output_files_are_identical(fn1=inference_output_file,
-
-
+                                          fn2=inference_output_file_modern_worker_preprocessing,
+                                          verbose=True)

     # ...but it should be the same as the single-threaded "modern" results
     assert output_files_are_identical(fn1=inference_output_file_modern,
-
-
+                                      fn2=inference_output_file_modern_worker_preprocessing,
+                                      verbose=True)


     if not options.skip_cpu_tests:
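The restored fn2 arguments match the output_files_are_identical signature shown in the first hunk; a standalone comparison might look like this (filenames are hypothetical):

from megadetector.utils.md_tests import output_files_are_identical

same = output_files_are_identical(fn1='md_results_single_thread.json',
                                  fn2='md_results_image_queue.json',
                                  verbose=True)
print('Outputs identical: {}'.format(same))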
megadetector/utils/path_utils.py
CHANGED

@@ -152,7 +152,6 @@ def folder_list(base_dir,
     folders = []

     if recursive:
-        folders = []
         for root, dirs, _ in os.walk(base_dir):
             for d in dirs:
                 folders.append(os.path.join(root, d))

@@ -370,7 +369,9 @@ def safe_create_link(link_exists,link_new):
         os.remove(link_new)
         os.symlink(link_exists,link_new)
     else:
-        os.
+        link_new_dir = os.path.dirname(link_new)
+        if len(link_new_dir) > 0:
+            os.makedirs(link_new_dir,exist_ok=True)
         os.symlink(link_exists,link_new)

 # ...def safe_create_link(...)
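A hedged sketch of the updated safe_create_link behavior: the folder containing the new link is now created when necessary (both paths below are hypothetical).

from megadetector.utils.path_utils import safe_create_link

# '/data/links/site_a' no longer needs to exist before creating the symlink
safe_create_link('/data/images/IMG_0001.JPG', '/data/links/site_a/IMG_0001.JPG')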
@@ -988,7 +989,9 @@ def _copy_file(input_output_tuple,overwrite=True,verbose=False,move=False):
     if verbose:
         print('{} to {}'.format(action_string,target_fn))

-    os.
+    target_dir = os.path.dirname(target_fn)
+    if len(target_dir) > 0:
+        os.makedirs(target_dir,exist_ok=True)
     if move:
         shutil.move(source_fn, target_fn)
     else:
@@ -1038,14 +1041,91 @@ def parallel_copy_files(input_file_to_output_file,
                                                   input_output_tuples)):
                 pbar.update()
     finally:
-        pool
-
-
-
+        if pool is not None:
+            pool.close()
+            pool.join()
+            if verbose:
+                print("Pool closed and joined parallel file copying")

 # ...def parallel_copy_files(...)


+#%% File deletion functions
+
+def delete_file(input_file, verbose=False):
+    """
+    Deletes a single file.
+
+    Args:
+        input_file (str): file to delete
+        verbose (bool, optional): enable additional debug console output
+
+    Returns:
+        bool: True if file was deleted successfully, False otherwise
+    """
+
+    try:
+        if verbose:
+            print('Deleting file {}'.format(input_file))
+
+        if os.path.isfile(input_file):
+            os.remove(input_file)
+            return True
+        else:
+            if verbose:
+                print('File {} does not exist'.format(input_file))
+            return False
+
+    except Exception as e:
+        if verbose:
+            print('Error deleting file {}: {}'.format(input_file, str(e)))
+        return False
+
+# ...def delete_file(...)
+
+
+def parallel_delete_files(input_files,
+                          max_workers=16,
+                          use_threads=True,
+                          verbose=False):
+    """
+    Deletes one or more files in parallel.
+
+    Args:
+        input_files (list): list of files to delete
+        max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
+        use_threads (bool, optional): whether to use threads (True) or processes (False); ignored if
+            max_workers <= 1
+        verbose (bool, optional): enable additional debug console output
+    """
+
+    if len(input_files) == 0:
+        return
+
+    n_workers = min(max_workers, len(input_files))
+
+    pool = None
+
+    try:
+        if use_threads:
+            pool = ThreadPool(n_workers)
+        else:
+            pool = Pool(n_workers)
+
+        with tqdm(total=len(input_files)) as pbar:
+            for i, _ in enumerate(pool.imap_unordered(partial(delete_file, verbose=verbose),
+                                                      input_files)):
+                pbar.update()
+    finally:
+        if pool is not None:
+            pool.close()
+            pool.join()
+            if verbose:
+                print('Pool closed and joined for file deletion')
+
+# ...def parallel_delete_files(...)
+
+
 #%% File size functions

 def get_file_sizes(base_dir, convert_slashes=True):
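The new deletion helpers can be exercised as below; this is only a sketch using hypothetical filenames, with the signatures taken from the added code.

from megadetector.utils.path_utils import delete_file, parallel_delete_files

# Single-file deletion returns True on success, False if the file is missing
# or cannot be removed
ok = delete_file('/tmp/scratch/old_results.json', verbose=True)

# Parallel deletion over a thread pool
files_to_remove = ['/tmp/scratch/crop_{:04d}.jpg'.format(i) for i in range(100)]
parallel_delete_files(files_to_remove, max_workers=16, use_threads=True)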
@@ -1118,8 +1198,6 @@ def parallel_get_file_sizes(filenames,
         dict: dictionary mapping filenames to file sizes in bytes
     """

-    n_workers = min(max_workers,len(filenames))
-
     folder_name = None

     if isinstance(filenames,str):
@@ -1137,23 +1215,37 @@ def parallel_get_file_sizes(filenames,

     assert is_iterable(filenames), '[filenames] argument is neither a folder nor an iterable'

+    n_workers = min(max_workers,len(filenames))
+
     if verbose:
         print('Creating worker pool')

-
-        pool_string = 'thread'
-        pool = ThreadPool(n_workers)
-    else:
-        pool_string = 'process'
-        pool = Pool(n_workers)
+    pool = None

-
-
-
+    try:
+
+        if use_threads:
+            pool_string = 'thread'
+            pool = ThreadPool(n_workers)
+        else:
+            pool_string = 'process'
+            pool = Pool(n_workers)
+
+        if verbose:
+            print('Created a {} pool of {} workers'.format(
+                pool_string,n_workers))

-
-
-
+        # This returns (filename,size) tuples
+        get_size_results = list(tqdm(pool.imap(
+            partial(_get_file_size,verbose=verbose),filenames), total=len(filenames)))
+
+    finally:
+
+        if pool is not None:
+            pool.close()
+            pool.join()
+            if verbose:
+                print('Pool closed and join for file size collection')

     to_return = {}
     for r in get_size_results:
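A usage sketch for parallel_get_file_sizes after this change; the folder name is hypothetical, and per the assert above, [filenames] may be either a folder name or a list of files.

from megadetector.utils.path_utils import parallel_get_file_sizes

filename_to_size = parallel_get_file_sizes('/data/camera-trap-images',
                                           max_workers=8,
                                           use_threads=True,
                                           verbose=True)
print('Collected sizes for {} files'.format(len(filename_to_size)))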
@@ -1208,6 +1300,8 @@ def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compress_

     return output_fn

+# ...def zip_file(...)
+

 def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
                                  overwrite=False, verbose=False, mode='x'):

@@ -1248,6 +1342,8 @@ def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,

     return output_fn

+# ...def add_files_to_single_tar_file(...)
+

 def zip_files_into_single_zipfile(input_files,
                                   output_fn,

@@ -1292,6 +1388,8 @@ def zip_files_into_single_zipfile(input_files,

     return output_fn

+# ...def zip_files_into_single_zipfile(...)
+

 def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, compress_level=9):
     """

@@ -1315,7 +1413,7 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
     if not overwrite:
         if os.path.isfile(output_fn):
             print('Zip file {} exists, skipping'.format(output_fn))
-            return
+            return output_fn

     if verbose:
         print('Zipping {} to {} (compression level {})'.format(

@@ -1333,6 +1431,8 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com

     return output_fn

+# ...def zip_folder(...)
+

 def parallel_zip_files(input_files,
                        max_workers=16,
@@ -1361,11 +1461,22 @@ def parallel_zip_files(input_files,
     else:
         pool = Pool(n_workers)

-
-
-
-
-
+    try:
+
+        with tqdm(total=len(input_files)) as pbar:
+            for i,_ in enumerate(pool.imap_unordered(partial(zip_file,
+                    output_fn=None,overwrite=overwrite,verbose=verbose,compress_level=compress_level),
+                    input_files)):
+                pbar.update()
+
+    finally:
+
+        pool.close()
+        pool.join()
+        if verbose:
+            print('Pool closed and joined for parallel zipping')
+
+# ...def parallel_zip_files(...)


 def parallel_zip_folders(input_folders,
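A sketch of parallel_zip_files usage under the new try/finally structure; the filenames are hypothetical, and output_fn=None (each file zipped next to itself) is assumed from the partial(...) call above.

from megadetector.utils.path_utils import parallel_zip_files

files_to_zip = ['/data/exports/results_{:02d}.json'.format(i) for i in range(10)]
parallel_zip_files(files_to_zip, max_workers=4, use_threads=True,
                   overwrite=False, verbose=True)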
@@ -1395,12 +1506,23 @@ def parallel_zip_folders(input_folders,
     else:
         pool = Pool(n_workers)

-
-
-
-
-
-
+    try:
+
+        with tqdm(total=len(input_folders)) as pbar:
+            for i,_ in enumerate(pool.imap_unordered(
+                    partial(zip_folder,overwrite=overwrite,
+                            compress_level=compress_level,verbose=verbose),
+                    input_folders)):
+                pbar.update()
+
+    finally:
+
+        pool.close()
+        pool.join()
+        if verbose:
+            print('Pool closed and joined for parallel folder zipping')
+
+# ...def parallel_zip_folders(...)


 def zip_each_file_in_folder(folder_name,
@@ -1443,6 +1565,8 @@ def zip_each_file_in_folder(folder_name,
                             use_threads=use_threads,compress_level=compress_level,
                             overwrite=overwrite,verbose=verbose)

+# ...def zip_each_file_in_folder(...)
+

 def unzip_file(input_file, output_folder=None):
     """
@@ -1550,9 +1674,20 @@ def parallel_compute_file_hashes(filenames,
     else:
         pool = Pool(n_workers)

-
-
-
+    try:
+
+        results = list(tqdm(pool.imap(
+            partial(compute_file_hash,algorithm=algorithm,allow_failures=True),
+            filenames), total=len(filenames)))
+
+    finally:
+
+        pool.close()
+        pool.join()
+        if verbose:
+            print('Pool closed and joined for parallel zipping')
+
+    # ...if we are/aren't parallelizing

     assert len(filenames) == len(results), 'Internal error in parallel_compute_file_hashes'
megadetector/utils/split_locations_into_train_val.py
CHANGED

@@ -221,14 +221,10 @@ def split_locations_into_train_val(location_to_category_counts,
     weighted_average_error,weighted_category_errors,category_to_val_fraction = \
         compute_seed_errors(min_error_seed)

-    random_seed = min_error_seed
-
-    category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,reverse=True)
     category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,
                                                         sort_values=category_id_to_count,
                                                         reverse=True)

-
     print('Val fractions by category:\n')

     for category in category_to_val_fraction:
megadetector/utils/string_utils.py
CHANGED

@@ -34,6 +34,27 @@ def is_float(s):
     return True


+def is_int(s):
+    """
+    Checks whether [s] is an object (typically a string) that can be cast to a int
+
+    Args:
+        s (object): object to evaluate
+
+    Returns:
+        bool: True if s successfully casts to a int, otherwise False
+    """
+
+    if s is None:
+        return False
+
+    try:
+        _ = int(s)
+    except ValueError:
+        return False
+    return True
+
+
 def human_readable_to_bytes(size):
     """
     Given a human-readable byte string (e.g. 2G, 10GB, 30MB, 20KB),
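The new is_int helper mirrors the existing is_float check, for example:

from megadetector.utils.string_utils import is_int

print(is_int('42'))     # True
print(is_int('3.14'))   # False: int('3.14') raises ValueError
print(is_int(None))     # False: None is handled explicitly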
megadetector/utils/url_utils.py
CHANGED

@@ -132,7 +132,8 @@ def download_url(url,
     if verbose:
         print('Downloading file {} to {}'.format(os.path.basename(url_no_sas),destination_filename),end='')
     target_dir = os.path.dirname(destination_filename)
-
+    if len(target_dir) > 0:
+        os.makedirs(target_dir,exist_ok=True)
     urllib.request.urlretrieve(url, destination_filename, progress_updater)
     assert(os.path.isfile(destination_filename))
     n_bytes = os.path.getsize(destination_filename)

@@ -800,8 +801,9 @@ class TestUrlUtils:
 def _test_url_utils():
     """
     Runs all tests in the TestUrlUtils class. I generally disable this during testing
-    because it creates irritating nondeterminism
-    a module that changes
+    because it creates irritating nondeterminism (because it depends on downloading
+    stuff from the Internet), and this is neither a core module nor a module that changes
+    often.
     """

     test_instance = TestUrlUtils()