megadetector 10.0.9__py3-none-any.whl → 10.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of megadetector has been flagged as a potentially problematic release.

Files changed (84)
  1. megadetector/data_management/animl_to_md.py +5 -2
  2. megadetector/data_management/cct_json_utils.py +4 -2
  3. megadetector/data_management/cct_to_md.py +5 -4
  4. megadetector/data_management/cct_to_wi.py +5 -1
  5. megadetector/data_management/coco_to_yolo.py +3 -2
  6. megadetector/data_management/databases/combine_coco_camera_traps_files.py +4 -4
  7. megadetector/data_management/databases/integrity_check_json_db.py +2 -2
  8. megadetector/data_management/databases/subset_json_db.py +0 -3
  9. megadetector/data_management/generate_crops_from_cct.py +6 -4
  10. megadetector/data_management/get_image_sizes.py +5 -35
  11. megadetector/data_management/labelme_to_coco.py +10 -6
  12. megadetector/data_management/labelme_to_yolo.py +19 -28
  13. megadetector/data_management/lila/create_lila_test_set.py +22 -2
  14. megadetector/data_management/lila/generate_lila_per_image_labels.py +7 -5
  15. megadetector/data_management/lila/lila_common.py +2 -2
  16. megadetector/data_management/lila/test_lila_metadata_urls.py +0 -1
  17. megadetector/data_management/ocr_tools.py +6 -10
  18. megadetector/data_management/read_exif.py +69 -13
  19. megadetector/data_management/remap_coco_categories.py +1 -1
  20. megadetector/data_management/remove_exif.py +10 -5
  21. megadetector/data_management/rename_images.py +20 -13
  22. megadetector/data_management/resize_coco_dataset.py +10 -4
  23. megadetector/data_management/speciesnet_to_md.py +3 -3
  24. megadetector/data_management/yolo_output_to_md_output.py +3 -1
  25. megadetector/data_management/yolo_to_coco.py +28 -19
  26. megadetector/detection/change_detection.py +26 -18
  27. megadetector/detection/process_video.py +1 -1
  28. megadetector/detection/pytorch_detector.py +5 -5
  29. megadetector/detection/run_detector.py +34 -10
  30. megadetector/detection/run_detector_batch.py +60 -42
  31. megadetector/detection/run_inference_with_yolov5_val.py +3 -1
  32. megadetector/detection/run_md_and_speciesnet.py +282 -110
  33. megadetector/detection/run_tiled_inference.py +7 -7
  34. megadetector/detection/tf_detector.py +4 -6
  35. megadetector/detection/video_utils.py +9 -6
  36. megadetector/postprocessing/add_max_conf.py +4 -4
  37. megadetector/postprocessing/categorize_detections_by_size.py +3 -2
  38. megadetector/postprocessing/classification_postprocessing.py +19 -21
  39. megadetector/postprocessing/combine_batch_outputs.py +3 -2
  40. megadetector/postprocessing/compare_batch_results.py +49 -27
  41. megadetector/postprocessing/convert_output_format.py +8 -6
  42. megadetector/postprocessing/create_crop_folder.py +13 -4
  43. megadetector/postprocessing/generate_csv_report.py +22 -8
  44. megadetector/postprocessing/load_api_results.py +8 -4
  45. megadetector/postprocessing/md_to_coco.py +2 -3
  46. megadetector/postprocessing/md_to_labelme.py +12 -8
  47. megadetector/postprocessing/md_to_wi.py +2 -1
  48. megadetector/postprocessing/merge_detections.py +4 -6
  49. megadetector/postprocessing/postprocess_batch_results.py +4 -3
  50. megadetector/postprocessing/remap_detection_categories.py +6 -3
  51. megadetector/postprocessing/render_detection_confusion_matrix.py +18 -10
  52. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  53. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +5 -3
  54. megadetector/postprocessing/separate_detections_into_folders.py +10 -4
  55. megadetector/postprocessing/subset_json_detector_output.py +1 -1
  56. megadetector/postprocessing/top_folders_to_bottom.py +22 -7
  57. megadetector/postprocessing/validate_batch_results.py +1 -1
  58. megadetector/taxonomy_mapping/map_new_lila_datasets.py +59 -3
  59. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
  60. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +26 -17
  61. megadetector/taxonomy_mapping/species_lookup.py +51 -2
  62. megadetector/utils/ct_utils.py +9 -4
  63. megadetector/utils/directory_listing.py +3 -0
  64. megadetector/utils/extract_frames_from_video.py +4 -0
  65. megadetector/utils/gpu_test.py +6 -6
  66. megadetector/utils/md_tests.py +21 -21
  67. megadetector/utils/path_utils.py +171 -36
  68. megadetector/utils/split_locations_into_train_val.py +0 -4
  69. megadetector/utils/string_utils.py +21 -0
  70. megadetector/utils/url_utils.py +5 -3
  71. megadetector/utils/wi_platform_utils.py +168 -24
  72. megadetector/utils/wi_taxonomy_utils.py +38 -8
  73. megadetector/utils/write_html_image_list.py +1 -2
  74. megadetector/visualization/plot_utils.py +31 -19
  75. megadetector/visualization/render_images_with_thumbnails.py +3 -0
  76. megadetector/visualization/visualization_utils.py +18 -7
  77. megadetector/visualization/visualize_db.py +9 -26
  78. megadetector/visualization/visualize_detector_output.py +1 -0
  79. megadetector/visualization/visualize_video_output.py +14 -2
  80. {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/METADATA +1 -1
  81. {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/RECORD +84 -84
  82. {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/WHEEL +0 -0
  83. {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/licenses/LICENSE +0 -0
  84. {megadetector-10.0.9.dist-info → megadetector-10.0.11.dist-info}/top_level.txt +0 -0
@@ -241,7 +241,10 @@ def write_json(path,
     elif force_str:
         default_handler = str
 
-    os.makedirs(os.path.dirname(path), exist_ok=True)
+    # Create the parent directory if necessary
+    parent_dir = os.path.dirname(path)
+    if len(parent_dir) > 0:
+        os.makedirs(parent_dir, exist_ok=True)
 
     with open(path, 'w', newline='\n', encoding=encoding) as f:
         json.dump(content, f, indent=indent, default=default_handler, ensure_ascii=ensure_ascii)
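The new length check in write_json matters because os.path.dirname() returns an empty string when a path has no directory component, and os.makedirs('') raises FileNotFoundError even with exist_ok=True. A minimal standalone sketch of the same pattern (write_json_sketch is a hypothetical name, not the package function):

```python
import json
import os

def write_json_sketch(path, content):
    # Only create the parent directory when the path actually has one;
    # os.makedirs('') raises FileNotFoundError even with exist_ok=True,
    # so the length check is what makes bare filenames work.
    parent_dir = os.path.dirname(path)
    if len(parent_dir) > 0:
        os.makedirs(parent_dir, exist_ok=True)
    with open(path, 'w', newline='\n', encoding='utf-8') as f:
        json.dump(content, f, indent=1)

# 'results.json' has no directory component; the unconditional
# makedirs(os.path.dirname(path)) call would have failed on it.
write_json_sketch('results.json', {'images': []})
```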
@@ -562,7 +565,7 @@ def sort_dictionary_by_value(d,sort_values=None,reverse=False):
         reverse (bool, optional): whether to sort in reverse (descending) order
 
     Returns:
-        dict: sorted copy of [d
+        dict: sorted copy of [d]
     """
 
     if sort_values is None:
@@ -1022,8 +1025,10 @@ def parse_bool_string(s, strict=False):
     s = str(s).lower().strip()
 
     if strict:
-        false_strings = ('false')
-        true_strings = ('true')
+        # Fun fact: ('false') (rather than ('false',)) creates a string,
+        # not a tuple.
+        false_strings = ('false',)
+        true_strings = ('true',)
     else:
         false_strings = ('no', 'false', 'f', 'n', '0')
         true_strings = ('yes', 'true', 't', 'y', '1')
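The parse_bool_string fix addresses a standard Python gotcha: parentheses without a trailing comma do not create a tuple, and a membership test against a bare string degenerates into a substring search. A small standalone illustration:

```python
# Without a trailing comma, parentheses don't create a tuple:
false_strings_wrong = ('false')      # this is the str 'false'
false_strings_right = ('false',)     # this is a 1-element tuple

print(type(false_strings_wrong))     # <class 'str'>
print(type(false_strings_right))     # <class 'tuple'>

# The practical consequence for a membership test: against a string,
# 'in' does a substring search, so strict parsing would have accepted
# fragments like 'f' or 'als' as meaning "false".
print('f' in false_strings_wrong)    # True  (substring of 'false')
print('f' in false_strings_right)    # False (not an element of the tuple)
```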
@@ -129,6 +129,9 @@ def create_html_index(dir,
         recursive (bool, optional): recurse into subfolders
     """
 
+    if template_fun is None:
+        template_fun = _create_plain_index
+
     print('Traversing {}'.format(dir))
 
     # Make sure we remove the trailing /
@@ -55,6 +55,10 @@ class FrameExtractionOptions:
         #: must be a folder when this is specified.
         self.detector_output_file = None
 
+    # ...def __init__(...)
+
+# ...class FrameExtractionOptions
+
 
 #%% Core functions
 
@@ -34,7 +34,7 @@ def torch_test():
     except Exception as e: #noqa
         print('PyTorch unavailable, not running PyTorch tests. PyTorch import error was:\n{}'.format(
             str(e)))
-        return
+        return 0
 
     print('Torch version: {}'.format(str(torch.__version__)))
     print('CUDA available (according to PyTorch): {}'.format(torch.cuda.is_available()))
@@ -71,17 +71,17 @@ def tf_test():
     Print diagnostic information about TF/CUDA status.
 
     Returns:
-        int: The number of CUDA devices reported by PyTorch.
+        int: The number of CUDA devices reported by TensorFlow.
     """
 
     try:
-        import tensorflow as tf
+        import tensorflow as tf # type: ignore
     except Exception as e: #noqa
         print('TensorFlow unavailable, not running TF tests. TF import error was:\n{}'.format(
             str(e)))
-        return
+        return 0
 
-    from tensorflow.python.platform import build_info as build
+    from tensorflow.python.platform import build_info as build # type: ignore
     print(f"TF version: {tf.__version__}")
 
     if 'cuda_version' not in build.build_info:
@@ -94,7 +94,7 @@ def tf_test():
     print(f"CuDNN build version reported by TensorFlow: {build.build_info['cudnn_version']}")
 
     try:
-        from tensorflow.python.compiler.tensorrt import trt_convert as trt
+        from tensorflow.python.compiler.tensorrt import trt_convert as trt # type: ignore
         print("Linked TensorRT version: {}".format(trt.trt_utils._pywrap_py_utils.get_linked_tensorrt_version()))
     except Exception:
         print('Could not probe TensorRT version')
@@ -386,7 +386,7 @@ def output_files_are_identical(fn1,fn2,verbose=False):
     fn2_results['images'] = \
         sorted(fn2_results['images'], key=lambda d: d['file'])
 
-    if len(fn1_results['images']) != len(fn1_results['images']):
+    if len(fn1_results['images']) != len(fn2_results['images']):
         if verbose:
             print('{} images in {}, {} images in {}'.format(
                 len(fn1_results['images']),fn1,
@@ -1249,8 +1249,8 @@ def run_cli_tests(options):
     cmd_results = execute_and_print(cmd)
 
     assert output_files_are_identical(fn1=inference_output_file,
-                                      fn2=inference_output_file_queue,
-                                      verbose=True)
+                                      fn2=inference_output_file_queue,
+                                      verbose=True)
 
 
     ## Run again with the image queue and worker-side preprocessing enabled
@@ -1265,24 +1265,24 @@ def run_cli_tests(options):
     cmd_results = execute_and_print(cmd)
 
     assert output_files_are_identical(fn1=inference_output_file,
-                                      fn2=inference_output_file_preprocess_queue,
-                                      verbose=True)
+                                      fn2=inference_output_file_preprocess_queue,
+                                      verbose=True)
 
 
-    ## Run again with the image queue and worker-side preprocessing
+    ## Run again with the image queue but no worker-side preprocessing
 
-    print('\n** Running MD on a folder (with image queue and preprocessing) (CLI) **\n')
+    print('\n** Running MD on a folder (with image queue but no worker-side preprocessing) (CLI) **\n')
 
-    cmd = base_cmd + ' --use_image_queue --preprocess_on_image_queue'
-    inference_output_file_preprocess_queue = \
-        insert_before_extension(inference_output_file,'preprocess_queue')
-    cmd = cmd.replace(inference_output_file,inference_output_file_preprocess_queue)
+    cmd = base_cmd + ' --use_image_queue'
+    inference_output_file_no_preprocess_queue = \
+        insert_before_extension(inference_output_file,'no_preprocess_queue')
+    cmd = cmd.replace(inference_output_file,inference_output_file_no_preprocess_queue)
     cmd += ' --detector_options {}'.format(dict_to_kvp_list(options.detector_options))
     cmd_results = execute_and_print(cmd)
 
     assert output_files_are_identical(fn1=inference_output_file,
-                                      fn2=inference_output_file_preprocess_queue,
-                                      verbose=True)
+                                      fn2=inference_output_file_no_preprocess_queue,
+                                      verbose=True)
 
 
     ## Run again with the worker-side preprocessing and an alternative batch size
@@ -1316,8 +1316,8 @@ def run_cli_tests(options):
     cmd_results = execute_and_print(cmd)
 
     assert output_files_are_identical(fn1=inference_output_file,
-                                      fn2=inference_output_file_checkpoint,
-                                      verbose=True)
+                                      fn2=inference_output_file_checkpoint,
+                                      verbose=True)
 
 
     ## Run again with "modern" postprocessing, make sure the results are *not* the same as classic
@@ -1331,8 +1331,8 @@ def run_cli_tests(options):
     cmd_results = execute_and_print(cmd)
 
     assert not output_files_are_identical(fn1=inference_output_file,
-                                          fn2=inference_output_file_modern,
-                                          verbose=True)
+                                          fn2=inference_output_file_modern,
+                                          verbose=True)
 
 
     ## Run again with "modern" postprocessing and worker-side preprocessing,
@@ -1348,13 +1348,13 @@ def run_cli_tests(options):
 
     # This should not be the same as the "classic" results
    assert not output_files_are_identical(fn1=inference_output_file,
-                                          fn2=inference_output_file_modern_worker_preprocessing,
-                                          verbose=True)
+                                          fn2=inference_output_file_modern_worker_preprocessing,
+                                          verbose=True)
 
     # ...but it should be the same as the single-threaded "modern" results
     assert output_files_are_identical(fn1=inference_output_file_modern,
-                                      fn2=inference_output_file_modern_worker_preprocessing,
-                                      verbose=True)
+                                      fn2=inference_output_file_modern_worker_preprocessing,
+                                      verbose=True)
 
 
     if not options.skip_cpu_tests:
@@ -152,7 +152,6 @@ def folder_list(base_dir,
     folders = []
 
     if recursive:
-        folders = []
         for root, dirs, _ in os.walk(base_dir):
             for d in dirs:
                 folders.append(os.path.join(root, d))
@@ -370,7 +369,9 @@ def safe_create_link(link_exists,link_new):
             os.remove(link_new)
             os.symlink(link_exists,link_new)
     else:
-        os.makedirs(os.path.dirname(link_new),exist_ok=True)
+        link_new_dir = os.path.dirname(link_new)
+        if len(link_new_dir) > 0:
+            os.makedirs(link_new_dir,exist_ok=True)
         os.symlink(link_exists,link_new)
 
 # ...def safe_create_link(...)
@@ -988,7 +989,9 @@ def _copy_file(input_output_tuple,overwrite=True,verbose=False,move=False):
     if verbose:
         print('{} to {}'.format(action_string,target_fn))
 
-    os.makedirs(os.path.dirname(target_fn),exist_ok=True)
+    target_dir = os.path.dirname(target_fn)
+    if len(target_dir) > 0:
+        os.makedirs(target_dir,exist_ok=True)
     if move:
         shutil.move(source_fn, target_fn)
     else:
@@ -1038,14 +1041,91 @@ def parallel_copy_files(input_file_to_output_file,
                 input_output_tuples)):
                 pbar.update()
     finally:
-        pool.close()
-        pool.join()
-        if verbose:
-            print("Pool closed and joined parallel file copying")
+        if pool is not None:
+            pool.close()
+            pool.join()
+            if verbose:
+                print("Pool closed and joined parallel file copying")
 
 # ...def parallel_copy_files(...)
 
 
+#%% File deletion functions
+
+def delete_file(input_file, verbose=False):
+    """
+    Deletes a single file.
+
+    Args:
+        input_file (str): file to delete
+        verbose (bool, optional): enable additional debug console output
+
+    Returns:
+        bool: True if file was deleted successfully, False otherwise
+    """
+
+    try:
+        if verbose:
+            print('Deleting file {}'.format(input_file))
+
+        if os.path.isfile(input_file):
+            os.remove(input_file)
+            return True
+        else:
+            if verbose:
+                print('File {} does not exist'.format(input_file))
+            return False
+
+    except Exception as e:
+        if verbose:
+            print('Error deleting file {}: {}'.format(input_file, str(e)))
+        return False
+
+# ...def delete_file(...)
+
+
+def parallel_delete_files(input_files,
+                          max_workers=16,
+                          use_threads=True,
+                          verbose=False):
+    """
+    Deletes one or more files in parallel.
+
+    Args:
+        input_files (list): list of files to delete
+        max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
+        use_threads (bool, optional): whether to use threads (True) or processes (False); ignored if
+            max_workers <= 1
+        verbose (bool, optional): enable additional debug console output
+    """
+
+    if len(input_files) == 0:
+        return
+
+    n_workers = min(max_workers, len(input_files))
+
+    pool = None
+
+    try:
+        if use_threads:
+            pool = ThreadPool(n_workers)
+        else:
+            pool = Pool(n_workers)
+
+        with tqdm(total=len(input_files)) as pbar:
+            for i, _ in enumerate(pool.imap_unordered(partial(delete_file, verbose=verbose),
+                                                      input_files)):
+                pbar.update()
+    finally:
+        if pool is not None:
+            pool.close()
+            pool.join()
+            if verbose:
+                print('Pool closed and joined for file deletion')
+
+# ...def parallel_delete_files(...)
+
+
 #%% File size functions
 
 def get_file_sizes(base_dir, convert_slashes=True):
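delete_file and parallel_delete_files are new in this release. A hedged usage sketch follows; the import path assumes they live alongside parallel_copy_files in path_utils (consistent with the surrounding hunk, though the diff itself doesn't show the module header), and the example paths are made up:

```python
# Hypothetical usage of the new deletion helpers
from megadetector.utils.path_utils import delete_file, parallel_delete_files

# Single file: returns True on success, False if the file was missing or the
# delete failed (errors are swallowed and optionally printed).
ok = delete_file('/tmp/scratch/old_results.json', verbose=True)

# Many files: uses a thread pool by default and returns immediately on an
# empty list.
stale_files = ['/tmp/scratch/a.json', '/tmp/scratch/b.json']
parallel_delete_files(stale_files, max_workers=8, use_threads=True, verbose=True)
```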
@@ -1118,8 +1198,6 @@ def parallel_get_file_sizes(filenames,
         dict: dictionary mapping filenames to file sizes in bytes
     """
 
-    n_workers = min(max_workers,len(filenames))
-
     folder_name = None
 
     if isinstance(filenames,str):
@@ -1137,23 +1215,37 @@ def parallel_get_file_sizes(filenames,
 
     assert is_iterable(filenames), '[filenames] argument is neither a folder nor an iterable'
 
+    n_workers = min(max_workers,len(filenames))
+
     if verbose:
         print('Creating worker pool')
 
-    if use_threads:
-        pool_string = 'thread'
-        pool = ThreadPool(n_workers)
-    else:
-        pool_string = 'process'
-        pool = Pool(n_workers)
+    pool = None
 
-    if verbose:
-        print('Created a {} pool of {} workers'.format(
-            pool_string,n_workers))
+    try:
+
+        if use_threads:
+            pool_string = 'thread'
+            pool = ThreadPool(n_workers)
+        else:
+            pool_string = 'process'
+            pool = Pool(n_workers)
+
+        if verbose:
+            print('Created a {} pool of {} workers'.format(
+                pool_string,n_workers))
 
-    # This returns (filename,size) tuples
-    get_size_results = list(tqdm(pool.imap(
-        partial(_get_file_size,verbose=verbose),filenames), total=len(filenames)))
+        # This returns (filename,size) tuples
+        get_size_results = list(tqdm(pool.imap(
+            partial(_get_file_size,verbose=verbose),filenames), total=len(filenames)))
+
+    finally:
+
+        if pool is not None:
+            pool.close()
+            pool.join()
+            if verbose:
+                print('Pool closed and join for file size collection')
 
     to_return = {}
     for r in get_size_results:
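This hunk applies the same refactor that appears throughout path_utils in this release (parallel_copy_files above, and the zip and hash helpers below): pool creation and the work loop move inside a try block, and close()/join() move into a finally clause so worker pools are torn down even when a task raises. A distilled, standalone sketch of the pattern (not package code):

```python
from multiprocessing.pool import Pool, ThreadPool

from tqdm import tqdm

def run_in_pool(func, items, n_workers=8, use_threads=True):
    """Minimal sketch of the pool-cleanup pattern adopted in this release."""
    pool = None
    try:
        pool = ThreadPool(n_workers) if use_threads else Pool(n_workers)
        # imap() keeps results ordered; tqdm tracks progress as they complete
        return list(tqdm(pool.imap(func, items), total=len(items)))
    finally:
        # Runs whether or not func raised; the None check covers the case
        # where pool construction itself failed.
        if pool is not None:
            pool.close()
            pool.join()

print(run_in_pool(abs, [-1, -2, 3]))  # [1, 2, 3]
```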
@@ -1208,6 +1300,8 @@ def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compress_
 
     return output_fn
 
+# ...def zip_file(...)
+
 
 def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
                                  overwrite=False, verbose=False, mode='x'):
@@ -1248,6 +1342,8 @@ def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
 
     return output_fn
 
+# ...def add_files_to_single_tar_file(...)
+
 
 def zip_files_into_single_zipfile(input_files,
                                   output_fn,
@@ -1292,6 +1388,8 @@ def zip_files_into_single_zipfile(input_files,
 
     return output_fn
 
+# ...def zip_files_into_single_zipfile(...)
+
 
 def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, compress_level=9):
     """
@@ -1315,7 +1413,7 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
     if not overwrite:
         if os.path.isfile(output_fn):
             print('Zip file {} exists, skipping'.format(output_fn))
-            return
+            return output_fn
 
     if verbose:
         print('Zipping {} to {} (compression level {})'.format(
@@ -1333,6 +1431,8 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
 
     return output_fn
 
+# ...def zip_folder(...)
+
 
 def parallel_zip_files(input_files,
                        max_workers=16,
@@ -1361,11 +1461,22 @@ def parallel_zip_files(input_files,
     else:
         pool = Pool(n_workers)
 
-    with tqdm(total=len(input_files)) as pbar:
-        for i,_ in enumerate(pool.imap_unordered(partial(zip_file,
-            output_fn=None,overwrite=overwrite,verbose=verbose,compress_level=compress_level),
-            input_files)):
-            pbar.update()
+    try:
+
+        with tqdm(total=len(input_files)) as pbar:
+            for i,_ in enumerate(pool.imap_unordered(partial(zip_file,
+                output_fn=None,overwrite=overwrite,verbose=verbose,compress_level=compress_level),
+                input_files)):
+                pbar.update()
+
+    finally:
+
+        pool.close()
+        pool.join()
+        if verbose:
+            print('Pool closed and joined for parallel zipping')
+
+# ...def parallel_zip_files(...)
 
 
 def parallel_zip_folders(input_folders,
@@ -1395,12 +1506,23 @@ def parallel_zip_folders(input_folders,
     else:
         pool = Pool(n_workers)
 
-    with tqdm(total=len(input_folders)) as pbar:
-        for i,_ in enumerate(pool.imap_unordered(
-            partial(zip_folder,overwrite=overwrite,
-                    compress_level=compress_level,verbose=verbose),
-            input_folders)):
-            pbar.update()
+    try:
+
+        with tqdm(total=len(input_folders)) as pbar:
+            for i,_ in enumerate(pool.imap_unordered(
+                partial(zip_folder,overwrite=overwrite,
+                        compress_level=compress_level,verbose=verbose),
+                input_folders)):
+                pbar.update()
+
+    finally:
+
+        pool.close()
+        pool.join()
+        if verbose:
+            print('Pool closed and joined for parallel folder zipping')
+
+# ...def parallel_zip_folders(...)
 
 
 def zip_each_file_in_folder(folder_name,
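For context, a hedged usage sketch of the zip helpers refactored above. The keyword names below are limited to the parameters actually visible in this diff (output_fn, max_workers, overwrite, verbose, compress_level) and may not be exhaustive; the paths are made up:

```python
from megadetector.utils.path_utils import (
    zip_folder, parallel_zip_files, parallel_zip_folders)

# Zip one folder into a default-named .zip (output_fn=None picks the name);
# as of this release, zip_folder also returns the output path when it skips
# an existing archive instead of returning None.
archive = zip_folder('/data/camera_trap_images', output_fn=None,
                     overwrite=False, verbose=True)

# Zip several files or folders in parallel, one .zip per input
parallel_zip_files(['/data/results_a.json', '/data/results_b.json'],
                   max_workers=8, overwrite=False, verbose=True)
parallel_zip_folders(['/data/site_a', '/data/site_b'],
                     overwrite=False, verbose=True)
```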
@@ -1443,6 +1565,8 @@ def zip_each_file_in_folder(folder_name,
                        use_threads=use_threads,compress_level=compress_level,
                        overwrite=overwrite,verbose=verbose)
 
+# ...def zip_each_file_in_folder(...)
+
 
 def unzip_file(input_file, output_folder=None):
     """
@@ -1550,9 +1674,20 @@ def parallel_compute_file_hashes(filenames,
     else:
         pool = Pool(n_workers)
 
-    results = list(tqdm(pool.imap(
-        partial(compute_file_hash,algorithm=algorithm,allow_failures=True),
-        filenames), total=len(filenames)))
+    try:
+
+        results = list(tqdm(pool.imap(
+            partial(compute_file_hash,algorithm=algorithm,allow_failures=True),
+            filenames), total=len(filenames)))
+
+    finally:
+
+        pool.close()
+        pool.join()
+        if verbose:
+            print('Pool closed and joined for parallel zipping')
+
+    # ...if we are/aren't parallelizing
 
     assert len(filenames) == len(results), 'Internal error in parallel_compute_file_hashes'
 
@@ -221,14 +221,10 @@ def split_locations_into_train_val(location_to_category_counts,
     weighted_average_error,weighted_category_errors,category_to_val_fraction = \
         compute_seed_errors(min_error_seed)
 
-    random_seed = min_error_seed
-
-    category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,reverse=True)
     category_to_val_fraction = sort_dictionary_by_value(category_to_val_fraction,
                                                         sort_values=category_id_to_count,
                                                         reverse=True)
 
-
     print('Val fractions by category:\n')
 
     for category in category_to_val_fraction:
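The removed lines had sorted category_to_val_fraction twice, once by its own values and once via sort_values; only the call that sorts by category count survives. As a standalone sketch of what sorting one dictionary by another dictionary's values looks like (an approximation for illustration, not the ct_utils implementation):

```python
def sort_dict_by_external_values(d, sort_values, reverse=False):
    # Order the keys of [d] by the values found in [sort_values], which is
    # presumably what sort_values=category_id_to_count accomplishes above
    return {k: d[k] for k in sorted(d, key=lambda k: sort_values[k], reverse=reverse)}

category_to_val_fraction = {'deer': 0.18, 'bear': 0.22, 'empty': 0.15}
category_to_count = {'deer': 1200, 'bear': 90, 'empty': 5400}

# Most common categories first, regardless of their val fraction
print(sort_dict_by_external_values(category_to_val_fraction, category_to_count, reverse=True))
# {'empty': 0.15, 'deer': 0.18, 'bear': 0.22}
```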
@@ -34,6 +34,27 @@ def is_float(s):
     return True
 
 
+def is_int(s):
+    """
+    Checks whether [s] is an object (typically a string) that can be cast to a int
+
+    Args:
+        s (object): object to evaluate
+
+    Returns:
+        bool: True if s successfully casts to a int, otherwise False
+    """
+
+    if s is None:
+        return False
+
+    try:
+        _ = int(s)
+    except ValueError:
+        return False
+    return True
+
+
 def human_readable_to_bytes(size):
     """
     Given a human-readable byte string (e.g. 2G, 10GB, 30MB, 20KB),
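is_int is new in string_utils. Below is a standalone copy of the logic above with a few probes of its edge cases; note that only ValueError is caught, so float inputs that int() can truncate still return True:

```python
def is_int(s):
    # Standalone copy of the helper added above, for illustration only
    if s is None:
        return False
    try:
        _ = int(s)
    except ValueError:
        return False
    return True

print(is_int('42'))    # True
print(is_int('3.7'))   # False: int('3.7') raises ValueError
print(is_int(3.7))     # True:  int(3.7) truncates rather than raising
print(is_int(None))    # False: handled explicitly before the cast
```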
@@ -132,7 +132,8 @@ def download_url(url,
     if verbose:
         print('Downloading file {} to {}'.format(os.path.basename(url_no_sas),destination_filename),end='')
     target_dir = os.path.dirname(destination_filename)
-    os.makedirs(target_dir,exist_ok=True)
+    if len(target_dir) > 0:
+        os.makedirs(target_dir,exist_ok=True)
     urllib.request.urlretrieve(url, destination_filename, progress_updater)
     assert(os.path.isfile(destination_filename))
     n_bytes = os.path.getsize(destination_filename)
@@ -800,8 +801,9 @@ class TestUrlUtils:
 def _test_url_utils():
     """
     Runs all tests in the TestUrlUtils class. I generally disable this during testing
-    because it creates irritating nondeterminism, and this is neither a core module nor
-    a module that changes often.
+    because it creates irritating nondeterminism (because it depends on downloading
+    stuff from the Internet), and this is neither a core module nor a module that changes
+    often.
     """
 
     test_instance = TestUrlUtils()