megadetector 5.0.29-py3-none-any.whl → 10.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector might be problematic.
- megadetector/classification/efficientnet/model.py +8 -8
- megadetector/classification/efficientnet/utils.py +6 -5
- megadetector/classification/prepare_classification_script_mc.py +3 -3
- megadetector/data_management/annotations/annotation_constants.py +0 -1
- megadetector/data_management/camtrap_dp_to_coco.py +34 -1
- megadetector/data_management/cct_json_utils.py +2 -2
- megadetector/data_management/coco_to_yolo.py +22 -5
- megadetector/data_management/databases/add_width_and_height_to_db.py +85 -12
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +2 -2
- megadetector/data_management/databases/integrity_check_json_db.py +29 -15
- megadetector/data_management/generate_crops_from_cct.py +50 -1
- megadetector/data_management/labelme_to_coco.py +4 -2
- megadetector/data_management/labelme_to_yolo.py +82 -2
- megadetector/data_management/lila/generate_lila_per_image_labels.py +276 -18
- megadetector/data_management/lila/get_lila_annotation_counts.py +5 -3
- megadetector/data_management/lila/lila_common.py +3 -0
- megadetector/data_management/lila/test_lila_metadata_urls.py +15 -5
- megadetector/data_management/mewc_to_md.py +5 -0
- megadetector/data_management/ocr_tools.py +4 -3
- megadetector/data_management/read_exif.py +20 -5
- megadetector/data_management/remap_coco_categories.py +66 -4
- megadetector/data_management/remove_exif.py +50 -1
- megadetector/data_management/rename_images.py +3 -3
- megadetector/data_management/resize_coco_dataset.py +563 -95
- megadetector/data_management/yolo_output_to_md_output.py +131 -2
- megadetector/data_management/yolo_to_coco.py +140 -5
- megadetector/detection/change_detection.py +4 -3
- megadetector/detection/pytorch_detector.py +60 -22
- megadetector/detection/run_detector.py +225 -25
- megadetector/detection/run_detector_batch.py +42 -16
- megadetector/detection/run_inference_with_yolov5_val.py +12 -2
- megadetector/detection/run_tiled_inference.py +1 -0
- megadetector/detection/video_utils.py +53 -24
- megadetector/postprocessing/add_max_conf.py +4 -0
- megadetector/postprocessing/categorize_detections_by_size.py +1 -1
- megadetector/postprocessing/classification_postprocessing.py +55 -20
- megadetector/postprocessing/combine_batch_outputs.py +3 -2
- megadetector/postprocessing/compare_batch_results.py +64 -10
- megadetector/postprocessing/convert_output_format.py +12 -8
- megadetector/postprocessing/create_crop_folder.py +137 -10
- megadetector/postprocessing/load_api_results.py +26 -8
- megadetector/postprocessing/md_to_coco.py +4 -4
- megadetector/postprocessing/md_to_labelme.py +18 -7
- megadetector/postprocessing/merge_detections.py +5 -0
- megadetector/postprocessing/postprocess_batch_results.py +6 -3
- megadetector/postprocessing/remap_detection_categories.py +55 -2
- megadetector/postprocessing/render_detection_confusion_matrix.py +9 -6
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +3 -4
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +40 -19
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +1 -1
- megadetector/taxonomy_mapping/species_lookup.py +123 -41
- megadetector/utils/ct_utils.py +133 -113
- megadetector/utils/md_tests.py +93 -13
- megadetector/utils/path_utils.py +137 -107
- megadetector/utils/split_locations_into_train_val.py +2 -2
- megadetector/utils/string_utils.py +7 -7
- megadetector/utils/url_utils.py +81 -58
- megadetector/utils/wi_utils.py +46 -17
- megadetector/visualization/plot_utils.py +13 -9
- megadetector/visualization/render_images_with_thumbnails.py +2 -1
- megadetector/visualization/visualization_utils.py +94 -46
- megadetector/visualization/visualize_db.py +36 -9
- megadetector/visualization/visualize_detector_output.py +4 -4
- {megadetector-5.0.29.dist-info → megadetector-10.0.1.dist-info}/METADATA +135 -135
- megadetector-10.0.1.dist-info/RECORD +139 -0
- {megadetector-5.0.29.dist-info → megadetector-10.0.1.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.29.dist-info → megadetector-10.0.1.dist-info}/top_level.txt +0 -0
- megadetector/api/batch_processing/api_core/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/score.py +0 -438
- megadetector/api/batch_processing/api_core/server.py +0 -294
- megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
- megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
- megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
- megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
- megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
- megadetector/api/batch_processing/api_core/server_utils.py +0 -88
- megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
- megadetector/api/batch_processing/api_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
- megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
- megadetector/api/synchronous/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
- megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
- megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
- megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
- megadetector/api/synchronous/api_core/tests/load_test.py +0 -109
- megadetector/utils/azure_utils.py +0 -178
- megadetector/utils/sas_blob_utils.py +0 -513
- megadetector-5.0.29.dist-info/RECORD +0 -163
- /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
- {megadetector-5.0.29.dist-info → megadetector-10.0.1.dist-info}/WHEEL +0 -0
megadetector/utils/path_utils.py
CHANGED
```diff
@@ -24,7 +24,6 @@ import tarfile
 import webbrowser
 import subprocess
 import re
-import tempfile
 
 from zipfile import ZipFile
 from datetime import datetime
@@ -139,7 +138,7 @@ def folder_list(base_dir,
             the native path separator
         return_relative_paths (bool, optional): return paths that are relative to [base_dir],
             rather than absolute paths
-        sort_folders (bool, optional): force folders to be sorted, otherwise uses the sorting
+        sort_folders (bool, optional): force folders to be sorted, otherwise uses the sorting
             provided by os.walk()
         recursive (bool, optional): enumerate recursively
 
@@ -315,7 +314,7 @@ def split_path(path):
         return ''
     if path is None:
         return None
-
+
     parts = []
     while True:
         # ntpath seems to do the right thing for both Windows and Unix paths
@@ -351,6 +350,8 @@ def safe_create_link(link_exists,link_new):
     and if it has a different target than [link_exists], removes and re-creates
     it.
 
+    Creates a *real* directory if necessary.
+
     Errors if [link_new] already exists but it's not a link.
 
     Args:
@@ -358,14 +359,21 @@ def safe_create_link(link_exists,link_new):
         link_new (str): the target of the (possibly-new) symlink
     """
 
+    # If the new file already exists...
    if os.path.exists(link_new) or os.path.islink(link_new):
+        # Error if it's not already a link
        assert os.path.islink(link_new)
+        # If it's already a link, and it points to the "exists" file,
+        # leave it alone, otherwise redirect it.
        if not os.readlink(link_new) == link_exists:
            os.remove(link_new)
            os.symlink(link_exists,link_new)
    else:
+        os.makedirs(os.path.dirname(link_new),exist_ok=True)
        os.symlink(link_exists,link_new)
 
+# ...def safe_create_link(...)
+
 
 def remove_empty_folders(path, remove_root=False):
     """
```
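The net effect of the two `safe_create_link` hunks above: the parent folder of a new link is now created on demand, and an existing link is retargeted rather than duplicated. A minimal usage sketch, with hypothetical paths, assuming the 10.0.1 behavior shown above:

```python
from megadetector.utils.path_utils import safe_create_link

# Hypothetical paths. In 10.0.1, the folder containing the new link
# ('/tmp/links' here) is created on demand via os.makedirs(..., exist_ok=True)
# when the link doesn't exist yet; in 5.0.29 it had to exist already.
safe_create_link('/data/images/img0001.jpg', '/tmp/links/img0001.jpg')

# Re-pointing the same link name at a different target removes and
# re-creates the symlink; pointing it at the same target is a no-op.
safe_create_link('/data/images/img0002.jpg', '/tmp/links/img0001.jpg')
```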
```diff
@@ -435,7 +443,7 @@ def path_join(*paths, convert_slashes=True):
     else:
         return joined_path
 
-
+
 #%% Image-related path functions
 
 def is_image_file(s, img_extensions=IMG_EXTENSIONS):
@@ -642,8 +650,8 @@ def wsl_path_to_windows_path(filename, failure_behavior='none'):
 
     Args:
         filename (str): filename to convert
-        failure_behavior (str): what to do if the path can't be processed as a
-            'none' to return None in this case, 'original' to return the original path.
+        failure_behavior (str, optional): what to do if the path can't be processed as a
+            WSL path. 'none' to return None in this case, 'original' to return the original path.
 
     Returns:
         str: Windows equivalent to the WSL path [filename]
@@ -690,7 +698,7 @@ def windows_path_to_wsl_path(filename, failure_behavior='none'):
 
     Args:
         filename (str): filename to convert
-        failure_behavior (str): what to do if the path can't be processed as a Windows path.
+        failure_behavior (str, optional): what to do if the path can't be processed as a Windows path.
             'none' to return None in this case, 'original' to return the original path.
 
     Returns:
```
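The `failure_behavior` docstring fixes above only clarify the existing contract; a sketch of the two modes, with a hypothetical path:

```python
from megadetector.utils.path_utils import wsl_path_to_windows_path

# Hypothetical path. Per the corrected docstring: if the input can't be
# processed as a WSL path, failure_behavior='none' returns None, and
# failure_behavior='original' returns the input unchanged.
p = wsl_path_to_windows_path('/mnt/c/Users/me/images', failure_behavior='original')
```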
```diff
@@ -841,7 +849,9 @@ def open_file_in_chrome(filename):
         return False
 
 
-def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
+def open_file(filename,
+              attempt_to_open_in_wsl_host=False,
+              browser_name=None):
     """
     Opens [filename] in the default OS file handler for this file type.
 
@@ -855,9 +865,9 @@ def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
 
     Args:
         filename (str): file to open
-        attempt_to_open_in_wsl_host: if this is True, and we're in WSL, attempts
-            [filename] in the Windows host environment
-        browser_name: see above
+        attempt_to_open_in_wsl_host (bool, optional): if this is True, and we're in WSL, attempts
+            to open [filename] in the Windows host environment
+        browser_name (str, optional): see above
     """
 
     if browser_name is not None:
```
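`open_file` keeps the same three parameters; only the signature layout and docstring types change. A hedged usage sketch, with a hypothetical filename:

```python
from megadetector.utils.path_utils import open_file

# Hypothetical file. Opens in the OS-default handler; under WSL,
# attempt_to_open_in_wsl_host=True asks the Windows host to open it instead.
# browser_name (default None) optionally routes the file to a specific browser.
open_file('results/index.html', attempt_to_open_in_wsl_host=True)
```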
```diff
@@ -1148,7 +1158,7 @@ def parallel_get_file_sizes(filenames,
 
 #%% Compression (zip/tar) functions
 
-def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
+def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compress_level=9):
     """
     Zips a single file.
 
@@ -1158,7 +1168,7 @@ def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
         [input_fn].zip
         overwrite (bool, optional): whether to overwrite an existing target file
         verbose (bool, optional): enable existing debug console output
-        compresslevel (int, optional): compression level to use, between 0 and 9
+        compress_level (int, optional): compression level to use, between 0 and 9
 
     Returns:
         str: the output zipfile, whether we created it or determined that it already exists
@@ -1174,10 +1184,12 @@ def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
         return output_fn
 
     if verbose:
-        print('Zipping {} to {} with level {}'.format(input_fn,output_fn,compresslevel))
+        print('Zipping {} to {} with level {}'.format(input_fn,output_fn,compress_level))
 
     with ZipFile(output_fn,'w',zipfile.ZIP_DEFLATED) as zipf:
-        zipf.write(input_fn,arcname=basename,compresslevel=compresslevel,
+        zipf.write(input_fn,
+                   arcname=basename,
+                   compresslevel=compress_level,
                    compress_type=zipfile.ZIP_DEFLATED)
 
     return output_fn
@@ -1223,8 +1235,12 @@ def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
     return output_fn
 
 
-def zip_files_into_single_zipfile(input_files, output_fn, arc_name_base,
-                                  overwrite=False, verbose=False, compresslevel=9):
+def zip_files_into_single_zipfile(input_files,
+                                  output_fn,
+                                  arc_name_base,
+                                  overwrite=False,
+                                  verbose=False,
+                                  compress_level=9):
     """
     Zip all the files in [input_files] into [output_fn]. Archive names are relative to
     arc_name_base.
@@ -1237,7 +1253,7 @@ def zip_files_into_single_zipfile(input_files, output_fn, arc_name_base,
             [arc_name_base]
         overwrite (bool, optional): whether to overwrite an existing .tar file
         verbose (bool, optional): enable additional debug console output
-        compresslevel (int, optional): compression level to use, between 0 and 9
+        compress_level (int, optional): compression level to use, between 0 and 9
 
     Returns:
         str: the output zipfile, whether we created it or determined that it already exists
@@ -1250,20 +1266,20 @@ def zip_files_into_single_zipfile(input_files, output_fn, arc_name_base,
 
     if verbose:
         print('Zipping {} files to {} (compression level {})'.format(
-            len(input_files),output_fn,compresslevel))
+            len(input_files),output_fn,compress_level))
 
     with ZipFile(output_fn,'w',zipfile.ZIP_DEFLATED) as zipf:
         for input_fn_abs in tqdm(input_files,disable=(not verbose)):
             input_fn_relative = os.path.relpath(input_fn_abs,arc_name_base)
             zipf.write(input_fn_abs,
                        arcname=input_fn_relative,
-                       compresslevel=compresslevel,
+                       compresslevel=compress_level,
                        compress_type=zipfile.ZIP_DEFLATED)
 
     return output_fn
 
 
-def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
+def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, compress_level=9):
     """
     Recursively zip everything in [input_folder] into a single zipfile, storing files as paths
     relative to [input_folder].
@@ -1273,7 +1289,7 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
         output_fn (str, optional): output filename; if this is None, we'll write to [input_folder].zip
         overwrite (bool, optional): whether to overwrite an existing .tar file
         verbose (bool, optional): enable additional debug console output
-        compresslevel (int, optional): compression level to use, between 0 and 9
+        compress_level (int, optional): compression level to use, between 0 and 9
 
     Returns:
         str: the output zipfile, whether we created it or determined that it already exists
@@ -1289,7 +1305,7 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
 
     if verbose:
         print('Zipping {} to {} (compression level {})'.format(
-            input_folder,output_fn,compresslevel))
+            input_folder,output_fn,compress_level))
 
     relative_filenames = recursive_file_list(input_folder,return_relative_paths=True)
 
@@ -1298,7 +1314,7 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
             input_fn_abs = os.path.join(input_folder,input_fn_relative)
             zipf.write(input_fn_abs,
                        arcname=input_fn_relative,
-                       compresslevel=compresslevel,
+                       compresslevel=compress_level,
                        compress_type=zipfile.ZIP_DEFLATED)
 
     return output_fn
@@ -1307,7 +1323,7 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
 def parallel_zip_files(input_files,
                        max_workers=16,
                        use_threads=True,
-                       compresslevel=9,
+                       compress_level=9,
                        overwrite=False,
                        verbose=False):
     """
@@ -1315,11 +1331,11 @@ def parallel_zip_files(input_files,
     original files in place. Each file is zipped to [filename].zip.
 
     Args:
-        input_files (str): list of files to zip
+        input_files (str): list of files to zip
         max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
         use_threads (bool, optional): whether to use threads (True) or processes (False); ignored if
             max_workers <= 1
-        compresslevel (int, optional): zip compression level between 0 and 9
+        compress_level (int, optional): zip compression level between 0 and 9
         overwrite (bool, optional): whether to overwrite an existing .tar file
         verbose (bool, optional): enable additional debug console output
     """
@@ -1333,23 +1349,27 @@ def parallel_zip_files(input_files,
 
     with tqdm(total=len(input_files)) as pbar:
         for i,_ in enumerate(pool.imap_unordered(partial(zip_file,
-            output_fn=None,overwrite=overwrite,verbose=verbose),
+            output_fn=None,overwrite=overwrite,verbose=verbose,compress_level=compress_level),
             input_files)):
             pbar.update()
 
 
-def parallel_zip_folders(input_folders, max_workers=16, use_threads=True,
-                         compresslevel=9, overwrite=False, verbose=False):
+def parallel_zip_folders(input_folders,
+                         max_workers=16,
+                         use_threads=True,
+                         compress_level=9,
+                         overwrite=False,
+                         verbose=False):
     """
     Zips one or more folders to separate output files in parallel, leaving the
     original folders in place. Each folder is zipped to [folder_name].zip.
 
     Args:
-        input_folders (list): list of folders to zip
+        input_folders (list): list of folders to zip
         max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
         use_threads (bool, optional): whether to use threads (True) or processes (False); ignored if
             max_workers <= 1
-        compresslevel (int, optional): zip compression level between 0 and 9
+        compress_level (int, optional): zip compression level between 0 and 9
         overwrite (bool, optional): whether to overwrite an existing .tar file
         verbose (bool, optional): enable additional debug console output
     """
@@ -1364,13 +1384,19 @@ def parallel_zip_folders(input_folders, max_workers=16, use_threads=True,
     with tqdm(total=len(input_folders)) as pbar:
         for i,_ in enumerate(pool.imap_unordered(
             partial(zip_folder,overwrite=overwrite,
-                    verbose=verbose),
+                    compress_level=compress_level,verbose=verbose),
             input_folders)):
             pbar.update()
 
 
-def zip_each_file_in_folder(folder_name,recursive=False,max_workers=16,use_threads=True,
-                            compresslevel=9,overwrite=False,required_token=None,verbose=False,
+def zip_each_file_in_folder(folder_name,
+                            recursive=False,
+                            max_workers=16,
+                            use_threads=True,
+                            compress_level=9,
+                            overwrite=False,
+                            required_token=None,
+                            verbose=False,
                             exclude_zip=True):
     """
     Zips each file in [folder_name] to its own zipfile (filename.zip), optionally recursing. To
@@ -1382,7 +1408,7 @@ def zip_each_file_in_folder(folder_name,recursive=False,max_workers=16,use_threads=True,
         max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
         use_threads (bool, optional): whether to use threads (True) or processes (False); ignored if
             max_workers <= 1
-        compresslevel (int, optional): zip compression level between 0 and 9
+        compress_level (int, optional): zip compression level between 0 and 9
         overwrite (bool, optional): whether to overwrite an existing .tar file
         required_token (str, optional): only zip files whose names contain this string
         verbose (bool, optional): enable additional debug console output
@@ -1400,7 +1426,7 @@ def zip_each_file_in_folder(folder_name,recursive=False,max_workers=16,use_threads=True,
     input_files = [fn for fn in input_files if (not fn.endswith('.zip'))]
 
     parallel_zip_files(input_files=input_files,max_workers=max_workers,
-                       use_threads=use_threads,
+                       use_threads=use_threads,compress_level=compress_level,
                        overwrite=overwrite,verbose=verbose)
 
 
```
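The common thread in the compression hunks above is that every zip helper now spells its compression argument `compress_level` (previously `compresslevel`, per the removed lines), and the parallel wrappers now forward it to the underlying `zip_file`/`zip_folder` calls. A migration sketch with hypothetical paths:

```python
from megadetector.utils.path_utils import zip_folder, parallel_zip_files

# Hypothetical paths. 5.0.29 spelling (per the removed lines above):
#   zip_folder('/data/crops', compresslevel=9)
# 10.0.1 spelling, which the parallel wrappers now honor as well:
zip_folder('/data/crops', compress_level=9, verbose=True)
parallel_zip_files(['/data/a.json', '/data/b.json'],
                   max_workers=4,
                   compress_level=1,
                   overwrite=True)
```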
```diff
@@ -1435,6 +1461,8 @@ def compute_file_hash(file_path, algorithm='sha256', allow_failures=True):
     Args:
         file_path (str): the file to hash
         algorithm (str, optional): the hashing algorithm to use (e.g. md5, sha256)
+        allow_failures (bool, optional): if True, read failures will silently return
+            None; if false, read failures will raise exceptions
 
     Returns:
         str: the hash value for this file
```
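The newly documented `allow_failures` flag only describes existing error handling; a sketch with a hypothetical path:

```python
from megadetector.utils.path_utils import compute_file_hash

# Hypothetical path. Per the docstring added above: with the default
# allow_failures=True, an unreadable file yields None instead of raising.
h = compute_file_hash('/data/maybe-missing.bin', algorithm='sha256')
if h is None:
    print('hashing failed')
```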
```diff
@@ -1584,7 +1612,7 @@ class TestPathUtils:
         """
         Test the find_images function.
         """
-
+
         # Create some dummy files
         img1_abs = os.path.join(self.test_dir, 'img1.jpg')
         img2_abs = os.path.join(self.test_dir, 'img2.PNG')
@@ -1609,7 +1637,7 @@ class TestPathUtils:
         expected_non_recursive_rel = sorted(['img1.jpg', 'img2.PNG'])
         found_non_recursive_rel = find_images(self.test_dir, recursive=False, return_relative_paths=True)
         assert sorted(found_non_recursive_rel) == expected_non_recursive_rel
-
+
         # Test recursive
         expected_recursive_abs = sorted([
             img1_abs.replace('\\', '/'),
@@ -1632,7 +1660,7 @@ class TestPathUtils:
         empty_dir = os.path.join(self.test_dir, 'empty_dir')
         os.makedirs(empty_dir, exist_ok=True)
         assert find_images(empty_dir, recursive=True) == []
-
+
         # Test with a directory that doesn't exist (should assert)
         try:
             find_images(os.path.join(self.test_dir, 'non_existent_dir'))
@@ -1656,7 +1684,7 @@ class TestPathUtils:
         #     file4.png
         # subdir2/
         #     file5.doc
-
+
         list_dir = os.path.join(self.test_dir,'recursive_list')
 
         f1 = os.path.join(list_dir, 'file1.txt')
@@ -1677,32 +1705,32 @@ class TestPathUtils:
 
         # Test recursive_file_list (recursive=True by default)
         expected_all_files_abs = sorted([
-            f1.replace('\\', '/'), f2.replace('\\', '/'), f3.replace('\\', '/'),
+            f1.replace('\\', '/'), f2.replace('\\', '/'), f3.replace('\\', '/'),
             f4.replace('\\', '/'), f5.replace('\\', '/')
         ])
-        all_files_abs = recursive_file_list(list_dir, convert_slashes=True,
+        all_files_abs = recursive_file_list(list_dir, convert_slashes=True,
                                             return_relative_paths=False)
         assert sorted(all_files_abs) == expected_all_files_abs
 
         # Test recursive_file_list with relative paths
         expected_all_files_rel = sorted([
-            'file1.txt', 'file2.jpg',
+            'file1.txt', 'file2.jpg',
             os.path.join('subdir1', 'file3.txt').replace('\\', '/'),
             os.path.join('subdir1', 'subsubdir', 'file4.png').replace('\\', '/'),
             os.path.join('subdir2', 'file5.doc').replace('\\', '/')
         ])
-        all_files_rel = recursive_file_list(list_dir, convert_slashes=True,
+        all_files_rel = recursive_file_list(list_dir, convert_slashes=True,
                                             return_relative_paths=True)
         assert sorted(all_files_rel) == expected_all_files_rel
 
         # Test file_list (non-recursive by default via wrapper)
         expected_top_level_files_abs = sorted([f1.replace('\\', '/'), f2.replace('\\', '/')])
-        top_level_files_abs = file_list(list_dir, convert_slashes=True,
+        top_level_files_abs = file_list(list_dir, convert_slashes=True,
                                         return_relative_paths=False, recursive=False)
         assert sorted(top_level_files_abs) == expected_top_level_files_abs
-
+
         # Test file_list (recursive explicitly) - should be same as recursive_file_list
-        recursive_via_file_list = file_list(list_dir, convert_slashes=True,
+        recursive_via_file_list = file_list(list_dir, convert_slashes=True,
                                             return_relative_paths=False, recursive=True)
         assert sorted(recursive_via_file_list) == expected_all_files_abs
 
@@ -1757,33 +1785,33 @@ class TestPathUtils:
         expected_folders_non_recursive_abs = sorted([
             subdir1.replace('\\', '/'), subdir2.replace('\\', '/')
         ])
-        folders_non_recursive_abs = folder_list(folder_list_dir, recursive=False,
+        folders_non_recursive_abs = folder_list(folder_list_dir, recursive=False,
                                                 return_relative_paths=False)
         assert sorted(folders_non_recursive_abs) == expected_folders_non_recursive_abs
 
         # Test non-recursive, relative paths
         expected_folders_non_recursive_rel = sorted(['subdir1', 'subdir2'])
-        folders_non_recursive_rel = folder_list(folder_list_dir, recursive=False,
+        folders_non_recursive_rel = folder_list(folder_list_dir, recursive=False,
                                                 return_relative_paths=True)
         assert sorted(folders_non_recursive_rel) == expected_folders_non_recursive_rel
 
         # Test recursive
         expected_folders_recursive_abs = sorted([
-            subdir1.replace('\\', '/'),
-            subsubdir1.replace('\\', '/'),
+            subdir1.replace('\\', '/'),
+            subsubdir1.replace('\\', '/'),
             subdir2.replace('\\', '/')
         ])
-        folders_recursive_abs = folder_list(folder_list_dir, recursive=True,
+        folders_recursive_abs = folder_list(folder_list_dir, recursive=True,
                                             return_relative_paths=False)
         assert sorted(folders_recursive_abs) == expected_folders_recursive_abs
 
         # Test recursive, relative paths
         expected_folders_recursive_rel = sorted([
-            'subdir1',
-            os.path.join('subdir1', 'subsubdir1').replace('\\', '/'),
+            'subdir1',
+            os.path.join('subdir1', 'subsubdir1').replace('\\', '/'),
             'subdir2'
         ])
-        folders_recursive_rel = folder_list(folder_list_dir, recursive=True,
+        folders_recursive_rel = folder_list(folder_list_dir, recursive=True,
                                             return_relative_paths=True)
         assert sorted(folders_recursive_rel) == expected_folders_recursive_rel
 
@@ -1793,7 +1821,7 @@ class TestPathUtils:
         with open(os.path.join(empty_dir_for_folders, 'temp.txt'), 'w') as f: f.write('t')
         assert folder_list(empty_dir_for_folders, recursive=True) == []
         assert folder_list(empty_dir_for_folders, recursive=False) == []
-
+
         # Test with a non-existent directory
         try:
             folder_list(os.path.join(self.test_dir, "non_existent_dir"))
@@ -1815,11 +1843,11 @@ class TestPathUtils:
         #     img2.png
         # img3.png
 
-
+        folder_summary_dir = os.path.join(self.test_dir,'folder_summary')
 
-        f1 = os.path.join(
-        img1 = os.path.join(
-        subdir = os.path.join(
+        f1 = os.path.join(folder_summary_dir, 'file1.txt')
+        img1 = os.path.join(folder_summary_dir, 'img1.jpg')
+        subdir = os.path.join(folder_summary_dir, 'subdir')
         os.makedirs(subdir, exist_ok=True)
         f2 = os.path.join(subdir, 'file2.txt')
         img2 = os.path.join(subdir, 'img2.png')
@@ -1828,24 +1856,24 @@ class TestPathUtils:
         for filepath in [f1, img1, f2, img2, img3]:
             with open(filepath, 'w') as f:
                 f.write('test')
-
-        summary = folder_summary(
-
+
+        summary = folder_summary(folder_summary_dir, print_summary=False)
+
         assert summary['n_files'] == 5
         assert summary['n_folders'] == 1  # 'subdir'
         assert summary['extension_to_count']['.txt'] == 2
         assert summary['extension_to_count']['.jpg'] == 1
         assert summary['extension_to_count']['.png'] == 2
-
+
         # Check order (sorted by value, desc)
-
-        # The specific order of keys with the same counts can vary based on file system list
-
-
+        #
+        # The specific order of keys with the same counts can vary based on file system list
+        # order. We'll check that the counts are correct and the number of unique extensions is
+        # right.
         assert len(summary['extension_to_count']) == 3
 
 
-        empty_dir = os.path.join(
+        empty_dir = os.path.join(folder_summary_dir, "empty_summary_dir")
         os.makedirs(empty_dir, exist_ok=True)
         empty_summary = folder_summary(empty_dir, print_summary=False)
         assert empty_summary['n_files'] == 0
@@ -1879,10 +1907,10 @@ class TestPathUtils:
         assert insert_before_extension('path/to/file.ext', 'tag') == 'path/to/file.tag.ext'
         assert insert_before_extension('path/to/file', 'tag') == 'path/to/file.tag'
         assert insert_before_extension('file.tar.gz', 'new') == 'file.tar.new.gz'
-
+
         # Test with custom separator
         assert insert_before_extension('file.ext', 'inserted', separator='_') == 'file_inserted.ext'
-
+
         # Test with s=None (timestamp) - check format roughly
         fname_with_ts = insert_before_extension('file.ext', None)
         parts = fname_with_ts.split('.')
@@ -1912,10 +1940,10 @@ class TestPathUtils:
         else:  # POSIX
             assert split_path('/dir/subdir/file.jpg') == ['/', 'dir', 'subdir', 'file.jpg']
             assert split_path('/') == ['/']
-
+
         assert split_path('dir/file.txt') == ['dir', 'file.txt']
         assert split_path('file.txt') == ['file.txt']
-        assert split_path('') == ''
+        assert split_path('') == ''
         assert split_path('.') == ['.']
         assert split_path('..') == ['..']
         assert split_path('../a/b') == ['..', 'a', 'b']
@@ -1971,7 +1999,7 @@ class TestPathUtils:
         safe_create_link(other_source_path, link_path)  # Should remove and re-create
         assert os.path.islink(link_path)
         assert os.readlink(link_path) == other_source_path
-
+
         # Link_new path exists and is a file (not a link)
         file_path_conflict = os.path.join(self.test_dir, 'conflict_file.txt')
         with open(file_path_conflict, 'w') as f:
@@ -1981,7 +2009,7 @@ class TestPathUtils:
             raise AssertionError("AssertionError not raised for file conflict")
         except AssertionError:
             pass
-        os.remove(file_path_conflict)
+        os.remove(file_path_conflict)
 
         # Link_new path exists and is a directory
         dir_path_conflict = os.path.join(self.test_dir, 'conflict_dir')
@@ -2010,7 +2038,7 @@ class TestPathUtils:
         #     file.txt
         # non_empty_top/
         #     file_in_top.txt
-
+
         empty_top = os.path.join(self.test_dir, 'empty_top')
         empty_mid = os.path.join(empty_top, 'empty_mid')
         empty_leaf = os.path.join(empty_mid, 'empty_leaf')
@@ -2044,7 +2072,7 @@ class TestPathUtils:
         assert not os.path.exists(empty_leaf_in_mixed)
         assert os.path.exists(non_empty_mid)
         assert os.path.exists(os.path.join(non_empty_mid, 'file.txt'))
-
+
         # Process non_empty_top; should remove nothing.
         remove_empty_folders(non_empty_top, remove_root=True)
         assert os.path.exists(non_empty_top)
@@ -2081,7 +2109,7 @@ class TestPathUtils:
         else:
             # On POSIX, os.path.join uses '/', so convert_slashes=False should still be '/'
             assert path_join('a', 'b', convert_slashes=False) == 'a/b'
-
+
         assert path_join('a', '', 'b') == 'a/b'  # os.path.join behavior
         assert path_join('/a', 'b') == '/a/b'
         assert path_join('a', '/b') == '/b'  # '/b' is absolute
@@ -2107,7 +2135,7 @@ class TestPathUtils:
         assert clean_path("path\\to\\file.txt") == "path\\to\\file.txt"  # backslashes allowed
         assert clean_path("path:to:file.txt") == "path:to:file.txt"  # colons allowed
         assert clean_path("path/to<illegal>/file.txt") == "path/toillegal/file.txt"
-
+
         # flatten_path
         assert flatten_path("path/to/file.txt") == "path~to~file.txt"
         assert flatten_path("path:to:file.txt", separator_char_replacement='_') == "path_to_file.txt"
@@ -2137,7 +2165,7 @@ class TestPathUtils:
         """
 
         test_list = ["item1", "item2 with space", "item3/with/slash"]
-
+
         # Test with .json
         json_file_path = os.path.join(self.test_dir, "test_list.json")
         write_list_to_file(json_file_path, test_list)
@@ -2194,7 +2222,7 @@ class TestPathUtils:
             content = f"content of file {i}"
             with open(src_path, 'w') as f:
                 f.write(content)
-
+
             file_mappings[src_path] = tgt_path
             source_files_content[tgt_path] = content
 
@@ -2204,11 +2232,11 @@ class TestPathUtils:
             assert os.path.exists(tgt_path)
             with open(tgt_path, 'r') as f:
                 assert f.read() == expected_content
-
+
         existing_target_path = list(source_files_content.keys())[0]
         with open(existing_target_path, 'w') as f:
             f.write("old content")
-
+
         parallel_copy_files(file_mappings, max_workers=1, use_threads=True, overwrite=False)
         with open(existing_target_path, 'r') as f:
             assert f.read() == "old content"
@@ -2248,7 +2276,7 @@ class TestPathUtils:
         content2 = "01234567890123456789"  # 20 bytes
         with open(f2_path, 'w') as f:
             f.write(content2)
-
+
         sizes_relative = get_file_sizes(file_sizes_test_dir)
         expected_sizes_relative = {
             'file1.txt': len(content1),
@@ -2264,14 +2292,19 @@ class TestPathUtils:
         }
         assert sizes_parallel_abs == expected_sizes_parallel_abs
 
-        sizes_parallel_folder_abs = parallel_get_file_sizes(file_sizes_test_dir,
+        sizes_parallel_folder_abs = parallel_get_file_sizes(file_sizes_test_dir,
+                                                            max_workers=1,
+                                                            return_relative_paths=False)
         assert sizes_parallel_folder_abs == expected_sizes_parallel_abs
 
-        sizes_parallel_folder_rel = parallel_get_file_sizes(file_sizes_test_dir,
+        sizes_parallel_folder_rel = parallel_get_file_sizes(file_sizes_test_dir,
+                                                            max_workers=1,
+                                                            return_relative_paths=True)
         assert sizes_parallel_folder_rel == expected_sizes_relative
 
         non_existent_file = os.path.join(file_sizes_test_dir, "no_such_file.txt")
-        sizes_with_error = parallel_get_file_sizes([f1_path, non_existent_file],
+        sizes_with_error = parallel_get_file_sizes([f1_path, non_existent_file],
+                                                   max_workers=1)
         expected_with_error = {
             f1_path.replace('\\','/'): len(content1),
             non_existent_file.replace('\\','/'): None
@@ -2302,7 +2335,7 @@ class TestPathUtils:
         assert os.path.exists(unzipped_file_path_default)
         with open(unzipped_file_path_default, 'r') as f:
             assert f.read() == content
-
+
         custom_zip_output_name = "custom_archive.zip"
         custom_zip_output_path = os.path.join(self.test_dir, custom_zip_output_name)
         zip_file(file_to_zip_path, output_fn=custom_zip_output_path, overwrite=True)
@@ -2329,7 +2362,7 @@ class TestPathUtils:
 
         folder_to_zip = os.path.join(self.test_dir, "folder_to_zip")
         os.makedirs(folder_to_zip, exist_ok=True)
-
+
         file1_name = "file1.txt"; path1 = os.path.join(folder_to_zip, file1_name)
         file2_name = "file2.log"; path2 = os.path.join(folder_to_zip, file2_name)
         subdir_name = "sub"; subdir_path = os.path.join(folder_to_zip, subdir_name)
@@ -2355,7 +2388,7 @@ class TestPathUtils:
         with open(os.path.join(unzip_output_dir, file1_name), 'r')as f: assert f.read() == content1
         with open(os.path.join(unzip_output_dir, file2_name), 'r')as f: assert f.read() == content2
         with open(os.path.join(unzip_output_dir, subdir_name, file3_name), 'r')as f: assert f.read() == content3
-
+
         mtime_before = os.path.getmtime(default_zip_path)
         zip_folder(folder_to_zip, output_fn=None, overwrite=False)
         mtime_after = os.path.getmtime(default_zip_path)
@@ -2388,7 +2421,7 @@ class TestPathUtils:
 
         expected_unzipped_file1 = os.path.join(unzip_dir, os.path.relpath(file1_path, self.test_dir))
         expected_unzipped_file2 = os.path.join(unzip_dir, os.path.relpath(file2_path, self.test_dir))
-
+
         assert os.path.exists(expected_unzipped_file1)
         with open(expected_unzipped_file1, 'r') as f: assert f.read() == content1
         assert os.path.exists(expected_unzipped_file2)
@@ -2415,7 +2448,7 @@ class TestPathUtils:
         input_files = [file1_path, file2_path]
         output_tar_path = os.path.join(self.test_dir, "archive.tar.gz")
 
-        add_files_to_single_tar_file(input_files, output_tar_path, arc_name_base=self.test_dir,
+        add_files_to_single_tar_file(input_files, output_tar_path, arc_name_base=self.test_dir,
                                      overwrite=True, mode='x:gz')
         assert os.path.exists(output_tar_path)
 
@@ -2442,7 +2475,7 @@ class TestPathUtils:
         file2_to_zip = os.path.join(self.test_dir, "pz_file2.txt")
         with open(file1_to_zip, 'w') as f: f.write("pz_content1")
         with open(file2_to_zip, 'w') as f: f.write("pz_content2")
-
+
         parallel_zip_files([file1_to_zip, file2_to_zip], max_workers=1, overwrite=True)
         assert os.path.exists(file1_to_zip + ".zip")
         assert os.path.exists(file2_to_zip + ".zip")
@@ -2475,13 +2508,13 @@ class TestPathUtils:
 
         for p_path in [zef_file1, zef_file2_png, zef_file3_zip, zef_file_in_sub]:
             with open(p_path, 'w') as f: f.write(f"content of {os.path.basename(p_path)}")
-
+
         zip_each_file_in_folder(zef_folder, recursive=False, max_workers=1, overwrite=True)
         assert os.path.exists(zef_file1 + ".zip")
         assert os.path.exists(zef_file2_png + ".zip")
-        assert not os.path.exists(zef_file3_zip + ".zip")
-        assert not os.path.exists(zef_file_in_sub + ".zip")
-
+        assert not os.path.exists(zef_file3_zip + ".zip")
+        assert not os.path.exists(zef_file_in_sub + ".zip")
+
         if os.path.exists(zef_file1 + ".zip"): os.remove(zef_file1 + ".zip")
         if os.path.exists(zef_file2_png + ".zip"): os.remove(zef_file2_png + ".zip")
 
@@ -2498,13 +2531,13 @@ class TestPathUtils:
         assert os.path.exists(zef_file1 + ".zip")
         assert not os.path.exists(zef_file2_png + ".zip")
         assert not os.path.exists(zef_file_in_sub + ".zip")
-
+
         if os.path.exists(zef_file1 + ".zip"): os.remove(zef_file1 + ".zip")
         dummy_to_zip = os.path.join(zef_folder,"dummy.txt")
         with open(dummy_to_zip,'w') as f: f.write('d')
         zip_each_file_in_folder(zef_folder, recursive=False, exclude_zip=False, max_workers=1, overwrite=True)
         assert os.path.exists(dummy_to_zip + ".zip")
-        assert os.path.exists(zef_file3_zip + ".zip")
+        assert os.path.exists(zef_file3_zip + ".zip")
         if os.path.exists(dummy_to_zip + ".zip"): os.remove(dummy_to_zip + ".zip")
         if os.path.exists(zef_file3_zip + ".zip"): os.remove(zef_file3_zip + ".zip")
 
@@ -2520,12 +2553,12 @@ class TestPathUtils:
         with open(file1_path, 'w') as f:
             f.write(content1)
 
-        file2_name = "hash_me2.txt"
+        file2_name = "hash_me2.txt"
         file2_path = os.path.join(self.test_dir, file2_name)
         with open(file2_path, 'w') as f:
-            f.write(content1)
+            f.write(content1)
 
-        file3_name = "hash_me3.txt"
+        file3_name = "hash_me3.txt"
         file3_path = os.path.join(self.test_dir, file3_name)
         content3 = "This is a different test string for hashing."
         with open(file3_path, 'w') as f:
@@ -2558,7 +2591,7 @@ class TestPathUtils:
 
         files_to_hash = [file1_path, file3_path, non_existent_path]
         hashes_parallel = parallel_compute_file_hashes(files_to_hash, max_workers=1)
-
+
         norm_f1 = file1_path.replace('\\','/')
         norm_f3 = file3_path.replace('\\','/')
         norm_non = non_existent_path.replace('\\','/')
@@ -2623,6 +2656,3 @@ def test_path_utils():
         test_instance.test_compute_file_hash()
     finally:
         test_instance.tear_down()
-
-# from IPython import embed; embed()
-# test_path_utils()
```