megadetector-5.0.27-py3-none-any.whl → megadetector-5.0.29-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +232 -223
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +341 -338
- megadetector/detection/pytorch_detector.py +308 -266
- megadetector/detection/run_detector.py +186 -166
- megadetector/detection/run_detector_batch.py +366 -364
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +312 -253
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +291 -283
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +808 -311
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +220 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -0
- megadetector/postprocessing/load_api_results.py +25 -22
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -302
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1019 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1511 -406
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +73 -60
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2868
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +424 -404
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +126 -98
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.27.dist-info/RECORD +0 -208
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
megadetector/utils/path_utils.py
CHANGED
Note: most of the changes in this file are whitespace-only (trailing whitespace removed from blank lines); those lines are shown as unchanged context below, so hunk headers may report more changed lines than are marked.

@@ -24,6 +24,7 @@ import tarfile
 import webbrowser
 import subprocess
 import re
+import tempfile

 from zipfile import ZipFile
 from datetime import datetime
@@ -34,6 +35,7 @@ from shutil import which
 from tqdm import tqdm

 from megadetector.utils.ct_utils import is_iterable
+from megadetector.utils.ct_utils import make_test_folder
 from megadetector.utils.ct_utils import sort_dictionary_by_value

 # Should all be lower-case
@@ -47,14 +49,14 @@ CHAR_LIMIT = 255

 #%% General path functions

 def recursive_file_list(base_dir,
                         convert_slashes=True,
                         return_relative_paths=False,
                         sort_files=True,
                         recursive=True):
     r"""
     Enumerates files (not directories) in [base_dir].

     Args:
         base_dir (str): folder to enumerate
         convert_slashes (bool, optional): force forward slashes; if this is False, will use
@@ -64,13 +66,13 @@ def recursive_file_list(base_dir,
         sort_files (bool, optional): force files to be sorted, otherwise uses the sorting
             provided by os.walk()
         recursive (bool, optional): enumerate recursively

     Returns:
         list: list of filenames
     """

     assert os.path.isdir(base_dir), '{} is not a folder'.format(base_dir)

     all_files = []

     if recursive:
@@ -82,29 +84,29 @@
         all_files_relative = os.listdir(base_dir)
         all_files = [os.path.join(base_dir,fn) for fn in all_files_relative]
         all_files = [fn for fn in all_files if os.path.isfile(fn)]

     if return_relative_paths:
         all_files = [os.path.relpath(fn,base_dir) for fn in all_files]

     if convert_slashes:
         all_files = [fn.replace('\\', '/') for fn in all_files]

     if sort_files:
         all_files = sorted(all_files)

     return all_files


 def file_list(base_dir,
               convert_slashes=True,
               return_relative_paths=False,
               sort_files=True,
               recursive=False):
     """
     Trivial wrapper for recursive_file_list, which was a poor function name choice
     at the time, since I later wanted to add non-recursive lists, but it doesn't
     make sense to have a "recursive" option in a function called "recursive_file_list".

     Args:
         base_dir (str): folder to enumerate
         convert_slashes (bool, optional): force forward slashes; if this is False, will use
@@ -114,11 +116,11 @@ def file_list(base_dir,
         sort_files (bool, optional): force files to be sorted, otherwise uses the sorting
             provided by os.walk()
         recursive (bool, optional): enumerate recursively

     Returns:
         list: list of filenames
     """

     return recursive_file_list(base_dir,convert_slashes,return_relative_paths,sort_files,
                                recursive=recursive)

@@ -128,10 +130,9 @@ def folder_list(base_dir,
                 return_relative_paths=False,
                 sort_folders=True,
                 recursive=False):
-
     """
     Enumerates folders (not files) in [base_dir].

     Args:
         base_dir (str): folder to enumerate
         convert_slashes (bool, optional): force forward slashes; if this is False, will use
@@ -141,81 +142,81 @@
         sort_files (bool, optional): force folders to be sorted, otherwise uses the sorting
             provided by os.walk()
         recursive (bool, optional): enumerate recursively

     Returns:
         list: list of folder names
     """

     assert os.path.isdir(base_dir), '{} is not a folder'.format(base_dir)

     folders = []

     if recursive:
         folders = []
         for root, dirs, _ in os.walk(base_dir):
             for d in dirs:
                 folders.append(os.path.join(root, d))
     else:
         folders = os.listdir(base_dir)
         folders = [os.path.join(base_dir,fn) for fn in folders]
         folders = [fn for fn in folders if os.path.isdir(fn)]

     if return_relative_paths:
         folders = [os.path.relpath(fn,base_dir) for fn in folders]

     if convert_slashes:
         folders = [fn.replace('\\', '/') for fn in folders]

     if sort_folders:
         folders = sorted(folders)

     return folders


 def folder_summary(folder,print_summary=True):
     """
     Returns (and optionally prints) a summary of [folder], including:

     * The total number of files
     * The total number of folders
     * The number of files for each extension

     Args:
         folder (str): folder to summarize
         print_summary (bool, optional): whether to print the summary

     Returns:
         dict: with fields "n_files", "n_folders", and "extension_to_count"
     """

     assert os.path.isdir(folder), '{} is not a folder'.format(folder)

     folders_relative = folder_list(folder,return_relative_paths=True,recursive=True)
     files_relative = file_list(folder,return_relative_paths=True,recursive=True)

     extension_to_count = defaultdict(int)

     for fn in files_relative:
         ext = os.path.splitext(fn)[1]
         extension_to_count[ext] += 1

     extension_to_count = sort_dictionary_by_value(extension_to_count,reverse=True)

     if print_summary:
         for extension in extension_to_count.keys():
             print('{}: {}'.format(extension,extension_to_count[extension]))
         print('')
         print('Total files: {}'.format(len(files_relative)))
         print('Total folders: {}'.format(len(folders_relative)))

     to_return = {}
     to_return['n_files'] = len(files_relative)
     to_return['n_folders'] = len(folders_relative)
     to_return['extension_to_count'] = extension_to_count

     return to_return


 def fileparts(path):
     r"""
     Breaks down a path into the directory path, filename, and extension.
@@ -223,25 +224,25 @@ def fileparts(path):
     Note that the '.' lives with the extension, and separators are removed.

     Examples:

     .. code-block:: none

         >>> fileparts('file')
         ('', 'file', '')
         >>> fileparts(r'c:/dir/file.jpg')
         ('c:/dir', 'file', '.jpg')
         >>> fileparts('/dir/subdir/file.jpg')
         ('/dir/subdir', 'file', '.jpg')

     Args:
         path (str): path name to separate into parts

     Returns:
         tuple: tuple containing (p,n,e):
             - p: str, directory path
             - n: str, filename without extension
             - e: str, extension including the '.'
     """

     # ntpath seems to do the right thing for both Windows and Unix paths
     p = ntpath.dirname(path)
     basename = ntpath.basename(path)
@@ -257,27 +258,27 @@ def insert_before_extension(filename, s=None, separator='.'):
     appends [s].

     Examples:

     .. code-block:: none

         >>> insert_before_extension('/dir/subdir/file.ext', 'insert')
         '/dir/subdir/file.insert.ext'
         >>> insert_before_extension('/dir/subdir/file', 'insert')
         '/dir/subdir/file.insert'
         >>> insert_before_extension('/dir/subdir/file')
         '/dir/subdir/file.2020.07.20.10.54.38'

     Args:
         filename (str): filename to manipulate
         s (str, optional): string to insert before the extension in [filename], or
             None to insert a datestamp
         separator (str, optional): separator to place between the filename base
             and the inserted string

     Returns:
         str: modified string
     """

     assert len(filename) > 0
     if s is None or len(s) == 0:
         s = datetime.now().strftime('%Y.%m.%d.%H.%M.%S')
@@ -290,9 +291,9 @@ def split_path(path):
     Splits [path] into all its constituent file/folder tokens.

     Examples:

     .. code-block:: none

         >>> split_path(r'c:\dir\subdir\file.txt')
         ['c:\\', 'dir', 'subdir', 'file.txt']
         >>> split_path('/dir/subdir/file.jpg')
@@ -301,13 +302,19 @@
         ['c:\\']
         >>> split_path('/')
         ['/']

     Args:
         path (str): path to split into tokens

     Returns:
         list: list of path tokens
     """
+
+    # Edge cases
+    if path == '':
+        return ''
+    if path is None:
+        return None

     parts = []
     while True:
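For reference, a minimal sketch of the new edge-case handling in split_path; the non-empty example follows the function's own docstring, and note that the new guards return the input unchanged (a string or None) rather than a list:

```python
from megadetector.utils.path_utils import split_path

# Normal case, per the docstring
assert split_path('/dir/subdir/file.jpg') == ['/', 'dir', 'subdir', 'file.jpg']

# New in this release: empty and None inputs short-circuit instead of
# entering the tokenization loop; they return the input itself, not a list
assert split_path('') == ''
assert split_path(None) is None
```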
@@ -325,32 +332,32 @@ def path_is_abs(p):
     """
     Determines whether [p] is an absolute path. An absolute path is defined as
     one that starts with slash, backslash, or a letter followed by a colon.

     Args:
         p (str): path to evaluate

     Returns:
         bool: True if [p] is an absolute path, else False
     """

     return (len(p) > 1) and (p[0] == '/' or p[1] == ':' or p[0] == '\\')


 def safe_create_link(link_exists,link_new):
     """
     Creates a symlink at [link_new] pointing to [link_exists].

     If [link_new] already exists, make sure it's a link (not a file),
     and if it has a different target than [link_exists], removes and re-creates
     it.

     Errors if [link_new] already exists but it's not a link.

     Args:
         link_exists (str): the source of the (possibly-new) symlink
         link_new (str): the target of the (possibly-new) symlink
     """

     if os.path.exists(link_new) or os.path.islink(link_new):
         assert os.path.islink(link_new)
         if not os.readlink(link_new) == link_exists:
@@ -358,35 +365,35 @@ def safe_create_link(link_exists,link_new):
             os.symlink(link_exists,link_new)
     else:
         os.symlink(link_exists,link_new)


 def remove_empty_folders(path, remove_root=False):
     """
     Recursively removes empty folders within the specified path.

     Args:
         path (str): the folder from which we should recursively remove
             empty folders.
         remove_root (bool, optional): whether to remove the root directory if
             it's empty after removing all empty subdirectories. This will always
             be True during recursive calls.

     Returns:
         bool: True if the directory is empty after processing, False otherwise
     """

     # Verify that [path] is a directory
     if not os.path.isdir(path):
         return False

     # Track whether the current directory is empty
     is_empty = True

     # Iterate through all items in the directory
     for item in os.listdir(path):

         item_path = os.path.join(path, item)

         # If it's a directory, process it recursively
         if os.path.isdir(item_path):
             # If the subdirectory is empty after processing, it will be removed
@@ -396,99 +403,57 @@ def remove_empty_folders(path, remove_root=False):
         else:
             # If there's a file, the directory is not empty
             is_empty = False

     # If the directory is empty and we're supposed to remove it
     if is_empty and remove_root:
         try:
             os.rmdir(path)
         except Exception as e:
             print('Error removing directory {}: {}'.format(path,str(e)))
             is_empty = False

     return is_empty

 # ...def remove_empty_folders(...)


-def top_level_folder(p):
+def path_join(*paths, convert_slashes=True):
     r"""
-
-
-    On UNIX, this is straightforward:
-
-        /blah/foo
-
-    ...returns '/blah'
-
-    On Windows, we define this as the top-level folder that isn't the drive, so:
-
-        c:\blah\foo
-
-    ...returns 'c:\blah'.
-
+    Wrapper for os.path.join that optionally converts backslashes to forward slashes.
+
     Args:
-
-
+        *paths (variable-length set of strings): Path components to be joined.
+        convert_slashes (bool, optional): whether to convert \\ to /
+
     Returns:
-
+        A string with the joined path components.
     """
-
-    if p == '':
-        return ''
-
-    # Path('/blah').parts is ('/','blah')
-    parts = split_path(p)
-
-    if len(parts) == 1:
-        return parts[0]
-
-    # Handle paths like:
-    #
-    # /, \, /stuff, c:, c:\stuff
-    drive = os.path.splitdrive(p)[0]
-    if parts[0] == drive or parts[0] == drive + '/' or parts[0] == drive + '\\' or parts[0] in ['\\', '/']:
-        return os.path.join(parts[0], parts[1])
-    else:
-        return parts[0]
-
-# ...top_level_folder()
-
-
-#%% Test driver for top_level_folder

-
+    joined_path = os.path.join(*paths)
+    if convert_slashes:
+        return joined_path.replace('\\', '/')
+    else:
+        return joined_path

-#%%

-p = 'blah/foo/bar'; s = top_level_folder(p); print(s); assert s == 'blah'
-p = '/blah/foo/bar'; s = top_level_folder(p); print(s); assert s == '/blah'
-p = 'bar'; s = top_level_folder(p); print(s); assert s == 'bar'
-p = ''; s = top_level_folder(p); print(s); assert s == ''
-p = 'c:\\'; s = top_level_folder(p); print(s); assert s == 'c:\\'
-p = r'c:\blah'; s = top_level_folder(p); print(s); assert s == 'c:\\blah'
-p = r'c:\foo'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
-p = r'c:/foo'; s = top_level_folder(p); print(s); assert s == 'c:/foo'
-p = r'c:\foo/bar'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
-
-
 #%% Image-related path functions

 def is_image_file(s, img_extensions=IMG_EXTENSIONS):
     """
     Checks a file's extension against a hard-coded set of image file
     extensions. Uses case-insensitive comparison.

     Does not check whether the file exists, only determines whether the filename
     implies it's an image file.

     Args:
         s (str): filename to evaluate for image-ness
         img_extensions (list, optional): list of known image file extensions

     Returns:
         bool: True if [s] appears to be an image file, else False
     """

     ext = os.path.splitext(s)[1]
     return ext.lower() in img_extensions

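top_level_folder (and its inline test driver) is removed, replaced by the new path_join helper. A minimal usage sketch, assuming the wheel's public import path; the expected value follows directly from the slash conversion shown above:

```python
from megadetector.utils.path_utils import path_join

# os.path.join semantics, but backslashes are normalized to forward slashes
# by default, so the result is the same on Windows and Unix
assert path_join('folder', 'subfolder', 'image.jpg') == 'folder/subfolder/image.jpg'

# Pass convert_slashes=False to keep os.path.join's native separator behavior
native = path_join('folder', 'subfolder', 'image.jpg', convert_slashes=False)
```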
@@ -497,27 +462,27 @@ def find_image_strings(strings):
     """
     Given a list of strings that are potentially image file names, looks for
     strings that actually look like image file names (based on extension).

     Args:
         strings (list): list of filenames to check for image-ness

     Returns:
         list: the subset of [strings] that appear to be image filenames
     """

     return [s for s in strings if is_image_file(s)]


 def find_images(dirname,
                 recursive=False,
                 return_relative_paths=False,
                 convert_slashes=True):
     """
     Finds all files in a directory that look like image file names. Returns
     absolute paths unless return_relative_paths is set. Uses the OS-native
     path separator unless convert_slashes is set, in which case will always
     use '/'.

     Args:
         dirname (str): the folder to search for images
         recursive (bool, optional): whether to search recursively
@@ -528,30 +493,30 @@ def find_images(dirname,
     Returns:
         list: list of image filenames found in [dirname]
     """

     assert os.path.isdir(dirname), '{} is not a folder'.format(dirname)

     if recursive:
         strings = glob.glob(os.path.join(dirname, '**', '*.*'), recursive=True)
     else:
         strings = glob.glob(os.path.join(dirname, '*.*'))

     image_files = find_image_strings(strings)

     if return_relative_paths:
         image_files = [os.path.relpath(fn,dirname) for fn in image_files]

     image_files = sorted(image_files)

     if convert_slashes:
         image_files = [fn.replace('\\', '/') for fn in image_files]

     return image_files


 #%% Filename cleaning functions

 def clean_filename(filename,
                    allow_list=VALID_FILENAME_CHARS,
                    char_limit=CHAR_LIMIT,
                    force_lower= False):
@@ -563,18 +528,18 @@ def clean_filename(filename,

     Adapted from
     https://gist.github.com/wassname/1393c4a57cfcbf03641dbc31886123b8

     Args:
         filename (str): filename to clean
         allow_list (str, optional): string containing all allowable filename characters
         char_limit (int, optional): maximum allowable filename length, if None will skip this
             step
         force_lower (bool, optional): convert the resulting filename to lowercase
-
-
-        str: cleaned version of [filename]
+
+    Returns:
+        str: cleaned version of [filename]
     """

     # keep only valid ascii chars
     cleaned_filename = (unicodedata.normalize('NFKD', filename)
                         .encode('ASCII', 'ignore').decode())
@@ -588,26 +553,26 @@ def clean_filename(filename,
     return cleaned_filename


 def clean_path(pathname,
                allow_list=VALID_PATH_CHARS,
                char_limit=CHAR_LIMIT,
                force_lower=False):
     """
     Removes non-ASCII and other invalid path characters (on any reasonable
     OS) from a path, then optionally trims to a maximum length.

     Args:
         pathname (str): path name to clean
         allow_list (str, optional): string containing all allowable filename characters
         char_limit (int, optional): maximum allowable filename length, if None will skip this
             step
         force_lower (bool, optional): convert the resulting filename to lowercase
-
-
-        str: cleaned version of [filename]
+
+    Returns:
+        str: cleaned version of [filename]
     """

     return clean_filename(pathname, allow_list=allow_list,
                           char_limit=char_limit, force_lower=force_lower)

@@ -616,34 +581,34 @@ def flatten_path(pathname,separator_chars=SEPARATOR_CHARS,separator_char_replace
     Removes non-ASCII and other invalid path characters (on any reasonable
     OS) from a path, then trims to a maximum length. Replaces all valid
     separators with [separator_char_replacement.]

     Args:
         pathname (str): path name to flatten
         separator_chars (str, optional): string containing all known path separators
         separator_char_replacement (str, optional): string to insert in place of
             path separators.

     Returns:
         str: flattened version of [pathname]
     """

     s = clean_path(pathname)
     for c in separator_chars:
         s = s.replace(c, separator_char_replacement)
     return s


 def is_executable(filename):
     """
     Checks whether [filename] is on the system path and marked as executable.

     Args:
         filename (str): filename to check for executable status

     Returns:
         bool: True if [filename] is on the system path and marked as executable, otherwise False
     """

     # https://stackoverflow.com/questions/11210104/check-if-a-program-exists-from-a-python-script

     return which(filename) is not None
@@ -654,220 +619,247 @@ def is_executable(filename):
 def environment_is_wsl():
     """
     Determines whether we're running in WSL.

     Returns:
         True if we're running in WSL.
     """

     if sys.platform not in ('linux','posix'):
         return False
     platform_string = ' '.join(platform.uname()).lower()
     return 'microsoft' in platform_string and 'wsl' in platform_string


-def wsl_path_to_windows_path(filename):
+def wsl_path_to_windows_path(filename, failure_behavior='none'):
     r"""
-    Converts a WSL path to a Windows path,
-
+    Converts a WSL path to a Windows path. For example, converts:

         /mnt/e/a/b/c

     ...to:

         e:\a\b\c

     Args:
         filename (str): filename to convert
-
+        failure_behavior (str): what to do if the path can't be processed as a WSL path.
+            'none' to return None in this case, 'original' to return the original path.
+
     Returns:
-        str: Windows equivalent to the WSL path [filename]
-        environment is neither Windows nor WSL.
+        str: Windows equivalent to the WSL path [filename]
     """
-
-
-
-
-
-
-
-
-    if
-
+
+    assert failure_behavior in ('none','original'), \
+        'Unrecognized failure_behavior value {}'.format(failure_behavior)
+
+    # Check whether the path follows the standard WSL mount pattern
+    wsl_path_pattern = r'^/mnt/([a-zA-Z])(/.*)?$'
+    match = re.match(wsl_path_pattern, filename)
+
+    if match:
+
+        # Extract the drive letter and the rest of the path
+        drive_letter = match.group(1)
+        path_remainder = match.group(2) if match.group(2) else ''
+
+        # Convert forward slashes to backslashes for Windows
+        path_remainder = path_remainder.replace('/', '\\')
+
+        # Format the Windows path
+        windows_path = f"{drive_letter}:{path_remainder}"
+        return windows_path
+
+    if failure_behavior == 'none':
         return None
-
-
-
+    else:
+        return filename
+
+# ...def wsl_path_to_windows_path(...)

-def windows_path_to_wsl_path(filename):
+
+def windows_path_to_wsl_path(filename, failure_behavior='none'):
     r"""
     Converts a Windows path to a WSL path, or returns None if that's not possible. E.g.
     converts:

         e:\a\b\c

     ...to:

         /mnt/e/a/b/c

     Args:
         filename (str): filename to convert
-
+        failure_behavior (str): what to do if the path can't be processed as a Windows path.
+            'none' to return None in this case, 'original' to return the original path.
+
     Returns:
-        str: WSL equivalent to the Windows path [filename]
-        environment is neither Windows nor WSL.
+        str: WSL equivalent to the Windows path [filename]
     """
-
-
-
-
-
-
-
-
-
-
+
+    assert failure_behavior in ('none','original'), \
+        'Unrecognized failure_behavior value {}'.format(failure_behavior)
+
+    filename = filename.replace('\\', '/')
+
+    # Check whether the path follows a Windows drive letter pattern
+    windows_path_pattern = r'^([a-zA-Z]):(/.*)?$'
+    match = re.match(windows_path_pattern, filename)
+
+    if match:
+        # Extract the drive letter and the rest of the path
+        drive_letter = match.group(1).lower() # Convert to lowercase for WSL
+        path_remainder = match.group(2) if match.group(2) else ''
+
+        # Format the WSL path
+        wsl_path = f"/mnt/{drive_letter}{path_remainder}"
+        return wsl_path
+
+    if failure_behavior == 'none':
         return None
-
-
+    else:
+        return filename
+
+# ...def window_path_to_wsl_path(...)
+

 def open_file_in_chrome(filename):
     """
     Open a file in chrome, regardless of file type. I typically use this to open
     .md files in Chrome.

     Args:
         filename (str): file to open

     Return:
         bool: whether the operation was successful
     """

     # Create URL
     abs_path = os.path.abspath(filename)

     system = platform.system()
     if system == 'Windows':
         url = f'file:///{abs_path.replace(os.sep, "/")}'
     else: # macOS and Linux
         url = f'file://{abs_path}'

     # Determine the Chrome path
     if system == 'Windows':

         # This is a native Python module, but it only exists on Windows
         import winreg

         chrome_paths = [
             os.path.expanduser("~") + r"\AppData\Local\Google\Chrome\Application\chrome.exe",
             r"C:\Program Files\Google\Chrome\Application\chrome.exe",
             r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
         ]

         # Default approach: run from a typical chrome location
         for path in chrome_paths:
             if os.path.exists(path):
                 subprocess.run([path, url])
                 return True

         # Method 2: Check registry for Chrome path
         try:
             with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
                 r"SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe") as key:
                 chrome_path = winreg.QueryValue(key, None)
                 if chrome_path and os.path.exists(chrome_path):
                     subprocess.run([chrome_path, url])
                     return True
-        except:
+        except Exception:
             pass

         # Method 3: Try alternate registry location
         try:
             with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                 r"Software\Google\Chrome\BLBeacon") as key:
                 chrome_path = os.path.join(os.path.dirname(winreg.QueryValueEx(key, "version")[0]), "chrome.exe")
                 if os.path.exists(chrome_path):
                     subprocess.run([chrome_path, url])
                     return True
-        except:
+        except Exception:
             pass

         # Method 4: Try system path or command
         for chrome_cmd in ["chrome", "chrome.exe", "googlechrome", "google-chrome"]:
             try:
                 subprocess.run([chrome_cmd, url], shell=True)
                 return True
-            except:
+            except Exception:
                 continue

         # Method 5: Use Windows URL protocol handler
         try:
             os.startfile(url)
             return True
-        except:
+        except Exception:
             pass

         # Method 6: Use rundll32
         try:
             cmd = f'rundll32 url.dll,FileProtocolHandler {url}'
             subprocess.run(cmd, shell=True)
             return True
-        except:
+        except Exception:
             pass

     elif system == 'Darwin':

         chrome_paths = [
             '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
             os.path.expanduser('~/Applications/Google Chrome.app/Contents/MacOS/Google Chrome')
         ]

         for path in chrome_paths:
             if os.path.exists(path):
                 subprocess.run([path, url])
                 return True

         # Fallback to 'open' command with Chrome as the app
         try:
             subprocess.run(['open', '-a', 'Google Chrome', url])
             return True
-        except:
+        except Exception:
             pass

     elif system == 'Linux':

         chrome_commands = ['google-chrome', 'chrome', 'chromium', 'chromium-browser']

         for cmd in chrome_commands:
             try:
                 subprocess.run([cmd, url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
                 return True
-            except:
+            except Exception:
                 continue

     print(f"Could not open {filename} in Chrome on {system}.")
     return False


 def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
     """
     Opens [filename] in the default OS file handler for this file type.

     If browser_name is not None, uses the webbrowser module to open the filename
     in the specified browser; see https://docs.python.org/3/library/webbrowser.html
     for supported browsers. Falls back to the default file handler if webbrowser.open()
     fails. In this case, attempt_to_open_in_wsl_host is ignored unless webbrowser.open() fails.

     If browser_name is 'default', uses the system default. This is different from the
     parameter to webbrowser.get(), where None implies the system default.

     Args:
         filename (str): file to open
         attempt_to_open_in_wsl_host: if this is True, and we're in WSL, attempts to open
             [filename] in the Windows host environment
         browser_name: see above
     """

     if browser_name is not None:
         if browser_name == 'chrome':
             browser_name = 'google-chrome'
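The WSL/Windows path converters were rewritten around explicit regex matching, and both gained a failure_behavior parameter. A sketch of the behavior implied by the code above; the input paths are illustrative:

```python
from megadetector.utils.path_utils import (windows_path_to_wsl_path,
                                           wsl_path_to_windows_path)

# Standard /mnt/<drive>/... mounts convert in both directions
assert wsl_path_to_windows_path('/mnt/e/a/b/c') == 'e:\\a\\b\\c'
assert windows_path_to_wsl_path(r'e:\a\b\c') == '/mnt/e/a/b/c'

# A path that doesn't match the expected pattern returns None by default...
assert wsl_path_to_windows_path('/home/user/data') is None

# ...or the original path when failure_behavior='original'
assert wsl_path_to_windows_path('/home/user/data',
                                failure_behavior='original') == '/home/user/data'
```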
@@ -879,32 +871,32 @@ def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
             result = False
         if result:
             return

     if sys.platform == 'win32':

         os.startfile(filename)

     elif sys.platform == 'darwin':

         opener = 'open'
         subprocess.call([opener, filename])

     elif attempt_to_open_in_wsl_host and environment_is_wsl():

         windows_path = wsl_path_to_windows_path(filename)

         # Fall back to xdg-open
         if windows_path is None:
             subprocess.call(['xdg-open', filename])

         if os.path.isdir(filename):
             subprocess.run(["explorer.exe", windows_path])
         else:
-            os.system("cmd.exe /C start
+            os.system("cmd.exe /C start {}".format(re.escape(windows_path)))

     else:

         opener = 'xdg-open'
         subprocess.call([opener, filename])

 # ...def open_file(...)
@@ -916,12 +908,12 @@ def write_list_to_file(output_file,strings):
     """
     Writes a list of strings to either a JSON file or text file,
     depending on extension of the given file name.

     Args:
         output_file (str): file to write
         strings (list): list of strings to write to [output_file]
     """

     with open(output_file, 'w') as f:
         if output_file.endswith('.json'):
             json.dump(strings, f, indent=1)
@@ -932,14 +924,14 @@ def write_list_to_file(output_file,strings):
 def read_list_from_file(filename):
     """
     Reads a json-formatted list of strings from a file.

     Args:
         filename (str): .json filename to read

     Returns:
         list: list of strings read from [filename]
     """

     assert filename.endswith('.json')
     with open(filename, 'r') as f:
         file_list = json.load(f)
@@ -955,39 +947,39 @@ def _copy_file(input_output_tuple,overwrite=True,verbose=False,move=False):
     """
     Internal function for copying files from within parallel_copy_files.
     """

     assert len(input_output_tuple) == 2
     source_fn = input_output_tuple[0]
     target_fn = input_output_tuple[1]
     if (not overwrite) and (os.path.isfile(target_fn)):
         if verbose:
             print('Skipping existing target file {}'.format(target_fn))
         return

     if move:
         action_string = 'Moving'
     else:
         action_string = 'Copying'

     if verbose:
         print('{} to {}'.format(action_string,target_fn))

     os.makedirs(os.path.dirname(target_fn),exist_ok=True)
     if move:
         shutil.move(source_fn, target_fn)
     else:
         shutil.copyfile(source_fn,target_fn)


 def parallel_copy_files(input_file_to_output_file,
                         max_workers=16,
                         use_threads=True,
                         overwrite=False,
                         verbose=False,
                         move=False):
     """
     Copy (or move) files from source to target according to the dict input_file_to_output_file.

     Args:
         input_file_to_output_file (dict): dictionary mapping source files to the target files
             to which they should be copied
@@ -1000,24 +992,32 @@ def parallel_copy_files(input_file_to_output_file,
     """

     n_workers = min(max_workers,len(input_file_to_output_file))

     # Package the dictionary as a set of 2-tuples
     input_output_tuples = []
     for input_fn in input_file_to_output_file:
         input_output_tuples.append((input_fn,input_file_to_output_file[input_fn]))

-    if use_threads:
-        pool = ThreadPool(n_workers)
-    else:
-        pool = Pool(n_workers)
+    pool = None

-    with tqdm(total=len(input_output_tuples)) as pbar:
-        for i,_ in enumerate(pool.imap_unordered(partial(_copy_file,
-                                                         overwrite=overwrite,
-                                                         verbose=verbose,
-                                                         move=move),
-                                                 input_output_tuples)):
-            pbar.update()
+    try:
+        if use_threads:
+            pool = ThreadPool(n_workers)
+        else:
+            pool = Pool(n_workers)
+
+        with tqdm(total=len(input_output_tuples)) as pbar:
+            for i,_ in enumerate(pool.imap_unordered(partial(_copy_file,
+                                                             overwrite=overwrite,
+                                                             verbose=verbose,
+                                                             move=move),
+                                                     input_output_tuples)):
+                pbar.update()
+    finally:
+        pool.close()
+        pool.join()
+        if verbose:
+            print("Pool closed and joined parallel file copying")

 # ...def parallel_copy_files(...)
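parallel_copy_files now builds its worker pool inside a try/finally, so threads or processes are cleaned up even when a copy fails. A hedged usage sketch; the file paths are hypothetical placeholders:

```python
from megadetector.utils.path_utils import parallel_copy_files

# Hypothetical source-to-destination mapping; target folders are created as needed
input_file_to_output_file = {
    '/data/src/img_0001.jpg': '/data/dst/img_0001.jpg',
    '/data/src/img_0002.jpg': '/data/dst/img_0002.jpg',
}

# With the new try/finally, the pool is closed and joined even if a copy raises
parallel_copy_files(input_file_to_output_file,
                    max_workers=8,
                    use_threads=True,
                    overwrite=False,
                    verbose=True)
```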
@@ -1028,36 +1028,36 @@ def get_file_sizes(base_dir, convert_slashes=True):
|
|
|
1028
1028
|
"""
|
|
1029
1029
|
Gets sizes recursively for all files in base_dir, returning a dict mapping
|
|
1030
1030
|
relative filenames to size.
|
|
1031
|
-
|
|
1031
|
+
|
|
1032
1032
|
TODO: merge the functionality here with parallel_get_file_sizes, which uses slightly
|
|
1033
1033
|
different semantics.
|
|
1034
|
-
|
|
1034
|
+
|
|
1035
1035
|
Args:
|
|
1036
1036
|
base_dir (str): folder within which we want all file sizes
|
|
1037
1037
|
convert_slashes (bool, optional): force forward slashes in return strings,
|
|
1038
1038
|
otherwise uses the native path separator
|
|
1039
|
-
|
|
1039
|
+
|
|
1040
1040
|
Returns:
|
|
1041
1041
|
dict: dictionary mapping filenames to file sizes in bytes
|
|
1042
1042
|
"""
|
|
1043
|
-
|
|
1044
|
-
relative_filenames = recursive_file_list(base_dir, convert_slashes=convert_slashes,
|
|
1043
|
+
|
|
1044
|
+
relative_filenames = recursive_file_list(base_dir, convert_slashes=convert_slashes,
|
|
1045
1045
|
return_relative_paths=True)
|
|
1046
|
-
|
|
1046
|
+
|
|
1047
1047
|
fn_to_size = {}
|
|
1048
1048
|
for fn_relative in tqdm(relative_filenames):
|
|
1049
1049
|
fn_abs = os.path.join(base_dir,fn_relative)
|
|
1050
1050
|
fn_to_size[fn_relative] = os.path.getsize(fn_abs)
|
|
1051
|
-
|
|
1051
|
+
|
|
1052
1052
|
return fn_to_size
|
|
1053
|
-
|
|
1053
|
+
|
|
1054
1054
|
|
|
1055
1055
|
def _get_file_size(filename,verbose=False):
|
|
1056
1056
|
"""
|
|
1057
1057
|
Internal function for safely getting the size of a file. Returns a (filename,size)
|
|
1058
1058
|
tuple, where size is None if there is an error.
|
|
1059
1059
|
"""
|
|
1060
|
-
|
|
1060
|
+
|
|
1061
1061
|
try:
|
|
1062
1062
|
size = os.path.getsize(filename)
|
|
1063
1063
|
except Exception as e:
|
|
@@ -1066,18 +1066,18 @@ def _get_file_size(filename,verbose=False):
         size = None
     return (filename,size)
 
-
-def parallel_get_file_sizes(filenames,
-                            max_workers=16,
-                            use_threads=True,
+
+def parallel_get_file_sizes(filenames,
+                            max_workers=16,
+                            use_threads=True,
                             verbose=False,
-                            recursive=True,
+                            recursive=True,
                             convert_slashes=True,
                             return_relative_paths=False):
     """
     Returns a dictionary mapping every file in [filenames] to the corresponding file size,
     or None for errors. If [filenames] is a folder, will enumerate the folder (optionally recursively).
-
+
     Args:
         filenames (list or str): list of filenames for which we should read sizes, or a folder
             within which we should read all file sizes recursively
@@ -1089,33 +1089,33 @@ def parallel_get_file_sizes(filenames,
         convert_slashes (bool, optional): convert backslashes to forward slashes
         return_relative_paths (bool, optional): return relative paths; only relevant if [filenames]
             is a folder.
-
+
     Returns:
         dict: dictionary mapping filenames to file sizes in bytes
     """
 
     n_workers = min(max_workers,len(filenames))
-
+
     folder_name = None
-
+
     if isinstance(filenames,str):
-
+
         folder_name = filenames
-        assert os.path.isdir(filenames), 'Could not find folder {}'.format(folder_name)
-
+        assert os.path.isdir(filenames), 'Could not find folder {}'.format(folder_name)
+
         if verbose:
             print('Enumerating files in {}'.format(folder_name))
-
+
         # Enumerate absolute paths here, we'll convert to relative later if requested
         filenames = recursive_file_list(folder_name,recursive=recursive,return_relative_paths=False)
 
     else:
-
+
         assert is_iterable(filenames), '[filenames] argument is neither a folder nor an iterable'
-
+
     if verbose:
         print('Creating worker pool')
-
+
     if use_threads:
         pool_string = 'thread'
         pool = ThreadPool(n_workers)
@@ -1126,11 +1126,11 @@ def parallel_get_file_sizes(filenames,
     if verbose:
         print('Created a {} pool of {} workers'.format(
             pool_string,n_workers))
-
+
     # This returns (filename,size) tuples
     get_size_results = list(tqdm(pool.imap(
         partial(_get_file_size,verbose=verbose),filenames), total=len(filenames)))
-
+
     to_return = {}
     for r in get_size_results:
         fn = r[0]
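As context for the whitespace-only cleanup in this function, a minimal sketch of calling parallel_get_file_sizes on a folder; the folder path is hypothetical, and the import assumes the package's usual module layout:

    from megadetector.utils.path_utils import parallel_get_file_sizes

    # Passing a folder enumerates it (recursively by default); passing a list
    # reads sizes for exactly those files. Values are None for unreadable files.
    size_map = parallel_get_file_sizes('/data/camera_traps', max_workers=8,
                                       use_threads=True, return_relative_paths=True)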
@@ -1151,7 +1151,7 @@ def parallel_get_file_sizes(filenames,
 def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
     """
     Zips a single file.
-
+
     Args:
         input_fn (str): file to zip
         output_fn (str, optional): target zipfile; if this is None, we'll use
@@ -1159,23 +1159,23 @@ def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compressl
         overwrite (bool, optional): whether to overwrite an existing target file
         verbose (bool, optional): enable existing debug console output
         compresslevel (int, optional): compression level to use, between 0 and 9
-
+
     Returns:
         str: the output zipfile, whether we created it or determined that it already exists
     """
-
+
     basename = os.path.basename(input_fn)
-
+
     if output_fn is None:
         output_fn = input_fn + '.zip'
-
+
     if (not overwrite) and (os.path.isfile(output_fn)):
         print('Skipping existing file {}'.format(output_fn))
         return output_fn
-
+
     if verbose:
         print('Zipping {} to {} with level {}'.format(input_fn,output_fn,compresslevel))
-
+
     with ZipFile(output_fn,'w',zipfile.ZIP_DEFLATED) as zipf:
         zipf.write(input_fn,arcname=basename,compresslevel=compresslevel,
                    compress_type=zipfile.ZIP_DEFLATED)
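For reference, a minimal zip_file sketch matching the docstring above (the input path is hypothetical):

    from megadetector.utils.path_utils import zip_file

    # With output_fn=None this writes /data/results.json.zip and returns that path
    zipped_fn = zip_file('/data/results.json', compresslevel=9, verbose=True)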
@@ -1186,9 +1186,9 @@ def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compressl
 def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
                                  overwrite=False, verbose=False, mode='x'):
     """
-    Adds all the files in [input_files] to the tar file [output_fn].
+    Adds all the files in [input_files] to the tar file [output_fn].
     Archive names are relative to arc_name_base.
-
+
     Args:
         input_files (list): list of absolute filenames to include in the .tar file
         output_fn (str): .tar file to create
@@ -1198,11 +1198,11 @@ def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
         overwrite (bool, optional): whether to overwrite an existing .tar file
         verbose (bool, optional): enable additional debug console output
         mode (str, optional): compression type, can be 'x' (no compression), 'x:gz', or 'x:bz2'.
-
+
     Returns:
         str: the output tar file, whether we created it or determined that it already exists
     """
-
+
     if os.path.isfile(output_fn):
         if not overwrite:
             print('Tar file {} exists, skipping'.format(output_fn))
@@ -1210,11 +1210,11 @@ def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
         else:
             print('Tar file {} exists, deleting and re-creating'.format(output_fn))
             os.remove(output_fn)
-
+
     if verbose:
         print('Adding {} files to {} (mode {})'.format(
             len(input_files),output_fn,mode))
-
+
     with tarfile.open(output_fn,mode) as tarf:
         for input_fn_abs in tqdm(input_files,disable=(not verbose)):
             input_fn_relative = os.path.relpath(input_fn_abs,arc_name_base)
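A sketch of add_files_to_single_tar_file using the modes documented above; the file paths are hypothetical:

    from megadetector.utils.path_utils import add_files_to_single_tar_file

    files = ['/data/project/site01/img1.jpg', '/data/project/site02/img2.jpg']
    # Stored in the archive as site01/img1.jpg and site02/img2.jpg; 'x:gz'
    # selects gzip compression per the docstring above
    add_files_to_single_tar_file(files, '/data/project.tar.gz',
                                 arc_name_base='/data/project', mode='x:gz')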
@@ -1226,9 +1226,9 @@ def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
 def zip_files_into_single_zipfile(input_files, output_fn, arc_name_base,
                                   overwrite=False, verbose=False, compresslevel=9):
     """
-    Zip all the files in [input_files] into [output_fn]. Archive names are relative to
+    Zip all the files in [input_files] into [output_fn]. Archive names are relative to
     arc_name_base.
-
+
     Args:
         input_files (list): list of absolute filenames to include in the .tar file
         output_fn (str): .tar file to create
@@ -1238,20 +1238,20 @@ def zip_files_into_single_zipfile(input_files, output_fn, arc_name_base,
         overwrite (bool, optional): whether to overwrite an existing .tar file
         verbose (bool, optional): enable additional debug console output
         compresslevel (int, optional): compression level to use, between 0 and 9
-
+
     Returns:
         str: the output zipfile, whether we created it or determined that it already exists
     """
-
+
     if not overwrite:
         if os.path.isfile(output_fn):
             print('Zip file {} exists, skipping'.format(output_fn))
             return output_fn
-
+
     if verbose:
         print('Zipping {} files to {} (compression level {})'.format(
             len(input_files),output_fn,compresslevel))
-
+
     with ZipFile(output_fn,'w',zipfile.ZIP_DEFLATED) as zipf:
         for input_fn_abs in tqdm(input_files,disable=(not verbose)):
             input_fn_relative = os.path.relpath(input_fn_abs,arc_name_base)
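The zip analogue of the tar sketch above, again with hypothetical paths:

    from megadetector.utils.path_utils import zip_files_into_single_zipfile

    files = ['/data/project/a.json', '/data/project/sub/b.json']
    # Archive names become a.json and sub/b.json, i.e. relative to arc_name_base
    zip_files_into_single_zipfile(files, '/data/project.zip',
                                  arc_name_base='/data/project', compresslevel=9)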
@@ -1261,41 +1261,41 @@ def zip_files_into_single_zipfile(input_files, output_fn, arc_name_base,
                        compress_type=zipfile.ZIP_DEFLATED)
 
     return output_fn
-
-
+
+
 def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
     """
-    Recursively zip everything in [input_folder] into a single zipfile, storing files as paths
+    Recursively zip everything in [input_folder] into a single zipfile, storing files as paths
     relative to [input_folder].
-
-    Args:
+
+    Args:
         input_folder (str): folder to zip
         output_fn (str, optional): output filename; if this is None, we'll write to [input_folder].zip
         overwrite (bool, optional): whether to overwrite an existing .tar file
         verbose (bool, optional): enable additional debug console output
-        compresslevel (int, optional): compression level to use, between 0 and 9
-
+        compresslevel (int, optional): compression level to use, between 0 and 9
+
     Returns:
-        str: the output zipfile, whether we created it or determined that it already exists
+        str: the output zipfile, whether we created it or determined that it already exists
     """
-
+
     if output_fn is None:
         output_fn = input_folder + '.zip'
-
+
     if not overwrite:
         if os.path.isfile(output_fn):
             print('Zip file {} exists, skipping'.format(output_fn))
-            return
-
+            return
+
     if verbose:
         print('Zipping {} to {} (compression level {})'.format(
             input_folder,output_fn,compresslevel))
-
+
     relative_filenames = recursive_file_list(input_folder,return_relative_paths=True)
-
+
     with ZipFile(output_fn,'w',zipfile.ZIP_DEFLATED) as zipf:
         for input_fn_relative in tqdm(relative_filenames,disable=(not verbose)):
-            input_fn_abs = os.path.join(input_folder,input_fn_relative)
+            input_fn_abs = os.path.join(input_folder,input_fn_relative)
             zipf.write(input_fn_abs,
                        arcname=input_fn_relative,
                        compresslevel=compresslevel,
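A one-call sketch of zip_folder, with a hypothetical folder name:

    from megadetector.utils.path_utils import zip_folder

    # Writes /data/project.zip, storing paths relative to /data/project
    zip_folder('/data/project', overwrite=True, verbose=True)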
@@ -1303,17 +1303,17 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
 
     return output_fn
 
-
-def parallel_zip_files(input_files,
-                       max_workers=16,
-                       use_threads=True,
-                       compresslevel=9,
-                       overwrite=False,
+
+def parallel_zip_files(input_files,
+                       max_workers=16,
+                       use_threads=True,
+                       compresslevel=9,
+                       overwrite=False,
                        verbose=False):
     """
-    Zips one or more files to separate output files in parallel, leaving the
+    Zips one or more files to separate output files in parallel, leaving the
     original files in place. Each file is zipped to [filename].zip.
-
+
     Args:
         input_file (str): list of files to zip
         max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
@@ -1341,9 +1341,9 @@ def parallel_zip_files(input_files,
 def parallel_zip_folders(input_folders, max_workers=16, use_threads=True,
                          compresslevel=9, overwrite=False, verbose=False):
     """
-    Zips one or more folders to separate output files in parallel, leaving the
+    Zips one or more folders to separate output files in parallel, leaving the
     original folders in place. Each folder is zipped to [folder_name].zip.
-
+
     Args:
         input_folder (list): list of folders to zip
         max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
@@ -1360,7 +1360,7 @@ def parallel_zip_folders(input_folders, max_workers=16, use_threads=True,
         pool = ThreadPool(n_workers)
     else:
         pool = Pool(n_workers)
-
+
     with tqdm(total=len(input_folders)) as pbar:
         for i,_ in enumerate(pool.imap_unordered(
             partial(zip_folder,overwrite=overwrite,
@@ -1373,9 +1373,9 @@ def zip_each_file_in_folder(folder_name,recursive=False,max_workers=16,use_threa
                             compresslevel=9,overwrite=False,required_token=None,verbose=False,
                             exclude_zip=True):
     """
-    Zips each file in [folder_name] to its own zipfile (filename.zip), optionally recursing. To
+    Zips each file in [folder_name] to its own zipfile (filename.zip), optionally recursing. To
     zip a whole folder into a single zipfile, use zip_folder().
-
+
     Args:
         folder_name (str): the folder within which we should zip files
         recursive (bool, optional): whether to recurse within [folder_name]
@@ -1386,19 +1386,19 @@ def zip_each_file_in_folder(folder_name,recursive=False,max_workers=16,use_threa
         overwrite (bool, optional): whether to overwrite an existing .tar file
         required_token (str, optional): only zip files whose names contain this string
         verbose (bool, optional): enable additional debug console output
-        exclude_zip (bool, optional): skip files ending in .zip
+        exclude_zip (bool, optional): skip files ending in .zip
     """
-
+
     assert os.path.isdir(folder_name), '{} is not a folder'.format(folder_name)
-
+
     input_files = recursive_file_list(folder_name,recursive=recursive,return_relative_paths=False)
-
+
     if required_token is not None:
         input_files = [fn for fn in input_files if required_token in fn]
-
+
     if exclude_zip:
         input_files = [fn for fn in input_files if (not fn.endswith('.zip'))]
-
+
     parallel_zip_files(input_files=input_files,max_workers=max_workers,
                        use_threads=use_threads,compresslevel=compresslevel,
                        overwrite=overwrite,verbose=verbose)
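A sketch of the per-file parallel zipping path, using the required_token filter documented above (folder name hypothetical):

    from megadetector.utils.path_utils import zip_each_file_in_folder

    # Zip every file whose name contains '.json' to its own [filename].zip;
    # files already ending in .zip are skipped by default
    zip_each_file_in_folder('/data/results', recursive=True, required_token='.json',
                            max_workers=8, overwrite=False)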
@@ -1408,16 +1408,16 @@ def unzip_file(input_file, output_folder=None):
     """
     Unzips a zipfile to the specified output folder, defaulting to the same location as
     the input file.
-
+
     Args:
         input_file (str): zipfile to unzip
         output_folder (str, optional): folder to which we should unzip [input_file], defaults
             to unzipping to the folder where [input_file] lives
     """
-
+
     if output_folder is None:
         output_folder = os.path.dirname(input_file)
-
+
     with zipfile.ZipFile(input_file, 'r') as zf:
         zf.extractall(output_folder)
 
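The corresponding extraction sketch (paths hypothetical):

    from megadetector.utils.path_utils import unzip_file

    unzip_file('/data/archive.zip')                                   # extracts next to the input
    unzip_file('/data/archive.zip', output_folder='/data/extracted')  # extracts to a specific folder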
@@ -1427,31 +1427,31 @@ def unzip_file(input_file, output_folder=None):
 def compute_file_hash(file_path, algorithm='sha256', allow_failures=True):
     """
     Compute the hash of a file.
-
+
     Adapted from:
-
+
     https://www.geeksforgeeks.org/python-program-to-find-hash-of-file/
-
+
     Args:
         file_path (str): the file to hash
         algorithm (str, optional): the hashing algorithm to use (e.g. md5, sha256)
-
+
     Returns:
         str: the hash value for this file
     """
-
+
     try:
-
+
         hash_func = hashlib.new(algorithm)
-
+
         with open(file_path, 'rb') as file:
             while chunk := file.read(8192):  # Read the file in chunks of 8192 bytes
                 hash_func.update(chunk)
-
+
         return str(hash_func.hexdigest())
-
+
     except Exception:
-
+
         if allow_failures:
             return None
         else:
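For reference, a single-file hashing sketch (the image path is hypothetical):

    from megadetector.utils.path_utils import compute_file_hash

    # Returns a hex digest string, or None on failure when allow_failures=True
    h = compute_file_hash('/data/img0001.jpg', algorithm='md5')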
@@ -1461,14 +1461,14 @@ def compute_file_hash(file_path, algorithm='sha256', allow_failures=True):
 
 
 def parallel_compute_file_hashes(filenames,
-                                 max_workers=16,
-                                 use_threads=True,
+                                 max_workers=16,
+                                 use_threads=True,
                                  recursive=True,
                                  algorithm='sha256',
                                  verbose=False):
     """
     Compute file hashes for a list or folder of images.
-
+
     Args:
         filenames (list or str): a list of filenames or a folder
         max_workers (int, optional): the number of parallel workers to use; set to <=1 to disable
@@ -1478,8 +1478,8 @@ def parallel_compute_file_hashes(filenames,
         algorithm (str, optional): the hashing algorithm to use (e.g. md5, sha256)
         recursive (bool, optional): if [filenames] is a folder, whether to enumerate recursively.
             Ignored if [filenames] is a list.
-        verbose (bool, optional): enable additional debug output
-
+        verbose (bool, optional): enable additional debug output
+
     Returns:
         dict: a dict mapping filenames to hash values; values will be None for files that fail
             to load.
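And the parallel counterpart, again with a hypothetical folder:

    from megadetector.utils.path_utils import parallel_compute_file_hashes

    # Hash everything under a folder, e.g. as a first pass at duplicate detection
    hashes = parallel_compute_file_hashes('/data/camera_traps', max_workers=8,
                                          use_threads=True, algorithm='sha256')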
@@ -1489,35 +1489,1140 @@ def parallel_compute_file_hashes(filenames,
|
|
|
1489
1489
|
if verbose:
|
|
1490
1490
|
print('Enumerating files in {}'.format(filenames))
|
|
1491
1491
|
filenames = recursive_file_list(filenames,recursive=recursive,return_relative_paths=False)
|
|
1492
|
-
|
|
1492
|
+
|
|
1493
1493
|
n_workers = min(max_workers,len(filenames))
|
|
1494
|
-
|
|
1494
|
+
|
|
1495
1495
|
if verbose:
|
|
1496
1496
|
print('Computing hashes for {} files on {} workers'.format(len(filenames),n_workers))
|
|
1497
|
-
|
|
1497
|
+
|
|
1498
1498
|
if n_workers <= 1:
|
|
1499
|
-
|
|
1499
|
+
|
|
1500
1500
|
results = []
|
|
1501
1501
|
for filename in filenames:
|
|
1502
1502
|
results.append(compute_file_hash(filename,algorithm=algorithm,allow_failures=True))
|
|
1503
|
-
|
|
1503
|
+
|
|
1504
1504
|
else:
|
|
1505
|
-
|
|
1505
|
+
|
|
1506
1506
|
if use_threads:
|
|
1507
1507
|
pool = ThreadPool(n_workers)
|
|
1508
1508
|
else:
|
|
1509
1509
|
pool = Pool(n_workers)
|
|
1510
|
-
|
|
1510
|
+
|
|
1511
1511
|
results = list(tqdm(pool.imap(
|
|
1512
1512
|
partial(compute_file_hash,algorithm=algorithm,allow_failures=True),
|
|
1513
1513
|
filenames), total=len(filenames)))
|
|
1514
|
-
|
|
1514
|
+
|
|
1515
1515
|
assert len(filenames) == len(results), 'Internal error in parallel_compute_file_hashes'
|
|
1516
|
-
|
|
1516
|
+
|
|
1517
1517
|
to_return = {}
|
|
1518
1518
|
for i_file,filename in enumerate(filenames):
|
|
1519
1519
|
to_return[filename] = results[i_file]
|
|
1520
|
-
|
|
1520
|
+
|
|
1521
1521
|
return to_return
|
|
1522
1522
|
|
|
1523
1523
|
# ...def parallel_compute_file_hashes(...)
|
|
1524
|
+
|
|
1525
|
+
|
|
1526
|
+
#%% Tests
|
|
1527
|
+
|
|
1528
|
+
class TestPathUtils:
|
|
1529
|
+
"""
|
|
1530
|
+
Tests for path_utils.py
|
|
1531
|
+
"""
|
|
1532
|
+
|
|
1533
|
+
def set_up(self):
|
|
1534
|
+
"""
|
|
1535
|
+
Create a temporary directory for testing.
|
|
1536
|
+
"""
|
|
1537
|
+
|
|
1538
|
+
self.test_dir = make_test_folder(subfolder='megadetector/path_utils_tests')
|
|
1539
|
+
os.makedirs(self.test_dir, exist_ok=True)
|
|
1540
|
+
|
|
1541
|
+
|
|
1542
|
+
def tear_down(self):
|
|
1543
|
+
"""
|
|
1544
|
+
Remove the temporary directory after tests.
|
|
1545
|
+
"""
|
|
1546
|
+
|
|
1547
|
+
if os.path.exists(self.test_dir):
|
|
1548
|
+
shutil.rmtree(self.test_dir)
|
|
1549
|
+
|
|
1550
|
+
|
|
1551
|
+
def test_is_image_file(self):
|
|
1552
|
+
"""
|
|
1553
|
+
Test the is_image_file function.
|
|
1554
|
+
"""
|
|
1555
|
+
|
|
1556
|
+
assert is_image_file('test.jpg')
|
|
1557
|
+
assert is_image_file('test.jpeg')
|
|
1558
|
+
assert is_image_file('test.png')
|
|
1559
|
+
assert is_image_file('test.gif')
|
|
1560
|
+
assert is_image_file('test.bmp')
|
|
1561
|
+
assert is_image_file('test.tiff')
|
|
1562
|
+
assert is_image_file('test.TIF')
|
|
1563
|
+
assert not is_image_file('test.txt')
|
|
1564
|
+
assert not is_image_file('test.doc')
|
|
1565
|
+
assert is_image_file('path/to/image.JPG')
|
|
1566
|
+
assert not is_image_file('image')
|
|
1567
|
+
assert is_image_file('test.custom', img_extensions=['.custom'])
|
|
1568
|
+
assert not is_image_file('test.jpg', img_extensions=['.custom'])
|
|
1569
|
+
|
|
1570
|
+
|
|
1571
|
+
def test_find_image_strings(self):
|
|
1572
|
+
"""
|
|
1573
|
+
Test the find_image_strings function.
|
|
1574
|
+
"""
|
|
1575
|
+
|
|
1576
|
+
strings = ['a.jpg', 'b.txt', 'c.PNG', 'd.gif', 'e.jpeg', 'f.doc']
|
|
1577
|
+
expected = ['a.jpg', 'c.PNG', 'd.gif', 'e.jpeg']
|
|
1578
|
+
assert sorted(find_image_strings(strings)) == sorted(expected)
|
|
1579
|
+
assert find_image_strings([]) == []
|
|
1580
|
+
assert find_image_strings(['no_image.txt', 'another.doc']) == []
|
|
1581
|
+
|
|
1582
|
+
|
|
1583
|
+
def test_find_images(self):
|
|
1584
|
+
"""
|
|
1585
|
+
Test the find_images function.
|
|
1586
|
+
"""
|
|
1587
|
+
|
|
1588
|
+
# Create some dummy files
|
|
1589
|
+
img1_abs = os.path.join(self.test_dir, 'img1.jpg')
|
|
1590
|
+
img2_abs = os.path.join(self.test_dir, 'img2.PNG')
|
|
1591
|
+
txt1_abs = os.path.join(self.test_dir, 'text1.txt')
|
|
1592
|
+
open(img1_abs, 'w').close()
|
|
1593
|
+
open(img2_abs, 'w').close()
|
|
1594
|
+
open(txt1_abs, 'w').close()
|
|
1595
|
+
|
|
1596
|
+
subdir = os.path.join(self.test_dir, 'subdir')
|
|
1597
|
+
os.makedirs(subdir, exist_ok=True)
|
|
1598
|
+
img3_abs = os.path.join(subdir, 'img3.jpeg')
|
|
1599
|
+
txt2_abs = os.path.join(subdir, 'text2.txt')
|
|
1600
|
+
open(img3_abs, 'w').close()
|
|
1601
|
+
open(txt2_abs, 'w').close()
|
|
1602
|
+
|
|
1603
|
+
# Test non-recursive
|
|
1604
|
+
expected_non_recursive_abs = sorted([img1_abs.replace('\\', '/'), img2_abs.replace('\\', '/')])
|
|
1605
|
+
found_non_recursive_abs = find_images(self.test_dir, recursive=False, return_relative_paths=False)
|
|
1606
|
+
assert sorted(found_non_recursive_abs) == expected_non_recursive_abs
|
|
1607
|
+
|
|
1608
|
+
# Test non-recursive, relative paths
|
|
1609
|
+
expected_non_recursive_rel = sorted(['img1.jpg', 'img2.PNG'])
|
|
1610
|
+
found_non_recursive_rel = find_images(self.test_dir, recursive=False, return_relative_paths=True)
|
|
1611
|
+
assert sorted(found_non_recursive_rel) == expected_non_recursive_rel
|
|
1612
|
+
|
|
1613
|
+
# Test recursive
|
|
1614
|
+
expected_recursive_abs = sorted([
|
|
1615
|
+
img1_abs.replace('\\', '/'),
|
|
1616
|
+
img2_abs.replace('\\', '/'),
|
|
1617
|
+
img3_abs.replace('\\', '/')
|
|
1618
|
+
])
|
|
1619
|
+
found_recursive_abs = find_images(self.test_dir, recursive=True, return_relative_paths=False)
|
|
1620
|
+
assert sorted(found_recursive_abs) == expected_recursive_abs
|
|
1621
|
+
|
|
1622
|
+
# Test recursive, relative paths
|
|
1623
|
+
expected_recursive_rel = sorted([
|
|
1624
|
+
'img1.jpg',
|
|
1625
|
+
'img2.PNG',
|
|
1626
|
+
os.path.join('subdir', 'img3.jpeg').replace('\\', '/')
|
|
1627
|
+
])
|
|
1628
|
+
found_recursive_rel = find_images(self.test_dir, recursive=True, return_relative_paths=True)
|
|
1629
|
+
assert sorted(found_recursive_rel) == expected_recursive_rel
|
|
1630
|
+
|
|
1631
|
+
# Test with an empty directory
|
|
1632
|
+
empty_dir = os.path.join(self.test_dir, 'empty_dir')
|
|
1633
|
+
os.makedirs(empty_dir, exist_ok=True)
|
|
1634
|
+
assert find_images(empty_dir, recursive=True) == []
|
|
1635
|
+
|
|
1636
|
+
# Test with a directory that doesn't exist (should assert)
|
|
1637
|
+
try:
|
|
1638
|
+
find_images(os.path.join(self.test_dir, 'non_existent_dir'))
|
|
1639
|
+
raise AssertionError("AssertionError not raised for non_existent_dir")
|
|
1640
|
+
except AssertionError:
|
|
1641
|
+
pass
|
|
1642
|
+
|
|
1643
|
+
|
|
1644
|
+
def test_recursive_file_list_and_file_list(self):
|
|
1645
|
+
"""
|
|
1646
|
+
Test the recursive_file_list and file_list functions.
|
|
1647
|
+
"""
|
|
1648
|
+
|
|
1649
|
+
# Setup directory structure
|
|
1650
|
+
# test_dir/
|
|
1651
|
+
# file1.txt
|
|
1652
|
+
# file2.jpg
|
|
1653
|
+
# subdir1/
|
|
1654
|
+
# file3.txt
|
|
1655
|
+
# subsubdir/
|
|
1656
|
+
# file4.png
|
|
1657
|
+
# subdir2/
|
|
1658
|
+
# file5.doc
|
|
1659
|
+
|
|
1660
|
+
list_dir = os.path.join(self.test_dir,'recursive_list')
|
|
1661
|
+
|
|
1662
|
+
f1 = os.path.join(list_dir, 'file1.txt')
|
|
1663
|
+
f2 = os.path.join(list_dir, 'file2.jpg')
|
|
1664
|
+
subdir1 = os.path.join(list_dir, 'subdir1')
|
|
1665
|
+
os.makedirs(subdir1, exist_ok=True)
|
|
1666
|
+
f3 = os.path.join(subdir1, 'file3.txt')
|
|
1667
|
+
subsubdir = os.path.join(subdir1, 'subsubdir')
|
|
1668
|
+
os.makedirs(subsubdir, exist_ok=True)
|
|
1669
|
+
f4 = os.path.join(subsubdir, 'file4.png')
|
|
1670
|
+
subdir2 = os.path.join(list_dir, 'subdir2')
|
|
1671
|
+
os.makedirs(subdir2, exist_ok=True)
|
|
1672
|
+
f5 = os.path.join(subdir2, 'file5.doc')
|
|
1673
|
+
|
|
1674
|
+
for filepath in [f1, f2, f3, f4, f5]:
|
|
1675
|
+
with open(filepath, 'w') as f:
|
|
1676
|
+
f.write('test')
|
|
1677
|
+
|
|
1678
|
+
# Test recursive_file_list (recursive=True by default)
|
|
1679
|
+
expected_all_files_abs = sorted([
|
|
1680
|
+
f1.replace('\\', '/'), f2.replace('\\', '/'), f3.replace('\\', '/'),
|
|
1681
|
+
f4.replace('\\', '/'), f5.replace('\\', '/')
|
|
1682
|
+
])
|
|
1683
|
+
all_files_abs = recursive_file_list(list_dir, convert_slashes=True,
|
|
1684
|
+
return_relative_paths=False)
|
|
1685
|
+
assert sorted(all_files_abs) == expected_all_files_abs
|
|
1686
|
+
|
|
1687
|
+
# Test recursive_file_list with relative paths
|
|
1688
|
+
expected_all_files_rel = sorted([
|
|
1689
|
+
'file1.txt', 'file2.jpg',
|
|
1690
|
+
os.path.join('subdir1', 'file3.txt').replace('\\', '/'),
|
|
1691
|
+
os.path.join('subdir1', 'subsubdir', 'file4.png').replace('\\', '/'),
|
|
1692
|
+
os.path.join('subdir2', 'file5.doc').replace('\\', '/')
|
|
1693
|
+
])
|
|
1694
|
+
all_files_rel = recursive_file_list(list_dir, convert_slashes=True,
|
|
1695
|
+
return_relative_paths=True)
|
|
1696
|
+
assert sorted(all_files_rel) == expected_all_files_rel
|
|
1697
|
+
|
|
1698
|
+
# Test file_list (non-recursive by default via wrapper)
|
|
1699
|
+
expected_top_level_files_abs = sorted([f1.replace('\\', '/'), f2.replace('\\', '/')])
|
|
1700
|
+
top_level_files_abs = file_list(list_dir, convert_slashes=True,
|
|
1701
|
+
return_relative_paths=False, recursive=False)
|
|
1702
|
+
assert sorted(top_level_files_abs) == expected_top_level_files_abs
|
|
1703
|
+
|
|
1704
|
+
# Test file_list (recursive explicitly) - should be same as recursive_file_list
|
|
1705
|
+
recursive_via_file_list = file_list(list_dir, convert_slashes=True,
|
|
1706
|
+
return_relative_paths=False, recursive=True)
|
|
1707
|
+
assert sorted(recursive_via_file_list) == expected_all_files_abs
|
|
1708
|
+
|
|
1709
|
+
# Test with convert_slashes=False (use os.sep)
|
|
1710
|
+
#
|
|
1711
|
+
# Note: This test might be tricky if os.sep is '/', as no replacement happens. We'll check
|
|
1712
|
+
# that backslashes remain on Windows.
|
|
1713
|
+
if os.sep == '\\':
|
|
1714
|
+
f1_raw = os.path.join(list_dir, 'file1.txt')
|
|
1715
|
+
# Only one file for simplicity
|
|
1716
|
+
files_no_slash_conversion = file_list(list_dir, convert_slashes=False, recursive=False)
|
|
1717
|
+
assert any(f1_raw in s for s in files_no_slash_conversion)
|
|
1718
|
+
|
|
1719
|
+
# Test with an empty directory
|
|
1720
|
+
empty_dir = os.path.join(list_dir, "empty_dir_for_files")
|
|
1721
|
+
os.makedirs(empty_dir, exist_ok=True)
|
|
1722
|
+
assert recursive_file_list(empty_dir) == []
|
|
1723
|
+
assert file_list(empty_dir, recursive=False) == []
|
|
1724
|
+
|
|
1725
|
+
# Test with a non-existent directory
|
|
1726
|
+
try:
|
|
1727
|
+
recursive_file_list(os.path.join(list_dir, "non_existent_dir"))
|
|
1728
|
+
raise AssertionError("AssertionError not raised for non_existent_dir in recursive_file_list")
|
|
1729
|
+
except AssertionError:
|
|
1730
|
+
pass
|
|
1731
|
+
|
|
1732
|
+
|
|
1733
|
+
def test_folder_list(self):
|
|
1734
|
+
"""
|
|
1735
|
+
Test the folder_list function.
|
|
1736
|
+
"""
|
|
1737
|
+
|
|
1738
|
+
# Setup directory structure
|
|
1739
|
+
# test_dir/
|
|
1740
|
+
# subdir1/
|
|
1741
|
+
# subsubdir1/
|
|
1742
|
+
# subdir2/
|
|
1743
|
+
# file1.txt (should be ignored)
|
|
1744
|
+
|
|
1745
|
+
folder_list_dir = os.path.join(self.test_dir,'folder_list')
|
|
1746
|
+
|
|
1747
|
+
subdir1 = os.path.join(folder_list_dir, 'subdir1')
|
|
1748
|
+
subsubdir1 = os.path.join(subdir1, 'subsubdir1')
|
|
1749
|
+
subdir2 = os.path.join(folder_list_dir, 'subdir2')
|
|
1750
|
+
os.makedirs(subdir1, exist_ok=True)
|
|
1751
|
+
os.makedirs(subsubdir1, exist_ok=True)
|
|
1752
|
+
os.makedirs(subdir2, exist_ok=True)
|
|
1753
|
+
with open(os.path.join(folder_list_dir, 'file1.txt'), 'w') as f:
|
|
1754
|
+
f.write('test')
|
|
1755
|
+
|
|
1756
|
+
# Test non-recursive
|
|
1757
|
+
expected_folders_non_recursive_abs = sorted([
|
|
1758
|
+
subdir1.replace('\\', '/'), subdir2.replace('\\', '/')
|
|
1759
|
+
])
|
|
1760
|
+
folders_non_recursive_abs = folder_list(folder_list_dir, recursive=False,
|
|
1761
|
+
return_relative_paths=False)
|
|
1762
|
+
assert sorted(folders_non_recursive_abs) == expected_folders_non_recursive_abs
|
|
1763
|
+
|
|
1764
|
+
# Test non-recursive, relative paths
|
|
1765
|
+
expected_folders_non_recursive_rel = sorted(['subdir1', 'subdir2'])
|
|
1766
|
+
folders_non_recursive_rel = folder_list(folder_list_dir, recursive=False,
|
|
1767
|
+
return_relative_paths=True)
|
|
1768
|
+
assert sorted(folders_non_recursive_rel) == expected_folders_non_recursive_rel
|
|
1769
|
+
|
|
1770
|
+
# Test recursive
|
|
1771
|
+
expected_folders_recursive_abs = sorted([
|
|
1772
|
+
subdir1.replace('\\', '/'),
|
|
1773
|
+
subsubdir1.replace('\\', '/'),
|
|
1774
|
+
subdir2.replace('\\', '/')
|
|
1775
|
+
])
|
|
1776
|
+
folders_recursive_abs = folder_list(folder_list_dir, recursive=True,
|
|
1777
|
+
return_relative_paths=False)
|
|
1778
|
+
assert sorted(folders_recursive_abs) == expected_folders_recursive_abs
|
|
1779
|
+
|
|
1780
|
+
# Test recursive, relative paths
|
|
1781
|
+
expected_folders_recursive_rel = sorted([
|
|
1782
|
+
'subdir1',
|
|
1783
|
+
os.path.join('subdir1', 'subsubdir1').replace('\\', '/'),
|
|
1784
|
+
'subdir2'
|
|
1785
|
+
])
|
|
1786
|
+
folders_recursive_rel = folder_list(folder_list_dir, recursive=True,
|
|
1787
|
+
return_relative_paths=True)
|
|
1788
|
+
assert sorted(folders_recursive_rel) == expected_folders_recursive_rel
|
|
1789
|
+
|
|
1790
|
+
# Test with an empty directory (except for the file)
|
|
1791
|
+
empty_dir_for_folders = os.path.join(folder_list_dir, "empty_for_folders")
|
|
1792
|
+
os.makedirs(empty_dir_for_folders, exist_ok=True)
|
|
1793
|
+
with open(os.path.join(empty_dir_for_folders, 'temp.txt'), 'w') as f: f.write('t')
|
|
1794
|
+
assert folder_list(empty_dir_for_folders, recursive=True) == []
|
|
1795
|
+
assert folder_list(empty_dir_for_folders, recursive=False) == []
|
|
1796
|
+
|
|
1797
|
+
# Test with a non-existent directory
|
|
1798
|
+
try:
|
|
1799
|
+
folder_list(os.path.join(self.test_dir, "non_existent_dir"))
|
|
1800
|
+
raise AssertionError("AssertionError not raised for non_existent_dir in folder_list")
|
|
1801
|
+
except AssertionError:
|
|
1802
|
+
pass
|
|
1803
|
+
|
|
1804
|
+
|
|
1805
|
+
def test_folder_summary(self):
|
|
1806
|
+
"""
|
|
1807
|
+
Test the folder_summary function.
|
|
1808
|
+
"""
|
|
1809
|
+
|
|
1810
|
+
# test_dir/
|
|
1811
|
+
# file1.txt
|
|
1812
|
+
# img1.jpg
|
|
1813
|
+
# subdir/
|
|
1814
|
+
# file2.txt
|
|
1815
|
+
# img2.png
|
|
1816
|
+
# img3.png
|
|
1817
|
+
|
|
1818
|
+
fodler_summary_dir = os.path.join(self.test_dir,'folder_summary')
|
|
1819
|
+
|
|
1820
|
+
f1 = os.path.join(fodler_summary_dir, 'file1.txt')
|
|
1821
|
+
img1 = os.path.join(fodler_summary_dir, 'img1.jpg')
|
|
1822
|
+
subdir = os.path.join(fodler_summary_dir, 'subdir')
|
|
1823
|
+
os.makedirs(subdir, exist_ok=True)
|
|
1824
|
+
f2 = os.path.join(subdir, 'file2.txt')
|
|
1825
|
+
img2 = os.path.join(subdir, 'img2.png')
|
|
1826
|
+
img3 = os.path.join(subdir, 'img3.png')
|
|
1827
|
+
|
|
1828
|
+
for filepath in [f1, img1, f2, img2, img3]:
|
|
1829
|
+
with open(filepath, 'w') as f:
|
|
1830
|
+
f.write('test')
|
|
1831
|
+
|
|
1832
|
+
summary = folder_summary(fodler_summary_dir, print_summary=False)
|
|
1833
|
+
|
|
1834
|
+
assert summary['n_files'] == 5
|
|
1835
|
+
assert summary['n_folders'] == 1 # 'subdir'
|
|
1836
|
+
assert summary['extension_to_count']['.txt'] == 2
|
|
1837
|
+
assert summary['extension_to_count']['.jpg'] == 1
|
|
1838
|
+
assert summary['extension_to_count']['.png'] == 2
|
|
1839
|
+
|
|
1840
|
+
# Check order (sorted by value, desc)
|
|
1841
|
+
#
|
|
1842
|
+
# The specific order of keys with the same counts can vary based on file system list
|
|
1843
|
+
# order. We'll check that the counts are correct and the number of unique extensions is
|
|
1844
|
+
# right.
|
|
1845
|
+
assert len(summary['extension_to_count']) == 3
|
|
1846
|
+
|
|
1847
|
+
|
|
1848
|
+
empty_dir = os.path.join(fodler_summary_dir, "empty_summary_dir")
|
|
1849
|
+
os.makedirs(empty_dir, exist_ok=True)
|
|
1850
|
+
empty_summary = folder_summary(empty_dir, print_summary=False)
|
|
1851
|
+
assert empty_summary['n_files'] == 0
|
|
1852
|
+
assert empty_summary['n_folders'] == 0
|
|
1853
|
+
assert empty_summary['extension_to_count'] == {}
|
|
1854
|
+
|
|
1855
|
+
|
|
1856
|
+
def test_fileparts(self):
|
|
1857
|
+
"""
|
|
1858
|
+
Test the fileparts function.
|
|
1859
|
+
"""
|
|
1860
|
+
|
|
1861
|
+
assert fileparts('file') == ('', 'file', '')
|
|
1862
|
+
assert fileparts('file.txt') == ('', 'file', '.txt')
|
|
1863
|
+
assert fileparts(r'c:/dir/file.jpg') == ('c:/dir', 'file', '.jpg')
|
|
1864
|
+
assert fileparts('/dir/subdir/file.jpg') == ('/dir/subdir', 'file', '.jpg')
|
|
1865
|
+
assert fileparts(r'c:\dir\file') == (r'c:\dir', 'file', '')
|
|
1866
|
+
assert fileparts(r'c:\dir\file.tar.gz') == (r'c:\dir', 'file.tar', '.gz')
|
|
1867
|
+
assert fileparts('.bashrc') == ('', '.bashrc', '') # Hidden file, no extension
|
|
1868
|
+
assert fileparts('nodir/.bashrc') == ('nodir', '.bashrc', '')
|
|
1869
|
+
assert fileparts('a/b/c.d.e') == ('a/b', 'c.d', '.e')
|
|
1870
|
+
|
|
1871
|
+
|
|
1872
|
+
def test_insert_before_extension(self):
|
|
1873
|
+
"""
|
|
1874
|
+
Test the insert_before_extension function.
|
|
1875
|
+
"""
|
|
1876
|
+
|
|
1877
|
+
assert insert_before_extension('file.ext', 'inserted') == 'file.inserted.ext'
|
|
1878
|
+
assert insert_before_extension('file', 'inserted') == 'file.inserted'
|
|
1879
|
+
assert insert_before_extension('path/to/file.ext', 'tag') == 'path/to/file.tag.ext'
|
|
1880
|
+
assert insert_before_extension('path/to/file', 'tag') == 'path/to/file.tag'
|
|
1881
|
+
assert insert_before_extension('file.tar.gz', 'new') == 'file.tar.new.gz'
|
|
1882
|
+
|
|
1883
|
+
# Test with custom separator
|
|
1884
|
+
assert insert_before_extension('file.ext', 'inserted', separator='_') == 'file_inserted.ext'
|
|
1885
|
+
|
|
1886
|
+
# Test with s=None (timestamp) - check format roughly
|
|
1887
|
+
fname_with_ts = insert_before_extension('file.ext', None)
|
|
1888
|
+
parts = fname_with_ts.split('.')
|
|
1889
|
+
# file.YYYY.MM.DD.HH.MM.SS.ext
|
|
1890
|
+
assert len(parts) >= 8 # file, Y, M, D, H, M, S, ext
|
|
1891
|
+
assert parts[0] == 'file'
|
|
1892
|
+
assert parts[-1] == 'ext'
|
|
1893
|
+
assert all(p.isdigit() for p in parts[1:-1])
|
|
1894
|
+
|
|
1895
|
+
fname_no_ext_ts = insert_before_extension('file', '') # s is empty string, should also use timestamp
|
|
1896
|
+
parts_no_ext = fname_no_ext_ts.split('.')
|
|
1897
|
+
assert len(parts_no_ext) >= 7 # file, Y, M, D, H, M, S
|
|
1898
|
+
assert parts_no_ext[0] == 'file'
|
|
1899
|
+
assert all(p.isdigit() for p in parts_no_ext[1:])
|
|
1900
|
+
|
|
1901
|
+
|
|
1902
|
+
def test_split_path(self):
|
|
1903
|
+
"""
|
|
1904
|
+
Test the split_path function.
|
|
1905
|
+
"""
|
|
1906
|
+
|
|
1907
|
+
if os.name == 'nt':
|
|
1908
|
+
assert split_path(r'c:\dir\subdir\file.txt') == ['c:\\', 'dir', 'subdir', 'file.txt']
|
|
1909
|
+
assert split_path('c:\\') == ['c:\\']
|
|
1910
|
+
# Test with mixed slashes, ntpath.split handles them
|
|
1911
|
+
assert split_path(r'c:/dir/subdir/file.txt') == ['c:/', 'dir', 'subdir', 'file.txt']
|
|
1912
|
+
else: # POSIX
|
|
1913
|
+
assert split_path('/dir/subdir/file.jpg') == ['/', 'dir', 'subdir', 'file.jpg']
|
|
1914
|
+
assert split_path('/') == ['/']
|
|
1915
|
+
|
|
1916
|
+
assert split_path('dir/file.txt') == ['dir', 'file.txt']
|
|
1917
|
+
assert split_path('file.txt') == ['file.txt']
|
|
1918
|
+
assert split_path('') == ''
|
|
1919
|
+
assert split_path('.') == ['.']
|
|
1920
|
+
assert split_path('..') == ['..']
|
|
1921
|
+
assert split_path('../a/b') == ['..', 'a', 'b']
|
|
1922
|
+
|
|
1923
|
+
|
|
1924
|
+
def test_path_is_abs(self):
|
|
1925
|
+
"""
|
|
1926
|
+
Test the path_is_abs function.
|
|
1927
|
+
"""
|
|
1928
|
+
|
|
1929
|
+
assert path_is_abs('/absolute/path')
|
|
1930
|
+
assert path_is_abs('c:/absolute/path')
|
|
1931
|
+
assert path_is_abs('C:\\absolute\\path')
|
|
1932
|
+
assert path_is_abs('\\\\server\\share\\path') # UNC path
|
|
1933
|
+
assert path_is_abs('c:file_without_slash_after_drive')
|
|
1934
|
+
|
|
1935
|
+
assert not path_is_abs('relative/path')
|
|
1936
|
+
assert not path_is_abs('file.txt')
|
|
1937
|
+
assert not path_is_abs('../relative')
|
|
1938
|
+
assert not path_is_abs('')
|
|
1939
|
+
|
|
1940
|
+
|
|
1941
|
+
|
|
1942
|
+
def test_safe_create_link_unix(self):
|
|
1943
|
+
"""
|
|
1944
|
+
Test the safe_create_link function on Unix-like systems.
|
|
1945
|
+
"""
|
|
1946
|
+
|
|
1947
|
+
if os.name == 'nt':
|
|
1948
|
+
# print("Skipping test_safe_create_link_unix on Windows.")
|
|
1949
|
+
return
|
|
1950
|
+
|
|
1951
|
+
source_file_path = os.path.join(self.test_dir, 'source.txt')
|
|
1952
|
+
link_path = os.path.join(self.test_dir, 'link.txt')
|
|
1953
|
+
other_source_path = os.path.join(self.test_dir, 'other_source.txt')
|
|
1954
|
+
|
|
1955
|
+
with open(source_file_path, 'w') as f:
|
|
1956
|
+
f.write('source data')
|
|
1957
|
+
with open(other_source_path, 'w') as f:
|
|
1958
|
+
f.write('other data')
|
|
1959
|
+
|
|
1960
|
+
# Create new link
|
|
1961
|
+
safe_create_link(source_file_path, link_path)
|
|
1962
|
+
assert os.path.islink(link_path)
|
|
1963
|
+
assert os.readlink(link_path) == source_file_path
|
|
1964
|
+
|
|
1965
|
+
# Link already exists and points to the correct source
|
|
1966
|
+
safe_create_link(source_file_path, link_path) # Should do nothing
|
|
1967
|
+
assert os.path.islink(link_path)
|
|
1968
|
+
assert os.readlink(link_path) == source_file_path
|
|
1969
|
+
|
|
1970
|
+
# Link already exists but points to a different source
|
|
1971
|
+
safe_create_link(other_source_path, link_path) # Should remove and re-create
|
|
1972
|
+
assert os.path.islink(link_path)
|
|
1973
|
+
assert os.readlink(link_path) == other_source_path
|
|
1974
|
+
|
|
1975
|
+
# Link_new path exists and is a file (not a link)
|
|
1976
|
+
file_path_conflict = os.path.join(self.test_dir, 'conflict_file.txt')
|
|
1977
|
+
with open(file_path_conflict, 'w') as f:
|
|
1978
|
+
f.write('actual file')
|
|
1979
|
+
try:
|
|
1980
|
+
safe_create_link(source_file_path, file_path_conflict)
|
|
1981
|
+
raise AssertionError("AssertionError not raised for file conflict")
|
|
1982
|
+
except AssertionError:
|
|
1983
|
+
pass
|
|
1984
|
+
os.remove(file_path_conflict)
|
|
1985
|
+
|
|
1986
|
+
# Link_new path exists and is a directory
|
|
1987
|
+
dir_path_conflict = os.path.join(self.test_dir, 'conflict_dir')
|
|
1988
|
+
os.makedirs(dir_path_conflict, exist_ok=True)
|
|
1989
|
+
try:
|
|
1990
|
+
safe_create_link(source_file_path, dir_path_conflict)
|
|
1991
|
+
raise AssertionError("AssertionError not raised for directory conflict")
|
|
1992
|
+
except AssertionError: # islink will be false
|
|
1993
|
+
pass
|
|
1994
|
+
shutil.rmtree(dir_path_conflict)
|
|
1995
|
+
|
|
1996
|
+
|
|
1997
|
+
def test_remove_empty_folders(self):
|
|
1998
|
+
"""
|
|
1999
|
+
Test the remove_empty_folders function.
|
|
2000
|
+
"""
|
|
2001
|
+
|
|
2002
|
+
# test_dir/
|
|
2003
|
+
# empty_top/
|
|
2004
|
+
# empty_mid/
|
|
2005
|
+
# empty_leaf/
|
|
2006
|
+
# mixed_top/
|
|
2007
|
+
# empty_mid_in_mixed/
|
|
2008
|
+
# empty_leaf_in_mixed/
|
|
2009
|
+
# non_empty_mid/
|
|
2010
|
+
# file.txt
|
|
2011
|
+
# non_empty_top/
|
|
2012
|
+
# file_in_top.txt
|
|
2013
|
+
|
|
2014
|
+
empty_top = os.path.join(self.test_dir, 'empty_top')
|
|
2015
|
+
empty_mid = os.path.join(empty_top, 'empty_mid')
|
|
2016
|
+
empty_leaf = os.path.join(empty_mid, 'empty_leaf')
|
|
2017
|
+
os.makedirs(empty_leaf, exist_ok=True)
|
|
2018
|
+
|
|
2019
|
+
mixed_top = os.path.join(self.test_dir, 'mixed_top')
|
|
2020
|
+
empty_mid_in_mixed = os.path.join(mixed_top, 'empty_mid_in_mixed')
|
|
2021
|
+
empty_leaf_in_mixed = os.path.join(empty_mid_in_mixed, 'empty_leaf_in_mixed')
|
|
2022
|
+
os.makedirs(empty_leaf_in_mixed, exist_ok=True)
|
|
2023
|
+
non_empty_mid = os.path.join(mixed_top, 'non_empty_mid')
|
|
2024
|
+
os.makedirs(non_empty_mid, exist_ok=True)
|
|
2025
|
+
with open(os.path.join(non_empty_mid, 'file.txt'), 'w') as f:
|
|
2026
|
+
f.write('data')
|
|
2027
|
+
|
|
2028
|
+
non_empty_top = os.path.join(self.test_dir, 'non_empty_top')
|
|
2029
|
+
os.makedirs(non_empty_top, exist_ok=True)
|
|
2030
|
+
with open(os.path.join(non_empty_top, 'file_in_top.txt'), 'w') as f:
|
|
2031
|
+
f.write('data')
|
|
2032
|
+
|
|
2033
|
+
# Process empty_top - should remove all three
|
|
2034
|
+
remove_empty_folders(empty_top, remove_root=True)
|
|
2035
|
+
assert not os.path.exists(empty_top)
|
|
2036
|
+
assert not os.path.exists(empty_mid)
|
|
2037
|
+
assert not os.path.exists(empty_leaf)
|
|
2038
|
+
|
|
2039
|
+
# Process mixed_top; should remove empty_leaf_in_mixed and empty_mid_in_mixed
|
|
2040
|
+
# but not mixed_top or non_empty_mid.
|
|
2041
|
+
remove_empty_folders(mixed_top, remove_root=True)
|
|
2042
|
+
assert os.path.exists(mixed_top) # mixed_top itself should remain
|
|
2043
|
+
assert not os.path.exists(empty_mid_in_mixed)
|
|
2044
|
+
assert not os.path.exists(empty_leaf_in_mixed)
|
|
2045
|
+
assert os.path.exists(non_empty_mid)
|
|
2046
|
+
assert os.path.exists(os.path.join(non_empty_mid, 'file.txt'))
|
|
2047
|
+
|
|
2048
|
+
# Process non_empty_top; should remove nothing.
|
|
2049
|
+
remove_empty_folders(non_empty_top, remove_root=True)
|
|
2050
|
+
assert os.path.exists(non_empty_top)
|
|
2051
|
+
assert os.path.exists(os.path.join(non_empty_top, 'file_in_top.txt'))
|
|
2052
|
+
|
|
2053
|
+
# Test with a file path (should do nothing and return False)
|
|
2054
|
+
file_path_for_removal = os.path.join(self.test_dir, 'a_file.txt')
|
|
2055
|
+
with open(file_path_for_removal, 'w') as f: f.write('t')
|
|
2056
|
+
assert not remove_empty_folders(file_path_for_removal, remove_root=True)
|
|
2057
|
+
assert os.path.exists(file_path_for_removal)
|
|
2058
|
+
|
|
2059
|
+
# Test with remove_root=False for the top level
|
|
2060
|
+
another_empty_top = os.path.join(self.test_dir, 'another_empty_top')
|
|
2061
|
+
another_empty_mid = os.path.join(another_empty_top, 'another_empty_mid')
|
|
2062
|
+
os.makedirs(another_empty_mid)
|
|
2063
|
+
remove_empty_folders(another_empty_top, remove_root=False)
|
|
2064
|
+
assert os.path.exists(another_empty_top) # Root not removed
|
|
2065
|
+
assert not os.path.exists(another_empty_mid) # Mid removed
|
|
2066
|
+
|
|
2067
|
+
|
|
2068
|
+
def test_path_join(self):
|
|
2069
|
+
"""
|
|
2070
|
+
Test the path_join function.
|
|
2071
|
+
"""
|
|
2072
|
+
|
|
2073
|
+
assert path_join('a', 'b', 'c') == 'a/b/c'
|
|
2074
|
+
assert path_join('a/b', 'c', 'd.txt') == 'a/b/c/d.txt'
|
|
2075
|
+
if os.name == 'nt':
|
|
2076
|
+
# On Windows, os.path.join uses '\', so convert_slashes=True should change it
|
|
2077
|
+
assert path_join('a', 'b', convert_slashes=True) == 'a/b'
|
|
2078
|
+
assert path_join('a', 'b', convert_slashes=False) == 'a\\b'
|
|
2079
|
+
assert path_join('c:\\', 'foo', 'bar', convert_slashes=True) == 'c:/foo/bar'
|
|
2080
|
+
assert path_join('c:\\', 'foo', 'bar', convert_slashes=False) == 'c:\\foo\\bar'
|
|
2081
|
+
else:
|
|
2082
|
+
# On POSIX, os.path.join uses '/', so convert_slashes=False should still be '/'
|
|
2083
|
+
assert path_join('a', 'b', convert_slashes=False) == 'a/b'
|
|
2084
|
+
|
|
2085
|
+
assert path_join('a', '', 'b') == 'a/b' # os.path.join behavior
|
|
2086
|
+
assert path_join('/a', 'b') == '/a/b'
|
|
2087
|
+
assert path_join('a', '/b') == '/b' # '/b' is absolute
|
|
2088
|
+
|
|
2089
|
+
|
|
2090
|
+
def test_filename_cleaning(self):
|
|
2091
|
+
"""
|
|
2092
|
+
Test clean_filename, clean_path, and flatten_path functions.
|
|
2093
|
+
"""
|
|
2094
|
+
|
|
2095
|
+
# clean_filename
|
|
2096
|
+
assert clean_filename("test file.txt") == "test file.txt"
|
|
2097
|
+
assert clean_filename("test*file?.txt", char_limit=10) == "testfile.t"
|
|
2098
|
+
assert clean_filename("TestFile.TXT", force_lower=True) == "testfile.txt"
|
|
2099
|
+
assert clean_filename("file:with<illegal>chars.txt") == "filewithillegalchars.txt"
|
|
2100
|
+
assert clean_filename(" accented_name_éà.txt") == " accented_name_ea.txt"
|
|
2101
|
+
|
|
2102
|
+
# Separators are not allowed by default in clean_filename
|
|
2103
|
+
assert clean_filename("path/to/file.txt") == "pathtofile.txt"
|
|
2104
|
+
|
|
2105
|
+
# clean_path
|
|
2106
|
+
assert clean_path("path/to/file.txt") == "path/to/file.txt" # slashes allowed
|
|
2107
|
+
assert clean_path("path\\to\\file.txt") == "path\\to\\file.txt" # backslashes allowed
|
|
2108
|
+
assert clean_path("path:to:file.txt") == "path:to:file.txt" # colons allowed
|
|
2109
|
+
assert clean_path("path/to<illegal>/file.txt") == "path/toillegal/file.txt"
|
|
2110
|
+
|
|
2111
|
+
# flatten_path
|
|
2112
|
+
assert flatten_path("path/to/file.txt") == "path~to~file.txt"
|
|
2113
|
+
assert flatten_path("path:to:file.txt", separator_char_replacement='_') == "path_to_file.txt"
|
|
2114
|
+
assert flatten_path("path\\to/file:name.txt") == "path~to~file~name.txt"
|
|
2115
|
+
assert flatten_path("path/to<illegal>/file.txt") == "path~toillegal~file.txt"
|
|
2116
|
+
|
|
2117
|
+
|
|
2118
|
+
def test_is_executable(self):
|
|
2119
|
+
"""
|
|
2120
|
+
Test the is_executable function.
|
|
2121
|
+
This is a basic test; comprehensive testing is environment-dependent.
|
|
2122
|
+
"""
|
|
2123
|
+
|
|
2124
|
+
# Hard to test reliably across all systems without knowing what's on PATH.
|
|
2125
|
+
if os.name == 'nt':
|
|
2126
|
+
assert is_executable('cmd.exe')
|
|
2127
|
+
assert not is_executable('non_existent_executable_blah_blah')
|
|
2128
|
+
else:
|
|
2129
|
+
assert is_executable('ls')
|
|
2130
|
+
assert is_executable('sh')
|
|
2131
|
+
assert not is_executable('non_existent_executable_blah_blah')
|
|
2132
|
+
|
|
2133
|
+
|
|
2134
|
+
def test_write_read_list_to_file(self):
|
|
2135
|
+
"""
|
|
2136
|
+
Test write_list_to_file and read_list_from_file functions.
|
|
2137
|
+
"""
|
|
2138
|
+
|
|
2139
|
+
test_list = ["item1", "item2 with space", "item3/with/slash"]
|
|
2140
|
+
|
|
2141
|
+
# Test with .json
|
|
2142
|
+
json_file_path = os.path.join(self.test_dir, "test_list.json")
|
|
2143
|
+
write_list_to_file(json_file_path, test_list)
|
|
2144
|
+
read_list_json = read_list_from_file(json_file_path)
|
|
2145
|
+
assert test_list == read_list_json
|
|
2146
|
+
|
|
2147
|
+
# Test with .txt
|
|
2148
|
+
txt_file_path = os.path.join(self.test_dir, "test_list.txt")
|
|
2149
|
+
write_list_to_file(txt_file_path, test_list)
|
|
2150
|
+
# read_list_from_file is specifically for JSON, so we read .txt manually
|
|
2151
|
+
with open(txt_file_path, 'r') as f:
|
|
2152
|
+
read_list_txt = [line.strip() for line in f.readlines()]
|
|
2153
|
+
assert test_list == read_list_txt
|
|
2154
|
+
|
|
2155
|
+
# Test reading non-existent json
|
|
2156
|
+
try:
|
|
2157
|
+
read_list_from_file(os.path.join(self.test_dir,"non_existent.json"))
|
|
2158
|
+
raise AssertionError("FileNotFoundError not raised")
|
|
2159
|
+
except FileNotFoundError:
|
|
2160
|
+
pass
|
|
2161
|
+
|
|
2162
|
+
# Test reading a non-json file with read_list_from_file (should fail parsing)
|
|
2163
|
+
non_json_path = os.path.join(self.test_dir, "not_a_list.json")
|
|
2164
|
+
with open(non_json_path, 'w') as f: f.write("this is not json")
|
|
2165
|
+
try:
|
|
2166
|
+
read_list_from_file(non_json_path)
|
|
2167
|
+
raise AssertionError("json.JSONDecodeError not raised")
|
|
2168
|
+
except json.JSONDecodeError:
|
|
2169
|
+
pass
|
|
2170
|
+
|
|
2171
|
+
|
|
2172
|
+
def test_parallel_copy_files(self):
|
|
2173
|
+
"""
|
|
2174
|
+
Test the parallel_copy_files function (with max_workers=1 for test simplicity).
|
|
2175
|
+
"""
|
|
2176
|
+
|
|
2177
|
+
source_dir = os.path.join(self.test_dir, "copy_source")
|
|
2178
|
+
target_dir = os.path.join(self.test_dir, "copy_target")
|
|
2179
|
+
os.makedirs(source_dir, exist_ok=True)
|
|
2180
|
+
|
|
2181
|
+
file_mappings = {}
|
|
2182
|
+
source_files_content = {}
|
|
2183
|
+
|
|
2184
|
+
for i in range(3):
|
|
2185
|
+
src_fn = f"file{i}.txt"
|
|
2186
|
+
src_path = os.path.join(source_dir, src_fn)
|
|
2187
|
+
if i == 0:
|
|
2188
|
+
tgt_fn = f"copied_file{i}.txt"
|
|
2189
|
+
tgt_path = os.path.join(target_dir, tgt_fn)
|
|
2190
|
+
else:
|
|
2191
|
+
tgt_fn = f"copied_file{i}_subdir.txt"
|
|
2192
|
+
tgt_path = os.path.join(target_dir, f"sub{i}", tgt_fn)
|
|
2193
|
+
|
|
2194
|
+
content = f"content of file {i}"
|
|
2195
|
+
with open(src_path, 'w') as f:
|
|
2196
|
+
f.write(content)
|
|
2197
|
+
|
|
2198
|
+
file_mappings[src_path] = tgt_path
|
|
2199
|
+
source_files_content[tgt_path] = content
|
|
2200
|
+
|
|
2201
|
+
# Test copy
|
|
2202
|
+
parallel_copy_files(file_mappings, max_workers=1, use_threads=True, overwrite=False)
|
|
2203
|
+
for tgt_path, expected_content in source_files_content.items():
|
|
2204
|
+
assert os.path.exists(tgt_path)
|
|
2205
|
+
with open(tgt_path, 'r') as f:
|
|
2206
|
+
assert f.read() == expected_content
|
|
2207
|
+
|
|
2208
|
+
existing_target_path = list(source_files_content.keys())[0]
|
|
2209
|
+
with open(existing_target_path, 'w') as f:
|
|
2210
|
+
f.write("old content")
|
|
2211
|
+
|
|
2212
|
+
parallel_copy_files(file_mappings, max_workers=1, use_threads=True, overwrite=False)
|
|
2213
|
+
with open(existing_target_path, 'r') as f:
|
|
2214
|
+
assert f.read() == "old content"
|
|
2215
|
+
|
|
2216
|
+
parallel_copy_files(file_mappings, max_workers=1, use_threads=True, overwrite=True)
|
|
2217
|
+
with open(existing_target_path, 'r') as f:
|
|
2218
|
+
assert f.read() == source_files_content[existing_target_path]
|
|
2219
|
+
|
|
2220
|
+
for src_path_orig, tgt_path_orig in file_mappings.items(): # Re-create source for move
|
|
2221
|
+
with open(src_path_orig, 'w') as f:
|
|
2222
|
+
f.write(source_files_content[tgt_path_orig])
|
|
2223
|
+
|
|
2224
|
+
parallel_copy_files(file_mappings, max_workers=1, use_threads=True, move=True, overwrite=True)
|
|
2225
|
+
for src_path, tgt_path in file_mappings.items():
|
|
2226
|
+
assert not os.path.exists(src_path)
|
|
2227
|
+
assert os.path.exists(tgt_path)
|
|
2228
|
+
with open(tgt_path, 'r') as f:
|
|
2229
|
+
assert f.read() == source_files_content[tgt_path]
|
|
2230
|
+
|
|
2231
|
+
|
|
2232
|
+
def test_get_file_sizes(self):
|
|
2233
|
+
"""
|
|
2234
|
+
Test get_file_sizes and parallel_get_file_sizes functions.
|
|
2235
|
+
"""
|
|
2236
|
+
|
|
2237
|
+
file_sizes_test_dir = os.path.join(self.test_dir,'file_sizes')
|
|
2238
|
+
os.makedirs(file_sizes_test_dir,exist_ok=True)
|
|
2239
|
+
|
|
2240
|
+
f1_path = os.path.join(file_sizes_test_dir, 'file1.txt')
|
|
2241
|
+
content1 = "0123456789" # 10 bytes
|
|
2242
|
+
with open(f1_path, 'w') as f:
|
|
2243
|
+
f.write(content1)
|
|
2244
|
+
|
|
2245
|
+
subdir_path = os.path.join(file_sizes_test_dir, 'subdir')
|
|
2246
|
+
os.makedirs(subdir_path, exist_ok=True)
|
|
2247
|
+
f2_path = os.path.join(subdir_path, 'file2.txt')
|
|
2248
|
+
content2 = "01234567890123456789" # 20 bytes
|
|
2249
|
+
with open(f2_path, 'w') as f:
|
|
2250
|
+
f.write(content2)
|
|
2251
|
+
|
|
2252
|
+
sizes_relative = get_file_sizes(file_sizes_test_dir)
|
|
2253
|
+
expected_sizes_relative = {
|
|
2254
|
+
'file1.txt': len(content1),
|
|
2255
|
+
os.path.join('subdir', 'file2.txt').replace('\\','/'): len(content2)
|
|
2256
|
+
}
|
|
2257
|
+
assert sizes_relative == expected_sizes_relative
|
|
2258
|
+
|
|
2259
|
+
file_list_abs = [f1_path, f2_path]
|
|
2260
|
+
sizes_parallel_abs = parallel_get_file_sizes(file_list_abs, max_workers=1)
|
|
2261
|
+
expected_sizes_parallel_abs = {
|
|
2262
|
+
f1_path.replace('\\','/'): len(content1),
|
|
2263
|
+
f2_path.replace('\\','/'): len(content2)
|
|
2264
|
+
}
|
|
2265
|
+
assert sizes_parallel_abs == expected_sizes_parallel_abs
|
|
2266
|
+
|
|
2267
|
+
sizes_parallel_folder_abs = parallel_get_file_sizes(file_sizes_test_dir, max_workers=1, return_relative_paths=False)
|
|
2268
|
+
assert sizes_parallel_folder_abs == expected_sizes_parallel_abs
|
|
2269
|
+
|
|
2270
|
+
sizes_parallel_folder_rel = parallel_get_file_sizes(file_sizes_test_dir, max_workers=1, return_relative_paths=True)
|
|
2271
|
+
assert sizes_parallel_folder_rel == expected_sizes_relative
|
|
2272
|
+
|
|
2273
|
+
non_existent_file = os.path.join(file_sizes_test_dir, "no_such_file.txt")
|
|
2274
|
+
sizes_with_error = parallel_get_file_sizes([f1_path, non_existent_file], max_workers=1)
|
|
2275
|
+
expected_with_error = {
|
|
2276
|
+
f1_path.replace('\\','/'): len(content1),
|
|
2277
|
+
non_existent_file.replace('\\','/'): None
|
|
2278
|
+
}
|
|
2279
|
+
assert sizes_with_error == expected_with_error
|
|
2280
|
+
|
|
2281
|
+
|
|
2282
|
+    def test_zip_file_and_unzip_file(self):
+        """
+        Test zip_file and unzip_file functions.
+        """
+
+        file_to_zip_name = "test_zip_me.txt"
+        file_to_zip_path = os.path.join(self.test_dir, file_to_zip_name)
+        content = "This is the content to be zipped."
+        with open(file_to_zip_path, 'w') as f:
+            f.write(content)
+
+        default_zip_output_path = file_to_zip_path + ".zip"
+        returned_zip_path = zip_file(file_to_zip_path)
+        assert returned_zip_path == default_zip_output_path
+        assert os.path.exists(default_zip_output_path)
+
+        unzip_dir_default = os.path.join(self.test_dir, "unzip_default")
+        os.makedirs(unzip_dir_default, exist_ok=True)
+        unzip_file(default_zip_output_path, unzip_dir_default)
+        unzipped_file_path_default = os.path.join(unzip_dir_default, file_to_zip_name)
+        assert os.path.exists(unzipped_file_path_default)
+        with open(unzipped_file_path_default, 'r') as f:
+            assert f.read() == content
+
+        custom_zip_output_name = "custom_archive.zip"
+        custom_zip_output_path = os.path.join(self.test_dir, custom_zip_output_name)
+        zip_file(file_to_zip_path, output_fn=custom_zip_output_path, overwrite=True)
+        assert os.path.exists(custom_zip_output_path)
+
+        zip_in_subdir_path = os.path.join(self.test_dir, "subdir_zip", "my.zip")
+        file_in_subdir_name = "file_for_subdir_zip.txt"
+        file_in_subdir_path = os.path.join(self.test_dir, "subdir_zip", file_in_subdir_name)
+        os.makedirs(os.path.dirname(zip_in_subdir_path), exist_ok=True)
+        with open(file_in_subdir_path, "w") as f:
+            f.write("sub dir content")
+        zip_file(file_in_subdir_path, output_fn=zip_in_subdir_path)
+
+        unzip_file(zip_in_subdir_path, output_folder=None)
+        unzipped_in_same_dir_path = os.path.join(os.path.dirname(zip_in_subdir_path), file_in_subdir_name)
+        assert os.path.exists(unzipped_in_same_dir_path)
+        with open(unzipped_in_same_dir_path, 'r') as f:
+            assert f.read() == "sub dir content"
+
+
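+    # Sketch of the zip_file/unzip_file contract exercised above (hypothetical
+    # paths; comments only):
+    #
+    #   zip_file('/tmp/a.txt')                          # -> '/tmp/a.txt.zip'
+    #   zip_file('/tmp/a.txt', output_fn='/tmp/b.zip', overwrite=True)
+    #   unzip_file('/tmp/b.zip', '/tmp/out')            # extract into a folder
+    #   unzip_file('/tmp/b.zip', output_folder=None)    # extract next to the archive
+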
+    def test_zip_folder(self):
+        """
+        Test the zip_folder function.
+        """
+
+        folder_to_zip = os.path.join(self.test_dir, "folder_to_zip")
+        os.makedirs(folder_to_zip, exist_ok=True)
+
+        file1_name = "file1.txt"
+        path1 = os.path.join(folder_to_zip, file1_name)
+        file2_name = "file2.log"
+        path2 = os.path.join(folder_to_zip, file2_name)
+        subdir_name = "sub"
+        subdir_path = os.path.join(folder_to_zip, subdir_name)
+        os.makedirs(subdir_path, exist_ok=True)
+        file3_name = "file3.dat"
+        path3 = os.path.join(subdir_path, file3_name)
+
+        content1 = "content1"
+        content2 = "content2"
+        content3 = "content3"
+        with open(path1, 'w') as f:
+            f.write(content1)
+        with open(path2, 'w') as f:
+            f.write(content2)
+        with open(path3, 'w') as f:
+            f.write(content3)
+
+        default_zip_path = folder_to_zip + ".zip"
+        zip_folder(folder_to_zip, output_fn=None, overwrite=True)
+        assert os.path.exists(default_zip_path)
+
+        unzip_output_dir = os.path.join(self.test_dir, "unzipped_folder_content")
+        os.makedirs(unzip_output_dir, exist_ok=True)
+        unzip_file(default_zip_path, unzip_output_dir)
+
+        assert os.path.exists(os.path.join(unzip_output_dir, file1_name))
+        assert os.path.exists(os.path.join(unzip_output_dir, file2_name))
+        assert os.path.exists(os.path.join(unzip_output_dir, subdir_name, file3_name))
+        with open(os.path.join(unzip_output_dir, file1_name), 'r') as f:
+            assert f.read() == content1
+        with open(os.path.join(unzip_output_dir, file2_name), 'r') as f:
+            assert f.read() == content2
+        with open(os.path.join(unzip_output_dir, subdir_name, file3_name), 'r') as f:
+            assert f.read() == content3
+
+        mtime_before = os.path.getmtime(default_zip_path)
+        zip_folder(folder_to_zip, output_fn=None, overwrite=False)
+        mtime_after = os.path.getmtime(default_zip_path)
+        assert mtime_before == mtime_after
+
+
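+    # Sketch of the zip_folder semantics relied on above (hypothetical path):
+    # with output_fn=None the archive lands next to the folder, and
+    # overwrite=False leaves an existing archive untouched:
+    #
+    #   zip_folder('/tmp/stuff', output_fn=None, overwrite=True)   # -> '/tmp/stuff.zip'
+    #   zip_folder('/tmp/stuff', output_fn=None, overwrite=False)  # no-op if the zip exists
+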
+    def test_zip_files_into_single_zipfile(self):
+        """
+        Test zip_files_into_single_zipfile.
+        """
+
+        file1_path = os.path.join(self.test_dir, "zfs_file1.txt")
+        content1 = "content for zfs1"
+        with open(file1_path, 'w') as f:
+            f.write(content1)
+
+        subdir_for_zfs = os.path.join(self.test_dir, "zfs_subdir")
+        os.makedirs(subdir_for_zfs, exist_ok=True)
+        file2_path = os.path.join(subdir_for_zfs, "zfs_file2.log")
+        content2 = "content for zfs2"
+        with open(file2_path, 'w') as f:
+            f.write(content2)
+
+        input_files = [file1_path, file2_path]
+        output_zip_path = os.path.join(self.test_dir, "multi_file_archive.zip")
+        zip_files_into_single_zipfile(input_files, output_zip_path, arc_name_base=self.test_dir, overwrite=True)
+        assert os.path.exists(output_zip_path)
+
+        unzip_dir = os.path.join(self.test_dir, "unzip_multi_file")
+        os.makedirs(unzip_dir, exist_ok=True)
+        unzip_file(output_zip_path, unzip_dir)
+
+        expected_unzipped_file1 = os.path.join(unzip_dir, os.path.relpath(file1_path, self.test_dir))
+        expected_unzipped_file2 = os.path.join(unzip_dir, os.path.relpath(file2_path, self.test_dir))
+
+        assert os.path.exists(expected_unzipped_file1)
+        with open(expected_unzipped_file1, 'r') as f:
+            assert f.read() == content1
+        assert os.path.exists(expected_unzipped_file2)
+        assert os.path.basename(expected_unzipped_file2) == "zfs_file2.log"
+        assert os.path.basename(os.path.dirname(expected_unzipped_file2)) == "zfs_subdir"
+        with open(expected_unzipped_file2, 'r') as f:
+            assert f.read() == content2
+
+
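+    # Sketch: arc_name_base determines the root against which in-archive paths
+    # are computed, so folder structure below that root is preserved
+    # (hypothetical paths; comments only):
+    #
+    #   zip_files_into_single_zipfile(['/data/a.txt', '/data/sub/b.log'],
+    #                                 '/data/out.zip', arc_name_base='/data',
+    #                                 overwrite=True)
+    #   # out.zip then contains 'a.txt' and 'sub/b.log'
+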
+    def test_add_files_to_single_tar_file(self):
+        """
+        Test add_files_to_single_tar_file.
+        """
+
+        file1_path = os.path.join(self.test_dir, "tar_file1.txt")
+        content1 = "content for tar1"
+        with open(file1_path, 'w') as f:
+            f.write(content1)
+
+        subdir_for_tar = os.path.join(self.test_dir, "tar_subdir")
+        os.makedirs(subdir_for_tar, exist_ok=True)
+        file2_path = os.path.join(subdir_for_tar, "tar_file2.log")
+        content2 = "content for tar2"
+        with open(file2_path, 'w') as f:
+            f.write(content2)
+
+        input_files = [file1_path, file2_path]
+        output_tar_path = os.path.join(self.test_dir, "archive.tar.gz")
+
+        add_files_to_single_tar_file(input_files, output_tar_path, arc_name_base=self.test_dir,
+                                     overwrite=True, mode='x:gz')
+        assert os.path.exists(output_tar_path)
+
+        un_tar_dir = os.path.join(self.test_dir, "un_tar_contents")
+        os.makedirs(un_tar_dir, exist_ok=True)
+        with tarfile.open(output_tar_path, 'r:gz') as tf:
+            tf.extractall(path=un_tar_dir)
+
+        expected_untarred_file1 = os.path.join(un_tar_dir, os.path.relpath(file1_path, self.test_dir))
+        expected_untarred_file2 = os.path.join(un_tar_dir, os.path.relpath(file2_path, self.test_dir))
+
+        assert os.path.exists(expected_untarred_file1)
+        with open(expected_untarred_file1, 'r') as f:
+            assert f.read() == content1
+        assert os.path.exists(expected_untarred_file2)
+        with open(expected_untarred_file2, 'r') as f:
+            assert f.read() == content2
+
+
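+    # Sketch of the tar variant; judging by the round-trip above, the mode
+    # string follows Python's tarfile conventions, where 'x:gz' means "create
+    # a new gzip-compressed archive" (hypothetical paths; comments only):
+    #
+    #   add_files_to_single_tar_file(['/data/a.txt', '/data/sub/b.log'],
+    #                                '/data/out.tar.gz', arc_name_base='/data',
+    #                                overwrite=True, mode='x:gz')
+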
+    def test_parallel_zip_individual_files_and_folders(self):
+        """
+        Test parallel_zip_files, parallel_zip_folders, and zip_each_file_in_folder.
+        """
+
+        file1_to_zip = os.path.join(self.test_dir, "pz_file1.txt")
+        file2_to_zip = os.path.join(self.test_dir, "pz_file2.txt")
+        with open(file1_to_zip, 'w') as f:
+            f.write("pz_content1")
+        with open(file2_to_zip, 'w') as f:
+            f.write("pz_content2")
+
+        parallel_zip_files([file1_to_zip, file2_to_zip], max_workers=1, overwrite=True)
+        assert os.path.exists(file1_to_zip + ".zip")
+        assert os.path.exists(file2_to_zip + ".zip")
+        unzip_dir_pz = os.path.join(self.test_dir, "unzip_pz")
+        unzip_file(file1_to_zip + ".zip", unzip_dir_pz)
+        assert os.path.exists(os.path.join(unzip_dir_pz, os.path.basename(file1_to_zip)))
+
+        folder1_to_zip = os.path.join(self.test_dir, "pz_folder1")
+        os.makedirs(folder1_to_zip, exist_ok=True)
+        with open(os.path.join(folder1_to_zip, "pf1.txt"), 'w') as f:
+            f.write("pf1_content")
+        folder2_to_zip = os.path.join(self.test_dir, "pz_folder2")
+        os.makedirs(folder2_to_zip, exist_ok=True)
+        with open(os.path.join(folder2_to_zip, "pf2.txt"), 'w') as f:
+            f.write("pf2_content")
+
+        parallel_zip_folders([folder1_to_zip, folder2_to_zip], max_workers=1, overwrite=True)
+        assert os.path.exists(folder1_to_zip + ".zip")
+        assert os.path.exists(folder2_to_zip + ".zip")
+        unzip_dir_pzf = os.path.join(self.test_dir, "unzip_pzf")
+        unzip_file(folder1_to_zip + ".zip", unzip_dir_pzf)
+        assert os.path.exists(os.path.join(unzip_dir_pzf, "pf1.txt"))
+
+        zef_folder = os.path.join(self.test_dir, "zef_test_folder")
+        os.makedirs(zef_folder, exist_ok=True)
+        zef_file1 = os.path.join(zef_folder, "zef1.txt")
+        zef_file2_png = os.path.join(zef_folder, "zef2.png")
+        zef_file3_zip = os.path.join(zef_folder, "zef3.zip")
+        zef_subdir = os.path.join(zef_folder, "zef_sub")
+        os.makedirs(zef_subdir, exist_ok=True)
+        zef_file_in_sub = os.path.join(zef_subdir, "zef_subfile.txt")
+
+        for p_path in [zef_file1, zef_file2_png, zef_file3_zip, zef_file_in_sub]:
+            with open(p_path, 'w') as f:
+                f.write(f"content of {os.path.basename(p_path)}")
+
+        zip_each_file_in_folder(zef_folder, recursive=False, max_workers=1, overwrite=True)
+        assert os.path.exists(zef_file1 + ".zip")
+        assert os.path.exists(zef_file2_png + ".zip")
+        assert not os.path.exists(zef_file3_zip + ".zip")
+        assert not os.path.exists(zef_file_in_sub + ".zip")
+
+        if os.path.exists(zef_file1 + ".zip"):
+            os.remove(zef_file1 + ".zip")
+        if os.path.exists(zef_file2_png + ".zip"):
+            os.remove(zef_file2_png + ".zip")
+
+        zip_each_file_in_folder(zef_folder, recursive=True, max_workers=1, overwrite=True)
+        assert os.path.exists(zef_file1 + ".zip")
+        assert os.path.exists(zef_file2_png + ".zip")
+        assert not os.path.exists(zef_file3_zip + ".zip")
+        assert os.path.exists(zef_file_in_sub + ".zip")
+
+        if os.path.exists(zef_file1 + ".zip"):
+            os.remove(zef_file1 + ".zip")
+        if os.path.exists(zef_file2_png + ".zip"):
+            os.remove(zef_file2_png + ".zip")
+        if os.path.exists(zef_file_in_sub + ".zip"):
+            os.remove(zef_file_in_sub + ".zip")
+        zip_each_file_in_folder(zef_folder, recursive=True, required_token="zef1", max_workers=1, overwrite=True)
+        assert os.path.exists(zef_file1 + ".zip")
+        assert not os.path.exists(zef_file2_png + ".zip")
+        assert not os.path.exists(zef_file_in_sub + ".zip")
+
+        if os.path.exists(zef_file1 + ".zip"):
+            os.remove(zef_file1 + ".zip")
+        dummy_to_zip = os.path.join(zef_folder, "dummy.txt")
+        with open(dummy_to_zip, 'w') as f:
+            f.write('d')
+        zip_each_file_in_folder(zef_folder, recursive=False, exclude_zip=False, max_workers=1, overwrite=True)
+        assert os.path.exists(dummy_to_zip + ".zip")
+        assert os.path.exists(zef_file3_zip + ".zip")
+        if os.path.exists(dummy_to_zip + ".zip"):
+            os.remove(dummy_to_zip + ".zip")
+        if os.path.exists(zef_file3_zip + ".zip"):
+            os.remove(zef_file3_zip + ".zip")
+
+
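+    # Defaults pinned down above: zip_each_file_in_folder skips files that are
+    # already .zip archives unless exclude_zip=False, only descends into
+    # subfolders when recursive=True, and required_token restricts zipping to
+    # filenames containing that substring (sketch, hypothetical path):
+    #
+    #   zip_each_file_in_folder('/data', recursive=True, required_token='2023',
+    #                           max_workers=4, overwrite=True)
+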
+    def test_compute_file_hash(self):
+        """
+        Test compute_file_hash and parallel_compute_file_hashes.
+        """
+
+        file1_name = "hash_me1.txt"
+        file1_path = os.path.join(self.test_dir, file1_name)
+        content1 = "This is a test string for hashing."
+        with open(file1_path, 'w') as f:
+            f.write(content1)
+
+        file2_name = "hash_me2.txt"
+        file2_path = os.path.join(self.test_dir, file2_name)
+        with open(file2_path, 'w') as f:
+            f.write(content1)
+
+        file3_name = "hash_me3.txt"
+        file3_path = os.path.join(self.test_dir, file3_name)
+        content3 = "This is a different test string for hashing."
+        with open(file3_path, 'w') as f:
+            f.write(content3)
+
+        expected_hash_content1_sha256 = \
+            "c56f19d76df6a09e49fe0d9ce7b1bc7f1dbd582f668742bede65c54c47d5bcf4".lower()
+        expected_hash_content3_sha256 = \
+            "23013ff7e93264317f7b2fc0e9a217649f2dc0b11ca7e0bd49632424b70b6680".lower()
+
+        hash1 = compute_file_hash(file1_path)
+        hash2 = compute_file_hash(file2_path)
+        hash3 = compute_file_hash(file3_path)
+        assert hash1 == expected_hash_content1_sha256
+        assert hash2 == expected_hash_content1_sha256
+        assert hash1 != hash3
+        assert hash3 == expected_hash_content3_sha256
+
+        expected_hash_content1_md5 = "94b971f1f8cdb23c2af82af73160d4b0".lower()
+        hash1_md5 = compute_file_hash(file1_path, algorithm='md5')
+        assert hash1_md5 == expected_hash_content1_md5
+
+        non_existent_path = os.path.join(self.test_dir, "no_such_file.txt")
+        assert compute_file_hash(non_existent_path, allow_failures=True) is None
+        try:
+            compute_file_hash(non_existent_path, allow_failures=False)
+            raise AssertionError("FileNotFoundError not raised for compute_file_hash")
+        except FileNotFoundError:
+            pass
+
+        files_to_hash = [file1_path, file3_path, non_existent_path]
+        hashes_parallel = parallel_compute_file_hashes(files_to_hash, max_workers=1)
+
+        norm_f1 = file1_path.replace('\\','/')
+        norm_f3 = file3_path.replace('\\','/')
+        norm_non = non_existent_path.replace('\\','/')
+
+        expected_parallel_hashes = {
+            norm_f1: expected_hash_content1_sha256,
+            norm_f3: expected_hash_content3_sha256,
+            norm_non: None
+        }
+        hashes_parallel_norm = {k.replace('\\','/'): v for k, v in hashes_parallel.items()}
+        assert hashes_parallel_norm == expected_parallel_hashes
+
+        hash_folder = os.path.join(self.test_dir, "hash_test_folder")
+        os.makedirs(hash_folder, exist_ok=True)
+        h_f1_name = "h_f1.txt"
+        h_f1_path = os.path.join(hash_folder, h_f1_name)
+        h_f2_name = "h_f2.txt"
+        h_f2_path = os.path.join(hash_folder, h_f2_name)
+        with open(h_f1_path, 'w') as f:
+            f.write(content1)
+        with open(h_f2_path, 'w') as f:
+            f.write(content3)
+
+        hashes_folder_parallel = parallel_compute_file_hashes(hash_folder, recursive=False, max_workers=1)
+        norm_hf1 = h_f1_path.replace('\\','/')
+        norm_hf2 = h_f2_path.replace('\\','/')
+        expected_folder_hashes = {
+            norm_hf1: expected_hash_content1_sha256,
+            norm_hf2: expected_hash_content3_sha256
+        }
+        hashes_folder_parallel_norm = {k.replace('\\','/'): v for k, v in hashes_folder_parallel.items()}
+        assert hashes_folder_parallel_norm == expected_folder_hashes
+
+
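+    # Hashing semantics exercised above: compute_file_hash defaults to SHA-256
+    # and accepts algorithm='md5'; with allow_failures=True it returns None for
+    # unreadable files, and parallel_compute_file_hashes accepts either a file
+    # list or a folder (sketch, hypothetical path):
+    #
+    #   parallel_compute_file_hashes('/data', recursive=True, max_workers=4)
+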
+def test_path_utils():
+    """
+    Runs all tests in the TestPathUtils class.
+    """
+
+    test_instance = TestPathUtils()
+    test_instance.set_up()
+    try:
+        test_instance.test_is_image_file()
+        test_instance.test_find_image_strings()
+        test_instance.test_find_images()
+        test_instance.test_recursive_file_list_and_file_list()
+        test_instance.test_folder_list()
+        test_instance.test_folder_summary()
+        test_instance.test_fileparts()
+        test_instance.test_insert_before_extension()
+        test_instance.test_split_path()
+        test_instance.test_path_is_abs()
+        test_instance.test_safe_create_link_unix()
+        test_instance.test_remove_empty_folders()
+        test_instance.test_path_join()
+        test_instance.test_filename_cleaning()
+        test_instance.test_is_executable()
+        test_instance.test_write_read_list_to_file()
+        test_instance.test_parallel_copy_files()
+        test_instance.test_get_file_sizes()
+        test_instance.test_zip_file_and_unzip_file()
+        test_instance.test_zip_folder()
+        test_instance.test_zip_files_into_single_zipfile()
+        test_instance.test_add_files_to_single_tar_file()
+        test_instance.test_parallel_zip_individual_files_and_folders()
+        test_instance.test_compute_file_hash()
+    finally:
+        test_instance.tear_down()
+
+# from IPython import embed; embed()
+# test_path_utils()
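+# A sketch of one way to invoke the driver directly (hypothetical; a test
+# runner such as pytest would also discover the module-level test_path_utils
+# function on its own):
+#
+#   if __name__ == '__main__':
+#       test_path_utils()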