megadetector 5.0.28-py3-none-any.whl → 5.0.29-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +231 -224
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +304 -262
- megadetector/detection/run_detector.py +177 -164
- megadetector/detection/run_detector_batch.py +364 -363
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +256 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +290 -282
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +415 -415
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +219 -146
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +23 -20
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +313 -298
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1018 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1457 -398
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +61 -61
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +401 -397
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +79 -73
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.28.dist-info/RECORD +0 -209
megadetector/utils/path_utils.py
CHANGED
@@ -24,6 +24,7 @@ import tarfile
 import webbrowser
 import subprocess
 import re
+import tempfile
 
 from zipfile import ZipFile
 from datetime import datetime
@@ -34,6 +35,7 @@ from shutil import which
 from tqdm import tqdm
 
 from megadetector.utils.ct_utils import is_iterable
+from megadetector.utils.ct_utils import make_test_folder
 from megadetector.utils.ct_utils import sort_dictionary_by_value
 
 # Should all be lower-case
@@ -47,14 +49,14 @@ CHAR_LIMIT = 255
 
 #%% General path functions
 
-def recursive_file_list(base_dir,
-                        convert_slashes=True,
-                        return_relative_paths=False,
+def recursive_file_list(base_dir,
+                        convert_slashes=True,
+                        return_relative_paths=False,
                         sort_files=True,
                         recursive=True):
     r"""
     Enumerates files (not directories) in [base_dir].
-
+
     Args:
         base_dir (str): folder to enumerate
         convert_slashes (bool, optional): force forward slashes; if this is False, will use
@@ -64,15 +66,15 @@ def recursive_file_list(base_dir,
         sort_files (bool, optional): force files to be sorted, otherwise uses the sorting
            provided by os.walk()
         recursive (bool, optional): enumerate recursively
-
+
     Returns:
         list: list of filenames
     """
-
+
     assert os.path.isdir(base_dir), '{} is not a folder'.format(base_dir)
-
+
     all_files = []
-
+
     if recursive:
         for root, _, filenames in os.walk(base_dir):
             for filename in filenames:
@@ -82,29 +84,29 @@ def recursive_file_list(base_dir,
         all_files_relative = os.listdir(base_dir)
         all_files = [os.path.join(base_dir,fn) for fn in all_files_relative]
         all_files = [fn for fn in all_files if os.path.isfile(fn)]
-
+
     if return_relative_paths:
         all_files = [os.path.relpath(fn,base_dir) for fn in all_files]
 
     if convert_slashes:
         all_files = [fn.replace('\\', '/') for fn in all_files]
-
+
     if sort_files:
         all_files = sorted(all_files)
-
+
     return all_files
 
 
-def file_list(base_dir,
+def file_list(base_dir,
               convert_slashes=True,
-              return_relative_paths=False,
-              sort_files=True,
+              return_relative_paths=False,
+              sort_files=True,
              recursive=False):
     """
-    Trivial wrapper for recursive_file_list, which was a poor function name choice
-    at the time, since I later wanted to add non-recursive lists, but it doesn't
+    Trivial wrapper for recursive_file_list, which was a poor function name choice
+    at the time, since I later wanted to add non-recursive lists, but it doesn't
     make sense to have a "recursive" option in a function called "recursive_file_list".
-
+
     Args:
         base_dir (str): folder to enumerate
         convert_slashes (bool, optional): force forward slashes; if this is False, will use
@@ -114,11 +116,11 @@ def file_list(base_dir,
         sort_files (bool, optional): force files to be sorted, otherwise uses the sorting
            provided by os.walk()
         recursive (bool, optional): enumerate recursively
-
+
     Returns:
-        list: list of filenames
+        list: list of filenames
     """
-
+
     return recursive_file_list(base_dir,convert_slashes,return_relative_paths,sort_files,
                                recursive=recursive)
 
@@ -128,10 +130,9 @@ def folder_list(base_dir,
                 return_relative_paths=False,
                 sort_folders=True,
                 recursive=False):
-
     """
     Enumerates folders (not files) in [base_dir].
-
+
     Args:
         base_dir (str): folder to enumerate
         convert_slashes (bool, optional): force forward slashes; if this is False, will use
@@ -141,81 +142,81 @@ def folder_list(base_dir,
         sort_files (bool, optional): force folders to be sorted, otherwise uses the sorting
            provided by os.walk()
         recursive (bool, optional): enumerate recursively
-
+
     Returns:
         list: list of folder names
     """
-
+
     assert os.path.isdir(base_dir), '{} is not a folder'.format(base_dir)
-
+
     folders = []
 
-    if recursive:
+    if recursive:
         folders = []
         for root, dirs, _ in os.walk(base_dir):
             for d in dirs:
-                folders.append(os.path.join(root, d))
+                folders.append(os.path.join(root, d))
     else:
         folders = os.listdir(base_dir)
         folders = [os.path.join(base_dir,fn) for fn in folders]
         folders = [fn for fn in folders if os.path.isdir(fn)]
-
+
     if return_relative_paths:
         folders = [os.path.relpath(fn,base_dir) for fn in folders]
 
     if convert_slashes:
         folders = [fn.replace('\\', '/') for fn in folders]
-
+
     if sort_folders:
-        folders = sorted(folders)
-
+        folders = sorted(folders)
+
     return folders
 
 
 def folder_summary(folder,print_summary=True):
     """
     Returns (and optionally prints) a summary of [folder], including:
-
+
     * The total number of files
     * The total number of folders
-    * The number of files for each extension
-
+    * The number of files for each extension
+
     Args:
         folder (str): folder to summarize
         print_summary (bool, optional): whether to print the summary
-
+
     Returns:
         dict: with fields "n_files", "n_folders", and "extension_to_count"
     """
-
+
     assert os.path.isdir(folder), '{} is not a folder'.format(folder)
-
+
     folders_relative = folder_list(folder,return_relative_paths=True,recursive=True)
     files_relative = file_list(folder,return_relative_paths=True,recursive=True)
-
+
     extension_to_count = defaultdict(int)
-
+
     for fn in files_relative:
         ext = os.path.splitext(fn)[1]
         extension_to_count[ext] += 1
-
+
     extension_to_count = sort_dictionary_by_value(extension_to_count,reverse=True)
-
+
     if print_summary:
         for extension in extension_to_count.keys():
             print('{}: {}'.format(extension,extension_to_count[extension]))
         print('')
         print('Total files: {}'.format(len(files_relative)))
         print('Total folders: {}'.format(len(folders_relative)))
-
+
     to_return = {}
     to_return['n_files'] = len(files_relative)
     to_return['n_folders'] = len(folders_relative)
-    to_return['extension_to_count'] = extension_to_count
-
+    to_return['extension_to_count'] = extension_to_count
+
     return to_return
-
-
+
+
 def fileparts(path):
     r"""
     Breaks down a path into the directory path, filename, and extension.
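A minimal usage sketch for the folder_summary function rewritten in the hunk above; the folder name is a placeholder, and the return fields come directly from the docstring in the diff:

    from megadetector.utils.path_utils import folder_summary

    # Summarize an existing folder; 'camera_data' is a hypothetical path
    summary = folder_summary('camera_data', print_summary=False)
    print(summary['n_files'], summary['n_folders'])
    print(summary['extension_to_count'])  # e.g. {'.jpg': 1204, '.json': 3}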
@@ -223,25 +224,25 @@ def fileparts(path):
     Note that the '.' lives with the extension, and separators are removed.
 
     Examples:
-
+
     .. code-block:: none
 
-        >>> fileparts('file')
+        >>> fileparts('file')
         ('', 'file', '')
         >>> fileparts(r'c:/dir/file.jpg')
         ('c:/dir', 'file', '.jpg')
         >>> fileparts('/dir/subdir/file.jpg')
-        ('/dir/subdir', 'file', '.jpg')
+        ('/dir/subdir', 'file', '.jpg')
 
     Args:
         path (str): path name to separate into parts
     Returns:
-        tuple: tuple containing (p,n,e):
+        tuple: tuple containing (p,n,e):
         - p: str, directory path
         - n: str, filename without extension
         - e: str, extension including the '.'
     """
-
+
     # ntpath seems to do the right thing for both Windows and Unix paths
     p = ntpath.dirname(path)
     basename = ntpath.basename(path)
@@ -257,27 +258,27 @@ def insert_before_extension(filename, s=None, separator='.'):
     appends [s].
 
     Examples:
-
+
     .. code-block:: none
-
+
         >>> insert_before_extension('/dir/subdir/file.ext', 'insert')
         '/dir/subdir/file.insert.ext'
         >>> insert_before_extension('/dir/subdir/file', 'insert')
         '/dir/subdir/file.insert'
         >>> insert_before_extension('/dir/subdir/file')
         '/dir/subdir/file.2020.07.20.10.54.38'
-
+
     Args:
         filename (str): filename to manipulate
         s (str, optional): string to insert before the extension in [filename], or
            None to insert a datestamp
         separator (str, optional): separator to place between the filename base
            and the inserted string
-
+
     Returns:
         str: modified string
     """
-
+
     assert len(filename) > 0
     if s is None or len(s) == 0:
         s = datetime.now().strftime('%Y.%m.%d.%H.%M.%S')
@@ -290,9 +291,9 @@ def split_path(path):
     Splits [path] into all its constituent file/folder tokens.
 
     Examples:
-
+
     .. code-block:: none
-
+
         >>> split_path(r'c:\dir\subdir\file.txt')
         ['c:\\', 'dir', 'subdir', 'file.txt']
         >>> split_path('/dir/subdir/file.jpg')
@@ -301,13 +302,19 @@ def split_path(path):
         ['c:\\']
         >>> split_path('/')
         ['/']
-
+
     Args:
         path (str): path to split into tokens
-
+
     Returns:
         list: list of path tokens
     """
+
+    # Edge cases
+    if path == '':
+        return ''
+    if path is None:
+        return None
 
     parts = []
     while True:
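The lines added above give split_path explicit edge-case behavior in 5.0.29; a short sketch of the resulting contract, based only on the code and docstring examples in this hunk:

    from megadetector.utils.path_utils import split_path

    print(split_path('/dir/subdir/file.jpg'))  # list of path tokens
    assert split_path('') == ''       # new: empty string passes through
    assert split_path(None) is None   # new: None passes through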
@@ -325,32 +332,32 @@ def path_is_abs(p):
     """
     Determines whether [p] is an absolute path. An absolute path is defined as
     one that starts with slash, backslash, or a letter followed by a colon.
-
+
     Args:
         p (str): path to evaluate
-
+
     Returns:
         bool: True if [p] is an absolute path, else False
     """
-
+
     return (len(p) > 1) and (p[0] == '/' or p[1] == ':' or p[0] == '\\')
 
 
 def safe_create_link(link_exists,link_new):
     """
     Creates a symlink at [link_new] pointing to [link_exists].
-
+
     If [link_new] already exists, make sure it's a link (not a file),
     and if it has a different target than [link_exists], removes and re-creates
     it.
-
+
     Errors if [link_new] already exists but it's not a link.
-
+
     Args:
         link_exists (str): the source of the (possibly-new) symlink
         link_new (str): the target of the (possibly-new) symlink
     """
-
+
     if os.path.exists(link_new) or os.path.islink(link_new):
         assert os.path.islink(link_new)
         if not os.readlink(link_new) == link_exists:
@@ -358,35 +365,35 @@ def safe_create_link(link_exists,link_new):
             os.symlink(link_exists,link_new)
     else:
         os.symlink(link_exists,link_new)
-
+
 
 def remove_empty_folders(path, remove_root=False):
     """
     Recursively removes empty folders within the specified path.
-
+
     Args:
-        path (str): the folder from which we should recursively remove
+        path (str): the folder from which we should recursively remove
            empty folders.
-        remove_root (bool, optional): whether to remove the root directory if
+        remove_root (bool, optional): whether to remove the root directory if
            it's empty after removing all empty subdirectories. This will always
            be True during recursive calls.
-
+
     Returns:
         bool: True if the directory is empty after processing, False otherwise
     """
-
+
     # Verify that [path] is a directory
     if not os.path.isdir(path):
         return False
-
+
     # Track whether the current directory is empty
     is_empty = True
-
+
     # Iterate through all items in the directory
     for item in os.listdir(path):
-
+
         item_path = os.path.join(path, item)
-
+
         # If it's a directory, process it recursively
         if os.path.isdir(item_path):
             # If the subdirectory is empty after processing, it will be removed
@@ -396,118 +403,57 @@ def remove_empty_folders(path, remove_root=False):
         else:
             # If there's a file, the directory is not empty
             is_empty = False
-
+
     # If the directory is empty and we're supposed to remove it
     if is_empty and remove_root:
         try:
-            os.rmdir(path)
+            os.rmdir(path)
         except Exception as e:
             print('Error removing directory {}: {}'.format(path,str(e)))
             is_empty = False
-
+
     return is_empty
 
 # ...def remove_empty_folders(...)
 
 
-def top_level_folder(p):
-    r"""
-    Gets the top-level folder from the path *p*.
-
-    On UNIX, this is straightforward:
-
-    /blah/foo
-
-    ...returns '/blah'
-
-    On Windows, we define this as the top-level folder that isn't the drive, so:
-
-    c:\blah\foo
-
-    ...returns 'c:\blah'.
-
-    Args:
-        p (str): filename to evaluate
-
-    Returns:
-        str: the top-level folder in [p], see above for details on how this is defined
-    """
-
-    if p == '':
-        return ''
-
-    # Path('/blah').parts is ('/','blah')
-    parts = split_path(p)
-
-    if len(parts) == 1:
-        return parts[0]
-
-    # Handle paths like:
-    #
-    # /, \, /stuff, c:, c:\stuff
-    drive = os.path.splitdrive(p)[0]
-    if parts[0] == drive or parts[0] == drive + '/' or parts[0] == drive + '\\' or parts[0] in ['\\', '/']:
-        return os.path.join(parts[0], parts[1])
-    else:
-        return parts[0]
-
-# ...top_level_folder()
-
-
 def path_join(*paths, convert_slashes=True):
     r"""
     Wrapper for os.path.join that optionally converts backslashes to forward slashes.
-
+
     Args:
         *paths (variable-length set of strings): Path components to be joined.
         convert_slashes (bool, optional): whether to convert \\ to /
-
+
     Returns:
         A string with the joined path components.
     """
-
+
     joined_path = os.path.join(*paths)
     if convert_slashes:
         return joined_path.replace('\\', '/')
     else:
         return joined_path
 
-
-#%% Test driver for top_level_folder
-
-if False:
-
-    #%%
 
-    p = 'blah/foo/bar'; s = top_level_folder(p); print(s); assert s == 'blah'
-    p = '/blah/foo/bar'; s = top_level_folder(p); print(s); assert s == '/blah'
-    p = 'bar'; s = top_level_folder(p); print(s); assert s == 'bar'
-    p = ''; s = top_level_folder(p); print(s); assert s == ''
-    p = 'c:\\'; s = top_level_folder(p); print(s); assert s == 'c:\\'
-    p = r'c:\blah'; s = top_level_folder(p); print(s); assert s == 'c:\\blah'
-    p = r'c:\foo'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
-    p = r'c:/foo'; s = top_level_folder(p); print(s); assert s == 'c:/foo'
-    p = r'c:\foo/bar'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
-
-
 #%% Image-related path functions
 
 def is_image_file(s, img_extensions=IMG_EXTENSIONS):
     """
     Checks a file's extension against a hard-coded set of image file
     extensions. Uses case-insensitive comparison.
-
+
     Does not check whether the file exists, only determines whether the filename
     implies it's an image file.
-
+
     Args:
         s (str): filename to evaluate for image-ness
         img_extensions (list, optional): list of known image file extensions
-
+
     Returns:
         bool: True if [s] appears to be an image file, else False
     """
-
+
     ext = os.path.splitext(s)[1]
     return ext.lower() in img_extensions
 
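With top_level_folder and its inline test driver removed in the hunk above, path_join remains the general-purpose join helper in this region; a small sketch of its slash-normalizing behavior, inferred from the code shown:

    from megadetector.utils.path_utils import path_join

    # os.path.join semantics, but backslashes converted to forward slashes
    print(path_join('a', 'b', 'c'))                         # 'a/b/c'
    print(path_join(r'c:\images', 'cam1', 'IMG_0001.JPG'))  # 'c:/images/cam1/IMG_0001.JPG'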
@@ -516,27 +462,27 @@ def find_image_strings(strings):
     """
     Given a list of strings that are potentially image file names, looks for
     strings that actually look like image file names (based on extension).
-
+
     Args:
         strings (list): list of filenames to check for image-ness
-
+
     Returns:
         list: the subset of [strings] that appear to be image filenames
     """
-
+
     return [s for s in strings if is_image_file(s)]
 
 
-def find_images(dirname,
-                recursive=False,
-                return_relative_paths=False,
+def find_images(dirname,
+                recursive=False,
+                return_relative_paths=False,
                 convert_slashes=True):
     """
     Finds all files in a directory that look like image file names. Returns
     absolute paths unless return_relative_paths is set. Uses the OS-native
     path separator unless convert_slashes is set, in which case will always
     use '/'.
-
+
     Args:
         dirname (str): the folder to search for images
         recursive (bool, optional): whether to search recursively
@@ -547,30 +493,30 @@ def find_images(dirname,
     Returns:
         list: list of image filenames found in [dirname]
     """
-
+
     assert os.path.isdir(dirname), '{} is not a folder'.format(dirname)
-
+
     if recursive:
         strings = glob.glob(os.path.join(dirname, '**', '*.*'), recursive=True)
     else:
         strings = glob.glob(os.path.join(dirname, '*.*'))
-
+
     image_files = find_image_strings(strings)
-
+
     if return_relative_paths:
         image_files = [os.path.relpath(fn,dirname) for fn in image_files]
-
+
     image_files = sorted(image_files)
-
+
     if convert_slashes:
         image_files = [fn.replace('\\', '/') for fn in image_files]
-
+
     return image_files
 
 
 #%% Filename cleaning functions
 
-def clean_filename(filename,
+def clean_filename(filename,
                    allow_list=VALID_FILENAME_CHARS,
                    char_limit=CHAR_LIMIT,
                    force_lower= False):
@@ -582,18 +528,18 @@ def clean_filename(filename,
 
     Adapted from
     https://gist.github.com/wassname/1393c4a57cfcbf03641dbc31886123b8
-
+
     Args:
         filename (str): filename to clean
         allow_list (str, optional): string containing all allowable filename characters
         char_limit (int, optional): maximum allowable filename length, if None will skip this
            step
         force_lower (bool, optional): convert the resulting filename to lowercase
-
-
-        str: cleaned version of [filename]
+
+    Returns:
+        str: cleaned version of [filename]
     """
-
+
     # keep only valid ascii chars
     cleaned_filename = (unicodedata.normalize('NFKD', filename)
                         .encode('ASCII', 'ignore').decode())
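The change above only restores the missing "Returns:" header in the docstring; runtime behavior is unchanged. A hedged sketch of typical clean_filename behavior (the input string is invented, and the exact output depends on the contents of VALID_FILENAME_CHARS):

    from megadetector.utils.path_utils import clean_filename

    # Accented characters are normalized to ASCII; characters outside
    # the allow list are dropped
    print(clean_filename('café photo?.jpg'))  # e.g. 'cafe photo.jpg'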
@@ -607,26 +553,26 @@ def clean_filename(
     return cleaned_filename
 
 
-def clean_path(pathname,
+def clean_path(pathname,
                allow_list=VALID_PATH_CHARS,
                char_limit=CHAR_LIMIT,
                force_lower=False):
     """
     Removes non-ASCII and other invalid path characters (on any reasonable
     OS) from a path, then optionally trims to a maximum length.
-
+
     Args:
         pathname (str): path name to clean
         allow_list (str, optional): string containing all allowable filename characters
         char_limit (int, optional): maximum allowable filename length, if None will skip this
            step
         force_lower (bool, optional): convert the resulting filename to lowercase
-
-
-        str: cleaned version of [filename]
+
+    Returns:
+        str: cleaned version of [filename]
     """
-
-    return clean_filename(pathname, allow_list=allow_list,
+
+    return clean_filename(pathname, allow_list=allow_list,
                           char_limit=char_limit, force_lower=force_lower)
 
 
@@ -635,34 +581,34 @@ def flatten_path(pathname,separator_chars=SEPARATOR_CHARS,separator_char_replace
     Removes non-ASCII and other invalid path characters (on any reasonable
     OS) from a path, then trims to a maximum length. Replaces all valid
     separators with [separator_char_replacement.]
-
+
     Args:
         pathname (str): path name to flatten
         separator_chars (str, optional): string containing all known path separators
-        separator_char_replacement (str, optional): string to insert in place of
+        separator_char_replacement (str, optional): string to insert in place of
            path separators.
-
+
     Returns:
         str: flattened version of [pathname]
     """
-
+
     s = clean_path(pathname)
     for c in separator_chars:
         s = s.replace(c, separator_char_replacement)
     return s
 
 
-def is_executable(filename):
+def is_executable(filename):
     """
     Checks whether [filename] is on the system path and marked as executable.
-
+
     Args:
         filename (str): filename to check for executable status
-
+
     Returns:
         bool: True if [filename] is on the system path and marked as executable, otherwise False
     """
-
+
     # https://stackoverflow.com/questions/11210104/check-if-a-program-exists-from-a-python-script
 
     return which(filename) is not None
@@ -673,247 +619,247 @@ def is_executable(filename):
 def environment_is_wsl():
     """
     Determines whether we're running in WSL.
-
+
     Returns:
-        True if we're running in WSL.
+        True if we're running in WSL.
     """
-
+
     if sys.platform not in ('linux','posix'):
         return False
     platform_string = ' '.join(platform.uname()).lower()
     return 'microsoft' in platform_string and 'wsl' in platform_string
-
+
 
 def wsl_path_to_windows_path(filename, failure_behavior='none'):
     r"""
     Converts a WSL path to a Windows path. For example, converts:
-
+
     /mnt/e/a/b/c
-
+
     ...to:
-
+
     e:\a\b\c
-
+
     Args:
         filename (str): filename to convert
         failure_behavior (str): what to do if the path can't be processed as a WSL path.
            'none' to return None in this case, 'original' to return the original path.
-
+
     Returns:
         str: Windows equivalent to the WSL path [filename]
     """
-
+
     assert failure_behavior in ('none','original'), \
         'Unrecognized failure_behavior value {}'.format(failure_behavior)
-
+
     # Check whether the path follows the standard WSL mount pattern
     wsl_path_pattern = r'^/mnt/([a-zA-Z])(/.*)?$'
     match = re.match(wsl_path_pattern, filename)
-
+
     if match:
 
         # Extract the drive letter and the rest of the path
         drive_letter = match.group(1)
         path_remainder = match.group(2) if match.group(2) else ''
-
+
         # Convert forward slashes to backslashes for Windows
         path_remainder = path_remainder.replace('/', '\\')
-
+
         # Format the Windows path
         windows_path = f"{drive_letter}:{path_remainder}"
         return windows_path
-
+
     if failure_behavior == 'none':
         return None
     else:
         return filename
 
 # ...def wsl_path_to_windows_path(...)
-
-
+
+
 def windows_path_to_wsl_path(filename, failure_behavior='none'):
     r"""
     Converts a Windows path to a WSL path, or returns None if that's not possible. E.g.
     converts:
-
+
     e:\a\b\c
-
+
     ...to:
-
+
     /mnt/e/a/b/c
-
+
     Args:
         filename (str): filename to convert
         failure_behavior (str): what to do if the path can't be processed as a Windows path.
            'none' to return None in this case, 'original' to return the original path.
-
+
     Returns:
         str: WSL equivalent to the Windows path [filename]
     """
-
+
     assert failure_behavior in ('none','original'), \
         'Unrecognized failure_behavior value {}'.format(failure_behavior)
-
+
     filename = filename.replace('\\', '/')
-
+
     # Check whether the path follows a Windows drive letter pattern
     windows_path_pattern = r'^([a-zA-Z]):(/.*)?$'
     match = re.match(windows_path_pattern, filename)
-
+
     if match:
         # Extract the drive letter and the rest of the path
         drive_letter = match.group(1).lower()  # Convert to lowercase for WSL
         path_remainder = match.group(2) if match.group(2) else ''
-
+
         # Format the WSL path
         wsl_path = f"/mnt/{drive_letter}{path_remainder}"
         return wsl_path
-
+
     if failure_behavior == 'none':
         return None
     else:
         return filename
-
+
 # ...def window_path_to_wsl_path(...)
 
 
 def open_file_in_chrome(filename):
     """
-    Open a file in chrome, regardless of file type. I typically use this to open
+    Open a file in chrome, regardless of file type. I typically use this to open
     .md files in Chrome.
-
+
     Args:
         filename (str): file to open
-
+
     Return:
         bool: whether the operation was successful
     """
-
+
     # Create URL
     abs_path = os.path.abspath(filename)
-
+
     system = platform.system()
     if system == 'Windows':
         url = f'file:///{abs_path.replace(os.sep, "/")}'
     else:  # macOS and Linux
         url = f'file://{abs_path}'
-
+
     # Determine the Chrome path
     if system == 'Windows':
-
+
         # This is a native Python module, but it only exists on Windows
         import winreg
-
+
         chrome_paths = [
             os.path.expanduser("~") + r"\AppData\Local\Google\Chrome\Application\chrome.exe",
             r"C:\Program Files\Google\Chrome\Application\chrome.exe",
             r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
         ]
-
+
         # Default approach: run from a typical chrome location
         for path in chrome_paths:
             if os.path.exists(path):
                 subprocess.run([path, url])
                 return True
-
+
         # Method 2: Check registry for Chrome path
         try:
-            with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
+            with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
                 r"SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe") as key:
                 chrome_path = winreg.QueryValue(key, None)
                 if chrome_path and os.path.exists(chrome_path):
                     subprocess.run([chrome_path, url])
                     return True
-        except:
+        except Exception:
             pass
-
+
         # Method 3: Try alternate registry location
         try:
-            with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
+            with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                 r"Software\Google\Chrome\BLBeacon") as key:
                 chrome_path = os.path.join(os.path.dirname(winreg.QueryValueEx(key, "version")[0]), "chrome.exe")
                 if os.path.exists(chrome_path):
                     subprocess.run([chrome_path, url])
                     return True
-        except:
+        except Exception:
             pass
-
+
         # Method 4: Try system path or command
         for chrome_cmd in ["chrome", "chrome.exe", "googlechrome", "google-chrome"]:
             try:
                 subprocess.run([chrome_cmd, url], shell=True)
                 return True
-            except:
+            except Exception:
                 continue
-
+
         # Method 5: Use Windows URL protocol handler
         try:
             os.startfile(url)
             return True
-        except:
+        except Exception:
             pass
-
-        # Method 6: Use rundll32
+
+        # Method 6: Use rundll32
         try:
             cmd = f'rundll32 url.dll,FileProtocolHandler {url}'
             subprocess.run(cmd, shell=True)
             return True
-        except:
+        except Exception:
             pass
-
+
     elif system == 'Darwin':
-
+
         chrome_paths = [
             '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
             os.path.expanduser('~/Applications/Google Chrome.app/Contents/MacOS/Google Chrome')
         ]
-
+
         for path in chrome_paths:
             if os.path.exists(path):
                 subprocess.run([path, url])
                 return True
-
+
         # Fallback to 'open' command with Chrome as the app
         try:
             subprocess.run(['open', '-a', 'Google Chrome', url])
             return True
-        except:
+        except Exception:
             pass
-
+
     elif system == 'Linux':
-
+
         chrome_commands = ['google-chrome', 'chrome', 'chromium', 'chromium-browser']
-
+
         for cmd in chrome_commands:
             try:
                 subprocess.run([cmd, url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
                 return True
-            except:
+            except Exception:
                 continue
-
+
     print(f"Could not open {filename} in Chrome on {system}.")
     return False
 
-
+
 def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
     """
     Opens [filename] in the default OS file handler for this file type.
-
+
     If browser_name is not None, uses the webbrowser module to open the filename
     in the specified browser; see https://docs.python.org/3/library/webbrowser.html
     for supported browsers. Falls back to the default file handler if webbrowser.open()
     fails. In this case, attempt_to_open_in_wsl_host is ignored unless webbrowser.open() fails.
-
-    If browser_name is 'default', uses the system default. This is different from the
+
+    If browser_name is 'default', uses the system default. This is different from the
     parameter to webbrowser.get(), where None implies the system default.
-
+
     Args:
         filename (str): file to open
         attempt_to_open_in_wsl_host: if this is True, and we're in WSL, attempts to open
            [filename] in the Windows host environment
         browser_name: see above
     """
-
+
     if browser_name is not None:
         if browser_name == 'chrome':
             browser_name = 'google-chrome'
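A round-trip sketch for the two WSL path helpers shown above (paths are illustrative; the expected values follow directly from the regexes in the diff):

    from megadetector.utils.path_utils import (
        windows_path_to_wsl_path, wsl_path_to_windows_path)

    assert wsl_path_to_windows_path('/mnt/e/a/b/c') == 'e:\\a\\b\\c'
    assert windows_path_to_wsl_path('e:\\a\\b\\c') == '/mnt/e/a/b/c'

    # Paths that don't match the pattern return None by default...
    assert wsl_path_to_windows_path('/home/user/file') is None
    # ...or the original path with failure_behavior='original'
    assert wsl_path_to_windows_path('/home/user/file',
                                    failure_behavior='original') == '/home/user/file'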
@@ -925,32 +871,32 @@ def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
             result = False
         if result:
             return
-
+
     if sys.platform == 'win32':
-
+
         os.startfile(filename)
 
     elif sys.platform == 'darwin':
-
+
         opener = 'open'
         subprocess.call([opener, filename])
-
+
     elif attempt_to_open_in_wsl_host and environment_is_wsl():
-
+
         windows_path = wsl_path_to_windows_path(filename)
-
+
         # Fall back to xdg-open
         if windows_path is None:
             subprocess.call(['xdg-open', filename])
-
-        if os.path.isdir(filename):
+
+        if os.path.isdir(filename):
             subprocess.run(["explorer.exe", windows_path])
         else:
-            os.system("cmd.exe /C start
-
+            os.system("cmd.exe /C start {}".format(re.escape(windows_path)))
+
     else:
-
-        opener = 'xdg-open'
+
+        opener = 'xdg-open'
         subprocess.call([opener, filename])
 
 # ...def open_file(...)
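In the WSL branch above, the old os.system line is shown truncated by the diff viewer; the new line passes the re.escape()-d Windows path to cmd.exe. Ordinary usage is unchanged; a short, hypothetical sketch:

    from megadetector.utils.path_utils import open_file

    open_file('report.html')                         # OS default handler
    open_file('report.html', browser_name='chrome')  # mapped to 'google-chrome'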
@@ -962,12 +908,12 @@ def write_list_to_file(output_file,strings):
     """
     Writes a list of strings to either a JSON file or text file,
     depending on extension of the given file name.
-
+
     Args:
         output_file (str): file to write
         strings (list): list of strings to write to [output_file]
     """
-
+
     with open(output_file, 'w') as f:
         if output_file.endswith('.json'):
             json.dump(strings, f, indent=1)
@@ -978,14 +924,14 @@ def write_list_to_file(output_file,strings):
 def read_list_from_file(filename):
     """
     Reads a json-formatted list of strings from a file.
-
+
     Args:
         filename (str): .json filename to read
-
+
     Returns:
         list: list of strings read from [filename]
     """
-
+
     assert filename.endswith('.json')
     with open(filename, 'r') as f:
         file_list = json.load(f)
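A round-trip sketch for the two list-file helpers above; write_list_to_file picks JSON or plain text from the extension, while read_list_from_file accepts only .json:

    from megadetector.utils.path_utils import (
        write_list_to_file, read_list_from_file)

    filenames = ['a.jpg', 'b.jpg']
    write_list_to_file('files.json', filenames)            # JSON list
    assert read_list_from_file('files.json') == filenames
    write_list_to_file('files.txt', filenames)             # one string per line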
@@ -1001,39 +947,39 @@ def _copy_file(input_output_tuple,overwrite=True,verbose=False,move=False):
     """
     Internal function for copying files from within parallel_copy_files.
     """
-
+
     assert len(input_output_tuple) == 2
     source_fn = input_output_tuple[0]
     target_fn = input_output_tuple[1]
     if (not overwrite) and (os.path.isfile(target_fn)):
         if verbose:
             print('Skipping existing target file {}'.format(target_fn))
-        return
-
+        return
+
     if move:
         action_string = 'Moving'
     else:
         action_string = 'Copying'
-
+
     if verbose:
         print('{} to {}'.format(action_string,target_fn))
-
+
     os.makedirs(os.path.dirname(target_fn),exist_ok=True)
     if move:
         shutil.move(source_fn, target_fn)
     else:
         shutil.copyfile(source_fn,target_fn)
-
 
-def parallel_copy_files(input_file_to_output_file,
-                        max_workers=16,
-                        use_threads=True,
-                        overwrite=False,
+
+def parallel_copy_files(input_file_to_output_file,
+                        max_workers=16,
+                        use_threads=True,
+                        overwrite=False,
                         verbose=False,
                         move=False):
     """
     Copy (or move) files from source to target according to the dict input_file_to_output_file.
-
+
     Args:
         input_file_to_output_file (dict): dictionary mapping source files to the target files
            to which they should be copied
@@ -1046,24 +992,32 @@ def parallel_copy_files(input_file_to_output_file,
     """
 
     n_workers = min(max_workers,len(input_file_to_output_file))
-
+
     # Package the dictionary as a set of 2-tuples
     input_output_tuples = []
     for input_fn in input_file_to_output_file:
         input_output_tuples.append((input_fn,input_file_to_output_file[input_fn]))
 
-    if use_threads:
-        pool = ThreadPool(n_workers)
-    else:
-        pool = Pool(n_workers)
+    pool = None
 
-    with tqdm(total=len(input_output_tuples)) as pbar:
-        for i,_ in enumerate(pool.imap_unordered(partial(_copy_file,
-                                                         overwrite=overwrite,
-                                                         verbose=verbose,
-                                                         move=move),
-                                                 input_output_tuples)):
-            pbar.update()
+    try:
+        if use_threads:
+            pool = ThreadPool(n_workers)
+        else:
+            pool = Pool(n_workers)
+
+        with tqdm(total=len(input_output_tuples)) as pbar:
+            for i,_ in enumerate(pool.imap_unordered(partial(_copy_file,
+                                                             overwrite=overwrite,
+                                                             verbose=verbose,
+                                                             move=move),
+                                                     input_output_tuples)):
+                pbar.update()
+    finally:
+        pool.close()
+        pool.join()
+        if verbose:
+            print("Pool closed and joined parallel file copying")
 
 # ...def parallel_copy_files(...)
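The rewritten body above wraps pool creation and the copy loop in try/finally, so the worker pool is closed and joined even when a copy raises. A usage sketch (paths are hypothetical):

    from megadetector.utils.path_utils import parallel_copy_files

    # Map each source file to its destination; parent folders are created
    # as needed, and existing targets are skipped unless overwrite=True
    mapping = {
        '/data/in/a.jpg': '/data/out/a.jpg',
        '/data/in/b.jpg': '/data/out/b.jpg'
    }
    parallel_copy_files(mapping, max_workers=8, use_threads=True)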
@@ -1074,36 +1028,36 @@ def get_file_sizes(base_dir, convert_slashes=True):
     """
     Gets sizes recursively for all files in base_dir, returning a dict mapping
     relative filenames to size.
-
+
     TODO: merge the functionality here with parallel_get_file_sizes, which uses slightly
     different semantics.
-
+
     Args:
         base_dir (str): folder within which we want all file sizes
         convert_slashes (bool, optional): force forward slashes in return strings,
            otherwise uses the native path separator
-
+
     Returns:
         dict: dictionary mapping filenames to file sizes in bytes
     """
-
-    relative_filenames = recursive_file_list(base_dir, convert_slashes=convert_slashes,
+
+    relative_filenames = recursive_file_list(base_dir, convert_slashes=convert_slashes,
                                              return_relative_paths=True)
-
+
     fn_to_size = {}
     for fn_relative in tqdm(relative_filenames):
         fn_abs = os.path.join(base_dir,fn_relative)
         fn_to_size[fn_relative] = os.path.getsize(fn_abs)
-
+
     return fn_to_size
-
+
 
 def _get_file_size(filename,verbose=False):
     """
     Internal function for safely getting the size of a file. Returns a (filename,size)
     tuple, where size is None if there is an error.
     """
-
+
     try:
         size = os.path.getsize(filename)
     except Exception as e:
@@ -1112,18 +1066,18 @@ def _get_file_size(filename,verbose=False):
         size = None
     return (filename,size)
 
-
-def parallel_get_file_sizes(filenames,
-                            max_workers=16,
-                            use_threads=True,
+
+def parallel_get_file_sizes(filenames,
+                            max_workers=16,
+                            use_threads=True,
                             verbose=False,
-                            recursive=True,
+                            recursive=True,
                             convert_slashes=True,
                             return_relative_paths=False):
     """
     Returns a dictionary mapping every file in [filenames] to the corresponding file size,
     or None for errors. If [filenames] is a folder, will enumerate the folder (optionally recursively).
-
+
     Args:
         filenames (list or str): list of filenames for which we should read sizes, or a folder
            within which we should read all file sizes recursively
@@ -1135,33 +1089,33 @@ def parallel_get_file_sizes(filenames,
         convert_slashes (bool, optional): convert backslashes to forward slashes
         return_relative_paths (bool, optional): return relative paths; only relevant if [filenames]
            is a folder.
-
+
     Returns:
         dict: dictionary mapping filenames to file sizes in bytes
     """
 
     n_workers = min(max_workers,len(filenames))
-
+
     folder_name = None
-
+
     if isinstance(filenames,str):
-
+
         folder_name = filenames
-        assert os.path.isdir(filenames), 'Could not find folder {}'.format(folder_name)
-
+        assert os.path.isdir(filenames), 'Could not find folder {}'.format(folder_name)
+
         if verbose:
             print('Enumerating files in {}'.format(folder_name))
-
+
         # Enumerate absolute paths here, we'll convert to relative later if requested
         filenames = recursive_file_list(folder_name,recursive=recursive,return_relative_paths=False)
 
     else:
-
+
         assert is_iterable(filenames), '[filenames] argument is neither a folder nor an iterable'
-
+
     if verbose:
         print('Creating worker pool')
-
+
     if use_threads:
         pool_string = 'thread'
         pool = ThreadPool(n_workers)
@@ -1172,11 +1126,11 @@ def parallel_get_file_sizes(filenames,
|
|
|
1172
1126
|
if verbose:
|
|
1173
1127
|
print('Created a {} pool of {} workers'.format(
|
|
1174
1128
|
pool_string,n_workers))
|
|
1175
|
-
|
|
1129
|
+
|
|
1176
1130
|
# This returns (filename,size) tuples
|
|
1177
1131
|
get_size_results = list(tqdm(pool.imap(
|
|
1178
1132
|
partial(_get_file_size,verbose=verbose),filenames), total=len(filenames)))
|
|
1179
|
-
|
|
1133
|
+
|
|
1180
1134
|
to_return = {}
|
|
1181
1135
|
for r in get_size_results:
|
|
1182
1136
|
fn = r[0]
|
|
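Note: a hypothetical usage sketch for parallel_get_file_sizes as documented above; the import path and folder name are assumptions, not part of the diff.

from megadetector.utils.path_utils import parallel_get_file_sizes

# Map every file under a folder to its size in bytes; per the docstring,
# values are None for files that could not be read
sizes = parallel_get_file_sizes('/data/camera-traps',
                                max_workers=8,
                                use_threads=True,
                                recursive=True,
                                return_relative_paths=True)
unreadable = [fn for fn, size in sizes.items() if size is None]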
@@ -1197,7 +1151,7 @@ def parallel_get_file_sizes(filenames,
 def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
     """
     Zips a single file.
-    
+
     Args:
         input_fn (str): file to zip
         output_fn (str, optional): target zipfile; if this is None, we'll use
@@ -1205,23 +1159,23 @@ def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compressl
         overwrite (bool, optional): whether to overwrite an existing target file
         verbose (bool, optional): enable existing debug console output
         compresslevel (int, optional): compression level to use, between 0 and 9
-    
+
     Returns:
         str: the output zipfile, whether we created it or determined that it already exists
     """
-    
+
     basename = os.path.basename(input_fn)
-    
+
     if output_fn is None:
         output_fn = input_fn + '.zip'
-    
+
     if (not overwrite) and (os.path.isfile(output_fn)):
         print('Skipping existing file {}'.format(output_fn))
         return output_fn
-    
+
     if verbose:
         print('Zipping {} to {} with level {}'.format(input_fn,output_fn,compresslevel))
-    
+
     with ZipFile(output_fn,'w',zipfile.ZIP_DEFLATED) as zipf:
         zipf.write(input_fn,arcname=basename,compresslevel=compresslevel,
                    compress_type=zipfile.ZIP_DEFLATED)
@@ -1232,9 +1186,9 @@ def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compressl
 def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
                                  overwrite=False, verbose=False, mode='x'):
     """
-    Adds all the files in [input_files] to the tar file [output_fn].
+    Adds all the files in [input_files] to the tar file [output_fn].
     Archive names are relative to arc_name_base.
-    
+
     Args:
         input_files (list): list of absolute filenames to include in the .tar file
         output_fn (str): .tar file to create
@@ -1244,11 +1198,11 @@ def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
         overwrite (bool, optional): whether to overwrite an existing .tar file
         verbose (bool, optional): enable additional debug console output
         mode (str, optional): compression type, can be 'x' (no compression), 'x:gz', or 'x:bz2'.
-    
+
     Returns:
         str: the output tar file, whether we created it or determined that it already exists
     """
-    
+
     if os.path.isfile(output_fn):
         if not overwrite:
             print('Tar file {} exists, skipping'.format(output_fn))
@@ -1256,11 +1210,11 @@ def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
         else:
             print('Tar file {} exists, deleting and re-creating'.format(output_fn))
             os.remove(output_fn)
-    
+
     if verbose:
         print('Adding {} files to {} (mode {})'.format(
             len(input_files),output_fn,mode))
-    
+
     with tarfile.open(output_fn,mode) as tarf:
         for input_fn_abs in tqdm(input_files,disable=(not verbose)):
             input_fn_relative = os.path.relpath(input_fn_abs,arc_name_base)
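Note: add_files_to_single_tar_file wraps the standard-library tarfile module; the following is a minimal standard-library sketch of the same approach, with illustrative paths that are not part of the diff.

import os
import tarfile

input_files = ['/data/project/images/a.jpg', '/data/project/images/b.jpg']
arc_name_base = '/data/project'

# 'x:gz' creates a new gzip-compressed tar and fails if the file already
# exists, matching the mode strings documented above
with tarfile.open('/data/project-images.tar.gz', 'x:gz') as tarf:
    for fn_abs in input_files:
        # Store each file under a path relative to arc_name_base
        tarf.add(fn_abs, arcname=os.path.relpath(fn_abs, arc_name_base))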
@@ -1272,9 +1226,9 @@ def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
 def zip_files_into_single_zipfile(input_files, output_fn, arc_name_base,
                                   overwrite=False, verbose=False, compresslevel=9):
     """
-    Zip all the files in [input_files] into [output_fn]. Archive names are relative to
+    Zip all the files in [input_files] into [output_fn]. Archive names are relative to
     arc_name_base.
-    
+
     Args:
         input_files (list): list of absolute filenames to include in the .tar file
         output_fn (str): .tar file to create
@@ -1284,20 +1238,20 @@ def zip_files_into_single_zipfile(input_files, output_fn, arc_name_base,
         overwrite (bool, optional): whether to overwrite an existing .tar file
         verbose (bool, optional): enable additional debug console output
         compresslevel (int, optional): compression level to use, between 0 and 9
-    
+
     Returns:
         str: the output zipfile, whether we created it or determined that it already exists
     """
-    
+
     if not overwrite:
         if os.path.isfile(output_fn):
             print('Zip file {} exists, skipping'.format(output_fn))
             return output_fn
-    
+
     if verbose:
         print('Zipping {} files to {} (compression level {})'.format(
             len(input_files),output_fn,compresslevel))
-    
+
     with ZipFile(output_fn,'w',zipfile.ZIP_DEFLATED) as zipf:
         for input_fn_abs in tqdm(input_files,disable=(not verbose)):
             input_fn_relative = os.path.relpath(input_fn_abs,arc_name_base)
@@ -1307,41 +1261,41 @@ def zip_files_into_single_zipfile(input_files, output_fn, arc_name_base,
                        compress_type=zipfile.ZIP_DEFLATED)
 
     return output_fn
-
-
+
+
 def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
     """
-    Recursively zip everything in [input_folder] into a single zipfile, storing files as paths
+    Recursively zip everything in [input_folder] into a single zipfile, storing files as paths
     relative to [input_folder].
-    
-    Args:
+
+    Args:
         input_folder (str): folder to zip
         output_fn (str, optional): output filename; if this is None, we'll write to [input_folder].zip
         overwrite (bool, optional): whether to overwrite an existing .tar file
         verbose (bool, optional): enable additional debug console output
-        compresslevel (int, optional): compression level to use, between 0 and 9
-    
+        compresslevel (int, optional): compression level to use, between 0 and 9
+
     Returns:
-        str: the output zipfile, whether we created it or determined that it already exists
+        str: the output zipfile, whether we created it or determined that it already exists
     """
-    
+
     if output_fn is None:
         output_fn = input_folder + '.zip'
-    
+
     if not overwrite:
         if os.path.isfile(output_fn):
             print('Zip file {} exists, skipping'.format(output_fn))
-            return
-    
+            return
+
     if verbose:
         print('Zipping {} to {} (compression level {})'.format(
             input_folder,output_fn,compresslevel))
-    
+
     relative_filenames = recursive_file_list(input_folder,return_relative_paths=True)
-    
+
     with ZipFile(output_fn,'w',zipfile.ZIP_DEFLATED) as zipf:
         for input_fn_relative in tqdm(relative_filenames,disable=(not verbose)):
-            input_fn_abs = os.path.join(input_folder,input_fn_relative)
+            input_fn_abs = os.path.join(input_folder,input_fn_relative)
             zipf.write(input_fn_abs,
                        arcname=input_fn_relative,
                        compresslevel=compresslevel,
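Note: a hypothetical round trip using the two functions above; the import path and paths are assumptions.

from megadetector.utils.path_utils import zip_folder, unzip_file

# Zips /data/results recursively to /data/results.zip (paths stored
# relative to the folder), then restores it elsewhere
zipfile_fn = zip_folder('/data/results', overwrite=True, verbose=True)
unzip_file(zipfile_fn, '/data/results-restored')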
@@ -1349,17 +1303,17 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
 
     return output_fn
 
-
-def parallel_zip_files(input_files,
-                       max_workers=16,
-                       use_threads=True,
-                       compresslevel=9,
-                       overwrite=False,
+
+def parallel_zip_files(input_files,
+                       max_workers=16,
+                       use_threads=True,
+                       compresslevel=9,
+                       overwrite=False,
                        verbose=False):
     """
-    Zips one or more files to separate output files in parallel, leaving the
+    Zips one or more files to separate output files in parallel, leaving the
     original files in place. Each file is zipped to [filename].zip.
-    
+
     Args:
         input_file (str): list of files to zip
         max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
@@ -1387,9 +1341,9 @@ def parallel_zip_files(input_files,
 def parallel_zip_folders(input_folders, max_workers=16, use_threads=True,
                          compresslevel=9, overwrite=False, verbose=False):
     """
-    Zips one or more folders to separate output files in parallel, leaving the
+    Zips one or more folders to separate output files in parallel, leaving the
     original folders in place. Each folder is zipped to [folder_name].zip.
-    
+
     Args:
         input_folder (list): list of folders to zip
         max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
@@ -1406,7 +1360,7 @@ def parallel_zip_folders(input_folders, max_workers=16, use_threads=True,
         pool = ThreadPool(n_workers)
     else:
         pool = Pool(n_workers)
-    
+
     with tqdm(total=len(input_folders)) as pbar:
         for i,_ in enumerate(pool.imap_unordered(
             partial(zip_folder,overwrite=overwrite,
@@ -1419,9 +1373,9 @@ def zip_each_file_in_folder(folder_name,recursive=False,max_workers=16,use_threa
                             compresslevel=9,overwrite=False,required_token=None,verbose=False,
                             exclude_zip=True):
     """
-    Zips each file in [folder_name] to its own zipfile (filename.zip), optionally recursing. To
+    Zips each file in [folder_name] to its own zipfile (filename.zip), optionally recursing. To
     zip a whole folder into a single zipfile, use zip_folder().
-    
+
     Args:
         folder_name (str): the folder within which we should zip files
         recursive (bool, optional): whether to recurse within [folder_name]
@@ -1432,19 +1386,19 @@ def zip_each_file_in_folder(folder_name,recursive=False,max_workers=16,use_threa
         overwrite (bool, optional): whether to overwrite an existing .tar file
         required_token (str, optional): only zip files whose names contain this string
         verbose (bool, optional): enable additional debug console output
-        exclude_zip (bool, optional): skip files ending in .zip
+        exclude_zip (bool, optional): skip files ending in .zip
     """
-    
+
     assert os.path.isdir(folder_name), '{} is not a folder'.format(folder_name)
-    
+
     input_files = recursive_file_list(folder_name,recursive=recursive,return_relative_paths=False)
-    
+
     if required_token is not None:
         input_files = [fn for fn in input_files if required_token in fn]
-    
+
     if exclude_zip:
         input_files = [fn for fn in input_files if (not fn.endswith('.zip'))]
-    
+
     parallel_zip_files(input_files=input_files,max_workers=max_workers,
                        use_threads=use_threads,compresslevel=compresslevel,
                        overwrite=overwrite,verbose=verbose)
@@ -1454,16 +1408,16 @@ def unzip_file(input_file, output_folder=None):
     """
     Unzips a zipfile to the specified output folder, defaulting to the same location as
     the input file.
-    
+
     Args:
         input_file (str): zipfile to unzip
         output_folder (str, optional): folder to which we should unzip [input_file], defaults
             to unzipping to the folder where [input_file] lives
     """
-    
+
     if output_folder is None:
         output_folder = os.path.dirname(input_file)
-    
+
     with zipfile.ZipFile(input_file, 'r') as zf:
         zf.extractall(output_folder)
 
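Note: hypothetical usage of zip_each_file_in_folder as documented above; the import path and folder are assumptions.

from megadetector.utils.path_utils import zip_each_file_in_folder

# Zip each .json results file under a folder to its own [filename].zip,
# skipping anything that is already a .zip
zip_each_file_in_folder('/data/md-results',
                        recursive=True,
                        required_token='.json',
                        exclude_zip=True,
                        overwrite=False)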
@@ -1473,31 +1427,31 @@ def unzip_file(input_file, output_folder=None):
 def compute_file_hash(file_path, algorithm='sha256', allow_failures=True):
     """
     Compute the hash of a file.
-    
+
     Adapted from:
-    
+
     https://www.geeksforgeeks.org/python-program-to-find-hash-of-file/
-    
+
     Args:
         file_path (str): the file to hash
         algorithm (str, optional): the hashing algorithm to use (e.g. md5, sha256)
-    
+
     Returns:
         str: the hash value for this file
     """
-    
+
     try:
-        
+
         hash_func = hashlib.new(algorithm)
-        
+
         with open(file_path, 'rb') as file:
             while chunk := file.read(8192): # Read the file in chunks of 8192 bytes
                 hash_func.update(chunk)
-        
+
         return str(hash_func.hexdigest())
-    
+
     except Exception:
-        
+
         if allow_failures:
             return None
         else:
@@ -1507,14 +1461,14 @@ def compute_file_hash(file_path, algorithm='sha256', allow_failures=True):
 
 
 def parallel_compute_file_hashes(filenames,
-                                 max_workers=16,
-                                 use_threads=True,
+                                 max_workers=16,
+                                 use_threads=True,
                                  recursive=True,
                                  algorithm='sha256',
                                  verbose=False):
     """
     Compute file hashes for a list or folder of images.
-    
+
     Args:
         filenames (list or str): a list of filenames or a folder
         max_workers (int, optional): the number of parallel workers to use; set to <=1 to disable
@@ -1524,8 +1478,8 @@ def parallel_compute_file_hashes(filenames,
         algorithm (str, optional): the hashing algorithm to use (e.g. md5, sha256)
         recursive (bool, optional): if [filenames] is a folder, whether to enumerate recursively.
             Ignored if [filenames] is a list.
-        verbose (bool, optional): enable additional debug output
-    
+        verbose (bool, optional): enable additional debug output
+
     Returns:
         dict: a dict mapping filenames to hash values; values will be None for files that fail
             to load.
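Note: compute_file_hash reads files in 8192-byte chunks so that large files are never loaded wholesale. A self-contained restatement of that pattern using only the standard library follows (the walrus operator requires Python 3.8+); it is a sketch, not the package's function.

import hashlib

def sha256_of_file(path, chunk_size=8192):
    """Chunked file hashing, following the same pattern as compute_file_hash above."""
    h = hashlib.new('sha256')
    with open(path, 'rb') as f:
        # Read fixed-size chunks until EOF, feeding each into the hash
        while chunk := f.read(chunk_size):
            h.update(chunk)
    return h.hexdigest()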
@@ -1535,35 +1489,1140 @@ def parallel_compute_file_hashes(filenames,
|
|
|
1535
1489
|
if verbose:
|
|
1536
1490
|
print('Enumerating files in {}'.format(filenames))
|
|
1537
1491
|
filenames = recursive_file_list(filenames,recursive=recursive,return_relative_paths=False)
|
|
1538
|
-
|
|
1492
|
+
|
|
1539
1493
|
n_workers = min(max_workers,len(filenames))
|
|
1540
|
-
|
|
1494
|
+
|
|
1541
1495
|
if verbose:
|
|
1542
1496
|
print('Computing hashes for {} files on {} workers'.format(len(filenames),n_workers))
|
|
1543
|
-
|
|
1497
|
+
|
|
1544
1498
|
if n_workers <= 1:
|
|
1545
|
-
|
|
1499
|
+
|
|
1546
1500
|
results = []
|
|
1547
1501
|
for filename in filenames:
|
|
1548
1502
|
results.append(compute_file_hash(filename,algorithm=algorithm,allow_failures=True))
|
|
1549
|
-
|
|
1503
|
+
|
|
1550
1504
|
else:
|
|
1551
|
-
|
|
1505
|
+
|
|
1552
1506
|
if use_threads:
|
|
1553
1507
|
pool = ThreadPool(n_workers)
|
|
1554
1508
|
else:
|
|
1555
1509
|
pool = Pool(n_workers)
|
|
1556
|
-
|
|
1510
|
+
|
|
1557
1511
|
results = list(tqdm(pool.imap(
|
|
1558
1512
|
partial(compute_file_hash,algorithm=algorithm,allow_failures=True),
|
|
1559
1513
|
filenames), total=len(filenames)))
|
|
1560
|
-
|
|
1514
|
+
|
|
1561
1515
|
assert len(filenames) == len(results), 'Internal error in parallel_compute_file_hashes'
|
|
1562
|
-
|
|
1516
|
+
|
|
1563
1517
|
to_return = {}
|
|
1564
1518
|
for i_file,filename in enumerate(filenames):
|
|
1565
1519
|
to_return[filename] = results[i_file]
|
|
1566
|
-
|
|
1520
|
+
|
|
1567
1521
|
return to_return
|
|
1568
1522
|
|
|
1569
1523
|
# ...def parallel_compute_file_hashes(...)
|
|
1524
|
+
|
|
1525
|
+
|
|
1526
|
+
#%% Tests
|
|
1527
|
+
|
|
1528
|
+
class TestPathUtils:
|
|
1529
|
+
"""
|
|
1530
|
+
Tests for path_utils.py
|
|
1531
|
+
"""
|
|
1532
|
+
|
|
1533
|
+
def set_up(self):
|
|
1534
|
+
"""
|
|
1535
|
+
Create a temporary directory for testing.
|
|
1536
|
+
"""
|
|
1537
|
+
|
|
1538
|
+
self.test_dir = make_test_folder(subfolder='megadetector/path_utils_tests')
|
|
1539
|
+
os.makedirs(self.test_dir, exist_ok=True)
|
|
1540
|
+
|
|
1541
|
+
|
|
1542
|
+
def tear_down(self):
|
|
1543
|
+
"""
|
|
1544
|
+
Remove the temporary directory after tests.
|
|
1545
|
+
"""
|
|
1546
|
+
|
|
1547
|
+
if os.path.exists(self.test_dir):
|
|
1548
|
+
shutil.rmtree(self.test_dir)
|
|
1549
|
+
|
|
1550
|
+
|
|
1551
|
+
def test_is_image_file(self):
|
|
1552
|
+
"""
|
|
1553
|
+
Test the is_image_file function.
|
|
1554
|
+
"""
|
|
1555
|
+
|
|
1556
|
+
assert is_image_file('test.jpg')
|
|
1557
|
+
assert is_image_file('test.jpeg')
|
|
1558
|
+
assert is_image_file('test.png')
|
|
1559
|
+
assert is_image_file('test.gif')
|
|
1560
|
+
assert is_image_file('test.bmp')
|
|
1561
|
+
assert is_image_file('test.tiff')
|
|
1562
|
+
assert is_image_file('test.TIF')
|
|
1563
|
+
assert not is_image_file('test.txt')
|
|
1564
|
+
assert not is_image_file('test.doc')
|
|
1565
|
+
assert is_image_file('path/to/image.JPG')
|
|
1566
|
+
assert not is_image_file('image')
|
|
1567
|
+
assert is_image_file('test.custom', img_extensions=['.custom'])
|
|
1568
|
+
assert not is_image_file('test.jpg', img_extensions=['.custom'])
|
|
1569
|
+
|
|
1570
|
+
|
|
1571
|
+
def test_find_image_strings(self):
|
|
1572
|
+
"""
|
|
1573
|
+
Test the find_image_strings function.
|
|
1574
|
+
"""
|
|
1575
|
+
|
|
1576
|
+
strings = ['a.jpg', 'b.txt', 'c.PNG', 'd.gif', 'e.jpeg', 'f.doc']
|
|
1577
|
+
expected = ['a.jpg', 'c.PNG', 'd.gif', 'e.jpeg']
|
|
1578
|
+
assert sorted(find_image_strings(strings)) == sorted(expected)
|
|
1579
|
+
assert find_image_strings([]) == []
|
|
1580
|
+
assert find_image_strings(['no_image.txt', 'another.doc']) == []
|
|
1581
|
+
|
|
1582
|
+
|
|
1583
|
+
def test_find_images(self):
|
|
1584
|
+
"""
|
|
1585
|
+
Test the find_images function.
|
|
1586
|
+
"""
|
|
1587
|
+
|
|
1588
|
+
# Create some dummy files
|
|
1589
|
+
img1_abs = os.path.join(self.test_dir, 'img1.jpg')
|
|
1590
|
+
img2_abs = os.path.join(self.test_dir, 'img2.PNG')
|
|
1591
|
+
txt1_abs = os.path.join(self.test_dir, 'text1.txt')
|
|
1592
|
+
open(img1_abs, 'w').close()
|
|
1593
|
+
open(img2_abs, 'w').close()
|
|
1594
|
+
open(txt1_abs, 'w').close()
|
|
1595
|
+
|
|
1596
|
+
subdir = os.path.join(self.test_dir, 'subdir')
|
|
1597
|
+
os.makedirs(subdir, exist_ok=True)
|
|
1598
|
+
img3_abs = os.path.join(subdir, 'img3.jpeg')
|
|
1599
|
+
txt2_abs = os.path.join(subdir, 'text2.txt')
|
|
1600
|
+
open(img3_abs, 'w').close()
|
|
1601
|
+
open(txt2_abs, 'w').close()
|
|
1602
|
+
|
|
1603
|
+
# Test non-recursive
|
|
1604
|
+
expected_non_recursive_abs = sorted([img1_abs.replace('\\', '/'), img2_abs.replace('\\', '/')])
|
|
1605
|
+
found_non_recursive_abs = find_images(self.test_dir, recursive=False, return_relative_paths=False)
|
|
1606
|
+
assert sorted(found_non_recursive_abs) == expected_non_recursive_abs
|
|
1607
|
+
|
|
1608
|
+
# Test non-recursive, relative paths
|
|
1609
|
+
expected_non_recursive_rel = sorted(['img1.jpg', 'img2.PNG'])
|
|
1610
|
+
found_non_recursive_rel = find_images(self.test_dir, recursive=False, return_relative_paths=True)
|
|
1611
|
+
assert sorted(found_non_recursive_rel) == expected_non_recursive_rel
|
|
1612
|
+
|
|
1613
|
+
# Test recursive
|
|
1614
|
+
expected_recursive_abs = sorted([
|
|
1615
|
+
img1_abs.replace('\\', '/'),
|
|
1616
|
+
img2_abs.replace('\\', '/'),
|
|
1617
|
+
img3_abs.replace('\\', '/')
|
|
1618
|
+
])
|
|
1619
|
+
found_recursive_abs = find_images(self.test_dir, recursive=True, return_relative_paths=False)
|
|
1620
|
+
assert sorted(found_recursive_abs) == expected_recursive_abs
|
|
1621
|
+
|
|
1622
|
+
# Test recursive, relative paths
|
|
1623
|
+
expected_recursive_rel = sorted([
|
|
1624
|
+
'img1.jpg',
|
|
1625
|
+
'img2.PNG',
|
|
1626
|
+
os.path.join('subdir', 'img3.jpeg').replace('\\', '/')
|
|
1627
|
+
])
|
|
1628
|
+
found_recursive_rel = find_images(self.test_dir, recursive=True, return_relative_paths=True)
|
|
1629
|
+
assert sorted(found_recursive_rel) == expected_recursive_rel
|
|
1630
|
+
|
|
1631
|
+
# Test with an empty directory
|
|
1632
|
+
empty_dir = os.path.join(self.test_dir, 'empty_dir')
|
|
1633
|
+
os.makedirs(empty_dir, exist_ok=True)
|
|
1634
|
+
assert find_images(empty_dir, recursive=True) == []
|
|
1635
|
+
|
|
1636
|
+
# Test with a directory that doesn't exist (should assert)
|
|
1637
|
+
try:
|
|
1638
|
+
find_images(os.path.join(self.test_dir, 'non_existent_dir'))
|
|
1639
|
+
raise AssertionError("AssertionError not raised for non_existent_dir")
|
|
1640
|
+
except AssertionError:
|
|
1641
|
+
pass
|
|
1642
|
+
|
|
1643
|
+
|
|
1644
|
+
def test_recursive_file_list_and_file_list(self):
|
|
1645
|
+
"""
|
|
1646
|
+
Test the recursive_file_list and file_list functions.
|
|
1647
|
+
"""
|
|
1648
|
+
|
|
1649
|
+
# Setup directory structure
|
|
1650
|
+
# test_dir/
|
|
1651
|
+
# file1.txt
|
|
1652
|
+
# file2.jpg
|
|
1653
|
+
# subdir1/
|
|
1654
|
+
# file3.txt
|
|
1655
|
+
# subsubdir/
|
|
1656
|
+
# file4.png
|
|
1657
|
+
# subdir2/
|
|
1658
|
+
# file5.doc
|
|
1659
|
+
|
|
1660
|
+
list_dir = os.path.join(self.test_dir,'recursive_list')
|
|
1661
|
+
|
|
1662
|
+
f1 = os.path.join(list_dir, 'file1.txt')
|
|
1663
|
+
f2 = os.path.join(list_dir, 'file2.jpg')
|
|
1664
|
+
subdir1 = os.path.join(list_dir, 'subdir1')
|
|
1665
|
+
os.makedirs(subdir1, exist_ok=True)
|
|
1666
|
+
f3 = os.path.join(subdir1, 'file3.txt')
|
|
1667
|
+
subsubdir = os.path.join(subdir1, 'subsubdir')
|
|
1668
|
+
os.makedirs(subsubdir, exist_ok=True)
|
|
1669
|
+
f4 = os.path.join(subsubdir, 'file4.png')
|
|
1670
|
+
subdir2 = os.path.join(list_dir, 'subdir2')
|
|
1671
|
+
os.makedirs(subdir2, exist_ok=True)
|
|
1672
|
+
f5 = os.path.join(subdir2, 'file5.doc')
|
|
1673
|
+
|
|
1674
|
+
for filepath in [f1, f2, f3, f4, f5]:
|
|
1675
|
+
with open(filepath, 'w') as f:
|
|
1676
|
+
f.write('test')
|
|
1677
|
+
|
|
1678
|
+
# Test recursive_file_list (recursive=True by default)
|
|
1679
|
+
expected_all_files_abs = sorted([
|
|
1680
|
+
f1.replace('\\', '/'), f2.replace('\\', '/'), f3.replace('\\', '/'),
|
|
1681
|
+
f4.replace('\\', '/'), f5.replace('\\', '/')
|
|
1682
|
+
])
|
|
1683
|
+
all_files_abs = recursive_file_list(list_dir, convert_slashes=True,
|
|
1684
|
+
return_relative_paths=False)
|
|
1685
|
+
assert sorted(all_files_abs) == expected_all_files_abs
|
|
1686
|
+
|
|
1687
|
+
# Test recursive_file_list with relative paths
|
|
1688
|
+
expected_all_files_rel = sorted([
|
|
1689
|
+
'file1.txt', 'file2.jpg',
|
|
1690
|
+
os.path.join('subdir1', 'file3.txt').replace('\\', '/'),
|
|
1691
|
+
os.path.join('subdir1', 'subsubdir', 'file4.png').replace('\\', '/'),
|
|
1692
|
+
os.path.join('subdir2', 'file5.doc').replace('\\', '/')
|
|
1693
|
+
])
|
|
1694
|
+
all_files_rel = recursive_file_list(list_dir, convert_slashes=True,
|
|
1695
|
+
return_relative_paths=True)
|
|
1696
|
+
assert sorted(all_files_rel) == expected_all_files_rel
|
|
1697
|
+
|
|
1698
|
+
# Test file_list (non-recursive by default via wrapper)
|
|
1699
|
+
expected_top_level_files_abs = sorted([f1.replace('\\', '/'), f2.replace('\\', '/')])
|
|
1700
|
+
top_level_files_abs = file_list(list_dir, convert_slashes=True,
|
|
1701
|
+
return_relative_paths=False, recursive=False)
|
|
1702
|
+
assert sorted(top_level_files_abs) == expected_top_level_files_abs
|
|
1703
|
+
|
|
1704
|
+
# Test file_list (recursive explicitly) - should be same as recursive_file_list
|
|
1705
|
+
recursive_via_file_list = file_list(list_dir, convert_slashes=True,
|
|
1706
|
+
return_relative_paths=False, recursive=True)
|
|
1707
|
+
assert sorted(recursive_via_file_list) == expected_all_files_abs
|
|
1708
|
+
|
|
1709
|
+
# Test with convert_slashes=False (use os.sep)
|
|
1710
|
+
#
|
|
1711
|
+
# Note: This test might be tricky if os.sep is '/', as no replacement happens. We'll check
|
|
1712
|
+
# that backslashes remain on Windows.
|
|
1713
|
+
if os.sep == '\\':
|
|
1714
|
+
f1_raw = os.path.join(list_dir, 'file1.txt')
|
|
1715
|
+
# Only one file for simplicity
|
|
1716
|
+
files_no_slash_conversion = file_list(list_dir, convert_slashes=False, recursive=False)
|
|
1717
|
+
assert any(f1_raw in s for s in files_no_slash_conversion)
|
|
1718
|
+
|
|
1719
|
+
# Test with an empty directory
|
|
1720
|
+
empty_dir = os.path.join(list_dir, "empty_dir_for_files")
|
|
1721
|
+
os.makedirs(empty_dir, exist_ok=True)
|
|
1722
|
+
assert recursive_file_list(empty_dir) == []
|
|
1723
|
+
assert file_list(empty_dir, recursive=False) == []
|
|
1724
|
+
|
|
1725
|
+
# Test with a non-existent directory
|
|
1726
|
+
try:
|
|
1727
|
+
recursive_file_list(os.path.join(list_dir, "non_existent_dir"))
|
|
1728
|
+
raise AssertionError("AssertionError not raised for non_existent_dir in recursive_file_list")
|
|
1729
|
+
except AssertionError:
|
|
1730
|
+
pass
|
|
1731
|
+
|
|
1732
|
+
|
|
1733
|
+
def test_folder_list(self):
|
|
1734
|
+
"""
|
|
1735
|
+
Test the folder_list function.
|
|
1736
|
+
"""
|
|
1737
|
+
|
|
1738
|
+
# Setup directory structure
|
|
1739
|
+
# test_dir/
|
|
1740
|
+
# subdir1/
|
|
1741
|
+
# subsubdir1/
|
|
1742
|
+
# subdir2/
|
|
1743
|
+
# file1.txt (should be ignored)
|
|
1744
|
+
|
|
1745
|
+
folder_list_dir = os.path.join(self.test_dir,'folder_list')
|
|
1746
|
+
|
|
1747
|
+
subdir1 = os.path.join(folder_list_dir, 'subdir1')
|
|
1748
|
+
subsubdir1 = os.path.join(subdir1, 'subsubdir1')
|
|
1749
|
+
subdir2 = os.path.join(folder_list_dir, 'subdir2')
|
|
1750
|
+
os.makedirs(subdir1, exist_ok=True)
|
|
1751
|
+
os.makedirs(subsubdir1, exist_ok=True)
|
|
1752
|
+
os.makedirs(subdir2, exist_ok=True)
|
|
1753
|
+
with open(os.path.join(folder_list_dir, 'file1.txt'), 'w') as f:
|
|
1754
|
+
f.write('test')
|
|
1755
|
+
|
|
1756
|
+
# Test non-recursive
|
|
1757
|
+
expected_folders_non_recursive_abs = sorted([
|
|
1758
|
+
subdir1.replace('\\', '/'), subdir2.replace('\\', '/')
|
|
1759
|
+
])
|
|
1760
|
+
folders_non_recursive_abs = folder_list(folder_list_dir, recursive=False,
|
|
1761
|
+
return_relative_paths=False)
|
|
1762
|
+
assert sorted(folders_non_recursive_abs) == expected_folders_non_recursive_abs
|
|
1763
|
+
|
|
1764
|
+
# Test non-recursive, relative paths
|
|
1765
|
+
expected_folders_non_recursive_rel = sorted(['subdir1', 'subdir2'])
|
|
1766
|
+
folders_non_recursive_rel = folder_list(folder_list_dir, recursive=False,
|
|
1767
|
+
return_relative_paths=True)
|
|
1768
|
+
assert sorted(folders_non_recursive_rel) == expected_folders_non_recursive_rel
|
|
1769
|
+
|
|
1770
|
+
# Test recursive
|
|
1771
|
+
expected_folders_recursive_abs = sorted([
|
|
1772
|
+
subdir1.replace('\\', '/'),
|
|
1773
|
+
subsubdir1.replace('\\', '/'),
|
|
1774
|
+
subdir2.replace('\\', '/')
|
|
1775
|
+
])
|
|
1776
|
+
folders_recursive_abs = folder_list(folder_list_dir, recursive=True,
|
|
1777
|
+
return_relative_paths=False)
|
|
1778
|
+
assert sorted(folders_recursive_abs) == expected_folders_recursive_abs
|
|
1779
|
+
|
|
1780
|
+
# Test recursive, relative paths
|
|
1781
|
+
expected_folders_recursive_rel = sorted([
|
|
1782
|
+
'subdir1',
|
|
1783
|
+
os.path.join('subdir1', 'subsubdir1').replace('\\', '/'),
|
|
1784
|
+
'subdir2'
|
|
1785
|
+
])
|
|
1786
|
+
folders_recursive_rel = folder_list(folder_list_dir, recursive=True,
|
|
1787
|
+
return_relative_paths=True)
|
|
1788
|
+
assert sorted(folders_recursive_rel) == expected_folders_recursive_rel
|
|
1789
|
+
|
|
1790
|
+
# Test with an empty directory (except for the file)
|
|
1791
|
+
empty_dir_for_folders = os.path.join(folder_list_dir, "empty_for_folders")
|
|
1792
|
+
os.makedirs(empty_dir_for_folders, exist_ok=True)
|
|
1793
|
+
with open(os.path.join(empty_dir_for_folders, 'temp.txt'), 'w') as f: f.write('t')
|
|
1794
|
+
assert folder_list(empty_dir_for_folders, recursive=True) == []
|
|
1795
|
+
assert folder_list(empty_dir_for_folders, recursive=False) == []
|
|
1796
|
+
|
|
1797
|
+
# Test with a non-existent directory
|
|
1798
|
+
try:
|
|
1799
|
+
folder_list(os.path.join(self.test_dir, "non_existent_dir"))
|
|
1800
|
+
raise AssertionError("AssertionError not raised for non_existent_dir in folder_list")
|
|
1801
|
+
except AssertionError:
|
|
1802
|
+
pass
|
|
1803
|
+
|
|
1804
|
+
|
|
1805
|
+
def test_folder_summary(self):
|
|
1806
|
+
"""
|
|
1807
|
+
Test the folder_summary function.
|
|
1808
|
+
"""
|
|
1809
|
+
|
|
1810
|
+
# test_dir/
|
|
1811
|
+
# file1.txt
|
|
1812
|
+
# img1.jpg
|
|
1813
|
+
# subdir/
|
|
1814
|
+
# file2.txt
|
|
1815
|
+
# img2.png
|
|
1816
|
+
# img3.png
|
|
1817
|
+
|
|
1818
|
+
fodler_summary_dir = os.path.join(self.test_dir,'folder_summary')
|
|
1819
|
+
|
|
1820
|
+
f1 = os.path.join(fodler_summary_dir, 'file1.txt')
|
|
1821
|
+
img1 = os.path.join(fodler_summary_dir, 'img1.jpg')
|
|
1822
|
+
subdir = os.path.join(fodler_summary_dir, 'subdir')
|
|
1823
|
+
os.makedirs(subdir, exist_ok=True)
|
|
1824
|
+
f2 = os.path.join(subdir, 'file2.txt')
|
|
1825
|
+
img2 = os.path.join(subdir, 'img2.png')
|
|
1826
|
+
img3 = os.path.join(subdir, 'img3.png')
|
|
1827
|
+
|
|
1828
|
+
for filepath in [f1, img1, f2, img2, img3]:
|
|
1829
|
+
with open(filepath, 'w') as f:
|
|
1830
|
+
f.write('test')
|
|
1831
|
+
|
|
1832
|
+
summary = folder_summary(fodler_summary_dir, print_summary=False)
|
|
1833
|
+
|
|
1834
|
+
assert summary['n_files'] == 5
|
|
1835
|
+
assert summary['n_folders'] == 1 # 'subdir'
|
|
1836
|
+
assert summary['extension_to_count']['.txt'] == 2
|
|
1837
|
+
assert summary['extension_to_count']['.jpg'] == 1
|
|
1838
|
+
assert summary['extension_to_count']['.png'] == 2
|
|
1839
|
+
|
|
1840
|
+
# Check order (sorted by value, desc)
|
|
1841
|
+
#
|
|
1842
|
+
# The specific order of keys with the same counts can vary based on file system list
|
|
1843
|
+
# order. We'll check that the counts are correct and the number of unique extensions is
|
|
1844
|
+
# right.
|
|
1845
|
+
assert len(summary['extension_to_count']) == 3
|
|
1846
|
+
|
|
1847
|
+
|
|
1848
|
+
empty_dir = os.path.join(fodler_summary_dir, "empty_summary_dir")
|
|
1849
|
+
os.makedirs(empty_dir, exist_ok=True)
|
|
1850
|
+
empty_summary = folder_summary(empty_dir, print_summary=False)
|
|
1851
|
+
assert empty_summary['n_files'] == 0
|
|
1852
|
+
assert empty_summary['n_folders'] == 0
|
|
1853
|
+
assert empty_summary['extension_to_count'] == {}
|
|
1854
|
+
|
|
1855
|
+
|
|
1856
|
+
def test_fileparts(self):
|
|
1857
|
+
"""
|
|
1858
|
+
Test the fileparts function.
|
|
1859
|
+
"""
|
|
1860
|
+
|
|
1861
|
+
assert fileparts('file') == ('', 'file', '')
|
|
1862
|
+
assert fileparts('file.txt') == ('', 'file', '.txt')
|
|
1863
|
+
assert fileparts(r'c:/dir/file.jpg') == ('c:/dir', 'file', '.jpg')
|
|
1864
|
+
assert fileparts('/dir/subdir/file.jpg') == ('/dir/subdir', 'file', '.jpg')
|
|
1865
|
+
assert fileparts(r'c:\dir\file') == (r'c:\dir', 'file', '')
|
|
1866
|
+
assert fileparts(r'c:\dir\file.tar.gz') == (r'c:\dir', 'file.tar', '.gz')
|
|
1867
|
+
assert fileparts('.bashrc') == ('', '.bashrc', '') # Hidden file, no extension
|
|
1868
|
+
assert fileparts('nodir/.bashrc') == ('nodir', '.bashrc', '')
|
|
1869
|
+
assert fileparts('a/b/c.d.e') == ('a/b', 'c.d', '.e')
|
|
1870
|
+
|
|
1871
|
+
|
|
1872
|
+
def test_insert_before_extension(self):
|
|
1873
|
+
"""
|
|
1874
|
+
Test the insert_before_extension function.
|
|
1875
|
+
"""
|
|
1876
|
+
|
|
1877
|
+
assert insert_before_extension('file.ext', 'inserted') == 'file.inserted.ext'
|
|
1878
|
+
assert insert_before_extension('file', 'inserted') == 'file.inserted'
|
|
1879
|
+
assert insert_before_extension('path/to/file.ext', 'tag') == 'path/to/file.tag.ext'
|
|
1880
|
+
assert insert_before_extension('path/to/file', 'tag') == 'path/to/file.tag'
|
|
1881
|
+
assert insert_before_extension('file.tar.gz', 'new') == 'file.tar.new.gz'
|
|
1882
|
+
|
|
1883
|
+
# Test with custom separator
|
|
1884
|
+
assert insert_before_extension('file.ext', 'inserted', separator='_') == 'file_inserted.ext'
|
|
1885
|
+
|
|
1886
|
+
# Test with s=None (timestamp) - check format roughly
|
|
1887
|
+
fname_with_ts = insert_before_extension('file.ext', None)
|
|
1888
|
+
parts = fname_with_ts.split('.')
|
|
1889
|
+
# file.YYYY.MM.DD.HH.MM.SS.ext
|
|
1890
|
+
assert len(parts) >= 8 # file, Y, M, D, H, M, S, ext
|
|
1891
|
+
assert parts[0] == 'file'
|
|
1892
|
+
assert parts[-1] == 'ext'
|
|
1893
|
+
assert all(p.isdigit() for p in parts[1:-1])
|
|
1894
|
+
|
|
1895
|
+
fname_no_ext_ts = insert_before_extension('file', '') # s is empty string, should also use timestamp
|
|
1896
|
+
parts_no_ext = fname_no_ext_ts.split('.')
|
|
1897
|
+
assert len(parts_no_ext) >= 7 # file, Y, M, D, H, M, S
|
|
1898
|
+
assert parts_no_ext[0] == 'file'
|
|
1899
|
+
assert all(p.isdigit() for p in parts_no_ext[1:])
|
|
1900
|
+
|
|
1901
|
+
|
|
1902
|
+
def test_split_path(self):
|
|
1903
|
+
"""
|
|
1904
|
+
Test the split_path function.
|
|
1905
|
+
"""
|
|
1906
|
+
|
|
1907
|
+
if os.name == 'nt':
|
|
1908
|
+
assert split_path(r'c:\dir\subdir\file.txt') == ['c:\\', 'dir', 'subdir', 'file.txt']
|
|
1909
|
+
assert split_path('c:\\') == ['c:\\']
|
|
1910
|
+
# Test with mixed slashes, ntpath.split handles them
|
|
1911
|
+
assert split_path(r'c:/dir/subdir/file.txt') == ['c:/', 'dir', 'subdir', 'file.txt']
|
|
1912
|
+
else: # POSIX
|
|
1913
|
+
assert split_path('/dir/subdir/file.jpg') == ['/', 'dir', 'subdir', 'file.jpg']
|
|
1914
|
+
assert split_path('/') == ['/']
|
|
1915
|
+
|
|
1916
|
+
assert split_path('dir/file.txt') == ['dir', 'file.txt']
|
|
1917
|
+
assert split_path('file.txt') == ['file.txt']
|
|
1918
|
+
assert split_path('') == ''
|
|
1919
|
+
assert split_path('.') == ['.']
|
|
1920
|
+
assert split_path('..') == ['..']
|
|
1921
|
+
assert split_path('../a/b') == ['..', 'a', 'b']
|
|
1922
|
+
|
|
1923
|
+
|
|
1924
|
+
def test_path_is_abs(self):
|
|
1925
|
+
"""
|
|
1926
|
+
Test the path_is_abs function.
|
|
1927
|
+
"""
|
|
1928
|
+
|
|
1929
|
+
assert path_is_abs('/absolute/path')
|
|
1930
|
+
assert path_is_abs('c:/absolute/path')
|
|
1931
|
+
assert path_is_abs('C:\\absolute\\path')
|
|
1932
|
+
assert path_is_abs('\\\\server\\share\\path') # UNC path
|
|
1933
|
+
assert path_is_abs('c:file_without_slash_after_drive')
|
|
1934
|
+
|
|
1935
|
+
assert not path_is_abs('relative/path')
|
|
1936
|
+
assert not path_is_abs('file.txt')
|
|
1937
|
+
assert not path_is_abs('../relative')
|
|
1938
|
+
assert not path_is_abs('')
|
|
1939
|
+
|
|
1940
|
+
|
|
1941
|
+
|
|
1942
|
+
def test_safe_create_link_unix(self):
|
|
1943
|
+
"""
|
|
1944
|
+
Test the safe_create_link function on Unix-like systems.
|
|
1945
|
+
"""
|
|
1946
|
+
|
|
1947
|
+
if os.name == 'nt':
|
|
1948
|
+
# print("Skipping test_safe_create_link_unix on Windows.")
|
|
1949
|
+
return
|
|
1950
|
+
|
|
1951
|
+
source_file_path = os.path.join(self.test_dir, 'source.txt')
|
|
1952
|
+
link_path = os.path.join(self.test_dir, 'link.txt')
|
|
1953
|
+
other_source_path = os.path.join(self.test_dir, 'other_source.txt')
|
|
1954
|
+
|
|
1955
|
+
with open(source_file_path, 'w') as f:
|
|
1956
|
+
f.write('source data')
|
|
1957
|
+
with open(other_source_path, 'w') as f:
|
|
1958
|
+
f.write('other data')
|
|
1959
|
+
|
|
1960
|
+
# Create new link
|
|
1961
|
+
safe_create_link(source_file_path, link_path)
|
|
1962
|
+
assert os.path.islink(link_path)
|
|
1963
|
+
assert os.readlink(link_path) == source_file_path
|
|
1964
|
+
|
|
1965
|
+
# Link already exists and points to the correct source
|
|
1966
|
+
safe_create_link(source_file_path, link_path) # Should do nothing
|
|
1967
|
+
assert os.path.islink(link_path)
|
|
1968
|
+
assert os.readlink(link_path) == source_file_path
|
|
1969
|
+
|
|
1970
|
+
# Link already exists but points to a different source
|
|
1971
|
+
safe_create_link(other_source_path, link_path) # Should remove and re-create
|
|
1972
|
+
assert os.path.islink(link_path)
|
|
1973
|
+
assert os.readlink(link_path) == other_source_path
|
|
1974
|
+
|
|
1975
|
+
# Link_new path exists and is a file (not a link)
|
|
1976
|
+
file_path_conflict = os.path.join(self.test_dir, 'conflict_file.txt')
|
|
1977
|
+
with open(file_path_conflict, 'w') as f:
|
|
1978
|
+
f.write('actual file')
|
|
1979
|
+
try:
|
|
1980
|
+
safe_create_link(source_file_path, file_path_conflict)
|
|
1981
|
+
raise AssertionError("AssertionError not raised for file conflict")
|
|
1982
|
+
except AssertionError:
|
|
1983
|
+
pass
|
|
1984
|
+
os.remove(file_path_conflict)
|
|
1985
|
+
|
|
1986
|
+
# Link_new path exists and is a directory
|
|
1987
|
+
dir_path_conflict = os.path.join(self.test_dir, 'conflict_dir')
|
|
1988
|
+
os.makedirs(dir_path_conflict, exist_ok=True)
|
|
1989
|
+
try:
|
|
1990
|
+
safe_create_link(source_file_path, dir_path_conflict)
|
|
1991
|
+
raise AssertionError("AssertionError not raised for directory conflict")
|
|
1992
|
+
except AssertionError: # islink will be false
|
|
1993
|
+
pass
|
|
1994
|
+
shutil.rmtree(dir_path_conflict)
|
|
1995
|
+
|
|
1996
|
+
|
|
1997
|
+
def test_remove_empty_folders(self):
|
|
1998
|
+
"""
|
|
1999
|
+
Test the remove_empty_folders function.
|
|
2000
|
+
"""
|
|
2001
|
+
|
|
2002
|
+
# test_dir/
|
|
2003
|
+
# empty_top/
|
|
2004
|
+
# empty_mid/
|
|
2005
|
+
# empty_leaf/
|
|
2006
|
+
# mixed_top/
|
|
2007
|
+
# empty_mid_in_mixed/
|
|
2008
|
+
# empty_leaf_in_mixed/
|
|
2009
|
+
# non_empty_mid/
|
|
2010
|
+
# file.txt
|
|
2011
|
+
# non_empty_top/
|
|
2012
|
+
# file_in_top.txt
|
|
2013
|
+
|
|
2014
|
+
empty_top = os.path.join(self.test_dir, 'empty_top')
|
|
2015
|
+
empty_mid = os.path.join(empty_top, 'empty_mid')
|
|
2016
|
+
empty_leaf = os.path.join(empty_mid, 'empty_leaf')
|
|
2017
|
+
os.makedirs(empty_leaf, exist_ok=True)
|
|
2018
|
+
|
|
2019
|
+
mixed_top = os.path.join(self.test_dir, 'mixed_top')
|
|
2020
|
+
empty_mid_in_mixed = os.path.join(mixed_top, 'empty_mid_in_mixed')
|
|
2021
|
+
empty_leaf_in_mixed = os.path.join(empty_mid_in_mixed, 'empty_leaf_in_mixed')
|
|
2022
|
+
os.makedirs(empty_leaf_in_mixed, exist_ok=True)
|
|
2023
|
+
non_empty_mid = os.path.join(mixed_top, 'non_empty_mid')
|
|
2024
|
+
os.makedirs(non_empty_mid, exist_ok=True)
|
|
2025
|
+
with open(os.path.join(non_empty_mid, 'file.txt'), 'w') as f:
|
|
2026
|
+
f.write('data')
|
|
2027
|
+
|
|
2028
|
+
non_empty_top = os.path.join(self.test_dir, 'non_empty_top')
|
|
2029
|
+
os.makedirs(non_empty_top, exist_ok=True)
|
|
2030
|
+
with open(os.path.join(non_empty_top, 'file_in_top.txt'), 'w') as f:
|
|
2031
|
+
f.write('data')
|
|
2032
|
+
|
|
2033
|
+
# Process empty_top - should remove all three
|
|
2034
|
+
remove_empty_folders(empty_top, remove_root=True)
|
|
2035
|
+
assert not os.path.exists(empty_top)
|
|
2036
|
+
assert not os.path.exists(empty_mid)
|
|
2037
|
+
assert not os.path.exists(empty_leaf)
|
|
2038
|
+
|
|
2039
|
+
# Process mixed_top; should remove empty_leaf_in_mixed and empty_mid_in_mixed
|
|
2040
|
+
# but not mixed_top or non_empty_mid.
|
|
2041
|
+
remove_empty_folders(mixed_top, remove_root=True)
|
|
2042
|
+
assert os.path.exists(mixed_top) # mixed_top itself should remain
|
|
2043
|
+
assert not os.path.exists(empty_mid_in_mixed)
|
|
2044
|
+
assert not os.path.exists(empty_leaf_in_mixed)
|
|
2045
|
+
assert os.path.exists(non_empty_mid)
|
|
2046
|
+
assert os.path.exists(os.path.join(non_empty_mid, 'file.txt'))
|
|
2047
|
+
|
|
2048
|
+
# Process non_empty_top; should remove nothing.
|
|
2049
|
+
remove_empty_folders(non_empty_top, remove_root=True)
|
|
2050
|
+
assert os.path.exists(non_empty_top)
|
|
2051
|
+
assert os.path.exists(os.path.join(non_empty_top, 'file_in_top.txt'))
|
|
2052
|
+
|
|
2053
|
+
# Test with a file path (should do nothing and return False)
|
|
2054
|
+
file_path_for_removal = os.path.join(self.test_dir, 'a_file.txt')
|
|
2055
|
+
with open(file_path_for_removal, 'w') as f: f.write('t')
|
|
2056
|
+
assert not remove_empty_folders(file_path_for_removal, remove_root=True)
|
|
2057
|
+
assert os.path.exists(file_path_for_removal)
|
|
2058
|
+
|
|
2059
|
+
# Test with remove_root=False for the top level
|
|
2060
|
+
another_empty_top = os.path.join(self.test_dir, 'another_empty_top')
|
|
2061
|
+
another_empty_mid = os.path.join(another_empty_top, 'another_empty_mid')
|
|
2062
|
+
os.makedirs(another_empty_mid)
|
|
2063
|
+
remove_empty_folders(another_empty_top, remove_root=False)
|
|
2064
|
+
assert os.path.exists(another_empty_top) # Root not removed
|
|
2065
|
+
assert not os.path.exists(another_empty_mid) # Mid removed
|
|
2066
|
+
|
|
2067
|
+
|
|
2068
|
+
def test_path_join(self):
|
|
2069
|
+
"""
|
|
2070
|
+
Test the path_join function.
|
|
2071
|
+
"""
|
|
2072
|
+
|
|
2073
|
+
assert path_join('a', 'b', 'c') == 'a/b/c'
|
|
2074
|
+
assert path_join('a/b', 'c', 'd.txt') == 'a/b/c/d.txt'
|
|
2075
|
+
if os.name == 'nt':
|
|
2076
|
+
# On Windows, os.path.join uses '\', so convert_slashes=True should change it
|
|
2077
|
+
assert path_join('a', 'b', convert_slashes=True) == 'a/b'
|
|
2078
|
+
assert path_join('a', 'b', convert_slashes=False) == 'a\\b'
|
|
2079
|
+
assert path_join('c:\\', 'foo', 'bar', convert_slashes=True) == 'c:/foo/bar'
|
|
2080
|
+
assert path_join('c:\\', 'foo', 'bar', convert_slashes=False) == 'c:\\foo\\bar'
|
|
2081
|
+
else:
|
|
2082
|
+
# On POSIX, os.path.join uses '/', so convert_slashes=False should still be '/'
|
|
2083
|
+
assert path_join('a', 'b', convert_slashes=False) == 'a/b'
|
|
2084
|
+
|
|
2085
|
+
assert path_join('a', '', 'b') == 'a/b' # os.path.join behavior
|
|
2086
|
+
assert path_join('/a', 'b') == '/a/b'
|
|
2087
|
+
assert path_join('a', '/b') == '/b' # '/b' is absolute
|
|
2088
|
+
|
|
2089
|
+
|
|
2090
|
+
def test_filename_cleaning(self):
|
|
2091
|
+
"""
|
|
2092
|
+
Test clean_filename, clean_path, and flatten_path functions.
|
|
2093
|
+
"""
|
|
2094
|
+
|
|
2095
|
+
# clean_filename
|
|
2096
|
+
assert clean_filename("test file.txt") == "test file.txt"
|
|
2097
|
+
assert clean_filename("test*file?.txt", char_limit=10) == "testfile.t"
|
|
2098
|
+
assert clean_filename("TestFile.TXT", force_lower=True) == "testfile.txt"
|
|
2099
|
+
assert clean_filename("file:with<illegal>chars.txt") == "filewithillegalchars.txt"
|
|
2100
|
+
assert clean_filename(" accented_name_éà.txt") == " accented_name_ea.txt"
|
|
2101
|
+
|
|
2102
|
+
# Separators are not allowed by default in clean_filename
|
|
2103
|
+
assert clean_filename("path/to/file.txt") == "pathtofile.txt"
|
|
2104
|
+
|
|
2105
|
+
# clean_path
|
|
2106
|
+
assert clean_path("path/to/file.txt") == "path/to/file.txt" # slashes allowed
|
|
2107
|
+
assert clean_path("path\\to\\file.txt") == "path\\to\\file.txt" # backslashes allowed
|
|
2108
|
+
assert clean_path("path:to:file.txt") == "path:to:file.txt" # colons allowed
|
|
2109
|
+
assert clean_path("path/to<illegal>/file.txt") == "path/toillegal/file.txt"
|
|
2110
|
+
|
|
2111
|
+
# flatten_path
|
|
2112
|
+
assert flatten_path("path/to/file.txt") == "path~to~file.txt"
|
|
2113
|
+
assert flatten_path("path:to:file.txt", separator_char_replacement='_') == "path_to_file.txt"
|
|
2114
|
+
assert flatten_path("path\\to/file:name.txt") == "path~to~file~name.txt"
|
|
2115
|
+
assert flatten_path("path/to<illegal>/file.txt") == "path~toillegal~file.txt"
|
|
2116
|
+
|
|
2117
|
+
|
|
2118
|
+
def test_is_executable(self):
|
|
2119
|
+
"""
|
|
2120
|
+
Test the is_executable function.
|
|
2121
|
+
This is a basic test; comprehensive testing is environment-dependent.
|
|
2122
|
+
"""
|
|
2123
|
+
|
|
2124
|
+
# Hard to test reliably across all systems without knowing what's on PATH.
|
|
2125
|
+
if os.name == 'nt':
|
|
2126
|
+
assert is_executable('cmd.exe')
|
|
2127
|
+
assert not is_executable('non_existent_executable_blah_blah')
|
|
2128
|
+
else:
|
|
2129
|
+
assert is_executable('ls')
|
|
2130
|
+
assert is_executable('sh')
|
|
2131
|
+
assert not is_executable('non_existent_executable_blah_blah')
|
|
2132
|
+
|
|
2133
|
+
|
|
2134
|
+
def test_write_read_list_to_file(self):
|
|
2135
|
+
"""
|
|
2136
|
+
Test write_list_to_file and read_list_from_file functions.
|
|
2137
|
+
"""
|
|
2138
|
+
|
|
2139
|
+
test_list = ["item1", "item2 with space", "item3/with/slash"]
|
|
2140
|
+
|
|
2141
|
+
# Test with .json
|
|
2142
|
+
json_file_path = os.path.join(self.test_dir, "test_list.json")
|
|
2143
|
+
write_list_to_file(json_file_path, test_list)
|
|
2144
|
+
read_list_json = read_list_from_file(json_file_path)
|
|
2145
|
+
assert test_list == read_list_json
|
|
2146
|
+
|
|
2147
|
+
# Test with .txt
|
|
2148
|
+
txt_file_path = os.path.join(self.test_dir, "test_list.txt")
|
|
2149
|
+
write_list_to_file(txt_file_path, test_list)
|
|
2150
|
+
# read_list_from_file is specifically for JSON, so we read .txt manually
|
|
2151
|
+
with open(txt_file_path, 'r') as f:
|
|
2152
|
+
read_list_txt = [line.strip() for line in f.readlines()]
|
|
2153
|
+
assert test_list == read_list_txt
|
|
2154
|
+
|
|
2155
|
+
# Test reading non-existent json
|
|
2156
|
+
try:
|
|
2157
|
+
read_list_from_file(os.path.join(self.test_dir,"non_existent.json"))
|
|
2158
|
+
raise AssertionError("FileNotFoundError not raised")
|
|
2159
|
+
except FileNotFoundError:
|
|
2160
|
+
pass
|
|
2161
|
+
|
|
2162
|
+
# Test reading a non-json file with read_list_from_file (should fail parsing)
|
|
2163
|
+
non_json_path = os.path.join(self.test_dir, "not_a_list.json")
|
|
2164
|
+
with open(non_json_path, 'w') as f: f.write("this is not json")
|
|
2165
|
+
try:
|
|
2166
|
+
read_list_from_file(non_json_path)
|
|
2167
|
+
raise AssertionError("json.JSONDecodeError not raised")
|
|
2168
|
+
except json.JSONDecodeError:
|
|
2169
|
+
pass
|
|
2170
|
+
|
|
2171
|
+
|
|
2172
|
+
def test_parallel_copy_files(self):
|
|
2173
|
+
"""
|
|
2174
|
+
Test the parallel_copy_files function (with max_workers=1 for test simplicity).
|
|
2175
|
+
"""
|
|
2176
|
+
|
|
2177
|
+
source_dir = os.path.join(self.test_dir, "copy_source")
|
|
2178
|
+
target_dir = os.path.join(self.test_dir, "copy_target")
|
|
2179
|
+
os.makedirs(source_dir, exist_ok=True)
|
|
2180
|
+
|
|
2181
|
+
file_mappings = {}
|
|
2182
|
+
source_files_content = {}
|
|
2183
|
+
|
|
2184
|
+
for i in range(3):
|
|
2185
|
+
src_fn = f"file{i}.txt"
|
|
2186
|
+
src_path = os.path.join(source_dir, src_fn)
|
|
2187
|
+
if i == 0:
|
|
2188
|
+
tgt_fn = f"copied_file{i}.txt"
|
|
2189
|
+
tgt_path = os.path.join(target_dir, tgt_fn)
|
|
2190
|
+
else:
|
|
2191
|
+
tgt_fn = f"copied_file{i}_subdir.txt"
|
|
2192
|
+
tgt_path = os.path.join(target_dir, f"sub{i}", tgt_fn)
|
|
2193
|
+
|
|
2194
|
+
content = f"content of file {i}"
|
|
2195
|
+
with open(src_path, 'w') as f:
|
|
2196
|
+
f.write(content)
|
|
2197
|
+
|
|
2198
|
+
file_mappings[src_path] = tgt_path
|
|
2199
|
+
source_files_content[tgt_path] = content
|
|
2200
|
+
|
|
2201
|
+
# Test copy
|
|
2202
|
+
parallel_copy_files(file_mappings, max_workers=1, use_threads=True, overwrite=False)
|
|
2203
|
+
for tgt_path, expected_content in source_files_content.items():
|
|
2204
|
+
assert os.path.exists(tgt_path)
|
|
2205
|
+
with open(tgt_path, 'r') as f:
|
|
2206
|
+
assert f.read() == expected_content
|
|
2207
|
+
|
|
2208
|
+
existing_target_path = list(source_files_content.keys())[0]
|
|
2209
|
+
with open(existing_target_path, 'w') as f:
|
|
2210
|
+
f.write("old content")
|
|
2211
|
+
|
|
2212
|
+
parallel_copy_files(file_mappings, max_workers=1, use_threads=True, overwrite=False)
|
|
2213
|
+
with open(existing_target_path, 'r') as f:
|
|
2214
|
+
assert f.read() == "old content"
|
|
2215
|
+
|
|
2216
|
+
parallel_copy_files(file_mappings, max_workers=1, use_threads=True, overwrite=True)
|
|
2217
|
+
with open(existing_target_path, 'r') as f:
|
|
2218
|
+
assert f.read() == source_files_content[existing_target_path]
|
|
2219
|
+
|
|
2220
|
+
for src_path_orig, tgt_path_orig in file_mappings.items(): # Re-create source for move
|
|
2221
|
+
with open(src_path_orig, 'w') as f:
|
|
2222
|
+
f.write(source_files_content[tgt_path_orig])
|
|
2223
|
+
|
|
2224
|
+
parallel_copy_files(file_mappings, max_workers=1, use_threads=True, move=True, overwrite=True)
|
|
2225
|
+
for src_path, tgt_path in file_mappings.items():
|
|
2226
|
+
assert not os.path.exists(src_path)
|
|
2227
|
+
assert os.path.exists(tgt_path)
|
|
2228
|
+
with open(tgt_path, 'r') as f:
|
|
2229
|
+
assert f.read() == source_files_content[tgt_path]
|
|
2230
|
+
|
|
2231
|
+
|
|
2232
|
+
def test_get_file_sizes(self):
|
|
2233
|
+
"""
|
|
2234
|
+
Test get_file_sizes and parallel_get_file_sizes functions.
|
|
2235
|
+
"""
|
|
2236
|
+
|
|
2237
|
+
file_sizes_test_dir = os.path.join(self.test_dir,'file_sizes')
|
|
2238
|
+
os.makedirs(file_sizes_test_dir,exist_ok=True)
|
|
2239
|
+
|
|
2240
|
+
f1_path = os.path.join(file_sizes_test_dir, 'file1.txt')
|
|
2241
|
+
content1 = "0123456789" # 10 bytes
|
|
2242
|
+
with open(f1_path, 'w') as f:
|
|
2243
|
+
f.write(content1)
|
|
2244
|
+
|
|
2245
|
+
subdir_path = os.path.join(file_sizes_test_dir, 'subdir')
|
|
2246
|
+
os.makedirs(subdir_path, exist_ok=True)
|
|
2247
|
+
f2_path = os.path.join(subdir_path, 'file2.txt')
|
|
2248
|
+
content2 = "01234567890123456789" # 20 bytes
|
|
2249
|
+
with open(f2_path, 'w') as f:
|
|
2250
|
+
f.write(content2)
|
|
2251
|
+
|
|
2252
|
+
sizes_relative = get_file_sizes(file_sizes_test_dir)
|
|
2253
|
+
expected_sizes_relative = {
|
|
2254
|
+
'file1.txt': len(content1),
|
|
2255
|
+
os.path.join('subdir', 'file2.txt').replace('\\','/'): len(content2)
|
|
2256
|
+
}
|
|
2257
|
+
assert sizes_relative == expected_sizes_relative
|
|
2258
|
+
|
|
2259
|
+
file_list_abs = [f1_path, f2_path]
|
|
2260
|
+
sizes_parallel_abs = parallel_get_file_sizes(file_list_abs, max_workers=1)
|
|
2261
|
+
expected_sizes_parallel_abs = {
|
|
2262
|
+
f1_path.replace('\\','/'): len(content1),
|
|
2263
|
+
f2_path.replace('\\','/'): len(content2)
|
|
2264
|
+
}
|
|
2265
|
+
assert sizes_parallel_abs == expected_sizes_parallel_abs
|
|
2266
|
+
|
|
2267
|
+
sizes_parallel_folder_abs = parallel_get_file_sizes(file_sizes_test_dir, max_workers=1, return_relative_paths=False)
|
|
2268
|
+
assert sizes_parallel_folder_abs == expected_sizes_parallel_abs
|
|
2269
|
+
|
|
2270
|
+
sizes_parallel_folder_rel = parallel_get_file_sizes(file_sizes_test_dir, max_workers=1, return_relative_paths=True)
|
|
2271
|
+
assert sizes_parallel_folder_rel == expected_sizes_relative
|
|
2272
|
+
|
|
2273
|
+
non_existent_file = os.path.join(file_sizes_test_dir, "no_such_file.txt")
|
|
2274
|
+
sizes_with_error = parallel_get_file_sizes([f1_path, non_existent_file], max_workers=1)
|
|
2275
|
+
expected_with_error = {
|
|
2276
|
+
f1_path.replace('\\','/'): len(content1),
|
|
2277
|
+
non_existent_file.replace('\\','/'): None
|
|
2278
|
+
}
|
|
2279
|
+
assert sizes_with_error == expected_with_error
|
|
2280
|
+
|
|
2281
|
+
|
|
2282
|
+
    def test_zip_file_and_unzip_file(self):
        """
        Test zip_file and unzip_file functions.
        """

        file_to_zip_name = "test_zip_me.txt"
        file_to_zip_path = os.path.join(self.test_dir, file_to_zip_name)
        content = "This is the content to be zipped."
        with open(file_to_zip_path, 'w') as f:
            f.write(content)

        default_zip_output_path = file_to_zip_path + ".zip"
        returned_zip_path = zip_file(file_to_zip_path)
        assert returned_zip_path == default_zip_output_path
        assert os.path.exists(default_zip_output_path)

        unzip_dir_default = os.path.join(self.test_dir, "unzip_default")
        os.makedirs(unzip_dir_default, exist_ok=True)
        unzip_file(default_zip_output_path, unzip_dir_default)
        unzipped_file_path_default = os.path.join(unzip_dir_default, file_to_zip_name)
        assert os.path.exists(unzipped_file_path_default)
        with open(unzipped_file_path_default, 'r') as f:
            assert f.read() == content

        custom_zip_output_name = "custom_archive.zip"
        custom_zip_output_path = os.path.join(self.test_dir, custom_zip_output_name)
        zip_file(file_to_zip_path, output_fn=custom_zip_output_path, overwrite=True)
        assert os.path.exists(custom_zip_output_path)

        zip_in_subdir_path = os.path.join(self.test_dir, "subdir_zip", "my.zip")
        file_in_subdir_name = "file_for_subdir_zip.txt"
        file_in_subdir_path = os.path.join(self.test_dir, "subdir_zip", file_in_subdir_name)
        os.makedirs(os.path.dirname(zip_in_subdir_path), exist_ok=True)
        with open(file_in_subdir_path, "w") as f:
            f.write("sub dir content")
        zip_file(file_in_subdir_path, output_fn=zip_in_subdir_path)
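
        # output_folder=None appears to extract the archive into its own
        # parent folder (inferred from the assertion below).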
        unzip_file(zip_in_subdir_path, output_folder=None)
        unzipped_in_same_dir_path = os.path.join(os.path.dirname(zip_in_subdir_path), file_in_subdir_name)
        assert os.path.exists(unzipped_in_same_dir_path)
        with open(unzipped_in_same_dir_path, 'r') as f:
            assert f.read() == "sub dir content"

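    # zip_folder is expected to archive a folder's contents recursively,
    # preserving relative structure; with overwrite=False and an existing
    # archive, the mtime check below implies it leaves the file untouched.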
    def test_zip_folder(self):
        """
        Test the zip_folder function.
        """

        folder_to_zip = os.path.join(self.test_dir, "folder_to_zip")
        os.makedirs(folder_to_zip, exist_ok=True)

        file1_name = "file1.txt"
        path1 = os.path.join(folder_to_zip, file1_name)
        file2_name = "file2.log"
        path2 = os.path.join(folder_to_zip, file2_name)
        subdir_name = "sub"
        subdir_path = os.path.join(folder_to_zip, subdir_name)
        os.makedirs(subdir_path, exist_ok=True)
        file3_name = "file3.dat"
        path3 = os.path.join(subdir_path, file3_name)

        content1 = "content1"
        content2 = "content2"
        content3 = "content3"
        with open(path1, 'w') as f:
            f.write(content1)
        with open(path2, 'w') as f:
            f.write(content2)
        with open(path3, 'w') as f:
            f.write(content3)

        default_zip_path = folder_to_zip + ".zip"
        zip_folder(folder_to_zip, output_fn=None, overwrite=True)
        assert os.path.exists(default_zip_path)

        unzip_output_dir = os.path.join(self.test_dir, "unzipped_folder_content")
        os.makedirs(unzip_output_dir, exist_ok=True)
        unzip_file(default_zip_path, unzip_output_dir)

        assert os.path.exists(os.path.join(unzip_output_dir, file1_name))
        assert os.path.exists(os.path.join(unzip_output_dir, file2_name))
        assert os.path.exists(os.path.join(unzip_output_dir, subdir_name, file3_name))
        with open(os.path.join(unzip_output_dir, file1_name), 'r') as f:
            assert f.read() == content1
        with open(os.path.join(unzip_output_dir, file2_name), 'r') as f:
            assert f.read() == content2
        with open(os.path.join(unzip_output_dir, subdir_name, file3_name), 'r') as f:
            assert f.read() == content3

        mtime_before = os.path.getmtime(default_zip_path)
        zip_folder(folder_to_zip, output_fn=None, overwrite=False)
        mtime_after = os.path.getmtime(default_zip_path)
        assert mtime_before == mtime_after

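    # zip_files_into_single_zipfile gathers an arbitrary list of files into
    # one archive; arc_name_base looks like the root against which archive
    # member names are relativized, so the extracted tree mirrors the paths
    # relative to that base (inferred from the relpath assertions below).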
    def test_zip_files_into_single_zipfile(self):
        """
        Test zip_files_into_single_zipfile.
        """

        file1_path = os.path.join(self.test_dir, "zfs_file1.txt")
        content1 = "content for zfs1"
        with open(file1_path, 'w') as f:
            f.write(content1)

        subdir_for_zfs = os.path.join(self.test_dir, "zfs_subdir")
        os.makedirs(subdir_for_zfs, exist_ok=True)
        file2_path = os.path.join(subdir_for_zfs, "zfs_file2.log")
        content2 = "content for zfs2"
        with open(file2_path, 'w') as f:
            f.write(content2)

        input_files = [file1_path, file2_path]
        output_zip_path = os.path.join(self.test_dir, "multi_file_archive.zip")
        zip_files_into_single_zipfile(input_files, output_zip_path, arc_name_base=self.test_dir, overwrite=True)
        assert os.path.exists(output_zip_path)

        unzip_dir = os.path.join(self.test_dir, "unzip_multi_file")
        os.makedirs(unzip_dir, exist_ok=True)
        unzip_file(output_zip_path, unzip_dir)

        expected_unzipped_file1 = os.path.join(unzip_dir, os.path.relpath(file1_path, self.test_dir))
        expected_unzipped_file2 = os.path.join(unzip_dir, os.path.relpath(file2_path, self.test_dir))

        assert os.path.exists(expected_unzipped_file1)
        with open(expected_unzipped_file1, 'r') as f:
            assert f.read() == content1
        assert os.path.exists(expected_unzipped_file2)
        assert os.path.basename(expected_unzipped_file2) == "zfs_file2.log"
        assert os.path.basename(os.path.dirname(expected_unzipped_file2)) == "zfs_subdir"
        with open(expected_unzipped_file2, 'r') as f:
            assert f.read() == content2

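    # add_files_to_single_tar_file mirrors the multi-file zip helper, but for
    # tarballs; mode is presumably forwarded to tarfile.open, so 'x:gz'
    # requests exclusive creation of a gzip-compressed archive.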
    def test_add_files_to_single_tar_file(self):
        """
        Test add_files_to_single_tar_file.
        """

        file1_path = os.path.join(self.test_dir, "tar_file1.txt")
        content1 = "content for tar1"
        with open(file1_path, 'w') as f:
            f.write(content1)

        subdir_for_tar = os.path.join(self.test_dir, "tar_subdir")
        os.makedirs(subdir_for_tar, exist_ok=True)
        file2_path = os.path.join(subdir_for_tar, "tar_file2.log")
        content2 = "content for tar2"
        with open(file2_path, 'w') as f:
            f.write(content2)

        input_files = [file1_path, file2_path]
        output_tar_path = os.path.join(self.test_dir, "archive.tar.gz")

        add_files_to_single_tar_file(input_files, output_tar_path, arc_name_base=self.test_dir,
                                     overwrite=True, mode='x:gz')
        assert os.path.exists(output_tar_path)

        un_tar_dir = os.path.join(self.test_dir, "un_tar_contents")
        os.makedirs(un_tar_dir, exist_ok=True)
        with tarfile.open(output_tar_path, 'r:gz') as tf:
            tf.extractall(path=un_tar_dir)

        expected_untarred_file1 = os.path.join(un_tar_dir, os.path.relpath(file1_path, self.test_dir))
        expected_untarred_file2 = os.path.join(un_tar_dir, os.path.relpath(file2_path, self.test_dir))

        assert os.path.exists(expected_untarred_file1)
        with open(expected_untarred_file1, 'r') as f:
            assert f.read() == content1
        assert os.path.exists(expected_untarred_file2)
        with open(expected_untarred_file2, 'r') as f:
            assert f.read() == content2

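    # parallel_zip_files and parallel_zip_folders fan zip_file/zip_folder out
    # over a worker pool (max_workers=1 keeps the test deterministic), each
    # producing a sibling <name>.zip archive.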
    def test_parallel_zip_individual_files_and_folders(self):
        """
        Test parallel_zip_files, parallel_zip_folders, and zip_each_file_in_folder.
        """

        file1_to_zip = os.path.join(self.test_dir, "pz_file1.txt")
        file2_to_zip = os.path.join(self.test_dir, "pz_file2.txt")
        with open(file1_to_zip, 'w') as f:
            f.write("pz_content1")
        with open(file2_to_zip, 'w') as f:
            f.write("pz_content2")

        parallel_zip_files([file1_to_zip, file2_to_zip], max_workers=1, overwrite=True)
        assert os.path.exists(file1_to_zip + ".zip")
        assert os.path.exists(file2_to_zip + ".zip")
        unzip_dir_pz = os.path.join(self.test_dir, "unzip_pz")
        unzip_file(file1_to_zip + ".zip", unzip_dir_pz)
        assert os.path.exists(os.path.join(unzip_dir_pz, os.path.basename(file1_to_zip)))

        folder1_to_zip = os.path.join(self.test_dir, "pz_folder1")
        os.makedirs(folder1_to_zip, exist_ok=True)
        with open(os.path.join(folder1_to_zip, "pf1.txt"), 'w') as f:
            f.write("pf1_content")
        folder2_to_zip = os.path.join(self.test_dir, "pz_folder2")
        os.makedirs(folder2_to_zip, exist_ok=True)
        with open(os.path.join(folder2_to_zip, "pf2.txt"), 'w') as f:
            f.write("pf2_content")

        parallel_zip_folders([folder1_to_zip, folder2_to_zip], max_workers=1, overwrite=True)
        assert os.path.exists(folder1_to_zip + ".zip")
        assert os.path.exists(folder2_to_zip + ".zip")
        unzip_dir_pzf = os.path.join(self.test_dir, "unzip_pzf")
        unzip_file(folder1_to_zip + ".zip", unzip_dir_pzf)
        assert os.path.exists(os.path.join(unzip_dir_pzf, "pf1.txt"))
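
        # zip_each_file_in_folder zips every file in a folder individually;
        # the assertions below imply that recursive controls descent into
        # subfolders, required_token filters files by substring, and .zip
        # files are skipped unless exclude_zip=False.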
        zef_folder = os.path.join(self.test_dir, "zef_test_folder")
        os.makedirs(zef_folder, exist_ok=True)
        zef_file1 = os.path.join(zef_folder, "zef1.txt")
        zef_file2_png = os.path.join(zef_folder, "zef2.png")
        zef_file3_zip = os.path.join(zef_folder, "zef3.zip")
        zef_subdir = os.path.join(zef_folder, "zef_sub")
        os.makedirs(zef_subdir, exist_ok=True)
        zef_file_in_sub = os.path.join(zef_subdir, "zef_subfile.txt")

        for p_path in [zef_file1, zef_file2_png, zef_file3_zip, zef_file_in_sub]:
            with open(p_path, 'w') as f:
                f.write(f"content of {os.path.basename(p_path)}")

        zip_each_file_in_folder(zef_folder, recursive=False, max_workers=1, overwrite=True)
        assert os.path.exists(zef_file1 + ".zip")
        assert os.path.exists(zef_file2_png + ".zip")
        assert not os.path.exists(zef_file3_zip + ".zip")
        assert not os.path.exists(zef_file_in_sub + ".zip")

        if os.path.exists(zef_file1 + ".zip"):
            os.remove(zef_file1 + ".zip")
        if os.path.exists(zef_file2_png + ".zip"):
            os.remove(zef_file2_png + ".zip")

        zip_each_file_in_folder(zef_folder, recursive=True, max_workers=1, overwrite=True)
        assert os.path.exists(zef_file1 + ".zip")
        assert os.path.exists(zef_file2_png + ".zip")
        assert not os.path.exists(zef_file3_zip + ".zip")
        assert os.path.exists(zef_file_in_sub + ".zip")

        if os.path.exists(zef_file1 + ".zip"):
            os.remove(zef_file1 + ".zip")
        if os.path.exists(zef_file2_png + ".zip"):
            os.remove(zef_file2_png + ".zip")
        if os.path.exists(zef_file_in_sub + ".zip"):
            os.remove(zef_file_in_sub + ".zip")
        zip_each_file_in_folder(zef_folder, recursive=True, required_token="zef1", max_workers=1, overwrite=True)
        assert os.path.exists(zef_file1 + ".zip")
        assert not os.path.exists(zef_file2_png + ".zip")
        assert not os.path.exists(zef_file_in_sub + ".zip")

        if os.path.exists(zef_file1 + ".zip"):
            os.remove(zef_file1 + ".zip")
        dummy_to_zip = os.path.join(zef_folder, "dummy.txt")
        with open(dummy_to_zip, 'w') as f:
            f.write('d')
        zip_each_file_in_folder(zef_folder, recursive=False, exclude_zip=False, max_workers=1, overwrite=True)
        assert os.path.exists(dummy_to_zip + ".zip")
        assert os.path.exists(zef_file3_zip + ".zip")
        if os.path.exists(dummy_to_zip + ".zip"):
            os.remove(dummy_to_zip + ".zip")
        if os.path.exists(zef_file3_zip + ".zip"):
            os.remove(zef_file3_zip + ".zip")

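    # compute_file_hash defaults to SHA-256 (the 64-hex-digit constants below
    # match that digest length) and accepts algorithm='md5';
    # allow_failures=True turns a missing file into a None result instead of
    # an exception.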
    def test_compute_file_hash(self):
        """
        Test compute_file_hash and parallel_compute_file_hashes.
        """

        file1_name = "hash_me1.txt"
        file1_path = os.path.join(self.test_dir, file1_name)
        content1 = "This is a test string for hashing."
        with open(file1_path, 'w') as f:
            f.write(content1)

        file2_name = "hash_me2.txt"
        file2_path = os.path.join(self.test_dir, file2_name)
        with open(file2_path, 'w') as f:
            f.write(content1)

        file3_name = "hash_me3.txt"
        file3_path = os.path.join(self.test_dir, file3_name)
        content3 = "This is a different test string for hashing."
        with open(file3_path, 'w') as f:
            f.write(content3)

        expected_hash_content1_sha256 = \
            "c56f19d76df6a09e49fe0d9ce7b1bc7f1dbd582f668742bede65c54c47d5bcf4".lower()
        expected_hash_content3_sha256 = \
            "23013ff7e93264317f7b2fc0e9a217649f2dc0b11ca7e0bd49632424b70b6680".lower()

        hash1 = compute_file_hash(file1_path)
        hash2 = compute_file_hash(file2_path)
        hash3 = compute_file_hash(file3_path)
        assert hash1 == expected_hash_content1_sha256
        assert hash2 == expected_hash_content1_sha256
        assert hash1 != hash3
        assert hash3 == expected_hash_content3_sha256

        expected_hash_content1_md5 = "94b971f1f8cdb23c2af82af73160d4b0".lower()
        hash1_md5 = compute_file_hash(file1_path, algorithm='md5')
        assert hash1_md5 == expected_hash_content1_md5

        non_existent_path = os.path.join(self.test_dir, "no_such_file.txt")
        assert compute_file_hash(non_existent_path, allow_failures=True) is None
        try:
            compute_file_hash(non_existent_path, allow_failures=False)
            raise AssertionError("FileNotFoundError not raised for compute_file_hash")
        except FileNotFoundError:
            pass
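
        # parallel_compute_file_hashes accepts either a list of files or a
        # folder; like parallel_get_file_sizes, it maps unreadable paths to
        # None. The test normalizes keys to forward slashes on both sides of
        # the comparison so the expectations also hold on Windows.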
        files_to_hash = [file1_path, file3_path, non_existent_path]
        hashes_parallel = parallel_compute_file_hashes(files_to_hash, max_workers=1)

        norm_f1 = file1_path.replace('\\','/')
        norm_f3 = file3_path.replace('\\','/')
        norm_non = non_existent_path.replace('\\','/')

        expected_parallel_hashes = {
            norm_f1: expected_hash_content1_sha256,
            norm_f3: expected_hash_content3_sha256,
            norm_non: None
        }
        hashes_parallel_norm = {k.replace('\\','/'): v for k, v in hashes_parallel.items()}
        assert hashes_parallel_norm == expected_parallel_hashes

        hash_folder = os.path.join(self.test_dir, "hash_test_folder")
        os.makedirs(hash_folder, exist_ok=True)
        h_f1_name = "h_f1.txt"
        h_f1_path = os.path.join(hash_folder, h_f1_name)
        h_f2_name = "h_f2.txt"
        h_f2_path = os.path.join(hash_folder, h_f2_name)
        with open(h_f1_path, 'w') as f:
            f.write(content1)
        with open(h_f2_path, 'w') as f:
            f.write(content3)

        hashes_folder_parallel = parallel_compute_file_hashes(hash_folder, recursive=False, max_workers=1)
        norm_hf1 = h_f1_path.replace('\\','/')
        norm_hf2 = h_f2_path.replace('\\','/')
        expected_folder_hashes = {
            norm_hf1: expected_hash_content1_sha256,
            norm_hf2: expected_hash_content3_sha256
        }
        hashes_folder_parallel_norm = {k.replace('\\','/'): v for k, v in hashes_folder_parallel.items()}
        assert hashes_folder_parallel_norm == expected_folder_hashes

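# Module-level driver that runs the suite without a test framework;
# set_up/tear_down presumably create and remove the temporary test
# directory, and the try/finally guarantees cleanup even when an
# assertion fails.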
def test_path_utils():
    """
    Runs all tests in the TestPathUtils class.
    """

    test_instance = TestPathUtils()
    test_instance.set_up()
    try:
        test_instance.test_is_image_file()
        test_instance.test_find_image_strings()
        test_instance.test_find_images()
        test_instance.test_recursive_file_list_and_file_list()
        test_instance.test_folder_list()
        test_instance.test_folder_summary()
        test_instance.test_fileparts()
        test_instance.test_insert_before_extension()
        test_instance.test_split_path()
        test_instance.test_path_is_abs()
        test_instance.test_safe_create_link_unix()
        test_instance.test_remove_empty_folders()
        test_instance.test_path_join()
        test_instance.test_filename_cleaning()
        test_instance.test_is_executable()
        test_instance.test_write_read_list_to_file()
        test_instance.test_parallel_copy_files()
        test_instance.test_get_file_sizes()
        test_instance.test_zip_file_and_unzip_file()
        test_instance.test_zip_folder()
        test_instance.test_zip_files_into_single_zipfile()
        test_instance.test_add_files_to_single_tar_file()
        test_instance.test_parallel_zip_individual_files_and_folders()
        test_instance.test_compute_file_hash()
    finally:
        test_instance.tear_down()

# from IPython import embed; embed()
# test_path_utils()