megadetector 5.0.8-py3-none-any.whl → 5.0.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector might be problematic.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +65 -65
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
- api/batch_processing/postprocessing/compare_batch_results.py +113 -43
- api/batch_processing/postprocessing/convert_output_format.py +41 -16
- api/batch_processing/postprocessing/load_api_results.py +16 -17
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +52 -22
- api/batch_processing/postprocessing/merge_detections.py +14 -14
- api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
- api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +102 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -263
- data_management/coco_to_yolo.py +79 -58
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +62 -24
- data_management/databases/subset_json_db.py +24 -15
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -162
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -158
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +7 -7
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +65 -24
- data_management/labelme_to_yolo.py +8 -8
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +13 -13
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +44 -110
- data_management/lila/generate_lila_per_image_labels.py +55 -42
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +96 -33
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +110 -97
- data_management/remap_coco_categories.py +83 -83
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +30 -23
- data_management/wi_download_csv_to_coco.py +246 -239
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +300 -60
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +179 -113
- detection/run_inference_with_yolov5_val.py +108 -48
- detection/run_tiled_inference.py +111 -40
- detection/tf_detector.py +51 -29
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +228 -68
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -871
- md_utils/path_utils.py +460 -134
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +176 -60
- md_utils/write_html_image_list.py +40 -33
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +597 -291
- md_visualization/visualize_db.py +76 -48
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- megadetector-5.0.8.dist-info/RECORD +0 -205
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0
md_utils/path_utils.py
CHANGED
(Removed lines whose content the diff viewer did not render are shown as "-...".)

@@ -1,11 +1,11 @@
-... (8 lines of the old module header; not rendered)
+"""
+
+path_utils.py
+
+Miscellaneous useful utils for path manipulation, i.e. things that could *almost*
+be in os.path, but aren't.
+
+"""

 #%% Imports and constants

@@ -14,23 +14,24 @@ import ntpath
 import os
 import sys
 import platform
-import posixpath
 import string
 import json
 import shutil
 import unicodedata
 import zipfile
+import tarfile
 import webbrowser
 import subprocess
 import re

 from zipfile import ZipFile
 from datetime import datetime
-from typing import Container, Iterable, List, Optional, Tuple, Sequence
 from multiprocessing.pool import Pool, ThreadPool
 from functools import partial
+from shutil import which
 from tqdm import tqdm

+# Should all be lower-case
 IMG_EXTENSIONS = ('.jpg', '.jpeg', '.gif', '.png', '.tif', '.tiff', '.bmp')

 VALID_FILENAME_CHARS = f"~-_.() {string.ascii_letters}{string.digits}"
@@ -41,12 +42,27 @@ CHAR_LIMIT = 255

 #%% General path functions

-def recursive_file_list(base_dir,
-...
+def recursive_file_list(base_dir,
+                        convert_slashes=True,
+                        return_relative_paths=False,
+                        sort_files=True,
                         recursive=True):
     r"""
-...
-...
+    Enumerates files (not directories) in [base_dir], optionally converting
+    backslahes to slashes
+
+    Args:
+        base_dir (str): folder to enumerate
+        convert_slashes (bool, optional): force forward slashes; if this is False, will use
+            the native path separator
+        return_relative_paths (bool, optional): return paths that are relative to [base_dir],
+            rather than absolute paths
+        sort_files (bool, optional): force files to be sorted, otherwise uses the sorting
+            provided by os.walk()
+        recursive (bool, optional): enumerate recursively
+
+    Returns:
+        list: list of filenames
     """

     assert os.path.isdir(base_dir), '{} is not a folder'.format(base_dir)
@@ -79,61 +95,51 @@ def file_list(base_dir, convert_slashes=True, return_relative_paths=False, sort_
               recursive=False):
     """
     Trivial wrapper for recursive_file_list, which was a poor function name choice at the time,
-    it doesn't really make sense to have a "recursive" option in a function called
+    since it doesn't really make sense to have a "recursive" option in a function called
+    "recursive_file_list".
+
+    Args:
+        base_dir (str): folder to enumerate
+        convert_slashes (bool, optional): force forward slashes; if this is False, will use
+            the native path separator
+        return_relative_paths (bool, optional): return paths that are relative to [base_dir],
+            rather than absolute paths
+        sort_files (bool, optional): force files to be sorted, otherwise uses the sorting
+            provided by os.walk()
+        recursive (bool, optional): enumerate recursively
+
+    Returns:
+        list: list of filenames
     """

     return recursive_file_list(base_dir,convert_slashes,return_relative_paths,sort_files,
                                recursive=recursive)


-def ...
-    r"""
-    Splits [path] into all its constituent tokens.
-
-    Non-recursive version of:
-    http://nicks-liquid-soapbox.blogspot.com/2011/03/splitting-path-to-list-in-python.html
-
-    Examples
-    >>> split_path(r'c:\dir\subdir\file.txt')
-    ['c:\\', 'dir', 'subdir', 'file.txt']
-    >>> split_path('/dir/subdir/file.jpg')
-    ['/', 'dir', 'subdir', 'file.jpg']
-    >>> split_path('c:\\')
-    ['c:\\']
-    >>> split_path('/')
-    ['/']
-    """
-
-    parts = []
-    while True:
-        # ntpath seems to do the right thing for both Windows and Unix paths
-        head, tail = ntpath.split(path)
-        if head == '' or head == path:
-            break
-        parts.append(tail)
-        path = head
-    parts.append(head or tail)
-    return parts[::-1]  # reverse
-
-
-def fileparts(path: str) -> Tuple[str, str, str]:
+def fileparts(path):
     r"""
     Breaks down a path into the directory path, filename, and extension.

     Note that the '.' lives with the extension, and separators are removed.

-    Examples
-... (6 lines not rendered)
+    Examples:
+
+    .. code-block:: none
+
+        >>> fileparts('file')
+        ('', 'file', '')
+        >>> fileparts(r'c:/dir/file.jpg')
+        ('c:/dir', 'file', '.jpg')
+        >>> fileparts('/dir/subdir/file.jpg')
+        ('/dir/subdir', 'file', '.jpg')

+    Args:
+        path (str): path name to separate into parts
     Returns:
-... (3 lines not rendered)
+        tuple: tuple containing (p,n,e):
+        - p: str, directory path
+        - n: str, filename without extension
+        - e: str, extension including the '.'
     """

     # ntpath seems to do the right thing for both Windows and Unix paths
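The hunks above drop the 5.0.8 type annotations in favor of plain signatures with Google-style docstrings, and expose sort_files and return_relative_paths directly. A minimal usage sketch against the 5.0.9 signatures shown above, assuming the wheel's top-level md_utils package; 'my_images' is a hypothetical folder, not part of this diff:

from md_utils import path_utils

# Enumerate files recursively, as sorted, forward-slash relative paths
filenames = path_utils.recursive_file_list('my_images',
                                           convert_slashes=True,
                                           return_relative_paths=True,
                                           sort_files=True,
                                           recursive=True)

# Split a path into (directory, base name, extension), per the new docstring
p, n, e = path_utils.fileparts('/dir/subdir/file.jpg')
assert (p, n, e) == ('/dir/subdir', 'file', '.jpg')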
@@ -143,79 +149,168 @@ def fileparts(path: str) -> Tuple[str, str, str]:
     return p, n, e


-def insert_before_extension(filename...
+def insert_before_extension(filename, s=None, separator='.'):
     """
     Insert string [s] before the extension in [filename], separated with [separator].

     If [s] is empty, generates a date/timestamp. If [filename] has no extension,
     appends [s].

-    Examples
-... (6 lines not rendered)
+    Examples:
+
+    .. code-block:: none
+
+        >>> insert_before_extension('/dir/subdir/file.ext', 'insert')
+        '/dir/subdir/file.insert.ext'
+        >>> insert_before_extension('/dir/subdir/file', 'insert')
+        '/dir/subdir/file.insert'
+        >>> insert_before_extension('/dir/subdir/file')
+        '/dir/subdir/file.2020.07.20.10.54.38'
+
+    Args:
+        filename (str): filename to manipulate
+        s (str, optional): string to insert before the extension in [filename], or
+            None to insert a datestamp
+        separator (str, optional): separator to place between the filename base
+            and the inserted string
+
+    Returns:
+        str: modified string
     """

     assert len(filename) > 0
-    if len(s) == 0:
+    if s is None or len(s) == 0:
         s = datetime.now().strftime('%Y.%m.%d.%H.%M.%S')
     name, ext = os.path.splitext(filename)
     return f'{name}{separator}{s}{ext}'


-def ...
+def split_path(path):
+    r"""
+    Splits [path] into all its constituent file/folder tokens.
+
+    Examples:
+
+    .. code-block:: none
+
+        >>> split_path(r'c:\dir\subdir\file.txt')
+        ['c:\\', 'dir', 'subdir', 'file.txt']
+        >>> split_path('/dir/subdir/file.jpg')
+        ['/', 'dir', 'subdir', 'file.jpg']
+        >>> split_path('c:\\')
+        ['c:\\']
+        >>> split_path('/')
+        ['/']
+
+    Args:
+        path (str): path to split into tokens
+
+    Returns:
+        list: list of path tokens
     """
-
+
+    parts = []
+    while True:
+        # ntpath seems to do the right thing for both Windows and Unix paths
+        head, tail = ntpath.split(path)
+        if head == '' or head == path:
+            break
+        parts.append(tail)
+        path = head
+    parts.append(head or tail)
+    return parts[::-1]  # reverse

-    This function behaves differently for Windows vs. Unix paths. Set
-    windows=True if [p] is a Windows path. Set windows=None (default) to treat
-    [p] as a native system path.

-... (3 lines not rendered)
+def path_is_abs(p):
+    """
+    Determines whether [p] is an absolute path. An absolute path is defined as
+    one that starts with slash, backslash, or a letter followed by a colon.
+
+    Args:
+        p (str): path to evaluate
+
+    Returns:
+        bool: True if [p] is an absolute path, else False
+    """
+
+    return (len(p) > 1) and (p[0] == '/' or p[1] == ':' or p[0] == '\\')
+

-... (3 lines not rendered)
+def top_level_folder(p):
+    r"""
+    Gets the top-level folder from the path *p*.
+
+    On UNIX, this is straightforward:
+
+    /blah/foo
+
+    ...returns '/blah'
+
+    On Windows, we define this as the top-level folder that isn't the drive, so:
+
+    c:\blah\foo
+
+    ...returns 'c:\blah'.
+
+    Args:
+        p (str): filename to evaluate
+
+    Returns:
+        str: the top-level folder in [p], see above for details on how this is defined
     """

     if p == '':
         return ''
-
-
-    if windows is not None:
-        os.path = ntpath if windows else posixpath
-
-    # Path('/blah').parts is ('/', 'blah')
+
+    # Path('/blah').parts is ('/','blah')
     parts = split_path(p)
+
+    if len(parts) == 1:
+        return parts[0]

+    # Handle paths like:
+    #
+    # /, \, /stuff, c:, c:\stuff
     drive = os.path.splitdrive(p)[0]
-    if ...
-...
-            or parts[0] == drive + '/'
-            or parts[0] == drive + '\\'
-            or parts[0] in ['\\', '/']):
-        result = os.path.join(parts[0], parts[1])
+    if parts[0] == drive or parts[0] == drive + '/' or parts[0] == drive + '\\' or parts[0] in ['\\', '/']:
+        return os.path.join(parts[0], parts[1])
     else:
-...
+        return parts[0]
+
+# ...top_level_folder()

-    os.path = default_lib  # restore default os.path
-    return result

+#%% Test driver for top_level_folder
+
+if False:
+
+    #%%
+
+    p = 'blah/foo/bar'; s = top_level_folder(p); print(s); assert s == 'blah'
+    p = '/blah/foo/bar'; s = top_level_folder(p); print(s); assert s == '/blah'
+    p = 'bar'; s = top_level_folder(p); print(s); assert s == 'bar'
+    p = ''; s = top_level_folder(p); print(s); assert s == ''
+    p = 'c:\\'; s = top_level_folder(p); print(s); assert s == 'c:\\'
+    p = r'c:\blah'; s = top_level_folder(p); print(s); assert s == 'c:\\blah'
+    p = r'c:\foo'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
+    p = r'c:/foo'; s = top_level_folder(p); print(s); assert s == 'c:/foo'
+    p = r'c:\foo/bar'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
+
+    #%%

 def safe_create_link(link_exists,link_new):
     """
-...
+    Creates a symlink at [link_new] pointing to [link_exists].

-    If link_new already exists, make sure it's a link (not a file),
-    and if it has a different target than link_exists,
+    If [link_new] already exists, make sure it's a link (not a file),
+    and if it has a different target than [link_exists], removes and re-creates
     it.

-    Errors if link_new already exists but it's not a link.
+    Errors if [link_new] already exists but it's not a link.
+
+    Args:
+        link_exists (str): the source of the (possibly-new) symlink
+        link_new (str): the target of the (possibly-new) symlink
     """

     if os.path.exists(link_new) or os.path.islink(link_new):
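path_is_abs() is new in 5.0.9, top_level_folder() loses the 5.0.8 windows= parameter (it now infers drive handling from the path itself), and insert_before_extension() accepts s=None to insert a timestamp. A sketch of the documented behavior, derived from the docstrings and the test driver above:

from md_utils import path_utils

assert path_utils.split_path('/dir/subdir/file.jpg') == ['/', 'dir', 'subdir', 'file.jpg']
assert path_utils.path_is_abs('/dir/subdir') and not path_utils.path_is_abs('dir/file.jpg')
assert path_utils.top_level_folder('/blah/foo/bar') == '/blah'

# 'insert' lands before the extension, separated by '.' by default
assert path_utils.insert_before_extension('/dir/file.ext', 'insert') == '/dir/file.insert.ext'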
@@ -229,37 +324,60 @@ def safe_create_link(link_exists,link_new):

 #%% Image-related path functions

-def is_image_file(s...
-        ) -> bool:
+def is_image_file(s, img_extensions=IMG_EXTENSIONS):
     """
     Checks a file's extension against a hard-coded set of image file
-    extensions.
+    extensions. Uses case-insensitive comparison.

     Does not check whether the file exists, only determines whether the filename
     implies it's an image file.
+
+    Args:
+        s (str): filename to evaluate for image-ness
+        img_extensions (list, optional): list of known image file extensions
+
+    Returns:
+        bool: True if [s] appears to be an image file, else False
     """

     ext = os.path.splitext(s)[1]
     return ext.lower() in img_extensions


-def find_image_strings(strings...
+def find_image_strings(strings):
     """
     Given a list of strings that are potentially image file names, looks for
     strings that actually look like image file names (based on extension).
+
+    Args:
+        strings (list): list of filenames to check for image-ness
+
+    Returns:
+        list: the subset of [strings] that appear to be image filenames
     """

     return [s for s in strings if is_image_file(s)]


-def find_images(dirname...
-...
-...
+def find_images(dirname,
+                recursive=False,
+                return_relative_paths=False,
+                convert_slashes=True):
     """
     Finds all files in a directory that look like image file names. Returns
     absolute paths unless return_relative_paths is set. Uses the OS-native
     path separator unless convert_slashes is set, in which case will always
     use '/'.
+
+    Args:
+        dirname (str): the folder to search for images
+        recursive (bool, optional): whether to search recursively
+        return_relative_paths (str, optional): return paths that are relative
+            to [dirname], rather than absolute paths
+        convert_slashes (bool, optional): force forward slashes in return values
+
+    Returns:
+        list: list of image filenames found in [dirname]
     """

     assert os.path.isdir(dirname), '{} is not a folder'.format(dirname)
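is_image_file() now takes an optional img_extensions list, and find_images() exposes recursive, return_relative_paths, and convert_slashes in its signature. A short sketch; 'camera_folder' is a hypothetical directory:

from md_utils import path_utils

# Extension check only; the file doesn't need to exist
assert path_utils.is_image_file('IMG_0001.JPG')
assert not path_utils.is_image_file('notes.txt')

# Enumerate images recursively, as forward-slash relative paths
images = path_utils.find_images('camera_folder',
                                recursive=True,
                                return_relative_paths=True,
                                convert_slashes=True)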
@@ -284,16 +402,28 @@ def find_images(dirname: str, recursive: bool = False,

 #%% Filename cleaning functions

-def clean_filename(filename...
-...
+def clean_filename(filename,
+                   allow_list=VALID_FILENAME_CHARS,
+                   char_limit=CHAR_LIMIT,
+                   force_lower= False):
     r"""
     Removes non-ASCII and other invalid filename characters (on any
-    reasonable OS) from a filename, then trims to a maximum length.
+    reasonable OS) from a filename, then optionally trims to a maximum length.

     Does not allow :\/ by default, use clean_path if you want to preserve those.

     Adapted from
     https://gist.github.com/wassname/1393c4a57cfcbf03641dbc31886123b8
+
+    Args:
+        filename (str): filename to clean
+        allow_list (str, optional): string containing all allowable filename characters
+        char_limit (int, optional): maximum allowable filename length, if None will skip this
+            step
+        force_lower (bool, optional): convert the resulting filename to lowercase
+
+    returns:
+        str: cleaned version of [filename]
     """

     # keep only valid ascii chars
@@ -309,35 +439,75 @@ def clean_filename(filename: str, allow_list: str = VALID_FILENAME_CHARS,
     return cleaned_filename


-def clean_path(pathname...
-...
+def clean_path(pathname,
+               allow_list=VALID_PATH_CHARS,
+               char_limit=CHAR_LIMIT,
+               force_lower=False):
     """
     Removes non-ASCII and other invalid path characters (on any reasonable
-    OS) from a path, then trims to a maximum length.
+    OS) from a path, then optionally trims to a maximum length.
+
+    Args:
+        pathname (str): path name to clean
+        allow_list (str, optional): string containing all allowable filename characters
+        char_limit (int, optional): maximum allowable filename length, if None will skip this
+            step
+        force_lower (bool, optional): convert the resulting filename to lowercase
+
+    returns:
+        str: cleaned version of [filename]
     """

     return clean_filename(pathname, allow_list=allow_list,
                           char_limit=char_limit, force_lower=force_lower)


-def flatten_path(pathname...
-    """
+def flatten_path(pathname,separator_chars=SEPARATOR_CHARS,separator_char_replacement='~'):
+    r"""
     Removes non-ASCII and other invalid path characters (on any reasonable
     OS) from a path, then trims to a maximum length. Replaces all valid
-    separators with ...
+    separators with [separator_char_replacement.]
+
+    Args:
+        pathname (str): path name to flatten
+        separator_chars (str, optional): string containing all known path separators
+        separator_char_replacement (str, optional): string to insert in place of
+            path separators.
+
+    Returns:
+        str: flattened version of [pathname]
     """

     s = clean_path(pathname)
     for c in separator_chars:
-        s = s.replace(c, ...
+        s = s.replace(c, separator_char_replacement)
     return s


+def is_executable(filename):
+    """
+    Checks whether [filename] is on the system path and marked as executable.
+
+    Args:
+        filename (str): filename to check for executable status
+
+    Returns:
+        bool: True if [filename] is on the system path and marked as executable, otherwise False
+    """
+
+    # https://stackoverflow.com/questions/11210104/check-if-a-program-exists-from-a-python-script
+
+    return which(filename) is not None
+
+
 #%% Platform-independent way to open files in their associated application

 def environment_is_wsl():
     """
-...
+    Determines whether we're running in WSL.
+
+    Returns:
+        True if we're running in WSL.
     """

     if sys.platform not in ('linux','posix'):
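clean_filename(), clean_path(), and flatten_path() now document their allow-list and length-limit parameters, and is_executable() is new (a thin wrapper over shutil.which, per the body above). A sketch; the exact cleaned strings depend on the allow-lists defined at the top of the module, so they are not asserted here:

from md_utils import path_utils

s = path_utils.clean_filename('my file:v2?.jpg')   # invalid characters removed
f = path_utils.flatten_path('a/b/c.jpg')           # separators replaced with '~' by default

# True only if 'ffmpeg' is on the system path and marked executable
if path_utils.is_executable('ffmpeg'):
    print('ffmpeg available')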
@@ -347,7 +517,7 @@ def environment_is_wsl():


 def wsl_path_to_windows_path(filename):
-    """
+    r"""
     Converts a WSL path to a Windows path, or returns None if that's not possible. E.g.
     converts:

@@ -356,6 +526,12 @@ def wsl_path_to_windows_path(filename):
     ...to:

     e:\a\b\c
+
+    Args:
+        filename (str): filename to convert
+
+    Returns:
+        str: Windows equivalent to the WSL path [filename]
     """

     result = subprocess.run(['wslpath', '-w', filename], text=True, capture_output=True)
@@ -369,16 +545,19 @@ def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
     """
     Opens [filename] in the default OS file handler for this file type.

-    If attempt_to_open_in_wsl_host is True, and we're in WSL, attempts to open
-    [filename] in the Windows host environment.
-
     If browser_name is not None, uses the webbrowser module to open the filename
     in the specified browser; see https://docs.python.org/3/library/webbrowser.html
     for supported browsers. Falls back to the default file handler if webbrowser.open()
     fails. In this case, attempt_to_open_in_wsl_host is ignored unless webbrowser.open() fails.

-    If browser_name is 'default', ...
+    If browser_name is 'default', uses the system default. This is different from the
     parameter to webbrowser.get(), where None implies the system default.
+
+    Args:
+        filename (str): file to open
+        attempt_to_open_in_wsl_host: if this is True, and we're in WSL, attempts to open
+            [filename] in the Windows host environment
+        browser_name: see above
     """

     if browser_name is not None:
@@ -423,10 +602,14 @@ def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):

 #%% File list functions

-def write_list_to_file(output_file...
+def write_list_to_file(output_file,strings):
     """
     Writes a list of strings to either a JSON file or text file,
     depending on extension of the given file name.
+
+    Args:
+        output_file (str): file to write
+        strings (list): list of strings to write to [output_file]
     """

     with open(output_file, 'w') as f:
@@ -436,9 +619,15 @@ def write_list_to_file(output_file: str, strings: Sequence[str]) -> None:
         f.write('\n'.join(strings))


-def read_list_from_file(filename...
+def read_list_from_file(filename):
     """
     Reads a json-formatted list of strings from a file.
+
+    Args:
+        filename (str): .json filename to read
+
+    Returns:
+        list: list of strings read from [filename]
     """

     assert filename.endswith('.json')
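write_list_to_file() picks JSON vs. plain text from the output extension, while read_list_from_file() only accepts .json (note the assert above). A round-trip sketch with a hypothetical filename:

from md_utils import path_utils

strings = ['a.jpg', 'b.jpg']
path_utils.write_list_to_file('file_list.json', strings)  # JSON, because of the extension
assert path_utils.read_list_from_file('file_list.json') == strings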
@@ -451,6 +640,10 @@ def read_list_from_file(filename: str) -> List[str]:


 def _copy_file(input_output_tuple,overwrite=True,verbose=False):
+    """
+    Internal function for copying files from within parallel_copy_files.
+    """
+
     assert len(input_output_tuple) == 2
     source_fn = input_output_tuple[0]
     target_fn = input_output_tuple[1]
@@ -465,7 +658,16 @@ def _copy_file(input_output_tuple,overwrite=True,verbose=False):
 def parallel_copy_files(input_file_to_output_file, max_workers=16,
                         use_threads=True, overwrite=False, verbose=False):
     """
-...
+    Copies files from source to target according to the dict input_file_to_output_file.
+
+    Args:
+        input_file_to_output_file (dict): dictionary mapping source files to the target files
+            to which they should be copied
+        max_workers (int, optional): number of concurrent workers; set to <=1 to disable parallelism
+        use_threads (bool, optional): whether to use threads (True) or processes (False) for
+            parallel copying; ignored if max_workers <= 1
+        overwrite (bool, optional): whether to overwrite existing destination files
+        verbose (bool, optional): enable additionald debug output
     """

     n_workers = min(max_workers,len(input_file_to_output_file))
@@ -490,11 +692,19 @@ def parallel_copy_files(input_file_to_output_file, max_workers=16,

 def get_file_sizes(base_dir, convert_slashes=True):
     """
-...
+    Gets sizes recursively for all files in base_dir, returning a dict mapping
     relative filenames to size.

     TODO: merge the functionality here with parallel_get_file_sizes, which uses slightly
     different semantics.
+
+    Args:
+        base_dir (str): folder within which we want all file sizes
+        convert_slashes (bool, optional): force forward slashes in return strings,
+            otherwise uses the native path separator
+
+    Returns:
+        dict: dictionary mapping filenames to file sizes in bytes
     """

     relative_filenames = recursive_file_list(base_dir, convert_slashes=convert_slashes,
@@ -527,8 +737,19 @@ def parallel_get_file_sizes(filenames, max_workers=16,
                             use_threads=True, verbose=False,
                             recursive=True):
     """
-...
+    Returns a dictionary mapping every file in [filenames] to the corresponding file size,
     or None for errors. If [filenames] is a folder, will enumerate the folder (optionally recursively).
+
+    Args:
+        filenames (list or str): list of filenames for which we should read sizes, or a folder
+            within which we should read all file sizes recursively
+        max_workers (int, optional): number of concurrent workers; set to <=1 to disable parallelism
+        use_threads (bool, optional): whether to use threads (True) or processes (False) for
+            parallel copying; ignored if max_workers <= 1
+        verbose (bool, optional): enable additionald debug output
+
+    Returns:
+        dict: dictionary mapping filenames to file sizes in bytes
     """

     n_workers = min(max_workers,len(filenames))
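parallel_copy_files() takes a source-to-target dict, and parallel_get_file_sizes() now also accepts a folder instead of a file list. A sketch with hypothetical paths:

from md_utils import path_utils

mapping = {'/data/src/a.jpg': '/data/dst/a.jpg',
           '/data/src/b.jpg': '/data/dst/b.jpg'}
path_utils.parallel_copy_files(mapping, max_workers=8, use_threads=True, overwrite=False)

# Filename -> size in bytes (None on error), computed in parallel
sizes = path_utils.parallel_get_file_sizes('/data/dst', max_workers=8)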
@@ -555,7 +776,18 @@ def parallel_get_file_sizes(filenames, max_workers=16,

 def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
     """
-...
+    Zips a single file.
+
+    Args:
+        input_fn (str): file to zip
+        output_fn (str, optional): target zipfile; if this is None, we'll use
+            [input_fn].zip
+        overwrite (bool, optional): whether to overwrite an existing target file
+        verbose (bool, optional): enable existing debug console output
+        compresslevel (int, optional): compression level to use, between 0 and 9
+
+    Returns:
+        str: the output zipfile, whether we created it or determined that it already exists
     """

     basename = os.path.basename(input_fn)
@@ -565,7 +797,7 @@ def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compressl

     if (not overwrite) and (os.path.isfile(output_fn)):
         print('Skipping existing file {}'.format(output_fn))
-        return
+        return output_fn

     if verbose:
         print('Zipping {} to {} with level {}'.format(input_fn,output_fn,compresslevel))
@@ -577,17 +809,70 @@ def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compressl
     return output_fn


+def add_files_to_single_tar_file(input_files, output_fn, arc_name_base,
+                                 overwrite=False, verbose=False, mode='x'):
+    """
+    Adds all the files in [input_files] to the tar file [output_fn].
+    Archive names are relative to arc_name_base.
+
+    Args:
+        input_files (list): list of absolute filenames to include in the .tar file
+        output_fn (str): .tar file to create
+        arc_name_base (str): absolute folder from which relative paths should be determined;
+            behavior is undefined if there are files in [input_files] that don't live within
+            [arc_name_base]
+        overwrite (bool, optional): whether to overwrite an existing .tar file
+        verbose (bool, optional): enable additional debug console output
+        mode (str, optional): compression type, can be 'x' (no compression), 'x:gz', or 'x:bz2'.
+
+    Returns:
+        str: the output tar file, whether we created it or determined that it already exists
+    """
+
+    if os.path.isfile(output_fn):
+        if not overwrite:
+            print('Tar file {} exists, skipping'.format(output_fn))
+            return output_fn
+        else:
+            print('Tar file {} exists, deleting and re-creating'.format(output_fn))
+            os.remove(output_fn)
+
+    if verbose:
+        print('Adding {} files to {} (mode {})'.format(
+            len(input_files),output_fn,mode))
+
+    with tarfile.open(output_fn,mode) as tarf:
+        for input_fn_abs in tqdm(input_files,disable=(not verbose)):
+            input_fn_relative = os.path.relpath(input_fn_abs,arc_name_base)
+            tarf.add(input_fn_abs,arcname=input_fn_relative)
+
+    return output_fn
+
+
 def zip_files_into_single_zipfile(input_files, output_fn, arc_name_base,
                                   overwrite=False, verbose=False, compresslevel=9):
     """
     Zip all the files in [input_files] into [output_fn]. Archive names are relative to
     arc_name_base.
+
+    Args:
+        input_files (list): list of absolute filenames to include in the .tar file
+        output_fn (str): .tar file to create
+        arc_name_base (str): absolute folder from which relative paths should be determined;
+            behavior is undefined if there are files in [input_files] that don't live within
+            [arc_name_base]
+        overwrite (bool, optional): whether to overwrite an existing .tar file
+        verbose (bool, optional): enable additional debug console output
+        compresslevel (int, optional): compression level to use, between 0 and 9
+
+    Returns:
+        str: the output zipfile, whether we created it or determined that it already exists
     """

     if not overwrite:
         if os.path.isfile(output_fn):
             print('Zip file {} exists, skipping'.format(output_fn))
-            return
+            return output_fn

     if verbose:
         print('Zipping {} files to {} (compression level {})'.format(
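add_files_to_single_tar_file() is new in 5.0.9, and note that zip_file() and zip_files_into_single_zipfile() now return the output filename even when they skip an existing archive. A sketch with hypothetical paths; per the code above, archive names are computed with os.path.relpath against arc_name_base:

from md_utils import path_utils

files = ['/data/project/images/a.jpg', '/data/project/images/b.jpg']

# Archive names become 'images/a.jpg' and 'images/b.jpg'; 'x:gz' enables gzip compression
tar_fn = path_utils.add_files_to_single_tar_file(files,
                                                 output_fn='/data/images.tar.gz',
                                                 arc_name_base='/data/project',
                                                 mode='x:gz')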
@@ -609,7 +894,15 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
     Recursively zip everything in [input_folder] into a single zipfile, storing outputs as relative
     paths.

-...
+    Args:
+        input_folder (str): folder to zip
+        output_fn (str, optional): output filename; if this is None, we'll write to [input_folder].zip
+        overwrite (bool, optional): whether to overwrite an existing .tar file
+        verbose (bool, optional): enable additional debug console output
+        compresslevel (int, optional): compression level to use, between 0 and 9
+
+    Returns:
+        str: the output zipfile, whether we created it or determined that it already exists
     """

     if output_fn is None:
@@ -640,8 +933,17 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
 def parallel_zip_files(input_files, max_workers=16, use_threads=True, compresslevel=9,
                        overwrite=False, verbose=False):
     """
-...
+    Zips one or more files to separate output files in parallel, leaving the
     original files in place. Each file is zipped to [filename].zip.
+
+    Args:
+        input_file (str): list of files to zip
+        max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
+        use_threads (bool, optional): whether to use threads (True) or processes (False); ignored if
+            max_workers <= 1
+        compresslevel (int, optional): zip compression level between 0 and 9
+        overwrite (bool, optional): whether to overwrite an existing .tar file
+        verbose (bool, optional): enable additional debug console output
     """

     n_workers = min(max_workers,len(input_files))
@@ -661,8 +963,17 @@ def parallel_zip_files(input_files, max_workers=16, use_threads=True, compressle
 def parallel_zip_folders(input_folders, max_workers=16, use_threads=True,
                          compresslevel=9, overwrite=False, verbose=False):
     """
-...
+    Zips one or more folders to separate output files in parallel, leaving the
     original folders in place. Each folder is zipped to [folder_name].zip.
+
+    Args:
+        input_folder (list): list of folders to zip
+        max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
+        use_threads (bool, optional): whether to use threads (True) or processes (False); ignored if
+            max_workers <= 1
+        compresslevel (int, optional): zip compression level between 0 and 9
+        overwrite (bool, optional): whether to overwrite an existing .tar file
+        verbose (bool, optional): enable additional debug console output
     """

     n_workers = min(max_workers,len(input_folders))
@@ -684,10 +995,20 @@ def zip_each_file_in_folder(folder_name,recursive=False,max_workers=16,use_threa
                             compresslevel=9,overwrite=False,required_token=None,verbose=False,
                             exclude_zip=True):
     """
-...
-    folder into a single zipfile, use zip_folder().
+    Zips each file in [folder_name] to its own zipfile (filename.zip), optionally recursing. To
+    zip a whole folder into a single zipfile, use zip_folder().

-...
+    Args:
+        folder_name (str): the folder within which we should zip files
+        recursive (bool, optional): whether to recurse within [folder_name]
+        max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
+        use_threads (bool, optional): whether to use threads (True) or processes (False); ignored if
+            max_workers <= 1
+        compresslevel (int, optional): zip compression level between 0 and 9
+        overwrite (bool, optional): whether to overwrite an existing .tar file
+        required_token (str, optional): only zip files whose names contain this string
+        verbose (bool, optional): enable additional debug console output
+        exclude_zip (bool, optional): skip files ending in .zip
     """

     assert os.path.isdir(folder_name), '{} is not a folder'.format(folder_name)
@@ -707,8 +1028,13 @@ def zip_each_file_in_folder(folder_name,recursive=False,max_workers=16,use_threa

 def unzip_file(input_file, output_folder=None):
     """
-...
-    the input file
+    Unzips a zipfile to the specified output folder, defaulting to the same location as
+    the input file.
+
+    Args:
+        input_file (str): zipfile to unzip
+        output_folder (str, optional): folder to which we should unzip [input_file], defaults
+            to unzipping to the folder where [input_file] lives
     """

     if output_folder is None:
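The rendered diff truncates here, partway through unzip_file(). A closing sketch of the bulk-zip helpers documented above, with hypothetical folders:

from md_utils import path_utils

path_utils.zip_folder('/data/results')                   # -> /data/results.zip
path_utils.parallel_zip_folders(['/data/a', '/data/b'])  # -> /data/a.zip, /data/b.zip
path_utils.unzip_file('/data/results.zip')               # unzips next to the input file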