dcicutils 8.9.0.0b0__py3-none-any.whl → 8.9.0.1b1__py3-none-any.whl
- dcicutils/command_utils.py +69 -1
- dcicutils/creds_utils.py +1 -1
- dcicutils/ff_utils.py +4 -1
- dcicutils/file_utils.py +250 -41
- dcicutils/http_utils.py +39 -0
- dcicutils/misc_utils.py +82 -5
- dcicutils/portal_object_utils.py +24 -89
- dcicutils/portal_utils.py +234 -36
- dcicutils/schema_utils.py +1 -1
- dcicutils/scripts/view_portal_object.py +87 -5
- dcicutils/structured_data.py +59 -17
- dcicutils/submitr/ref_lookup_strategy.py +31 -25
- dcicutils/tmpfile_utils.py +50 -10
- dcicutils/zip_utils.py +27 -0
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b1.dist-info}/METADATA +6 -4
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b1.dist-info}/RECORD +19 -18
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b1.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b1.dist-info}/WHEEL +0 -0
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b1.dist-info}/entry_points.txt +0 -0
dcicutils/command_utils.py
CHANGED
@@ -1,3 +1,4 @@
+from __future__ import annotations
 import contextlib
 import functools
 import glob
@@ -7,7 +8,7 @@ import re
 import requests
 import subprocess

-from typing import Optional
+from typing import Callable, Optional
 from .exceptions import InvalidParameterError
 from .lang_utils import there_are
 from .misc_utils import INPUT, PRINT, environ_bool, print_error_message, decorator
@@ -384,3 +385,70 @@ def script_catch_errors():
         message = str(e)  # Note: We ignore the type, which isn't intended to be shown.
         PRINT(message)
         exit(1)
+
+
+class Question:
+    """
+    Supports asking the user (via stdin) a yes/no question, possibly repeatedly; and after
+    some maximum number times of the same answer in a row (consecutively), then asks them
+    if they want to automatically give that same answer to any/all subsequent questions.
+    Supports static/global list of such Question instances, hashed (only) by the question text.
+    """
+    _static_instances = {}
+
+    @staticmethod
+    def instance(question: Optional[str] = None,
+                 max: Optional[int] = None, printf: Optional[Callable] = None) -> Question:
+        question = question if isinstance(question, str) else ""
+        if not (instance := Question._static_instances.get(question)):
+            Question._static_instances[question] = (instance := Question(question, max=max, printf=printf))
+        return instance
+
+    @staticmethod
+    def yes(question: Optional[str] = None,
+            max: Optional[int] = None, printf: Optional[Callable] = None) -> bool:
+        return Question.instance(question, max=max, printf=printf).ask()
+
+    def __init__(self, question: Optional[str] = None,
+                 max: Optional[int] = None, printf: Optional[Callable] = None) -> None:
+        self._question = question if isinstance(question, str) else ""
+        self._max = max if isinstance(max, int) and max > 0 else None
+        self._print = printf if callable(printf) else print
+        self._yes_consecutive_count = 0
+        self._no_consecutive_count = 0
+        self._yes_automatic = False
+        self._no_automatic = False
+
+    def ask(self, question: Optional[str] = None) -> bool:
+
+        def question_automatic(value: str) -> bool:
+            nonlocal self
+            RARROW = "▶"
+            LARROW = "◀"
+            if yes_or_no(f"{RARROW}{RARROW}{RARROW}"
+                         f" Do you want to answer {value} to all such questions?"
+                         f" {LARROW}{LARROW}{LARROW}"):
+                return True
+            self._yes_consecutive_count = 0
+            self._no_consecutive_count = 0
+
+        if self._yes_automatic:
+            return True
+        elif self._no_automatic:
+            return False
+        elif yes_or_no((question if isinstance(question, str) else "") or self._question or "Undefined question"):
+            self._yes_consecutive_count += 1
+            self._no_consecutive_count = 0
+            if (self._no_consecutive_count == 0) and self._max and (self._yes_consecutive_count >= self._max):
+                # Have reached the maximum number of consecutive YES answers; ask if YES to all subsequent.
+                if question_automatic("YES"):
+                    self._yes_automatic = True
+            return True
+        else:
+            self._no_consecutive_count += 1
+            self._yes_consecutive_count = 0
+            if (self._yes_consecutive_count == 0) and self._max and (self._no_consecutive_count >= self._max):
+                # Have reached the maximum number of consecutive NO answers; ask if NO to all subsequent.
+                if question_automatic("NO"):
+                    self._no_automatic = True
+            return False
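
A minimal usage sketch of the new Question helper; the question text and the max value below are hypothetical, while Question.yes and its max/printf parameters come from the diff above:

    from dcicutils.command_utils import Question

    for item in ["a.json", "b.json", "c.json", "d.json"]:
        # After max (here 3) consecutive identical answers, the user is offered the
        # option to apply that same answer automatically to all remaining questions.
        if Question.yes(f"Overwrite {item}?", max=3):
            print(f"overwriting {item}")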
dcicutils/creds_utils.py
CHANGED
@@ -170,7 +170,7 @@ class KeyManager:
                 raise ValueError(f"A KeyManager named {name!r} has already been defined.")
             key_manager_class._init_class_variables()
             key_manager_class._REGISTERED = True
-            _KEY_MANAGERS[name] =
+            _KEY_MANAGERS[name] = key_manager_class
             return key_manager_class
         return _register_class

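The fix above restores the assignment that actually stores the registered class in _KEY_MANAGERS. A rough sketch of the decorator-based registration this supports, assuming register is exposed as a decorator factory on KeyManager as the diff context suggests; the subclass and name are hypothetical:

    from dcicutils.creds_utils import KeyManager

    @KeyManager.register(name="my-portal")   # hypothetical registration name
    class MyPortalKeyManager(KeyManager):
        pass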
dcicutils/ff_utils.py
CHANGED
@@ -895,9 +895,12 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth):
     used to create the generator.
     Should NOT be used directly
     """
+    def get_es_host_local() -> Optional[str]:
+        return os.environ.get("ES_HOST_LOCAL", None)
     health = get_health_page(key=auth)
     if es_client is None:
-        es_url
+        if not (es_url := get_es_host_local()):
+            es_url = health['elasticsearch']
         es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
     namespace_star = health.get('namespace', '') + '*'
     # match all given uuids to _id fields
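The change above lets an ES_HOST_LOCAL environment variable override the Elasticsearch URL advertised by the portal health page. A sketch of how the override might be used; the URL is hypothetical:

    import os

    # With this set, ff_utils code paths that build an ES client (e.g. via _get_es_metadata)
    # use the local endpoint instead of health['elasticsearch'].
    os.environ["ES_HOST_LOCAL"] = "https://localhost:9200"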
dcicutils/file_utils.py
CHANGED
@@ -1,13 +1,23 @@
 import glob
+import hashlib
+import io
 import os
 import pathlib
+from datetime import datetime
+import random
+import string
+from tempfile import gettempdir as get_temporary_directory
 from typing import List, Optional, Union
+from uuid import uuid4 as uuid
+
+HOME_DIRECTORY = str(pathlib.Path().home())


 def search_for_file(file: str,
-                    location: Union[str, Optional[List[str]]] = None,
+                    location: Union[str, pathlib.PosixPath, Optional[List[Union[str, pathlib.PosixPath]]]] = None,
                     recursive: bool = False,
-                    single: bool = False
+                    single: bool = False,
+                    order: bool = True) -> Union[List[str], Optional[str]]:
     """
     Searches for the existence of the given file name, first directly in the given directory or list
     of directories, if specified, and if not then just in the current (working) directory; if the
@@ -16,43 +26,242 @@ def search_for_file(file: str,
     first file which is found is returns (as a string), or None if none; if the single flag
     is False, then all matched files are returned in a list, or and empty list if none.
     """
+    def order_by_fewest_number_of_paths_and_then_alphabetically(paths: List[str]) -> List[str]:
+        def order_by(path: str):
+            return len(path.split(os.path.sep)), path
+        return sorted(paths, key=order_by)
+
+    if not (file and isinstance(file, (str, pathlib.PosixPath))):
+        return None if single is True else []
+    if os.path.isabs(file):
+        if os.path.exists(file):
+            return str(file) if single is True else [str(file)]
+        return None if single is True else []
+    files_found = []
+    if not location:
+        location = ["."]
+    elif isinstance(location, (str, pathlib.PosixPath)):
+        location = [location]
+    elif not isinstance(location, list):
+        location = []
+    location_pruned = []
+    for directory in location:
+        if not isinstance(directory, str):
+            if not isinstance(directory, pathlib.PosixPath):
+                continue
+            directory = str(directory)
+        if not (directory := directory.strip()):
+            continue
+        if os.path.isfile(directory := os.path.abspath(os.path.normpath(directory))):
+            # Actually, allow a file rather then a directory; assume its parent directory was intended.
+            if not (directory := os.path.dirname(directory)):
                 continue
+        if directory not in location_pruned:
+            location_pruned.append(directory)
+    location = location_pruned
+    for directory in location:
+        if os.path.exists(os.path.join(directory, file)):
+            file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
+            if single is True:
+                return file_found
+            if file_found not in files_found:
+                files_found.append(file_found)
+    if recursive is True:
+        for directory in location:
+            if not directory.endswith("/**") and not file.startswith("**/"):
+                path = f"{directory}/**/{file}"
+            else:
+                path = f"{directory}/{file}"
+            files = glob.glob(path, recursive=True if recursive is True else False)
+            if files:
+                for file_found in files:
+                    file_found = os.path.abspath(file_found)
+                    if single is True:
+                        return file_found
+                    if file_found not in files_found:
+                        files_found.append(file_found)
+    if single is True:
+        return files_found[0] if files_found else None
+    elif order is True:
+        return order_by_fewest_number_of_paths_and_then_alphabetically(files_found)
+    else:
+        return files_found
+
+
+def normalize_path(value: Union[str, pathlib.Path], absolute: bool = False, expand_home: Optional[bool] = None) -> str:
+    """
+    Normalizes the given path value and returns the result; does things like remove redundant
+    consecutive directory separators and redundant parent paths. If the given absolute argument
+    is True than converts the path to an absolute path. If the given expand_home argument is False
+    and if the path can reasonably be represented with a home directory indicator (i.e. "~"), then
+    converts it to such. If the expand_home argument is True and path starts with the home directory
+    indicator (i.e. "~") then expands it to the actual (absolute) home path of the caller. If the
+    given path value is not actually even a string (or pathlib.Path) then returns an empty string.
+    """
+    if isinstance(value, pathlib.Path):
+        value = str(value)
+    elif not isinstance(value, str):
+        return ""
+    if not (value := value.strip()) or not (value := os.path.normpath(value)):
+        return ""
+    if expand_home is True:
+        value = os.path.expanduser(value)
+    elif (expand_home is False) and (os.name == "posix"):
+        if value.startswith(home := HOME_DIRECTORY + os.sep):
+            value = "~/" + value[len(home):]
+        elif value == HOME_DIRECTORY:
+            value = "~"
+    if absolute is True:
+        value = os.path.abspath(value)
+    return value
+
+
+def get_file_size(file: str, raise_exception: bool = True) -> Optional[int]:
+    try:
+        return os.path.getsize(file) if isinstance(file, str) else None
+    except Exception:
+        if raise_exception is True:
+            raise
+        return None
+
+
+def get_file_modified_datetime(file: str, raise_exception: bool = True) -> Optional[datetime]:
+    try:
+        return datetime.fromtimestamp(os.path.getmtime(file)) if isinstance(file, str) else None
+    except Exception:
+        if raise_exception is True:
+            raise
+        return None
+
+
+def are_files_equal(filea: str, fileb: str, raise_exception: bool = True) -> bool:
+    """
+    Returns True iff the contents of the two given files are exactly the same.
+    """
+    try:
+        with open(filea, "rb") as fa:
+            with open(fileb, "rb") as fb:
+                chunk_size = 4096
+                while True:
+                    chunka = fa.read(chunk_size)
+                    chunkb = fb.read(chunk_size)
+                    if chunka != chunkb:
+                        return False
+                    if not chunka:
+                        break
+        return True
+    except Exception:
+        if raise_exception is True:
+            raise
+        return False
+
+
+def compute_file_md5(file: str, raise_exception: bool = True) -> str:
+    """
+    Returns the md5 checksum for the given file.
+    """
+    if not isinstance(file, str):
+        return ""
+    try:
+        md5 = hashlib.md5()
+        with open(file, "rb") as file:
+            for chunk in iter(lambda: file.read(4096), b""):
+                md5.update(chunk)
+        return md5.hexdigest()
+    except Exception:
+        if raise_exception is True:
+            raise
+        return ""
+
+
+def compute_file_etag(file: str, raise_exception: bool = True) -> Optional[str]:
+    """
+    Returns the AWS S3 "etag" for the given file; this value is md5-like but
+    not the same as a normal md5. We use this to compare that a file in S3
+    appears to be the exact the same file as a local file.
+    """
+    try:
+        with io.open(file, "rb") as f:
+            return _compute_file_etag(f)
+    except Exception:
+        if raise_exception is True:
+            raise
+        return None
+
+
+def _compute_file_etag(f: io.BufferedReader) -> str:
+    # See: https://stackoverflow.com/questions/75723647/calculate-md5-from-aws-s3-etag
+    MULTIPART_THRESHOLD = 8388608
+    MULTIPART_CHUNKSIZE = 8388608
+    # BUFFER_SIZE = 1048576
+    # Verify some assumptions are correct
+    # assert(MULTIPART_CHUNKSIZE >= MULTIPART_THRESHOLD)
+    # assert((MULTIPART_THRESHOLD % BUFFER_SIZE) == 0)
+    # assert((MULTIPART_CHUNKSIZE % BUFFER_SIZE) == 0)
+    hash = hashlib.md5()
+    read = 0
+    chunks = None
+    while True:
+        # Read some from stdin, if we're at the end, stop reading
+        bits = f.read(1048576)
+        if len(bits) == 0:
+            break
+        read += len(bits)
+        hash.update(bits)
+        if chunks is None:
+            # We're handling a multi-part upload, so switch to calculating
+            # hashes of each chunk
+            if read >= MULTIPART_THRESHOLD:
+                chunks = b''
+        if chunks is not None:
+            if (read % MULTIPART_CHUNKSIZE) == 0:
+                # Dont with a chunk, add it to the list of hashes to hash later
+                chunks += hash.digest()
+                hash = hashlib.md5()
+    if chunks is None:
+        # Normal upload, just output the MD5 hash
+        etag = hash.hexdigest()
+    else:
+        # Multipart upload, need to output the hash of the hashes
+        if (read % MULTIPART_CHUNKSIZE) != 0:
+            # Add the last part if we have a partial chunk
+            chunks += hash.digest()
+        etag = hashlib.md5(chunks).hexdigest() + "-" + str(len(chunks) // 16)
+    return etag
+
+
+def create_random_file(file: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
+                       nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
+    """
+    Write to the given file (name/path) some random content. If the given file is None then writes
+    to a temporary file. In either case, returns the file written to. The of bytes written is 1024
+    by default be can be specified with the nbytes argument; default to writing ASCII text but if
+    the binary argument is True then writes binary data as well; if not binary the content is in
+    lines of 80 characters each; use the line_length argumetn in this case to change the line length.
+    """
+    if not isinstance(nbytes, int) or nbytes < 0:
+        nbytes = 0
+    if not isinstance(file, str) or not file:
+        if not isinstance(prefix, str):
+            prefix = ""
+        if not isinstance(suffix, str):
+            suffix = ""
+        file = f"{datetime.utcnow().strftime('%Y%m%d%H%M%S')}{str(uuid()).replace('-', '')}"
+        file = os.path.join(get_temporary_directory(), file)
+    with open(file, "wb" if binary is True else "w") as f:
+        if binary is True:
+            f.write(os.urandom(nbytes))
+        else:
+            if (not isinstance(line_length, int)) or (line_length < 1):
+                line_length = 80
+            line_length += 1
+            nlines = nbytes // line_length
+            nremainder = nbytes % line_length
+            for n in range(nlines):
+                f.write("".join(random.choices(string.ascii_letters + string.digits, k=line_length - 1)))
+                f.write("\n")
+            if nremainder > 1:
+                f.write("".join(random.choices(string.ascii_letters + string.digits, k=nremainder - 1)))
+            if nremainder > 0:
+                f.write("\n")
+    return file
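
A minimal sketch exercising several of the new file_utils helpers; the file names and locations are hypothetical:

    from dcicutils.file_utils import (
        are_files_equal, compute_file_md5, create_random_file, normalize_path, search_for_file)

    path = create_random_file(nbytes=2048)         # random ASCII content in a temporary file
    print(compute_file_md5(path))                  # md5 checksum of that content
    print(are_files_equal(path, path))             # True
    print(normalize_path("~/data//x/../y"))        # "~/data/y" on POSIX systems
    # With single=False (default) and order=True (default), results are sorted by
    # fewest path components and then alphabetically.
    print(search_for_file("config.json", location=["."], recursive=True))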
dcicutils/http_utils.py
ADDED
@@ -0,0 +1,39 @@
+from contextlib import contextmanager
+import requests
+from typing import Callable, Optional
+from dcicutils.tmpfile_utils import temporary_file
+
+
+@contextmanager
+def download(url: str, suffix: Optional[str] = None, binary: bool = True,
+             progress: Optional[Callable] = None) -> Optional[str]:
+    """
+    Context manager to download the given URL into a temporary file and yields the file
+    path to it. An optional file suffix may be specified for this temporary file name.
+    Defaults to binary file mode; if not desired then pass False as the binary argument.
+    """
+    with temporary_file(suffix=suffix) as file:
+        download_to(url, file, binary=binary, progress=progress)
+        yield file
+
+
+def download_to(url: str, file: str, binary: bool = True, progress: Optional[Callable] = None) -> None:
+    """
+    Download the given URL into the given file. Defaults to binary
+    file mode; if not desired then pass False as the binary argument.
+    """
+    if not callable(progress):
+        progress = None
+    response = requests.get(url, stream=True)
+    if progress:
+        nbytes = 0
+        nbytes_total = None
+        if isinstance(content_length := response.headers.get("Content-Length"), str) and content_length.isdigit():
+            nbytes_total = int(content_length)
+    with open(file, "wb" if binary is True else "w") as f:
+        for chunk in response.iter_content(chunk_size=8192):
+            if chunk:
+                f.write(chunk)
+                if progress:
+                    nbytes += len(chunk)
+                    progress(nbytes, nbytes_total)
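
A minimal sketch of the new download context manager; the URL is hypothetical, and the temporary file is presumably cleaned up by tmpfile_utils.temporary_file when the block exits:

    from dcicutils.http_utils import download

    def show_progress(nbytes, nbytes_total):
        # nbytes_total is None when the server sends no Content-Length header.
        print(f"{nbytes} / {nbytes_total or '?'} bytes")

    with download("https://example.com/somefile.tar.gz", suffix=".tar.gz", progress=show_progress) as path:
        print(f"downloaded to temporary file: {path}")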
dcicutils/misc_utils.py
CHANGED
@@ -3,6 +3,7 @@ This file contains functions that might be generally useful.
 """

 from collections import namedtuple
+import appdirs
 import contextlib
 import datetime
 import functools
@@ -13,10 +14,12 @@ import json
 import logging
 import math
 import os
+import platform
 import pytz
 import re
 import rfc3986.validators
 import rfc3986.exceptions
+import shortuuid
 import time
 import uuid
 import warnings
@@ -1152,7 +1155,8 @@ def remove_suffix(suffix: str, text: str, required: bool = False):

 def remove_empty_properties(data: Optional[Union[list, dict]],
                             isempty: Optional[Callable] = None,
-                            isempty_array_element: Optional[Callable] = None
+                            isempty_array_element: Optional[Callable] = None,
+                            raise_exception_on_nonempty_array_element_after_empty: bool = False) -> None:
     def _isempty(value: Any) -> bool:  # noqa
         return isempty(value) if callable(isempty) else value in [None, "", {}, []]
     if isinstance(data, dict):
@@ -1160,11 +1164,22 @@ def remove_empty_properties(data: Optional[Union[list, dict]],
             if _isempty(value := data[key]):
                 del data[key]
             else:
-                remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element
+                remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element,
+                                        raise_exception_on_nonempty_array_element_after_empty=  # noqa
+                                        raise_exception_on_nonempty_array_element_after_empty)
     elif isinstance(data, list):
         for item in data:
-            remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element
+            remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element,
+                                    raise_exception_on_nonempty_array_element_after_empty=  # noqa
+                                    raise_exception_on_nonempty_array_element_after_empty)
         if callable(isempty_array_element):
+            if raise_exception_on_nonempty_array_element_after_empty is True:
+                empty_element_seen = False
+                for item in data:
+                    if not empty_element_seen and isempty_array_element(item):
+                        empty_element_seen = True
+                    elif empty_element_seen and not isempty_array_element(item):
+                        raise Exception("Non-empty element found after empty element.")
             data[:] = [item for item in data if not isempty_array_element(item)]


@@ -1522,7 +1537,7 @@ def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]],
 def create_dict(**kwargs) -> dict:
     result = {}
     for name in kwargs:
-        if kwargs[name]:
+        if not (kwargs[name] is None):
             result[name] = kwargs[name]
     return result

@@ -2548,6 +2563,19 @@ def normalize_spaces(value: str) -> str:
     return re.sub(r"\s+", " ", value).strip()


+def normalize_string(value: Optional[str]) -> Optional[str]:
+    """
+    Strips leading/trailing spaces, and converts multiple consecutive spaces to a single space
+    in the given string value and returns the result. If the given value is None returns an
+    empty string. If the given value is not actually even a string then return None.
+    """
+    if value is None:
+        return ""
+    elif isinstance(value, str):
+        return re.sub(r"\s+", " ", value).strip()
+    return None
+
+
 def find_nth_from_end(string: str, substring: str, nth: int) -> int:
     """
     Returns the index of the nth occurrence of the given substring within
@@ -2590,7 +2618,11 @@ def format_size(nbytes: Union[int, float], precision: int = 2, nospace: bool = F
         nbytes = int(nbytes)
         return f"{nbytes} byte{'s' if nbytes != 1 else ''}"
     unit = (UNITS_TERSE if terse else UNITS)[index]
+    size = f"{nbytes:.{precision}f}"
+    if size.endswith(f".{'0' * precision}"):
+        # Tidy up extraneous zeros.
+        size = size[:-(precision - 1)]
+    return f"{size}{'' if nospace else ' '}{unit}"


 def format_duration(seconds: Union[int, float]) -> str:
@@ -2670,3 +2702,48 @@ class JsonLinesReader:
                 yield line
             else:
                 raise Exception(f"If the first line is not a list, all lines must be dictionaries: {line!r}")
+
+
+def get_app_specific_directory() -> str:
+    """
+    Returns the standard system application specific directory:
+    - On MacOS this directory is: ~/Library/Application Support
+    - On Linux this directory is: ~/.local/share
+    - On Windows this directory is: %USERPROFILE%\\AppData\\Local  # noqa
+    N.B. This is has been tested on MacOS and Linux but not on Windows.
+    """
+    return appdirs.user_data_dir()
+
+
+def get_os_name() -> str:
+    if os_name := platform.system():
+        if os_name == "Darwin": return "osx"  # noqa
+        elif os_name == "Linux": return "linux"  # noqa
+        elif os_name == "Windows": return "windows"  # noqa
+    return ""
+
+
+def get_cpu_architecture_name() -> str:
+    if os_architecture_name := platform.machine():
+        if os_architecture_name == "x86_64": return "amd64"  # noqa
+        return os_architecture_name
+    return ""
+
+
+def create_uuid(nodash: bool = False, upper: bool = False) -> str:
+    value = str(uuid.uuid4())
+    if nodash is True:
+        value = value.replace("-", "")
+    if upper is True:
+        value = value.upper()
+    return value
+
+
+def create_short_uuid(length: Optional[int] = None, upper: bool = False):
+    # Not really techincally a uuid of course.
+    if (length is None) or (not isinstance(length, int)) or (length < 1):
+        length = 16
+    value = shortuuid.ShortUUID().random(length=length)
+    if upper is True:
+        value = value.upper()
+    return value
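
A minimal sketch of the new misc_utils helpers; the printed values are illustrative only:

    from dcicutils.misc_utils import (
        create_short_uuid, create_uuid, format_size, get_app_specific_directory,
        get_cpu_architecture_name, get_os_name, normalize_string)

    print(create_uuid(nodash=True, upper=True))    # uuid4 without dashes, upper-cased
    print(create_short_uuid(length=8))             # 8-character shortuuid-based identifier
    print(normalize_string("  a   b  "))           # "a b"
    print(normalize_string(None))                  # ""
    print(format_size(123456789))                  # e.g. "117.74 MB"
    print(get_os_name(), get_cpu_architecture_name(), get_app_specific_directory())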