dcicutils 8.9.0.0b0__py3-none-any.whl → 8.9.0.1b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dcicutils/command_utils.py +69 -1
- dcicutils/creds_utils.py +1 -1
- dcicutils/ff_utils.py +4 -1
- dcicutils/file_utils.py +250 -41
- dcicutils/http_utils.py +39 -0
- dcicutils/misc_utils.py +82 -5
- dcicutils/portal_object_utils.py +24 -89
- dcicutils/portal_utils.py +249 -37
- dcicutils/schema_utils.py +1 -1
- dcicutils/scripts/view_portal_object.py +87 -5
- dcicutils/structured_data.py +59 -17
- dcicutils/submitr/ref_lookup_strategy.py +31 -25
- dcicutils/tmpfile_utils.py +50 -10
- dcicutils/zip_utils.py +27 -0
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/METADATA +6 -4
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/RECORD +19 -18
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/WHEEL +0 -0
- {dcicutils-8.9.0.0b0.dist-info → dcicutils-8.9.0.1b2.dist-info}/entry_points.txt +0 -0
dcicutils/command_utils.py
CHANGED
@@ -1,3 +1,4 @@
+from __future__ import annotations
 import contextlib
 import functools
 import glob
@@ -7,7 +8,7 @@ import re
 import requests
 import subprocess

-from typing import Optional
+from typing import Callable, Optional
 from .exceptions import InvalidParameterError
 from .lang_utils import there_are
 from .misc_utils import INPUT, PRINT, environ_bool, print_error_message, decorator
@@ -384,3 +385,70 @@ def script_catch_errors():
             message = str(e)  # Note: We ignore the type, which isn't intended to be shown.
         PRINT(message)
         exit(1)
+
+
+class Question:
+    """
+    Supports asking the user (via stdin) a yes/no question, possibly repeatedly; and after
+    some maximum number times of the same answer in a row (consecutively), then asks them
+    if they want to automatically give that same answer to any/all subsequent questions.
+    Supports static/global list of such Question instances, hashed (only) by the question text.
+    """
+    _static_instances = {}
+
+    @staticmethod
+    def instance(question: Optional[str] = None,
+                 max: Optional[int] = None, printf: Optional[Callable] = None) -> Question:
+        question = question if isinstance(question, str) else ""
+        if not (instance := Question._static_instances.get(question)):
+            Question._static_instances[question] = (instance := Question(question, max=max, printf=printf))
+        return instance
+
+    @staticmethod
+    def yes(question: Optional[str] = None,
+            max: Optional[int] = None, printf: Optional[Callable] = None) -> bool:
+        return Question.instance(question, max=max, printf=printf).ask()
+
+    def __init__(self, question: Optional[str] = None,
+                 max: Optional[int] = None, printf: Optional[Callable] = None) -> None:
+        self._question = question if isinstance(question, str) else ""
+        self._max = max if isinstance(max, int) and max > 0 else None
+        self._print = printf if callable(printf) else print
+        self._yes_consecutive_count = 0
+        self._no_consecutive_count = 0
+        self._yes_automatic = False
+        self._no_automatic = False
+
+    def ask(self, question: Optional[str] = None) -> bool:
+
+        def question_automatic(value: str) -> bool:
+            nonlocal self
+            RARROW = "▶"
+            LARROW = "◀"
+            if yes_or_no(f"{RARROW}{RARROW}{RARROW}"
+                         f" Do you want to answer {value} to all such questions?"
+                         f" {LARROW}{LARROW}{LARROW}"):
+                return True
+            self._yes_consecutive_count = 0
+            self._no_consecutive_count = 0
+
+        if self._yes_automatic:
+            return True
+        elif self._no_automatic:
+            return False
+        elif yes_or_no((question if isinstance(question, str) else "") or self._question or "Undefined question"):
+            self._yes_consecutive_count += 1
+            self._no_consecutive_count = 0
+            if (self._no_consecutive_count == 0) and self._max and (self._yes_consecutive_count >= self._max):
+                # Have reached the maximum number of consecutive YES answers; ask if YES to all subsequent.
+                if question_automatic("YES"):
+                    self._yes_automatic = True
+            return True
+        else:
+            self._no_consecutive_count += 1
+            self._yes_consecutive_count = 0
+            if (self._yes_consecutive_count == 0) and self._max and (self._no_consecutive_count >= self._max):
+                # Have reached the maximum number of consecutive NO answers; ask if NO to all subsequent.
+                if question_automatic("NO"):
+                    self._no_automatic = True
+            return False
dcicutils/creds_utils.py
CHANGED
@@ -170,7 +170,7 @@ class KeyManager:
                 raise ValueError(f"A KeyManager named {name!r} has already been defined.")
             key_manager_class._init_class_variables()
             key_manager_class._REGISTERED = True
-            _KEY_MANAGERS[name] =
+            _KEY_MANAGERS[name] = key_manager_class
             return key_manager_class
         return _register_class

dcicutils/ff_utils.py
CHANGED
@@ -895,9 +895,12 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth):
     used to create the generator.
     Should NOT be used directly
     """
+    def get_es_host_local() -> Optional[str]:
+        return os.environ.get("ES_HOST_LOCAL", None)
     health = get_health_page(key=auth)
     if es_client is None:
-        es_url
+        if not (es_url := get_es_host_local()):
+            es_url = health['elasticsearch']
         es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
     namespace_star = health.get('namespace', '') + '*'
     # match all given uuids to _id fields
dcicutils/file_utils.py
CHANGED
@@ -1,13 +1,23 @@
 import glob
+import hashlib
+import io
 import os
 import pathlib
+from datetime import datetime
+import random
+import string
+from tempfile import gettempdir as get_temporary_directory
 from typing import List, Optional, Union
+from uuid import uuid4 as uuid
+
+HOME_DIRECTORY = str(pathlib.Path().home())


 def search_for_file(file: str,
-                    location: Union[str, Optional[List[str]]] = None,
+                    location: Union[str, pathlib.PosixPath, Optional[List[Union[str, pathlib.PosixPath]]]] = None,
                     recursive: bool = False,
-                    single: bool = False
+                    single: bool = False,
+                    order: bool = True) -> Union[List[str], Optional[str]]:
     """
     Searches for the existence of the given file name, first directly in the given directory or list
     of directories, if specified, and if not then just in the current (working) directory; if the
@@ -16,43 +26,242 @@ def search_for_file(file: str,
     first file which is found is returns (as a string), or None if none; if the single flag
     is False, then all matched files are returned in a list, or and empty list if none.
     """
-
-
-
-
-
-
-    if
-
-
-
-
-
-
-
+    def order_by_fewest_number_of_paths_and_then_alphabetically(paths: List[str]) -> List[str]:
+        def order_by(path: str):
+            return len(path.split(os.path.sep)), path
+        return sorted(paths, key=order_by)
+
+    if not (file and isinstance(file, (str, pathlib.PosixPath))):
+        return None if single is True else []
+    if os.path.isabs(file):
+        if os.path.exists(file):
+            return str(file) if single is True else [str(file)]
+        return None if single is True else []
+    files_found = []
+    if not location:
+        location = ["."]
+    elif isinstance(location, (str, pathlib.PosixPath)):
+        location = [location]
+    elif not isinstance(location, list):
+        location = []
+    location_pruned = []
+    for directory in location:
+        if not isinstance(directory, str):
+            if not isinstance(directory, pathlib.PosixPath):
+                continue
+            directory = str(directory)
+        if not (directory := directory.strip()):
+            continue
+        if os.path.isfile(directory := os.path.abspath(os.path.normpath(directory))):
+            # Actually, allow a file rather then a directory; assume its parent directory was intended.
+            if not (directory := os.path.dirname(directory)):
                 continue
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if directory not in location_pruned:
+            location_pruned.append(directory)
+    location = location_pruned
+    for directory in location:
+        if os.path.exists(os.path.join(directory, file)):
+            file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
+            if single is True:
+                return file_found
+            if file_found not in files_found:
+                files_found.append(file_found)
+    if recursive is True:
+        for directory in location:
+            if not directory.endswith("/**") and not file.startswith("**/"):
+                path = f"{directory}/**/{file}"
+            else:
+                path = f"{directory}/{file}"
+            files = glob.glob(path, recursive=True if recursive is True else False)
+            if files:
+                for file_found in files:
+                    file_found = os.path.abspath(file_found)
+                    if single is True:
+                        return file_found
+                    if file_found not in files_found:
+                        files_found.append(file_found)
+    if single is True:
+        return files_found[0] if files_found else None
+    elif order is True:
+        return order_by_fewest_number_of_paths_and_then_alphabetically(files_found)
+    else:
+        return files_found
+
+
+def normalize_path(value: Union[str, pathlib.Path], absolute: bool = False, expand_home: Optional[bool] = None) -> str:
+    """
+    Normalizes the given path value and returns the result; does things like remove redundant
+    consecutive directory separators and redundant parent paths. If the given absolute argument
+    is True than converts the path to an absolute path. If the given expand_home argument is False
+    and if the path can reasonably be represented with a home directory indicator (i.e. "~"), then
+    converts it to such. If the expand_home argument is True and path starts with the home directory
+    indicator (i.e. "~") then expands it to the actual (absolute) home path of the caller. If the
+    given path value is not actually even a string (or pathlib.Path) then returns an empty string.
+    """
+    if isinstance(value, pathlib.Path):
+        value = str(value)
+    elif not isinstance(value, str):
+        return ""
+    if not (value := value.strip()) or not (value := os.path.normpath(value)):
+        return ""
+    if expand_home is True:
+        value = os.path.expanduser(value)
+    elif (expand_home is False) and (os.name == "posix"):
+        if value.startswith(home := HOME_DIRECTORY + os.sep):
+            value = "~/" + value[len(home):]
+        elif value == HOME_DIRECTORY:
+            value = "~"
+    if absolute is True:
+        value = os.path.abspath(value)
+    return value
+
+
+def get_file_size(file: str, raise_exception: bool = True) -> Optional[int]:
+    try:
+        return os.path.getsize(file) if isinstance(file, str) else None
+    except Exception:
+        if raise_exception is True:
+            raise
+        return None
+
+
+def get_file_modified_datetime(file: str, raise_exception: bool = True) -> Optional[datetime]:
+    try:
+        return datetime.fromtimestamp(os.path.getmtime(file)) if isinstance(file, str) else None
+    except Exception:
+        if raise_exception is True:
+            raise
+        return None
+
+
+def are_files_equal(filea: str, fileb: str, raise_exception: bool = True) -> bool:
+    """
+    Returns True iff the contents of the two given files are exactly the same.
+    """
+    try:
+        with open(filea, "rb") as fa:
+            with open(fileb, "rb") as fb:
+                chunk_size = 4096
+                while True:
+                    chunka = fa.read(chunk_size)
+                    chunkb = fb.read(chunk_size)
+                    if chunka != chunkb:
+                        return False
+                    if not chunka:
+                        break
+        return True
+    except Exception:
+        if raise_exception is True:
+            raise
+        return False
+
+
+def compute_file_md5(file: str, raise_exception: bool = True) -> str:
+    """
+    Returns the md5 checksum for the given file.
+    """
+    if not isinstance(file, str):
+        return ""
+    try:
+        md5 = hashlib.md5()
+        with open(file, "rb") as file:
+            for chunk in iter(lambda: file.read(4096), b""):
+                md5.update(chunk)
+        return md5.hexdigest()
+    except Exception:
+        if raise_exception is True:
+            raise
+        return ""
+
+
+def compute_file_etag(file: str, raise_exception: bool = True) -> Optional[str]:
+    """
+    Returns the AWS S3 "etag" for the given file; this value is md5-like but
+    not the same as a normal md5. We use this to compare that a file in S3
+    appears to be the exact the same file as a local file.
+    """
+    try:
+        with io.open(file, "rb") as f:
+            return _compute_file_etag(f)
+    except Exception:
+        if raise_exception is True:
+            raise
+        return None
+
+
+def _compute_file_etag(f: io.BufferedReader) -> str:
+    # See: https://stackoverflow.com/questions/75723647/calculate-md5-from-aws-s3-etag
+    MULTIPART_THRESHOLD = 8388608
+    MULTIPART_CHUNKSIZE = 8388608
+    # BUFFER_SIZE = 1048576
+    # Verify some assumptions are correct
+    # assert(MULTIPART_CHUNKSIZE >= MULTIPART_THRESHOLD)
+    # assert((MULTIPART_THRESHOLD % BUFFER_SIZE) == 0)
+    # assert((MULTIPART_CHUNKSIZE % BUFFER_SIZE) == 0)
+    hash = hashlib.md5()
+    read = 0
+    chunks = None
+    while True:
+        # Read some from stdin, if we're at the end, stop reading
+        bits = f.read(1048576)
+        if len(bits) == 0:
+            break
+        read += len(bits)
+        hash.update(bits)
+        if chunks is None:
+            # We're handling a multi-part upload, so switch to calculating
+            # hashes of each chunk
+            if read >= MULTIPART_THRESHOLD:
+                chunks = b''
+        if chunks is not None:
+            if (read % MULTIPART_CHUNKSIZE) == 0:
+                # Dont with a chunk, add it to the list of hashes to hash later
+                chunks += hash.digest()
+                hash = hashlib.md5()
+    if chunks is None:
+        # Normal upload, just output the MD5 hash
+        etag = hash.hexdigest()
+    else:
+        # Multipart upload, need to output the hash of the hashes
+        if (read % MULTIPART_CHUNKSIZE) != 0:
+            # Add the last part if we have a partial chunk
+            chunks += hash.digest()
+        etag = hashlib.md5(chunks).hexdigest() + "-" + str(len(chunks) // 16)
+    return etag
+
+
+def create_random_file(file: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
+                       nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
+    """
+    Write to the given file (name/path) some random content. If the given file is None then writes
+    to a temporary file. In either case, returns the file written to. The of bytes written is 1024
+    by default be can be specified with the nbytes argument; default to writing ASCII text but if
+    the binary argument is True then writes binary data as well; if not binary the content is in
+    lines of 80 characters each; use the line_length argumetn in this case to change the line length.
+    """
+    if not isinstance(nbytes, int) or nbytes < 0:
+        nbytes = 0
+    if not isinstance(file, str) or not file:
+        if not isinstance(prefix, str):
+            prefix = ""
+        if not isinstance(suffix, str):
+            suffix = ""
+        file = f"{datetime.utcnow().strftime('%Y%m%d%H%M%S')}{str(uuid()).replace('-', '')}"
+        file = os.path.join(get_temporary_directory(), file)
+    with open(file, "wb" if binary is True else "w") as f:
+        if binary is True:
+            f.write(os.urandom(nbytes))
+        else:
+            if (not isinstance(line_length, int)) or (line_length < 1):
+                line_length = 80
+            line_length += 1
+            nlines = nbytes // line_length
+            nremainder = nbytes % line_length
+            for n in range(nlines):
+                f.write("".join(random.choices(string.ascii_letters + string.digits, k=line_length - 1)))
+                f.write("\n")
+            if nremainder > 1:
+                f.write("".join(random.choices(string.ascii_letters + string.digits, k=nremainder - 1)))
+            if nremainder > 0:
+                f.write("\n")
+    return file
dcicutils/http_utils.py
ADDED
@@ -0,0 +1,39 @@
+from contextlib import contextmanager
+import requests
+from typing import Callable, Optional
+from dcicutils.tmpfile_utils import temporary_file
+
+
+@contextmanager
+def download(url: str, suffix: Optional[str] = None, binary: bool = True,
+             progress: Optional[Callable] = None) -> Optional[str]:
+    """
+    Context manager to download the given URL into a temporary file and yields the file
+    path to it. An optional file suffix may be specified for this temporary file name.
+    Defaults to binary file mode; if not desired then pass False as the binary argument.
+    """
+    with temporary_file(suffix=suffix) as file:
+        download_to(url, file, binary=binary, progress=progress)
+        yield file
+
+
+def download_to(url: str, file: str, binary: bool = True, progress: Optional[Callable] = None) -> None:
+    """
+    Download the given URL into the given file. Defaults to binary
+    file mode; if not desired then pass False as the binary argument.
+    """
+    if not callable(progress):
+        progress = None
+    response = requests.get(url, stream=True)
+    if progress:
+        nbytes = 0
+        nbytes_total = None
+        if isinstance(content_length := response.headers.get("Content-Length"), str) and content_length.isdigit():
+            nbytes_total = int(content_length)
+    with open(file, "wb" if binary is True else "w") as f:
+        for chunk in response.iter_content(chunk_size=8192):
+            if chunk:
+                f.write(chunk)
+                if progress:
+                    nbytes += len(chunk)
+                    progress(nbytes, nbytes_total)
dcicutils/misc_utils.py
CHANGED
@@ -3,6 +3,7 @@ This file contains functions that might be generally useful.
 """

 from collections import namedtuple
+import appdirs
 import contextlib
 import datetime
 import functools
@@ -13,10 +14,12 @@ import json
 import logging
 import math
 import os
+import platform
 import pytz
 import re
 import rfc3986.validators
 import rfc3986.exceptions
+import shortuuid
 import time
 import uuid
 import warnings
@@ -1152,7 +1155,8 @@ def remove_suffix(suffix: str, text: str, required: bool = False):

 def remove_empty_properties(data: Optional[Union[list, dict]],
                             isempty: Optional[Callable] = None,
-                            isempty_array_element: Optional[Callable] = None
+                            isempty_array_element: Optional[Callable] = None,
+                            raise_exception_on_nonempty_array_element_after_empty: bool = False) -> None:
     def _isempty(value: Any) -> bool:  # noqa
         return isempty(value) if callable(isempty) else value in [None, "", {}, []]
     if isinstance(data, dict):
@@ -1160,11 +1164,22 @@ def remove_empty_properties(data: Optional[Union[list, dict]],
             if _isempty(value := data[key]):
                 del data[key]
             else:
-                remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element
+                remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element,
+                                        raise_exception_on_nonempty_array_element_after_empty=  # noqa
+                                        raise_exception_on_nonempty_array_element_after_empty)
     elif isinstance(data, list):
         for item in data:
-            remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element
+            remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element,
+                                    raise_exception_on_nonempty_array_element_after_empty=  # noqa
+                                    raise_exception_on_nonempty_array_element_after_empty)
         if callable(isempty_array_element):
+            if raise_exception_on_nonempty_array_element_after_empty is True:
+                empty_element_seen = False
+                for item in data:
+                    if not empty_element_seen and isempty_array_element(item):
+                        empty_element_seen = True
+                    elif empty_element_seen and not isempty_array_element(item):
+                        raise Exception("Non-empty element found after empty element.")
             data[:] = [item for item in data if not isempty_array_element(item)]


@@ -1522,7 +1537,7 @@ def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]],
 def create_dict(**kwargs) -> dict:
     result = {}
     for name in kwargs:
-        if kwargs[name]:
+        if not (kwargs[name] is None):
             result[name] = kwargs[name]
     return result

@@ -2548,6 +2563,19 @@ def normalize_spaces(value: str) -> str:
     return re.sub(r"\s+", " ", value).strip()


+def normalize_string(value: Optional[str]) -> Optional[str]:
+    """
+    Strips leading/trailing spaces, and converts multiple consecutive spaces to a single space
+    in the given string value and returns the result. If the given value is None returns an
+    empty string. If the given value is not actually even a string then return None.
+    """
+    if value is None:
+        return ""
+    elif isinstance(value, str):
+        return re.sub(r"\s+", " ", value).strip()
+    return None
+
+
 def find_nth_from_end(string: str, substring: str, nth: int) -> int:
     """
     Returns the index of the nth occurrence of the given substring within
@@ -2590,7 +2618,11 @@ def format_size(nbytes: Union[int, float], precision: int = 2, nospace: bool = F
         nbytes = int(nbytes)
         return f"{nbytes} byte{'s' if nbytes != 1 else ''}"
     unit = (UNITS_TERSE if terse else UNITS)[index]
-
+    size = f"{nbytes:.{precision}f}"
+    if size.endswith(f".{'0' * precision}"):
+        # Tidy up extraneous zeros.
+        size = size[:-(precision - 1)]
+    return f"{size}{'' if nospace else ' '}{unit}"


 def format_duration(seconds: Union[int, float]) -> str:
@@ -2670,3 +2702,48 @@ class JsonLinesReader:
                 yield line
             else:
                 raise Exception(f"If the first line is not a list, all lines must be dictionaries: {line!r}")
+
+
+def get_app_specific_directory() -> str:
+    """
+    Returns the standard system application specific directory:
+    - On MacOS this directory: is: ~/Library/Application Support
+    - On Linux this directory is: ~/.local/share
+    - On Windows this directory is: %USERPROFILE%\\AppData\\Local  # noqa
+    N.B. This is has been tested on MacOS and Linux but not on Windows.
+    """
+    return appdirs.user_data_dir()
+
+
+def get_os_name() -> str:
+    if os_name := platform.system():
+        if os_name == "Darwin": return "osx"  # noqa
+        elif os_name == "Linux": return "linux"  # noqa
+        elif os_name == "Windows": return "windows"  # noqa
+    return ""
+
+
+def get_cpu_architecture_name() -> str:
+    if os_architecture_name := platform.machine():
+        if os_architecture_name == "x86_64": return "amd64"  # noqa
+        return os_architecture_name
+    return ""
+
+
+def create_uuid(nodash: bool = False, upper: bool = False) -> str:
+    value = str(uuid.uuid4())
+    if nodash is True:
+        value = value.replace("-", "")
+    if upper is True:
+        value = value.upper()
+    return value
+
+
+def create_short_uuid(length: Optional[int] = None, upper: bool = False):
+    # Not really techincally a uuid of course.
+    if (length is None) or (not isinstance(length, int)) or (length < 1):
+        length = 16
+    value = shortuuid.ShortUUID().random(length=length)
+    if upper is True:
+        value = value.upper()
+    return value