dcicutils 8.8.4.1b28__py3-none-any.whl → 8.9.0.0b0__py3-none-any.whl
- dcicutils/ff_utils.py +1 -4
- dcicutils/file_utils.py +40 -249
- dcicutils/misc_utils.py +1 -62
- dcicutils/schema_utils.py +16 -0
- dcicutils/tmpfile_utils.py +4 -42
- dcicutils/zip_utils.py +0 -27
- {dcicutils-8.8.4.1b28.dist-info → dcicutils-8.9.0.0b0.dist-info}/METADATA +4 -6
- {dcicutils-8.8.4.1b28.dist-info → dcicutils-8.9.0.0b0.dist-info}/RECORD +11 -12
- dcicutils/http_utils.py +0 -39
- {dcicutils-8.8.4.1b28.dist-info → dcicutils-8.9.0.0b0.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.8.4.1b28.dist-info → dcicutils-8.9.0.0b0.dist-info}/WHEEL +0 -0
- {dcicutils-8.8.4.1b28.dist-info → dcicutils-8.9.0.0b0.dist-info}/entry_points.txt +0 -0
dcicutils/ff_utils.py CHANGED
@@ -895,12 +895,9 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth):
     used to create the generator.
     Should NOT be used directly
     """
-    def get_es_host_local() -> Optional[str]:
-        return os.environ.get("ES_HOST_LOCAL", None)
     health = get_health_page(key=auth)
     if es_client is None:
-
-            es_url = health['elasticsearch']
+        es_url = health['elasticsearch']
         es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
     namespace_star = health.get('namespace', '') + '*'
     # match all given uuids to _id fields
dcicutils/file_utils.py CHANGED
@@ -1,23 +1,13 @@
 import glob
-import hashlib
-import io
 import os
 import pathlib
-from datetime import datetime
-import random
-import string
-from tempfile import gettempdir as get_temporary_directory
 from typing import List, Optional, Union
-from uuid import uuid4 as uuid
-
-HOME_DIRECTORY = str(pathlib.Path().home())
 
 
 def search_for_file(file: str,
                     location: Union[str, Optional[List[str]]] = None,
                     recursive: bool = False,
-                    single: bool = False,
-                    order: bool = True) -> Union[List[str], Optional[str]]:
+                    single: bool = False) -> Union[List[str], Optional[str]]:
     """
     Searches for the existence of the given file name, first directly in the given directory or list
     of directories, if specified, and if not then just in the current (working) directory; if the
@@ -26,242 +16,43 @@ def search_for_file(file: str,
     first file which is found is returns (as a string), or None if none; if the single flag
     is False, then all matched files are returned in a list, or and empty list if none.
     """
-    [... 12 removed lines not shown in the diff source ...]
-    if not location:
-        location = ["."]
-    elif isinstance(location, (str, pathlib.PosixPath)):
-        location = [location]
-    elif not isinstance(location, list):
-        location = []
-    location_pruned = []
-    for directory in location:
-        if not isinstance(directory, str):
-            if not isinstance(directory, pathlib.PosixPath):
-                continue
-            directory = str(directory)
-        if not (directory := directory.strip()):
-            continue
-        if os.path.isfile(directory := os.path.abspath(os.path.normpath(directory))):
-            # Allow a file; assume its parent directory was intended.
-            if not (directory := os.path.dirname(directory)):
-                continue
-        if directory not in location_pruned:
-            location_pruned.append(directory)
-    location = location_pruned
-    for directory in location:
-        if os.path.exists(os.path.join(directory, file)):
-            file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
-            if single is True:
-                return file_found
-            if file_found not in files_found:
-                files_found.append(file_found)
-    if recursive is True:
+    if file and isinstance(file, (str, pathlib.PosixPath)):
+        if os.path.isabs(file):
+            if os.path.exists(file):
+                return file if single else [file]
+            return None if single else []
+    files_found = []
+    if not location:
+        location = ["."]
+    elif isinstance(location, (str, pathlib.PosixPath)):
+        location = [location]
+    elif not isinstance(location, list):
+        location = []
     for directory in location:
-        if not directory
-        [... 26 removed lines not shown in the diff source ...]
-    indicator (i.e. "~") then expands it to the actual (absolute) home path of the caller. If the
-    given path value is not actually even a string (or pathlib.Path) then returns an empty string.
-    """
-    if isinstance(value, pathlib.Path):
-        value = str(value)
-    elif not isinstance(value, str):
-        return ""
-    if not (value := value.strip()) or not (value := os.path.normpath(value)):
-        return ""
-    if expand_home is True:
-        value = os.path.expanduser(value)
-    elif (expand_home is False) and (os.name == "posix"):
-        if value.startswith(home := HOME_DIRECTORY + os.sep):
-            value = "~/" + value[len(home):]
-        elif value == HOME_DIRECTORY:
-            value = "~"
-    if absolute is True:
-        value = os.path.abspath(value)
-    return value
-
-
-def get_file_size(file: str, raise_exception: bool = True) -> Optional[int]:
-    try:
-        return os.path.getsize(file) if isinstance(file, str) else None
-    except Exception:
-        if raise_exception is True:
-            raise
-        return None
-
-
-def get_file_modified_datetime(file: str, raise_exception: bool = True) -> Optional[datetime]:
-    try:
-        return datetime.fromtimestamp(os.path.getmtime(file)) if isinstance(file, str) else None
-    except Exception:
-        if raise_exception is True:
-            raise
-        return None
-
-
-def are_files_equal(filea: str, fileb: str, raise_exception: bool = True) -> bool:
-    """
-    Returns True iff the contents of the two given files are exactly the same.
-    """
-    try:
-        with open(filea, "rb") as fa:
-            with open(fileb, "rb") as fb:
-                chunk_size = 4096
-                while True:
-                    chunka = fa.read(chunk_size)
-                    chunkb = fb.read(chunk_size)
-                    if chunka != chunkb:
-                        return False
-                    if not chunka:
-                        break
-        return True
-    except Exception:
-        if raise_exception is True:
-            raise
-        return False
-
-
-def compute_file_md5(file: str, raise_exception: bool = True) -> str:
-    """
-    Returns the md5 checksum for the given file.
-    """
-    if not isinstance(file, str):
-        return ""
-    try:
-        md5 = hashlib.md5()
-        with open(file, "rb") as file:
-            for chunk in iter(lambda: file.read(4096), b""):
-                md5.update(chunk)
-        return md5.hexdigest()
-    except Exception:
-        if raise_exception is True:
-            raise
-        return ""
-
-
-def compute_file_etag(file: str, raise_exception: bool = True) -> Optional[str]:
-    """
-    Returns the AWS S3 "etag" for the given file; this value is md5-like but
-    not the same as a normal md5. We use this to compare that a file in S3
-    appears to be the exact the same file as a local file.
-    """
-    try:
-        with io.open(file, "rb") as f:
-            return _compute_file_etag(f)
-    except Exception:
-        if raise_exception is True:
-            raise
-        return None
-
-
-def _compute_file_etag(f: io.BufferedReader) -> str:
-    # See: https://stackoverflow.com/questions/75723647/calculate-md5-from-aws-s3-etag
-    MULTIPART_THRESHOLD = 8388608
-    MULTIPART_CHUNKSIZE = 8388608
-    # BUFFER_SIZE = 1048576
-    # Verify some assumptions are correct
-    # assert(MULTIPART_CHUNKSIZE >= MULTIPART_THRESHOLD)
-    # assert((MULTIPART_THRESHOLD % BUFFER_SIZE) == 0)
-    # assert((MULTIPART_CHUNKSIZE % BUFFER_SIZE) == 0)
-    hash = hashlib.md5()
-    read = 0
-    chunks = None
-    while True:
-        # Read some from stdin, if we're at the end, stop reading
-        bits = f.read(1048576)
-        if len(bits) == 0:
-            break
-        read += len(bits)
-        hash.update(bits)
-        if chunks is None:
-            # We're handling a multi-part upload, so switch to calculating
-            # hashes of each chunk
-            if read >= MULTIPART_THRESHOLD:
-                chunks = b''
-        if chunks is not None:
-            if (read % MULTIPART_CHUNKSIZE) == 0:
-                # Dont with a chunk, add it to the list of hashes to hash later
-                chunks += hash.digest()
-                hash = hashlib.md5()
-    if chunks is None:
-        # Normal upload, just output the MD5 hash
-        etag = hash.hexdigest()
-    else:
-        # Multipart upload, need to output the hash of the hashes
-        if (read % MULTIPART_CHUNKSIZE) != 0:
-            # Add the last part if we have a partial chunk
-            chunks += hash.digest()
-        etag = hashlib.md5(chunks).hexdigest() + "-" + str(len(chunks) // 16)
-    return etag
-
-
-def create_random_file(file: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
-                       nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
-    """
-    Write to the given file (name/path) some random content. If the given file is None then writes
-    to a temporary file. In either case, returns the file written to. The of bytes written is 1024
-    by default be can be specified with the nbytes argument; default to writing ASCII text but if
-    the binary argument is True then writes binary data as well; if not binary the content is in
-    lines of 80 characters each; use the line_length argumetn in this case to change the line length.
-    """
-    if not isinstance(nbytes, int) or nbytes < 0:
-        nbytes = 0
-    if not isinstance(file, str) or not file:
-        if not isinstance(prefix, str):
-            prefix = ""
-        if not isinstance(suffix, str):
-            suffix = ""
-        file = f"{datetime.utcnow().strftime('%Y%m%d%H%M%S')}{str(uuid()).replace('-', '')}"
-        file = os.path.join(get_temporary_directory(), file)
-    with open(file, "wb" if binary is True else "w") as f:
-        if binary is True:
-            f.write(os.urandom(nbytes))
-        else:
-            if (not isinstance(line_length, int)) or (line_length < 1):
-                line_length = 80
-            line_length += 1
-            nlines = nbytes // line_length
-            nremainder = nbytes % line_length
-            for n in range(nlines):
-                f.write("".join(random.choices(string.ascii_letters + string.digits, k=line_length - 1)))
-                f.write("\n")
-            if nremainder > 1:
-                f.write("".join(random.choices(string.ascii_letters + string.digits, k=nremainder - 1)))
-            if nremainder > 0:
-                f.write("\n")
-    return file
+        if not directory:
+            continue
+        if isinstance(directory, (str, pathlib.PosixPath)) and os.path.exists(os.path.join(directory, file)):
+            file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
+            if single:
+                return file_found
+            if file_found not in files_found:
+                files_found.append(file_found)
+    if recursive:
+        for directory in location:
+            if not directory:
+                continue
+            if not directory.endswith("/**") and not file.startswith("**/"):
+                path = f"{directory}/**/{file}"
+            else:
+                path = f"{directory}/{file}"
+            files = glob.glob(path, recursive=recursive)
+            if files:
+                for file_found in files:
+                    file_found = os.path.abspath(file_found)
+                    if single:
+                        return file_found
                    if file_found not in files_found:
+                        files_found.append(file_found)
+    if files_found:
+        return files_found[0] if single else files_found
+    return None if single else []
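
A minimal usage sketch of the reworked search_for_file, based on the new code above (the file name and directories below are hypothetical examples):

    from dcicutils.file_utils import search_for_file

    # An absolute path short-circuits the search: it is returned as-is if it exists.
    one = search_for_file("/etc/hosts", single=True)

    # Otherwise each location directory is checked directly, then (with recursive=True)
    # its subdirectories via glob("**"); all matches are returned as a list.
    matches = search_for_file("config.json", location=["./conf", "./data"], recursive=True)
    print(one, matches)
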
dcicutils/misc_utils.py CHANGED
@@ -3,7 +3,6 @@ This file contains functions that might be generally useful.
 """
 
 from collections import namedtuple
-import appdirs
 import contextlib
 import datetime
 import functools
@@ -14,12 +13,10 @@ import json
 import logging
 import math
 import os
-import platform
 import pytz
 import re
 import rfc3986.validators
 import rfc3986.exceptions
-import shortuuid
 import time
 import uuid
 import warnings
@@ -1525,7 +1522,7 @@ def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]],
 def create_dict(**kwargs) -> dict:
     result = {}
     for name in kwargs:
-        if
+        if kwargs[name]:
             result[name] = kwargs[name]
     return result
 
@@ -2551,19 +2548,6 @@ def normalize_spaces(value: str) -> str:
     return re.sub(r"\s+", " ", value).strip()
 
 
-def normalize_string(value: Optional[str]) -> Optional[str]:
-    """
-    Strips leading/trailing spaces, and converts multiple consecutive spaces to a single space
-    in the given string value and returns the result. If the given value is None returns an
-    empty string. If the given value is not actually even a string then return None.
-    """
-    if value is None:
-        return ""
-    elif isinstance(value, str):
-        return value.strip()
-    return None
-
-
 def find_nth_from_end(string: str, substring: str, nth: int) -> int:
     """
     Returns the index of the nth occurrence of the given substring within
@@ -2686,48 +2670,3 @@ class JsonLinesReader:
                 yield line
             else:
                 raise Exception(f"If the first line is not a list, all lines must be dictionaries: {line!r}")
-
-
-def get_app_specific_directory() -> str:
-    """
-    Returns the standard system application specific directory:
-    - On MacOS this directory: is: ~/Library/Application Support
-    - On Linux this directory is: ~/.local/share
-    - On Windows this directory is: %USERPROFILE%\AppData\Local  # noqa
-    N.B. This is has been tested on MacOS and Linux but not on Windows.
-    """
-    return appdirs.user_data_dir()
-
-
-def get_os_name() -> str:
-    if os_name := platform.system():
-        if os_name == "Darwin": return "osx"  # noqa
-        elif os_name == "Linux": return "linux"  # noqa
-        elif os_name == "Windows": return "windows"  # noqa
-    return ""
-
-
-def get_cpu_architecture_name() -> str:
-    if os_architecture_name := platform.machine():
-        if os_architecture_name == "x86_64": return "amd64"  # noqa
-        return os_architecture_name
-    return ""
-
-
-def create_uuid(nodash: bool = False, upper: bool = False) -> str:
-    value = str(uuid.uuid4())
-    if nodash is True:
-        value = value.replace("-", "")
-    if upper is True:
-        value = value.upper()
-    return value
-
-
-def create_short_uuid(length: Optional[int] = None, upper: bool = False):
-    # Not really techincally a uuid of course.
-    if (length is None) or (not isinstance(length, int)) or (length < 1):
-        length = 16
-    value = shortuuid.ShortUUID().random(length=length)
-    if upper is True:
-        value = value.upper()
-    return value
dcicutils/schema_utils.py CHANGED
@@ -24,6 +24,7 @@ class JsonSchemaConstants:
 
 
 class EncodedSchemaConstants:
+    DESCRIPTION = "description"
     IDENTIFYING_PROPERTIES = "identifyingProperties"
     LINK_TO = "linkTo"
     MERGE_REF = "$merge"
@@ -187,6 +188,21 @@ def get_one_of_formats(schema: Dict[str, Any]) -> List[str]:
     ]
 
 
+def is_link(property_schema: Dict[str, Any]) -> bool:
+    """Is property schema a link?"""
+    return property_schema.get(SchemaConstants.LINK_TO, False)
+
+
+def get_enum(property_schema: Dict[str, Any]) -> List[str]:
+    """Return the enum of a property schema."""
+    return property_schema.get(SchemaConstants.ENUM, [])
+
+
+def get_description(schema: Dict[str, Any]) -> str:
+    """Return the description of a schema."""
+    return schema.get(SchemaConstants.DESCRIPTION, "")
+
+
 class Schema:
 
     def __init__(self, schema: dict, type: Optional[str] = None) -> None:
dcicutils/tmpfile_utils.py CHANGED
@@ -1,11 +1,8 @@
 from contextlib import contextmanager
-from datetime import datetime
 import os
 import shutil
 import tempfile
-from uuid import uuid4 as uuid
 from typing import List, Optional, Union
-from dcicutils.file_utils import create_random_file
 
 
 @contextmanager
@@ -18,38 +15,17 @@ def temporary_directory() -> str:
 
 
 @contextmanager
-def temporary_file(name: Optional[str] = None,
+def temporary_file(name: Optional[str] = None, suffix: Optional[str] = None,
                    content: Optional[Union[str, bytes, List[str]]] = None) -> str:
     with temporary_directory() as tmp_directory_name:
-        tmp_file_name =
-
-        with open(tmp_file_path, "wb" if isinstance(content, bytes) else "w") as tmp_file:
+        tmp_file_name = os.path.join(tmp_directory_name, name or tempfile.mktemp(dir="")) + (suffix or "")
+        with open(tmp_file_name, "wb" if isinstance(content, bytes) else "w") as tmp_file:
             if content is not None:
                 tmp_file.write("\n".join(content) if isinstance(content, list) else content)
-        yield
-
-
-def create_temporary_file_name(prefix: Optional[str] = None, suffix: Optional[str] = None) -> str:
-    """
-    Generates and returns the full path to file within the system temporary directory.
-    """
-    random_string = f"{datetime.utcnow().strftime('%Y%m%d%H%M%S')}{str(uuid()).replace('-', '')}"
-    tmp_file_name = f"{prefix or ''}{random_string}{suffix or ''}"
-    return os.path.join(tempfile.gettempdir(), tmp_file_name)
-
-
-@contextmanager
-def temporary_random_file(prefix: Optional[str] = None, suffix: Optional[str] = None,
-                          nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
-    with temporary_file(prefix=prefix, suffix=suffix) as tmp_file_path:
-        create_random_file(tmp_file_path, nbytes=nbytes, binary=binary, line_length=line_length)
-        yield tmp_file_path
+        yield tmp_file_name
 
 
 def remove_temporary_directory(tmp_directory_name: str) -> None:
-    """
-    Removes the given directory, recursively; but ONLY if it is (somewhere) within the system temporary directory.
-    """
     def is_temporary_directory(path: str) -> bool:
         try:
             tmpdir = tempfile.gettempdir()
@@ -58,17 +34,3 @@ def remove_temporary_directory(tmp_directory_name: str) -> None:
             return False
     if is_temporary_directory(tmp_directory_name):  # Guard against errant deletion.
         shutil.rmtree(tmp_directory_name)
-
-
-def remove_temporary_file(tmp_file_name: str) -> bool:
-    """
-    Removes the given file; but ONLY if it is (somewhere) within the system temporary directory.
-    """
-    try:
-        tmpdir = tempfile.gettempdir()
-        if (os.path.commonpath([tmpdir, tmp_file_name]) == tmpdir) and os.path.isfile(tmp_file_name):
-            os.remove(tmp_file_name)
-            return True
-        return False
-    except Exception:
-        return False
dcicutils/zip_utils.py CHANGED
@@ -2,9 +2,7 @@ from contextlib import contextmanager
 from dcicutils.tmpfile_utils import temporary_directory, temporary_file
 import gzip
 import os
-import shutil
 import tarfile
-import tempfile
 from typing import List, Optional
 import zipfile
 
@@ -47,28 +45,3 @@ def unpack_gz_file_to_temporary_file(file: str, suffix: Optional[str] = None) ->
             outputf.write(inputf.read())
             outputf.close()
         yield tmp_file_name
-
-
-def extract_file_from_zip(zip_file: str, file_to_extract: str,
-                          destination_file: str, raise_exception: bool = True) -> bool:
-    """
-    Extracts from the given zip file, the given file to extract, writing it to the
-    given destination file. Returns True if all is well, otherwise False, or if the
-    raise_exception argument is True (the default), then raises and exception on error.
-    """
-    try:
-        if not (destination_directory := os.path.dirname(destination_file)):
-            destination_directory = os.getcwd()
-            destination_file = os.path.join(destination_directory, destination_file)
-        with tempfile.TemporaryDirectory() as tmp_directory_name:
-            with zipfile.ZipFile(zip_file, "r") as zipf:
-                if file_to_extract not in zipf.namelist():
-                    return False
-                zipf.extract(file_to_extract, path=tmp_directory_name)
-                os.makedirs(destination_directory, exist_ok=True)
-                shutil.move(os.path.join(tmp_directory_name, file_to_extract), destination_file)
-                return True
-    except Exception as e:
-        if raise_exception:
-            raise e
-        return False
{dcicutils-8.8.4.1b28.dist-info → dcicutils-8.9.0.0b0.dist-info}/METADATA CHANGED
@@ -1,12 +1,12 @@
 Metadata-Version: 2.1
 Name: dcicutils
-Version: 8.8.4.1b28
+Version: 8.9.0.0b0
 Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
 Home-page: https://github.com/4dn-dcic/utils
 License: MIT
 Author: 4DN-DCIC Team
 Author-email: support@4dnucleome.org
-Requires-Python: >=3.8,<3.
+Requires-Python: >=3.8,<3.12
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research
@@ -24,10 +24,9 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Topic :: Database :: Database Engines/Servers
 Requires-Dist: PyJWT (>=2.6.0,<3.0.0)
 Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
-Requires-Dist: appdirs (>=1.4.4,<2.0.0)
 Requires-Dist: aws-requests-auth (>=0.4.2,<1)
-Requires-Dist: boto3 (>=1.
-Requires-Dist: botocore (>=1.
+Requires-Dist: boto3 (>=1.28.57,<2.0.0)
+Requires-Dist: botocore (>=1.31.57,<2.0.0)
 Requires-Dist: chardet (>=5.2.0,<6.0.0)
 Requires-Dist: docker (>=4.4.4,<5.0.0)
 Requires-Dist: elasticsearch (==7.13.4)
@@ -43,7 +42,6 @@ Requires-Dist: pytz (>=2020.4)
 Requires-Dist: redis (>=4.5.1,<5.0.0)
 Requires-Dist: requests (>=2.21.0,<3.0.0)
 Requires-Dist: rfc3986 (>=1.4.0,<2.0.0)
-Requires-Dist: shortuuid (>=1.0.13,<2.0.0)
 Requires-Dist: structlog (>=19.2.0,<20.0.0)
 Requires-Dist: toml (>=0.10.1,<1)
 Requires-Dist: tqdm (>=4.66.2,<5.0.0)
{dcicutils-8.8.4.1b28.dist-info → dcicutils-8.9.0.0b0.dist-info}/RECORD CHANGED
@@ -27,11 +27,10 @@ dcicutils/env_utils_legacy.py,sha256=J81OAtJHN69o1beHO6q1j7_J6TeblSjnAHlS8VA5KSM
 dcicutils/es_utils.py,sha256=ZksLh5ei7kRUfiFltk8sd2ZSfh15twbstrMzBr8HNw4,7541
 dcicutils/exceptions.py,sha256=4giQGtpak-omQv7BP6Ckeu91XK5fnDosC8gfdmN_ccA,9931
 dcicutils/ff_mocks.py,sha256=6RKS4eUiu_Wl8yP_8V0CaV75w4ZdWxdCuL1CVlnMrek,36918
-dcicutils/ff_utils.py,sha256=
-dcicutils/file_utils.py,sha256=
+dcicutils/ff_utils.py,sha256=Yf-fET5gdpjrH0gikpOCIJdY2Dv3obzUpR31ur816mU,72972
+dcicutils/file_utils.py,sha256=098rXvLeIh8n69EGW7DpOS227ef3BPgwhRAktoU6mhE,2663
 dcicutils/function_cache_decorator.py,sha256=XMyiEGODVr2WoAQ68vcoX_9_Xb9p8pZXdXl7keU8i2g,10026
 dcicutils/glacier_utils.py,sha256=Q4CVXsZCbP-SoZIsZ5NMcawDfelOLzbQnIlQn-GdlTo,34149
-dcicutils/http_utils.py,sha256=tNfH5JA-OwbQKEvD5HPJ3lcp2TSIZ4rnl__4d4JO8Gw,1583
 dcicutils/jh_utils.py,sha256=Gpsxb9XEzggF_-Eq3ukjKvTnuyb9V1SCSUXkXsES4Kg,11502
 dcicutils/kibana/dashboards.json,sha256=wHMB_mpJ8OaYhRRgvpZuihaB2lmSF64ADt_8hkBWgQg,16225
 dcicutils/kibana/readme.md,sha256=3KmHF9FH6A6xwYsNxRFLw27q0XzHYnjZOlYUnn3VkQQ,2164
@@ -44,7 +43,7 @@ dcicutils/license_policies/park-lab-gpl-pipeline.jsonc,sha256=vLZkwm3Js-kjV44nug
 dcicutils/license_policies/park-lab-pipeline.jsonc,sha256=9qlY0ASy3iUMQlr3gorVcXrSfRHnVGbLhkS427UaRy4,283
 dcicutils/license_utils.py,sha256=d1cq6iwv5Ju-VjdoINi6q7CPNNL7Oz6rcJdLMY38RX0,46978
 dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
-dcicutils/misc_utils.py,sha256=
+dcicutils/misc_utils.py,sha256=YH_TTmv6ABWeMERwVvA2-rIfdS-CoPYLXJru9TvWxgM,104610
 dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
 dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
 dcicutils/portal_object_utils.py,sha256=gDXRgPsRvqCFwbC8WatsuflAxNiigOnqr0Hi93k3AgE,15422
@@ -56,7 +55,7 @@ dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
 dcicutils/redis_tools.py,sha256=qkcSNMtvqkpvts-Cm9gWhneK523Q_oHwhNUud1be1qk,7055
 dcicutils/redis_utils.py,sha256=VJ-7g8pOZqR1ZCtdcjKz3-6as2DMUcs1b1zG6wSprH4,6462
 dcicutils/s3_utils.py,sha256=LauLFQGvZLfpBJ81tYMikjLd3SJRz2R_FrL1n4xSlyI,28868
-dcicutils/schema_utils.py,sha256=
+dcicutils/schema_utils.py,sha256=2hOzuGK7F8xZ7JyS7_Lan2wXOlNZezzT2lqgEs3QOe4,10605
 dcicutils/scripts/publish_to_pypi.py,sha256=LFzNHIQK2EXFr88YcfctyA_WKEBFc1ElnSjWrCXedPM,13889
 dcicutils/scripts/run_license_checker.py,sha256=z2keYnRDZsHQbTeo1XORAXSXNJK5axVzL5LjiNqZ7jE,4184
 dcicutils/scripts/view_portal_object.py,sha256=Cy-8GwGJS9EX-5RxE8mjsqNlDT0N6OCpkNffPVkTFQc,26262
@@ -68,13 +67,13 @@ dcicutils/structured_data.py,sha256=BQuIMv6OPySsn6YxtXE2Er-zLE2QJuCYhEQ3V0u_UXY,
 dcicutils/submitr/progress_constants.py,sha256=5bxyX77ql8qEJearfHEvsvXl7D0GuUODW0T65mbRmnE,2895
 dcicutils/submitr/ref_lookup_strategy.py,sha256=Js2cVznTmgjciLWBPLCvMiwLIHXjDn3jww-gJPjYuFw,3467
 dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
-dcicutils/tmpfile_utils.py,sha256=
+dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
 dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
 dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
 dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
-dcicutils/zip_utils.py,sha256=
-dcicutils-8.
-dcicutils-8.
-dcicutils-8.
-dcicutils-8.
-dcicutils-8.
+dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
+dcicutils-8.9.0.0b0.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
+dcicutils-8.9.0.0b0.dist-info/METADATA,sha256=sHJ_jTCTbZwTy6AoI9BSixIfwZDxdntJvQmTy5keWnI,3356
+dcicutils-8.9.0.0b0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+dcicutils-8.9.0.0b0.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
+dcicutils-8.9.0.0b0.dist-info/RECORD,,
dcicutils/http_utils.py DELETED
@@ -1,39 +0,0 @@
-from contextlib import contextmanager
-import requests
-from typing import Callable, Optional
-from dcicutils.tmpfile_utils import temporary_file
-
-
-@contextmanager
-def download(url: str, suffix: Optional[str] = None, binary: bool = True,
-             progress: Optional[Callable] = None) -> Optional[str]:
-    """
-    Context manager to download the given URL into a temporary file and yields the file
-    path to it. An optional file suffix may be specified for this temporary file name.
-    Defaults to binary file mode; if not desired then pass False as the binary argument.
-    """
-    with temporary_file(suffix=suffix) as file:
-        download_to(url, file, binary=binary, progress=progress)
-        yield file
-
-
-def download_to(url: str, file: str, binary: bool = True, progress: Optional[Callable] = None) -> None:
-    """
-    Download the given URL into the given file. Defaults to binary
-    file mode; if not desired then pass False as the binary argument.
-    """
-    if not callable(progress):
-        progress = None
-    response = requests.get(url, stream=True)
-    if progress:
-        nbytes = 0
-        nbytes_total = None
-        if isinstance(content_length := response.headers.get("Content-Length"), str) and content_length.isdigit():
-            nbytes_total = int(content_length)
-    with open(file, "wb" if binary is True else "w") as f:
-        for chunk in response.iter_content(chunk_size=8192):
-            if chunk:
-                f.write(chunk)
-            if progress:
-                nbytes += len(chunk)
-                progress(nbytes, nbytes_total)
{dcicutils-8.8.4.1b28.dist-info → dcicutils-8.9.0.0b0.dist-info}/LICENSE.txt: File without changes
{dcicutils-8.8.4.1b28.dist-info → dcicutils-8.9.0.0b0.dist-info}/WHEEL: File without changes
{dcicutils-8.8.4.1b28.dist-info → dcicutils-8.9.0.0b0.dist-info}/entry_points.txt: File without changes