dcicutils 8.8.4.1b30__py3-none-any.whl → 8.9.0.0b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dcicutils/ff_utils.py +1 -4
- dcicutils/file_utils.py +40 -249
- dcicutils/misc_utils.py +1 -62
- dcicutils/schema_utils.py +16 -0
- dcicutils/tmpfile_utils.py +4 -42
- dcicutils/zip_utils.py +0 -27
- {dcicutils-8.8.4.1b30.dist-info → dcicutils-8.9.0.0b0.dist-info}/METADATA +4 -6
- {dcicutils-8.8.4.1b30.dist-info → dcicutils-8.9.0.0b0.dist-info}/RECORD +11 -12
- dcicutils/http_utils.py +0 -39
- {dcicutils-8.8.4.1b30.dist-info → dcicutils-8.9.0.0b0.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.8.4.1b30.dist-info → dcicutils-8.9.0.0b0.dist-info}/WHEEL +0 -0
- {dcicutils-8.8.4.1b30.dist-info → dcicutils-8.9.0.0b0.dist-info}/entry_points.txt +0 -0
dcicutils/ff_utils.py
CHANGED
@@ -895,12 +895,9 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth):
|
|
895
895
|
used to create the generator.
|
896
896
|
Should NOT be used directly
|
897
897
|
"""
|
898
|
-
def get_es_host_local() -> Optional[str]:
|
899
|
-
return os.environ.get("ES_HOST_LOCAL", None)
|
900
898
|
health = get_health_page(key=auth)
|
901
899
|
if es_client is None:
|
902
|
-
|
903
|
-
es_url = health['elasticsearch']
|
900
|
+
es_url = health['elasticsearch']
|
904
901
|
es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
|
905
902
|
namespace_star = health.get('namespace', '') + '*'
|
906
903
|
# match all given uuids to _id fields
|
dcicutils/file_utils.py
CHANGED
@@ -1,23 +1,13 @@
|
|
1
1
|
import glob
|
2
|
-
import hashlib
|
3
|
-
import io
|
4
2
|
import os
|
5
3
|
import pathlib
|
6
|
-
from datetime import datetime
|
7
|
-
import random
|
8
|
-
import string
|
9
|
-
from tempfile import gettempdir as get_temporary_directory
|
10
4
|
from typing import List, Optional, Union
|
11
|
-
from uuid import uuid4 as uuid
|
12
|
-
|
13
|
-
HOME_DIRECTORY = str(pathlib.Path().home())
|
14
5
|
|
15
6
|
|
16
7
|
def search_for_file(file: str,
|
17
8
|
location: Union[str, Optional[List[str]]] = None,
|
18
9
|
recursive: bool = False,
|
19
|
-
single: bool = False,
|
20
|
-
order: bool = True) -> Union[List[str], Optional[str]]:
|
10
|
+
single: bool = False) -> Union[List[str], Optional[str]]:
|
21
11
|
"""
|
22
12
|
Searches for the existence of the given file name, first directly in the given directory or list
|
23
13
|
of directories, if specified, and if not then just in the current (working) directory; if the
|
@@ -26,242 +16,43 @@ def search_for_file(file: str,
|
|
26
16
|
first file which is found is returns (as a string), or None if none; if the single flag
|
27
17
|
is False, then all matched files are returned in a list, or and empty list if none.
|
28
18
|
"""
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
if not location:
|
42
|
-
location = ["."]
|
43
|
-
elif isinstance(location, (str, pathlib.PosixPath)):
|
44
|
-
location = [location]
|
45
|
-
elif not isinstance(location, list):
|
46
|
-
location = []
|
47
|
-
location_pruned = []
|
48
|
-
for directory in location:
|
49
|
-
if not isinstance(directory, str):
|
50
|
-
if not isinstance(directory, pathlib.PosixPath):
|
51
|
-
continue
|
52
|
-
directory = str(directory)
|
53
|
-
if not (directory := directory.strip()):
|
54
|
-
continue
|
55
|
-
if os.path.isfile(directory := os.path.abspath(os.path.normpath(directory))):
|
56
|
-
# Allow a file; assume its parent directory was intended.
|
57
|
-
if not (directory := os.path.dirname(directory)):
|
58
|
-
continue
|
59
|
-
if directory not in location_pruned:
|
60
|
-
location_pruned.append(directory)
|
61
|
-
location = location_pruned
|
62
|
-
for directory in location:
|
63
|
-
if os.path.exists(os.path.join(directory, file)):
|
64
|
-
file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
|
65
|
-
if single is True:
|
66
|
-
return file_found
|
67
|
-
if file_found not in files_found:
|
68
|
-
files_found.append(file_found)
|
69
|
-
if recursive is True:
|
19
|
+
if file and isinstance(file, (str, pathlib.PosixPath)):
|
20
|
+
if os.path.isabs(file):
|
21
|
+
if os.path.exists(file):
|
22
|
+
return file if single else [file]
|
23
|
+
return None if single else []
|
24
|
+
files_found = []
|
25
|
+
if not location:
|
26
|
+
location = ["."]
|
27
|
+
elif isinstance(location, (str, pathlib.PosixPath)):
|
28
|
+
location = [location]
|
29
|
+
elif not isinstance(location, list):
|
30
|
+
location = []
|
70
31
|
for directory in location:
|
71
|
-
if not directory
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
indicator (i.e. "~") then expands it to the actual (absolute) home path of the caller. If the
|
99
|
-
given path value is not actually even a string (or pathlib.Path) then returns an empty string.
|
100
|
-
"""
|
101
|
-
if isinstance(value, pathlib.Path):
|
102
|
-
value = str(value)
|
103
|
-
elif not isinstance(value, str):
|
104
|
-
return ""
|
105
|
-
if not (value := value.strip()) or not (value := os.path.normpath(value)):
|
106
|
-
return ""
|
107
|
-
if expand_home is True:
|
108
|
-
value = os.path.expanduser(value)
|
109
|
-
elif (expand_home is False) and (os.name == "posix"):
|
110
|
-
if value.startswith(home := HOME_DIRECTORY + os.sep):
|
111
|
-
value = "~/" + value[len(home):]
|
112
|
-
elif value == HOME_DIRECTORY:
|
113
|
-
value = "~"
|
114
|
-
if absolute is True:
|
115
|
-
value = os.path.abspath(value)
|
116
|
-
return value
|
117
|
-
|
118
|
-
|
119
|
-
def get_file_size(file: str, raise_exception: bool = True) -> Optional[int]:
|
120
|
-
try:
|
121
|
-
return os.path.getsize(file) if isinstance(file, str) else None
|
122
|
-
except Exception:
|
123
|
-
if raise_exception is True:
|
124
|
-
raise
|
125
|
-
return None
|
126
|
-
|
127
|
-
|
128
|
-
def get_file_modified_datetime(file: str, raise_exception: bool = True) -> Optional[datetime]:
|
129
|
-
try:
|
130
|
-
return datetime.fromtimestamp(os.path.getmtime(file)) if isinstance(file, str) else None
|
131
|
-
except Exception:
|
132
|
-
if raise_exception is True:
|
133
|
-
raise
|
134
|
-
return None
|
135
|
-
|
136
|
-
|
137
|
-
def are_files_equal(filea: str, fileb: str, raise_exception: bool = True) -> bool:
|
138
|
-
"""
|
139
|
-
Returns True iff the contents of the two given files are exactly the same.
|
140
|
-
"""
|
141
|
-
try:
|
142
|
-
with open(filea, "rb") as fa:
|
143
|
-
with open(fileb, "rb") as fb:
|
144
|
-
chunk_size = 4096
|
145
|
-
while True:
|
146
|
-
chunka = fa.read(chunk_size)
|
147
|
-
chunkb = fb.read(chunk_size)
|
148
|
-
if chunka != chunkb:
|
149
|
-
return False
|
150
|
-
if not chunka:
|
151
|
-
break
|
152
|
-
return True
|
153
|
-
except Exception:
|
154
|
-
if raise_exception is True:
|
155
|
-
raise
|
156
|
-
return False
|
157
|
-
|
158
|
-
|
159
|
-
def compute_file_md5(file: str, raise_exception: bool = True) -> str:
|
160
|
-
"""
|
161
|
-
Returns the md5 checksum for the given file.
|
162
|
-
"""
|
163
|
-
if not isinstance(file, str):
|
164
|
-
return ""
|
165
|
-
try:
|
166
|
-
md5 = hashlib.md5()
|
167
|
-
with open(file, "rb") as file:
|
168
|
-
for chunk in iter(lambda: file.read(4096), b""):
|
169
|
-
md5.update(chunk)
|
170
|
-
return md5.hexdigest()
|
171
|
-
except Exception:
|
172
|
-
if raise_exception is True:
|
173
|
-
raise
|
174
|
-
return ""
|
175
|
-
|
176
|
-
|
177
|
-
def compute_file_etag(file: str, raise_exception: bool = True) -> Optional[str]:
|
178
|
-
"""
|
179
|
-
Returns the AWS S3 "etag" for the given file; this value is md5-like but
|
180
|
-
not the same as a normal md5. We use this to compare that a file in S3
|
181
|
-
appears to be the exact the same file as a local file.
|
182
|
-
"""
|
183
|
-
try:
|
184
|
-
with io.open(file, "rb") as f:
|
185
|
-
return _compute_file_etag(f)
|
186
|
-
except Exception:
|
187
|
-
if raise_exception is True:
|
188
|
-
raise
|
189
|
-
return None
|
190
|
-
|
191
|
-
|
192
|
-
def _compute_file_etag(f: io.BufferedReader) -> str:
|
193
|
-
# See: https://stackoverflow.com/questions/75723647/calculate-md5-from-aws-s3-etag
|
194
|
-
MULTIPART_THRESHOLD = 8388608
|
195
|
-
MULTIPART_CHUNKSIZE = 8388608
|
196
|
-
# BUFFER_SIZE = 1048576
|
197
|
-
# Verify some assumptions are correct
|
198
|
-
# assert(MULTIPART_CHUNKSIZE >= MULTIPART_THRESHOLD)
|
199
|
-
# assert((MULTIPART_THRESHOLD % BUFFER_SIZE) == 0)
|
200
|
-
# assert((MULTIPART_CHUNKSIZE % BUFFER_SIZE) == 0)
|
201
|
-
hash = hashlib.md5()
|
202
|
-
read = 0
|
203
|
-
chunks = None
|
204
|
-
while True:
|
205
|
-
# Read some from stdin, if we're at the end, stop reading
|
206
|
-
bits = f.read(1048576)
|
207
|
-
if len(bits) == 0:
|
208
|
-
break
|
209
|
-
read += len(bits)
|
210
|
-
hash.update(bits)
|
211
|
-
if chunks is None:
|
212
|
-
# We're handling a multi-part upload, so switch to calculating
|
213
|
-
# hashes of each chunk
|
214
|
-
if read >= MULTIPART_THRESHOLD:
|
215
|
-
chunks = b''
|
216
|
-
if chunks is not None:
|
217
|
-
if (read % MULTIPART_CHUNKSIZE) == 0:
|
218
|
-
# Dont with a chunk, add it to the list of hashes to hash later
|
219
|
-
chunks += hash.digest()
|
220
|
-
hash = hashlib.md5()
|
221
|
-
if chunks is None:
|
222
|
-
# Normal upload, just output the MD5 hash
|
223
|
-
etag = hash.hexdigest()
|
224
|
-
else:
|
225
|
-
# Multipart upload, need to output the hash of the hashes
|
226
|
-
if (read % MULTIPART_CHUNKSIZE) != 0:
|
227
|
-
# Add the last part if we have a partial chunk
|
228
|
-
chunks += hash.digest()
|
229
|
-
etag = hashlib.md5(chunks).hexdigest() + "-" + str(len(chunks) // 16)
|
230
|
-
return etag
|
231
|
-
|
232
|
-
|
233
|
-
def create_random_file(file: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
|
234
|
-
nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
|
235
|
-
"""
|
236
|
-
Write to the given file (name/path) some random content. If the given file is None then writes
|
237
|
-
to a temporary file. In either case, returns the file written to. The of bytes written is 1024
|
238
|
-
by default be can be specified with the nbytes argument; default to writing ASCII text but if
|
239
|
-
the binary argument is True then writes binary data as well; if not binary the content is in
|
240
|
-
lines of 80 characters each; use the line_length argumetn in this case to change the line length.
|
241
|
-
"""
|
242
|
-
if not isinstance(nbytes, int) or nbytes < 0:
|
243
|
-
nbytes = 0
|
244
|
-
if not isinstance(file, str) or not file:
|
245
|
-
if not isinstance(prefix, str):
|
246
|
-
prefix = ""
|
247
|
-
if not isinstance(suffix, str):
|
248
|
-
suffix = ""
|
249
|
-
file = f"{datetime.utcnow().strftime('%Y%m%d%H%M%S')}{str(uuid()).replace('-', '')}"
|
250
|
-
file = os.path.join(get_temporary_directory(), file)
|
251
|
-
with open(file, "wb" if binary is True else "w") as f:
|
252
|
-
if binary is True:
|
253
|
-
f.write(os.urandom(nbytes))
|
254
|
-
else:
|
255
|
-
if (not isinstance(line_length, int)) or (line_length < 1):
|
256
|
-
line_length = 80
|
257
|
-
line_length += 1
|
258
|
-
nlines = nbytes // line_length
|
259
|
-
nremainder = nbytes % line_length
|
260
|
-
for n in range(nlines):
|
261
|
-
f.write("".join(random.choices(string.ascii_letters + string.digits, k=line_length - 1)))
|
262
|
-
f.write("\n")
|
263
|
-
if nremainder > 1:
|
264
|
-
f.write("".join(random.choices(string.ascii_letters + string.digits, k=nremainder - 1)))
|
265
|
-
if nremainder > 0:
|
266
|
-
f.write("\n")
|
267
|
-
return file
|
32
|
+
if not directory:
|
33
|
+
continue
|
34
|
+
if isinstance(directory, (str, pathlib.PosixPath)) and os.path.exists(os.path.join(directory, file)):
|
35
|
+
file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
|
36
|
+
if single:
|
37
|
+
return file_found
|
38
|
+
if file_found not in files_found:
|
39
|
+
files_found.append(file_found)
|
40
|
+
if recursive:
|
41
|
+
for directory in location:
|
42
|
+
if not directory:
|
43
|
+
continue
|
44
|
+
if not directory.endswith("/**") and not file.startswith("**/"):
|
45
|
+
path = f"{directory}/**/{file}"
|
46
|
+
else:
|
47
|
+
path = f"{directory}/{file}"
|
48
|
+
files = glob.glob(path, recursive=recursive)
|
49
|
+
if files:
|
50
|
+
for file_found in files:
|
51
|
+
file_found = os.path.abspath(file_found)
|
52
|
+
if single:
|
53
|
+
return file_found
|
54
|
+
if file_found not in files_found:
|
55
|
+
files_found.append(file_found)
|
56
|
+
if files_found:
|
57
|
+
return files_found[0] if single else files_found
|
58
|
+
return None if single else []
|
dcicutils/misc_utils.py
CHANGED
@@ -3,7 +3,6 @@ This file contains functions that might be generally useful.
|
|
3
3
|
"""
|
4
4
|
|
5
5
|
from collections import namedtuple
|
6
|
-
import appdirs
|
7
6
|
import contextlib
|
8
7
|
import datetime
|
9
8
|
import functools
|
@@ -14,12 +13,10 @@ import json
|
|
14
13
|
import logging
|
15
14
|
import math
|
16
15
|
import os
|
17
|
-
import platform
|
18
16
|
import pytz
|
19
17
|
import re
|
20
18
|
import rfc3986.validators
|
21
19
|
import rfc3986.exceptions
|
22
|
-
import shortuuid
|
23
20
|
import time
|
24
21
|
import uuid
|
25
22
|
import warnings
|
@@ -1525,7 +1522,7 @@ def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]],
|
|
1525
1522
|
def create_dict(**kwargs) -> dict:
|
1526
1523
|
result = {}
|
1527
1524
|
for name in kwargs:
|
1528
|
-
if
|
1525
|
+
if kwargs[name]:
|
1529
1526
|
result[name] = kwargs[name]
|
1530
1527
|
return result
|
1531
1528
|
|
@@ -2551,19 +2548,6 @@ def normalize_spaces(value: str) -> str:
|
|
2551
2548
|
return re.sub(r"\s+", " ", value).strip()
|
2552
2549
|
|
2553
2550
|
|
2554
|
-
def normalize_string(value: Optional[str]) -> Optional[str]:
|
2555
|
-
"""
|
2556
|
-
Strips leading/trailing spaces, and converts multiple consecutive spaces to a single space
|
2557
|
-
in the given string value and returns the result. If the given value is None returns an
|
2558
|
-
empty string. If the given value is not actually even a string then return None.
|
2559
|
-
"""
|
2560
|
-
if value is None:
|
2561
|
-
return ""
|
2562
|
-
elif isinstance(value, str):
|
2563
|
-
return re.sub(r"\s+", " ", value).strip()
|
2564
|
-
return None
|
2565
|
-
|
2566
|
-
|
2567
2551
|
def find_nth_from_end(string: str, substring: str, nth: int) -> int:
|
2568
2552
|
"""
|
2569
2553
|
Returns the index of the nth occurrence of the given substring within
|
@@ -2686,48 +2670,3 @@ class JsonLinesReader:
|
|
2686
2670
|
yield line
|
2687
2671
|
else:
|
2688
2672
|
raise Exception(f"If the first line is not a list, all lines must be dictionaries: {line!r}")
|
2689
|
-
|
2690
|
-
|
2691
|
-
def get_app_specific_directory() -> str:
|
2692
|
-
"""
|
2693
|
-
Returns the standard system application specific directory:
|
2694
|
-
- On MacOS this directory: is: ~/Library/Application Support
|
2695
|
-
- On Linux this directory is: ~/.local/share
|
2696
|
-
- On Windows this directory is: %USERPROFILE%\AppData\Local # noqa
|
2697
|
-
N.B. This is has been tested on MacOS and Linux but not on Windows.
|
2698
|
-
"""
|
2699
|
-
return appdirs.user_data_dir()
|
2700
|
-
|
2701
|
-
|
2702
|
-
def get_os_name() -> str:
|
2703
|
-
if os_name := platform.system():
|
2704
|
-
if os_name == "Darwin": return "osx" # noqa
|
2705
|
-
elif os_name == "Linux": return "linux" # noqa
|
2706
|
-
elif os_name == "Windows": return "windows" # noqa
|
2707
|
-
return ""
|
2708
|
-
|
2709
|
-
|
2710
|
-
def get_cpu_architecture_name() -> str:
|
2711
|
-
if os_architecture_name := platform.machine():
|
2712
|
-
if os_architecture_name == "x86_64": return "amd64" # noqa
|
2713
|
-
return os_architecture_name
|
2714
|
-
return ""
|
2715
|
-
|
2716
|
-
|
2717
|
-
def create_uuid(nodash: bool = False, upper: bool = False) -> str:
|
2718
|
-
value = str(uuid.uuid4())
|
2719
|
-
if nodash is True:
|
2720
|
-
value = value.replace("-", "")
|
2721
|
-
if upper is True:
|
2722
|
-
value = value.upper()
|
2723
|
-
return value
|
2724
|
-
|
2725
|
-
|
2726
|
-
def create_short_uuid(length: Optional[int] = None, upper: bool = False):
|
2727
|
-
# Not really techincally a uuid of course.
|
2728
|
-
if (length is None) or (not isinstance(length, int)) or (length < 1):
|
2729
|
-
length = 16
|
2730
|
-
value = shortuuid.ShortUUID().random(length=length)
|
2731
|
-
if upper is True:
|
2732
|
-
value = value.upper()
|
2733
|
-
return value
|
dcicutils/schema_utils.py
CHANGED
@@ -24,6 +24,7 @@ class JsonSchemaConstants:
|
|
24
24
|
|
25
25
|
|
26
26
|
class EncodedSchemaConstants:
|
27
|
+
DESCRIPTION = "description"
|
27
28
|
IDENTIFYING_PROPERTIES = "identifyingProperties"
|
28
29
|
LINK_TO = "linkTo"
|
29
30
|
MERGE_REF = "$merge"
|
@@ -187,6 +188,21 @@ def get_one_of_formats(schema: Dict[str, Any]) -> List[str]:
|
|
187
188
|
]
|
188
189
|
|
189
190
|
|
191
|
+
def is_link(property_schema: Dict[str, Any]) -> bool:
|
192
|
+
"""Is property schema a link?"""
|
193
|
+
return property_schema.get(SchemaConstants.LINK_TO, False)
|
194
|
+
|
195
|
+
|
196
|
+
def get_enum(property_schema: Dict[str, Any]) -> List[str]:
|
197
|
+
"""Return the enum of a property schema."""
|
198
|
+
return property_schema.get(SchemaConstants.ENUM, [])
|
199
|
+
|
200
|
+
|
201
|
+
def get_description(schema: Dict[str, Any]) -> str:
|
202
|
+
"""Return the description of a schema."""
|
203
|
+
return schema.get(SchemaConstants.DESCRIPTION, "")
|
204
|
+
|
205
|
+
|
190
206
|
class Schema:
|
191
207
|
|
192
208
|
def __init__(self, schema: dict, type: Optional[str] = None) -> None:
|
dcicutils/tmpfile_utils.py
CHANGED
@@ -1,11 +1,8 @@
|
|
1
1
|
from contextlib import contextmanager
|
2
|
-
from datetime import datetime
|
3
2
|
import os
|
4
3
|
import shutil
|
5
4
|
import tempfile
|
6
|
-
from uuid import uuid4 as uuid
|
7
5
|
from typing import List, Optional, Union
|
8
|
-
from dcicutils.file_utils import create_random_file
|
9
6
|
|
10
7
|
|
11
8
|
@contextmanager
|
@@ -18,38 +15,17 @@ def temporary_directory() -> str:
|
|
18
15
|
|
19
16
|
|
20
17
|
@contextmanager
|
21
|
-
def temporary_file(name: Optional[str] = None,
|
18
|
+
def temporary_file(name: Optional[str] = None, suffix: Optional[str] = None,
|
22
19
|
content: Optional[Union[str, bytes, List[str]]] = None) -> str:
|
23
20
|
with temporary_directory() as tmp_directory_name:
|
24
|
-
tmp_file_name =
|
25
|
-
|
26
|
-
with open(tmp_file_path, "wb" if isinstance(content, bytes) else "w") as tmp_file:
|
21
|
+
tmp_file_name = os.path.join(tmp_directory_name, name or tempfile.mktemp(dir="")) + (suffix or "")
|
22
|
+
with open(tmp_file_name, "wb" if isinstance(content, bytes) else "w") as tmp_file:
|
27
23
|
if content is not None:
|
28
24
|
tmp_file.write("\n".join(content) if isinstance(content, list) else content)
|
29
|
-
yield
|
30
|
-
|
31
|
-
|
32
|
-
def create_temporary_file_name(prefix: Optional[str] = None, suffix: Optional[str] = None) -> str:
|
33
|
-
"""
|
34
|
-
Generates and returns the full path to file within the system temporary directory.
|
35
|
-
"""
|
36
|
-
random_string = f"{datetime.utcnow().strftime('%Y%m%d%H%M%S')}{str(uuid()).replace('-', '')}"
|
37
|
-
tmp_file_name = f"{prefix or ''}{random_string}{suffix or ''}"
|
38
|
-
return os.path.join(tempfile.gettempdir(), tmp_file_name)
|
39
|
-
|
40
|
-
|
41
|
-
@contextmanager
|
42
|
-
def temporary_random_file(prefix: Optional[str] = None, suffix: Optional[str] = None,
|
43
|
-
nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
|
44
|
-
with temporary_file(prefix=prefix, suffix=suffix) as tmp_file_path:
|
45
|
-
create_random_file(tmp_file_path, nbytes=nbytes, binary=binary, line_length=line_length)
|
46
|
-
yield tmp_file_path
|
25
|
+
yield tmp_file_name
|
47
26
|
|
48
27
|
|
49
28
|
def remove_temporary_directory(tmp_directory_name: str) -> None:
|
50
|
-
"""
|
51
|
-
Removes the given directory, recursively; but ONLY if it is (somewhere) within the system temporary directory.
|
52
|
-
"""
|
53
29
|
def is_temporary_directory(path: str) -> bool:
|
54
30
|
try:
|
55
31
|
tmpdir = tempfile.gettempdir()
|
@@ -58,17 +34,3 @@ def remove_temporary_directory(tmp_directory_name: str) -> None:
|
|
58
34
|
return False
|
59
35
|
if is_temporary_directory(tmp_directory_name): # Guard against errant deletion.
|
60
36
|
shutil.rmtree(tmp_directory_name)
|
61
|
-
|
62
|
-
|
63
|
-
def remove_temporary_file(tmp_file_name: str) -> bool:
|
64
|
-
"""
|
65
|
-
Removes the given file; but ONLY if it is (somewhere) within the system temporary directory.
|
66
|
-
"""
|
67
|
-
try:
|
68
|
-
tmpdir = tempfile.gettempdir()
|
69
|
-
if (os.path.commonpath([tmpdir, tmp_file_name]) == tmpdir) and os.path.isfile(tmp_file_name):
|
70
|
-
os.remove(tmp_file_name)
|
71
|
-
return True
|
72
|
-
return False
|
73
|
-
except Exception:
|
74
|
-
return False
|
dcicutils/zip_utils.py
CHANGED
@@ -2,9 +2,7 @@ from contextlib import contextmanager
|
|
2
2
|
from dcicutils.tmpfile_utils import temporary_directory, temporary_file
|
3
3
|
import gzip
|
4
4
|
import os
|
5
|
-
import shutil
|
6
5
|
import tarfile
|
7
|
-
import tempfile
|
8
6
|
from typing import List, Optional
|
9
7
|
import zipfile
|
10
8
|
|
@@ -47,28 +45,3 @@ def unpack_gz_file_to_temporary_file(file: str, suffix: Optional[str] = None) ->
|
|
47
45
|
outputf.write(inputf.read())
|
48
46
|
outputf.close()
|
49
47
|
yield tmp_file_name
|
50
|
-
|
51
|
-
|
52
|
-
def extract_file_from_zip(zip_file: str, file_to_extract: str,
|
53
|
-
destination_file: str, raise_exception: bool = True) -> bool:
|
54
|
-
"""
|
55
|
-
Extracts from the given zip file, the given file to extract, writing it to the
|
56
|
-
given destination file. Returns True if all is well, otherwise False, or if the
|
57
|
-
raise_exception argument is True (the default), then raises and exception on error.
|
58
|
-
"""
|
59
|
-
try:
|
60
|
-
if not (destination_directory := os.path.dirname(destination_file)):
|
61
|
-
destination_directory = os.getcwd()
|
62
|
-
destination_file = os.path.join(destination_directory, destination_file)
|
63
|
-
with tempfile.TemporaryDirectory() as tmp_directory_name:
|
64
|
-
with zipfile.ZipFile(zip_file, "r") as zipf:
|
65
|
-
if file_to_extract not in zipf.namelist():
|
66
|
-
return False
|
67
|
-
zipf.extract(file_to_extract, path=tmp_directory_name)
|
68
|
-
os.makedirs(destination_directory, exist_ok=True)
|
69
|
-
shutil.move(os.path.join(tmp_directory_name, file_to_extract), destination_file)
|
70
|
-
return True
|
71
|
-
except Exception as e:
|
72
|
-
if raise_exception:
|
73
|
-
raise e
|
74
|
-
return False
|
@@ -1,12 +1,12 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: dcicutils
|
3
|
-
Version: 8.
|
3
|
+
Version: 8.9.0.0b0
|
4
4
|
Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
|
5
5
|
Home-page: https://github.com/4dn-dcic/utils
|
6
6
|
License: MIT
|
7
7
|
Author: 4DN-DCIC Team
|
8
8
|
Author-email: support@4dnucleome.org
|
9
|
-
Requires-Python: >=3.8,<3.
|
9
|
+
Requires-Python: >=3.8,<3.12
|
10
10
|
Classifier: Development Status :: 4 - Beta
|
11
11
|
Classifier: Intended Audience :: Developers
|
12
12
|
Classifier: Intended Audience :: Science/Research
|
@@ -24,10 +24,9 @@ Classifier: Programming Language :: Python :: 3.9
|
|
24
24
|
Classifier: Topic :: Database :: Database Engines/Servers
|
25
25
|
Requires-Dist: PyJWT (>=2.6.0,<3.0.0)
|
26
26
|
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
27
|
-
Requires-Dist: appdirs (>=1.4.4,<2.0.0)
|
28
27
|
Requires-Dist: aws-requests-auth (>=0.4.2,<1)
|
29
|
-
Requires-Dist: boto3 (>=1.
|
30
|
-
Requires-Dist: botocore (>=1.
|
28
|
+
Requires-Dist: boto3 (>=1.28.57,<2.0.0)
|
29
|
+
Requires-Dist: botocore (>=1.31.57,<2.0.0)
|
31
30
|
Requires-Dist: chardet (>=5.2.0,<6.0.0)
|
32
31
|
Requires-Dist: docker (>=4.4.4,<5.0.0)
|
33
32
|
Requires-Dist: elasticsearch (==7.13.4)
|
@@ -43,7 +42,6 @@ Requires-Dist: pytz (>=2020.4)
|
|
43
42
|
Requires-Dist: redis (>=4.5.1,<5.0.0)
|
44
43
|
Requires-Dist: requests (>=2.21.0,<3.0.0)
|
45
44
|
Requires-Dist: rfc3986 (>=1.4.0,<2.0.0)
|
46
|
-
Requires-Dist: shortuuid (>=1.0.13,<2.0.0)
|
47
45
|
Requires-Dist: structlog (>=19.2.0,<20.0.0)
|
48
46
|
Requires-Dist: toml (>=0.10.1,<1)
|
49
47
|
Requires-Dist: tqdm (>=4.66.2,<5.0.0)
|
@@ -27,11 +27,10 @@ dcicutils/env_utils_legacy.py,sha256=J81OAtJHN69o1beHO6q1j7_J6TeblSjnAHlS8VA5KSM
|
|
27
27
|
dcicutils/es_utils.py,sha256=ZksLh5ei7kRUfiFltk8sd2ZSfh15twbstrMzBr8HNw4,7541
|
28
28
|
dcicutils/exceptions.py,sha256=4giQGtpak-omQv7BP6Ckeu91XK5fnDosC8gfdmN_ccA,9931
|
29
29
|
dcicutils/ff_mocks.py,sha256=6RKS4eUiu_Wl8yP_8V0CaV75w4ZdWxdCuL1CVlnMrek,36918
|
30
|
-
dcicutils/ff_utils.py,sha256=
|
31
|
-
dcicutils/file_utils.py,sha256=
|
30
|
+
dcicutils/ff_utils.py,sha256=Yf-fET5gdpjrH0gikpOCIJdY2Dv3obzUpR31ur816mU,72972
|
31
|
+
dcicutils/file_utils.py,sha256=098rXvLeIh8n69EGW7DpOS227ef3BPgwhRAktoU6mhE,2663
|
32
32
|
dcicutils/function_cache_decorator.py,sha256=XMyiEGODVr2WoAQ68vcoX_9_Xb9p8pZXdXl7keU8i2g,10026
|
33
33
|
dcicutils/glacier_utils.py,sha256=Q4CVXsZCbP-SoZIsZ5NMcawDfelOLzbQnIlQn-GdlTo,34149
|
34
|
-
dcicutils/http_utils.py,sha256=tNfH5JA-OwbQKEvD5HPJ3lcp2TSIZ4rnl__4d4JO8Gw,1583
|
35
34
|
dcicutils/jh_utils.py,sha256=Gpsxb9XEzggF_-Eq3ukjKvTnuyb9V1SCSUXkXsES4Kg,11502
|
36
35
|
dcicutils/kibana/dashboards.json,sha256=wHMB_mpJ8OaYhRRgvpZuihaB2lmSF64ADt_8hkBWgQg,16225
|
37
36
|
dcicutils/kibana/readme.md,sha256=3KmHF9FH6A6xwYsNxRFLw27q0XzHYnjZOlYUnn3VkQQ,2164
|
@@ -44,7 +43,7 @@ dcicutils/license_policies/park-lab-gpl-pipeline.jsonc,sha256=vLZkwm3Js-kjV44nug
|
|
44
43
|
dcicutils/license_policies/park-lab-pipeline.jsonc,sha256=9qlY0ASy3iUMQlr3gorVcXrSfRHnVGbLhkS427UaRy4,283
|
45
44
|
dcicutils/license_utils.py,sha256=d1cq6iwv5Ju-VjdoINi6q7CPNNL7Oz6rcJdLMY38RX0,46978
|
46
45
|
dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
|
47
|
-
dcicutils/misc_utils.py,sha256=
|
46
|
+
dcicutils/misc_utils.py,sha256=YH_TTmv6ABWeMERwVvA2-rIfdS-CoPYLXJru9TvWxgM,104610
|
48
47
|
dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
|
49
48
|
dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
|
50
49
|
dcicutils/portal_object_utils.py,sha256=gDXRgPsRvqCFwbC8WatsuflAxNiigOnqr0Hi93k3AgE,15422
|
@@ -56,7 +55,7 @@ dcicutils/qa_utils.py,sha256=TT0SiJWiuxYvbsIyhK9VO4uV_suxhB6CpuC4qPacCzQ,160208
|
|
56
55
|
dcicutils/redis_tools.py,sha256=qkcSNMtvqkpvts-Cm9gWhneK523Q_oHwhNUud1be1qk,7055
|
57
56
|
dcicutils/redis_utils.py,sha256=VJ-7g8pOZqR1ZCtdcjKz3-6as2DMUcs1b1zG6wSprH4,6462
|
58
57
|
dcicutils/s3_utils.py,sha256=LauLFQGvZLfpBJ81tYMikjLd3SJRz2R_FrL1n4xSlyI,28868
|
59
|
-
dcicutils/schema_utils.py,sha256=
|
58
|
+
dcicutils/schema_utils.py,sha256=2hOzuGK7F8xZ7JyS7_Lan2wXOlNZezzT2lqgEs3QOe4,10605
|
60
59
|
dcicutils/scripts/publish_to_pypi.py,sha256=LFzNHIQK2EXFr88YcfctyA_WKEBFc1ElnSjWrCXedPM,13889
|
61
60
|
dcicutils/scripts/run_license_checker.py,sha256=z2keYnRDZsHQbTeo1XORAXSXNJK5axVzL5LjiNqZ7jE,4184
|
62
61
|
dcicutils/scripts/view_portal_object.py,sha256=Cy-8GwGJS9EX-5RxE8mjsqNlDT0N6OCpkNffPVkTFQc,26262
|
@@ -68,13 +67,13 @@ dcicutils/structured_data.py,sha256=BQuIMv6OPySsn6YxtXE2Er-zLE2QJuCYhEQ3V0u_UXY,
|
|
68
67
|
dcicutils/submitr/progress_constants.py,sha256=5bxyX77ql8qEJearfHEvsvXl7D0GuUODW0T65mbRmnE,2895
|
69
68
|
dcicutils/submitr/ref_lookup_strategy.py,sha256=Js2cVznTmgjciLWBPLCvMiwLIHXjDn3jww-gJPjYuFw,3467
|
70
69
|
dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
|
71
|
-
dcicutils/tmpfile_utils.py,sha256=
|
70
|
+
dcicutils/tmpfile_utils.py,sha256=n95XF8dZVbQRSXBZTGToXXfSs3JUVRyN6c3ZZ0nhAWI,1403
|
72
71
|
dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
|
73
72
|
dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
|
74
73
|
dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
|
75
|
-
dcicutils/zip_utils.py,sha256=
|
76
|
-
dcicutils-8.
|
77
|
-
dcicutils-8.
|
78
|
-
dcicutils-8.
|
79
|
-
dcicutils-8.
|
80
|
-
dcicutils-8.
|
74
|
+
dcicutils/zip_utils.py,sha256=rnjNv_k6L9jT2SjDSgVXp4BEJYLtz9XN6Cl2Fy-tqnM,2027
|
75
|
+
dcicutils-8.9.0.0b0.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
|
76
|
+
dcicutils-8.9.0.0b0.dist-info/METADATA,sha256=sHJ_jTCTbZwTy6AoI9BSixIfwZDxdntJvQmTy5keWnI,3356
|
77
|
+
dcicutils-8.9.0.0b0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
78
|
+
dcicutils-8.9.0.0b0.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
|
79
|
+
dcicutils-8.9.0.0b0.dist-info/RECORD,,
|
dcicutils/http_utils.py
DELETED
@@ -1,39 +0,0 @@
|
|
1
|
-
from contextlib import contextmanager
|
2
|
-
import requests
|
3
|
-
from typing import Callable, Optional
|
4
|
-
from dcicutils.tmpfile_utils import temporary_file
|
5
|
-
|
6
|
-
|
7
|
-
@contextmanager
|
8
|
-
def download(url: str, suffix: Optional[str] = None, binary: bool = True,
|
9
|
-
progress: Optional[Callable] = None) -> Optional[str]:
|
10
|
-
"""
|
11
|
-
Context manager to download the given URL into a temporary file and yields the file
|
12
|
-
path to it. An optional file suffix may be specified for this temporary file name.
|
13
|
-
Defaults to binary file mode; if not desired then pass False as the binary argument.
|
14
|
-
"""
|
15
|
-
with temporary_file(suffix=suffix) as file:
|
16
|
-
download_to(url, file, binary=binary, progress=progress)
|
17
|
-
yield file
|
18
|
-
|
19
|
-
|
20
|
-
def download_to(url: str, file: str, binary: bool = True, progress: Optional[Callable] = None) -> None:
|
21
|
-
"""
|
22
|
-
Download the given URL into the given file. Defaults to binary
|
23
|
-
file mode; if not desired then pass False as the binary argument.
|
24
|
-
"""
|
25
|
-
if not callable(progress):
|
26
|
-
progress = None
|
27
|
-
response = requests.get(url, stream=True)
|
28
|
-
if progress:
|
29
|
-
nbytes = 0
|
30
|
-
nbytes_total = None
|
31
|
-
if isinstance(content_length := response.headers.get("Content-Length"), str) and content_length.isdigit():
|
32
|
-
nbytes_total = int(content_length)
|
33
|
-
with open(file, "wb" if binary is True else "w") as f:
|
34
|
-
for chunk in response.iter_content(chunk_size=8192):
|
35
|
-
if chunk:
|
36
|
-
f.write(chunk)
|
37
|
-
if progress:
|
38
|
-
nbytes += len(chunk)
|
39
|
-
progress(nbytes, nbytes_total)
|
File without changes
|
File without changes
|
File without changes
|