dcicutils 8.8.4.1b15__py3-none-any.whl → 8.8.4.1b17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dcicutils/file_utils.py +72 -0
- dcicutils/http_utils.py +13 -2
- {dcicutils-8.8.4.1b15.dist-info → dcicutils-8.8.4.1b17.dist-info}/METADATA +1 -1
- {dcicutils-8.8.4.1b15.dist-info → dcicutils-8.8.4.1b17.dist-info}/RECORD +7 -7
- {dcicutils-8.8.4.1b15.dist-info → dcicutils-8.8.4.1b17.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.8.4.1b15.dist-info → dcicutils-8.8.4.1b17.dist-info}/WHEEL +0 -0
- {dcicutils-8.8.4.1b15.dist-info → dcicutils-8.8.4.1b17.dist-info}/entry_points.txt +0 -0
dcicutils/file_utils.py
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
import glob
|
2
|
+
import hashlib
|
3
|
+
import io
|
2
4
|
import os
|
3
5
|
import pathlib
|
4
6
|
from datetime import datetime
|
@@ -103,6 +105,76 @@ def are_files_equal(filea: str, fileb: str) -> bool:
|
|
103
105
|
return False
|
104
106
|
|
105
107
|
|
108
|
+
def compute_file_md5(file: str) -> str:
    """
    Returns the md5 checksum (as a lowercase hex string) for the given file.

    The file is read in binary mode in fixed-size chunks, so it is never
    loaded into memory all at once. This is deliberately best-effort: if the
    given argument is not a string, or if the file cannot be opened or read
    for any reason, an empty string is returned rather than raising.
    """
    if not isinstance(file, str):
        return ""
    try:
        md5 = hashlib.md5()
        # Use a distinct name for the open handle so that the `file` (path)
        # parameter is not rebound to a file object within this function.
        with open(file, "rb") as f:
            for chunk in iter(lambda: f.read(65536), b""):
                md5.update(chunk)
        return md5.hexdigest()
    except Exception:
        # Best-effort contract: any failure is reported as an empty string.
        return ""
|
122
|
+
|
123
|
+
|
124
|
+
def compute_file_etag(file: str) -> Optional[str]:
    """
    Returns the AWS S3 "etag" for the given file; this value is md5-like but
    not the same as a normal md5. We use this to check that a file in S3
    appears to be exactly the same file as a local file.

    This is best-effort: if the file cannot be opened or read, or if anything
    else goes wrong while computing the value, None is returned rather than
    raising an exception.
    """
    if isinstance(file, int):
        # An integer would be interpreted by open() as a file descriptor, and
        # that descriptor would then be closed on exit from the with-block
        # below (e.g. 0 would close stdin); only real paths are supported.
        return None
    try:
        with io.open(file, "rb") as f:
            return _compute_file_etag(f)
    except Exception:
        # Best-effort contract: any failure is reported as None.
        return None
|
135
|
+
|
136
|
+
|
137
|
+
def _compute_file_etag(f: io.BufferedReader) -> str:
    """
    Returns the S3-style "etag" for the data read from the given (binary) file object.

    If fewer than MULTIPART_THRESHOLD bytes are read then the result is simply the
    md5 hex digest of the data. Otherwise the data is treated as a sequence of parts
    of MULTIPART_CHUNKSIZE bytes each (the last part possibly shorter), and the result
    is the md5 hex digest of the concatenated binary md5 digests of the parts, followed
    by a dash and the number of parts.

    Part boundaries are tracked explicitly, so the result does not depend on how
    many bytes each individual read() call happens to return.
    """
    # See: https://stackoverflow.com/questions/75723647/calculate-md5-from-aws-s3-etag
    # NOTE(review): these sizes presumably mirror the transfer settings used for the
    # actual upload; the computed value only matches S3 if they do - confirm.
    MULTIPART_THRESHOLD = 8388608
    MULTIPART_CHUNKSIZE = 8388608
    BUFFER_SIZE = 1048576
    # The single-part result below uses the hash of the first (never completed) part
    # as the hash of the whole data; this requires the threshold not to exceed the
    # part size (which holds for the values above).
    part_md5 = hashlib.md5()
    part_nbytes = 0
    total_nbytes = 0
    part_digests = []
    while True:
        # Never read across a part boundary, so each part is hashed separately
        # even when read() returns fewer bytes than were asked for.
        data = f.read(min(BUFFER_SIZE, MULTIPART_CHUNKSIZE - part_nbytes))
        if not data:
            break
        total_nbytes += len(data)
        part_nbytes += len(data)
        part_md5.update(data)
        if part_nbytes == MULTIPART_CHUNKSIZE:
            # Done with a part; save its digest to hash later, and start the next part.
            part_digests.append(part_md5.digest())
            part_md5 = hashlib.md5()
            part_nbytes = 0
    if total_nbytes < MULTIPART_THRESHOLD:
        # Normal (single-part) upload; the etag is just the md5 of the data.
        return part_md5.hexdigest()
    # Multipart upload; the etag is the hash of the part hashes, plus the part count.
    if part_nbytes > 0:
        # Include the final partial part.
        part_digests.append(part_md5.digest())
    return hashlib.md5(b"".join(part_digests)).hexdigest() + "-" + str(len(part_digests))
|
176
|
+
|
177
|
+
|
106
178
|
def create_random_file(file: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
|
107
179
|
nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
|
108
180
|
"""
|
dcicutils/http_utils.py
CHANGED
@@ -1,20 +1,31 @@
|
|
1
1
|
from contextlib import contextmanager
|
2
2
|
import requests
|
3
|
-
from typing import Optional
|
3
|
+
from typing import Callable, Optional
|
4
4
|
from dcicutils.tmpfile_utils import temporary_file
|
5
5
|
|
6
6
|
|
7
7
|
@contextmanager
def download(url: str, suffix: Optional[str] = None, binary: bool = True,
             progress: Optional[Callable] = None) -> Optional[str]:
    """
    Context manager to download the given URL into a temporary file and yield the file
    path to it. An optional file suffix may be specified. Defaults to binary file mode;
    if this is not desired then pass False as the binary argument, in which case the
    downloaded bytes are decoded (using the encoding reported for the response, or
    utf-8 if there is none) before being written to the file in text mode.

    If a callable progress argument is given then it is called after each chunk is
    written, with the number of bytes downloaded so far, and the total number of
    bytes expected according to the Content-Length response header, or None if
    that header is absent or is not a plain number.
    """
    import codecs  # local import; only needed here, for the text (non-binary) mode

    if not callable(progress):
        progress = None
    with temporary_file(suffix=suffix) as file:
        # Use the response as a context manager so that the (streamed) connection
        # is released once the download has finished, or if it fails part way.
        with requests.get(url, stream=True) as response:
            nbytes = 0
            nbytes_total = None
            if progress:
                content_length = response.headers.get("Content-Length")
                if isinstance(content_length, str) and content_length.isdigit():
                    nbytes_total = int(content_length)
            decoder = None
            if binary is not True:
                # The chunks from iter_content are bytes, which cannot be written to a
                # file opened in text mode; decode them incrementally so a multi-byte
                # character split across two chunks is still decoded correctly.
                try:
                    decoder = codecs.getincrementaldecoder(response.encoding or "utf-8")(errors="replace")
                except LookupError:
                    # Unknown encoding name reported for the response; fall back to utf-8.
                    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
            with open(file, "wb" if binary is True else "w") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk if decoder is None else decoder.decode(chunk))
                        if progress:
                            nbytes += len(chunk)
                            progress(nbytes, nbytes_total)
                if decoder is not None:
                    # Flush anything the decoder is still holding on to.
                    remainder = decoder.decode(b"", final=True)
                    if remainder:
                        f.write(remainder)
        yield file
|
@@ -28,10 +28,10 @@ dcicutils/es_utils.py,sha256=ZksLh5ei7kRUfiFltk8sd2ZSfh15twbstrMzBr8HNw4,7541
|
|
28
28
|
dcicutils/exceptions.py,sha256=4giQGtpak-omQv7BP6Ckeu91XK5fnDosC8gfdmN_ccA,9931
|
29
29
|
dcicutils/ff_mocks.py,sha256=6RKS4eUiu_Wl8yP_8V0CaV75w4ZdWxdCuL1CVlnMrek,36918
|
30
30
|
dcicutils/ff_utils.py,sha256=oIhuZPnGtfwj6bWyCc1u23JbMB_6InPp01ZqUOljd8M,73123
|
31
|
-
dcicutils/file_utils.py,sha256=
|
31
|
+
dcicutils/file_utils.py,sha256=msxA3fFTtK09Qc_I3-r9Y5Pp5WVJRPPpLlFYv3Rju-E,8697
|
32
32
|
dcicutils/function_cache_decorator.py,sha256=XMyiEGODVr2WoAQ68vcoX_9_Xb9p8pZXdXl7keU8i2g,10026
|
33
33
|
dcicutils/glacier_utils.py,sha256=Q4CVXsZCbP-SoZIsZ5NMcawDfelOLzbQnIlQn-GdlTo,34149
|
34
|
-
dcicutils/http_utils.py,sha256=
|
34
|
+
dcicutils/http_utils.py,sha256=Je5ErNjR5e6lfSXGRncK_lcR_-zP38PIpmHjApy9Wi4,1289
|
35
35
|
dcicutils/jh_utils.py,sha256=Gpsxb9XEzggF_-Eq3ukjKvTnuyb9V1SCSUXkXsES4Kg,11502
|
36
36
|
dcicutils/kibana/dashboards.json,sha256=wHMB_mpJ8OaYhRRgvpZuihaB2lmSF64ADt_8hkBWgQg,16225
|
37
37
|
dcicutils/kibana/readme.md,sha256=3KmHF9FH6A6xwYsNxRFLw27q0XzHYnjZOlYUnn3VkQQ,2164
|
@@ -73,8 +73,8 @@ dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
|
|
73
73
|
dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
|
74
74
|
dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
|
75
75
|
dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
|
76
|
-
dcicutils-8.8.4.
|
77
|
-
dcicutils-8.8.4.
|
78
|
-
dcicutils-8.8.4.
|
79
|
-
dcicutils-8.8.4.
|
80
|
-
dcicutils-8.8.4.
|
76
|
+
dcicutils-8.8.4.1b17.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
|
77
|
+
dcicutils-8.8.4.1b17.dist-info/METADATA,sha256=KLVzlNXL2JHkY6n3bL-fbnoVa1kNKAnIbK7bRluuxlM,3440
|
78
|
+
dcicutils-8.8.4.1b17.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
79
|
+
dcicutils-8.8.4.1b17.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
|
80
|
+
dcicutils-8.8.4.1b17.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|