dcicutils 8.8.4.1b13__py3-none-any.whl → 8.8.4.1b16__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- dcicutils/file_utils.py +72 -0
- dcicutils/misc_utils.py +7 -0
- {dcicutils-8.8.4.1b13.dist-info → dcicutils-8.8.4.1b16.dist-info}/METADATA +4 -3
- {dcicutils-8.8.4.1b13.dist-info → dcicutils-8.8.4.1b16.dist-info}/RECORD +7 -7
- {dcicutils-8.8.4.1b13.dist-info → dcicutils-8.8.4.1b16.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.8.4.1b13.dist-info → dcicutils-8.8.4.1b16.dist-info}/WHEEL +0 -0
- {dcicutils-8.8.4.1b13.dist-info → dcicutils-8.8.4.1b16.dist-info}/entry_points.txt +0 -0
dcicutils/file_utils.py
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
import glob
|
2
|
+
import hashlib
|
3
|
+
import io
|
2
4
|
import os
|
3
5
|
import pathlib
|
4
6
|
from datetime import datetime
|
@@ -103,6 +105,76 @@ def are_files_equal(filea: str, fileb: str) -> bool:
|
|
103
105
|
return False
|
104
106
|
|
105
107
|
|
108
|
+
def compute_file_md5(file: str) -> str:
    """
    Returns the md5 checksum (as a lowercase hex string) for the given file.

    The file is read in binary mode, a chunk at a time, so that its entire
    contents never need to be held in memory. This is deliberately best-effort:
    if the given file argument is not a string, or if the file cannot be opened
    or read for any reason, then an empty string is returned (no exception).
    """
    if not isinstance(file, str):
        return ""
    try:
        md5 = hashlib.md5()
        # The open file handle gets its own name; rebinding the file argument
        # to the handle would hide the path from the rest of the function.
        with open(file, "rb") as f:
            for chunk in iter(lambda: f.read(65536), b""):
                md5.update(chunk)
        return md5.hexdigest()
    except Exception:
        # Best-effort contract: any failure is reported as an empty string.
        return ""
|
122
|
+
|
123
|
+
|
124
|
+
def compute_file_etag(file: str) -> Optional[str]:
    """
    Returns the AWS S3 "etag" for the given file; this value is md5-like but
    not the same as a normal md5. We use this to check that a file in S3
    appears to be exactly the same file as a local file.

    Returns None if the given file is not a path (string, bytes, or os.PathLike),
    or if it cannot be opened or read for any reason (no exception is raised).
    """
    # Only accept actual paths; open() treats an integer as an already-open
    # file descriptor, which it would then read from and close on our behalf.
    if not isinstance(file, (str, bytes, os.PathLike)):
        return None
    try:
        with io.open(file, "rb") as f:
            return _compute_file_etag(f)
    except Exception:
        # Best-effort contract: any failure is reported as None.
        return None


def _compute_file_etag(f: io.BufferedReader) -> str:
    """
    Returns the S3-style etag for the data read from the given binary stream.

    If fewer than MULTIPART_THRESHOLD bytes are read, the result is the plain md5
    hex digest of the data. Otherwise the data is treated as consecutive parts of
    MULTIPART_CHUNKSIZE bytes (the last part possibly shorter), and the result is
    the md5 hex digest of the concatenated binary md5 digests of the parts,
    followed by a dash and the number of parts.
    """
    # See: https://stackoverflow.com/questions/75723647/calculate-md5-from-aws-s3-etag
    # These sizes are the 8 MiB defaults of the boto3/AWS CLI transfer configuration;
    # an object uploaded with other multipart settings will have a different etag.
    # The logic below relies on MULTIPART_THRESHOLD <= MULTIPART_CHUNKSIZE: as long as
    # fewer than MULTIPART_THRESHOLD bytes have been read no part has been completed,
    # so the running part hash is also the hash of the whole data.
    MULTIPART_THRESHOLD = 8388608
    MULTIPART_CHUNKSIZE = 8388608
    BUFFER_SIZE = 1048576

    part_md5 = hashlib.md5()
    part_digests = []
    part_nbytes = 0
    total_nbytes = 0
    while True:
        # Never read across a part boundary; the boundary is then detected by counting
        # the bytes actually returned, so a short read cannot misalign the parts.
        data = f.read(min(BUFFER_SIZE, MULTIPART_CHUNKSIZE - part_nbytes))
        if not data:
            break
        part_md5.update(data)
        part_nbytes += len(data)
        total_nbytes += len(data)
        if part_nbytes == MULTIPART_CHUNKSIZE:
            # Done with a part; save its digest (to hash later) and start the next one.
            part_digests.append(part_md5.digest())
            part_md5 = hashlib.md5()
            part_nbytes = 0

    if total_nbytes < MULTIPART_THRESHOLD:
        # Normal (single-part) upload; the etag is just the md5 of the data.
        return part_md5.hexdigest()

    # Multipart upload; the etag is the hash of the part hashes plus the part count.
    if part_nbytes > 0:
        # Add the last part if it is a partial one.
        part_digests.append(part_md5.digest())
    return hashlib.md5(b"".join(part_digests)).hexdigest() + "-" + str(len(part_digests))
|
176
|
+
|
177
|
+
|
106
178
|
def create_random_file(file: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
|
107
179
|
nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
|
108
180
|
"""
|
dcicutils/misc_utils.py
CHANGED
@@ -19,6 +19,7 @@ import pytz
|
|
19
19
|
import re
|
20
20
|
import rfc3986.validators
|
21
21
|
import rfc3986.exceptions
|
22
|
+
import shortuuid
|
22
23
|
import time
|
23
24
|
import uuid
|
24
25
|
import warnings
|
@@ -2698,3 +2699,9 @@ def get_cpu_architecture_name() -> str:
|
|
2698
2699
|
if os_architecture_name == "x86_64": return "amd64" # noqa
|
2699
2700
|
return os_architecture_name
|
2700
2701
|
return ""
|
2702
|
+
|
2703
|
+
|
2704
|
+
def short_uuid(length: Optional[int] = None):
    """
    Returns the result of shortuuid.ShortUUID().random() for the given length.
    If the given length is unspecified, not an integer, or less than one,
    then a length of 16 is used instead.
    """
    has_usable_length = isinstance(length, int) and length >= 1
    return shortuuid.ShortUUID().random(length=length if has_usable_length else 16)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: dcicutils
|
3
|
-
Version: 8.8.4.
|
3
|
+
Version: 8.8.4.1b16
|
4
4
|
Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
|
5
5
|
Home-page: https://github.com/4dn-dcic/utils
|
6
6
|
License: MIT
|
@@ -26,8 +26,8 @@ Requires-Dist: PyJWT (>=2.6.0,<3.0.0)
|
|
26
26
|
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
27
27
|
Requires-Dist: appdirs (>=1.4.4,<2.0.0)
|
28
28
|
Requires-Dist: aws-requests-auth (>=0.4.2,<1)
|
29
|
-
Requires-Dist: boto3 (>=1.34.
|
30
|
-
Requires-Dist: botocore (>=1.34.
|
29
|
+
Requires-Dist: boto3 (>=1.34.93,<2.0.0)
|
30
|
+
Requires-Dist: botocore (>=1.34.93,<2.0.0)
|
31
31
|
Requires-Dist: chardet (>=5.2.0,<6.0.0)
|
32
32
|
Requires-Dist: docker (>=4.4.4,<5.0.0)
|
33
33
|
Requires-Dist: elasticsearch (==7.13.4)
|
@@ -43,6 +43,7 @@ Requires-Dist: pytz (>=2020.4)
|
|
43
43
|
Requires-Dist: redis (>=4.5.1,<5.0.0)
|
44
44
|
Requires-Dist: requests (>=2.21.0,<3.0.0)
|
45
45
|
Requires-Dist: rfc3986 (>=1.4.0,<2.0.0)
|
46
|
+
Requires-Dist: shortuuid (>=1.0.13,<2.0.0)
|
46
47
|
Requires-Dist: structlog (>=19.2.0,<20.0.0)
|
47
48
|
Requires-Dist: toml (>=0.10.1,<1)
|
48
49
|
Requires-Dist: tqdm (>=4.66.2,<5.0.0)
|
@@ -28,7 +28,7 @@ dcicutils/es_utils.py,sha256=ZksLh5ei7kRUfiFltk8sd2ZSfh15twbstrMzBr8HNw4,7541
|
|
28
28
|
dcicutils/exceptions.py,sha256=4giQGtpak-omQv7BP6Ckeu91XK5fnDosC8gfdmN_ccA,9931
|
29
29
|
dcicutils/ff_mocks.py,sha256=6RKS4eUiu_Wl8yP_8V0CaV75w4ZdWxdCuL1CVlnMrek,36918
|
30
30
|
dcicutils/ff_utils.py,sha256=oIhuZPnGtfwj6bWyCc1u23JbMB_6InPp01ZqUOljd8M,73123
|
31
|
-
dcicutils/file_utils.py,sha256=
|
31
|
+
dcicutils/file_utils.py,sha256=msxA3fFTtK09Qc_I3-r9Y5Pp5WVJRPPpLlFYv3Rju-E,8697
|
32
32
|
dcicutils/function_cache_decorator.py,sha256=XMyiEGODVr2WoAQ68vcoX_9_Xb9p8pZXdXl7keU8i2g,10026
|
33
33
|
dcicutils/glacier_utils.py,sha256=Q4CVXsZCbP-SoZIsZ5NMcawDfelOLzbQnIlQn-GdlTo,34149
|
34
34
|
dcicutils/http_utils.py,sha256=RB0x9hRMZM9Xd1x00c5J0iUzUdYzIQR0XKFiQ94HWO0,807
|
@@ -44,7 +44,7 @@ dcicutils/license_policies/park-lab-gpl-pipeline.jsonc,sha256=vLZkwm3Js-kjV44nug
|
|
44
44
|
dcicutils/license_policies/park-lab-pipeline.jsonc,sha256=9qlY0ASy3iUMQlr3gorVcXrSfRHnVGbLhkS427UaRy4,283
|
45
45
|
dcicutils/license_utils.py,sha256=d1cq6iwv5Ju-VjdoINi6q7CPNNL7Oz6rcJdLMY38RX0,46978
|
46
46
|
dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
|
47
|
-
dcicutils/misc_utils.py,sha256=
|
47
|
+
dcicutils/misc_utils.py,sha256=eVZ3lEkDebweKCeza2GIo7x3qEqqkj61Ilr17eMFlR0,105744
|
48
48
|
dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
|
49
49
|
dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
|
50
50
|
dcicutils/portal_object_utils.py,sha256=gDXRgPsRvqCFwbC8WatsuflAxNiigOnqr0Hi93k3AgE,15422
|
@@ -73,8 +73,8 @@ dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
|
|
73
73
|
dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
|
74
74
|
dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
|
75
75
|
dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
|
76
|
-
dcicutils-8.8.4.
|
77
|
-
dcicutils-8.8.4.
|
78
|
-
dcicutils-8.8.4.
|
79
|
-
dcicutils-8.8.4.
|
80
|
-
dcicutils-8.8.4.
|
76
|
+
dcicutils-8.8.4.1b16.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
|
77
|
+
dcicutils-8.8.4.1b16.dist-info/METADATA,sha256=7R_Eatzjy4Ez8_JufgAKLIQ_O6z0fVvjqHzLEwfE9O0,3440
|
78
|
+
dcicutils-8.8.4.1b16.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
79
|
+
dcicutils-8.8.4.1b16.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
|
80
|
+
dcicutils-8.8.4.1b16.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|