dcicutils 8.8.4.1b13__py3-none-any.whl → 8.8.4.1b16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dcicutils/file_utils.py +72 -0
- dcicutils/misc_utils.py +7 -0
- {dcicutils-8.8.4.1b13.dist-info → dcicutils-8.8.4.1b16.dist-info}/METADATA +4 -3
- {dcicutils-8.8.4.1b13.dist-info → dcicutils-8.8.4.1b16.dist-info}/RECORD +7 -7
- {dcicutils-8.8.4.1b13.dist-info → dcicutils-8.8.4.1b16.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.8.4.1b13.dist-info → dcicutils-8.8.4.1b16.dist-info}/WHEEL +0 -0
- {dcicutils-8.8.4.1b13.dist-info → dcicutils-8.8.4.1b16.dist-info}/entry_points.txt +0 -0
dcicutils/file_utils.py
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
import glob
|
2
|
+
import hashlib
|
3
|
+
import io
|
2
4
|
import os
|
3
5
|
import pathlib
|
4
6
|
from datetime import datetime
|
@@ -103,6 +105,76 @@ def are_files_equal(filea: str, fileb: str) -> bool:
|
|
103
105
|
return False
|
104
106
|
|
105
107
|
|
108
|
+
def compute_file_md5(file: str) -> str:
    """
    Returns the md5 checksum (as a hex digest string) for the given file.

    The file is opened in binary mode and hashed in fixed-size chunks, so its
    contents are never held in memory all at once.

    :param file: Path of the file to hash.
    :return: The md5 hex digest of the file contents; or an empty string if the
        given argument is not a string, or if the file cannot be opened or read.
    """
    if not isinstance(file, str):
        return ""
    try:
        md5 = hashlib.md5()
        # Bind the open handle to its own name rather than rebinding the `file` path argument.
        with open(file, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                md5.update(chunk)
        return md5.hexdigest()
    except Exception:
        # Deliberately best-effort: any failure to open/read is reported as an empty checksum.
        return ""
|
122
|
+
|
123
|
+
|
124
|
+
def compute_file_etag(file: str) -> Optional[str]:
    """
    Returns the AWS S3 "etag" for the given local file, or None if the file
    cannot be opened or read (or the computation otherwise fails).

    The etag is md5-like but is not the same as a plain md5 of the file; it is
    used to check that a file in S3 appears to be exactly the same file as a
    local one. The actual computation is delegated to _compute_file_etag.
    """
    try:
        with io.open(file, "rb") as stream:
            etag = _compute_file_etag(stream)
    except Exception:
        # Best-effort: any failure is reported to the caller as None.
        return None
    return etag
|
135
|
+
|
136
|
+
|
137
|
+
def _compute_file_etag(f: io.BufferedReader) -> str:
|
138
|
+
# See: https://stackoverflow.com/questions/75723647/calculate-md5-from-aws-s3-etag
|
139
|
+
MULTIPART_THRESHOLD = 8388608
|
140
|
+
MULTIPART_CHUNKSIZE = 8388608
|
141
|
+
# BUFFER_SIZE = 1048576
|
142
|
+
# Verify some assumptions are correct
|
143
|
+
# assert(MULTIPART_CHUNKSIZE >= MULTIPART_THRESHOLD)
|
144
|
+
# assert((MULTIPART_THRESHOLD % BUFFER_SIZE) == 0)
|
145
|
+
# assert((MULTIPART_CHUNKSIZE % BUFFER_SIZE) == 0)
|
146
|
+
hash = hashlib.md5()
|
147
|
+
read = 0
|
148
|
+
chunks = None
|
149
|
+
while True:
|
150
|
+
# Read some from stdin, if we're at the end, stop reading
|
151
|
+
bits = f.read(1048576)
|
152
|
+
if len(bits) == 0:
|
153
|
+
break
|
154
|
+
read += len(bits)
|
155
|
+
hash.update(bits)
|
156
|
+
if chunks is None:
|
157
|
+
# We're handling a multi-part upload, so switch to calculating
|
158
|
+
# hashes of each chunk
|
159
|
+
if read >= MULTIPART_THRESHOLD:
|
160
|
+
chunks = b''
|
161
|
+
if chunks is not None:
|
162
|
+
if (read % MULTIPART_CHUNKSIZE) == 0:
|
163
|
+
# Dont with a chunk, add it to the list of hashes to hash later
|
164
|
+
chunks += hash.digest()
|
165
|
+
hash = hashlib.md5()
|
166
|
+
if chunks is None:
|
167
|
+
# Normal upload, just output the MD5 hash
|
168
|
+
etag = hash.hexdigest()
|
169
|
+
else:
|
170
|
+
# Multipart upload, need to output the hash of the hashes
|
171
|
+
if (read % MULTIPART_CHUNKSIZE) != 0:
|
172
|
+
# Add the last part if we have a partial chunk
|
173
|
+
chunks += hash.digest()
|
174
|
+
etag = hashlib.md5(chunks).hexdigest() + "-" + str(len(chunks) // 16)
|
175
|
+
return etag
|
176
|
+
|
177
|
+
|
106
178
|
def create_random_file(file: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
|
107
179
|
nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
|
108
180
|
"""
|
dcicutils/misc_utils.py
CHANGED
@@ -19,6 +19,7 @@ import pytz
|
|
19
19
|
import re
|
20
20
|
import rfc3986.validators
|
21
21
|
import rfc3986.exceptions
|
22
|
+
import shortuuid
|
22
23
|
import time
|
23
24
|
import uuid
|
24
25
|
import warnings
|
@@ -2698,3 +2699,9 @@ def get_cpu_architecture_name() -> str:
|
|
2698
2699
|
if os_architecture_name == "x86_64": return "amd64" # noqa
|
2699
2700
|
return os_architecture_name
|
2700
2701
|
return ""
|
2702
|
+
|
2703
|
+
|
2704
|
+
def short_uuid(length: Optional[int] = None):
    """
    Returns the result of shortuuid.ShortUUID().random() for the given length.
    If the given length is not specified, or is not an integer, or is less
    than one, then a length of 16 is used.
    """
    has_usable_length = isinstance(length, int) and length >= 1
    return shortuuid.ShortUUID().random(length=length if has_usable_length else 16)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: dcicutils
|
3
|
-
Version: 8.8.4.
|
3
|
+
Version: 8.8.4.1b16
|
4
4
|
Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
|
5
5
|
Home-page: https://github.com/4dn-dcic/utils
|
6
6
|
License: MIT
|
@@ -26,8 +26,8 @@ Requires-Dist: PyJWT (>=2.6.0,<3.0.0)
|
|
26
26
|
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
27
27
|
Requires-Dist: appdirs (>=1.4.4,<2.0.0)
|
28
28
|
Requires-Dist: aws-requests-auth (>=0.4.2,<1)
|
29
|
-
Requires-Dist: boto3 (>=1.34.
|
30
|
-
Requires-Dist: botocore (>=1.34.
|
29
|
+
Requires-Dist: boto3 (>=1.34.93,<2.0.0)
|
30
|
+
Requires-Dist: botocore (>=1.34.93,<2.0.0)
|
31
31
|
Requires-Dist: chardet (>=5.2.0,<6.0.0)
|
32
32
|
Requires-Dist: docker (>=4.4.4,<5.0.0)
|
33
33
|
Requires-Dist: elasticsearch (==7.13.4)
|
@@ -43,6 +43,7 @@ Requires-Dist: pytz (>=2020.4)
|
|
43
43
|
Requires-Dist: redis (>=4.5.1,<5.0.0)
|
44
44
|
Requires-Dist: requests (>=2.21.0,<3.0.0)
|
45
45
|
Requires-Dist: rfc3986 (>=1.4.0,<2.0.0)
|
46
|
+
Requires-Dist: shortuuid (>=1.0.13,<2.0.0)
|
46
47
|
Requires-Dist: structlog (>=19.2.0,<20.0.0)
|
47
48
|
Requires-Dist: toml (>=0.10.1,<1)
|
48
49
|
Requires-Dist: tqdm (>=4.66.2,<5.0.0)
|
@@ -28,7 +28,7 @@ dcicutils/es_utils.py,sha256=ZksLh5ei7kRUfiFltk8sd2ZSfh15twbstrMzBr8HNw4,7541
|
|
28
28
|
dcicutils/exceptions.py,sha256=4giQGtpak-omQv7BP6Ckeu91XK5fnDosC8gfdmN_ccA,9931
|
29
29
|
dcicutils/ff_mocks.py,sha256=6RKS4eUiu_Wl8yP_8V0CaV75w4ZdWxdCuL1CVlnMrek,36918
|
30
30
|
dcicutils/ff_utils.py,sha256=oIhuZPnGtfwj6bWyCc1u23JbMB_6InPp01ZqUOljd8M,73123
|
31
|
-
dcicutils/file_utils.py,sha256=
|
31
|
+
dcicutils/file_utils.py,sha256=msxA3fFTtK09Qc_I3-r9Y5Pp5WVJRPPpLlFYv3Rju-E,8697
|
32
32
|
dcicutils/function_cache_decorator.py,sha256=XMyiEGODVr2WoAQ68vcoX_9_Xb9p8pZXdXl7keU8i2g,10026
|
33
33
|
dcicutils/glacier_utils.py,sha256=Q4CVXsZCbP-SoZIsZ5NMcawDfelOLzbQnIlQn-GdlTo,34149
|
34
34
|
dcicutils/http_utils.py,sha256=RB0x9hRMZM9Xd1x00c5J0iUzUdYzIQR0XKFiQ94HWO0,807
|
@@ -44,7 +44,7 @@ dcicutils/license_policies/park-lab-gpl-pipeline.jsonc,sha256=vLZkwm3Js-kjV44nug
|
|
44
44
|
dcicutils/license_policies/park-lab-pipeline.jsonc,sha256=9qlY0ASy3iUMQlr3gorVcXrSfRHnVGbLhkS427UaRy4,283
|
45
45
|
dcicutils/license_utils.py,sha256=d1cq6iwv5Ju-VjdoINi6q7CPNNL7Oz6rcJdLMY38RX0,46978
|
46
46
|
dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
|
47
|
-
dcicutils/misc_utils.py,sha256=
|
47
|
+
dcicutils/misc_utils.py,sha256=eVZ3lEkDebweKCeza2GIo7x3qEqqkj61Ilr17eMFlR0,105744
|
48
48
|
dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
|
49
49
|
dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
|
50
50
|
dcicutils/portal_object_utils.py,sha256=gDXRgPsRvqCFwbC8WatsuflAxNiigOnqr0Hi93k3AgE,15422
|
@@ -73,8 +73,8 @@ dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
|
|
73
73
|
dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
|
74
74
|
dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
|
75
75
|
dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
|
76
|
-
dcicutils-8.8.4.
|
77
|
-
dcicutils-8.8.4.
|
78
|
-
dcicutils-8.8.4.
|
79
|
-
dcicutils-8.8.4.
|
80
|
-
dcicutils-8.8.4.
|
76
|
+
dcicutils-8.8.4.1b16.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
|
77
|
+
dcicutils-8.8.4.1b16.dist-info/METADATA,sha256=7R_Eatzjy4Ez8_JufgAKLIQ_O6z0fVvjqHzLEwfE9O0,3440
|
78
|
+
dcicutils-8.8.4.1b16.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
79
|
+
dcicutils-8.8.4.1b16.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
|
80
|
+
dcicutils-8.8.4.1b16.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|