dcicutils 8.8.4.1b13__py3-none-any.whl → 8.8.4.1b16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dcicutils/file_utils.py CHANGED
@@ -1,4 +1,6 @@
1
1
  import glob
2
+ import hashlib
3
+ import io
2
4
  import os
3
5
  import pathlib
4
6
  from datetime import datetime
@@ -103,6 +105,76 @@ def are_files_equal(filea: str, fileb: str) -> bool:
103
105
  return False
104
106
 
105
107
 
108
+ def compute_file_md5(file: str) -> str:
109
+ """
110
+ Returns the md5 checksum for the given file.
111
+ """
112
+ if not isinstance(file, str):
113
+ return ""
114
+ try:
115
+ md5 = hashlib.md5()
116
+ with open(file, "rb") as file:
117
+ for chunk in iter(lambda: file.read(4096), b""):
118
+ md5.update(chunk)
119
+ return md5.hexdigest()
120
+ except Exception:
121
+ return ""
122
+
123
+
124
+ def compute_file_etag(file: str) -> Optional[str]:
125
+ """
126
+ Returns the AWS S3 "etag" for the given file; this value is md5-like but
127
+ not the same as a normal md5. We use this to compare that a file in S3
128
+ appears to be exactly the same file as a local file.
129
+ """
130
+ try:
131
+ with io.open(file, "rb") as f:
132
+ return _compute_file_etag(f)
133
+ except Exception:
134
+ return None
135
+
136
+
137
+ def _compute_file_etag(f: io.BufferedReader) -> str:
138
+ # See: https://stackoverflow.com/questions/75723647/calculate-md5-from-aws-s3-etag
139
+ MULTIPART_THRESHOLD = 8388608
140
+ MULTIPART_CHUNKSIZE = 8388608
141
+ # BUFFER_SIZE = 1048576
142
+ # Verify some assumptions are correct
143
+ # assert(MULTIPART_CHUNKSIZE >= MULTIPART_THRESHOLD)
144
+ # assert((MULTIPART_THRESHOLD % BUFFER_SIZE) == 0)
145
+ # assert((MULTIPART_CHUNKSIZE % BUFFER_SIZE) == 0)
146
+ hash = hashlib.md5()
147
+ read = 0
148
+ chunks = None
149
+ while True:
150
+ # Read some from the file; if we're at the end, stop reading
151
+ bits = f.read(1048576)
152
+ if len(bits) == 0:
153
+ break
154
+ read += len(bits)
155
+ hash.update(bits)
156
+ if chunks is None:
157
+ # We're handling a multi-part upload, so switch to calculating
158
+ # hashes of each chunk
159
+ if read >= MULTIPART_THRESHOLD:
160
+ chunks = b''
161
+ if chunks is not None:
162
+ if (read % MULTIPART_CHUNKSIZE) == 0:
163
+ # Done with a chunk, add it to the list of hashes to hash later
164
+ chunks += hash.digest()
165
+ hash = hashlib.md5()
166
+ if chunks is None:
167
+ # Normal upload, just output the MD5 hash
168
+ etag = hash.hexdigest()
169
+ else:
170
+ # Multipart upload, need to output the hash of the hashes
171
+ if (read % MULTIPART_CHUNKSIZE) != 0:
172
+ # Add the last part if we have a partial chunk
173
+ chunks += hash.digest()
174
+ etag = hashlib.md5(chunks).hexdigest() + "-" + str(len(chunks) // 16)
175
+ return etag
176
+
177
+
106
178
  def create_random_file(file: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
107
179
  nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
108
180
  """
dcicutils/misc_utils.py CHANGED
@@ -19,6 +19,7 @@ import pytz
19
19
  import re
20
20
  import rfc3986.validators
21
21
  import rfc3986.exceptions
22
+ import shortuuid
22
23
  import time
23
24
  import uuid
24
25
  import warnings
@@ -2698,3 +2699,9 @@ def get_cpu_architecture_name() -> str:
2698
2699
  if os_architecture_name == "x86_64": return "amd64" # noqa
2699
2700
  return os_architecture_name
2700
2701
  return ""
2702
+
2703
+
2704
+ def short_uuid(length: Optional[int] = None):
2705
+ if (length is None) or (not isinstance(length, int)) or (length < 1):
2706
+ length = 16
2707
+ return shortuuid.ShortUUID().random(length=length)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.4.1b13
3
+ Version: 8.8.4.1b16
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -26,8 +26,8 @@ Requires-Dist: PyJWT (>=2.6.0,<3.0.0)
26
26
  Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
27
27
  Requires-Dist: appdirs (>=1.4.4,<2.0.0)
28
28
  Requires-Dist: aws-requests-auth (>=0.4.2,<1)
29
- Requires-Dist: boto3 (>=1.34.90,<2.0.0)
30
- Requires-Dist: botocore (>=1.34.90,<2.0.0)
29
+ Requires-Dist: boto3 (>=1.34.93,<2.0.0)
30
+ Requires-Dist: botocore (>=1.34.93,<2.0.0)
31
31
  Requires-Dist: chardet (>=5.2.0,<6.0.0)
32
32
  Requires-Dist: docker (>=4.4.4,<5.0.0)
33
33
  Requires-Dist: elasticsearch (==7.13.4)
@@ -43,6 +43,7 @@ Requires-Dist: pytz (>=2020.4)
43
43
  Requires-Dist: redis (>=4.5.1,<5.0.0)
44
44
  Requires-Dist: requests (>=2.21.0,<3.0.0)
45
45
  Requires-Dist: rfc3986 (>=1.4.0,<2.0.0)
46
+ Requires-Dist: shortuuid (>=1.0.13,<2.0.0)
46
47
  Requires-Dist: structlog (>=19.2.0,<20.0.0)
47
48
  Requires-Dist: toml (>=0.10.1,<1)
48
49
  Requires-Dist: tqdm (>=4.66.2,<5.0.0)
@@ -28,7 +28,7 @@ dcicutils/es_utils.py,sha256=ZksLh5ei7kRUfiFltk8sd2ZSfh15twbstrMzBr8HNw4,7541
28
28
  dcicutils/exceptions.py,sha256=4giQGtpak-omQv7BP6Ckeu91XK5fnDosC8gfdmN_ccA,9931
29
29
  dcicutils/ff_mocks.py,sha256=6RKS4eUiu_Wl8yP_8V0CaV75w4ZdWxdCuL1CVlnMrek,36918
30
30
  dcicutils/ff_utils.py,sha256=oIhuZPnGtfwj6bWyCc1u23JbMB_6InPp01ZqUOljd8M,73123
31
- dcicutils/file_utils.py,sha256=vPPFiGK5scq7opVQ_Ne8eeTAjy5OtSxyAgeJUievygQ,6283
31
+ dcicutils/file_utils.py,sha256=msxA3fFTtK09Qc_I3-r9Y5Pp5WVJRPPpLlFYv3Rju-E,8697
32
32
  dcicutils/function_cache_decorator.py,sha256=XMyiEGODVr2WoAQ68vcoX_9_Xb9p8pZXdXl7keU8i2g,10026
33
33
  dcicutils/glacier_utils.py,sha256=Q4CVXsZCbP-SoZIsZ5NMcawDfelOLzbQnIlQn-GdlTo,34149
34
34
  dcicutils/http_utils.py,sha256=RB0x9hRMZM9Xd1x00c5J0iUzUdYzIQR0XKFiQ94HWO0,807
@@ -44,7 +44,7 @@ dcicutils/license_policies/park-lab-gpl-pipeline.jsonc,sha256=vLZkwm3Js-kjV44nug
44
44
  dcicutils/license_policies/park-lab-pipeline.jsonc,sha256=9qlY0ASy3iUMQlr3gorVcXrSfRHnVGbLhkS427UaRy4,283
45
45
  dcicutils/license_utils.py,sha256=d1cq6iwv5Ju-VjdoINi6q7CPNNL7Oz6rcJdLMY38RX0,46978
46
46
  dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
47
- dcicutils/misc_utils.py,sha256=Nw47AZs-cSOzGp5TZWrQZftePcahz1yfw6iNwOzUt-s,105530
47
+ dcicutils/misc_utils.py,sha256=eVZ3lEkDebweKCeza2GIo7x3qEqqkj61Ilr17eMFlR0,105744
48
48
  dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
49
49
  dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
50
50
  dcicutils/portal_object_utils.py,sha256=gDXRgPsRvqCFwbC8WatsuflAxNiigOnqr0Hi93k3AgE,15422
@@ -73,8 +73,8 @@ dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
73
73
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
74
74
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
75
75
  dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
76
- dcicutils-8.8.4.1b13.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
77
- dcicutils-8.8.4.1b13.dist-info/METADATA,sha256=gPRDYcqVxkYQNNEWUBaES-znrDZQhf-BT2Ie5ooqFwU,3397
78
- dcicutils-8.8.4.1b13.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
79
- dcicutils-8.8.4.1b13.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
80
- dcicutils-8.8.4.1b13.dist-info/RECORD,,
76
+ dcicutils-8.8.4.1b16.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
77
+ dcicutils-8.8.4.1b16.dist-info/METADATA,sha256=7R_Eatzjy4Ez8_JufgAKLIQ_O6z0fVvjqHzLEwfE9O0,3440
78
+ dcicutils-8.8.4.1b16.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
79
+ dcicutils-8.8.4.1b16.dist-info/entry_points.txt,sha256=51Q4F_2V10L0282W7HFjP4jdzW4K8lnWDARJQVFy_hw,270
80
+ dcicutils-8.8.4.1b16.dist-info/RECORD,,