dcicutils 8.8.4.1b15__tar.gz → 8.8.4.1b18__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/PKG-INFO +1 -1
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/file_utils.py +122 -37
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/http_utils.py +13 -2
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/pyproject.toml +1 -1
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/LICENSE.txt +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/README.rst +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/__init__.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/base.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/beanstalk_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/bundle_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/captured_output.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/cloudformation_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/codebuild_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/command_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/common.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/contribution_scripts.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/contribution_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/creds_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/data_readers.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/data_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/datetime_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/deployment_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/diff_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/docker_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/ecr_scripts.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/ecr_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/ecs_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/env_base.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/env_manager.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/env_scripts.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/env_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/env_utils_legacy.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/es_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/exceptions.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/ff_mocks.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/ff_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/function_cache_decorator.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/glacier_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/jh_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/kibana/dashboards.json +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/kibana/readme.md +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/lang_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/license_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/log_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/misc_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/obfuscation_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/opensearch_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/portal_object_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/portal_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/progress_bar.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/project_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/qa_checkers.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/qa_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/redis_tools.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/redis_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/s3_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/schema_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/scripts/publish_to_pypi.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/scripts/run_license_checker.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/scripts/view_portal_object.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/secrets_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/sheet_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/snapshot_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/ssl_certificate_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/structured_data.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/submitr/progress_constants.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/submitr/ref_lookup_strategy.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/task_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/tmpfile_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/trace_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/validation_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/variant_utils.py +0 -0
- {dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/zip_utils.py +0 -0
@@ -1,4 +1,6 @@
|
|
1
1
|
import glob
|
2
|
+
import hashlib
|
3
|
+
import io
|
2
4
|
import os
|
3
5
|
import pathlib
|
4
6
|
from datetime import datetime
|
@@ -21,46 +23,59 @@ def search_for_file(file: str,
|
|
21
23
|
first file which is found is returned (as a string), or None if none; if the single flag
|
22
24
|
is False, then all matched files are returned in a list, or an empty list if none.
|
23
25
|
"""
|
24
|
-
if file and isinstance(file, (str, pathlib.PosixPath)):
|
25
|
-
if
|
26
|
-
|
27
|
-
|
28
|
-
return
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
26
|
+
if not (file and isinstance(file, (str, pathlib.PosixPath))):
|
27
|
+
return None if single is True else []
|
28
|
+
if os.path.isabs(file):
|
29
|
+
if os.path.exists(file):
|
30
|
+
return file if single is True else [file]
|
31
|
+
return None if single is True else []
|
32
|
+
files_found = []
|
33
|
+
if not location:
|
34
|
+
location = ["."]
|
35
|
+
elif isinstance(location, (str, pathlib.PosixPath)):
|
36
|
+
location = [location]
|
37
|
+
elif not isinstance(location, list):
|
38
|
+
location = []
|
39
|
+
location_pruned = []
|
40
|
+
for directory in location:
|
41
|
+
if not isinstance(directory, str):
|
42
|
+
if not isinstance(directory, pathlib.PosixPath):
|
43
|
+
continue
|
44
|
+
directory = str(directory)
|
45
|
+
if not (directory := directory.strip()):
|
46
|
+
continue
|
47
|
+
if os.path.isfile(directory):
|
48
|
+
# Allow a file; assume its parent directory was intended.
|
49
|
+
if not (directory := os.path.dirname(directory)):
|
50
|
+
continue
|
51
|
+
location_pruned.append(directory)
|
52
|
+
location = location_pruned
|
53
|
+
for directory in location:
|
54
|
+
if os.path.exists(os.path.join(directory, file)):
|
55
|
+
file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
|
56
|
+
if single is True:
|
57
|
+
return file_found
|
58
|
+
if file_found not in files_found:
|
59
|
+
files_found.append(file_found)
|
60
|
+
if recursive is True:
|
36
61
|
for directory in location:
|
37
62
|
if not directory:
|
38
63
|
continue
|
39
|
-
if
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
if files:
|
55
|
-
for file_found in files:
|
56
|
-
file_found = os.path.abspath(file_found)
|
57
|
-
if single:
|
58
|
-
return file_found
|
59
|
-
if file_found not in files_found:
|
60
|
-
files_found.append(file_found)
|
61
|
-
if files_found:
|
62
|
-
return files_found[0] if single else files_found
|
63
|
-
return None if single else []
|
64
|
+
if not directory.endswith("/**") and not file.startswith("**/"):
|
65
|
+
path = f"{directory}/**/{file}"
|
66
|
+
else:
|
67
|
+
path = f"{directory}/{file}"
|
68
|
+
files = glob.glob(path, recursive=True if recursive is True else False)
|
69
|
+
if files:
|
70
|
+
for file_found in files:
|
71
|
+
file_found = os.path.abspath(file_found)
|
72
|
+
if single is True:
|
73
|
+
return file_found
|
74
|
+
if file_found not in files_found:
|
75
|
+
files_found.append(file_found)
|
76
|
+
if files_found:
|
77
|
+
return files_found[0] if single is True else files_found
|
78
|
+
return None if single is True else []
|
64
79
|
|
65
80
|
|
66
81
|
def normalize_file_path(path: str, home_directory: bool = True) -> str:
|
@@ -103,6 +118,76 @@ def are_files_equal(filea: str, fileb: str) -> bool:
|
|
103
118
|
return False
|
104
119
|
|
105
120
|
|
121
|
+
def compute_file_md5(file: str) -> str:
    """
    Returns the md5 checksum (as a hex string) for the given file. Returns an empty
    string if the given file is neither a string nor a path-like object (e.g. a
    pathlib path), or if the file cannot be opened or read.
    """
    if not isinstance(file, (str, os.PathLike)):
        return ""
    try:
        md5 = hashlib.md5()
        # Use a separate name for the handle so the file argument is not rebound.
        with open(file, "rb") as f:
            # Read in modest fixed-size chunks so large files are never held in memory.
            while chunk := f.read(65536):
                md5.update(chunk)
        return md5.hexdigest()
    except (OSError, ValueError):
        # OSError: missing file, directory, permission or read error.
        # ValueError: malformed path (e.g. embedded null character).
        return ""
|
135
|
+
|
136
|
+
|
137
|
+
def compute_file_etag(file: str) -> Optional[str]:
    """
    Returns the AWS S3 "etag" for the given file; this value is md5-like but
    not the same as a normal md5. We use this to check that a file in S3
    appears to be exactly the same file as a local file. Returns None if the
    file cannot be opened or read.
    """
    try:
        with open(file, "rb") as f:
            return _compute_file_etag(f)
    except (OSError, ValueError, TypeError):
        # OSError: missing/unreadable file; ValueError/TypeError: unusable file argument.
        return None


def _compute_file_etag(f: io.BufferedReader) -> str:
    """
    Computes the S3-style etag for the data readable from the given binary stream.

    If fewer than MULTIPART_THRESHOLD bytes are read, the result is the plain md5 hex
    digest of the data. Otherwise the data is treated as consecutive parts of
    MULTIPART_CHUNKSIZE bytes (the last part may be shorter), and the result is the
    md5 hex digest of the concatenated binary md5 digests of the parts, followed by
    a dash and the number of parts.
    """
    # See: https://stackoverflow.com/questions/75723647/calculate-md5-from-aws-s3-etag
    MULTIPART_THRESHOLD = 8388608
    MULTIPART_CHUNKSIZE = 8388608
    BUFFER_SIZE = 1048576
    # The whole-file digest below is taken from the first (unfinished) part, which is
    # only valid while the threshold does not exceed the part size.
    part_digests = []
    part_md5 = hashlib.md5()
    part_nbytes = 0
    total_nbytes = 0
    while True:
        # Never read across a part boundary; this keeps the part accounting exact
        # even if the stream returns fewer bytes than requested.
        data = f.read(min(BUFFER_SIZE, MULTIPART_CHUNKSIZE - part_nbytes))
        if not data:
            break
        part_md5.update(data)
        part_nbytes += len(data)
        total_nbytes += len(data)
        if part_nbytes == MULTIPART_CHUNKSIZE:
            # Done with a part; remember its digest and start hashing the next one.
            part_digests.append(part_md5.digest())
            part_md5 = hashlib.md5()
            part_nbytes = 0
    if total_nbytes < MULTIPART_THRESHOLD:
        # Normal (non-multipart) upload; the etag is just the md5 of the whole data.
        return part_md5.hexdigest()
    if part_nbytes > 0:
        # Add the trailing partial part, if any.
        part_digests.append(part_md5.digest())
    # Multipart upload; the etag is the hash of the part hashes plus the part count.
    return hashlib.md5(b"".join(part_digests)).hexdigest() + "-" + str(len(part_digests))
|
189
|
+
|
190
|
+
|
106
191
|
def create_random_file(file: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
|
107
192
|
nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
|
108
193
|
"""
|
@@ -1,20 +1,31 @@
|
|
1
1
|
from contextlib import contextmanager
|
2
2
|
import requests
|
3
|
-
from typing import Optional
|
3
|
+
from typing import Callable, Optional
|
4
4
|
from dcicutils.tmpfile_utils import temporary_file
|
5
5
|
|
6
6
|
|
7
7
|
@contextmanager
def download(url: str, suffix: Optional[str] = None, binary: bool = True,
             progress: Optional[Callable] = None) -> Optional[str]:
    """
    Context manager to download the given URL into a temporary file; yields the file
    path to it. An optional file suffix may be specified. Defaults to binary file mode;
    if this is not desired then pass False as the binary argument, in which case the
    downloaded bytes are decoded to text (using the encoding reported for the response,
    else UTF-8) before being written. If a callable progress argument is given it is
    called after each chunk is written, with the number of bytes downloaded so far and
    the total number of bytes expected (from the Content-Length header, or None if that
    is absent or not a plain number).
    """
    if not callable(progress):
        progress = None
    text_mode = binary is not True
    with temporary_file(suffix=suffix) as file:
        # Use the response as a context manager so the streamed connection is
        # released once the download is finished (or if it fails part way through).
        with requests.get(url, stream=True) as response:
            nbytes = 0
            nbytes_total = None
            content_length = response.headers.get("Content-Length")
            if isinstance(content_length, str) and content_length.isdigit():
                nbytes_total = int(content_length)
            decode = None
            if text_mode:
                # The response chunks are bytes, which cannot be written to a file
                # opened in text mode; decode incrementally so that a multi-byte
                # character split across two chunks is still decoded correctly.
                import codecs
                try:
                    decoder_class = codecs.getincrementaldecoder(response.encoding or "utf-8")
                except LookupError:
                    # Unknown encoding name reported by the server; fall back to UTF-8.
                    decoder_class = codecs.getincrementaldecoder("utf-8")
                decode = decoder_class(errors="replace").decode
            with open(file, "w" if text_mode else "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if not chunk:
                        continue
                    f.write(decode(chunk) if decode else chunk)
                    if progress:
                        # Progress is always reported in raw (undecoded) bytes.
                        nbytes += len(chunk)
                        progress(nbytes, nbytes_total)
                if decode:
                    # Flush anything still buffered in the decoder.
                    f.write(decode(b"", final=True))
        yield file
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "dcicutils"
|
3
|
-
version = "8.8.4.
|
3
|
+
version = "8.8.4.1b18" # TODO: To become 8.8.5
|
4
4
|
description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
|
5
5
|
authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
|
6
6
|
license = "MIT"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/license_policies/c4-infrastructure.jsonc
RENAMED
File without changes
|
File without changes
|
File without changes
|
{dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/license_policies/park-lab-common.jsonc
RENAMED
File without changes
|
{dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc
RENAMED
File without changes
|
{dcicutils-8.8.4.1b15 → dcicutils-8.8.4.1b18}/dcicutils/license_policies/park-lab-pipeline.jsonc
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|