dcicutils 8.8.5__tar.gz → 8.8.6__tar.gz
- {dcicutils-8.8.5 → dcicutils-8.8.6}/PKG-INFO +6 -4
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/ff_utils.py +4 -1
- dcicutils-8.8.6/dcicutils/file_utils.py +267 -0
- dcicutils-8.8.6/dcicutils/http_utils.py +39 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/misc_utils.py +82 -5
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/scripts/view_portal_object.py +87 -5
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/structured_data.py +35 -5
- dcicutils-8.8.6/dcicutils/tmpfile_utils.py +76 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/zip_utils.py +27 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/pyproject.toml +8 -6
- dcicutils-8.8.5/dcicutils/file_utils.py +0 -58
- dcicutils-8.8.5/dcicutils/tmpfile_utils.py +0 -36
- {dcicutils-8.8.5 → dcicutils-8.8.6}/LICENSE.txt +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/README.rst +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/__init__.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/base.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/beanstalk_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/bundle_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/captured_output.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/cloudformation_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/codebuild_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/command_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/common.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/contribution_scripts.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/contribution_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/creds_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/data_readers.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/data_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/datetime_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/deployment_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/diff_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/docker_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/ecr_scripts.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/ecr_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/ecs_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/env_base.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/env_manager.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/env_scripts.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/env_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/env_utils_legacy.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/es_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/exceptions.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/ff_mocks.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/function_cache_decorator.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/glacier_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/jh_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/kibana/dashboards.json +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/kibana/readme.md +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/lang_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/license_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/log_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/obfuscation_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/opensearch_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/portal_object_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/portal_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/progress_bar.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/project_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/qa_checkers.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/qa_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/redis_tools.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/redis_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/s3_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/schema_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/scripts/publish_to_pypi.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/scripts/run_license_checker.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/secrets_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/sheet_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/snapshot_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/ssl_certificate_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/submitr/progress_constants.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/submitr/ref_lookup_strategy.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/task_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/trace_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/validation_utils.py +0 -0
- {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/variant_utils.py +0 -0
{dcicutils-8.8.5 → dcicutils-8.8.6}/PKG-INFO
@@ -1,12 +1,12 @@
 Metadata-Version: 2.1
 Name: dcicutils
-Version: 8.8.5
+Version: 8.8.6
 Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
 Home-page: https://github.com/4dn-dcic/utils
 License: MIT
 Author: 4DN-DCIC Team
 Author-email: support@4dnucleome.org
-Requires-Python: >=3.8,<3.
+Requires-Python: >=3.8,<3.13
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research
@@ -24,9 +24,10 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Topic :: Database :: Database Engines/Servers
 Requires-Dist: PyJWT (>=2.6.0,<3.0.0)
 Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
+Requires-Dist: appdirs (>=1.4.4,<2.0.0)
 Requires-Dist: aws-requests-auth (>=0.4.2,<1)
-Requires-Dist: boto3 (>=1.
-Requires-Dist: botocore (>=1.
+Requires-Dist: boto3 (>=1.34.93,<2.0.0)
+Requires-Dist: botocore (>=1.34.93,<2.0.0)
 Requires-Dist: chardet (>=5.2.0,<6.0.0)
 Requires-Dist: docker (>=4.4.4,<5.0.0)
 Requires-Dist: elasticsearch (==7.13.4)
@@ -42,6 +43,7 @@ Requires-Dist: pytz (>=2020.4)
 Requires-Dist: redis (>=4.5.1,<5.0.0)
 Requires-Dist: requests (>=2.21.0,<3.0.0)
 Requires-Dist: rfc3986 (>=1.4.0,<2.0.0)
+Requires-Dist: shortuuid (>=1.0.13,<2.0.0)
 Requires-Dist: structlog (>=19.2.0,<20.0.0)
 Requires-Dist: toml (>=0.10.1,<1)
 Requires-Dist: tqdm (>=4.66.2,<5.0.0)
{dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/ff_utils.py
@@ -895,9 +895,12 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth):
     used to create the generator.
     Should NOT be used directly
     """
+    def get_es_host_local() -> Optional[str]:
+        return os.environ.get("ES_HOST_LOCAL", None)
     health = get_health_page(key=auth)
     if es_client is None:
-        es_url = health['elasticsearch']
+        if not (es_url := get_es_host_local()):
+            es_url = health['elasticsearch']
        es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
     namespace_star = health.get('namespace', '') + '*'
     # match all given uuids to _id fields
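The practical effect of this change: a locally running ElasticSearch can now be targeted by setting the ES_HOST_LOCAL environment variable, which _get_es_metadata prefers over the URL from the portal health page. A minimal sketch (the endpoint URL is hypothetical):

    import os

    # With this set, _get_es_metadata uses it instead of health['elasticsearch'].
    os.environ["ES_HOST_LOCAL"] = "http://localhost:9200"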
dcicutils-8.8.6/dcicutils/file_utils.py (new file)
@@ -0,0 +1,267 @@
+import glob
+import hashlib
+import io
+import os
+import pathlib
+from datetime import datetime
+import random
+import string
+from tempfile import gettempdir as get_temporary_directory
+from typing import List, Optional, Union
+from uuid import uuid4 as uuid
+
+HOME_DIRECTORY = str(pathlib.Path().home())
+
+
+def search_for_file(file: str,
+                    location: Union[str, pathlib.PosixPath, Optional[List[Union[str, pathlib.PosixPath]]]] = None,
+                    recursive: bool = False,
+                    single: bool = False,
+                    order: bool = True) -> Union[List[str], Optional[str]]:
+    """
+    Searches for the existence of the given file name, first directly in the given directory or list
+    of directories, if specified, and if not then just in the current (working) directory; if the
+    given recursive flag is True then also searches all sub-directories of these directories;
+    returns the full path name to the file if found. If the single flag is True then just the
+    first file which is found is returned (as a string), or None if none; if the single flag
+    is False, then all matched files are returned in a list, or an empty list if none.
+    """
+    def order_by_fewest_number_of_paths_and_then_alphabetically(paths: List[str]) -> List[str]:
+        def order_by(path: str):
+            return len(path.split(os.path.sep)), path
+        return sorted(paths, key=order_by)
+
+    if not (file and isinstance(file, (str, pathlib.PosixPath))):
+        return None if single is True else []
+    if os.path.isabs(file):
+        if os.path.exists(file):
+            return str(file) if single is True else [str(file)]
+        return None if single is True else []
+    files_found = []
+    if not location:
+        location = ["."]
+    elif isinstance(location, (str, pathlib.PosixPath)):
+        location = [location]
+    elif not isinstance(location, list):
+        location = []
+    location_pruned = []
+    for directory in location:
+        if not isinstance(directory, str):
+            if not isinstance(directory, pathlib.PosixPath):
+                continue
+            directory = str(directory)
+        if not (directory := directory.strip()):
+            continue
+        if os.path.isfile(directory := os.path.abspath(os.path.normpath(directory))):
+            # Actually, allow a file rather than a directory; assume its parent directory was intended.
+            if not (directory := os.path.dirname(directory)):
+                continue
+        if directory not in location_pruned:
+            location_pruned.append(directory)
+    location = location_pruned
+    for directory in location:
+        if os.path.exists(os.path.join(directory, file)):
+            file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
+            if single is True:
+                return file_found
+            if file_found not in files_found:
+                files_found.append(file_found)
+    if recursive is True:
+        for directory in location:
+            if not directory.endswith("/**") and not file.startswith("**/"):
+                path = f"{directory}/**/{file}"
+            else:
+                path = f"{directory}/{file}"
+            files = glob.glob(path, recursive=True if recursive is True else False)
+            if files:
+                for file_found in files:
+                    file_found = os.path.abspath(file_found)
+                    if single is True:
+                        return file_found
+                    if file_found not in files_found:
+                        files_found.append(file_found)
+    if single is True:
+        return files_found[0] if files_found else None
+    elif order is True:
+        return order_by_fewest_number_of_paths_and_then_alphabetically(files_found)
+    else:
+        return files_found
+
+
+def normalize_path(value: Union[str, pathlib.Path], absolute: bool = False, expand_home: Optional[bool] = None) -> str:
+    """
+    Normalizes the given path value and returns the result; does things like remove redundant
+    consecutive directory separators and redundant parent paths. If the given absolute argument
+    is True then converts the path to an absolute path. If the given expand_home argument is False
+    and if the path can reasonably be represented with a home directory indicator (i.e. "~"), then
+    converts it to such. If the expand_home argument is True and the path starts with the home directory
+    indicator (i.e. "~") then expands it to the actual (absolute) home path of the caller. If the
+    given path value is not actually even a string (or pathlib.Path) then returns an empty string.
+    """
+    if isinstance(value, pathlib.Path):
+        value = str(value)
+    elif not isinstance(value, str):
+        return ""
+    if not (value := value.strip()) or not (value := os.path.normpath(value)):
+        return ""
+    if expand_home is True:
+        value = os.path.expanduser(value)
+    elif (expand_home is False) and (os.name == "posix"):
+        if value.startswith(home := HOME_DIRECTORY + os.sep):
+            value = "~/" + value[len(home):]
+        elif value == HOME_DIRECTORY:
+            value = "~"
+    if absolute is True:
+        value = os.path.abspath(value)
+    return value
+
+
+def get_file_size(file: str, raise_exception: bool = True) -> Optional[int]:
+    try:
+        return os.path.getsize(file) if isinstance(file, str) else None
+    except Exception:
+        if raise_exception is True:
+            raise
+        return None
+
+
+def get_file_modified_datetime(file: str, raise_exception: bool = True) -> Optional[datetime]:
+    try:
+        return datetime.fromtimestamp(os.path.getmtime(file)) if isinstance(file, str) else None
+    except Exception:
+        if raise_exception is True:
+            raise
+        return None
+
+
+def are_files_equal(filea: str, fileb: str, raise_exception: bool = True) -> bool:
+    """
+    Returns True iff the contents of the two given files are exactly the same.
+    """
+    try:
+        with open(filea, "rb") as fa:
+            with open(fileb, "rb") as fb:
+                chunk_size = 4096
+                while True:
+                    chunka = fa.read(chunk_size)
+                    chunkb = fb.read(chunk_size)
+                    if chunka != chunkb:
+                        return False
+                    if not chunka:
+                        break
+        return True
+    except Exception:
+        if raise_exception is True:
+            raise
+        return False
+
+
+def compute_file_md5(file: str, raise_exception: bool = True) -> str:
+    """
+    Returns the md5 checksum for the given file.
+    """
+    if not isinstance(file, str):
+        return ""
+    try:
+        md5 = hashlib.md5()
+        with open(file, "rb") as file:
+            for chunk in iter(lambda: file.read(4096), b""):
+                md5.update(chunk)
+        return md5.hexdigest()
+    except Exception:
+        if raise_exception is True:
+            raise
+        return ""
+
+
+def compute_file_etag(file: str, raise_exception: bool = True) -> Optional[str]:
+    """
+    Returns the AWS S3 "etag" for the given file; this value is md5-like but
+    not the same as a normal md5. We use this to compare that a file in S3
+    appears to be exactly the same file as a local file.
+    """
+    try:
+        with io.open(file, "rb") as f:
+            return _compute_file_etag(f)
+    except Exception:
+        if raise_exception is True:
+            raise
+        return None
+
+
+def _compute_file_etag(f: io.BufferedReader) -> str:
+    # See: https://stackoverflow.com/questions/75723647/calculate-md5-from-aws-s3-etag
+    MULTIPART_THRESHOLD = 8388608
+    MULTIPART_CHUNKSIZE = 8388608
+    # BUFFER_SIZE = 1048576
+    # Verify some assumptions are correct
+    # assert(MULTIPART_CHUNKSIZE >= MULTIPART_THRESHOLD)
+    # assert((MULTIPART_THRESHOLD % BUFFER_SIZE) == 0)
+    # assert((MULTIPART_CHUNKSIZE % BUFFER_SIZE) == 0)
+    hash = hashlib.md5()
+    read = 0
+    chunks = None
+    while True:
+        # Read some from stdin, if we're at the end, stop reading
+        bits = f.read(1048576)
+        if len(bits) == 0:
+            break
+        read += len(bits)
+        hash.update(bits)
+        if chunks is None:
+            # We're handling a multi-part upload, so switch to calculating
+            # hashes of each chunk
+            if read >= MULTIPART_THRESHOLD:
+                chunks = b''
+        if chunks is not None:
+            if (read % MULTIPART_CHUNKSIZE) == 0:
+                # Done with a chunk, add it to the list of hashes to hash later
+                chunks += hash.digest()
+                hash = hashlib.md5()
+    if chunks is None:
+        # Normal upload, just output the MD5 hash
+        etag = hash.hexdigest()
+    else:
+        # Multipart upload, need to output the hash of the hashes
+        if (read % MULTIPART_CHUNKSIZE) != 0:
+            # Add the last part if we have a partial chunk
+            chunks += hash.digest()
+        etag = hashlib.md5(chunks).hexdigest() + "-" + str(len(chunks) // 16)
+    return etag
+
+
+def create_random_file(file: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
+                       nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
+    """
+    Writes some random content to the given file (name/path). If the given file is None then writes
+    to a temporary file. In either case, returns the file written to. The number of bytes written is 1024
+    by default but can be specified with the nbytes argument; defaults to writing ASCII text but if
+    the binary argument is True then writes binary data instead; if not binary the content is in
+    lines of 80 characters each; use the line_length argument in this case to change the line length.
+    """
+    if not isinstance(nbytes, int) or nbytes < 0:
+        nbytes = 0
+    if not isinstance(file, str) or not file:
+        if not isinstance(prefix, str):
+            prefix = ""
+        if not isinstance(suffix, str):
+            suffix = ""
+        file = f"{prefix}{datetime.utcnow().strftime('%Y%m%d%H%M%S')}{str(uuid()).replace('-', '')}{suffix}"
+        file = os.path.join(get_temporary_directory(), file)
+    with open(file, "wb" if binary is True else "w") as f:
+        if binary is True:
+            f.write(os.urandom(nbytes))
+        else:
+            if (not isinstance(line_length, int)) or (line_length < 1):
+                line_length = 80
+            line_length += 1
+            nlines = nbytes // line_length
+            nremainder = nbytes % line_length
+            for n in range(nlines):
+                f.write("".join(random.choices(string.ascii_letters + string.digits, k=line_length - 1)))
+                f.write("\n")
+            if nremainder > 1:
+                f.write("".join(random.choices(string.ascii_letters + string.digits, k=nremainder - 1)))
+            if nremainder > 0:
+                f.write("\n")
+    return file
dcicutils-8.8.6/dcicutils/http_utils.py (new file)
@@ -0,0 +1,39 @@
+from contextlib import contextmanager
+import requests
+from typing import Callable, Optional
+from dcicutils.tmpfile_utils import temporary_file
+
+
+@contextmanager
+def download(url: str, suffix: Optional[str] = None, binary: bool = True,
+             progress: Optional[Callable] = None) -> Optional[str]:
+    """
+    Context manager to download the given URL into a temporary file and yield the file
+    path to it. An optional file suffix may be specified for this temporary file name.
+    Defaults to binary file mode; if not desired then pass False as the binary argument.
+    """
+    with temporary_file(suffix=suffix) as file:
+        download_to(url, file, binary=binary, progress=progress)
+        yield file
+
+
+def download_to(url: str, file: str, binary: bool = True, progress: Optional[Callable] = None) -> None:
+    """
+    Download the given URL into the given file. Defaults to binary
+    file mode; if not desired then pass False as the binary argument.
+    """
+    if not callable(progress):
+        progress = None
+    response = requests.get(url, stream=True)
+    if progress:
+        nbytes = 0
+        nbytes_total = None
+        if isinstance(content_length := response.headers.get("Content-Length"), str) and content_length.isdigit():
+            nbytes_total = int(content_length)
+    with open(file, "wb" if binary is True else "w") as f:
+        for chunk in response.iter_content(chunk_size=8192):
+            if chunk:
+                f.write(chunk)
+                if progress:
+                    nbytes += len(chunk)
+                    progress(nbytes, nbytes_total)
{dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/misc_utils.py
@@ -3,6 +3,7 @@ This file contains functions that might be generally useful.
 """
 
 from collections import namedtuple
+import appdirs
 import contextlib
 import datetime
 import functools
@@ -13,10 +14,12 @@ import json
 import logging
 import math
 import os
+import platform
 import pytz
 import re
 import rfc3986.validators
 import rfc3986.exceptions
+import shortuuid
 import time
 import uuid
 import warnings
@@ -1152,7 +1155,8 @@ def remove_suffix(suffix: str, text: str, required: bool = False):
 
 def remove_empty_properties(data: Optional[Union[list, dict]],
                             isempty: Optional[Callable] = None,
-                            isempty_array_element: Optional[Callable] = None) -> None:
+                            isempty_array_element: Optional[Callable] = None,
+                            raise_exception_on_nonempty_array_element_after_empty: bool = False) -> None:
     def _isempty(value: Any) -> bool:  # noqa
         return isempty(value) if callable(isempty) else value in [None, "", {}, []]
     if isinstance(data, dict):
@@ -1160,11 +1164,22 @@ def remove_empty_properties(data: Optional[Union[list, dict]],
         if _isempty(value := data[key]):
             del data[key]
         else:
-            remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element)
+            remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element,
+                                    raise_exception_on_nonempty_array_element_after_empty=  # noqa
+                                    raise_exception_on_nonempty_array_element_after_empty)
     elif isinstance(data, list):
         for item in data:
-            remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element)
+            remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element,
+                                    raise_exception_on_nonempty_array_element_after_empty=  # noqa
+                                    raise_exception_on_nonempty_array_element_after_empty)
         if callable(isempty_array_element):
+            if raise_exception_on_nonempty_array_element_after_empty is True:
+                empty_element_seen = False
+                for item in data:
+                    if not empty_element_seen and isempty_array_element(item):
+                        empty_element_seen = True
+                    elif empty_element_seen and not isempty_array_element(item):
+                        raise Exception("Non-empty element found after empty element.")
             data[:] = [item for item in data if not isempty_array_element(item)]
 
 
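The new flag is easiest to see on a small list; a sketch of the condition it guards against:

    from dcicutils.misc_utils import remove_empty_properties

    data = {"items": [{"a": 1}, {}, {"b": 2}]}
    try:
        # {} is empty and the non-empty {"b": 2} follows it, so this raises.
        remove_empty_properties(data, isempty_array_element=lambda e: e == {},
                                raise_exception_on_nonempty_array_element_after_empty=True)
    except Exception as e:
        print(e)  # -> Non-empty element found after empty element.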
@@ -1522,7 +1537,7 @@ def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]],
 def create_dict(**kwargs) -> dict:
     result = {}
     for name in kwargs:
-        if kwargs[name]:
+        if not (kwargs[name] is None):
             result[name] = kwargs[name]
     return result
 
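This create_dict change is behavioral: previously any falsey value (0, False, "") was dropped; now only None is. For example:

    from dcicutils.misc_utils import create_dict

    print(create_dict(count=0, flag=False, name=None))  # -> {'count': 0, 'flag': False}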
@@ -2548,6 +2563,19 @@ def normalize_spaces(value: str) -> str:
     return re.sub(r"\s+", " ", value).strip()
 
 
+def normalize_string(value: Optional[str]) -> Optional[str]:
+    """
+    Strips leading/trailing spaces, and converts multiple consecutive spaces to a single space
+    in the given string value and returns the result. If the given value is None returns an
+    empty string. If the given value is not actually even a string then returns None.
+    """
+    if value is None:
+        return ""
+    elif isinstance(value, str):
+        return re.sub(r"\s+", " ", value).strip()
+    return None
+
+
 def find_nth_from_end(string: str, substring: str, nth: int) -> int:
     """
     Returns the index of the nth occurrence of the given substring within
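A few illustrative calls for normalize_string:

    from dcicutils.misc_utils import normalize_string

    assert normalize_string("  hello   world  ") == "hello world"
    assert normalize_string(None) == ""   # None maps to an empty string ...
    assert normalize_string(123) is None  # ... but any other non-string maps to None.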
@@ -2590,7 +2618,11 @@ def format_size(nbytes: Union[int, float], precision: int = 2, nospace: bool = F
         nbytes = int(nbytes)
         return f"{nbytes} byte{'s' if nbytes != 1 else ''}"
     unit = (UNITS_TERSE if terse else UNITS)[index]
-    return f"{nbytes:.{precision}f}{'' if nospace else ' '}{unit}"
+    size = f"{nbytes:.{precision}f}"
+    if size.endswith(f".{'0' * precision}"):
+        # Tidy up extraneous zeros.
+        size = size[:-(precision - 1)]
+    return f"{size}{'' if nospace else ' '}{unit}"
 
 
 def format_duration(seconds: Union[int, float]) -> str:
@@ -2670,3 +2702,48 @@ class JsonLinesReader:
                 yield line
             else:
                 raise Exception(f"If the first line is not a list, all lines must be dictionaries: {line!r}")
+
+
+def get_app_specific_directory() -> str:
+    """
+    Returns the standard system application specific directory:
+    - On MacOS this directory is: ~/Library/Application Support
+    - On Linux this directory is: ~/.local/share
+    - On Windows this directory is: %USERPROFILE%\\AppData\\Local  # noqa
+    N.B. This has been tested on MacOS and Linux but not on Windows.
+    """
+    return appdirs.user_data_dir()
+
+
+def get_os_name() -> str:
+    if os_name := platform.system():
+        if os_name == "Darwin": return "osx"  # noqa
+        elif os_name == "Linux": return "linux"  # noqa
+        elif os_name == "Windows": return "windows"  # noqa
+    return ""
+
+
+def get_cpu_architecture_name() -> str:
+    if os_architecture_name := platform.machine():
+        if os_architecture_name == "x86_64": return "amd64"  # noqa
+        return os_architecture_name
+    return ""
+
+
+def create_uuid(nodash: bool = False, upper: bool = False) -> str:
+    value = str(uuid.uuid4())
+    if nodash is True:
+        value = value.replace("-", "")
+    if upper is True:
+        value = value.upper()
+    return value
+
+
+def create_short_uuid(length: Optional[int] = None, upper: bool = False):
+    # Not really technically a uuid of course.
+    if (length is None) or (not isinstance(length, int)) or (length < 1):
+        length = 16
+    value = shortuuid.ShortUUID().random(length=length)
+    if upper is True:
+        value = value.upper()
+    return value
{dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/scripts/view_portal_object.py
@@ -57,6 +57,7 @@
 
 import argparse
 from functools import lru_cache
+import io
 import json
 import pyperclip
 import os
@@ -97,11 +98,18 @@ def main():
                         help="Include all properties for schema usage.")
     parser.add_argument("--raw", action="store_true", required=False, default=False, help="Raw output.")
     parser.add_argument("--tree", action="store_true", required=False, default=False, help="Tree output for schemas.")
+    parser.add_argument("--post", type=str, required=False, default=None,
+                        help="POST data of the main arg type with data from file specified with this option.")
+    parser.add_argument("--patch", type=str, required=False, default=None,
+                        help="PATCH data of the main arg type with data from file specified with this option.")
     parser.add_argument("--database", action="store_true", required=False, default=False,
                         help="Read from database output.")
+    parser.add_argument("--bool", action="store_true", required=False,
+                        default=False, help="Only return whether found or not.")
     parser.add_argument("--yaml", action="store_true", required=False, default=False, help="YAML output.")
     parser.add_argument("--copy", "-c", action="store_true", required=False, default=False,
                         help="Copy object data to clipboard.")
+    parser.add_argument("--indent", required=False, default=False, help="Indent output.", type=int)
     parser.add_argument("--details", action="store_true", required=False, default=False, help="Detailed output.")
     parser.add_argument("--more-details", action="store_true", required=False, default=False,
                         help="More detailed output.")
@@ -151,6 +159,18 @@ def main():
         args.schema = True
 
     if args.schema:
+        if args.post:
+            if post_data := _read_json_from_file(args.post):
+                if args.verbose:
+                    _print(f"POSTing data from file ({args.post}) as type: {args.uuid}")
+                if isinstance(post_data, dict):
+                    post_data = [post_data]
+                elif not isinstance(post_data, list):
+                    _print(f"POST data neither list nor dictionary: {args.post}")
+                for item in post_data:
+                    portal.post_metadata(args.uuid, item)
+                if args.verbose:
+                    _print(f"Done POSTing data from file ({args.post}) as type: {args.uuid}")
         schema, schema_name = _get_schema(portal, args.uuid)
         if schema:
             if args.copy:
@@ -166,14 +186,50 @@ def main():
             _print_schema(schema, details=args.details, more_details=args.details,
                           all=args.all, raw=args.raw, raw_yaml=args.yaml)
             return
-    data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw,
-                              database=args.database, verbose=args.verbose)
+    elif args.patch:
+        if patch_data := _read_json_from_file(args.patch):
+            if args.verbose:
+                _print(f"PATCHing data from file ({args.patch}) for object: {args.uuid}")
+            if isinstance(patch_data, dict):
+                patch_data = [patch_data]
+            elif not isinstance(patch_data, list):
+                _print(f"PATCH data neither list nor dictionary: {args.patch}")
+            for item in patch_data:
+                portal.patch_metadata(args.uuid, item)
+            if args.verbose:
+                _print(f"Done PATCHing data from file ({args.patch}) as type: {args.uuid}")
+            return
+        else:
+            _print(f"No PATCH data found in file: {args.patch}")
+            exit(1)
+
+    data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw,
+                              database=args.database, check=args.bool, verbose=args.verbose)
+    if args.bool:
+        if data:
+            _print(f"{args.uuid}: found")
+            exit(0)
+        else:
+            _print(f"{args.uuid}: not found")
+            exit(1)
     if args.copy:
         pyperclip.copy(json.dumps(data, indent=4))
     if args.yaml:
         _print(yaml.dump(data))
     else:
-        _print(json.dumps(data, default=str, indent=4))
+        if args.indent > 0:
+            _print(_format_json_with_indent(data, indent=args.indent))
+        else:
+            _print(json.dumps(data, default=str, indent=4))
+
+
+def _format_json_with_indent(value: dict, indent: int = 0) -> Optional[str]:
+    if isinstance(value, dict):
+        result = json.dumps(value, indent=4)
+        if indent > 0:
+            result = f"{indent * ' '}{result}"
+            result = result.replace("\n", f"\n{indent * ' '}")
+        return result
 
 
 def _create_portal(ini: str, env: Optional[str] = None,
@@ -198,7 +254,8 @@ def _create_portal(ini: str, env: Optional[str] = None,
 
 
 def _get_portal_object(portal: Portal, uuid: str,
-                       raw: bool = False, database: bool = False, verbose: bool = False) -> dict:
+                       raw: bool = False, database: bool = False,
+                       check: bool = False, verbose: bool = False) -> dict:
     response = None
     try:
         if not uuid.startswith("/"):
@@ -212,13 +269,18 @@ def _get_portal_object(portal: Portal, uuid: str,
             _exit()
         _exit(f"Exception getting Portal object from {portal.server}: {uuid}\n{get_error_message(e)}")
     if not response:
+        if check:
+            return None
         _exit(f"Null response getting Portal object from {portal.server}: {uuid}")
     if response.status_code not in [200, 307]:
         # TODO: Understand why the /me endpoint returns HTTP status code 307, which is only why we mention it above.
         _exit(f"Invalid status code ({response.status_code}) getting Portal object from {portal.server}: {uuid}")
     if not response.json:
         _exit(f"Invalid JSON getting Portal object: {uuid}")
-    return response.json()
+    response = response.json()
+    if raw:
+        response.pop("schema_version", None)
+    return response
 
 
 @lru_cache(maxsize=1)
@@ -257,6 +319,7 @@ def _print_schema_info(schema: dict, level: int = 0,
                        required: Optional[List[str]] = None) -> None:
     if not schema or not isinstance(schema, dict):
         return
+    identifying_properties = schema.get("identifyingProperties")
     if level == 0:
         if required_properties := schema.get("required"):
             _print("- required properties:")
@@ -383,6 +446,8 @@ def _print_schema_info(schema: dict, level: int = 0,
             suffix += f" | enum"
         if property_required:
             suffix += f" | required"
+        if property_name in (identifying_properties or []):
+            suffix += f" | identifying"
         if property.get("uniqueKey"):
             suffix += f" | unique"
         if pattern := property.get("pattern"):
@@ -529,6 +594,23 @@ def _print_tree(root_name: Optional[str],
         print(line)
 
 
+def _read_json_from_file(file: str) -> Optional[dict]:
+    if not os.path.exists(file):
+        _print(f"Cannot find file: {file}")
+        exit(1)
+    try:
+        with io.open(file, "r") as f:
+            try:
+                return json.load(f)
+            except Exception:
+                _print(f"Cannot parse JSON in file: {file}")
+                exit(1)
+    except Exception as e:
+        print(e)
+        _print(f"Cannot open file: {file}")
+        exit(1)
+
+
 def _print(*args, **kwargs):
     with uncaptured_output():
         PRINT(*args, **kwargs)
{dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/structured_data.py
@@ -53,6 +53,7 @@ class StructuredDataSet:
     def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
                  schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
                  order: Optional[List[str]] = None, prune: bool = True,
+                 remove_empty_objects_from_lists: bool = True,
                  ref_lookup_strategy: Optional[Callable] = None,
                  ref_lookup_nocache: bool = False,
                  norefs: bool = False,
@@ -65,7 +66,8 @@ class StructuredDataSet:
                                          ref_lookup_nocache=ref_lookup_nocache) if portal else None
         self._ref_lookup_strategy = ref_lookup_strategy
         self._order = order
-        self._prune = prune
+        self._prune = prune is True
+        self._remove_empty_objects_from_lists = remove_empty_objects_from_lists is True
         self._warnings = {}
         self._errors = {}
         self._resolved_refs = set()
@@ -93,12 +95,14 @@ class StructuredDataSet:
     def load(file: str, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
              schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
              order: Optional[List[str]] = None, prune: bool = True,
+             remove_empty_objects_from_lists: bool = True,
              ref_lookup_strategy: Optional[Callable] = None,
              ref_lookup_nocache: bool = False,
              norefs: bool = False,
             progress: Optional[Callable] = None,
             debug_sleep: Optional[str] = None) -> StructuredDataSet:
         return StructuredDataSet(file=file, portal=portal, schemas=schemas, autoadd=autoadd, order=order, prune=prune,
+                                 remove_empty_objects_from_lists=remove_empty_objects_from_lists,
                                  ref_lookup_strategy=ref_lookup_strategy, ref_lookup_nocache=ref_lookup_nocache,
                                  norefs=norefs, progress=progress, debug_sleep=debug_sleep)
 
@@ -346,7 +350,18 @@ class StructuredDataSet:
 
     def _load_json_file(self, file: str) -> None:
         with open(file) as f:
-            self._add(Schema.type_name(file), json.load(f))
+            file_json = json.load(f)
+            schema_inferred_from_file_name = Schema.type_name(file)
+            if self._portal.get_schema(schema_inferred_from_file_name) is not None:
+                # If the JSON file name looks like a schema name then assume it
+                # contains an object or an array of objects of that schema type.
+                self._add(Schema.type_name(file), file_json)
+            elif isinstance(file_json, dict):
+                # Otherwise if the JSON file name does not look like a schema name then
+                # assume it is a dictionary where each property is the name of a schema,
+                # and which (each property) contains a list of objects of that schema type.
+                for schema_name in file_json:
+                    self._add(schema_name, file_json[schema_name])
 
     def _load_reader(self, reader: RowReader, type_name: str) -> None:
         schema = None
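The two JSON file shapes _load_json_file now accepts, sketched with hypothetical type and file names:

    # Shape 1: a file named like a schema (e.g. file_format.json) holds an
    # object, or an array of objects, of that type:
    file_format_json = [{"identifier": "bam"}, {"identifier": "fastq"}]

    # Shape 2: any other file name; top-level properties name schema types,
    # each holding a list of objects of that type:
    data_json = {
        "FileFormat": [{"identifier": "bam"}],
        "Software": [{"name": "samtools"}],
    }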
@@ -368,7 +383,11 @@ class StructuredDataSet:
                 structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
             if self._autoadd_properties:
                 self._add_properties(structured_row, self._autoadd_properties, schema)
-            self._add(type_name, structured_row)
+            if (prune_error := self._prune_structured_row(structured_row)) is not None:
+                self._note_error({"src": create_dict(type=schema_name, row=reader.row_number),
+                                  "error": prune_error}, "validation")
+            else:
+                self._add(type_name, structured_row)
             if self._progress:
                 self._progress({
                     PROGRESS.LOAD_ITEM: self._nrows,
@@ -385,9 +404,20 @@ class StructuredDataSet:
             self._note_error(schema._unresolved_refs, "ref")
         self._resolved_refs.update(schema._resolved_refs)
 
-    def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
-        if self._prune:
+    def _prune_structured_row(self, data: dict) -> Optional[str]:
+        if not self._prune:
+            return None
+        if not self._remove_empty_objects_from_lists:
             remove_empty_properties(data)
+            return None
+        try:
+            remove_empty_properties(data, isempty_array_element=lambda element: element == {},
+                                    raise_exception_on_nonempty_array_element_after_empty=True)
+        except Exception as e:
+            return str(e)
+        return None
+
+    def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
         if type_name in self._data:
             self._data[type_name].extend([data] if isinstance(data, dict) else data)
         else:
dcicutils-8.8.6/dcicutils/tmpfile_utils.py (new file)
@@ -0,0 +1,76 @@
+from contextlib import contextmanager
+from datetime import datetime
+import os
+import shutil
+import tempfile
+from uuid import uuid4 as uuid
+from typing import List, Optional, Union
+from dcicutils.file_utils import create_random_file
+
+
+@contextmanager
+def temporary_directory() -> str:
+    try:
+        with tempfile.TemporaryDirectory() as tmp_directory_name:
+            yield tmp_directory_name
+    finally:
+        remove_temporary_directory(tmp_directory_name)
+
+
+@contextmanager
+def temporary_file(name: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
+                   content: Optional[Union[str, bytes, List[str]]] = None) -> str:
+    with temporary_directory() as tmp_directory_name:
+        tmp_file_name = f"{prefix or ''}{name or tempfile.mktemp(dir='')}{suffix or ''}"
+        tmp_file_path = os.path.join(tmp_directory_name, tmp_file_name)
+        with open(tmp_file_path, "wb" if isinstance(content, bytes) else "w") as tmp_file:
+            if content is not None:
+                tmp_file.write("\n".join(content) if isinstance(content, list) else content)
+        yield tmp_file_path
+
+
+def create_temporary_file_name(prefix: Optional[str] = None, suffix: Optional[str] = None) -> str:
+    """
+    Generates and returns the full path to a file within the system temporary directory.
+    """
+    random_string = f"{datetime.utcnow().strftime('%Y%m%d%H%M%S')}{str(uuid()).replace('-', '')}"
+    tmp_file_name = f"{prefix or ''}{random_string}{suffix or ''}"
+    return os.path.join(tempfile.gettempdir(), tmp_file_name)
+
+
+@contextmanager
+def temporary_random_file(prefix: Optional[str] = None, suffix: Optional[str] = None,
+                          nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
+    with temporary_file(prefix=prefix, suffix=suffix) as tmp_file_path:
+        create_random_file(tmp_file_path, nbytes=nbytes, binary=binary, line_length=line_length)
+        yield tmp_file_path
+
+
+def remove_temporary_directory(tmp_directory_name: str) -> None:
+    """
+    Removes the given directory, recursively; but ONLY if it is (somewhere) within the system temporary directory.
+    """
+    if is_temporary_directory(tmp_directory_name):  # Guard against errant deletion.
+        shutil.rmtree(tmp_directory_name)
+
+
+def remove_temporary_file(tmp_file_name: str) -> bool:
+    """
+    Removes the given file; but ONLY if it is (somewhere) within the system temporary directory.
+    """
+    try:
+        tmpdir = tempfile.gettempdir()
+        if (os.path.commonpath([tmpdir, tmp_file_name]) == tmpdir) and os.path.isfile(tmp_file_name):
+            os.remove(tmp_file_name)
+            return True
+        return False
+    except Exception:
+        return False
+
+
+def is_temporary_directory(path: str) -> bool:
+    try:
+        tmpdir = tempfile.gettempdir()
+        return os.path.commonpath([path, tmpdir]) == tmpdir and os.path.exists(path) and os.path.isdir(path)
+    except Exception:
+        return False
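A usage sketch of the temporary-file helpers:

    from dcicutils.tmpfile_utils import temporary_file, temporary_random_file

    with temporary_file(suffix=".json", content='{"a": 1}') as path:
        print(path)  # lives under the system temporary directory
    # The file and its enclosing temporary directory are gone here.

    with temporary_random_file(suffix=".txt", nbytes=2048) as path:
        print(path)  # 2048 bytes of random ASCII text lines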
{dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/zip_utils.py
@@ -2,7 +2,9 @@ from contextlib import contextmanager
 from dcicutils.tmpfile_utils import temporary_directory, temporary_file
 import gzip
 import os
+import shutil
 import tarfile
+import tempfile
 from typing import List, Optional
 import zipfile
 
@@ -45,3 +47,28 @@ def unpack_gz_file_to_temporary_file(file: str, suffix: Optional[str] = None) ->
             outputf.write(inputf.read())
             outputf.close()
             yield tmp_file_name
+
+
+def extract_file_from_zip(zip_file: str, file_to_extract: str,
+                          destination_file: str, raise_exception: bool = True) -> bool:
+    """
+    Extracts from the given zip file the given file to extract, writing it to the
+    given destination file. Returns True if all is well, otherwise False; or, if the
+    raise_exception argument is True (the default), raises an exception on error.
+    """
+    try:
+        if not (destination_directory := os.path.dirname(destination_file)):
+            destination_directory = os.getcwd()
+            destination_file = os.path.join(destination_directory, destination_file)
+        with tempfile.TemporaryDirectory() as tmp_directory_name:
+            with zipfile.ZipFile(zip_file, "r") as zipf:
+                if file_to_extract not in zipf.namelist():
+                    return False
+                zipf.extract(file_to_extract, path=tmp_directory_name)
+                os.makedirs(destination_directory, exist_ok=True)
+                shutil.move(os.path.join(tmp_directory_name, file_to_extract), destination_file)
+                return True
+    except Exception as e:
+        if raise_exception:
+            raise e
+        return False
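A sketch of extract_file_from_zip (archive and member names are hypothetical):

    from dcicutils.zip_utils import extract_file_from_zip

    # True on success; False if the member is absent (raise_exception=False
    # also suppresses exceptions in favor of a False return).
    ok = extract_file_from_zip("bundle.zip", "docs/readme.txt",
                               "extracted/readme.txt", raise_exception=False)
    print("extracted" if ok else "not extracted")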
{dcicutils-8.8.5 → dcicutils-8.8.6}/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dcicutils"
-version = "8.8.5"
+version = "8.8.6"
 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
 authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
 license = "MIT"
@@ -37,12 +37,13 @@ classifiers = [
 
 
 [tool.poetry.dependencies]
-python = ">=3.8,<3.
-boto3 = "^1.
-botocore = "^1.
+python = ">=3.8,<3.13"
+boto3 = "^1.34.93"
+botocore = "^1.34.93"
 # The DCIC portals (cgap-portal and fourfront) are very particular about which ElasticSearch version.
 # This value is intentionally pinned and must not be changed casually.
 elasticsearch = "7.13.4"
+appdirs = "^1.4.4"
 aws-requests-auth = ">=0.4.2,<1"
 chardet = "^5.2.0"
 docker = "^4.4.4"
@@ -60,6 +61,7 @@ pyperclip = "^1.8.2"
 PyYAML = "^6.0.1"
 requests = "^2.21.0"
 rfc3986 = "^1.4.0"
+shortuuid = "^1.0.13"
 structlog = "^19.2.0"
 toml = ">=0.10.1,<1"
 tqdm = "^4.66.2"
@@ -69,8 +71,8 @@ webtest = "^2.0.34"
 
 
 [tool.poetry.dev-dependencies]
-boto3-stubs = "^1.
-botocore-stubs = "^1.
+boto3-stubs = "^1.34.93"
+botocore-stubs = "^1.34.93"
 coverage = ">=7.2.3"
 # Loaded manually in GA workflow for coverage because a dependency on 2to3
 # in its docopts dependency makes a problem for loading it here in poetry. -kmp 7-Apr-2023
dcicutils-8.8.5/dcicutils/file_utils.py (deleted)
@@ -1,58 +0,0 @@
-import glob
-import os
-import pathlib
-from typing import List, Optional, Union
-
-
-def search_for_file(file: str,
-                    location: Union[str, Optional[List[str]]] = None,
-                    recursive: bool = False,
-                    single: bool = False) -> Union[List[str], Optional[str]]:
-    """
-    Searches for the existence of the given file name, first directly in the given directory or list
-    of directories, if specified, and if not then just in the current (working) directory; if the
-    given recursive flag is True then also searches all sub-directories of these directories;
-    returns the full path name to the file if found. If the single flag is True then just the
-    first file which is found is returned (as a string), or None if none; if the single flag
-    is False, then all matched files are returned in a list, or an empty list if none.
-    """
-    if file and isinstance(file, (str, pathlib.PosixPath)):
-        if os.path.isabs(file):
-            if os.path.exists(file):
-                return file if single else [file]
-            return None if single else []
-        files_found = []
-        if not location:
-            location = ["."]
-        elif isinstance(location, (str, pathlib.PosixPath)):
-            location = [location]
-        elif not isinstance(location, list):
-            location = []
-        for directory in location:
-            if not directory:
-                continue
-            if isinstance(directory, (str, pathlib.PosixPath)) and os.path.exists(os.path.join(directory, file)):
-                file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
-                if single:
-                    return file_found
-                if file_found not in files_found:
-                    files_found.append(file_found)
-        if recursive:
-            for directory in location:
-                if not directory:
-                    continue
-                if not directory.endswith("/**") and not file.startswith("**/"):
-                    path = f"{directory}/**/{file}"
-                else:
-                    path = f"{directory}/{file}"
-                files = glob.glob(path, recursive=recursive)
-                if files:
-                    for file_found in files:
-                        file_found = os.path.abspath(file_found)
-                        if single:
-                            return file_found
-                        if file_found not in files_found:
-                            files_found.append(file_found)
-        if files_found:
-            return files_found[0] if single else files_found
-        return None if single else []
dcicutils-8.8.5/dcicutils/tmpfile_utils.py (deleted)
@@ -1,36 +0,0 @@
-from contextlib import contextmanager
-import os
-import shutil
-import tempfile
-from typing import List, Optional, Union
-
-
-@contextmanager
-def temporary_directory() -> str:
-    try:
-        with tempfile.TemporaryDirectory() as tmp_directory_name:
-            yield tmp_directory_name
-    finally:
-        remove_temporary_directory(tmp_directory_name)
-
-
-@contextmanager
-def temporary_file(name: Optional[str] = None, suffix: Optional[str] = None,
-                   content: Optional[Union[str, bytes, List[str]]] = None) -> str:
-    with temporary_directory() as tmp_directory_name:
-        tmp_file_name = os.path.join(tmp_directory_name, name or tempfile.mktemp(dir="")) + (suffix or "")
-        with open(tmp_file_name, "wb" if isinstance(content, bytes) else "w") as tmp_file:
-            if content is not None:
-                tmp_file.write("\n".join(content) if isinstance(content, list) else content)
-        yield tmp_file_name
-
-
-def remove_temporary_directory(tmp_directory_name: str) -> None:
-    def is_temporary_directory(path: str) -> bool:
-        try:
-            tmpdir = tempfile.gettempdir()
-            return os.path.commonpath([path, tmpdir]) == tmpdir and os.path.exists(path) and os.path.isdir(path)
-        except Exception:
-            return False
-    if is_temporary_directory(tmp_directory_name):  # Guard against errant deletion.
-        shutil.rmtree(tmp_directory_name)