dcicutils 8.8.5__tar.gz → 8.8.6__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. {dcicutils-8.8.5 → dcicutils-8.8.6}/PKG-INFO +6 -4
  2. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/ff_utils.py +4 -1
  3. dcicutils-8.8.6/dcicutils/file_utils.py +267 -0
  4. dcicutils-8.8.6/dcicutils/http_utils.py +39 -0
  5. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/misc_utils.py +82 -5
  6. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/scripts/view_portal_object.py +87 -5
  7. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/structured_data.py +35 -5
  8. dcicutils-8.8.6/dcicutils/tmpfile_utils.py +76 -0
  9. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/zip_utils.py +27 -0
  10. {dcicutils-8.8.5 → dcicutils-8.8.6}/pyproject.toml +8 -6
  11. dcicutils-8.8.5/dcicutils/file_utils.py +0 -58
  12. dcicutils-8.8.5/dcicutils/tmpfile_utils.py +0 -36
  13. {dcicutils-8.8.5 → dcicutils-8.8.6}/LICENSE.txt +0 -0
  14. {dcicutils-8.8.5 → dcicutils-8.8.6}/README.rst +0 -0
  15. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/__init__.py +0 -0
  16. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/base.py +0 -0
  17. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/beanstalk_utils.py +0 -0
  18. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/bundle_utils.py +0 -0
  19. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/captured_output.py +0 -0
  20. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/cloudformation_utils.py +0 -0
  21. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/codebuild_utils.py +0 -0
  22. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/command_utils.py +0 -0
  23. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/common.py +0 -0
  24. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/contribution_scripts.py +0 -0
  25. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/contribution_utils.py +0 -0
  26. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/creds_utils.py +0 -0
  27. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/data_readers.py +0 -0
  28. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/data_utils.py +0 -0
  29. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/datetime_utils.py +0 -0
  30. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/deployment_utils.py +0 -0
  31. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/diff_utils.py +0 -0
  32. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/docker_utils.py +0 -0
  33. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/ecr_scripts.py +0 -0
  34. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/ecr_utils.py +0 -0
  35. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/ecs_utils.py +0 -0
  36. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/env_base.py +0 -0
  37. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/env_manager.py +0 -0
  38. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/env_scripts.py +0 -0
  39. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/env_utils.py +0 -0
  40. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/env_utils_legacy.py +0 -0
  41. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/es_utils.py +0 -0
  42. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/exceptions.py +0 -0
  43. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/ff_mocks.py +0 -0
  44. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/function_cache_decorator.py +0 -0
  45. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/glacier_utils.py +0 -0
  46. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/jh_utils.py +0 -0
  47. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/kibana/dashboards.json +0 -0
  48. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/kibana/readme.md +0 -0
  49. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/lang_utils.py +0 -0
  50. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/license_policies/c4-infrastructure.jsonc +0 -0
  51. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/license_policies/c4-python-infrastructure.jsonc +0 -0
  52. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/license_policies/park-lab-common-server.jsonc +0 -0
  53. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/license_policies/park-lab-common.jsonc +0 -0
  54. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/license_policies/park-lab-gpl-pipeline.jsonc +0 -0
  55. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/license_policies/park-lab-pipeline.jsonc +0 -0
  56. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/license_utils.py +0 -0
  57. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/log_utils.py +0 -0
  58. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/obfuscation_utils.py +0 -0
  59. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/opensearch_utils.py +0 -0
  60. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/portal_object_utils.py +0 -0
  61. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/portal_utils.py +0 -0
  62. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/progress_bar.py +0 -0
  63. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/project_utils.py +0 -0
  64. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/qa_checkers.py +0 -0
  65. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/qa_utils.py +0 -0
  66. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/redis_tools.py +0 -0
  67. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/redis_utils.py +0 -0
  68. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/s3_utils.py +0 -0
  69. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/schema_utils.py +0 -0
  70. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/scripts/publish_to_pypi.py +0 -0
  71. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/scripts/run_license_checker.py +0 -0
  72. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/secrets_utils.py +0 -0
  73. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/sheet_utils.py +0 -0
  74. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/snapshot_utils.py +0 -0
  75. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/ssl_certificate_utils.py +0 -0
  76. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/submitr/progress_constants.py +0 -0
  77. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/submitr/ref_lookup_strategy.py +0 -0
  78. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/task_utils.py +0 -0
  79. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/trace_utils.py +0 -0
  80. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/validation_utils.py +0 -0
  81. {dcicutils-8.8.5 → dcicutils-8.8.6}/dcicutils/variant_utils.py +0 -0
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.8.5
3
+ Version: 8.8.6
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
7
7
  Author: 4DN-DCIC Team
8
8
  Author-email: support@4dnucleome.org
9
- Requires-Python: >=3.8,<3.12
9
+ Requires-Python: >=3.8,<3.13
10
10
  Classifier: Development Status :: 4 - Beta
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: Intended Audience :: Science/Research
@@ -24,9 +24,10 @@ Classifier: Programming Language :: Python :: 3.9
24
24
  Classifier: Topic :: Database :: Database Engines/Servers
25
25
  Requires-Dist: PyJWT (>=2.6.0,<3.0.0)
26
26
  Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
27
+ Requires-Dist: appdirs (>=1.4.4,<2.0.0)
27
28
  Requires-Dist: aws-requests-auth (>=0.4.2,<1)
28
- Requires-Dist: boto3 (>=1.28.57,<2.0.0)
29
- Requires-Dist: botocore (>=1.31.57,<2.0.0)
29
+ Requires-Dist: boto3 (>=1.34.93,<2.0.0)
30
+ Requires-Dist: botocore (>=1.34.93,<2.0.0)
30
31
  Requires-Dist: chardet (>=5.2.0,<6.0.0)
31
32
  Requires-Dist: docker (>=4.4.4,<5.0.0)
32
33
  Requires-Dist: elasticsearch (==7.13.4)
@@ -42,6 +43,7 @@ Requires-Dist: pytz (>=2020.4)
42
43
  Requires-Dist: redis (>=4.5.1,<5.0.0)
43
44
  Requires-Dist: requests (>=2.21.0,<3.0.0)
44
45
  Requires-Dist: rfc3986 (>=1.4.0,<2.0.0)
46
+ Requires-Dist: shortuuid (>=1.0.13,<2.0.0)
45
47
  Requires-Dist: structlog (>=19.2.0,<20.0.0)
46
48
  Requires-Dist: toml (>=0.10.1,<1)
47
49
  Requires-Dist: tqdm (>=4.66.2,<5.0.0)
@@ -895,9 +895,12 @@ def _get_es_metadata(uuids, es_client, filters, sources, chunk_size, auth):
895
895
  used to create the generator.
896
896
  Should NOT be used directly
897
897
  """
898
+ def get_es_host_local() -> Optional[str]:
899
+ return os.environ.get("ES_HOST_LOCAL", None)
898
900
  health = get_health_page(key=auth)
899
901
  if es_client is None:
900
- es_url = health['elasticsearch']
902
+ if not (es_url := get_es_host_local()):
903
+ es_url = health['elasticsearch']
901
904
  es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
902
905
  namespace_star = health.get('namespace', '') + '*'
903
906
  # match all given uuids to _id fields
@@ -0,0 +1,267 @@
1
+ import glob
2
+ import hashlib
3
+ import io
4
+ import os
5
+ import pathlib
6
+ from datetime import datetime
7
+ import random
8
+ import string
9
+ from tempfile import gettempdir as get_temporary_directory
10
+ from typing import List, Optional, Union
11
+ from uuid import uuid4 as uuid
12
+
13
+ HOME_DIRECTORY = str(pathlib.Path().home())
14
+
15
+
16
def search_for_file(file: str,
                    location: Union[str, pathlib.PurePath, Optional[List[Union[str, pathlib.PurePath]]]] = None,
                    recursive: bool = False,
                    single: bool = False,
                    order: bool = True) -> Union[List[str], Optional[str]]:
    """
    Searches for the given file name, first directly in the given directory or list of
    directories (location), or in the current (working) directory if no location is given;
    if the recursive flag is True then also searches all sub-directories of these directories.
    Matches are returned as absolute path names.

    If the single flag is True then just the first file found is returned (as a string),
    or None if none; otherwise all matched files are returned in a list, or an empty list
    if none. If the order flag is True (and single is not) then the list is sorted by the
    number of path components and then alphabetically.

    The file and location values may be strings or pathlib paths; a location which is
    actually an existing file is treated as its parent directory.
    """
    def order_by_fewest_number_of_paths_and_then_alphabetically(paths: List[str]) -> List[str]:
        return sorted(paths, key=lambda path: (len(path.split(os.path.sep)), path))

    if not (file and isinstance(file, (str, pathlib.PurePath))):
        return None if single is True else []
    # Work with a plain string from here on; the glob pattern logic below uses string methods,
    # which would raise an AttributeError on a pathlib object.
    file = str(file)
    if os.path.isabs(file):
        if os.path.exists(file):
            return file if single is True else [file]
        return None if single is True else []

    if not location:
        location = ["."]
    elif isinstance(location, (str, pathlib.PurePath)):
        location = [location]
    elif not isinstance(location, list):
        location = []

    # Normalize the directories to unique absolute paths, preserving the given order.
    directories = []
    for directory in location:
        if isinstance(directory, pathlib.PurePath):
            directory = str(directory)
        elif not isinstance(directory, str):
            continue
        if not (directory := directory.strip()):
            continue
        if os.path.isfile(directory := os.path.abspath(os.path.normpath(directory))):
            # Actually, allow a file rather than a directory; assume its parent directory was intended.
            if not (directory := os.path.dirname(directory)):
                continue
        if directory not in directories:
            directories.append(directory)

    files_found = []

    # First pass: look directly within each directory.
    for directory in directories:
        candidate = os.path.join(directory, file)
        if os.path.exists(candidate):
            file_found = os.path.abspath(os.path.normpath(candidate))
            if single is True:
                return file_found
            if file_found not in files_found:
                files_found.append(file_found)

    # Second pass: look within all sub-directories of each directory.
    if recursive is True:
        for directory in directories:
            if not directory.endswith("/**") and not file.startswith("**/"):
                pattern = f"{directory}/**/{file}"
            else:
                # The caller already supplied the recursive wildcard.
                pattern = f"{directory}/{file}"
            for file_found in glob.glob(pattern, recursive=True):
                file_found = os.path.abspath(file_found)
                if single is True:
                    return file_found
                if file_found not in files_found:
                    files_found.append(file_found)

    if single is True:
        # Any match would have been returned immediately above.
        return files_found[0] if files_found else None
    if order is True:
        return order_by_fewest_number_of_paths_and_then_alphabetically(files_found)
    return files_found
89
+
90
+
91
def normalize_path(value: Union[str, pathlib.Path], absolute: bool = False, expand_home: Optional[bool] = None) -> str:
    """
    Returns a normalized version of the given path: surrounding whitespace is stripped and
    the result is passed through os.path.normpath (which collapses redundant separators and
    parent references).

    - expand_home True: a leading home directory indicator ("~") is expanded.
    - expand_home False (on posix only): a path equal to, or located under, the home directory
      of the caller is rewritten using the home directory indicator ("~").
    - absolute True: the result is finally converted to an absolute path.

    If the given value is neither a string nor a pathlib.Path, or is blank, returns an empty string.
    """
    if isinstance(value, pathlib.Path):
        path = str(value)
    elif isinstance(value, str):
        path = value
    else:
        return ""
    path = path.strip()
    if not path:
        return ""
    path = os.path.normpath(path)
    if not path:
        return ""
    if expand_home is True:
        path = os.path.expanduser(path)
    elif (expand_home is False) and (os.name == "posix"):
        home_prefix = HOME_DIRECTORY + os.sep
        if path.startswith(home_prefix):
            path = "~/" + path[len(home_prefix):]
        elif path == HOME_DIRECTORY:
            path = "~"
    return os.path.abspath(path) if absolute is True else path
117
+
118
+
119
def get_file_size(file: Union[str, os.PathLike], raise_exception: bool = True) -> Optional[int]:
    """
    Returns the size in bytes of the given file (a string or path-like object, e.g. pathlib.Path).
    If the given value is neither of those then returns None. If the size cannot be obtained
    (e.g. the file does not exist) then the exception is re-raised, unless raise_exception
    is not True, in which case None is returned.
    """
    if not isinstance(file, (str, os.PathLike)):
        return None
    try:
        return os.path.getsize(file)
    except Exception:
        if raise_exception is True:
            raise
        return None
126
+
127
+
128
def get_file_modified_datetime(file: Union[str, os.PathLike], raise_exception: bool = True) -> Optional[datetime]:
    """
    Returns the last-modified time of the given file (a string or path-like object, e.g.
    pathlib.Path) as a naive datetime in local time (via datetime.fromtimestamp). If the
    given value is neither of those then returns None. If the time cannot be obtained (e.g.
    the file does not exist) then the exception is re-raised, unless raise_exception is
    not True, in which case None is returned.
    """
    if not isinstance(file, (str, os.PathLike)):
        return None
    try:
        return datetime.fromtimestamp(os.path.getmtime(file))
    except Exception:
        if raise_exception is True:
            raise
        return None
135
+
136
+
137
def are_files_equal(filea: str, fileb: str, raise_exception: bool = True) -> bool:
    """
    Returns True iff the two given files have exactly the same contents; the files are read
    and compared a block at a time. If either file cannot be opened or read then the
    exception is re-raised, unless raise_exception is not True, in which case False is returned.
    """
    block_size = 4096
    try:
        with open(filea, "rb") as stream_a, open(fileb, "rb") as stream_b:
            while True:
                block_a = stream_a.read(block_size)
                if block_a != stream_b.read(block_size):
                    return False
                if not block_a:
                    # Both streams ended at the same point without any difference.
                    return True
    except Exception:
        if raise_exception is True:
            raise
        return False
157
+
158
+
159
def compute_file_md5(file: Union[str, os.PathLike], raise_exception: bool = True) -> str:
    """
    Returns the md5 checksum (hex digest) of the contents of the given file (a string or
    path-like object, e.g. pathlib.Path). If the given value is neither of those then returns
    an empty string. If the file cannot be read then the exception is re-raised, unless
    raise_exception is not True, in which case an empty string is returned.
    """
    if not isinstance(file, (str, os.PathLike)):
        return ""
    try:
        md5 = hashlib.md5()
        # Use a distinct name for the handle so the file argument is not rebound.
        with open(file, "rb") as f:
            while chunk := f.read(4096):
                md5.update(chunk)
        return md5.hexdigest()
    except Exception:
        if raise_exception is True:
            raise
        return ""
175
+
176
+
177
def compute_file_etag(file: str, raise_exception: bool = True) -> Optional[str]:
    """
    Returns the AWS S3 "etag" for the given file; this value is md5-like but not the
    same as a normal md5. We use this to check that a file in S3 appears to be exactly
    the same file as a local file. If the file cannot be read then the exception is
    re-raised, unless raise_exception is not True, in which case None is returned.
    """
    try:
        with io.open(file, "rb") as f:
            return _compute_file_etag(f)
    except Exception:
        if raise_exception is True:
            raise
        return None


def _compute_file_etag(f: io.BufferedReader) -> str:
    """
    Computes the etag of the data read from the given binary stream. If the total size is
    below the multipart threshold then the result is simply the md5 hex digest of the data;
    otherwise the data is split into fixed size parts and the result is the md5 hex digest of
    the concatenated md5 digests of each part, followed by a dash and the number of parts.
    """
    # See: https://stackoverflow.com/questions/75723647/calculate-md5-from-aws-s3-etag
    MULTIPART_THRESHOLD = 8388608
    MULTIPART_CHUNKSIZE = 8388608
    READ_SIZE = 1048576
    # The logic below relies on no part being completed before the threshold is reached,
    # so that for a non-multipart file the (only, partial) part hash is the hash of the whole.
    # assert(MULTIPART_CHUNKSIZE >= MULTIPART_THRESHOLD)
    part_digests = []
    part_md5 = hashlib.md5()
    nbytes_in_part = 0
    nbytes_total = 0
    while True:
        # Never read across a part boundary, so that parts are delimited correctly
        # even if a read returns fewer bytes than requested.
        data = f.read(min(READ_SIZE, MULTIPART_CHUNKSIZE - nbytes_in_part))
        if not data:
            break
        part_md5.update(data)
        nbytes_in_part += len(data)
        nbytes_total += len(data)
        if nbytes_in_part == MULTIPART_CHUNKSIZE:
            # Done with a part; save its digest and start hashing the next part.
            part_digests.append(part_md5.digest())
            part_md5 = hashlib.md5()
            nbytes_in_part = 0
    if nbytes_total < MULTIPART_THRESHOLD:
        # Normal (non-multipart) upload; just the plain md5.
        return part_md5.hexdigest()
    if nbytes_in_part > 0:
        # Add the last part if it is a partial one.
        part_digests.append(part_md5.digest())
    # Multipart upload; the hash of the part hashes plus the number of parts.
    return hashlib.md5(b"".join(part_digests)).hexdigest() + "-" + str(len(part_digests))
231
+
232
+
233
def create_random_file(file: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
                       nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
    """
    Writes random content to the given file (name/path) and returns the path of the file written.
    If no file is given then writes to a newly named file in the system temporary directory;
    the generated name consists of the given prefix (if any), a UTC timestamp, a random uuid,
    and the given suffix (if any). The prefix and suffix are ignored if a file is given.

    The number of bytes written is 1024 by default and can be specified with the nbytes argument
    (an invalid or negative value is treated as zero). By default ASCII letters and digits are
    written, in lines of 80 characters each (plus a newline); use the line_length argument to
    change the line length. If the binary argument is True then random binary data is written.
    """
    if not isinstance(nbytes, int) or nbytes < 0:
        nbytes = 0
    if not isinstance(file, str) or not file:
        from datetime import timezone
        if not isinstance(prefix, str):
            prefix = ""
        if not isinstance(suffix, str):
            suffix = ""
        timestamp = datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S')
        file = f"{prefix}{timestamp}{str(uuid()).replace('-', '')}{suffix}"
        file = os.path.join(get_temporary_directory(), file)
    if binary is True:
        with open(file, "wb") as f:
            f.write(os.urandom(nbytes))
        return file
    if (not isinstance(line_length, int)) or (line_length < 1):
        line_length = 80
    line_length += 1  # Account for the newline which terminates each line.
    nlines, nremainder = divmod(nbytes, line_length)
    alphabet = string.ascii_letters + string.digits
    # Disable newline translation so that exactly nbytes bytes are written on any platform.
    with open(file, "w", newline="\n") as f:
        for _ in range(nlines):
            f.write("".join(random.choices(alphabet, k=line_length - 1)))
            f.write("\n")
        # The final (short) line, if any, also ends with a newline which counts towards nbytes.
        if nremainder > 1:
            f.write("".join(random.choices(alphabet, k=nremainder - 1)))
        if nremainder > 0:
            f.write("\n")
    return file
@@ -0,0 +1,39 @@
1
+ from contextlib import contextmanager
2
+ import requests
3
+ from typing import Callable, Optional
4
+ from dcicutils.tmpfile_utils import temporary_file
5
+
6
+
7
@contextmanager
def download(url: str, suffix: Optional[str] = None, binary: bool = True,
             progress: Optional[Callable] = None) -> Optional[str]:
    """
    Context manager which downloads the given URL into a temporary file (obtained from
    temporary_file, with the given optional file name suffix) and yields the path of
    that file. The binary and progress arguments are passed through to download_to;
    binary file mode is the default, pass False as the binary argument if not desired.
    """
    with temporary_file(suffix=suffix) as temporary_path:
        download_to(url, temporary_path, binary=binary, progress=progress)
        yield temporary_path
18
+
19
+
20
def download_to(url: str, file: str, binary: bool = True, progress: Optional[Callable] = None) -> None:
    """
    Downloads the given URL into the given file, streaming the content in chunks. Defaults
    to binary file mode; if the binary argument is not True then the content is decoded
    (using the encoding declared by the response, or UTF-8 if none or unknown) and written
    as UTF-8 text. If a callable progress argument is given then it is called after each
    chunk is written, with the number of bytes downloaded so far and the total number of
    bytes according to the Content-Length response header (or None if not available).
    """
    import codecs
    if not callable(progress):
        progress = None
    # Use the response as a context manager so that the connection is always released.
    with requests.get(url, stream=True) as response:
        nbytes = 0
        nbytes_total = None
        content_length = response.headers.get("Content-Length")
        if isinstance(content_length, str) and content_length.isdigit():
            nbytes_total = int(content_length)
        if binary is True:
            decoder = None
            output = open(file, "wb")
        else:
            # The chunks from iter_content are bytes, which cannot be written to a text mode
            # file as is; decode them incrementally so that a multi-byte character which is
            # split across two chunks is handled properly.
            try:
                decoder_class = codecs.getincrementaldecoder(response.encoding or "utf-8")
            except LookupError:
                decoder_class = codecs.getincrementaldecoder("utf-8")
            decoder = decoder_class(errors="replace")
            output = open(file, "w", encoding="utf-8")
        with output as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(decoder.decode(chunk) if decoder else chunk)
                    if progress:
                        nbytes += len(chunk)
                        progress(nbytes, nbytes_total)
            if decoder:
                # Flush anything still buffered in the decoder.
                f.write(decoder.decode(b"", final=True))
@@ -3,6 +3,7 @@ This file contains functions that might be generally useful.
3
3
  """
4
4
 
5
5
  from collections import namedtuple
6
+ import appdirs
6
7
  import contextlib
7
8
  import datetime
8
9
  import functools
@@ -13,10 +14,12 @@ import json
13
14
  import logging
14
15
  import math
15
16
  import os
17
+ import platform
16
18
  import pytz
17
19
  import re
18
20
  import rfc3986.validators
19
21
  import rfc3986.exceptions
22
+ import shortuuid
20
23
  import time
21
24
  import uuid
22
25
  import warnings
@@ -1152,7 +1155,8 @@ def remove_suffix(suffix: str, text: str, required: bool = False):
1152
1155
 
1153
1156
  def remove_empty_properties(data: Optional[Union[list, dict]],
1154
1157
  isempty: Optional[Callable] = None,
1155
- isempty_array_element: Optional[Callable] = None) -> None:
1158
+ isempty_array_element: Optional[Callable] = None,
1159
+ raise_exception_on_nonempty_array_element_after_empty: bool = False) -> None:
1156
1160
  def _isempty(value: Any) -> bool: # noqa
1157
1161
  return isempty(value) if callable(isempty) else value in [None, "", {}, []]
1158
1162
  if isinstance(data, dict):
@@ -1160,11 +1164,22 @@ def remove_empty_properties(data: Optional[Union[list, dict]],
1160
1164
  if _isempty(value := data[key]):
1161
1165
  del data[key]
1162
1166
  else:
1163
- remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element)
1167
+ remove_empty_properties(value, isempty=isempty, isempty_array_element=isempty_array_element,
1168
+ raise_exception_on_nonempty_array_element_after_empty= # noqa
1169
+ raise_exception_on_nonempty_array_element_after_empty)
1164
1170
  elif isinstance(data, list):
1165
1171
  for item in data:
1166
- remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element)
1172
+ remove_empty_properties(item, isempty=isempty, isempty_array_element=isempty_array_element,
1173
+ raise_exception_on_nonempty_array_element_after_empty= # noqa
1174
+ raise_exception_on_nonempty_array_element_after_empty)
1167
1175
  if callable(isempty_array_element):
1176
+ if raise_exception_on_nonempty_array_element_after_empty is True:
1177
+ empty_element_seen = False
1178
+ for item in data:
1179
+ if not empty_element_seen and isempty_array_element(item):
1180
+ empty_element_seen = True
1181
+ elif empty_element_seen and not isempty_array_element(item):
1182
+ raise Exception("Non-empty element found after empty element.")
1168
1183
  data[:] = [item for item in data if not isempty_array_element(item)]
1169
1184
 
1170
1185
 
@@ -1522,7 +1537,7 @@ def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]],
1522
1537
def create_dict(**kwargs) -> dict:
    """
    Returns a dictionary of the given keyword arguments, leaving out any whose value is None;
    other falsy values (e.g. zero, False, empty string) are kept.
    """
    return {name: value for name, value in kwargs.items() if value is not None}
1528
1543
 
@@ -2548,6 +2563,19 @@ def normalize_spaces(value: str) -> str:
2548
2563
  return re.sub(r"\s+", " ", value).strip()
2549
2564
 
2550
2565
 
2566
def normalize_string(value: Optional[str]) -> Optional[str]:
    """
    Returns the given string with each run of whitespace replaced by a single space and
    with leading/trailing whitespace removed. If the given value is None then returns an
    empty string. If the given value is otherwise not a string then returns None.
    """
    if value is None:
        return ""
    if not isinstance(value, str):
        return None
    collapsed = re.sub(r"\s+", " ", value)
    return collapsed.strip()
2577
+
2578
+
2551
2579
  def find_nth_from_end(string: str, substring: str, nth: int) -> int:
2552
2580
  """
2553
2581
  Returns the index of the nth occurrence of the given substring within
@@ -2590,7 +2618,11 @@ def format_size(nbytes: Union[int, float], precision: int = 2, nospace: bool = F
2590
2618
  nbytes = int(nbytes)
2591
2619
  return f"{nbytes} byte{'s' if nbytes != 1 else ''}"
2592
2620
  unit = (UNITS_TERSE if terse else UNITS)[index]
2593
- return f"{nbytes:.{precision}f}{'' if nospace else ' '}{unit}"
2621
+ size = f"{nbytes:.{precision}f}"
2622
+ if size.endswith(f".{'0' * precision}"):
2623
+ # Tidy up extraneous zeros.
2624
+ size = size[:-(precision - 1)]
2625
+ return f"{size}{'' if nospace else ' '}{unit}"
2594
2626
 
2595
2627
 
2596
2628
  def format_duration(seconds: Union[int, float]) -> str:
@@ -2670,3 +2702,48 @@ class JsonLinesReader:
2670
2702
  yield line
2671
2703
  else:
2672
2704
  raise Exception(f"If the first line is not a list, all lines must be dictionaries: {line!r}")
2705
+
2706
+
2707
def get_app_specific_directory() -> str:
    """
    Returns the standard system application specific (user data) directory,
    as determined by the appdirs package:
    - On MacOS this directory is: ~/Library/Application Support
    - On Linux this directory is: ~/.local/share
    - On Windows this directory is: %USERPROFILE%\\AppData\\Local  # noqa
    N.B. This has been tested on MacOS and Linux but not on Windows.
    """
    directory = appdirs.user_data_dir()
    return directory
2716
+
2717
+
2718
def get_os_name() -> str:
    """
    Returns a short lowercase name for the operating system, based on platform.system():
    "osx" for Darwin, "linux" for Linux, "windows" for Windows; otherwise an empty string.
    """
    names = {"Darwin": "osx", "Linux": "linux", "Windows": "windows"}
    return names.get(platform.system(), "")
2724
+
2725
+
2726
def get_cpu_architecture_name() -> str:
    """
    Returns the machine (CPU architecture) name as reported by platform.machine(), except
    that "x86_64" is returned as "amd64"; returns an empty string if it cannot be determined.
    """
    machine = platform.machine()
    if not machine:
        return ""
    return "amd64" if machine == "x86_64" else machine
2731
+
2732
+
2733
def create_uuid(nodash: bool = False, upper: bool = False) -> str:
    """
    Returns a newly generated random (version 4) uuid as a string; if the nodash argument
    is True then without the dashes; if the upper argument is True then in upper case.
    """
    value = uuid.uuid4()
    text = value.hex if nodash is True else str(value)
    return text.upper() if upper is True else text
2740
+
2741
+
2742
def create_short_uuid(length: Optional[int] = None, upper: bool = False):
    """
    Returns a random string of the given length generated by shortuuid; the length defaults
    to 16 if not given or not a positive integer. If the upper argument is True then the
    result is converted to upper case. Not really technically a uuid of course.
    """
    if not isinstance(length, int) or length < 1:
        length = 16
    generator = shortuuid.ShortUUID()
    value = generator.random(length=length)
    return value.upper() if upper is True else value
@@ -57,6 +57,7 @@
57
57
 
58
58
  import argparse
59
59
  from functools import lru_cache
60
+ import io
60
61
  import json
61
62
  import pyperclip
62
63
  import os
@@ -97,11 +98,18 @@ def main():
97
98
  help="Include all properties for schema usage.")
98
99
  parser.add_argument("--raw", action="store_true", required=False, default=False, help="Raw output.")
99
100
  parser.add_argument("--tree", action="store_true", required=False, default=False, help="Tree output for schemas.")
101
+ parser.add_argument("--post", type=str, required=False, default=None,
102
+ help="POST data of the main arg type with data from file specified with this option.")
103
+ parser.add_argument("--patch", type=str, required=False, default=None,
104
+ help="PATCH data of the main arg type with data from file specified with this option.")
100
105
  parser.add_argument("--database", action="store_true", required=False, default=False,
101
106
  help="Read from database output.")
107
+ parser.add_argument("--bool", action="store_true", required=False,
108
+ default=False, help="Only return whether found or not.")
102
109
  parser.add_argument("--yaml", action="store_true", required=False, default=False, help="YAML output.")
103
110
  parser.add_argument("--copy", "-c", action="store_true", required=False, default=False,
104
111
  help="Copy object data to clipboard.")
112
+ parser.add_argument("--indent", required=False, default=False, help="Indent output.", type=int)
105
113
  parser.add_argument("--details", action="store_true", required=False, default=False, help="Detailed output.")
106
114
  parser.add_argument("--more-details", action="store_true", required=False, default=False,
107
115
  help="More detailed output.")
@@ -151,6 +159,18 @@ def main():
151
159
  args.schema = True
152
160
 
153
161
  if args.schema:
162
+ if args.post:
163
+ if post_data := _read_json_from_file(args.post):
164
+ if args.verbose:
165
+ _print(f"POSTing data from file ({args.post}) as type: {args.uuid}")
166
+ if isinstance(post_data, dict):
167
+ post_data = [post_data]
168
+ elif not isinstance(post_data, list):
169
+ _print(f"POST data neither list nor dictionary: {args.post}")
170
+ for item in post_data:
171
+ portal.post_metadata(args.uuid, item)
172
+ if args.verbose:
173
+ _print(f"Done POSTing data from file ({args.post}) as type: {args.uuid}")
154
174
  schema, schema_name = _get_schema(portal, args.uuid)
155
175
  if schema:
156
176
  if args.copy:
@@ -166,14 +186,50 @@ def main():
166
186
  _print_schema(schema, details=args.details, more_details=args.details,
167
187
  all=args.all, raw=args.raw, raw_yaml=args.yaml)
168
188
  return
169
-
170
- data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw, database=args.database, verbose=args.verbose)
189
+ elif args.patch:
190
+ if patch_data := _read_json_from_file(args.patch):
191
+ if args.verbose:
192
+ _print(f"PATCHing data from file ({args.patch}) for object: {args.uuid}")
193
+ if isinstance(patch_data, dict):
194
+ patch_data = [patch_data]
195
+ elif not isinstance(patch_data, list):
196
+ _print(f"PATCH data neither list nor dictionary: {args.patch}")
197
+ for item in patch_data:
198
+ portal.patch_metadata(args.uuid, item)
199
+ if args.verbose:
200
+ _print(f"Done PATCHing data from file ({args.patch}) as type: {args.uuid}")
201
+ return
202
+ else:
203
+ _print(f"No PATCH data found in file: {args.patch}")
204
+ exit(1)
205
+
206
+ data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw,
207
+ database=args.database, check=args.bool, verbose=args.verbose)
208
+ if args.bool:
209
+ if data:
210
+ _print(f"{args.uuid}: found")
211
+ exit(0)
212
+ else:
213
+ _print(f"{args.uuid}: not found")
214
+ exit(1)
171
215
  if args.copy:
172
216
  pyperclip.copy(json.dumps(data, indent=4))
173
217
  if args.yaml:
174
218
  _print(yaml.dump(data))
175
219
  else:
176
- _print(json.dumps(data, default=str, indent=4))
220
+ if args.indent > 0:
221
+ _print(_format_json_with_indent(data, indent=args.indent))
222
+ else:
223
+ _print(json.dumps(data, default=str, indent=4))
224
+
225
+
226
+ def _format_json_with_indent(value: dict, indent: int = 0) -> Optional[str]:
227
+ if isinstance(value, dict):
228
+ result = json.dumps(value, indent=4)
229
+ if indent > 0:
230
+ result = f"{indent * ' '}{result}"
231
+ result = result.replace("\n", f"\n{indent * ' '}")
232
+ return result
177
233
 
178
234
 
179
235
  def _create_portal(ini: str, env: Optional[str] = None,
@@ -198,7 +254,8 @@ def _create_portal(ini: str, env: Optional[str] = None,
198
254
 
199
255
 
200
256
  def _get_portal_object(portal: Portal, uuid: str,
201
- raw: bool = False, database: bool = False, verbose: bool = False) -> dict:
257
+ raw: bool = False, database: bool = False,
258
+ check: bool = False, verbose: bool = False) -> dict:
202
259
  response = None
203
260
  try:
204
261
  if not uuid.startswith("/"):
@@ -212,13 +269,18 @@ def _get_portal_object(portal: Portal, uuid: str,
212
269
  _exit()
213
270
  _exit(f"Exception getting Portal object from {portal.server}: {uuid}\n{get_error_message(e)}")
214
271
  if not response:
272
+ if check:
273
+ return None
215
274
  _exit(f"Null response getting Portal object from {portal.server}: {uuid}")
216
275
  if response.status_code not in [200, 307]:
217
276
  # TODO: Understand why the /me endpoint returns HTTP status code 307, which is only why we mention it above.
218
277
  _exit(f"Invalid status code ({response.status_code}) getting Portal object from {portal.server}: {uuid}")
219
278
  if not response.json:
220
279
  _exit(f"Invalid JSON getting Portal object: {uuid}")
221
- return response.json()
280
+ response = response.json()
281
+ if raw:
282
+ response.pop("schema_version", None)
283
+ return response
222
284
 
223
285
 
224
286
  @lru_cache(maxsize=1)
@@ -257,6 +319,7 @@ def _print_schema_info(schema: dict, level: int = 0,
257
319
  required: Optional[List[str]] = None) -> None:
258
320
  if not schema or not isinstance(schema, dict):
259
321
  return
322
+ identifying_properties = schema.get("identifyingProperties")
260
323
  if level == 0:
261
324
  if required_properties := schema.get("required"):
262
325
  _print("- required properties:")
@@ -383,6 +446,8 @@ def _print_schema_info(schema: dict, level: int = 0,
383
446
  suffix += f" | enum"
384
447
  if property_required:
385
448
  suffix += f" | required"
449
+ if property_name in (identifying_properties or []):
450
+ suffix += f" | identifying"
386
451
  if property.get("uniqueKey"):
387
452
  suffix += f" | unique"
388
453
  if pattern := property.get("pattern"):
@@ -529,6 +594,23 @@ def _print_tree(root_name: Optional[str],
529
594
  print(line)
530
595
 
531
596
 
597
def _read_json_from_file(file: str) -> Optional[dict]:
    """
    Reads and returns the JSON contained in the given file. If the file does not
    exist, cannot be opened, or does not contain parsable JSON, then prints a
    message saying so and exits with status 1.
    """
    if not os.path.exists(file):
        _print(f"Cannot find file: {file}")
        exit(1)
    try:
        with io.open(file, "r") as f:
            try:
                return json.load(f)
            except Exception:
                _print(f"Cannot parse JSON in file: {file}")
                exit(1)
    except Exception as e:
        # Report the underlying error via _print, like every other message
        # in this function, rather than via a bare print.
        _print(str(e))
        _print(f"Cannot open file: {file}")
        exit(1)
612
+
613
+
532
614
  def _print(*args, **kwargs):
533
615
  with uncaptured_output():
534
616
  PRINT(*args, **kwargs)
@@ -53,6 +53,7 @@ class StructuredDataSet:
53
53
  def __init__(self, file: Optional[str] = None, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
54
54
  schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
55
55
  order: Optional[List[str]] = None, prune: bool = True,
56
+ remove_empty_objects_from_lists: bool = True,
56
57
  ref_lookup_strategy: Optional[Callable] = None,
57
58
  ref_lookup_nocache: bool = False,
58
59
  norefs: bool = False,
@@ -65,7 +66,8 @@ class StructuredDataSet:
65
66
  ref_lookup_nocache=ref_lookup_nocache) if portal else None
66
67
  self._ref_lookup_strategy = ref_lookup_strategy
67
68
  self._order = order
68
- self._prune = prune
69
+ self._prune = prune is True
70
+ self._remove_empty_objects_from_lists = remove_empty_objects_from_lists is True
69
71
  self._warnings = {}
70
72
  self._errors = {}
71
73
  self._resolved_refs = set()
@@ -93,12 +95,14 @@ class StructuredDataSet:
93
95
def load(file: str, portal: Optional[Union[VirtualApp, TestApp, Portal]] = None,
         schemas: Optional[List[dict]] = None, autoadd: Optional[dict] = None,
         order: Optional[List[str]] = None, prune: bool = True,
         remove_empty_objects_from_lists: bool = True,
         ref_lookup_strategy: Optional[Callable] = None,
         ref_lookup_nocache: bool = False,
         norefs: bool = False,
         progress: Optional[Callable] = None,
         debug_sleep: Optional[str] = None) -> StructuredDataSet:
    """
    Convenience factory: constructs and returns a StructuredDataSet for the given
    file, passing every argument through, unchanged, to the StructuredDataSet constructor.
    """
    options = {
        "portal": portal,
        "schemas": schemas,
        "autoadd": autoadd,
        "order": order,
        "prune": prune,
        "remove_empty_objects_from_lists": remove_empty_objects_from_lists,
        "ref_lookup_strategy": ref_lookup_strategy,
        "ref_lookup_nocache": ref_lookup_nocache,
        "norefs": norefs,
        "progress": progress,
        "debug_sleep": debug_sleep,
    }
    return StructuredDataSet(file=file, **options)
104
108
 
@@ -346,7 +350,18 @@ class StructuredDataSet:
346
350
 
347
351
def _load_json_file(self, file: str) -> None:
    """
    Loads the JSON in the given file into this data set. If the file name looks like
    a known schema name then the file is assumed to contain an object, or an array of
    objects, of that schema type. Otherwise, if the JSON is a dictionary, each of its
    properties is assumed to name a schema and to contain the objects of that type.
    JSON which matches neither case is not added.
    """
    with open(file) as f:
        file_json = json.load(f)
    schema_inferred_from_file_name = Schema.type_name(file)
    # NOTE(review): self._portal looks to be None when no portal was given; in that case the
    # schema name cannot be checked, so fall back to trusting the file name as the type name.
    if self._portal is None or self._portal.get_schema(schema_inferred_from_file_name) is not None:
        # If the JSON file name looks like a schema name then assume it
        # contains an object or an array of object of that schema type.
        self._add(schema_inferred_from_file_name, file_json)
    elif isinstance(file_json, dict):
        # Otherwise if the JSON file name does not look like a schema name then
        # assume it a dictionary where each property is the name of a schema, and
        # which (each property) contains a list of object of that schema type.
        for schema_name, schema_data in file_json.items():
            self._add(schema_name, schema_data)
350
365
 
351
366
  def _load_reader(self, reader: RowReader, type_name: str) -> None:
352
367
  schema = None
@@ -368,7 +383,11 @@ class StructuredDataSet:
368
383
  structured_row_template.set_value(structured_row, column_name, value, reader.file, reader.row_number)
369
384
  if self._autoadd_properties:
370
385
  self._add_properties(structured_row, self._autoadd_properties, schema)
371
- self._add(type_name, structured_row)
386
+ if (prune_error := self._prune_structured_row(structured_row)) is not None:
387
+ self._note_error({"src": create_dict(type=schema_name, row=reader.row_number),
388
+ "error": prune_error}, "validation")
389
+ else:
390
+ self._add(type_name, structured_row)
372
391
  if self._progress:
373
392
  self._progress({
374
393
  PROGRESS.LOAD_ITEM: self._nrows,
@@ -385,9 +404,20 @@ class StructuredDataSet:
385
404
  self._note_error(schema._unresolved_refs, "ref")
386
405
  self._resolved_refs.update(schema._resolved_refs)
387
406
 
388
- def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
389
- if self._prune:
407
def _prune_structured_row(self, data: dict) -> Optional[str]:
    """
    Removes empty properties from the given structured row, if pruning is enabled for
    this data set; otherwise does nothing. The result of remove_empty_properties is not
    used, so it is presumed to modify the given data in place. When empty objects are
    also to be removed from lists, any exception raised while pruning is returned as
    its message string. Returns None in all other cases.
    """
    if self._prune and self._remove_empty_objects_from_lists:
        try:
            remove_empty_properties(
                data,
                isempty_array_element=lambda element: element == {},
                raise_exception_on_nonempty_array_element_after_empty=True)
        except Exception as e:
            return str(e)
    elif self._prune:
        remove_empty_properties(data)
    return None
419
+
420
+ def _add(self, type_name: str, data: Union[dict, List[dict]]) -> None:
391
421
  if type_name in self._data:
392
422
  self._data[type_name].extend([data] if isinstance(data, dict) else data)
393
423
  else:
@@ -0,0 +1,76 @@
1
+ from contextlib import contextmanager
2
+ from datetime import datetime
3
+ import os
4
+ import shutil
5
+ import tempfile
6
+ from uuid import uuid4 as uuid
7
+ from typing import List, Optional, Union
8
+ from dcicutils.file_utils import create_random_file
9
+
10
+
11
@contextmanager
def temporary_directory() -> str:
    """
    Context manager which creates a new temporary directory, yields its full path,
    and removes the directory, recursively, on exit from the with-block.
    """
    # tempfile.TemporaryDirectory removes the directory when its with-block exits, also
    # on exception, so no separate cleanup step is needed. A try/finally referring to the
    # directory name here would fail with an UnboundLocalError, masking the real error,
    # whenever the directory could not be created in the first place.
    with tempfile.TemporaryDirectory() as tmp_directory_name:
        yield tmp_directory_name
18
+
19
+
20
@contextmanager
def temporary_file(name: Optional[str] = None, prefix: Optional[str] = None, suffix: Optional[str] = None,
                   content: Optional[Union[str, bytes, List[str]]] = None) -> str:
    """
    Context manager which creates a file within a new temporary directory and yields
    its full path. The file name is the given name, or a generated one if not given,
    with the given prefix and suffix, if any, added. If content is given it is written
    to the file: bytes in binary mode, otherwise as text, with a list of strings joined
    by newlines. The file lives in a directory obtained from temporary_directory.
    """
    with temporary_directory() as tmp_directory_name:
        base_name = name or tempfile.mktemp(dir="")
        tmp_file_path = os.path.join(tmp_directory_name, f"{prefix or ''}{base_name}{suffix or ''}")
        mode = "wb" if isinstance(content, bytes) else "w"
        with open(tmp_file_path, mode) as tmp_file:
            if content is not None:
                if isinstance(content, list):
                    content = "\n".join(content)
                tmp_file.write(content)
        yield tmp_file_path
30
+
31
+
32
def create_temporary_file_name(prefix: Optional[str] = None, suffix: Optional[str] = None) -> str:
    """
    Generates and returns the full path to a file within the system temporary directory.
    The file name is the given prefix, if any, followed by a UTC timestamp (YYYYMMDDhhmmss)
    and a random UUID (as 32 hex digits), followed by the given suffix, if any.
    Only the name is generated; the file itself is not created.
    """
    # Local import, as only the datetime class itself is imported at the top of this module.
    from datetime import timezone
    # Use an aware UTC "now" rather than datetime.utcnow, which is deprecated as of
    # Python 3.12; the formatted timestamp is the same.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")
    random_string = f"{timestamp}{uuid().hex}"
    tmp_file_name = f"{prefix or ''}{random_string}{suffix or ''}"
    return os.path.join(tempfile.gettempdir(), tmp_file_name)
39
+
40
+
41
@contextmanager
def temporary_random_file(prefix: Optional[str] = None, suffix: Optional[str] = None,
                          nbytes: int = 1024, binary: bool = False, line_length: Optional[int] = None) -> str:
    """
    Context manager which creates a temporary file, via temporary_file with the given
    prefix and suffix, has create_random_file fill it according to the given nbytes,
    binary, and line_length arguments, and yields the full path to the file.
    """
    random_file_options = dict(nbytes=nbytes, binary=binary, line_length=line_length)
    with temporary_file(prefix=prefix, suffix=suffix) as tmp_file_path:
        create_random_file(tmp_file_path, **random_file_options)
        yield tmp_file_path
47
+
48
+
49
def remove_temporary_directory(tmp_directory_name: str) -> None:
    """
    Removes the given directory, recursively; but ONLY if is_temporary_directory says
    that it is a directory within the system temporary directory; otherwise does nothing.
    """
    if not is_temporary_directory(tmp_directory_name):
        return  # Guard against errant deletion.
    shutil.rmtree(tmp_directory_name)
55
+
56
+
57
def remove_temporary_file(tmp_file_name: str) -> bool:
    """
    Removes the given file; but ONLY if it is (somewhere) within the system temporary directory.
    Returns True if the file was removed; otherwise, including on any error, returns False.
    """
    try:
        tmpdir = os.path.realpath(tempfile.gettempdir())
        # Resolve the directory containing the file (symbolic links and any ".." parts)
        # before checking it against the temporary directory; os.path.commonpath compares
        # path components literally, so an unresolved path like <tmpdir>/../file would
        # otherwise pass the check though it is actually outside the temporary directory.
        parent_directory, base_name = os.path.split(tmp_file_name)
        tmp_file_path = os.path.join(os.path.realpath(parent_directory), base_name)
        if (os.path.commonpath([tmpdir, tmp_file_path]) == tmpdir) and os.path.isfile(tmp_file_path):
            os.remove(tmp_file_path)
            return True
        return False
    except Exception:
        return False
69
+
70
+
71
def is_temporary_directory(path: str) -> bool:
    """
    Returns True if the given path is an existing directory which is (somewhere) within
    the system temporary directory, or is the system temporary directory itself;
    otherwise, including on any error, returns False.
    """
    try:
        # Resolve both paths (symbolic links and any ".." parts) before comparing them;
        # os.path.commonpath compares path components literally, so an unresolved path
        # like <tmpdir>/.. would otherwise be taken to be within the temporary directory.
        tmpdir = os.path.realpath(tempfile.gettempdir())
        resolved_path = os.path.realpath(path)
        return os.path.commonpath([resolved_path, tmpdir]) == tmpdir and os.path.isdir(resolved_path)
    except Exception:
        return False
@@ -2,7 +2,9 @@ from contextlib import contextmanager
2
2
  from dcicutils.tmpfile_utils import temporary_directory, temporary_file
3
3
  import gzip
4
4
  import os
5
+ import shutil
5
6
  import tarfile
7
+ import tempfile
6
8
  from typing import List, Optional
7
9
  import zipfile
8
10
 
@@ -45,3 +47,28 @@ def unpack_gz_file_to_temporary_file(file: str, suffix: Optional[str] = None) ->
45
47
  outputf.write(inputf.read())
46
48
  outputf.close()
47
49
  yield tmp_file_name
50
+
51
+
52
def extract_file_from_zip(zip_file: str, file_to_extract: str,
                          destination_file: str, raise_exception: bool = True) -> bool:
    """
    Extracts from the given zip file, the given file to extract, writing it to the
    given destination file; if the destination file has no directory part then it is
    written to the current directory. Returns True if all is well. Returns False if
    the file to extract is not in the zip file. On any other error, raises an exception
    if the raise_exception argument is True (the default), otherwise returns False.
    """
    try:
        if not (destination_directory := os.path.dirname(destination_file)):
            destination_directory = os.getcwd()
            destination_file = os.path.join(destination_directory, destination_file)
        with tempfile.TemporaryDirectory() as tmp_directory_name:
            with zipfile.ZipFile(zip_file, "r") as zipf:
                if file_to_extract not in zipf.namelist():
                    return False
                # Use the path which extract returns, rather than re-joining the member name
                # to the temporary directory; extract normalizes the member name (dropping
                # any leading slash or ".." parts), so the two can differ, and the re-joined
                # path could even refer to a file outside of the temporary directory.
                extracted_file = zipf.extract(file_to_extract, path=tmp_directory_name)
            os.makedirs(destination_directory, exist_ok=True)
            shutil.move(extracted_file, destination_file)
        return True
    except Exception:
        if raise_exception:
            raise
        return False
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dcicutils"
3
- version = "8.8.5"
3
+ version = "8.8.6"
4
4
  description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
5
5
  authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
6
6
  license = "MIT"
@@ -37,12 +37,13 @@ classifiers = [
37
37
 
38
38
 
39
39
  [tool.poetry.dependencies]
40
- python = ">=3.8,<3.12"
41
- boto3 = "^1.28.57"
42
- botocore = "^1.31.57"
40
+ python = ">=3.8,<3.13"
41
+ boto3 = "^1.34.93"
42
+ botocore = "^1.34.93"
43
43
  # The DCIC portals (cgap-portal and fourfront) are very particular about which ElasticSearch version.
44
44
  # This value is intentionally pinned and must not be changed casually.
45
45
  elasticsearch = "7.13.4"
46
+ appdirs = "^1.4.4"
46
47
  aws-requests-auth = ">=0.4.2,<1"
47
48
  chardet = "^5.2.0"
48
49
  docker = "^4.4.4"
@@ -60,6 +61,7 @@ pyperclip = "^1.8.2"
60
61
  PyYAML = "^6.0.1"
61
62
  requests = "^2.21.0"
62
63
  rfc3986 = "^1.4.0"
64
+ shortuuid = "^1.0.13"
63
65
  structlog = "^19.2.0"
64
66
  toml = ">=0.10.1,<1"
65
67
  tqdm = "^4.66.2"
@@ -69,8 +71,8 @@ webtest = "^2.0.34"
69
71
 
70
72
 
71
73
  [tool.poetry.dev-dependencies]
72
- boto3-stubs = "^1.28.57"
73
- botocore-stubs = "^1.31.57"
74
+ boto3-stubs = "^1.34.93"
75
+ botocore-stubs = "^1.34.93"
74
76
  coverage = ">=7.2.3"
75
77
  # Loaded manually in GA workflow for coverage because a dependency on 2to3
76
78
  # in its docopts dependency makes a problem for loading it here in poetry. -kmp 7-Apr-2023
@@ -1,58 +0,0 @@
1
- import glob
2
- import os
3
- import pathlib
4
- from typing import List, Optional, Union
5
-
6
-
7
- def search_for_file(file: str,
8
- location: Union[str, Optional[List[str]]] = None,
9
- recursive: bool = False,
10
- single: bool = False) -> Union[List[str], Optional[str]]:
11
- """
12
- Searches for the existence of the given file name, first directly in the given directory or list
13
- of directories, if specified, and if not then just in the current (working) directory; if the
14
- given recursive flag is True then also searches all sub-directories of these directories;
15
- returns the full path name to the file if found. If the single flag is True then just the
16
- first file which is found is returns (as a string), or None if none; if the single flag
17
- is False, then all matched files are returned in a list, or and empty list if none.
18
- """
19
- if file and isinstance(file, (str, pathlib.PosixPath)):
20
- if os.path.isabs(file):
21
- if os.path.exists(file):
22
- return file if single else [file]
23
- return None if single else []
24
- files_found = []
25
- if not location:
26
- location = ["."]
27
- elif isinstance(location, (str, pathlib.PosixPath)):
28
- location = [location]
29
- elif not isinstance(location, list):
30
- location = []
31
- for directory in location:
32
- if not directory:
33
- continue
34
- if isinstance(directory, (str, pathlib.PosixPath)) and os.path.exists(os.path.join(directory, file)):
35
- file_found = os.path.abspath(os.path.normpath(os.path.join(directory, file)))
36
- if single:
37
- return file_found
38
- if file_found not in files_found:
39
- files_found.append(file_found)
40
- if recursive:
41
- for directory in location:
42
- if not directory:
43
- continue
44
- if not directory.endswith("/**") and not file.startswith("**/"):
45
- path = f"{directory}/**/{file}"
46
- else:
47
- path = f"{directory}/{file}"
48
- files = glob.glob(path, recursive=recursive)
49
- if files:
50
- for file_found in files:
51
- file_found = os.path.abspath(file_found)
52
- if single:
53
- return file_found
54
- if file_found not in files_found:
55
- files_found.append(file_found)
56
- if files_found:
57
- return files_found[0] if single else files_found
58
- return None if single else []
@@ -1,36 +0,0 @@
1
- from contextlib import contextmanager
2
- import os
3
- import shutil
4
- import tempfile
5
- from typing import List, Optional, Union
6
-
7
-
8
- @contextmanager
9
- def temporary_directory() -> str:
10
- try:
11
- with tempfile.TemporaryDirectory() as tmp_directory_name:
12
- yield tmp_directory_name
13
- finally:
14
- remove_temporary_directory(tmp_directory_name)
15
-
16
-
17
- @contextmanager
18
- def temporary_file(name: Optional[str] = None, suffix: Optional[str] = None,
19
- content: Optional[Union[str, bytes, List[str]]] = None) -> str:
20
- with temporary_directory() as tmp_directory_name:
21
- tmp_file_name = os.path.join(tmp_directory_name, name or tempfile.mktemp(dir="")) + (suffix or "")
22
- with open(tmp_file_name, "wb" if isinstance(content, bytes) else "w") as tmp_file:
23
- if content is not None:
24
- tmp_file.write("\n".join(content) if isinstance(content, list) else content)
25
- yield tmp_file_name
26
-
27
-
28
- def remove_temporary_directory(tmp_directory_name: str) -> None:
29
- def is_temporary_directory(path: str) -> bool:
30
- try:
31
- tmpdir = tempfile.gettempdir()
32
- return os.path.commonpath([path, tmpdir]) == tmpdir and os.path.exists(path) and os.path.isdir(path)
33
- except Exception:
34
- return False
35
- if is_temporary_directory(tmp_directory_name): # Guard against errant deletion.
36
- shutil.rmtree(tmp_directory_name)
File without changes
File without changes
File without changes
File without changes