deriva 1.7.5__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deriva/core/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "1.7.5"
1
+ __version__ = "1.7.6"
2
2
 
3
3
  from deriva.core.utils.core_utils import *
4
4
  from deriva.core.base_cli import BaseCLI, KeyValuePairArgs
@@ -2,6 +2,7 @@ from deriva.transfer.download.deriva_download import DerivaDownload, GenericDown
2
2
  DerivaDownloadConfigurationError, DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError, \
3
3
  DerivaDownloadBaggingError
4
4
  from deriva.transfer.download.deriva_download_cli import DerivaDownloadCLI
5
+ from deriva.transfer.download.deriva_export import DerivaExport, DerivaExportCLI
5
6
 
6
7
  from deriva.transfer.upload.deriva_upload import DerivaUpload, GenericUploader, DerivaUploadError, DerivaUploadError, \
7
8
  DerivaUploadConfigurationError, DerivaUploadCatalogCreateError, DerivaUploadCatalogUpdateError, \
@@ -0,0 +1,241 @@
1
+ import os
2
+ import sys
3
+ import json
4
+ import traceback
5
+ import requests
6
+ import argparse
7
+ import logging
8
+ import certifi
9
+ import datetime
10
+ from collections.abc import Mapping, Iterable
11
+ from requests.exceptions import HTTPError, ConnectionError, Timeout
12
+ from deriva.core.deriva_binding import DerivaClientContext
13
+ from deriva.core.utils.mime_utils import parse_content_disposition
14
+ from deriva.core import BaseCLI, KeyValuePairArgs, get_new_requests_session, get_transfer_summary, get_credential, \
15
+ format_credential, format_exception, urlsplit, DEFAULT_SESSION_CONFIG, DEFAULT_CHUNK_SIZE
16
+ from deriva.transfer.download import DerivaDownloadError, DerivaDownloadConfigurationError, \
17
+ DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError
18
+
19
logger = logging.getLogger(__name__)

# Path template of the export service endpoint; "%s" is replaced with the export type.
EXPORT_SERVICE_PATH = "/deriva/export/%s"


class DerivaExport:
    """
    Client tool for interacting with DERIVA Export service.

    :param host (str): The host server for the export operation.
    :param config_file (str): Path to an export configuration file.
    :param credential (dict): Authentication credential (returned from get_credential()) for the export process.
        Optional.
    :param token (str): Passed to format_credential() when "credential" is not given. Optional.
    :param oauth2_token (str): Passed to format_credential() when "credential" is not given. Optional.
    :param credential_file (str): Passed to get_credential() when neither a credential nor a token is given. Optional.
    :param envars (dict): A dictionary of variables used for template substitution. Optional.
    :param output_dir (str): The directory where exported data will be stored (default: "."). Optional.
    :param defer_download (bool): Whether to defer the actual data download. Optional.
    :param timeout (int): Timeout value for export operations. Optional.
    :param export_type (str): The type of export to perform (default: "bdbag"). Optional.
    """

    def __init__(self, **kwargs):
        """
        Build the HTTP session for the export service and attach the login credential to it.

        :raises DerivaDownloadConfigurationError: if no host was given.
        :raises DerivaDownloadAuthenticationError: if no credential was given and none could be looked up.
        """
        self.host = kwargs.get("host")
        if not self.host:
            # Fail with a clear message instead of a TypeError from the URL concatenation below.
            raise DerivaDownloadConfigurationError("A host name is required in order to use the export service.")
        self.config_file = kwargs.get("config_file")
        # "or" (rather than a .get() default) also covers callers that pass an explicit None.
        self.envars = kwargs.get("envars") or {}
        self.credential = kwargs.get("credential")
        self.output_dir = kwargs.get("output_dir") or "."
        self.defer_download = kwargs.get("defer_download")
        self.timeout = kwargs.get("timeout")
        self.export_type = kwargs.get("export_type", "bdbag")
        self.base_server_uri = "https://" + self.host
        self.service_url = self.base_server_uri + EXPORT_SERVICE_PATH % self.export_type
        self.session_config = DEFAULT_SESSION_CONFIG.copy()
        if self.timeout is not None:
            self.session_config["timeout"] = self.timeout
        self.session = get_new_requests_session(self.service_url, self.session_config)
        self.dcctx = DerivaClientContext()
        self.session.headers.update({'deriva-client-context': self.dcctx.encoded()})

        # credential initialization
        if self.credential is None:
            token = kwargs.get("token")
            oauth2_token = kwargs.get("oauth2_token")
            credential_file = kwargs.get("credential_file")
            if token or oauth2_token:
                self.credential = format_credential(token=token, oauth2_token=oauth2_token)
            else:
                self.credential = get_credential(self.host, credential_file)

        if self.credential is None:
            raise DerivaDownloadAuthenticationError(
                "The requested service requires authentication and a valid login credential could "
                "not be found (or was not provided) for the specified host.")
        if 'bearer-token' in self.credential:
            self.session.headers.update(
                {'Authorization': 'Bearer {token}'.format(token=self.credential['bearer-token'])})
        elif 'cookie' in self.credential:
            # The cookie credential is a single "name=value" string.
            cname, cval = self.credential['cookie'].split('=', 1)
            self.session.cookies.set(cname, cval, domain=self.host, path='/')

    def validate_authn_session(self):
        """
        GET "/authn/session" on the host and return the decoded JSON response.

        :raises requests.HTTPError: if the server responds with an error status.
        """
        url = self.base_server_uri + "/authn/session"
        r = self.session.get(url)
        if r.status_code in (requests.codes.not_found, requests.codes.unauthorized):
            logger.warning("Unable to authenticate. Check for missing or expired credentials.")
        r.raise_for_status()
        return r.json()

    def recursive_format(self, d, **kwargs):
        """
        Recursively apply str.format to all string-based values in a dictionary.
        Supports nested dictionaries and lists.

        :param d: Dictionary or iterable containing values to be formatted
        :param kwargs: Formatting arguments
        :return: New dictionary or iterable with formatted strings
        """
        if isinstance(d, str):
            return d.format(**kwargs)
        if isinstance(d, Mapping):
            return {k: self.recursive_format(v, **kwargs) for k, v in d.items()}
        # bytes are iterable but must be passed through untouched.
        if isinstance(d, Iterable) and not isinstance(d, bytes):
            return type(d)(self.recursive_format(v, **kwargs) for v in d)
        return d

    @staticmethod
    def _parse_result_urls(text):
        """Split the newline-delimited service response into a list of non-blank, stripped URLs."""
        return [line.strip() for line in (text or "").splitlines() if line.strip()]

    def retrieve_file(self, url):
        """
        Download the file at "url" into the output directory.

        The output file name is taken from the Content-Disposition header of a HEAD request.

        :param url: URL of the file to download.
        :return: The absolute path of the downloaded file.
        :raises DerivaDownloadError: if the HEAD request fails, no usable file name can be determined,
            or the GET request does not return status 200.
        """
        try:
            head = self.session.head(url)
            # Without this call an error status never raises and the handler below is dead code.
            head.raise_for_status()
        except requests.HTTPError as e:
            raise DerivaDownloadError("HEAD request for [%s] failed: %s" % (url, e))

        content_disposition = head.headers.get("Content-Disposition")
        if not content_disposition:
            raise DerivaDownloadError("HEAD response missing Content-Disposition header.")

        # The file name comes from a remote header: keep only its final path component so that the
        # download cannot be written outside of the output directory.
        filename = os.path.basename(parse_content_disposition(content_disposition) or "")
        if filename in ("", ".", ".."):
            raise DerivaDownloadError(
                "Unable to determine an output file name from Content-Disposition header: %s" % content_disposition)
        output_path = os.path.abspath(os.path.join(self.output_dir, filename))

        with self.session.get(url, stream=True, verify=certifi.where()) as r:
            if r.status_code != 200:
                file_error = "File [%s] transfer failed." % output_path
                url_error = 'HTTP GET Failed for url: %s' % url
                host_error = "Host %s responded:\n\n%s" % (urlsplit(url).netloc, r.text)
                raise DerivaDownloadError('%s\n\n%s\n%s' % (file_error, url_error, host_error))

            total = 0
            start = datetime.datetime.now()
            logger.debug("Transferring file %s to %s" % (url, output_path))
            with open(output_path, 'wb') as data_file:
                for chunk in r.iter_content(chunk_size=DEFAULT_CHUNK_SIZE):
                    data_file.write(chunk)
                    total += len(chunk)
            elapsed = datetime.datetime.now() - start
            summary = get_transfer_summary(total, elapsed)
            logger.info("File [%s] transfer successful. %s" % (output_path, summary))
            return output_path

    def export(self):
        """
        Submit the export configuration to the export service and, unless deferred, download the results.

        :return: If "defer_download" is set, the list of URLs returned by the service. Otherwise the full
            path to the downloaded file for "bdbag" exports, a list of full paths for "file" exports, and
            None for any other export type (nothing is downloaded).
        :raises DerivaDownloadConfigurationError: if the configuration file cannot be read or processed.
        :raises DerivaDownloadTimeoutError: if a request times out.
        :raises DerivaDownloadAuthenticationError: if the server responds with "unauthorized".
        :raises DerivaDownloadAuthorizationError: if the server responds with "forbidden".
        :raises DerivaDownloadError: for connection errors, other HTTP errors, or an empty service response.
        """
        try:
            auth = self.validate_authn_session()
            logger.debug("Authenticated session established. Session attributes: %s" % auth)

            try:
                logger.info("Processing export config file: %s" % self.config_file)
                with open(self.config_file, encoding='utf-8') as cf:
                    config = json.load(cf)
                # Only the "env" section of the configuration is subject to template substitution.
                config["env"] = self.recursive_format(config.get("env", {}), **self.envars)
            except Exception as e:
                raise DerivaDownloadConfigurationError(
                    "Error processing export config file: %s" % format_exception(e))

            logger.info("Requesting %s export at: %s" % (self.export_type, self.service_url))
            response = self.session.post(self.service_url, json=config)
            response.raise_for_status()
            # Blank lines (e.g. from a trailing newline) are dropped so they are never treated as URLs.
            result_urls = self._parse_result_urls(response.text)
            if not result_urls:
                raise DerivaDownloadError(
                    "The export service at %s did not return any result URLs." % self.service_url)
            logger.info("Export successful. Service responded with URL list: %s" % result_urls)

            if self.defer_download:
                return result_urls

            if self.export_type == "bdbag":
                # When more than one URL is returned, the second one is the one that gets downloaded.
                result_url = result_urls[1] if len(result_urls) > 1 else result_urls[0]
                logger.info("Downloading exported bag content from %s to directory: %s" %
                            (result_url, os.path.abspath(self.output_dir)))
                return self.retrieve_file(result_url)

            if self.export_type == "file":
                output_paths = []
                for result_url in result_urls:
                    logger.info("Downloading exported file content from %s to directory: %s" %
                                (result_url, os.path.abspath(self.output_dir)))
                    output_paths.append(self.retrieve_file(result_url))
                return output_paths

            logger.warning("Download is not supported for export type [%s]; no files were retrieved." %
                           self.export_type)
            return None
        except Timeout as e:
            # Checked before ConnectionError: requests' ConnectTimeout derives from both classes and
            # should be reported as a timeout.
            raise DerivaDownloadTimeoutError("Connection timeout occurred. %s" % format_exception(e))
        except ConnectionError as e:
            raise DerivaDownloadError("Connection error occurred. %s" % format_exception(e))
        except HTTPError as e:
            if e.response.status_code == requests.codes.unauthorized:
                raise DerivaDownloadAuthenticationError(
                    "The requested service requires authentication and a valid login session could "
                    "not be found for the specified host. Server responded: %s" % format_exception(e))
            elif e.response.status_code == requests.codes.forbidden:
                raise DerivaDownloadAuthorizationError(
                    "A requested operation was forbidden. Server responded: %s" % format_exception(e))
            else:
                raise DerivaDownloadError(format_exception(e))
185
+
186
+
187
class DerivaExportCLI(BaseCLI):
    """
    Command-line front end for DerivaExport.

    Adds the export-specific options to the parser set up by BaseCLI and runs a single export.
    """

    def __init__(self, description, epilog, **kwargs):
        """
        :param description: Program description, passed through to BaseCLI.
        :param epilog: Help epilog text, passed through to BaseCLI.
        :param kwargs: Additional keyword arguments, passed through to BaseCLI.
        """
        BaseCLI.__init__(self, description, epilog, **kwargs)
        self.parser.add_argument("--defer-download", action="store_true",
                                 help="Do not download exported file(s). Default: False")
        # type=int: without it argparse hands over a string, which is not a usable timeout value.
        self.parser.add_argument("--timeout", metavar="<seconds>", type=int,
                                 help="Total number of seconds elapsed before the download is aborted.")
        self.parser.add_argument("--export-type", choices=["bdbag", "file"], default="bdbag",
                                 help="Export type: {bdbag|file}. Default is bdbag.",)
        self.parser.add_argument("--output-dir", metavar="<output dir>", default=".",
                                 help="Path to an output directory. Default is current directory.")
        self.parser.add_argument("envars", metavar="[key=value key=value ...]",
                                 nargs=argparse.REMAINDER, action=KeyValuePairArgs, default={},
                                 help="Variable length of whitespace-delimited key=value pair arguments used for "
                                      "string interpolation in specific parts of the configuration file. "
                                      "For example: key1=value1 key2=value2")

    def main(self):
        """
        Parse the command line, run the export and report the outcome.

        The export result (downloaded path(s), or the result URLs when the download is deferred)
        is written to stdout, one entry per line. Errors are written to stderr.

        :return: 0 on success, 1 if the export failed, 2 if the command line could not be parsed.
        """
        try:
            args = self.parse_cli()
        except ValueError as e:
            sys.stderr.write(str(e))
            return 2
        if not args.quiet:
            sys.stderr.write("\n")

        try:
            # The parsed arguments are passed as keyword arguments; DerivaExport reads the ones it knows.
            exporter = DerivaExport(**vars(args))
            result = exporter.export()
            if result is not None:
                # export() returns either a single path or a list of paths/URLs.
                entries = [result] if isinstance(result, str) else result
                sys.stdout.write("\n".join(entries) + "\n")
        except (DerivaDownloadError, DerivaDownloadConfigurationError, DerivaDownloadAuthenticationError,
                DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError) as e:
            sys.stderr.write(("\n" if not args.quiet else "") + format_exception(e))
            if args.debug:
                traceback.print_exc()
            return 1
        except Exception:
            # Not a bare "except:", so that KeyboardInterrupt and SystemExit still propagate.
            sys.stderr.write("An unexpected error occurred.")
            traceback.print_exc()
            return 1
        finally:
            if not args.quiet:
                sys.stderr.write("\n\n")
        return 0
231
+
232
# Description and epilog text handed to the command-line parser.
DESC = "Deriva Export Service Download Utility - CLI"
INFO = "For more information see: https://github.com/informatics-isi-edu/deriva-py"


def main():
    """Console entry point: build the export CLI and return the status code of its main()."""
    return DerivaExportCLI(DESC, INFO, hostname_required=True, config_file_required=True).main()


if __name__ == "__main__":
    sys.exit(main())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deriva
3
- Version: 1.7.5
3
+ Version: 1.7.6
4
4
  Summary: Python APIs and CLIs (Command-Line Interfaces) for the DERIVA platform.
5
5
  Home-page: https://github.com/informatics-isi-edu/deriva-py
6
6
  Author: USC Information Sciences Institute, Informatics Systems Research Division
@@ -8,7 +8,7 @@ deriva/config/dump_catalog_annotations.py,sha256=QzaWDLfWIAQ0eWVV11zeceWgwDBOYIe
8
8
  deriva/config/rollback_annotation.py,sha256=vqrIcen-KZX8LDpu2OVNivzIHpQoQgWkZAChZJctvtk,3015
9
9
  deriva/config/examples/group_owner_policy.json,sha256=8v3GWM1F_BWnYD9x_f6Eo4kBDvyy8g7mRqujfoEKLNc,2408
10
10
  deriva/config/examples/self_serve_policy.json,sha256=pW-cqWz4rJNNXwY4eVZFkQ8gKCHclC9yDa22ylfcDqY,1676
11
- deriva/core/__init__.py,sha256=IT6J4hMqwaL8v4DRAsPa8FVUOfVCOWY_FTw0b8UthzQ,4945
11
+ deriva/core/__init__.py,sha256=aeGZj0oHsMpa48MGoOyfk693jk-xnWq3o8TcznkudM0,4945
12
12
  deriva/core/annotation.py,sha256=PkAkPkxX1brQsb8_drR1Qj5QjQA5mjkpXhkq9NuZ1g8,13432
13
13
  deriva/core/base_cli.py,sha256=EkLXOTeaFWUbPaYV-eLuLGga1PbkFVWi3Jjo-e_Vb-U,2681
14
14
  deriva/core/catalog_cli.py,sha256=-6Bo6GLWFWap7y3VxkzPs73HAe_XzRXIJMW-Ri84m3M,23273
@@ -53,7 +53,7 @@ deriva/core/utils/webauthn_utils.py,sha256=rD0HQZAjUKp4NfqHQG1FhH3x7uKog2et7w7LB
53
53
  deriva/seo/__init__.py,sha256=dYn48A7blbeYf40b4T3KVofrQK4u5K5MfxXWfIGloig,54
54
54
  deriva/seo/sitemap_builder.py,sha256=Ht_AbodEERDofIoCcd4kPlrl1pVW670WN5dT4cc05LQ,13948
55
55
  deriva/seo/sitemap_cli.py,sha256=miCqRfpSj5Dx5BfJGSd8Pi2e4OOQjotDzP_JubukhCM,2654
56
- deriva/transfer/__init__.py,sha256=Xbp-s9vmzFbumJZ743PEXE8rI1s6-s234F66C_ioC_E,1172
56
+ deriva/transfer/__init__.py,sha256=3a01U6e68kBEnFUp0dRMFLRDj2p50iIJF9RZaG3TXVI,1253
57
57
  deriva/transfer/backup/__init__.py,sha256=vxsZiDLMTJQPybXT89G-07GsUoLhnItTCbLdXcDSyeA,465
58
58
  deriva/transfer/backup/__main__.py,sha256=dT12--8C6sKGEtMhsYuy013ebXKpVnBJfhcQNlVtv6Y,361
59
59
  deriva/transfer/backup/deriva_backup.py,sha256=IO9Tmzx6jHfUCkP-41nSsAeOFLn9T-0HwQcpRLpM_zs,5228
@@ -62,6 +62,7 @@ deriva/transfer/download/__init__.py,sha256=Pr7Zud4AFsIWwopTxeC_pupslgCG_lzycO9w
62
62
  deriva/transfer/download/__main__.py,sha256=YUg7AZ07t_xaOgtfJnU_l1nkEHCCPR8sU5X-l1An6SY,363
63
63
  deriva/transfer/download/deriva_download.py,sha256=9WHX0iBUsXv3iT0pEy95kpVN-Oh4vc6ywI5tYmJWpfk,17145
64
64
  deriva/transfer/download/deriva_download_cli.py,sha256=wN8tyQDv1AIE_aDqjECbmkoEWN050vlEdJyteYbdgSs,3940
65
+ deriva/transfer/download/deriva_export.py,sha256=lymAfCrD0Ol2kVgx7g0UAGez755D3ak8tMEneCHzwUQ,12102
65
66
  deriva/transfer/download/processors/__init__.py,sha256=evLp36tZn-Z_AMshdfV3JJO8w1es5owsnRN0IFJUwIo,4507
66
67
  deriva/transfer/download/processors/base_processor.py,sha256=R6IIHSa_euv4X2Dyhd8fvQAiVYDGJTWMQtPoukHQn-Q,3837
67
68
  deriva/transfer/download/processors/postprocess/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -107,9 +108,9 @@ tests/deriva/core/mmo/test_mmo_find.py,sha256=PcUN76sik68B3XKg0G3wHVpKcPEld_6Rtb
107
108
  tests/deriva/core/mmo/test_mmo_prune.py,sha256=4pYtYL8g1BgadlewNPVpVA5lT_gV6SPTDYf04ZKzBTA,6851
108
109
  tests/deriva/core/mmo/test_mmo_rename.py,sha256=4oSR1G3Od701Ss3AnolI1Z7CbMxKuQF2uSr2_IcoR6s,8512
109
110
  tests/deriva/core/mmo/test_mmo_replace.py,sha256=w-66LWyiQ_ajC7Ipmhc4kAKwIloPdQELeUPsvelTdX8,8439
110
- deriva-1.7.5.dist-info/LICENSE,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
111
- deriva-1.7.5.dist-info/METADATA,sha256=10B2Mj3_umQBki2v847VPJZx-PfF6f1STkv7RkcFMD4,1623
112
- deriva-1.7.5.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
113
- deriva-1.7.5.dist-info/entry_points.txt,sha256=72BEmEE4Bes5QhVxUHrl7EvUARrgISWxI2KGa8BbNZ8,786
114
- deriva-1.7.5.dist-info/top_level.txt,sha256=_LHDie5-O53wFlexfrxjewpVkf04oydf3CqX5h75DXE,13
115
- deriva-1.7.5.dist-info/RECORD,,
111
+ deriva-1.7.6.dist-info/LICENSE,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
112
+ deriva-1.7.6.dist-info/METADATA,sha256=qEPrF-sL3FER0i9i_RzniT-nGZeNMcA8j1tkm5pcDmE,1623
113
+ deriva-1.7.6.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
114
+ deriva-1.7.6.dist-info/entry_points.txt,sha256=HmYCHlgbjYQ_aZX_j4_4tApH4tDTbYtS66jKlfytbn8,850
115
+ deriva-1.7.6.dist-info/top_level.txt,sha256=_LHDie5-O53wFlexfrxjewpVkf04oydf3CqX5h75DXE,13
116
+ deriva-1.7.6.dist-info/RECORD,,
@@ -7,6 +7,7 @@ deriva-annotation-validate = deriva.config.annotation_validate:main
7
7
  deriva-backup-cli = deriva.transfer.backup.__main__:main
8
8
  deriva-catalog-cli = deriva.core.catalog_cli:main
9
9
  deriva-download-cli = deriva.transfer.download.__main__:main
10
+ deriva-export-cli = deriva.transfer.download.deriva_export:main
10
11
  deriva-globus-auth-utils = deriva.core.utils.globus_auth_utils:main
11
12
  deriva-hatrac-cli = deriva.core.hatrac_cli:main
12
13
  deriva-restore-cli = deriva.transfer.restore.__main__:main
File without changes