deriva 1.7.0__py3-none-any.whl → 1.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva/core/__init__.py +1 -1
- deriva/core/catalog_cli.py +83 -39
- deriva/core/datapath.py +314 -3
- deriva/transfer/__init__.py +4 -2
- deriva/transfer/download/__init__.py +4 -0
- deriva/transfer/download/deriva_download.py +33 -13
- deriva/transfer/download/deriva_download_cli.py +3 -2
- deriva/transfer/download/processors/query/base_query_processor.py +9 -4
- deriva/transfer/upload/__init__.py +4 -0
- deriva/transfer/upload/deriva_upload.py +4 -0
- deriva/transfer/upload/deriva_upload_cli.py +2 -2
- {deriva-1.7.0.dist-info → deriva-1.7.1.dist-info}/METADATA +2 -2
- {deriva-1.7.0.dist-info → deriva-1.7.1.dist-info}/RECORD +18 -18
- tests/deriva/core/test_datapath.py +24 -2
- {deriva-1.7.0.dist-info → deriva-1.7.1.dist-info}/LICENSE +0 -0
- {deriva-1.7.0.dist-info → deriva-1.7.1.dist-info}/WHEEL +0 -0
- {deriva-1.7.0.dist-info → deriva-1.7.1.dist-info}/entry_points.txt +0 -0
- {deriva-1.7.0.dist-info → deriva-1.7.1.dist-info}/top_level.txt +0 -0
deriva/core/__init__.py
CHANGED
deriva/core/catalog_cli.py
CHANGED
|
@@ -55,21 +55,21 @@ class DerivaCatalogCLI (BaseCLI):
|
|
|
55
55
|
|
|
56
56
|
# parent arg parser
|
|
57
57
|
self.remove_options(['--config-file', '--credential-file'])
|
|
58
|
-
self.parser.add_argument("--protocol", choices=["http", "https"], default='https',
|
|
58
|
+
self.parser.add_argument("-p", "--protocol", choices=["http", "https"], default='https',
|
|
59
59
|
help="transport protocol: 'http' or 'https'")
|
|
60
60
|
subparsers = self.parser.add_subparsers(title='sub-commands', dest='subcmd')
|
|
61
61
|
|
|
62
62
|
# exists parser
|
|
63
63
|
exists_parser = subparsers.add_parser('exists', help="Check if catalog exists.")
|
|
64
|
-
exists_parser.add_argument("id", metavar="<id>", type=str, help="
|
|
64
|
+
exists_parser.add_argument("id", metavar="<id>", type=str, help="Catalog ID")
|
|
65
65
|
exists_parser.set_defaults(func=self.catalog_exists)
|
|
66
66
|
|
|
67
67
|
# create parser
|
|
68
68
|
create_parser = subparsers.add_parser('create', help="Create a new catalog.")
|
|
69
|
-
create_parser.add_argument("--id", metavar="<id>", type=str, help="
|
|
70
|
-
create_parser.add_argument("--owner", metavar="<owner> <owner> ...",
|
|
69
|
+
create_parser.add_argument("--id", metavar="<id>", type=str, help="Catalog ID")
|
|
70
|
+
create_parser.add_argument("-o", "--owner", metavar="<owner> <owner> ...",
|
|
71
71
|
nargs="+", help="List of quoted user or group identifier strings.")
|
|
72
|
-
create_parser.add_argument("--auto-configure", action="store_true",
|
|
72
|
+
create_parser.add_argument("-a", "--auto-configure", action="store_true",
|
|
73
73
|
help="Configure the new catalog with a set of baseline defaults")
|
|
74
74
|
create_parser.add_argument("--configure-args", metavar="[key=value key=value ...]",
|
|
75
75
|
nargs='+', action=KeyValuePairArgs, default={},
|
|
@@ -80,11 +80,12 @@ class DerivaCatalogCLI (BaseCLI):
|
|
|
80
80
|
|
|
81
81
|
# get parser
|
|
82
82
|
get_parser = subparsers.add_parser('get', help="Send a HTTP GET request to the catalog.")
|
|
83
|
-
get_parser.add_argument("id", metavar="<id>", type=str, help="
|
|
83
|
+
get_parser.add_argument("id", metavar="<id>", type=str, help="Catalog ID")
|
|
84
84
|
get_parser.add_argument("path", metavar="<request-path>", help="The ERMRest API path.")
|
|
85
|
-
get_parser.add_argument("--output-file", metavar="<output file path>", help="Path to output file.")
|
|
86
|
-
get_parser.add_argument("--output-format", choices=["json", "json-stream", "csv"], default="json"
|
|
87
|
-
|
|
85
|
+
get_parser.add_argument("-o", "--output-file", metavar="<output file path>", help="Path to output file.")
|
|
86
|
+
get_parser.add_argument("-f", "--output-format", choices=["json", "json-stream", "csv"], default="json",
|
|
87
|
+
help="The output file format. Defaults to 'json'")
|
|
88
|
+
get_parser.add_argument("-a", "--auto-delete", action="store_true",
|
|
88
89
|
help="Automatically delete output file if no results are returned.")
|
|
89
90
|
get_parser.add_argument("--headers", metavar="[key=value key=value ...]",
|
|
90
91
|
nargs='+', action=KeyValuePairArgs, default={},
|
|
@@ -95,10 +96,12 @@ class DerivaCatalogCLI (BaseCLI):
|
|
|
95
96
|
|
|
96
97
|
# put parser
|
|
97
98
|
put_parser = subparsers.add_parser('put', help="Send a HTTP PUT request to the catalog.")
|
|
98
|
-
put_parser.add_argument("id", metavar="<id>", type=str, help="
|
|
99
|
+
put_parser.add_argument("id", metavar="<id>", type=str, help="Catalog ID")
|
|
99
100
|
put_parser.add_argument("path", metavar="<request-path>", help="The ERMRest API path.")
|
|
100
|
-
put_parser.add_argument("
|
|
101
|
-
|
|
101
|
+
put_parser.add_argument("input-file", metavar="<input file path>",
|
|
102
|
+
help="Path to an input file containing the request message body.")
|
|
103
|
+
put_parser.add_argument("-f", "--input-format", choices=["json", "json-stream", "csv"], default="json",
|
|
104
|
+
help="The input file format. Defaults to 'json'")
|
|
102
105
|
put_parser.add_argument("--headers", metavar="[key=value key=value ...]",
|
|
103
106
|
nargs='+', action=KeyValuePairArgs, default={},
|
|
104
107
|
help="Variable length of whitespace-delimited key=value pair arguments used for "
|
|
@@ -108,10 +111,12 @@ class DerivaCatalogCLI (BaseCLI):
|
|
|
108
111
|
|
|
109
112
|
# post parser
|
|
110
113
|
post_parser = subparsers.add_parser('post', help="Send a HTTP POST request to the catalog.")
|
|
111
|
-
post_parser.add_argument("id", metavar="<id>", type=str, help="
|
|
114
|
+
post_parser.add_argument("id", metavar="<id>", type=str, help="Catalog ID")
|
|
112
115
|
post_parser.add_argument("path", metavar="<request-path>", help="The ERMRest API path.")
|
|
113
|
-
post_parser.add_argument("
|
|
114
|
-
|
|
116
|
+
post_parser.add_argument("input-file", metavar="<input file path>",
|
|
117
|
+
help="Path to an input file containing the request message body.")
|
|
118
|
+
post_parser.add_argument("-f", "--input-format", choices=["json", "json-stream", "csv"], default="json",
|
|
119
|
+
help="The input file format. Defaults to 'json'")
|
|
115
120
|
post_parser.add_argument("--headers", metavar="[key=value key=value ...]",
|
|
116
121
|
nargs='+', action=KeyValuePairArgs, default={},
|
|
117
122
|
help="Variable length of whitespace-delimited key=value pair arguments used for "
|
|
@@ -122,7 +127,7 @@ class DerivaCatalogCLI (BaseCLI):
|
|
|
122
127
|
# delete parser
|
|
123
128
|
del_parser = subparsers.add_parser('delete', help="Send a HTTP DELETE request to the catalog. "
|
|
124
129
|
"Use the 'drop' command to delete the entire catalog.")
|
|
125
|
-
del_parser.add_argument("id", metavar="<id>", type=str, help="
|
|
130
|
+
del_parser.add_argument("id", metavar="<id>", type=str, help="Catalog ID")
|
|
126
131
|
del_parser.add_argument("path", metavar="<request-path>", help="The ERMRest API path.")
|
|
127
132
|
del_parser.add_argument("--headers", metavar="[key=value key=value ...]",
|
|
128
133
|
nargs='+', action=KeyValuePairArgs, default={},
|
|
@@ -133,12 +138,12 @@ class DerivaCatalogCLI (BaseCLI):
|
|
|
133
138
|
|
|
134
139
|
# drop parser
|
|
135
140
|
drop_parser = subparsers.add_parser('drop', help="Delete a catalog.")
|
|
136
|
-
drop_parser.add_argument("id", metavar="<id>", type=str, help="
|
|
141
|
+
drop_parser.add_argument("id", metavar="<id>", type=str, help="Catalog ID")
|
|
137
142
|
drop_parser.set_defaults(func=self.catalog_drop)
|
|
138
143
|
|
|
139
144
|
# clone parser
|
|
140
145
|
clone_parser = subparsers.add_parser('clone', help="Clone a source catalog to a new destination catalog.")
|
|
141
|
-
clone_parser.add_argument("id", metavar="<id>", type=str, help="
|
|
146
|
+
clone_parser.add_argument("id", metavar="<id>", type=str, help="Catalog ID")
|
|
142
147
|
clone_parser.add_argument("--no-copy-data", action="store_false",
|
|
143
148
|
help="Do not copy table contents.")
|
|
144
149
|
clone_parser.add_argument("--no-copy-annotations", action="store_false",
|
|
@@ -154,8 +159,8 @@ class DerivaCatalogCLI (BaseCLI):
|
|
|
154
159
|
# create_alias parser
|
|
155
160
|
create_alias_parser = subparsers.add_parser('create-alias', help="Create a new catalog alias")
|
|
156
161
|
create_alias_parser.add_argument("--id", metavar="<id>", type=str, help="The alias id.")
|
|
157
|
-
create_alias_parser.add_argument("--alias-target", metavar="<alias>", help="The target catalog id.")
|
|
158
|
-
create_alias_parser.add_argument("--owner", metavar="<owner> <owner> ...",
|
|
162
|
+
create_alias_parser.add_argument("-t", "--alias-target", metavar="<alias>", help="The target catalog id.")
|
|
163
|
+
create_alias_parser.add_argument("-o", "--owner", metavar="<owner> <owner> ...",
|
|
159
164
|
nargs="+", help="List of quoted user or group identifier strings.")
|
|
160
165
|
create_alias_parser.set_defaults(func=self.catalog_alias_create)
|
|
161
166
|
|
|
@@ -167,10 +172,10 @@ class DerivaCatalogCLI (BaseCLI):
|
|
|
167
172
|
# update_alias parser
|
|
168
173
|
update_alias_parser = subparsers.add_parser('update-alias', help="Update an existing catalog alias")
|
|
169
174
|
update_alias_parser.add_argument("--id", metavar="<id>", type=str, help="The alias id.")
|
|
170
|
-
update_alias_parser.add_argument("--alias-target", metavar="<alias>", nargs='?', default=nochange, const=None,
|
|
175
|
+
update_alias_parser.add_argument("-t", "--alias-target", metavar="<alias>", nargs='?', default=nochange, const=None,
|
|
171
176
|
help="The target catalog id. If specified without a catalog id as an argument "
|
|
172
177
|
"value, the existing alias target will be cleared ")
|
|
173
|
-
update_alias_parser.add_argument("--owner", metavar="<owner> <owner> ...", nargs='+', default=nochange,
|
|
178
|
+
update_alias_parser.add_argument("-o", "--owner", metavar="<owner> <owner> ...", nargs='+', default=nochange,
|
|
174
179
|
help="List of quoted user or group identifier strings.")
|
|
175
180
|
update_alias_parser.set_defaults(func=self.catalog_alias_update)
|
|
176
181
|
|
|
@@ -194,9 +199,29 @@ class DerivaCatalogCLI (BaseCLI):
|
|
|
194
199
|
self.id = args.id
|
|
195
200
|
self.server = DerivaServer(self.protocol,
|
|
196
201
|
args.host,
|
|
197
|
-
credentials=DerivaCatalogCLI._get_credential(
|
|
198
|
-
|
|
199
|
-
|
|
202
|
+
credentials=DerivaCatalogCLI._get_credential(
|
|
203
|
+
self.host,
|
|
204
|
+
token=args.token,
|
|
205
|
+
oauth2_token=args.oauth2_token))
|
|
206
|
+
|
|
207
|
+
@staticmethod
|
|
208
|
+
def _decorate_headers(headers, file_format, method="get"):
|
|
209
|
+
|
|
210
|
+
header_format_map = {
|
|
211
|
+
"json": "application/json",
|
|
212
|
+
"json-stream": "application/x-json-stream",
|
|
213
|
+
"csv": "text/csv"
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
format_type = header_format_map.get(file_format)
|
|
217
|
+
if format_type is None:
|
|
218
|
+
raise UsageException("Unsupported format: %s" % file_format)
|
|
219
|
+
if str(method).lower() in ["get", "head"]:
|
|
220
|
+
headers["accept"] = format_type
|
|
221
|
+
elif str(method).lower() in ["post", "put"]:
|
|
222
|
+
headers["content-type"] = format_type
|
|
223
|
+
else:
|
|
224
|
+
raise UsageException("Unsupported method: %s" % method)
|
|
200
225
|
|
|
201
226
|
def catalog_exists(self, args):
|
|
202
227
|
"""Implements the catalog_exists sub-command.
|
|
@@ -250,16 +275,7 @@ class DerivaCatalogCLI (BaseCLI):
|
|
|
250
275
|
"""
|
|
251
276
|
headers = DEFAULT_HEADERS.copy()
|
|
252
277
|
headers.update(args.headers)
|
|
253
|
-
|
|
254
|
-
if args.output_format == "json":
|
|
255
|
-
headers["accept"] = "application/json"
|
|
256
|
-
elif args.output_format == "json-stream":
|
|
257
|
-
headers["accept"] = "application/x-json-stream"
|
|
258
|
-
elif args.output_format == "csv":
|
|
259
|
-
headers["accept"] = "text/csv"
|
|
260
|
-
else:
|
|
261
|
-
raise UsageException("Unsupported output format: %s" % args.output_format)
|
|
262
|
-
|
|
278
|
+
self._decorate_headers(headers, args.output_format)
|
|
263
279
|
catalog = self.server.connect_ermrest(args.id)
|
|
264
280
|
try:
|
|
265
281
|
if args.output_file:
|
|
@@ -278,13 +294,41 @@ class DerivaCatalogCLI (BaseCLI):
|
|
|
278
294
|
os.remove(args.output_file)
|
|
279
295
|
raise
|
|
280
296
|
|
|
281
|
-
# TODO: implement PUT at some point
|
|
282
297
|
def catalog_put(self, args):
|
|
283
|
-
|
|
298
|
+
"""Implements the catalog_put sub-command.
|
|
299
|
+
"""
|
|
300
|
+
headers = DEFAULT_HEADERS.copy()
|
|
301
|
+
headers.update(args.headers)
|
|
302
|
+
self._decorate_headers(headers, args.input_format, "put")
|
|
303
|
+
try:
|
|
304
|
+
catalog = self.server.connect_ermrest(args.id)
|
|
305
|
+
with open(args.input_file, "rb") as input_file:
|
|
306
|
+
resp = catalog.put(args.path, data=input_file, headers=headers)
|
|
307
|
+
if not args.quiet:
|
|
308
|
+
pp(resp.json())
|
|
309
|
+
except HTTPError as e:
|
|
310
|
+
if e.response.status_code == requests.codes.not_found:
|
|
311
|
+
raise ResourceException('Catalog not found', e)
|
|
312
|
+
else:
|
|
313
|
+
raise e
|
|
284
314
|
|
|
285
|
-
# TODO: implement POST at some point
|
|
286
315
|
def catalog_post(self, args):
|
|
287
|
-
|
|
316
|
+
"""Implements the catalog_post sub-command.
|
|
317
|
+
"""
|
|
318
|
+
headers = DEFAULT_HEADERS.copy()
|
|
319
|
+
headers.update(args.headers)
|
|
320
|
+
self._decorate_headers(headers, args.input_format, "post")
|
|
321
|
+
try:
|
|
322
|
+
catalog = self.server.connect_ermrest(args.id)
|
|
323
|
+
with open(args.input_file, "rb") as input_file:
|
|
324
|
+
resp = catalog.post(args.path, data=input_file, headers=headers)
|
|
325
|
+
if not args.quiet:
|
|
326
|
+
pp(resp.json())
|
|
327
|
+
except HTTPError as e:
|
|
328
|
+
if e.response.status_code == requests.codes.not_found:
|
|
329
|
+
raise ResourceException('Catalog not found', e)
|
|
330
|
+
else:
|
|
331
|
+
raise e
|
|
288
332
|
|
|
289
333
|
def catalog_delete(self, args):
|
|
290
334
|
"""Implements the catalog_delete sub-command.
|
deriva/core/datapath.py
CHANGED
|
@@ -395,7 +395,8 @@ class DataPath (object):
|
|
|
395
395
|
By default links use inner join semantics on the foreign key / key equality comparison. The `join_type`
|
|
396
396
|
parameter can be used to specify `left`, `right`, or `full` outer join semantics.
|
|
397
397
|
|
|
398
|
-
:param right: the right hand table of the link expression
|
|
398
|
+
:param right: the right hand table of the link expression; if the table or alias name is in use, an incremental
|
|
399
|
+
number will be used to disambiguate tables instances of the same original name.
|
|
399
400
|
:param on: an equality comparison between key and foreign key columns, a conjunction of such comparisons, or a foreign key object
|
|
400
401
|
:param join_type: the join type of this link which may be 'left', 'right', 'full' outer joins or '' for inner
|
|
401
402
|
join link by default.
|
|
@@ -413,7 +414,7 @@ class DataPath (object):
|
|
|
413
414
|
raise ValueError("'on' must be specified for outer joins")
|
|
414
415
|
if right._schema._catalog != self._root._schema._catalog:
|
|
415
416
|
raise ValueError("'right' is from a different catalog. Cannot link across catalogs.")
|
|
416
|
-
if isinstance(right, _TableAlias) and right.
|
|
417
|
+
if isinstance(right, _TableAlias) and right._parent == self:
|
|
417
418
|
raise ValueError("'right' is a table alias that has already been used.")
|
|
418
419
|
else:
|
|
419
420
|
# Generate an unused alias name for the table
|
|
@@ -606,6 +607,18 @@ class DataPath (object):
|
|
|
606
607
|
|
|
607
608
|
return self
|
|
608
609
|
|
|
610
|
+
def denormalize(self, context_name=None, heuristic=None, groupkey_name='RID'):
|
|
611
|
+
"""Denormalizes a path based on a visible-columns annotation 'context' or a heuristic approach.
|
|
612
|
+
|
|
613
|
+
This method does not mutate this object. It returns a result set representing the denormalization of the path.
|
|
614
|
+
|
|
615
|
+
:param context_name: name of the visible-columns context or if none given, will attempt apply heuristics
|
|
616
|
+
:param heuristic: heuristic to apply if no context name specified
|
|
617
|
+
:param groupkey_name: column name for the group by key of the generated query expression (default: 'RID')
|
|
618
|
+
:return: a results set.
|
|
619
|
+
"""
|
|
620
|
+
return _datapath_denormalize(self, context_name=context_name, heuristic=heuristic, groupkey_name=groupkey_name)
|
|
621
|
+
|
|
609
622
|
|
|
610
623
|
class _ResultSet (object):
|
|
611
624
|
"""A set of results for various queries or data manipulations.
|
|
@@ -623,6 +636,7 @@ class _ResultSet (object):
|
|
|
623
636
|
self._fetcher_fn = fetcher_fn
|
|
624
637
|
self._results_doc = None
|
|
625
638
|
self._sort_keys = None
|
|
639
|
+
self._limit = None
|
|
626
640
|
self.uri = uri
|
|
627
641
|
|
|
628
642
|
@property
|
|
@@ -656,6 +670,19 @@ class _ResultSet (object):
|
|
|
656
670
|
self._results_doc = None
|
|
657
671
|
return self
|
|
658
672
|
|
|
673
|
+
def limit(self, n):
|
|
674
|
+
"""Set a limit on the number of results to be returned.
|
|
675
|
+
|
|
676
|
+
:param n: integer or None.
|
|
677
|
+
:return: self
|
|
678
|
+
"""
|
|
679
|
+
try:
|
|
680
|
+
self._limit = None if n is None else int(n)
|
|
681
|
+
self._results_doc = None
|
|
682
|
+
return self
|
|
683
|
+
except ValueError:
|
|
684
|
+
raise ValueError('limit argument "n" must be an integer or None')
|
|
685
|
+
|
|
659
686
|
def fetch(self, limit=None, headers=DEFAULT_HEADERS):
|
|
660
687
|
"""Fetches the results from the catalog.
|
|
661
688
|
|
|
@@ -663,7 +690,7 @@ class _ResultSet (object):
|
|
|
663
690
|
:param headers: headers to send in request to server
|
|
664
691
|
:return: self
|
|
665
692
|
"""
|
|
666
|
-
limit = int(limit) if limit else
|
|
693
|
+
limit = int(limit) if limit else self._limit
|
|
667
694
|
self._results_doc = self._fetcher_fn(limit, self._sort_keys, headers)
|
|
668
695
|
logger.debug("Fetched %d entities" % len(self._results_doc))
|
|
669
696
|
return self
|
|
@@ -797,6 +824,18 @@ class _TableWrapper (object):
|
|
|
797
824
|
"""
|
|
798
825
|
return _AttributeGroup(self, self._query, keys)
|
|
799
826
|
|
|
827
|
+
def denormalize(self, context_name=None, heuristic=None, groupkey_name='RID'):
|
|
828
|
+
"""Denormalizes a path based on a visible-columns annotation 'context' or a heuristic approach.
|
|
829
|
+
|
|
830
|
+
This method does not mutate this object. It returns a result set representing the denormalization of the path.
|
|
831
|
+
|
|
832
|
+
:param context_name: name of the visible-columns context or if none given, will attempt apply heuristics
|
|
833
|
+
:param heuristic: heuristic to apply if no context name specified
|
|
834
|
+
:param groupkey_name: column name for the group by key of the generated query expression (default: 'RID')
|
|
835
|
+
:return: a results set.
|
|
836
|
+
"""
|
|
837
|
+
return self.path.denormalize(context_name=context_name, heuristic=heuristic, groupkey_name=groupkey_name)
|
|
838
|
+
|
|
800
839
|
def insert(self, entities, defaults=set(), nondefaults=set(), add_system_defaults=True, on_conflict_skip=False):
|
|
801
840
|
"""Inserts entities into the table.
|
|
802
841
|
|
|
@@ -1760,3 +1799,275 @@ class _AttributeGroup (object):
|
|
|
1760
1799
|
bin.maxval = result.get('maxval', bin.maxval)
|
|
1761
1800
|
if (bin.minval is None) or (bin.maxval is None):
|
|
1762
1801
|
raise ValueError('Automatic determination of binning bounds failed.')
|
|
1802
|
+
|
|
1803
|
+
##
|
|
1804
|
+
## UTILITIES FOR DENORMALIZATION ##############################################
|
|
1805
|
+
##
|
|
1806
|
+
|
|
1807
|
+
def _datapath_left_outer_join_by_fkey(path, fk, alias_name=None):
|
|
1808
|
+
"""Link a table to the path based on a foreign key reference.
|
|
1809
|
+
|
|
1810
|
+
:param path: a DataPath object
|
|
1811
|
+
:param fk: an ermrest_model.ForeignKey object
|
|
1812
|
+
:param alias_name: an optional 'alias' name to use for the foreign table
|
|
1813
|
+
"""
|
|
1814
|
+
assert isinstance(path, DataPath)
|
|
1815
|
+
assert isinstance(fk, _erm.ForeignKey)
|
|
1816
|
+
catalog = path._root._schema._catalog
|
|
1817
|
+
|
|
1818
|
+
# determine 'direction' -- inbound or outbound
|
|
1819
|
+
path_context_table = path.context._base_table._wrapped_table
|
|
1820
|
+
if (path_context_table.schema.name, path_context_table.name) == (fk.table.schema.name, fk.table.name):
|
|
1821
|
+
right = catalog.schemas[fk.pk_table.schema.name].tables[fk.pk_table.name]
|
|
1822
|
+
fkcols = zip(fk.foreign_key_columns, fk.referenced_columns)
|
|
1823
|
+
elif (path_context_table.schema.name, path_context_table.name) == (fk.pk_table.schema.name, fk.pk_table.name):
|
|
1824
|
+
right = catalog.schemas[fk.table.schema.name].tables[fk.table.name]
|
|
1825
|
+
fkcols = zip(fk.referenced_columns, fk.foreign_key_columns)
|
|
1826
|
+
else:
|
|
1827
|
+
raise ValueError('Context table "%s" not referenced by foreign key "%s"' % (path_context_table.name, fk.constraint_name))
|
|
1828
|
+
|
|
1829
|
+
# compose join condition
|
|
1830
|
+
on = None
|
|
1831
|
+
for lcol, rcol in fkcols:
|
|
1832
|
+
lcol = catalog.schemas[lcol.table.schema.name].tables[lcol.table.name].columns[lcol.name]
|
|
1833
|
+
rcol = catalog.schemas[rcol.table.schema.name].tables[rcol.table.name].columns[rcol.name]
|
|
1834
|
+
if on:
|
|
1835
|
+
on = on & (lcol == rcol)
|
|
1836
|
+
else:
|
|
1837
|
+
on = lcol == rcol
|
|
1838
|
+
|
|
1839
|
+
# link
|
|
1840
|
+
path.link(right.alias(alias_name) if alias_name else right, on=on, join_type='left')
|
|
1841
|
+
|
|
1842
|
+
|
|
1843
|
+
def _datapath_deserialize_vizcolumn(path, vizcol, sources=None):
|
|
1844
|
+
"""Deserializes a visual column specification.
|
|
1845
|
+
|
|
1846
|
+
If the visible column specifies a foreign key path, the datapath object
|
|
1847
|
+
will be changed by linking the foreign keys in the path.
|
|
1848
|
+
|
|
1849
|
+
:param path: a datapath object
|
|
1850
|
+
:param vizcol: a visible column specification
|
|
1851
|
+
:return: the element to be projected from the datapath or None
|
|
1852
|
+
"""
|
|
1853
|
+
assert isinstance(path, DataPath)
|
|
1854
|
+
sources = sources if sources else {}
|
|
1855
|
+
context = path.context
|
|
1856
|
+
table = context._wrapped_table
|
|
1857
|
+
model = table.schema.model
|
|
1858
|
+
|
|
1859
|
+
if isinstance(vizcol, str):
|
|
1860
|
+
# column name specification
|
|
1861
|
+
return context.columns[vizcol]
|
|
1862
|
+
elif isinstance(vizcol, list):
|
|
1863
|
+
# constraint specification
|
|
1864
|
+
try:
|
|
1865
|
+
fk = model.fkey(vizcol)
|
|
1866
|
+
_datapath_left_outer_join_by_fkey(path, fk, alias_name='F')
|
|
1867
|
+
return ArrayD(path.context).alias(path.context._name) # project all attributes
|
|
1868
|
+
except KeyError as e:
|
|
1869
|
+
raise ValueError('Invalid foreign key constraint name: %s. If this is a key constraint name, note that keys are not supported at this time.' % str(e))
|
|
1870
|
+
elif isinstance(vizcol, dict):
|
|
1871
|
+
# resolve visible column
|
|
1872
|
+
while 'sourcekey' in vizcol:
|
|
1873
|
+
temp = sources.get(vizcol['sourcekey'], {})
|
|
1874
|
+
if temp == vizcol:
|
|
1875
|
+
raise ValueError('Visible column self reference for sourcekey "%s"' % vizcol['sourcekey'])
|
|
1876
|
+
vizcol = temp
|
|
1877
|
+
# deserialize source definition
|
|
1878
|
+
source = vizcol.get('source')
|
|
1879
|
+
if not source:
|
|
1880
|
+
# case: none
|
|
1881
|
+
raise ValueError('Could not resolve source definition for visible column')
|
|
1882
|
+
elif isinstance(source, str):
|
|
1883
|
+
# case: column name
|
|
1884
|
+
return context.columns[source]
|
|
1885
|
+
elif isinstance(source, list):
|
|
1886
|
+
# case: path expression
|
|
1887
|
+
# ...validate syntax
|
|
1888
|
+
if not all(isinstance(obj, dict) for obj in source[:-1]):
|
|
1889
|
+
raise ValueError('Source path element must be a foreign key dict')
|
|
1890
|
+
if not isinstance(source[-1], str):
|
|
1891
|
+
raise ValueError('Source path must terminate in a column name string')
|
|
1892
|
+
# link path elements by fkey; and track whether path is outbound only fkeys
|
|
1893
|
+
outbound_only = True
|
|
1894
|
+
for path_elem in source[:-1]:
|
|
1895
|
+
try:
|
|
1896
|
+
fk = model.fkey(path_elem.get('inbound', path_elem.get('outbound')))
|
|
1897
|
+
_datapath_left_outer_join_by_fkey(path, fk, alias_name='F')
|
|
1898
|
+
outbound_only = outbound_only and 'outbound' in path_elem
|
|
1899
|
+
except KeyError as e:
|
|
1900
|
+
raise ValueError('Invalid foreign key constraint name: %s' % str(e))
|
|
1901
|
+
# return terminating column or entity
|
|
1902
|
+
# ...get terminal name
|
|
1903
|
+
terminal = source[-1]
|
|
1904
|
+
# ...get alias name
|
|
1905
|
+
alias = vizcol.get('markdown_name', vizcol.get('name', path.context._name + '_' + terminal))
|
|
1906
|
+
# ...get aggregate function
|
|
1907
|
+
aggregate = {
|
|
1908
|
+
'min': Min,
|
|
1909
|
+
'max': Max,
|
|
1910
|
+
'cnt': Cnt,
|
|
1911
|
+
'cnd_d': CntD,
|
|
1912
|
+
'array': Array,
|
|
1913
|
+
'array_d': ArrayD
|
|
1914
|
+
}.get(vizcol.get('aggregate'), ArrayD)
|
|
1915
|
+
# ...determine projection mode
|
|
1916
|
+
if vizcol.get('entity', True):
|
|
1917
|
+
# case: whole entities
|
|
1918
|
+
return aggregate(path.context).alias(alias)
|
|
1919
|
+
else:
|
|
1920
|
+
# case: specified attribute value(s)
|
|
1921
|
+
if outbound_only:
|
|
1922
|
+
# for outbound only paths, we can project a single value
|
|
1923
|
+
return path.context.columns[terminal].alias(alias)
|
|
1924
|
+
else:
|
|
1925
|
+
# otherwise, we need to use aggregate the values
|
|
1926
|
+
return aggregate(path.context.columns[terminal]).alias(alias)
|
|
1927
|
+
else:
|
|
1928
|
+
raise ValueError('Malformed source: %s' % str(source))
|
|
1929
|
+
else:
|
|
1930
|
+
raise ValueError('Malformed visible column: %s' % str(vizcol))
|
|
1931
|
+
|
|
1932
|
+
|
|
1933
|
+
def _datapath_contextualize(path, context_name='*', context_body=None, groupkey_name='RID'):
|
|
1934
|
+
"""Contextualizes a data path to a named visible columns context.
|
|
1935
|
+
|
|
1936
|
+
:param path: a datapath object
|
|
1937
|
+
:param context_name: name of the context within the path's terminating table's "visible columns" annotations
|
|
1938
|
+
:param context_body: a list of visible column definitions, if given, the `context_name` will be ignored
|
|
1939
|
+
:param groupkey_name: column name for the group by key of the generated query expression (default: 'RID')
|
|
1940
|
+
:return: a 'contextualized' attribute group query object
|
|
1941
|
+
"""
|
|
1942
|
+
assert isinstance(path, DataPath)
|
|
1943
|
+
path = copy.deepcopy(path)
|
|
1944
|
+
context = path.context
|
|
1945
|
+
table = context._wrapped_table
|
|
1946
|
+
sources = table.annotations.get(_erm.tag.source_definitions, {}).get('sources')
|
|
1947
|
+
vizcols = context_body if context_body else table.annotations.get(_erm.tag.visible_columns, {}).get(context_name, [])
|
|
1948
|
+
if not vizcols:
|
|
1949
|
+
raise ValueError('Visible columns context "%s" not found for table %s:%s' % (context_name, table.schema.name, table.name))
|
|
1950
|
+
groupkey = context.columns[groupkey_name]
|
|
1951
|
+
projection = []
|
|
1952
|
+
|
|
1953
|
+
for vizcol in vizcols:
|
|
1954
|
+
try:
|
|
1955
|
+
projection.append(_datapath_deserialize_vizcolumn(path, vizcol, sources=sources))
|
|
1956
|
+
path.context = context
|
|
1957
|
+
except ValueError as e:
|
|
1958
|
+
logger.warning(str(e))
|
|
1959
|
+
|
|
1960
|
+
def not_same_as_group_key(x):
|
|
1961
|
+
assert isinstance(groupkey, _ColumnWrapper)
|
|
1962
|
+
if not isinstance(x, _ColumnWrapper):
|
|
1963
|
+
return True
|
|
1964
|
+
return groupkey._wrapped_column != x._wrapped_column
|
|
1965
|
+
|
|
1966
|
+
projection = filter(not_same_as_group_key, projection) # project groupkey only once
|
|
1967
|
+
query = path.groupby(groupkey).attributes(*projection)
|
|
1968
|
+
return query
|
|
1969
|
+
|
|
1970
|
+
|
|
1971
|
+
def _datapath_generate_simple_denormalization(path, include_whole_entities=False):
|
|
1972
|
+
"""Generates a denormalized form of the table expressed in a visible columns specification.
|
|
1973
|
+
|
|
1974
|
+
:param path: a datapath object
|
|
1975
|
+
:param include_whole_entities: if a denormalization cannot find a 'name' like terminal, include the whole entity (i.e., all attributes), else return just the 'RID'
|
|
1976
|
+
:return: a generated visible columns specification based on a denormalization heuristic
|
|
1977
|
+
"""
|
|
1978
|
+
assert isinstance(path, DataPath)
|
|
1979
|
+
context = path.context
|
|
1980
|
+
table = context._wrapped_table
|
|
1981
|
+
|
|
1982
|
+
fkeys = list(table.foreign_keys)
|
|
1983
|
+
single_column_fkeys = {
|
|
1984
|
+
fkey.foreign_key_columns[0].name: fkey
|
|
1985
|
+
for fkey in table.foreign_keys if len(fkey.foreign_key_columns) == 1
|
|
1986
|
+
}
|
|
1987
|
+
|
|
1988
|
+
def _fkey_to_vizcol(name, fk, inbound=None):
|
|
1989
|
+
# name columns to look for in related tables
|
|
1990
|
+
name_candidates = [
|
|
1991
|
+
'displayname',
|
|
1992
|
+
'preferredname',
|
|
1993
|
+
'fullname',
|
|
1994
|
+
'name',
|
|
1995
|
+
'title',
|
|
1996
|
+
'label'
|
|
1997
|
+
]
|
|
1998
|
+
|
|
1999
|
+
# determine terminal column
|
|
2000
|
+
terminal = 'RID'
|
|
2001
|
+
for candidate_col in fk.pk_table.columns:
|
|
2002
|
+
if candidate_col.name.lower().replace(' ', '').replace('_', '') in name_candidates:
|
|
2003
|
+
terminal = candidate_col.name
|
|
2004
|
+
break
|
|
2005
|
+
|
|
2006
|
+
# define source path
|
|
2007
|
+
source = [{'outbound': fk.names[0]}, terminal]
|
|
2008
|
+
if inbound:
|
|
2009
|
+
source = [{'inbound': inbound.names[0]}] + source
|
|
2010
|
+
|
|
2011
|
+
# return vizcol spec
|
|
2012
|
+
return {
|
|
2013
|
+
'markdown_name': name,
|
|
2014
|
+
'source': source,
|
|
2015
|
+
'entity': include_whole_entities and terminal == 'RID'
|
|
2016
|
+
}
|
|
2017
|
+
|
|
2018
|
+
# assemble the visible column:
|
|
2019
|
+
# 1. column or single column fkeys
|
|
2020
|
+
# 2. all other (outbound fkey) related tables
|
|
2021
|
+
# 3. all associated tables
|
|
2022
|
+
vizcols = []
|
|
2023
|
+
for col in table.column_definitions:
|
|
2024
|
+
if col.name in single_column_fkeys:
|
|
2025
|
+
fkey = single_column_fkeys[col.name]
|
|
2026
|
+
vizcols.append(_fkey_to_vizcol(col.name, fkey))
|
|
2027
|
+
del single_column_fkeys[col.name]
|
|
2028
|
+
fkeys.remove(fkey)
|
|
2029
|
+
else:
|
|
2030
|
+
vizcols.append(col.name)
|
|
2031
|
+
|
|
2032
|
+
for outbound_fkey in fkeys:
|
|
2033
|
+
vizcols.append(_fkey_to_vizcol(outbound_fkey.constraint_name, outbound_fkey))
|
|
2034
|
+
|
|
2035
|
+
for inbound_fkey in table.referenced_by:
|
|
2036
|
+
if inbound_fkey.table.is_association():
|
|
2037
|
+
vizcols.append(
|
|
2038
|
+
_fkey_to_vizcol(
|
|
2039
|
+
inbound_fkey.table.name,
|
|
2040
|
+
inbound_fkey.table.foreign_keys[0] if inbound_fkey != inbound_fkey.table.foreign_keys[0] else inbound_fkey.table.foreign_keys[1],
|
|
2041
|
+
inbound=inbound_fkey
|
|
2042
|
+
)
|
|
2043
|
+
)
|
|
2044
|
+
|
|
2045
|
+
return vizcols
|
|
2046
|
+
|
|
2047
|
+
def simple_denormalization(path):
|
|
2048
|
+
"""A simple heuristic denormalization."""
|
|
2049
|
+
return _datapath_generate_simple_denormalization(path)
|
|
2050
|
+
|
|
2051
|
+
def simple_denormalization_with_whole_entities(path):
|
|
2052
|
+
"""A simple heuristic denormalization with related and associated entities."""
|
|
2053
|
+
return _datapath_generate_simple_denormalization(path, include_whole_entities=True)
|
|
2054
|
+
|
|
2055
|
+
def _datapath_denormalize(path, context_name=None, heuristic=None, groupkey_name='RID'):
    """Denormalize a path using either a named context or a heuristic.

    :param path: a DataPath object
    :param context_name: name of the visible-columns context; when not given, a heuristic is applied instead
    :param heuristic: callable that takes the path and returns a context body, used only when no context
                      name is specified (default: ``simple_denormalization``)
    :param groupkey_name: column name for the group by key of the generated query expression (default: 'RID')
    :return: the result of ``_datapath_contextualize`` for the selected context
    """
    assert isinstance(path, DataPath)
    assert context_name is None or isinstance(context_name, str)
    assert isinstance(groupkey_name, str)
    if not heuristic:
        heuristic = simple_denormalization
    assert callable(heuristic)

    # a named context wins; the heuristic is only evaluated when no name was supplied
    if context_name:
        body = None
    else:
        body = heuristic(path)

    return _datapath_contextualize(
        path,
        context_name=context_name,
        context_body=body,
        groupkey_name=groupkey_name
    )
|
deriva/transfer/__init__.py
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
from deriva.transfer.download.deriva_download import DerivaDownload, GenericDownloader, DerivaDownloadError, \
|
|
2
|
-
DerivaDownloadConfigurationError, DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError
|
|
2
|
+
DerivaDownloadConfigurationError, DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError, \
|
|
3
|
+
DerivaDownloadBaggingError
|
|
3
4
|
from deriva.transfer.download.deriva_download_cli import DerivaDownloadCLI
|
|
4
5
|
|
|
5
6
|
from deriva.transfer.upload.deriva_upload import DerivaUpload, GenericUploader, DerivaUploadError, DerivaUploadError, \
|
|
6
|
-
DerivaUploadConfigurationError, DerivaUploadCatalogCreateError, DerivaUploadCatalogUpdateError
|
|
7
|
+
DerivaUploadConfigurationError, DerivaUploadCatalogCreateError, DerivaUploadCatalogUpdateError, \
|
|
8
|
+
DerivaUploadAuthenticationError
|
|
7
9
|
from deriva.transfer.upload.deriva_upload_cli import DerivaUploadCLI
|
|
8
10
|
|
|
9
11
|
from deriva.transfer.backup.deriva_backup import DerivaBackup, DerivaBackupAuthenticationError, \
|
|
@@ -7,6 +7,7 @@ import platform
|
|
|
7
7
|
import requests
|
|
8
8
|
from requests.exceptions import HTTPError
|
|
9
9
|
from bdbag import bdbag_api as bdb, bdbag_ro as ro, BAG_PROFILE_TAG, BDBAG_RO_PROFILE_ID
|
|
10
|
+
from bdbag.bdbagit import BagValidationError
|
|
10
11
|
from deriva.core import ErmrestCatalog, HatracStore, format_exception, get_credential, format_credential, read_config, \
|
|
11
12
|
stob, Megabyte, __version__ as VERSION
|
|
12
13
|
from deriva.core.utils.version_utils import get_installed_version
|
|
@@ -14,7 +15,11 @@ from deriva.transfer.download.processors import find_query_processor, find_trans
|
|
|
14
15
|
from deriva.transfer.download.processors.base_processor import LOCAL_PATH_KEY, REMOTE_PATHS_KEY, SERVICE_URL_KEY, \
|
|
15
16
|
FILE_SIZE_KEY
|
|
16
17
|
from deriva.transfer.download import DerivaDownloadError, DerivaDownloadConfigurationError, \
|
|
17
|
-
DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError
|
|
18
|
+
DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError, \
|
|
19
|
+
DerivaDownloadBaggingError
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
18
23
|
|
|
19
24
|
|
|
20
25
|
class DerivaDownload(object):
|
|
@@ -43,7 +48,7 @@ class DerivaDownload(object):
|
|
|
43
48
|
info = "%s v%s [Python %s, %s]" % (
|
|
44
49
|
self.__class__.__name__, get_installed_version(VERSION),
|
|
45
50
|
platform.python_version(), platform.platform(aliased=True))
|
|
46
|
-
|
|
51
|
+
logger.info("Initializing downloader: %s" % info)
|
|
47
52
|
|
|
48
53
|
if not self.server:
|
|
49
54
|
raise DerivaDownloadConfigurationError("Server not specified!")
|
|
@@ -145,12 +150,12 @@ class DerivaDownload(object):
|
|
|
145
150
|
try:
|
|
146
151
|
if not self.credentials:
|
|
147
152
|
self.set_credentials(get_credential(self.hostname))
|
|
148
|
-
|
|
153
|
+
logger.info("Validating credentials for host: %s" % self.hostname)
|
|
149
154
|
attributes = self.catalog.get_authn_session().json()
|
|
150
155
|
identity = attributes["client"]
|
|
151
156
|
except HTTPError as he:
|
|
152
157
|
if he.response.status_code == 404:
|
|
153
|
-
|
|
158
|
+
logger.info("No existing login session found for host: %s" % self.hostname)
|
|
154
159
|
except Exception as e:
|
|
155
160
|
raise DerivaDownloadAuthenticationError("Unable to validate credentials: %s" % format_exception(e))
|
|
156
161
|
wallet = kwargs.get("wallet", {})
|
|
@@ -160,6 +165,7 @@ class DerivaDownload(object):
|
|
|
160
165
|
bag_archiver = None
|
|
161
166
|
bag_algorithms = None
|
|
162
167
|
bag_idempotent = False
|
|
168
|
+
bag_strict = True
|
|
163
169
|
bag_config = self.config.get('bag')
|
|
164
170
|
create_bag = True if bag_config else False
|
|
165
171
|
if create_bag:
|
|
@@ -171,7 +177,8 @@ class DerivaDownload(object):
|
|
|
171
177
|
bag_idempotent = stob(bag_config.get('bag_idempotent', False))
|
|
172
178
|
bag_metadata = bag_config.get('bag_metadata', {"Internal-Sender-Identifier":
|
|
173
179
|
"deriva@%s" % self.server_url})
|
|
174
|
-
bag_ro = create_bag and not bag_idempotent and stob(bag_config.get('bag_ro',
|
|
180
|
+
bag_ro = create_bag and not bag_idempotent and stob(bag_config.get('bag_ro', True))
|
|
181
|
+
bag_strict = stob(bag_config.get('bag_strict', True))
|
|
175
182
|
if create_bag:
|
|
176
183
|
bdb.ensure_bag_path_exists(bag_path)
|
|
177
184
|
bag = bdb.make_bag(bag_path, algs=bag_algorithms, metadata=bag_metadata, idempotent=bag_idempotent)
|
|
@@ -211,12 +218,13 @@ class DerivaDownload(object):
|
|
|
211
218
|
allow_anonymous=self.allow_anonymous,
|
|
212
219
|
timeout=self.timeout)
|
|
213
220
|
outputs = processor.process()
|
|
221
|
+
assert outputs is not None
|
|
214
222
|
if processor.should_abort():
|
|
215
223
|
raise DerivaDownloadTimeoutError("Timeout (%s seconds) waiting for processor [%s] to complete." %
|
|
216
224
|
(self.timeout_secs, processor_name))
|
|
217
225
|
self.check_payload_size(outputs)
|
|
218
226
|
except Exception as e:
|
|
219
|
-
|
|
227
|
+
logger.error(format_exception(e))
|
|
220
228
|
if create_bag:
|
|
221
229
|
bdb.cleanup_bag(bag_path)
|
|
222
230
|
if remote_file_manifest and os.path.isfile(remote_file_manifest):
|
|
@@ -270,16 +278,27 @@ class DerivaDownload(object):
|
|
|
270
278
|
remote_file_manifest=remote_file_manifest
|
|
271
279
|
if (remote_file_manifest and os.path.getsize(remote_file_manifest) > 0) else None,
|
|
272
280
|
update=True,
|
|
273
|
-
idempotent=bag_idempotent
|
|
281
|
+
idempotent=bag_idempotent,
|
|
282
|
+
strict=bag_strict)
|
|
283
|
+
except BagValidationError as bve:
|
|
284
|
+
msg = "Unable to validate bag.%s Error: %s" % (
|
|
285
|
+
"" if not bag_strict else
|
|
286
|
+
" Strict checking has been enabled, which most likely means that this bag "
|
|
287
|
+
"is empty (has no payload files or fetch references) and therefore invalid.",
|
|
288
|
+
format_exception(bve))
|
|
289
|
+
logger.error(msg)
|
|
290
|
+
bdb.cleanup_bag(bag_path)
|
|
291
|
+
raise DerivaDownloadBaggingError(msg)
|
|
274
292
|
except Exception as e:
|
|
275
|
-
|
|
293
|
+
msg = "Unhandled exception while updating bag manifests: %s" % format_exception(e)
|
|
294
|
+
logger.error(msg)
|
|
276
295
|
bdb.cleanup_bag(bag_path)
|
|
277
|
-
raise
|
|
296
|
+
raise DerivaDownloadBaggingError(msg)
|
|
278
297
|
finally:
|
|
279
298
|
if remote_file_manifest and os.path.isfile(remote_file_manifest):
|
|
280
299
|
os.remove(remote_file_manifest)
|
|
281
300
|
|
|
282
|
-
|
|
301
|
+
logger.info('Created bag: %s' % bag_path)
|
|
283
302
|
|
|
284
303
|
if bag_archiver is not None:
|
|
285
304
|
try:
|
|
@@ -289,8 +308,9 @@ class DerivaDownload(object):
|
|
|
289
308
|
bdb.cleanup_bag(bag_path)
|
|
290
309
|
outputs = {os.path.basename(archive): {LOCAL_PATH_KEY: archive}}
|
|
291
310
|
except Exception as e:
|
|
292
|
-
|
|
293
|
-
|
|
311
|
+
msg = "Exception while creating data bag archive: %s" % format_exception(e)
|
|
312
|
+
logger.error(msg)
|
|
313
|
+
raise DerivaDownloadBaggingError(msg)
|
|
294
314
|
else:
|
|
295
315
|
outputs = {os.path.basename(bag_path): {LOCAL_PATH_KEY: bag_path}}
|
|
296
316
|
|
|
@@ -318,7 +338,7 @@ class DerivaDownload(object):
|
|
|
318
338
|
(self.timeout_secs, processor_name))
|
|
319
339
|
self.check_payload_size(outputs)
|
|
320
340
|
except Exception as e:
|
|
321
|
-
|
|
341
|
+
logger.error(format_exception(e))
|
|
322
342
|
raise
|
|
323
343
|
|
|
324
344
|
return outputs
|
|
@@ -7,7 +7,8 @@ import requests
|
|
|
7
7
|
from requests.exceptions import HTTPError, ConnectionError
|
|
8
8
|
from deriva.transfer import GenericDownloader
|
|
9
9
|
from deriva.transfer.download import DerivaDownloadError, DerivaDownloadConfigurationError, \
|
|
10
|
-
DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError
|
|
10
|
+
DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError, \
|
|
11
|
+
DerivaDownloadBaggingError
|
|
11
12
|
from deriva.core import BaseCLI, KeyValuePairArgs, format_credential, format_exception, urlparse
|
|
12
13
|
|
|
13
14
|
|
|
@@ -71,7 +72,7 @@ class DerivaDownloadCLI(BaseCLI):
|
|
|
71
72
|
raise DerivaDownloadAuthorizationError(
|
|
72
73
|
"A requested operation was forbidden. Server responded: %s" % e)
|
|
73
74
|
except (DerivaDownloadError, DerivaDownloadConfigurationError, DerivaDownloadAuthenticationError,
|
|
74
|
-
DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError) as e:
|
|
75
|
+
DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError, DerivaDownloadBaggingError) as e:
|
|
75
76
|
sys.stderr.write(("\n" if not args.quiet else "") + format_exception(e))
|
|
76
77
|
if args.debug:
|
|
77
78
|
traceback.print_exc()
|
|
@@ -21,8 +21,8 @@ class BaseQueryProcessor(BaseProcessor):
|
|
|
21
21
|
self.catalog = kwargs["catalog"]
|
|
22
22
|
self.store = kwargs["store"]
|
|
23
23
|
self.base_path = kwargs["base_path"]
|
|
24
|
-
self.query = self.parameters
|
|
25
|
-
if self.envars:
|
|
24
|
+
self.query = self.parameters.get("query_path", "")
|
|
25
|
+
if self.query and self.envars:
|
|
26
26
|
self.query = self.query.format(**self.envars)
|
|
27
27
|
self.sub_path = self.parameters.get("output_path")
|
|
28
28
|
self.output_filename = self.parameters.get("output_filename")
|
|
@@ -59,6 +59,9 @@ class BaseQueryProcessor(BaseProcessor):
|
|
|
59
59
|
return self.outputs
|
|
60
60
|
|
|
61
61
|
def catalogQuery(self, headers=None, as_file=True):
|
|
62
|
+
if not self.query:
|
|
63
|
+
return {}
|
|
64
|
+
|
|
62
65
|
if not headers:
|
|
63
66
|
headers = self.HEADERS.copy()
|
|
64
67
|
else:
|
|
@@ -175,8 +178,9 @@ class BaseQueryProcessor(BaseProcessor):
|
|
|
175
178
|
envars=self.envars)
|
|
176
179
|
|
|
177
180
|
def __del__(self):
    """Close every session held in ``self.sessions`` when the processor is finalized.

    ``__del__`` is also invoked for instances whose ``__init__`` raised part-way through,
    in which case ``sessions`` may never have been assigned. The attribute is therefore
    read defensively instead of being assumed to exist.
    """
    sessions = getattr(self, "sessions", None)
    if sessions:
        for session in sessions.values():
            session.close()
|
|
180
184
|
|
|
181
185
|
|
|
182
186
|
class CSVQueryProcessor(BaseQueryProcessor):
|
|
@@ -232,3 +236,4 @@ class CreateDirProcessor(JSONEnvUpdateProcessor):
|
|
|
232
236
|
self.create_default_paths()
|
|
233
237
|
make_dirs(self.output_abspath)
|
|
234
238
|
|
|
239
|
+
return self.outputs
|
|
@@ -557,6 +557,10 @@ class DerivaUpload(object):
|
|
|
557
557
|
return None, None, None
|
|
558
558
|
|
|
559
559
|
def uploadFiles(self, status_callback=None, file_callback=None):
|
|
560
|
+
if not self.identity:
|
|
561
|
+
raise DerivaUploadAuthenticationError("Unable to determine user identity for %s. "
|
|
562
|
+
"Please ensure that you are authenticated successfully." %
|
|
563
|
+
self.server_url)
|
|
560
564
|
completed = 0
|
|
561
565
|
for group, assets in self.file_list.items():
|
|
562
566
|
if self.cancelled:
|
|
@@ -3,7 +3,7 @@ import sys
|
|
|
3
3
|
import json
|
|
4
4
|
import traceback
|
|
5
5
|
from deriva.transfer import DerivaUpload, DerivaUploadError, DerivaUploadConfigurationError, \
|
|
6
|
-
DerivaUploadCatalogCreateError, DerivaUploadCatalogUpdateError
|
|
6
|
+
DerivaUploadCatalogCreateError, DerivaUploadCatalogUpdateError, DerivaUploadAuthenticationError
|
|
7
7
|
from deriva.core import BaseCLI, write_config, format_credential, format_exception, urlparse
|
|
8
8
|
|
|
9
9
|
|
|
@@ -95,7 +95,7 @@ class DerivaUploadCLI(BaseCLI):
|
|
|
95
95
|
args.dry_run,
|
|
96
96
|
args.output_file)
|
|
97
97
|
except (RuntimeError, FileNotFoundError, DerivaUploadError, DerivaUploadConfigurationError,
|
|
98
|
-
DerivaUploadCatalogCreateError, DerivaUploadCatalogUpdateError) as e:
|
|
98
|
+
DerivaUploadCatalogCreateError, DerivaUploadCatalogUpdateError, DerivaUploadAuthenticationError) as e:
|
|
99
99
|
sys.stderr.write(("\n" if not args.quiet else "") + format_exception(e))
|
|
100
100
|
if args.debug:
|
|
101
101
|
traceback.print_exc()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: deriva
|
|
3
|
-
Version: 1.7.
|
|
3
|
+
Version: 1.7.1
|
|
4
4
|
Summary: Python APIs and CLIs (Command-Line Interfaces) for the DERIVA platform.
|
|
5
5
|
Home-page: https://github.com/informatics-isi-edu/deriva-py
|
|
6
6
|
Author: USC Information Sciences Institute, Informatics Systems Research Division
|
|
@@ -29,7 +29,7 @@ Requires-Dist: certifi
|
|
|
29
29
|
Requires-Dist: pika
|
|
30
30
|
Requires-Dist: urllib3 <3,>=1.26
|
|
31
31
|
Requires-Dist: portalocker >=1.2.1
|
|
32
|
-
Requires-Dist: bdbag >=1.7.
|
|
32
|
+
Requires-Dist: bdbag >=1.7.3
|
|
33
33
|
Requires-Dist: globus-sdk <4,>=3
|
|
34
34
|
Requires-Dist: fair-research-login >=0.3.1
|
|
35
35
|
Requires-Dist: fair-identifiers-client >=0.5.1
|
|
@@ -8,11 +8,11 @@ deriva/config/dump_catalog_annotations.py,sha256=QzaWDLfWIAQ0eWVV11zeceWgwDBOYIe
|
|
|
8
8
|
deriva/config/rollback_annotation.py,sha256=EMVpuaMh2VaXXoHhsr3ldc7g7e92ngszEswdGNEmGFQ,3020
|
|
9
9
|
deriva/config/examples/group_owner_policy.json,sha256=8v3GWM1F_BWnYD9x_f6Eo4kBDvyy8g7mRqujfoEKLNc,2408
|
|
10
10
|
deriva/config/examples/self_serve_policy.json,sha256=pW-cqWz4rJNNXwY4eVZFkQ8gKCHclC9yDa22ylfcDqY,1676
|
|
11
|
-
deriva/core/__init__.py,sha256=
|
|
11
|
+
deriva/core/__init__.py,sha256=e2yjmArOhvRrFnmJxgjd_BPYIhiYsTVeYXe3iVQIKaU,4945
|
|
12
12
|
deriva/core/annotation.py,sha256=PkAkPkxX1brQsb8_drR1Qj5QjQA5mjkpXhkq9NuZ1g8,13432
|
|
13
13
|
deriva/core/base_cli.py,sha256=EkLXOTeaFWUbPaYV-eLuLGga1PbkFVWi3Jjo-e_Vb-U,2681
|
|
14
|
-
deriva/core/catalog_cli.py,sha256
|
|
15
|
-
deriva/core/datapath.py,sha256=
|
|
14
|
+
deriva/core/catalog_cli.py,sha256=-6Bo6GLWFWap7y3VxkzPs73HAe_XzRXIJMW-Ri84m3M,23273
|
|
15
|
+
deriva/core/datapath.py,sha256=4Q3snZ-rBqQV5x7ZAfU7fWdVcmQYu-8Ma6a7DcOL6zQ,81306
|
|
16
16
|
deriva/core/deriva_binding.py,sha256=_sA9HGrcVRqT-OhrneMDMOquyVOFOxLq3WzBQhasLIM,12970
|
|
17
17
|
deriva/core/deriva_server.py,sha256=nsW3gwg1sIaHl3BTf-nL41AkSj3dEpcEBlatvjvN8CQ,200
|
|
18
18
|
deriva/core/ermrest_catalog.py,sha256=B8XdzDScxad4PVUxRxT3GLUkV5vSsNnvMA5a76cUdsc,50817
|
|
@@ -52,15 +52,15 @@ deriva/core/utils/webauthn_utils.py,sha256=rD0HQZAjUKp4NfqHQG1FhH3x7uKog2et7w7LB
|
|
|
52
52
|
deriva/seo/__init__.py,sha256=dYn48A7blbeYf40b4T3KVofrQK4u5K5MfxXWfIGloig,54
|
|
53
53
|
deriva/seo/sitemap_builder.py,sha256=Ht_AbodEERDofIoCcd4kPlrl1pVW670WN5dT4cc05LQ,13948
|
|
54
54
|
deriva/seo/sitemap_cli.py,sha256=miCqRfpSj5Dx5BfJGSd8Pi2e4OOQjotDzP_JubukhCM,2654
|
|
55
|
-
deriva/transfer/__init__.py,sha256=
|
|
55
|
+
deriva/transfer/__init__.py,sha256=Xbp-s9vmzFbumJZ743PEXE8rI1s6-s234F66C_ioC_E,1172
|
|
56
56
|
deriva/transfer/backup/__init__.py,sha256=vxsZiDLMTJQPybXT89G-07GsUoLhnItTCbLdXcDSyeA,465
|
|
57
57
|
deriva/transfer/backup/__main__.py,sha256=dT12--8C6sKGEtMhsYuy013ebXKpVnBJfhcQNlVtv6Y,361
|
|
58
58
|
deriva/transfer/backup/deriva_backup.py,sha256=IO9Tmzx6jHfUCkP-41nSsAeOFLn9T-0HwQcpRLpM_zs,5228
|
|
59
59
|
deriva/transfer/backup/deriva_backup_cli.py,sha256=T0tvPKWniRinMQt0qG7FI8AoK3GgtlT6EyBZmZCAjL8,2157
|
|
60
|
-
deriva/transfer/download/__init__.py,sha256=
|
|
60
|
+
deriva/transfer/download/__init__.py,sha256=Pr7Zud4AFsIWwopTxeC_pupslgCG_lzycO9w9Xyh88Q,350
|
|
61
61
|
deriva/transfer/download/__main__.py,sha256=YUg7AZ07t_xaOgtfJnU_l1nkEHCCPR8sU5X-l1An6SY,363
|
|
62
|
-
deriva/transfer/download/deriva_download.py,sha256=
|
|
63
|
-
deriva/transfer/download/deriva_download_cli.py,sha256=
|
|
62
|
+
deriva/transfer/download/deriva_download.py,sha256=9WHX0iBUsXv3iT0pEy95kpVN-Oh4vc6ywI5tYmJWpfk,17145
|
|
63
|
+
deriva/transfer/download/deriva_download_cli.py,sha256=wN8tyQDv1AIE_aDqjECbmkoEWN050vlEdJyteYbdgSs,3940
|
|
64
64
|
deriva/transfer/download/processors/__init__.py,sha256=evLp36tZn-Z_AMshdfV3JJO8w1es5owsnRN0IFJUwIo,4507
|
|
65
65
|
deriva/transfer/download/processors/base_processor.py,sha256=R6IIHSa_euv4X2Dyhd8fvQAiVYDGJTWMQtPoukHQn-Q,3837
|
|
66
66
|
deriva/transfer/download/processors/postprocess/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -69,7 +69,7 @@ deriva/transfer/download/processors/postprocess/transfer_post_processor.py,sha25
|
|
|
69
69
|
deriva/transfer/download/processors/postprocess/url_post_processor.py,sha256=s68iIYqQSZHtbv4y-fCG8pjhApAeMEG6hYcKx2Pvf5Y,2745
|
|
70
70
|
deriva/transfer/download/processors/query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
71
71
|
deriva/transfer/download/processors/query/bag_fetch_query_processor.py,sha256=tiQtfuy01YgOFFD5b_sP7TGjMnt0Jqcg2gp1KNWqeLE,5645
|
|
72
|
-
deriva/transfer/download/processors/query/base_query_processor.py,sha256=
|
|
72
|
+
deriva/transfer/download/processors/query/base_query_processor.py,sha256=oM1pbsbpnPV12h3FLtxo8Sp0OCUQYiRhg22AVqf4N50,10327
|
|
73
73
|
deriva/transfer/download/processors/query/file_download_query_processor.py,sha256=Hg1NbKsaGJh9cB86yIyL7Fm7ywSNVop837Dv8aFXUes,7257
|
|
74
74
|
deriva/transfer/download/processors/transform/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
75
75
|
deriva/transfer/download/processors/transform/base_transform_processor.py,sha256=Ddw5gsNpDANeuLvUaF4utp8psaxOtAzlgXtOg8gb-Pc,4109
|
|
@@ -83,10 +83,10 @@ deriva/transfer/restore/__init__.py,sha256=l_VNbzpfjL8aVSASctsqSMbWO6m_9aVhkq5Lq
|
|
|
83
83
|
deriva/transfer/restore/__main__.py,sha256=iGGSF4_q3gUTLU3pPHYL9PNi9T6XNRHOBeTf1obhOjc,364
|
|
84
84
|
deriva/transfer/restore/deriva_restore.py,sha256=s0h7cXit2USSdjrIfrj0dr7BJ0rrHHMxYr6AxsuoDiI,30298
|
|
85
85
|
deriva/transfer/restore/deriva_restore_cli.py,sha256=2ViZ1Lyl5ndXPKeJFCHHGnwzkg3DfHhTuRa_bN7eJm8,5603
|
|
86
|
-
deriva/transfer/upload/__init__.py,sha256=
|
|
86
|
+
deriva/transfer/upload/__init__.py,sha256=4mlc_iUX-v7SpXzlCZmhxQtSiW5JeDGb2FX7bb1E6tY,304
|
|
87
87
|
deriva/transfer/upload/__main__.py,sha256=hqnXtGpRqPthwpO6uvrnf_TQm7McheeyOt960hStSMY,340
|
|
88
|
-
deriva/transfer/upload/deriva_upload.py,sha256=
|
|
89
|
-
deriva/transfer/upload/deriva_upload_cli.py,sha256
|
|
88
|
+
deriva/transfer/upload/deriva_upload.py,sha256=9NJbsPx1FANgGrCr5AP78AjtM41LJsanCJKEW4UH9ws,60376
|
|
89
|
+
deriva/transfer/upload/deriva_upload_cli.py,sha256=-Q6xgiYabQziTQcMQdGNDAv-eLxCCHO-BCSo4umbDE4,5082
|
|
90
90
|
deriva/transfer/upload/processors/__init__.py,sha256=sMM5xdJ82UIRdB1lGMKk7ft0BgtjS2oJ0sI4SQSqiIU,2481
|
|
91
91
|
deriva/transfer/upload/processors/archive_processor.py,sha256=ID0lDwDn4vPe5nbxy6m28Ssj_TsZpK4df2xRrM6nJRQ,2015
|
|
92
92
|
deriva/transfer/upload/processors/base_processor.py,sha256=Nsr-BP_0TvCtW7sb7s2QbYDEJoijONpQDDdnkVcbmng,666
|
|
@@ -97,10 +97,10 @@ deriva/transfer/upload/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
97
97
|
deriva/utils/__init__.py,sha256=jv2YF__bseklT3OWEzlqJ5qE24c4aWd5F4r0TTjOrWQ,65
|
|
98
98
|
tests/deriva/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
99
99
|
tests/deriva/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
100
|
-
tests/deriva/core/test_datapath.py,sha256=
|
|
101
|
-
deriva-1.7.
|
|
102
|
-
deriva-1.7.
|
|
103
|
-
deriva-1.7.
|
|
104
|
-
deriva-1.7.
|
|
105
|
-
deriva-1.7.
|
|
106
|
-
deriva-1.7.
|
|
100
|
+
tests/deriva/core/test_datapath.py,sha256=hC5PqyL9zqNOV4ydY5L4pHdt8r7Or7OgZnX-F52P2nU,37308
|
|
101
|
+
deriva-1.7.1.dist-info/LICENSE,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
|
|
102
|
+
deriva-1.7.1.dist-info/METADATA,sha256=WAYjUe8xgfn45OweE56aX8q67MuxaBcdxdkrSRfZfO4,1623
|
|
103
|
+
deriva-1.7.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
104
|
+
deriva-1.7.1.dist-info/entry_points.txt,sha256=72BEmEE4Bes5QhVxUHrl7EvUARrgISWxI2KGa8BbNZ8,786
|
|
105
|
+
deriva-1.7.1.dist-info/top_level.txt,sha256=_LHDie5-O53wFlexfrxjewpVkf04oydf3CqX5h75DXE,13
|
|
106
|
+
deriva-1.7.1.dist-info/RECORD,,
|
|
@@ -12,7 +12,8 @@ import os
|
|
|
12
12
|
import unittest
|
|
13
13
|
import sys
|
|
14
14
|
from deriva.core import DerivaServer, get_credential, ermrest_model as _em, __version__
|
|
15
|
-
from deriva.core.datapath import DataPathException, Min, Max, Sum, Avg, Cnt, CntD, Array, ArrayD, Bin
|
|
15
|
+
from deriva.core.datapath import DataPathException, Min, Max, Sum, Avg, Cnt, CntD, Array, ArrayD, Bin, \
|
|
16
|
+
simple_denormalization_with_whole_entities
|
|
16
17
|
|
|
17
18
|
try:
|
|
18
19
|
from pandas import DataFrame
|
|
@@ -129,11 +130,12 @@ def populate_test_catalog(catalog):
|
|
|
129
130
|
"""Populate the test catalog."""
|
|
130
131
|
paths = catalog.getPathBuilder()
|
|
131
132
|
logger.debug("Inserting project...")
|
|
132
|
-
logger.debug("Inserting experiment types...")
|
|
133
133
|
proj_table = paths.schemas[SNAME_ISA].tables[TNAME_PROJECT]
|
|
134
|
+
logger.debug("Inserting investigators...")
|
|
134
135
|
proj_table.insert([
|
|
135
136
|
{"Investigator": TEST_PROJ_INVESTIGATOR, "Num": TEST_PROJ_NUM}
|
|
136
137
|
])
|
|
138
|
+
logger.debug("Inserting experiment types...")
|
|
137
139
|
type_table = paths.schemas[SNAME_VOCAB].tables[TNAME_EXPERIMENT_TYPE]
|
|
138
140
|
types = type_table.insert([
|
|
139
141
|
{"Name": "{}".format(name), "Description": "NA"} for name in range(TEST_EXPTYPE_MAX)
|
|
@@ -806,6 +808,26 @@ class DatapathTests (unittest.TestCase):
|
|
|
806
808
|
self.assertEqual(path.context._name, path3.context._name, "Context of composed paths should equal far right-hand path's context")
|
|
807
809
|
self.assertGreater(len(path.Experiment.entities()), 0, "Should have returned results")
|
|
808
810
|
|
|
811
|
+
def test_simple_denormalization(self):
    """Default denormalization keeps the row count but yields different keys and 'Type' values."""
    raw_rows = self.experiment.entities()
    denormalized_rows = self.experiment.denormalize()
    self.assertEqual(len(raw_rows), len(denormalized_rows))

    raw_first = raw_rows[0]
    denormalized_first = denormalized_rows[0]
    self.assertNotEqual(raw_first.keys(), denormalized_first.keys())
    self.assertIn('Type', denormalized_first)

    # the raw entity carries a 'TEST:'-prefixed value, the denormalized row must not
    self.assertTrue(raw_first['Type'].startswith('TEST:'))
    denormalized_type = denormalized_first['Type']
    self.assertTrue(denormalized_type)
    self.assertFalse(denormalized_type.startswith('TEST:'))
|
|
820
|
+
|
|
821
|
+
def test_simple_denormalization_w_entities(self):
    """Whole-entity denormalization adds a fkey-named column holding a list of dicts with a 'RID'."""
    fkey_column = 'Experiment_Project Investigator_Project_Num_fkey'

    raw_rows = self.experiment.entities()
    denormalized_rows = self.experiment.denormalize(heuristic=simple_denormalization_with_whole_entities)
    self.assertEqual(len(raw_rows), len(denormalized_rows))

    raw_first = raw_rows[0]
    denormalized_first = denormalized_rows[0]
    self.assertLess(len(raw_first.keys()), len(denormalized_first.keys()))
    self.assertIn(fkey_column, denormalized_first)

    related = denormalized_first[fkey_column]
    self.assertIsInstance(related, list)
    self.assertIsInstance(related[0], dict)
    self.assertIn('RID', related[0])
|
|
830
|
+
|
|
809
831
|
|
|
810
832
|
if __name__ == '__main__':
|
|
811
833
|
sys.exit(unittest.main())
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|