deriva 1.7.0__py3-none-any.whl → 1.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva/config/annotation_config.py +2 -2
- deriva/core/__init__.py +1 -1
- deriva/core/catalog_cli.py +83 -39
- deriva/core/datapath.py +519 -26
- deriva/core/ermrest_catalog.py +103 -23
- deriva/core/ermrest_model.py +387 -7
- deriva/core/hatrac_cli.py +5 -3
- deriva/core/utils/globus_auth_utils.py +3 -1
- deriva/transfer/__init__.py +4 -2
- deriva/transfer/download/__init__.py +4 -0
- deriva/transfer/download/deriva_download.py +33 -13
- deriva/transfer/download/deriva_download_cli.py +3 -2
- deriva/transfer/download/processors/query/base_query_processor.py +9 -4
- deriva/transfer/upload/__init__.py +4 -0
- deriva/transfer/upload/deriva_upload.py +9 -2
- deriva/transfer/upload/deriva_upload_cli.py +2 -2
- {deriva-1.7.0.dist-info → deriva-1.7.3.dist-info}/METADATA +2 -2
- {deriva-1.7.0.dist-info → deriva-1.7.3.dist-info}/RECORD +23 -23
- tests/deriva/core/test_datapath.py +24 -2
- {deriva-1.7.0.dist-info → deriva-1.7.3.dist-info}/LICENSE +0 -0
- {deriva-1.7.0.dist-info → deriva-1.7.3.dist-info}/WHEEL +0 -0
- {deriva-1.7.0.dist-info → deriva-1.7.3.dist-info}/entry_points.txt +0 -0
- {deriva-1.7.0.dist-info → deriva-1.7.3.dist-info}/top_level.txt +0 -0
deriva/core/ermrest_model.py
CHANGED
```diff
@@ -1,7 +1,13 @@
 
+from __future__ import annotations
+
 from collections import OrderedDict
+from collections.abc import Iterable
+from enum import Enum
 import json
 import re
+import base64
+import hashlib
 
 from . import AttrDict, tag, urlquote, stob
 
```
```diff
@@ -17,6 +23,74 @@ class NoChange (object):
 # singleton to use in APIs below
 nochange = NoChange()
 
+def make_id(*components):
+    """Build an identifier that will be OK for ERMrest and Postgres.
+
+    Naively, append as '_'.join(components).
+
+    Fallback to heuristics mixing truncation with short hashes.
+    """
+    # accept lists at top-level for convenience (compound keys, etc.)
+    expanded = []
+    for e in components:
+        if isinstance(e, list):
+            expanded.extend(e)
+        else:
+            expanded.append(e)
+
+    # prefer to use naive name as requested
+    naive_result = '_'.join(expanded)
+    naive_len = len(naive_result.encode('utf8'))
+    if naive_len <= 63:
+        return naive_result
+
+    # we'll need to truncate and hash in some way...
+    def hash(s, nbytes):
+        return base64.urlsafe_b64encode(hashlib.md5(s.encode('utf8')).digest()).decode()[0:nbytes]
+
+    def truncate(s, maxlen):
+        encoded_len = len(s.encode('utf8'))
+        # we need to chop whole (unicode) chars but test encoded byte lengths!
+        for i in range(max(1, len(s) - maxlen), len(s) - 1):
+            result = s[0:-1 * i].rstrip()
+            if len(result.encode('utf8')) <= (maxlen - 2):
+                return result + '..'
+        return s
+
+    naive_hash = hash(naive_result, 5)
+    parts = [
+        (i, expanded[i])
+        for i in range(len(expanded))
+    ]
+
+    # try to find a solution truncating individual fields
+    for maxlen in [15, 12, 9]:
+        parts.sort(key=lambda p: (len(p[1].encode('utf8')), p[0]), reverse=True)
+        for i in range(len(parts)):
+            idx, part = parts[i]
+            if len(part.encode('utf8')) > maxlen:
+                parts[i] = (idx, truncate(part, maxlen))
+        candidate_result = '_'.join([
+            p[1]
+            for p in sorted(parts, key=lambda p: p[0])
+        ] + [naive_hash])
+        if len(candidate_result.encode('utf8')) < 63:
+            return candidate_result
+
+    # fallback to truncating original naive name
+    # try to preserve suffix and trim in middle
+    result = ''.join([
+        truncate(naive_result, len(naive_result)//3),
+        naive_result[-len(naive_result)//3:],
+        '_',
+        naive_hash
+    ])
+    if len(result.encode('utf8')) <= 63:
+        return result
+
+    # last-ditch (e.g. multibyte unicode suffix worst case)
+    return truncate(naive_result, 55) + naive_hash
+
 def presence_annotation(tag_uri):
     """Decorator to establish property getter/setter/deleter for presence annotations.
 
```
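A quick sketch of how the new `make_id()` helper behaves (it is also used below by `define_association()` for default table and constraint naming); the component names here are hypothetical:

```python
from deriva.core.ermrest_model import make_id

# Short inputs pass through as a naive '_' join.
make_id('Experiment', 'Sample')                  # -> 'Experiment_Sample'

# Nested lists are flattened, convenient for compound-key parts.
make_id('Assay', ['Subject', 'Visit'], 'fkey')   # -> 'Assay_Subject_Visit_fkey'

# Over-long inputs are truncated per-field and suffixed with a short
# MD5-derived hash so the result stays within Postgres's 63-byte limit.
long_id = make_id('A' * 40, 'B' * 40, 'fkey')
assert len(long_id.encode('utf8')) <= 63
```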
```diff
@@ -684,6 +758,15 @@ class KeyedList (list):
         list.append(self, e)
         self.elements[e.name] = e
 
+class FindAssociationResult (object):
+    """Wrapper for results of Table.find_associations()"""
+    def __init__(self, table, self_fkey, other_fkeys):
+        self.table = table
+        self.name = table.name
+        self.schema = table.schema
+        self.self_fkey = self_fkey
+        self.other_fkeys = other_fkeys
+
 class Table (object):
     """Named table.
     """
```
```diff
@@ -1075,6 +1158,184 @@ class Table (object):
             provide_system
         )
 
+    @classmethod
+    def define_association(
+            cls,
+            associates: Iterable[Key | Table | tuple[str, Key | Table]],
+            metadata: Iterable[Key | Table | dict | tuple[str, bool, Key | Table]] = [],
+            table_name: str | None = None,
+            comment: str | None = None,
+            provide_system: bool = True) -> dict:
+        """Build an association table definition.
+
+        :param associates: the existing Key instances being associated
+        :param metadata: additional metadata fields for impure associations
+        :param table_name: name for the association table or None for default naming
+        :param comment: comment for the association table or None for default comment
+        :param provide_system: add ERMrest system columns when True
+
+        This is a utility function to help build an association table
+        definition. It simplifies the task, but removes some
+        control. For full customization, consider using Table.define()
+        directly instead.
+
+        A normal ("pure") N-ary association is a table with N foreign
+        keys referencing N primary keys in referenced tables, with a
+        composite primary key covering the N foreign keys. These pure
+        association tables manage a set of distinct combinations of
+        the associated foreign key values.
+
+        An "impure" association table adds additional metadata
+        alongside the N foreign keys.
+
+        The "associates" parameter takes an iterable of Key instances
+        from other tables. The association will be comprised of
+        foreign keys referencing these associates. Optionally, a tuple
+        of (str, Key) can supply a string _base name_ to influence how
+        the foreign key columns and constraint will be named in the
+        new association table. A bare Key instance will get a base
+        name derived from the referenced table name.
+
+        The "metadata" parameter takes an iterable of plain dict
+        column definitions or Key instances. Each dict must be a
+        scalar column definition, such as produced by the
+        Column.define() class method. A Key instance will cause
+        corresponding columns and foreign keys to be added to the
+        association table to act as metadata. Optionally, a tuple of
+        (str, bool, Key) can supply a string _base name_ and a boolean
+        _nullok_ property to influence how the foreign key columns and
+        constraint will be constructed and named. A bare Key instance
+        will get a base name derived from the referenced table name,
+        and presumed as nullok=False.
+
+        If a Table instance is supplied instead of a Key instance for
+        associates or metadata inputs, an attempt will be made to
+        locate a key based on the RID system column. If this key
+        cannot be found, a KeyError will be raised.
+
+        """
+        associates = list(associates)
+        metadata = list(metadata)
+
+        if len(associates) < 2:
+            raise ValueError('An association table requires at least 2 associates')
+
+        cdefs = []
+        kdefs = []
+        fkdefs = []
+
+        used_names = set()
+
+        def check_basename(base_name):
+            if not isinstance(base_name, str):
+                raise TypeError('Base name %r is not of required type str' % (base_name,))
+            if base_name in used_names:
+                raise ValueError('Base name %r is not unique among associates and metadata' % (base_name,))
+            used_names.add(base_name)
+
+        def choose_basename(key):
+            base_name = key.table.name
+            n = 2
+            while base_name in used_names:
+                base_name = '%s%d' % (key.table.name, n)
+                n += 1
+            used_names.add(base_name)
+            return base_name
+
+        def check_key(key):
+            if isinstance(key, Table):
+                return key.key_by_columns(["RID"])
+            return key
+
+        # check and normalize associates into list[(str, Key)] with distinct base names
+        for i in range(len(associates)):
+            if isinstance(associates[i], tuple):
+                base_name, key = associates[i]
+                check_basename(base_name)
+                key = check_key(key)
+                associates[i] = (base_name, key)
+            else:
+                key = check_key(associates[i])
+                base_name = choose_basename(key)
+                associates[i] = (base_name, key)
+
+        # build assoc table name if not provided
+        if table_name is None:
+            table_name = make_id(*[ assoc[1].table.name for assoc in associates ])
+
+        def simplify_type(ctype):
+            if ctype.is_domain and ctype.typename.startswith('ermrest_'):
+                return ctype.base_type
+
+            return ctype
+
+        def cdefs_for_key(base_name, key, nullok=False):
+            return [
+                Column.define(
+                    '%s_%s' % (base_name, col.name) if len(key.unique_columns) > 1 else base_name,
+                    simplify_type(col.type),
+                    nullok=nullok,
+                )
+                for col in key.unique_columns
+            ]
+
+        def fkdef_for_key(base_name, key):
+            return ForeignKey.define(
+                [
+                    '%s_%s' % (base_name, col.name) if len(key.unique_columns) > 1 else base_name
+                    for col in key.unique_columns
+                ],
+                key.table.schema.name,
+                key.table.name,
+                [ col.name for col in key.unique_columns ],
+                on_update='CASCADE',
+                on_delete='CASCADE',
+                constraint_name=make_id(table_name, base_name, 'fkey'),
+            )
+
+        # build core association definition (i.e. the "pure" parts)
+        k_cnames = []
+        for base_name, key in associates:
+            cdefs.extend(cdefs_for_key(base_name, key))
+            fkdefs.append(fkdef_for_key(base_name, key))
+
+            k_cnames.extend([
+                '%s_%s' % (base_name, col.name) if len(key.unique_columns) > 1 else base_name
+                for col in key.unique_columns
+            ])
+
+        kdefs.append(
+            Key.define(
+                k_cnames,
+                constraint_name=make_id(table_name, 'assoc', 'key'),
+            )
+        )
+
+        # check and normalize metadata into list[dict | (str, bool, Key)]
+        for i in range(len(metadata)):
+            if isinstance(metadata[i], tuple):
+                base_name, nullok, key = metadata[i]
+                check_basename(base_name)
+                key = check_key(key)
+                metadata[i] = (base_name, nullok, key)
+            elif isinstance(metadata[i], dict):
+                pass
+            else:
+                key = check_key(metadata[i])
+                base_name = choose_basename(key)
+                metadata[i] = (base_name, False, key)
+
+        # add metadata to definition
+        for md in metadata:
+            if isinstance(md, dict):
+                cdefs.append(md)
+            else:
+                base_name, nullok, key = md
+                cdefs.extend(cdefs_for_key(base_name, key, nullok))
+                fkdefs.append(fkdef_for_key(base_name, key))
+
+        return Table.define(table_name, cdefs, kdefs, fkdefs, comment=comment, provide_system=provide_system)
+
     def prejson(self, prune=True):
         return {
             "schema_name": self.schema.name,
```
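A hedged usage sketch for the new classmethod: the host, schema name `isa`, and table names below are hypothetical, and it assumes the standard `RID` keys exist so bare `Table` instances can be passed as associates:

```python
from deriva.core import ErmrestCatalog, get_credential
from deriva.core.ermrest_model import Table, Column, builtin_types

catalog = ErmrestCatalog('https', 'example.org', 1,
                         credentials=get_credential('example.org'))
model = catalog.getCatalogModel()
isa = model.schemas['isa']

# Binary association between Sample and File, made "impure" by one
# extra metadata column; table and constraint names fall back to
# make_id() defaults.
assoc_def = Table.define_association(
    associates=[isa.tables['Sample'], isa.tables['File']],
    metadata=[Column.define('notes', builtin_types.text)],
)
isa.create_table(assoc_def)
```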
```diff
@@ -1348,7 +1609,7 @@ class Table (object):
         if raise_nomatch:
             raise KeyError(from_to_map)
 
-    def is_association(self, min_arity=2, max_arity=2, unqualified=True, pure=True, no_overlap=True):
+    def is_association(self, min_arity=2, max_arity=2, unqualified=True, pure=True, no_overlap=True, return_fkeys=False):
         """Return (truthy) integer arity if self is a matching association, else False.
 
         min_arity: minimum number of associated fkeys (default 2)
```
```diff
@@ -1356,6 +1617,7 @@ class Table (object):
         unqualified: reject qualified associations when True (default True)
         pure: reject impure associations when True (default True)
         no_overlap: reject overlapping associations when True (default True)
+        return_fkeys: return the set of N associated ForeignKeys if True
 
         The default behavior with no arguments is to test for pure,
         unqualified, non-overlapping, binary associations.
```
```diff
@@ -1444,9 +1706,43 @@
             # reject: impure association
             return False
 
-        # return (truthy) arity
-        return len(covered_fkeys)
+        # return (truthy) arity or fkeys
+        if return_fkeys:
+            return covered_fkeys
+        else:
+            return len(covered_fkeys)
+
+    def find_associations(self, min_arity=2, max_arity=2, unqualified=True, pure=True, no_overlap=True) -> Iterable[FindAssociationResult]:
+        """Yield (iterable) Association objects linking to this table and meeting all criteria.
 
+        min_arity: minimum number of associated fkeys (default 2)
+        max_arity: maximum number of associated fkeys (default 2) or None
+        unqualified: reject qualified associations when True (default True)
+        pure: reject impure associations when True (default True)
+        no_overlap: reject overlapping associations when True (default True)
+
+        See documentation for sibling method Table.is_association(...)
+        for more explanation of these association detection criteria.
+
+        """
+        peer_tables = set()
+        for fkey in self.referenced_by:
+            peer = fkey.table
+            if peer in peer_tables:
+                # check each peer only once
+                continue
+            peer_tables.add(peer)
+            answer = peer.is_association(min_arity=min_arity, max_arity=max_arity, unqualified=unqualified, pure=pure, no_overlap=no_overlap, return_fkeys=True)
+            if answer:
+                answer = set(answer)
+                for fkey in answer:
+                    if fkey.pk_table == self:
+                        answer.remove(fkey)
+                        yield FindAssociationResult(peer, fkey, answer)
+                        # arbitrarily choose first fkey to self
+                        # in case association is back to same table
+                        break
+
     @presence_annotation(tag.immutable)
     def immutable(self): pass
 
```
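Continuing the sketch above, `find_associations()` enumerates association tables that link back to a given table, yielding the new `FindAssociationResult` wrappers:

```python
sample = model.schemas['isa'].tables['Sample']   # hypothetical table

for assoc in sample.find_associations():
    # assoc.table is the association table itself; self_fkey points
    # back to Sample, other_fkeys reference the other associates.
    print(assoc.name, '->', [fkey.pk_table.name for fkey in assoc.other_fkeys])
```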
```diff
@@ -1495,6 +1791,40 @@
     @object_annotation(tag.viz_3d_display)
     def viz_3d_display(self): pass
 
+class Quantifier (str, Enum):
+    """Logic quantifiers"""
+    any = 'any'
+    all = 'all'
+
+def find_tables_with_foreign_keys(target_tables: Iterable[Table], quantifier: Quantifier=Quantifier.all) -> set[Table]:
+    """Return set of tables with foreign key references to target tables.
+
+    :param target_tables: an iterable of ermrest_model.Table instances
+    :param quantifier: one of the Quantifiers 'any' or 'all' (default 'all')
+
+    Each returned Table instance will be a table that references the
+    targets according to the selected quantifier. A reference is a
+    direct foreign key in the returned table that refers to a primary
+    key of the target table.
+
+    - quantifier==all: a returned table references ALL targets
+    - quantifier==any: a returned table references AT LEAST ONE target
+
+    For proper function, all target_tables instances MUST come from
+    the same root Model instance hierarchy.
+
+    """
+    candidates = None
+    for table in target_tables:
+        referring = { fkey.table for fkey in table.referenced_by }
+        if candidates is None:
+            candidates = referring
+        elif quantifier == Quantifier.all:
+            candidates.intersection_update(referring)
+        else:
+            candidates.update(referring)
+    return candidates
+
 class Column (object):
     """Named column.
     """
```
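The module-level helper can then be quantified over several targets at once (same hypothetical model as above):

```python
from deriva.core.ermrest_model import find_tables_with_foreign_keys, Quantifier

targets = [isa.tables['Sample'], isa.tables['File']]

# Tables whose foreign keys reference ALL of the targets (the default).
referencing_all = find_tables_with_foreign_keys(targets)

# Tables referencing AT LEAST ONE of the targets.
referencing_any = find_tables_with_foreign_keys(targets, quantifier=Quantifier.any)
```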
```diff
@@ -1696,7 +2026,6 @@ class Column (object):
 
     @object_annotation(tag.column_display)
     def column_display(self): pass
-
 
 def _constraint_name_parts(constraint, doc):
     # modern systems should have 0 or 1 names here
```
```diff
@@ -1781,10 +2110,29 @@ class Key (object):
         }
 
     @classmethod
-    def define(cls, colnames, constraint_names=[], comment=None, annotations={}):
-        """Build a key definition.
+    def define(cls, colnames, constraint_names=[], comment=None, annotations={}, constraint_name=None):
+        """Build a key definition.
+
+        :param colnames: List of names of columns participating in the key
+        :param constraint_names: Legacy input [ [ schema_name, constraint_name ] ] (for API backwards-compatibility)
+        :param comment: Comment string
+        :param annotations: Dictionary of { annotation_uri: annotation_value, ... }
+        :param constraint_name: Constraint name string
+
+        The constraint_name kwarg takes a bare constraint name string
+        and acts the same as setting the legacy constraint_names kwarg
+        to: [ [ "placeholder", constraint_name ] ]. This odd syntax
+        is for backwards-compatibility with earlier API versions, and
+        mirrors the structure of constraint names in ERMrest model
+        description outputs. In those outputs, the "placeholder" field
+        contains the schema name of the table containing the
+        constraint.
+
+        """
         if not isinstance(colnames, list):
             raise TypeError('Colnames should be a list.')
+        if constraint_name is not None:
+            constraint_names = [ [ "placeholder", constraint_name ] ]
         return {
             'unique_columns': list(colnames),
             'names': constraint_names,
```
```diff
@@ -1983,9 +2331,41 @@ class ForeignKey (object):
         }
 
     @classmethod
-    def define(cls, fk_colnames, pk_sname, pk_tname, pk_colnames, on_update='NO ACTION', on_delete='NO ACTION', constraint_names=[], comment=None, acls={}, acl_bindings={}, annotations={}):
+    def define(cls, fk_colnames, pk_sname, pk_tname, pk_colnames, on_update='NO ACTION', on_delete='NO ACTION', constraint_names=[], comment=None, acls={}, acl_bindings={}, annotations={}, constraint_name=None):
+        """Define a foreign key.
+
+        :param fk_colnames: List of column names participating in the foreign key
+        :param pk_sname: Schema name string of the referenced primary key
+        :param pk_tname: Table name string of the referenced primary key
+        :param pk_colnames: List of column names participating in the referenced primary key
+        :param on_update: Constraint behavior when referenced primary keys are updated
+        :param on_delete: Constraint behavior when referenced primary keys are deleted
+        :param constraint_names: Legacy input [ [ schema_name, constraint_name ] ] (for API backwards-compatibility)
+        :param comment: Comment string
+        :param acls: Dictionary of { acl_name: acl, ... }
+        :param acl_bindings: Dictionary of { binding_name: acl_binding, ... }
+        :param annotations: Dictionary of { annotation_uri: annotation_value, ... }
+        :param constraint_name: Constraint name string
+
+        The constraint behavior values for on_update and on_delete must
+        be one of the following literal strings:
+
+        'NO ACTION', 'RESTRICT', 'CASCADE', 'SET NULL', 'SET DEFAULT'
+
+        The constraint_name kwarg takes a bare constraint name string
+        and acts the same as setting the legacy constraint_names kwarg
+        to: [ [ "placeholder", constraint_name ] ]. This odd syntax
+        is for backwards-compatibility with earlier API versions, and
+        mirrors the structure of constraint names in ERMrest model
+        description outputs. In those outputs, the "placeholder" field
+        contains the schema name of the table containing the
+        constraint.
+
+        """
         if len(fk_colnames) != len(pk_colnames):
             raise ValueError('The fk_colnames and pk_colnames lists must have the same length.')
+        if constraint_name is not None:
+            constraint_names = [ [ "placeholder", constraint_name ], ]
         return {
             'foreign_key_columns': [
                 {
```
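A short sketch of the new `constraint_name` convenience kwarg on both `define()` methods (constraint and column names hypothetical); it stands in for the legacy nested `constraint_names` form:

```python
from deriva.core.ermrest_model import Key, ForeignKey

key_def = Key.define(
    ['Sample', 'File'],
    # equivalent to constraint_names=[['placeholder', 'Sample_File_key']]
    constraint_name='Sample_File_key',
)

fkey_def = ForeignKey.define(
    ['Sample'], 'isa', 'Sample', ['RID'],
    on_delete='CASCADE',
    constraint_name='Sample_File_Sample_fkey',
)
```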
deriva/core/hatrac_cli.py
CHANGED
```diff
@@ -185,14 +185,14 @@ class DerivaHatracCLI (BaseCLI):
 
         try:
             acls = self.store.get_acl(self.resource, args.access, args.role)
+            if acls is None:
+                raise DerivaHatracCLIException('No such object or namespace or ACL entry')
             for access in acls:
                 print("%s:" % access)
                 for role in acls.get(access, []):
                     print(" %s" % role)
         except HTTPError as e:
-            if e.response.status_code == requests.codes.not_found:
-                raise ResourceException('No such object or namespace or ACL entry', e)
-            elif e.response.status_code == requests.codes.bad_request:
+            if e.response.status_code == requests.codes.bad_request:
                 raise ResourceException('Invalid ACL name %s' % args.access, e)
             else:
                 raise e
```
```diff
@@ -316,6 +316,8 @@ class DerivaHatracCLI (BaseCLI):
         except HatracHashMismatch as e:
             logging.debug(format_exception(e))
             eprint(_resource_error_message('Checksum verification failed'))
+        except DerivaHatracCLIException as e:
+            eprint(e)
         except RuntimeError as e:
             logging.debug(format_exception(e))
             eprint('Unexpected runtime error occurred')
```
deriva/core/utils/globus_auth_utils.py
CHANGED

```diff
@@ -52,7 +52,7 @@ class GlobusAuthUtil:
         client_id = kwargs.get("client_id")
         client_secret = kwargs.get("client_secret")
         if not (client_id and client_secret):
-            cred_file = kwargs.get("credential_file", CLIENT_CRED_FILE)
+            cred_file = kwargs.get("credential_file", CLIENT_CRED_FILE) or CLIENT_CRED_FILE
            if os.path.isfile(cred_file):
                 creds = read_config(cred_file)
                 if creds:
@@ -60,6 +60,8 @@ class GlobusAuthUtil:
                 if client:
                     client_id = client.get('client_id')
                     client_secret = client.get('client_secret')
+            else:
+                logging.warning("No Globus client credential file found at: %s" % cred_file)
 
         if not (client_id and client_secret):
             logging.warning("Client ID and secret not specified and/or could not be determined.")
```
deriva/transfer/__init__.py
CHANGED
```diff
@@ -1,9 +1,11 @@
 from deriva.transfer.download.deriva_download import DerivaDownload, GenericDownloader, DerivaDownloadError, \
-    DerivaDownloadConfigurationError, DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError
+    DerivaDownloadConfigurationError, DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError, \
+    DerivaDownloadBaggingError
 from deriva.transfer.download.deriva_download_cli import DerivaDownloadCLI
 
 from deriva.transfer.upload.deriva_upload import DerivaUpload, GenericUploader, DerivaUploadError, DerivaUploadError, \
-    DerivaUploadConfigurationError, DerivaUploadCatalogCreateError, DerivaUploadCatalogUpdateError
+    DerivaUploadConfigurationError, DerivaUploadCatalogCreateError, DerivaUploadCatalogUpdateError, \
+    DerivaUploadAuthenticationError
 from deriva.transfer.upload.deriva_upload_cli import DerivaUploadCLI
 
 from deriva.transfer.backup.deriva_backup import DerivaBackup, DerivaBackupAuthenticationError, \
```
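With the new export, callers can handle bagging failures distinctly from other download errors; a minimal sketch (the server values, config file name, and constructor kwargs here are assumptions, not taken from this diff):

```python
from deriva.transfer import GenericDownloader, DerivaDownloadBaggingError

server = {"protocol": "https", "host": "example.org", "catalog_id": "1"}
downloader = GenericDownloader(server, output_dir=".",
                               config_file="download-config.json")
try:
    outputs = downloader.download()
except DerivaDownloadBaggingError as e:
    # New in 1.7.x: raised when bag creation, validation, or archiving fails.
    print("Bag creation failed: %s" % e)
```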
deriva/transfer/download/deriva_download.py
CHANGED

```diff
@@ -7,6 +7,7 @@ import platform
 import requests
 from requests.exceptions import HTTPError
 from bdbag import bdbag_api as bdb, bdbag_ro as ro, BAG_PROFILE_TAG, BDBAG_RO_PROFILE_ID
+from bdbag.bdbagit import BagValidationError
 from deriva.core import ErmrestCatalog, HatracStore, format_exception, get_credential, format_credential, read_config, \
     stob, Megabyte, __version__ as VERSION
 from deriva.core.utils.version_utils import get_installed_version
@@ -14,7 +15,11 @@
 from deriva.transfer.download.processors.base_processor import LOCAL_PATH_KEY, REMOTE_PATHS_KEY, SERVICE_URL_KEY, \
     FILE_SIZE_KEY
 from deriva.transfer.download import DerivaDownloadError, DerivaDownloadConfigurationError, \
-    DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError
+    DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError, \
+    DerivaDownloadBaggingError
+
+
+logger = logging.getLogger(__name__)
 
 
 class DerivaDownload(object):
@@ -43,7 +48,7 @@ class DerivaDownload(object):
         info = "%s v%s [Python %s, %s]" % (
             self.__class__.__name__, get_installed_version(VERSION),
             platform.python_version(), platform.platform(aliased=True))
-        logging.info("Initializing downloader: %s" % info)
+        logger.info("Initializing downloader: %s" % info)
 
         if not self.server:
             raise DerivaDownloadConfigurationError("Server not specified!")
@@ -145,12 +150,12 @@ class DerivaDownload(object):
         try:
             if not self.credentials:
                 self.set_credentials(get_credential(self.hostname))
-            logging.info("Validating credentials for host: %s" % self.hostname)
+            logger.info("Validating credentials for host: %s" % self.hostname)
             attributes = self.catalog.get_authn_session().json()
             identity = attributes["client"]
         except HTTPError as he:
             if he.response.status_code == 404:
-                logging.info("No existing login session found for host: %s" % self.hostname)
+                logger.info("No existing login session found for host: %s" % self.hostname)
         except Exception as e:
             raise DerivaDownloadAuthenticationError("Unable to validate credentials: %s" % format_exception(e))
         wallet = kwargs.get("wallet", {})
@@ -160,6 +165,7 @@ class DerivaDownload(object):
         bag_archiver = None
         bag_algorithms = None
         bag_idempotent = False
+        bag_strict = True
         bag_config = self.config.get('bag')
         create_bag = True if bag_config else False
         if create_bag:
@@ -171,7 +177,8 @@ class DerivaDownload(object):
             bag_idempotent = stob(bag_config.get('bag_idempotent', False))
             bag_metadata = bag_config.get('bag_metadata', {"Internal-Sender-Identifier":
                                                            "deriva@%s" % self.server_url})
-            bag_ro = create_bag and not bag_idempotent and stob(bag_config.get('bag_ro',
+            bag_ro = create_bag and not bag_idempotent and stob(bag_config.get('bag_ro', True))
+            bag_strict = stob(bag_config.get('bag_strict', True))
             if create_bag:
                 bdb.ensure_bag_path_exists(bag_path)
                 bag = bdb.make_bag(bag_path, algs=bag_algorithms, metadata=bag_metadata, idempotent=bag_idempotent)
```
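The bag-related keys read from the `bag` config section, including the new `bag_strict` flag, sketched as a Python dict; the key names follow the `bag_config.get(...)` reads visible in the hunks here (the `bag_algorithms` key and all values are illustrative assumptions):

```python
# Illustrative 'bag' section of a downloader config.
bag_config = {
    "bag": {
        "bag_archiver": "zip",
        "bag_algorithms": ["md5", "sha256"],       # assumed key name
        "bag_idempotent": False,
        "bag_metadata": {"Internal-Sender-Identifier": "deriva@https://example.org"},
        "bag_ro": True,
        # New: when True, an empty bag (no payload or fetch references)
        # fails validation and raises DerivaDownloadBaggingError.
        "bag_strict": True,
    }
}
```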
```diff
@@ -211,12 +218,13 @@ class DerivaDownload(object):
                                             allow_anonymous=self.allow_anonymous,
                                             timeout=self.timeout)
                 outputs = processor.process()
+                assert outputs is not None
                 if processor.should_abort():
                     raise DerivaDownloadTimeoutError("Timeout (%s seconds) waiting for processor [%s] to complete." %
                                                      (self.timeout_secs, processor_name))
                 self.check_payload_size(outputs)
             except Exception as e:
-                logging.error(format_exception(e))
+                logger.error(format_exception(e))
                 if create_bag:
                     bdb.cleanup_bag(bag_path)
                     if remote_file_manifest and os.path.isfile(remote_file_manifest):
@@ -270,16 +278,27 @@ class DerivaDownload(object):
                     remote_file_manifest=remote_file_manifest
                     if (remote_file_manifest and os.path.getsize(remote_file_manifest) > 0) else None,
                     update=True,
-                    idempotent=bag_idempotent)
+                    idempotent=bag_idempotent,
+                    strict=bag_strict)
+            except BagValidationError as bve:
+                msg = "Unable to validate bag.%s Error: %s" % (
+                    "" if not bag_strict else
+                    " Strict checking has been enabled, which most likely means that this bag "
+                    "is empty (has no payload files or fetch references) and therefore invalid.",
+                    format_exception(bve))
+                logger.error(msg)
+                bdb.cleanup_bag(bag_path)
+                raise DerivaDownloadBaggingError(msg)
             except Exception as e:
-                logging.error(format_exception(e))
+                msg = "Unhandled exception while updating bag manifests: %s" % format_exception(e)
+                logger.error(msg)
                 bdb.cleanup_bag(bag_path)
-                raise
+                raise DerivaDownloadBaggingError(msg)
             finally:
                 if remote_file_manifest and os.path.isfile(remote_file_manifest):
                     os.remove(remote_file_manifest)
 
-            logging.info('Created bag: %s' % bag_path)
+            logger.info('Created bag: %s' % bag_path)
 
             if bag_archiver is not None:
                 try:
@@ -289,8 +308,9 @@ class DerivaDownload(object):
                     bdb.cleanup_bag(bag_path)
                     outputs = {os.path.basename(archive): {LOCAL_PATH_KEY: archive}}
                 except Exception as e:
-                    logging.error(format_exception(e))
-                    raise
+                    msg = "Exception while creating data bag archive: %s" % format_exception(e)
+                    logger.error(msg)
+                    raise DerivaDownloadBaggingError(msg)
                 else:
                     outputs = {os.path.basename(bag_path): {LOCAL_PATH_KEY: bag_path}}
 
@@ -318,7 +338,7 @@ class DerivaDownload(object):
                                              (self.timeout_secs, processor_name))
                 self.check_payload_size(outputs)
             except Exception as e:
-                logging.error(format_exception(e))
+                logger.error(format_exception(e))
                 raise
 
         return outputs
```