deriva 1.7.0__py3-none-any.whl → 1.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,13 @@
1
1
 
2
+ from __future__ import annotations
3
+
2
4
  from collections import OrderedDict
5
+ from collections.abc import Iterable
6
+ from enum import Enum
3
7
  import json
4
8
  import re
9
+ import base64
10
+ import hashlib
5
11
 
6
12
  from . import AttrDict, tag, urlquote, stob
7
13
 
@@ -17,6 +23,74 @@ class NoChange (object):
17
23
  # singleton to use in APIs below
18
24
  nochange = NoChange()
19
25
 
26
def make_id(*components):
    """Build an identifier that will be OK for ERMrest and Postgres.

    Naively, append as '_'.join(components).

    Fallback to heuristics mixing truncation with short hashes.

    :param components: name fragments (str); a list or tuple passed at the
        top level is flattened one level, for convenience with compound keys
    :return: the plain '_'-joined name when its UTF-8 encoding is at most
        63 bytes; otherwise a shortened name that ends with a 5-character
        hash of the plain name

    All length checks are made on UTF-8 encoded bytes, while truncation
    removes whole characters so multi-byte characters are never split.
    """
    # Byte budget for an identifier.
    # NOTE(review): presumably the default PostgreSQL identifier limit -- confirm.
    max_bytes = 63

    def nbytes(s):
        # size of s once encoded, which is what the length limit is measured in
        return len(s.encode('utf8'))

    def short_hash(s, nchars):
        # first nchars characters of the URL-safe base64 MD5 digest of s
        digest = hashlib.md5(s.encode('utf8')).digest()
        return base64.urlsafe_b64encode(digest).decode()[0:nchars]

    def truncate(s, maxlen):
        # we need to chop whole (unicode) chars but test encoded byte lengths!
        for i in range(max(1, len(s) - maxlen), len(s) - 1):
            result = s[0:-1 * i].rstrip()
            if nbytes(result) <= (maxlen - 2):
                # the 2 reserved bytes hold the '..' truncation marker
                return result + '..'
        # no usable cut point was found; hand the input back unchanged
        return s

    # accept lists (and tuples) at top-level for convenience (compound keys, etc.)
    expanded = []
    for e in components:
        if isinstance(e, (list, tuple)):
            expanded.extend(e)
        else:
            expanded.append(e)

    # prefer to use naive name as requested
    naive_result = '_'.join(expanded)
    if nbytes(naive_result) <= max_bytes:
        return naive_result

    # we'll need to truncate and hash in some way...
    naive_hash = short_hash(naive_result, 5)
    # remember each fragment's original position so order survives sorting
    parts = list(enumerate(expanded))

    # try to find a solution truncating individual fields
    for maxlen in [15, 12, 9]:
        # longest fragments are shortened first
        parts.sort(key=lambda p: (nbytes(p[1]), p[0]), reverse=True)
        for i, (idx, part) in enumerate(parts):
            if nbytes(part) > maxlen:
                parts[i] = (idx, truncate(part, maxlen))
                candidate_result = '_'.join(
                    [p[1] for p in sorted(parts, key=lambda p: p[0])]
                    + [naive_hash]
                )
                # strict '<' is kept on purpose so generated names stay stable
                if nbytes(candidate_result) < max_bytes:
                    return candidate_result

    # fallback to truncating original naive name
    # try to preserve suffix and trim in middle
    result = ''.join([
        truncate(naive_result, len(naive_result)//3),
        # unary minus binds tighter than //, so this suffix rounds up in length
        naive_result[-len(naive_result)//3:],
        '_',
        naive_hash
    ])
    if nbytes(result) <= max_bytes:
        return result

    # last-ditch (e.g. multibyte unicode suffix worst case)
    return truncate(naive_result, 55) + naive_hash
93
+
20
94
  def presence_annotation(tag_uri):
21
95
  """Decorator to establish property getter/setter/deleter for presence annotations.
22
96
 
@@ -684,6 +758,15 @@ class KeyedList (list):
684
758
  list.append(self, e)
685
759
  self.elements[e.name] = e
686
760
 
761
class FindAssociationResult (object):
    """Wrapper for results of Table.find_associations()

    Holds the association table, its name and schema (copied from the
    table for direct access), the foreign key given as self_fkey and the
    collection given as other_fkeys.
    """
    def __init__(self, table, self_fkey, other_fkeys):
        # foreign keys exactly as supplied by the caller
        self.self_fkey, self.other_fkeys = self_fkey, other_fkeys
        # the association table plus shortcuts to its identifying attributes
        self.table = table
        self.name, self.schema = table.name, table.schema
769
+
687
770
  class Table (object):
688
771
  """Named table.
689
772
  """
@@ -1075,6 +1158,184 @@ class Table (object):
1075
1158
  provide_system
1076
1159
  )
1077
1160
 
1161
+ @classmethod
1162
+ def define_association(
1163
+ cls,
1164
+ associates: Iterable[Key | Table | tuple[str, Key | Table]],
1165
+ metadata: Iterable[Key | Table | dict | tuple[str, bool, Key | Table]] = [],
1166
+ table_name: str | None = None,
1167
+ comment: str | None = None,
1168
+ provide_system: bool = True) -> dict:
1169
+ """Build an association table definition.
1170
+
1171
+ :param associates: the existing Key instances being associated
1172
+ :param metadata: additional metadata fields for impure associations
1173
+ :param table_name: name for the association table or None for default naming
1174
+ :param comment: comment for the association table or None for default comment
1175
+ :param provide_system: add ERMrest system columns when True
1176
+
1177
+ This is a utility function to help build an association table
1178
+ definition. It simplifies the task, but removes some
1179
+ control. For full customization, consider using Table.define()
1180
+ directly instead.
1181
+
1182
+ A normal ("pure") N-ary association is a table with N foreign
1183
+ keys referencing N primary keys in referenced tables, with a
1184
+ composite primary key covering the N foreign keys. These pure
1185
+ association tables manage a set of distinct combinations of
1186
+ the associated foreign key values.
1187
+
1188
+ An "impure" association table adds additional metadata
1189
+ alongside the N foreign keys.
1190
+
1191
+ The "associates" parameter takes an iterable of Key instances
1192
+ from other tables. The association will be comprised of
1193
+ foreign keys referencing these associates. Optionally, a tuple
1194
+ of (str, Key) can supply a string _base name_ to influence how
1195
+ the foreign key columns and constraint will be named in the
1196
+ new association table. A bare Key instance will get a base
1197
+ name derived from the referenced table name.
1198
+
1199
+ The "metadata" parameter takes an iterable of plain dict
1200
+ column definitions or Key instances. Each dict must be a
1201
+ scalar column definition, such as produced by the
1202
+ Column.define() class method. Key instance will cause
1203
+ corresponding columns and foreign keys to be added to the
1204
+ association table to act as metadata. Optionally, a tuple of
1205
+ (str, bool, Key) can supply a string _base name_ and a boolean
1206
+ _nullok_ property to influence how the foreign key columns and
1207
+ constraint will be constructed and named. A bare Key instance
1208
+ will get a base name derived from the referened table name,
1209
+ and presumed as nullok=False.
1210
+
1211
+ If a Table instance is supplied instead of a Key instance for
1212
+ associates or metadata inputs, an attempt will be made to
1213
+ locate a key based on the RID system column. If this key
1214
+ cannot be found, a KeyError will be raised.
1215
+
1216
+ """
1217
+ associates = list(associates)
1218
+ metadata = list(metadata)
1219
+
1220
+ if len(associates) < 2:
1221
+ raise ValueError('An association table requires at least 2 associates')
1222
+
1223
+ cdefs = []
1224
+ kdefs = []
1225
+ fkdefs = []
1226
+
1227
+ used_names = set()
1228
+
1229
+ def check_basename(basename):
1230
+ if not isinstance(base_name, str):
1231
+ raise TypeError('Base name %r is not of required type str' % (base_name,))
1232
+ if base_name in used_names:
1233
+ raise ValueError('Base name %r is not unique among associates and metadata' % (base_name,))
1234
+ used_names.add(base_name)
1235
+
1236
+ def choose_basename(key):
1237
+ base_name = key.table.name
1238
+ n = 2
1239
+ while base_name in used_names:
1240
+ base_name = '%s%d' % (key.table.name, n)
1241
+ n += 1
1242
+ used_names.add(base_name)
1243
+ return base_name
1244
+
1245
+ def check_key(key):
1246
+ if isinstance(key, Table):
1247
+ return key.key_by_columns(["RID"])
1248
+ return key
1249
+
1250
+ # check and normalize associates into list[(str, Key)] with distinct base names
1251
+ for i in range(len(associates)):
1252
+ if isinstance(associates[i], tuple):
1253
+ base_name, key = associates[i]
1254
+ check_basename(base_name)
1255
+ key = check_key(key)
1256
+ associates[i] = (base_name, key)
1257
+ else:
1258
+ key = check_key(associates[i])
1259
+ base_name = choose_basename(key)
1260
+ associates[i] = (base_name, key)
1261
+
1262
+ # build assoc table name if not provided
1263
+ if table_name is None:
1264
+ table_name = make_id(*[ assoc[1].table.name for assoc in associates ])
1265
+
1266
+ def simplify_type(ctype):
1267
+ if ctype.is_domain and ctype.typename.startswith('ermrest_'):
1268
+ return ctype.base_type
1269
+
1270
+ return ctype
1271
+
1272
+ def cdefs_for_key(base_name, key, nullok=False):
1273
+ return [
1274
+ Column.define(
1275
+ '%s_%s' % (base_name, col.name) if len(key.unique_columns) > 1 else base_name,
1276
+ simplify_type(col.type),
1277
+ nullok=nullok,
1278
+ )
1279
+ for col in key.unique_columns
1280
+ ]
1281
+
1282
+ def fkdef_for_key(base_name, key):
1283
+ return ForeignKey.define(
1284
+ [
1285
+ '%s_%s' % (base_name, col.name) if len(key.unique_columns) > 1 else base_name
1286
+ for col in key.unique_columns
1287
+ ],
1288
+ key.table.schema.name,
1289
+ key.table.name,
1290
+ [ col.name for col in key.unique_columns ],
1291
+ on_update='CASCADE',
1292
+ on_delete='CASCADE',
1293
+ constraint_name=make_id(table_name, base_name, 'fkey'),
1294
+ )
1295
+
1296
+ # build core association definition (i.e. the "pure" parts)
1297
+ k_cnames = []
1298
+ for base_name, key in associates:
1299
+ cdefs.extend(cdefs_for_key(base_name, key))
1300
+ fkdefs.append(fkdef_for_key(base_name, key))
1301
+
1302
+ k_cnames.extend([
1303
+ '%s_%s' % (base_name, col.name) if len(key.unique_columns) > 1 else base_name
1304
+ for col in key.unique_columns
1305
+ ])
1306
+
1307
+ kdefs.append(
1308
+ Key.define(
1309
+ k_cnames,
1310
+ constraint_name=make_id(table_name, 'assoc', 'key'),
1311
+ )
1312
+ )
1313
+
1314
+ # check and normalize metadata into list[dict | (str, bool, Key)]
1315
+ for i in range(len(metadata)):
1316
+ if isinstance(metadata[i], tuple):
1317
+ base_name, nullok, key = metadata[i]
1318
+ check_basename(base_name)
1319
+ key = check_key(key)
1320
+ metadata[i] = (base_name, nullok, key)
1321
+ elif isinstance(metadata[i], dict):
1322
+ pass
1323
+ else:
1324
+ key = check_key(metadata[i])
1325
+ base_name = choose_basename(key)
1326
+ metadata[i] = (base_name, False, key)
1327
+
1328
+ # add metadata to definition
1329
+ for md in metadata:
1330
+ if isinstance(md, dict):
1331
+ cdefs.append(md)
1332
+ else:
1333
+ base_name, nullok, key = md
1334
+ cdefs.extend(cdefs_for_key(base_name, key, nullok))
1335
+ fkdefs.append(fkdef_for_key(base_name, key))
1336
+
1337
+ return Table.define(table_name, cdefs, kdefs, fkdefs, comment=comment, provide_system=provide_system)
1338
+
1078
1339
  def prejson(self, prune=True):
1079
1340
  return {
1080
1341
  "schema_name": self.schema.name,
@@ -1348,7 +1609,7 @@ class Table (object):
1348
1609
  if raise_nomatch:
1349
1610
  raise KeyError(from_to_map)
1350
1611
 
1351
- def is_association(self, min_arity=2, max_arity=2, unqualified=True, pure=True, no_overlap=True):
1612
+ def is_association(self, min_arity=2, max_arity=2, unqualified=True, pure=True, no_overlap=True, return_fkeys=False):
1352
1613
  """Return (truthy) integer arity if self is a matching association, else False.
1353
1614
 
1354
1615
  min_arity: minimum number of associated fkeys (default 2)
@@ -1356,6 +1617,7 @@ class Table (object):
1356
1617
  unqualified: reject qualified associations when True (default True)
1357
1618
  pure: reject impure associations when True (default True)
1358
1619
  no_overlap: reject overlapping associations when True (default True)
1620
+ return_fkeys: return the set of N associated ForeignKeys if True
1359
1621
 
1360
1622
  The default behavior with no arguments is to test for pure,
1361
1623
  unqualified, non-overlapping, binary associations.
@@ -1444,9 +1706,43 @@ class Table (object):
1444
1706
  # reject: impure association
1445
1707
  return False
1446
1708
 
1447
- # return (truthy) arity
1448
- return len(covered_fkeys)
1709
+ # return (truthy) arity or fkeys
1710
+ if return_fkeys:
1711
+ return covered_fkeys
1712
+ else:
1713
+ return len(covered_fkeys)
1714
+
1715
+ def find_associations(self, min_arity=2, max_arity=2, unqualified=True, pure=True, no_overlap=True) -> Iterable[FindAssociationResult]:
1716
+ """Yield (iterable) Association objects linking to this table and meeting all criteria.
1449
1717
 
1718
+ min_arity: minimum number of associated fkeys (default 2)
1719
+ max_arity: maximum number of associated fkeys (default 2) or None
1720
+ unqualified: reject qualified associations when True (default True)
1721
+ pure: reject impure assocations when True (default True)
1722
+ no_overlap: reject overlapping associations when True (default True)
1723
+
1724
+ See documentation for sibling method Table.is_association(...)
1725
+ for more explanation of these association detection criteria.
1726
+
1727
+ """
1728
+ peer_tables = set()
1729
+ for fkey in self.referenced_by:
1730
+ peer = fkey.table
1731
+ if peer in peer_tables:
1732
+ # check each peer only once
1733
+ continue
1734
+ peer_tables.add(peer)
1735
+ answer = peer.is_association(min_arity=min_arity, max_arity=max_arity, unqualified=unqualified, pure=pure, no_overlap=no_overlap, return_fkeys=True)
1736
+ if answer:
1737
+ answer = set(answer)
1738
+ for fkey in answer:
1739
+ if fkey.pk_table == self:
1740
+ answer.remove(fkey)
1741
+ yield FindAssociationResult(peer, fkey, answer)
1742
+ # arbitrarily choose first fkey to self
1743
+ # in case association is back to same table
1744
+ break
1745
+
1450
1746
  @presence_annotation(tag.immutable)
1451
1747
  def immutable(self): pass
1452
1748
 
@@ -1495,6 +1791,40 @@ class Table (object):
1495
1791
  @object_annotation(tag.viz_3d_display)
1496
1792
  def viz_3d_display(self): pass
1497
1793
 
1794
class Quantifier (str, Enum):
    """Logic quantifiers"""
    any = 'any'
    all = 'all'

def find_tables_with_foreign_keys(target_tables: Iterable[Table], quantifier: Quantifier=Quantifier.all) -> set[Table]:
    """Return set of tables with foreign key references to target tables.

    :param target_tables: an iterable of ermrest_model.Table instances
    :param quantifier: one of the Quantifiers 'any' or 'all' (default 'all')
    :return: a set of Table instances; empty when target_tables is empty

    Each returned Table instance will be a table that references the
    targets according to the selected quantifier. A reference is a
    direct foreign key in the returned table that refers to a primary
    key of the target table.

    - quantifier==all: a returned table references ALL targets
    - quantifier==any: a returned table references AT LEAST ONE target

    Any quantifier value that does not compare equal to Quantifier.all
    is treated as 'any'.

    For proper function, all target_tables instances MUST come from
    the same root Model instance hierarchy.

    """
    candidates = None
    for table in target_tables:
        referring = {fkey.table for fkey in table.referenced_by}
        if candidates is None:
            # the first target seeds the result for both quantifiers
            candidates = referring
        elif quantifier == Quantifier.all:
            candidates &= referring
        else:
            candidates |= referring
    # with no targets at all, still honor the declared set return type
    return set() if candidates is None else candidates
1827
+
1498
1828
  class Column (object):
1499
1829
  """Named column.
1500
1830
  """
@@ -1696,7 +2026,6 @@ class Column (object):
1696
2026
 
1697
2027
  @object_annotation(tag.column_display)
1698
2028
  def column_display(self): pass
1699
-
1700
2029
 
1701
2030
  def _constraint_name_parts(constraint, doc):
1702
2031
  # modern systems should have 0 or 1 names here
@@ -1781,10 +2110,29 @@ class Key (object):
1781
2110
  }
1782
2111
 
1783
2112
  @classmethod
1784
- def define(cls, colnames, constraint_names=[], comment=None, annotations={}):
1785
- """Build a key definition."""
2113
+ def define(cls, colnames, constraint_names=[], comment=None, annotations={}, constraint_name=None):
2114
+ """Build a key definition.
2115
+
2116
+ :param colnames: List of names of columns participating in the key
2117
+ :param constraint_names: Legacy input [ [ schema_name, constraint_name ] ] (for API backwards-compatibility)
2118
+ :param comment: Comment string
2119
+ :param annotations: Dictionary of { annotation_uri: annotation_value, ... }
2120
+ :param constraint_name: Constraint name string
2121
+
2122
+ The constraint_name kwarg takes a bare constraint name string
2123
+ and acts the same as setting the legacy constraint_names kwarg
2124
+ to: [ [ "placeholder", constraint_name ] ]. This odd syntax
2125
+ is for backwards-compatibility with earlier API versions, and
2126
+ mirrors the structure of constraint names in ERMrest model
2127
+ description outputs. In those outputs, the "placeholder" field
2128
+ contains the schema name of the table containing the
2129
+ constraint.
2130
+
2131
+ """
1786
2132
  if not isinstance(colnames, list):
1787
2133
  raise TypeError('Colnames should be a list.')
2134
+ if constraint_name is not None:
2135
+ constraint_names = [ [ "placeholder", constraint_name ] ]
1788
2136
  return {
1789
2137
  'unique_columns': list(colnames),
1790
2138
  'names': constraint_names,
@@ -1983,9 +2331,41 @@ class ForeignKey (object):
1983
2331
  }
1984
2332
 
1985
2333
  @classmethod
1986
- def define(cls, fk_colnames, pk_sname, pk_tname, pk_colnames, on_update='NO ACTION', on_delete='NO ACTION', constraint_names=[], comment=None, acls={}, acl_bindings={}, annotations={}):
2334
+ def define(cls, fk_colnames, pk_sname, pk_tname, pk_colnames, on_update='NO ACTION', on_delete='NO ACTION', constraint_names=[], comment=None, acls={}, acl_bindings={}, annotations={}, constraint_name=None):
2335
+ """Define a foreign key.
2336
+
2337
+ :param fk_colnames: List of column names participating in the foreign key
2338
+ :param pk_sname: Schema name string of the referenced primary key
2339
+ :param pk_tname: Table name string of the referenced primary key
2340
+ :param pk_colnames: List of column names participating in the referenced primary key
2341
+ :param on_update: Constraint behavior when referenced primary keys are updated
2342
+ :param on_delete: Constraint behavior when referenced primary keys are deleted
2343
+ :param constraint_names: Legacy input [ [ schema_name, constraint_name ] ] (for API backwards-compatibility)
2344
+ :param comment: Comment string
2345
+ :param acls: Dictionary of { acl_name: acl, ... }
2346
+ :param acl_bindings: Dictionary of { binding_name: acl_binding, ... }
2347
+ :param annotations: Dictionary of { annotation_uri: annotation_value, ... }
2348
+ :param constraint_name: Constraint name string
2349
+
2350
+ The constraint behavior values for on_update and on_delete must
2351
+ be one of the following literal strings:
2352
+
2353
+ 'NO ACTION', 'RESTRICT', 'CASCADE', 'SET NULL', 'SET DEFAULT'
2354
+
2355
+ The constraint_name kwarg takes a bare constraint name string
2356
+ and acts the same as setting the legacy constraint_names kwarg
2357
+ to: [ [ "placeholder", constraint_name ] ]. This odd syntax
2358
+ is for backwards-compatibility with earlier API versions, and
2359
+ mirrors the structure of constraint names in ERMrest model
2360
+ description outputs. In those outputs, the "placeholder" field
2361
+ contains the schema name of the table containing the
2362
+ constraint.
2363
+
2364
+ """
1987
2365
  if len(fk_colnames) != len(pk_colnames):
1988
2366
  raise ValueError('The fk_colnames and pk_colnames lists must have the same length.')
2367
+ if constraint_name is not None:
2368
+ constraint_names = [ [ "placeholder", constraint_name ], ]
1989
2369
  return {
1990
2370
  'foreign_key_columns': [
1991
2371
  {
deriva/core/hatrac_cli.py CHANGED
@@ -185,14 +185,14 @@ class DerivaHatracCLI (BaseCLI):
185
185
 
186
186
  try:
187
187
  acls = self.store.get_acl(self.resource, args.access, args.role)
188
+ if acls is None:
189
+ raise DerivaHatracCLIException('No such object or namespace or ACL entry')
188
190
  for access in acls:
189
191
  print("%s:" % access)
190
192
  for role in acls.get(access, []):
191
193
  print(" %s" % role)
192
194
  except HTTPError as e:
193
- if e.response.status_code == requests.codes.not_found:
194
- raise ResourceException('No such object or namespace or ACL entry', e)
195
- elif e.response.status_code == requests.codes.bad_request:
195
+ if e.response.status_code == requests.codes.bad_request:
196
196
  raise ResourceException('Invalid ACL name %s' % args.access, e)
197
197
  else:
198
198
  raise e
@@ -316,6 +316,8 @@ class DerivaHatracCLI (BaseCLI):
316
316
  except HatracHashMismatch as e:
317
317
  logging.debug(format_exception(e))
318
318
  eprint(_resource_error_message('Checksum verification failed'))
319
+ except DerivaHatracCLIException as e:
320
+ eprint(e)
319
321
  except RuntimeError as e:
320
322
  logging.debug(format_exception(e))
321
323
  eprint('Unexpected runtime error occurred')
@@ -52,7 +52,7 @@ class GlobusAuthUtil:
52
52
  client_id = kwargs.get("client_id")
53
53
  client_secret = kwargs.get("client_secret")
54
54
  if not (client_id and client_secret):
55
- cred_file = kwargs.get("credential_file", CLIENT_CRED_FILE)
55
+ cred_file = kwargs.get("credential_file", CLIENT_CRED_FILE) or CLIENT_CRED_FILE
56
56
  if os.path.isfile(cred_file):
57
57
  creds = read_config(cred_file)
58
58
  if creds:
@@ -60,6 +60,8 @@ class GlobusAuthUtil:
60
60
  if client:
61
61
  client_id = client.get('client_id')
62
62
  client_secret = client.get('client_secret')
63
+ else:
64
+ logging.warning("No Globus client credential file found at: %s" % cred_file)
63
65
 
64
66
  if not (client_id and client_secret):
65
67
  logging.warning("Client ID and secret not specified and/or could not be determined.")
@@ -1,9 +1,11 @@
1
1
  from deriva.transfer.download.deriva_download import DerivaDownload, GenericDownloader, DerivaDownloadError, \
2
- DerivaDownloadConfigurationError, DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError
2
+ DerivaDownloadConfigurationError, DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError, \
3
+ DerivaDownloadBaggingError
3
4
  from deriva.transfer.download.deriva_download_cli import DerivaDownloadCLI
4
5
 
5
6
  from deriva.transfer.upload.deriva_upload import DerivaUpload, GenericUploader, DerivaUploadError, DerivaUploadError, \
6
- DerivaUploadConfigurationError, DerivaUploadCatalogCreateError, DerivaUploadCatalogUpdateError
7
+ DerivaUploadConfigurationError, DerivaUploadCatalogCreateError, DerivaUploadCatalogUpdateError, \
8
+ DerivaUploadAuthenticationError
7
9
  from deriva.transfer.upload.deriva_upload_cli import DerivaUploadCLI
8
10
 
9
11
  from deriva.transfer.backup.deriva_backup import DerivaBackup, DerivaBackupAuthenticationError, \
@@ -16,3 +16,7 @@ class DerivaDownloadAuthorizationError(Exception):
16
16
 
17
17
  class DerivaDownloadTimeoutError(Exception):
18
18
  pass
19
+
20
+
21
class DerivaDownloadBaggingError(Exception):
    """Error raised for failures related to bagging of downloaded content."""
@@ -7,6 +7,7 @@ import platform
7
7
  import requests
8
8
  from requests.exceptions import HTTPError
9
9
  from bdbag import bdbag_api as bdb, bdbag_ro as ro, BAG_PROFILE_TAG, BDBAG_RO_PROFILE_ID
10
+ from bdbag.bdbagit import BagValidationError
10
11
  from deriva.core import ErmrestCatalog, HatracStore, format_exception, get_credential, format_credential, read_config, \
11
12
  stob, Megabyte, __version__ as VERSION
12
13
  from deriva.core.utils.version_utils import get_installed_version
@@ -14,7 +15,11 @@ from deriva.transfer.download.processors import find_query_processor, find_trans
14
15
  from deriva.transfer.download.processors.base_processor import LOCAL_PATH_KEY, REMOTE_PATHS_KEY, SERVICE_URL_KEY, \
15
16
  FILE_SIZE_KEY
16
17
  from deriva.transfer.download import DerivaDownloadError, DerivaDownloadConfigurationError, \
17
- DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError
18
+ DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError, \
19
+ DerivaDownloadBaggingError
20
+
21
+
22
+ logger = logging.getLogger(__name__)
18
23
 
19
24
 
20
25
  class DerivaDownload(object):
@@ -43,7 +48,7 @@ class DerivaDownload(object):
43
48
  info = "%s v%s [Python %s, %s]" % (
44
49
  self.__class__.__name__, get_installed_version(VERSION),
45
50
  platform.python_version(), platform.platform(aliased=True))
46
- logging.info("Initializing downloader: %s" % info)
51
+ logger.info("Initializing downloader: %s" % info)
47
52
 
48
53
  if not self.server:
49
54
  raise DerivaDownloadConfigurationError("Server not specified!")
@@ -145,12 +150,12 @@ class DerivaDownload(object):
145
150
  try:
146
151
  if not self.credentials:
147
152
  self.set_credentials(get_credential(self.hostname))
148
- logging.info("Validating credentials for host: %s" % self.hostname)
153
+ logger.info("Validating credentials for host: %s" % self.hostname)
149
154
  attributes = self.catalog.get_authn_session().json()
150
155
  identity = attributes["client"]
151
156
  except HTTPError as he:
152
157
  if he.response.status_code == 404:
153
- logging.info("No existing login session found for host: %s" % self.hostname)
158
+ logger.info("No existing login session found for host: %s" % self.hostname)
154
159
  except Exception as e:
155
160
  raise DerivaDownloadAuthenticationError("Unable to validate credentials: %s" % format_exception(e))
156
161
  wallet = kwargs.get("wallet", {})
@@ -160,6 +165,7 @@ class DerivaDownload(object):
160
165
  bag_archiver = None
161
166
  bag_algorithms = None
162
167
  bag_idempotent = False
168
+ bag_strict = True
163
169
  bag_config = self.config.get('bag')
164
170
  create_bag = True if bag_config else False
165
171
  if create_bag:
@@ -171,7 +177,8 @@ class DerivaDownload(object):
171
177
  bag_idempotent = stob(bag_config.get('bag_idempotent', False))
172
178
  bag_metadata = bag_config.get('bag_metadata', {"Internal-Sender-Identifier":
173
179
  "deriva@%s" % self.server_url})
174
- bag_ro = create_bag and not bag_idempotent and stob(bag_config.get('bag_ro', "True"))
180
+ bag_ro = create_bag and not bag_idempotent and stob(bag_config.get('bag_ro', True))
181
+ bag_strict = stob(bag_config.get('bag_strict', True))
175
182
  if create_bag:
176
183
  bdb.ensure_bag_path_exists(bag_path)
177
184
  bag = bdb.make_bag(bag_path, algs=bag_algorithms, metadata=bag_metadata, idempotent=bag_idempotent)
@@ -211,12 +218,13 @@ class DerivaDownload(object):
211
218
  allow_anonymous=self.allow_anonymous,
212
219
  timeout=self.timeout)
213
220
  outputs = processor.process()
221
+ assert outputs is not None
214
222
  if processor.should_abort():
215
223
  raise DerivaDownloadTimeoutError("Timeout (%s seconds) waiting for processor [%s] to complete." %
216
224
  (self.timeout_secs, processor_name))
217
225
  self.check_payload_size(outputs)
218
226
  except Exception as e:
219
- logging.error(format_exception(e))
227
+ logger.error(format_exception(e))
220
228
  if create_bag:
221
229
  bdb.cleanup_bag(bag_path)
222
230
  if remote_file_manifest and os.path.isfile(remote_file_manifest):
@@ -270,16 +278,27 @@ class DerivaDownload(object):
270
278
  remote_file_manifest=remote_file_manifest
271
279
  if (remote_file_manifest and os.path.getsize(remote_file_manifest) > 0) else None,
272
280
  update=True,
273
- idempotent=bag_idempotent)
281
+ idempotent=bag_idempotent,
282
+ strict=bag_strict)
283
+ except BagValidationError as bve:
284
+ msg = "Unable to validate bag.%s Error: %s" % (
285
+ "" if not bag_strict else
286
+ " Strict checking has been enabled, which most likely means that this bag "
287
+ "is empty (has no payload files or fetch references) and therefore invalid.",
288
+ format_exception(bve))
289
+ logger.error(msg)
290
+ bdb.cleanup_bag(bag_path)
291
+ raise DerivaDownloadBaggingError(msg)
274
292
  except Exception as e:
275
- logging.fatal("Exception while updating bag manifests: %s" % format_exception(e))
293
+ msg = "Unhandled exception while updating bag manifests: %s" % format_exception(e)
294
+ logger.error(msg)
276
295
  bdb.cleanup_bag(bag_path)
277
- raise
296
+ raise DerivaDownloadBaggingError(msg)
278
297
  finally:
279
298
  if remote_file_manifest and os.path.isfile(remote_file_manifest):
280
299
  os.remove(remote_file_manifest)
281
300
 
282
- logging.info('Created bag: %s' % bag_path)
301
+ logger.info('Created bag: %s' % bag_path)
283
302
 
284
303
  if bag_archiver is not None:
285
304
  try:
@@ -289,8 +308,9 @@ class DerivaDownload(object):
289
308
  bdb.cleanup_bag(bag_path)
290
309
  outputs = {os.path.basename(archive): {LOCAL_PATH_KEY: archive}}
291
310
  except Exception as e:
292
- logging.error("Exception while creating data bag archive: %s" % format_exception(e))
293
- raise
311
+ msg = "Exception while creating data bag archive: %s" % format_exception(e)
312
+ logger.error(msg)
313
+ raise DerivaDownloadBaggingError(msg)
294
314
  else:
295
315
  outputs = {os.path.basename(bag_path): {LOCAL_PATH_KEY: bag_path}}
296
316
 
@@ -318,7 +338,7 @@ class DerivaDownload(object):
318
338
  (self.timeout_secs, processor_name))
319
339
  self.check_payload_size(outputs)
320
340
  except Exception as e:
321
- logging.error(format_exception(e))
341
+ logger.error(format_exception(e))
322
342
  raise
323
343
 
324
344
  return outputs