deriva 1.7.0__py3-none-any.whl → 1.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deriva/core/datapath.py CHANGED
@@ -5,6 +5,7 @@ import copy
  from datetime import date
  import itertools
  import logging
+ import time
  import re
  from requests import HTTPError
  import warnings
@@ -395,7 +396,8 @@ class DataPath (object):
  By default links use inner join semantics on the foreign key / key equality comparison. The `join_type`
  parameter can be used to specify `left`, `right`, or `full` outer join semantics.

- :param right: the right hand table of the link expression
+ :param right: the right hand table of the link expression; if the table or alias name is already in use, an
+ incremental number will be used to disambiguate table instances of the same original name.
  :param on: an equality comparison between key and foreign key columns, a conjunction of such comparisons, or a foreign key object
  :param join_type: the join type of this link which may be 'left', 'right', 'full' outer joins or '' for inner
  join link by default.
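
The auto-aliasing behavior documented above can be illustrated with a short, hedged sketch; the schema, table, and column names (and the foreign keys they imply) are assumptions for illustration, not part of the package:

```python
# A minimal sketch of linking the same table twice; 'isa', 'dataset', 'person',
# 'Owner', and 'Curator' are hypothetical names used only for illustration.
pb = catalog.getPathBuilder()  # catalog: a previously connected ErmrestCatalog
dataset = pb.schemas['isa'].tables['dataset']
person = pb.schemas['isa'].tables['person']

# The first link adds a table instance named 'person'; linking the same table
# again gets an automatically disambiguated instance name such as 'person1'.
path = dataset.path.link(person, on=(dataset.Owner == person.RID))
path = path.link(person, on=(dataset.Curator == person.RID), join_type='left')
```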
@@ -413,7 +415,7 @@ class DataPath (object):
  raise ValueError("'on' must be specified for outer joins")
  if right._schema._catalog != self._root._schema._catalog:
  raise ValueError("'right' is from a different catalog. Cannot link across catalogs.")
- if isinstance(right, _TableAlias) and right._name in self._table_instances:
+ if isinstance(right, _TableAlias) and right._parent == self:
  raise ValueError("'right' is a table alias that has already been used.")
  else:
  # Generate an unused alias name for the table
@@ -606,6 +608,18 @@ class DataPath (object):

  return self

+ def denormalize(self, context_name=None, heuristic=None, groupkey_name='RID'):
+ """Denormalizes a path based on a visible-columns annotation 'context' or a heuristic approach.
+
+ This method does not mutate this object. It returns a result set representing the denormalization of the path.
+
+ :param context_name: name of the visible-columns context; if none given, will attempt to apply heuristics
+ :param heuristic: heuristic to apply if no context name specified
+ :param groupkey_name: column name for the group by key of the generated query expression (default: 'RID')
+ :return: a result set.
+ """
+ return _datapath_denormalize(self, context_name=context_name, heuristic=heuristic, groupkey_name=groupkey_name)
+

  class _ResultSet (object):
  """A set of results for various queries or data manipulations.
@@ -623,6 +637,7 @@ class _ResultSet (object):
  self._fetcher_fn = fetcher_fn
  self._results_doc = None
  self._sort_keys = None
+ self._limit = None
  self.uri = uri

  @property
@@ -656,6 +671,19 @@ class _ResultSet (object):
  self._results_doc = None
  return self

+ def limit(self, n):
+ """Set a limit on the number of results to be returned.
+
+ :param n: integer or None.
+ :return: self
+ """
+ try:
+ self._limit = None if n is None else int(n)
+ self._results_doc = None
+ return self
+ except ValueError:
+ raise ValueError('limit argument "n" must be an integer or None')
+
  def fetch(self, limit=None, headers=DEFAULT_HEADERS):
  """Fetches the results from the catalog.

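
A brief illustration of the new limit() method on result sets; the table name is again hypothetical and pb is a path builder as before:

```python
# Hypothetical use of _ResultSet.limit(); 'dataset' is an assumed table name.
results = pb.schemas['isa'].tables['dataset'].path.entities()
results.limit(10).fetch()      # at most 10 rows are requested from the server
for row in results:
    print(row['RID'])

results.limit(None).fetch()    # clear the limit and fetch the full set again
```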
@@ -663,11 +691,107 @@ class _ResultSet (object):
  :param headers: headers to send in request to server
  :return: self
  """
- limit = int(limit) if limit else None
+ limit = int(limit) if limit else self._limit
  self._results_doc = self._fetcher_fn(limit, self._sort_keys, headers)
  logger.debug("Fetched %d entities" % len(self._results_doc))
  return self

+ def _json_size_approx(data):
+ """Return approximate byte count for minimal JSON encoding of data
+
+ Minimal encoding has no optional whitespace/indentation.
+ """
+ nbytes = 0
+
+ if isinstance(data, (list, tuple)):
+ nbytes += 2
+ for elem in data:
+ nbytes += _json_size_approx(elem) + 1
+ elif isinstance(data, dict):
+ nbytes += 2
+ for k, v in data.items():
+ nbytes += _json_size_approx(k) + _json_size_approx(v) + 2
+ elif isinstance(data, str):
+ nbytes += len(data.encode("utf-8")) + 2
+ else:
+ nbytes += len(str(data))
+
+ return nbytes
+
+ def _generate_batches(entities, max_batch_rows=1000, max_batch_bytes=250*1024):
+ """Generate a series of entity batches as slices of the input entities
+
+ """
+ if not isinstance(entities, (list, tuple)):
+ raise TypeError('invalid type %s for entities, list or tuple expected' % (type(entities),))
+
+ if not max_batch_rows:
+ logger.debug("disabling batching due to max_batch_rows=%r" % (max_batch_rows,))
+ return entities
+
+ top = len(entities)
+ lower = 0
+
+ while lower < top:
+ # to ensure progress, always use at least one row per batch regardless of nbytes
+ upper = lower + 1
+ batch_nbytes = _json_size_approx(entities[lower])
+
+ # advance upper position until a batch size limit is reached
+ while (upper - lower) < max_batch_rows:
+ if upper >= top:
+ break
+ batch_nbytes += _json_size_approx(entities[upper])
+ if batch_nbytes > max_batch_bytes:
+ break
+ upper += 1
+
+ # generate one batch and advance for next batch
+ logger.debug("yielding batch of %d/%d entities (%d:%d)" % (upper-lower, top, lower, upper))
+ yield entities[lower:upper]
+ lower = upper
+
+ def _request_with_retry(request_func, retry_codes={408, 429, 500, 502, 503, 504}, backoff_factor=4, max_attempts=5):
+ """Perform request func with exponential backoff and retry.
+
+ :param request_func: A function returning a requests.Response object or raising HTTPError
+ :param retry_codes: HTTPError status codes on which to attempt retry
+ :param backoff_factor: Base number of seconds for factor**attempt exponential backoff
+ :param max_attempts: Max number of request attempts.
+
+ Retry will be attempted on HTTPError exceptions which match retry_codes and
+ also on other unknown exceptions, presumed to be transport errors.
+
+ The request_func should do the equivalent of resp.raise_for_status() so that
+ it only returns a response object for successful requests.
+ """
+ attempt = 0
+ last_ex = None
+
+ while attempt < max_attempts:
+ try:
+ if attempt > 0:
+ delay = backoff_factor**(attempt-1)
+ logger.debug("sleeping %d seconds before retry %d..." % (delay, attempt))
+ time.sleep(delay)
+ attempt += 1
+ return request_func()
+ except HTTPError as e:
+ logger.debug(e.response.text)
+ last_ex = e
+ if 400 <= e.response.status_code < 500:
+ last_ex = DataPathException(_http_error_message(e), e)
+ if int(e.response.status_code) not in retry_codes:
+ raise last_ex
+ except Exception as e:
+ logger.debug(e.response.text)
+ last_ex = e
+
+ # early return means we don't get here on successful requests
+ logger.warning("maximum request retry limit %d exceeded" % (max_attempts,))
+ if last_ex is None:
+ raise ValueError('exceeded max_attempts without catching a request exception')
+ raise last_ex

  class _TableWrapper (object):
  """Wraps a Table for datapath expressions.
@@ -797,7 +921,19 @@ class _TableWrapper (object):
  """
  return _AttributeGroup(self, self._query, keys)

- def insert(self, entities, defaults=set(), nondefaults=set(), add_system_defaults=True, on_conflict_skip=False):
+ def denormalize(self, context_name=None, heuristic=None, groupkey_name='RID'):
+ """Denormalizes a path based on a visible-columns annotation 'context' or a heuristic approach.
+
+ This method does not mutate this object. It returns a result set representing the denormalization of the path.
+
+ :param context_name: name of the visible-columns context; if none given, will attempt to apply heuristics
+ :param heuristic: heuristic to apply if no context name specified
+ :param groupkey_name: column name for the group by key of the generated query expression (default: 'RID')
+ :return: a result set.
+ """
+ return self.path.denormalize(context_name=context_name, heuristic=heuristic, groupkey_name=groupkey_name)
+
+ def insert(self, entities, defaults=set(), nondefaults=set(), add_system_defaults=True, on_conflict_skip=False, retry_codes={408, 429, 500, 502, 503, 504}, backoff_factor=4, max_attempts=5, max_batch_rows=1000, max_batch_bytes=250*1024):
  """Inserts entities into the table.

  :param entities: an iterable collection of entities (i.e., rows) to be inserted into the table.
@@ -805,7 +941,23 @@ class _TableWrapper (object):
  :param nondefaults: optional, set of columns names to override implicit system defaults
  :param add_system_defaults: flag to add system columns to the set of default columns.
  :param on_conflict_skip: flag to skip entities that violate uniqueness constraints.
+ :param retry_codes: set of HTTP status codes for which retry should be considered.
+ :param backoff_factor: number of seconds for the base of the exponential retry backoff.
+ :param max_attempts: maximum number of request attempts with retry.
+ :param max_batch_rows: maximum number of rows for one request, or False to disable batching.
+ :param max_batch_bytes: approximate maximum number of bytes for one request.
  :return a collection of newly created entities.
+
+ Retry will only be attempted for idempotent insertion
+ requests, i.e., when a user-controlled, non-nullable key
+ is present in the table, the key's constituent column(s)
+ are not listed as defaults, and on_conflict_skip=True.
+
+ When performing retries, an exponential backoff delay is
+ introduced after each failed attempt. The delay is
+ backoff_factor**attempt_number seconds for attempts 0 through
+ max_attempts-1.
+
  """
  # empty entities will be accepted but results are therefore an empty entity set
  if not entities:
@@ -840,17 +992,55 @@ class _TableWrapper (object):
  if not hasattr(entities[0], 'keys'):
  raise TypeError('entities[0] does not look like a dictionary -- does not have a "keys()" method')

- try:
- resp = self._schema._catalog._wrapped_catalog.post(path, json=entities, headers={'Content-Type': 'application/json'})
- return _ResultSet(self.path.uri, lambda ignore1, ignore2, ignore3: resp.json())
- except HTTPError as e:
- logger.debug(e.response.text)
- if 400 <= e.response.status_code < 500:
- raise DataPathException(_http_error_message(e), e)
- else:
- raise e
-
- def update(self, entities, correlation={'RID'}, targets=None):
+ # perform one batch request in a helper we can hand to retry helper
+ def request_func(batch):
+ return self._schema._catalog._wrapped_catalog.post(path, json=batch, headers={'Content-Type': 'application/json'})
+
+ def _has_user_pkey(table):
+ """Return True if table has at least one primary key other than the system RID key"""
+ for key in table.keys:
+ if { c.name for c in key.unique_columns } != {'RID'}:
+ if all([ not c.nullok for c in key.unique_columns ]) \
+ and all([ c.name not in defaults for c in key.unique_columns ]):
+ return True
+ return False
+
+ # determine whether insert is idempotent and therefore retry safe
+ retry_safe = on_conflict_skip and _has_user_pkey(self._wrapped_table)
+
+ # perform all requests in a helper we can hand to _ResultSet
+ def results_func(ignore1, ignore2, ignore3):
+ results = []
+ for batch in _generate_batches(
+ entities,
+ max_batch_rows=max_batch_rows,
+ max_batch_bytes=max_batch_bytes
+ ):
+ try:
+ if retry_safe:
+ resp = _request_with_retry(
+ lambda: request_func(batch),
+ retry_codes=retry_codes,
+ backoff_factor=backoff_factor,
+ max_attempts=max_attempts
+ )
+ else:
+ resp = request_func(batch)
+ results.extend(resp.json())
+ except HTTPError as e:
+ logger.debug(e.response.text)
+ if 400 <= e.response.status_code < 500:
+ raise DataPathException(_http_error_message(e), e)
+ else:
+ raise e
+ return results
+
+ result = _ResultSet(self.path.uri, results_func)
+ result.fetch()
+ return result
+
+
+ def update(self, entities, correlation={'RID'}, targets=None, retry_codes={408, 429, 500, 502, 503, 504}, backoff_factor=4, max_attempts=5, max_batch_rows=1000, max_batch_bytes=250*1024):
  """Update entities of a table.

  For more information see the ERMrest protocol for the `attributegroup` interface. By default, this method will
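
Combined with the docstring above, a hedged example of calling insert() with the new batching and retry knobs might look like this; the schema, table, and column names are assumptions, and pb is a path builder as in the earlier sketches:

```python
# Hypothetical insert() call using the new batching/retry parameters.
rows = [{'Name': 'sample %d' % i, 'Weight': i * 1.5} for i in range(5000)]

created = pb.schemas['isa'].tables['sample'].insert(
    rows,
    on_conflict_skip=True,        # with a user-defined key, this enables retry
    max_batch_rows=500,           # at most 500 rows per request...
    max_batch_bytes=250 * 1024,   # ...or ~250 KiB, whichever is reached first
    max_attempts=5,               # up to 4 retries after the initial attempt
    backoff_factor=4,             # sleeping 1, 4, 16, 64 seconds between tries
)
print(len(created))
```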
@@ -862,7 +1052,17 @@ class _TableWrapper (object):
  :param correlation: an iterable collection of column names used to correlate input set to the set of rows to be
  updated in the catalog. E.g., `{'col name'}` or `{mytable.mycolumn}` will work if you pass a _ColumnWrapper object.
  :param targets: an iterable collection of column names used as the targets of the update operation.
- :return: a collection of updated entities as returned by the corresponding ERMrest interface.
+ :param retry_codes: set of HTTP status codes for which retry should be considered.
+ :param backoff_factor: number of seconds for the base of the exponential retry backoff.
+ :param max_attempts: maximum number of request attempts with retry.
+ :param max_batch_rows: maximum number of rows for one request, or False to disable batching.
+ :param max_batch_bytes: approximate maximum number of bytes for one request.
+ :return: a collection of updated entities as returned by the corresponding ERMrest interface.
+
+ When performing retries, an exponential backoff delay is
+ introduced after each failed attempt. The delay is
+ backoff_factor**attempt_number seconds for attempts 0 through
+ max_attempts-1.
  """
  # empty entities will be accepted but results are therefore an empty entity set
  if not entities:
@@ -897,16 +1097,37 @@ class _TableWrapper (object):
  targets=','.join(target_cnames)
  )

- try:
- resp = self._schema._catalog._wrapped_catalog.put(path, json=entities, headers={'Content-Type': 'application/json'})
- return _ResultSet(self.path.uri, lambda ignore1, ignore2, ignore3: resp.json())
- except HTTPError as e:
- logger.debug(e.response.text)
- if 400 <= e.response.status_code < 500:
- raise DataPathException(_http_error_message(e), e)
- else:
- raise e
-
+ # perform one batch request in a helper we can hand to retry helper
+ def request_func(batch):
+ return self._schema._catalog._wrapped_catalog.put(path, json=batch, headers={'Content-Type': 'application/json'})
+
+ # perform all requests in a helper we can hand to _ResultSet
+ def results_func(ignore1, ignore2, ignore3):
+ results = []
+ for batch in _generate_batches(
+ entities,
+ max_batch_rows=max_batch_rows,
+ max_batch_bytes=max_batch_bytes
+ ):
+ try:
+ resp = _request_with_retry(
+ lambda: request_func(batch),
+ retry_codes=retry_codes,
+ backoff_factor=backoff_factor,
+ max_attempts=max_attempts
+ )
+ results.extend(resp.json())
+ except HTTPError as e:
+ logger.debug(e.response.text)
+ if 400 <= e.response.status_code < 500:
+ raise DataPathException(_http_error_message(e), e)
+ else:
+ raise e
+ return results
+
+ result = _ResultSet(self.path.uri, results_func)
+ result.fetch()
+ return result

  class _TableAlias (_TableWrapper):
  """Represents a table alias in datapath expressions.
@@ -1760,3 +1981,275 @@ class _AttributeGroup (object):
  bin.maxval = result.get('maxval', bin.maxval)
  if (bin.minval is None) or (bin.maxval is None):
  raise ValueError('Automatic determination of binning bounds failed.')
+
+ ##
+ ## UTILITIES FOR DENORMALIZATION ##############################################
+ ##
+
+ def _datapath_left_outer_join_by_fkey(path, fk, alias_name=None):
+ """Link a table to the path based on a foreign key reference.
+
+ :param path: a DataPath object
+ :param fk: an ermrest_model.ForeignKey object
+ :param alias_name: an optional 'alias' name to use for the foreign table
+ """
+ assert isinstance(path, DataPath)
+ assert isinstance(fk, _erm.ForeignKey)
+ catalog = path._root._schema._catalog
+
+ # determine 'direction' -- inbound or outbound
+ path_context_table = path.context._base_table._wrapped_table
+ if (path_context_table.schema.name, path_context_table.name) == (fk.table.schema.name, fk.table.name):
+ right = catalog.schemas[fk.pk_table.schema.name].tables[fk.pk_table.name]
+ fkcols = zip(fk.foreign_key_columns, fk.referenced_columns)
+ elif (path_context_table.schema.name, path_context_table.name) == (fk.pk_table.schema.name, fk.pk_table.name):
+ right = catalog.schemas[fk.table.schema.name].tables[fk.table.name]
+ fkcols = zip(fk.referenced_columns, fk.foreign_key_columns)
+ else:
+ raise ValueError('Context table "%s" not referenced by foreign key "%s"' % (path_context_table.name, fk.constraint_name))
+
+ # compose join condition
+ on = None
+ for lcol, rcol in fkcols:
+ lcol = catalog.schemas[lcol.table.schema.name].tables[lcol.table.name].columns[lcol.name]
+ rcol = catalog.schemas[rcol.table.schema.name].tables[rcol.table.name].columns[rcol.name]
+ if on:
+ on = on & (lcol == rcol)
+ else:
+ on = lcol == rcol
+
+ # link
+ path.link(right.alias(alias_name) if alias_name else right, on=on, join_type='left')
+
+
+ def _datapath_deserialize_vizcolumn(path, vizcol, sources=None):
+ """Deserializes a visible column specification.
+
+ If the visible column specifies a foreign key path, the datapath object
+ will be changed by linking the foreign keys in the path.
+
+ :param path: a datapath object
+ :param vizcol: a visible column specification
+ :return: the element to be projected from the datapath or None
+ """
+ assert isinstance(path, DataPath)
+ sources = sources if sources else {}
+ context = path.context
+ table = context._wrapped_table
+ model = table.schema.model
+
+ if isinstance(vizcol, str):
+ # column name specification
+ return context.columns[vizcol]
+ elif isinstance(vizcol, list):
+ # constraint specification
+ try:
+ fk = model.fkey(vizcol)
+ _datapath_left_outer_join_by_fkey(path, fk, alias_name='F')
+ return ArrayD(path.context).alias(path.context._name) # project all attributes
+ except KeyError as e:
+ raise ValueError('Invalid foreign key constraint name: %s. If this is a key constraint name, note that keys are not supported at this time.' % str(e))
+ elif isinstance(vizcol, dict):
+ # resolve visible column
+ while 'sourcekey' in vizcol:
+ temp = sources.get(vizcol['sourcekey'], {})
+ if temp == vizcol:
+ raise ValueError('Visible column self reference for sourcekey "%s"' % vizcol['sourcekey'])
+ vizcol = temp
+ # deserialize source definition
+ source = vizcol.get('source')
+ if not source:
+ # case: none
+ raise ValueError('Could not resolve source definition for visible column')
+ elif isinstance(source, str):
+ # case: column name
+ return context.columns[source]
+ elif isinstance(source, list):
+ # case: path expression
+ # ...validate syntax
+ if not all(isinstance(obj, dict) for obj in source[:-1]):
+ raise ValueError('Source path element must be a foreign key dict')
+ if not isinstance(source[-1], str):
+ raise ValueError('Source path must terminate in a column name string')
+ # link path elements by fkey, and track whether the path uses only outbound fkeys
+ outbound_only = True
+ for path_elem in source[:-1]:
+ try:
+ fk = model.fkey(path_elem.get('inbound', path_elem.get('outbound')))
+ _datapath_left_outer_join_by_fkey(path, fk, alias_name='F')
+ outbound_only = outbound_only and 'outbound' in path_elem
+ except KeyError as e:
+ raise ValueError('Invalid foreign key constraint name: %s' % str(e))
+ # return terminating column or entity
+ # ...get terminal name
+ terminal = source[-1]
+ # ...get alias name
+ alias = vizcol.get('markdown_name', vizcol.get('name', path.context._name + '_' + terminal))
+ # ...get aggregate function
+ aggregate = {
+ 'min': Min,
+ 'max': Max,
+ 'cnt': Cnt,
+ 'cnd_d': CntD,
+ 'array': Array,
+ 'array_d': ArrayD
+ }.get(vizcol.get('aggregate'), ArrayD)
+ # ...determine projection mode
+ if vizcol.get('entity', True):
+ # case: whole entities
+ return aggregate(path.context).alias(alias)
+ else:
+ # case: specified attribute value(s)
+ if outbound_only:
+ # for outbound only paths, we can project a single value
+ return path.context.columns[terminal].alias(alias)
+ else:
+ # otherwise, we need to aggregate the values
+ return aggregate(path.context.columns[terminal]).alias(alias)
+ else:
+ raise ValueError('Malformed source: %s' % str(source))
+ else:
+ raise ValueError('Malformed visible column: %s' % str(vizcol))
+
+
+ def _datapath_contextualize(path, context_name='*', context_body=None, groupkey_name='RID'):
+ """Contextualizes a data path to a named visible columns context.
+
+ :param path: a datapath object
+ :param context_name: name of the context within the path's terminating table's "visible columns" annotations
+ :param context_body: a list of visible column definitions; if given, `context_name` will be ignored
+ :param groupkey_name: column name for the group by key of the generated query expression (default: 'RID')
+ :return: a 'contextualized' attribute group query object
+ """
+ assert isinstance(path, DataPath)
+ path = copy.deepcopy(path)
+ context = path.context
+ table = context._wrapped_table
+ sources = table.annotations.get(_erm.tag.source_definitions, {}).get('sources')
+ vizcols = context_body if context_body else table.annotations.get(_erm.tag.visible_columns, {}).get(context_name, [])
+ if not vizcols:
+ raise ValueError('Visible columns context "%s" not found for table %s:%s' % (context_name, table.schema.name, table.name))
+ groupkey = context.columns[groupkey_name]
+ projection = []
+
+ for vizcol in vizcols:
+ try:
+ projection.append(_datapath_deserialize_vizcolumn(path, vizcol, sources=sources))
+ path.context = context
+ except ValueError as e:
+ logger.warning(str(e))
+
+ def not_same_as_group_key(x):
+ assert isinstance(groupkey, _ColumnWrapper)
+ if not isinstance(x, _ColumnWrapper):
+ return True
+ return groupkey._wrapped_column != x._wrapped_column
+
+ projection = filter(not_same_as_group_key, projection) # project groupkey only once
+ query = path.groupby(groupkey).attributes(*projection)
+ return query
+
+
+ def _datapath_generate_simple_denormalization(path, include_whole_entities=False):
+ """Generates a denormalized form of the table expressed in a visible columns specification.
+
+ :param path: a datapath object
+ :param include_whole_entities: if a denormalization cannot find a 'name'-like terminal, include the whole entity (i.e., all attributes), else return just the 'RID'
+ :return: a generated visible columns specification based on a denormalization heuristic
+ """
+ assert isinstance(path, DataPath)
+ context = path.context
+ table = context._wrapped_table
+
+ fkeys = list(table.foreign_keys)
+ single_column_fkeys = {
+ fkey.foreign_key_columns[0].name: fkey
+ for fkey in table.foreign_keys if len(fkey.foreign_key_columns) == 1
+ }
+
+ def _fkey_to_vizcol(name, fk, inbound=None):
+ # name columns to look for in related tables
+ name_candidates = [
+ 'displayname',
+ 'preferredname',
+ 'fullname',
+ 'name',
+ 'title',
+ 'label'
+ ]
+
+ # determine terminal column
+ terminal = 'RID'
+ for candidate_col in fk.pk_table.columns:
+ if candidate_col.name.lower().replace(' ', '').replace('_', '') in name_candidates:
+ terminal = candidate_col.name
+ break
+
+ # define source path
+ source = [{'outbound': fk.names[0]}, terminal]
+ if inbound:
+ source = [{'inbound': inbound.names[0]}] + source
+
+ # return vizcol spec
+ return {
+ 'markdown_name': name,
+ 'source': source,
+ 'entity': include_whole_entities and terminal == 'RID'
+ }
+
+ # assemble the visible columns:
+ # 1. column or single column fkeys
+ # 2. all other (outbound fkey) related tables
+ # 3. all associated tables
+ vizcols = []
+ for col in table.column_definitions:
+ if col.name in single_column_fkeys:
+ fkey = single_column_fkeys[col.name]
+ vizcols.append(_fkey_to_vizcol(col.name, fkey))
+ del single_column_fkeys[col.name]
+ fkeys.remove(fkey)
+ else:
+ vizcols.append(col.name)
+
+ for outbound_fkey in fkeys:
+ vizcols.append(_fkey_to_vizcol(outbound_fkey.constraint_name, outbound_fkey))
+
+ for inbound_fkey in table.referenced_by:
+ if inbound_fkey.table.is_association():
+ vizcols.append(
+ _fkey_to_vizcol(
+ inbound_fkey.table.name,
+ inbound_fkey.table.foreign_keys[0] if inbound_fkey != inbound_fkey.table.foreign_keys[0] else inbound_fkey.table.foreign_keys[1],
+ inbound=inbound_fkey
+ )
+ )
+
+ return vizcols
+
+ def simple_denormalization(path):
+ """A simple heuristic denormalization."""
+ return _datapath_generate_simple_denormalization(path)
+
+ def simple_denormalization_with_whole_entities(path):
+ """A simple heuristic denormalization with related and associated entities."""
+ return _datapath_generate_simple_denormalization(path, include_whole_entities=True)
+
+ def _datapath_denormalize(path, context_name=None, heuristic=None, groupkey_name='RID'):
+ """Denormalizes a path based on annotations or heuristics.
+
+ :param path: a DataPath object
+ :param context_name: name of the visible-columns context; if none given, will attempt to apply heuristics
+ :param heuristic: heuristic to apply if no context name specified
+ :param groupkey_name: column name for the group by key of the generated query expression (default: 'RID')
+ """
+ assert isinstance(path, DataPath)
+ assert context_name is None or isinstance(context_name, str)
+ assert isinstance(groupkey_name, str)
+ heuristic = heuristic or simple_denormalization
+ assert callable(heuristic)
+ return _datapath_contextualize(
+ path,
+ context_name=context_name,
+ context_body=None if context_name else heuristic(path),
+ groupkey_name=groupkey_name
+ )
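
Finally, a hedged end-to-end sketch of the denormalization utilities added in this hunk; the server, catalog id, schema, and table names are assumptions, and the heuristic import assumes these helpers are exposed at module level in deriva.core.datapath as the diff suggests:

```python
# Hypothetical end-to-end use of the new denormalization support.
from deriva.core import ErmrestCatalog, get_credential
from deriva.core.datapath import simple_denormalization_with_whole_entities

credential = get_credential('www.example.org')
catalog = ErmrestCatalog('https', 'www.example.org', '1', credentials=credential)
pb = catalog.getPathBuilder()
dataset = pb.schemas['isa'].tables['dataset']

# Denormalize via the heuristic that also pulls in related/associated entities.
results = dataset.denormalize(heuristic=simple_denormalization_with_whole_entities)
for row in results:
    print(row)
```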