pytrilogy 0.0.2.6__tar.gz → 0.0.2.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (105) hide show
  1. {pytrilogy-0.0.2.6/pytrilogy.egg-info → pytrilogy-0.0.2.8}/PKG-INFO +1 -1
  2. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8/pytrilogy.egg-info}/PKG-INFO +1 -1
  3. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/tests/test_environment.py +10 -3
  4. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/tests/test_models.py +55 -0
  5. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/__init__.py +1 -1
  6. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/constants.py +1 -2
  7. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/enums.py +1 -0
  8. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/models.py +75 -21
  9. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/optimizations/inline_datasource.py +11 -7
  10. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/concept_strategies_v3.py +12 -2
  11. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/node_generators/common.py +1 -0
  12. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/node_generators/filter_node.py +19 -0
  13. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/node_generators/group_node.py +1 -1
  14. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/node_generators/group_to_node.py +0 -1
  15. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/node_generators/node_merge_node.py +4 -0
  16. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/node_generators/rowset_node.py +3 -2
  17. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/nodes/base_node.py +1 -1
  18. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/nodes/filter_node.py +1 -0
  19. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/nodes/merge_node.py +28 -23
  20. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/query_processor.py +24 -31
  21. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/dialect/base.py +38 -11
  22. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/dialect/common.py +15 -3
  23. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/dialect/duckdb.py +5 -3
  24. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/dialect/presto.py +2 -1
  25. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/parsing/common.py +6 -2
  26. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/LICENSE.md +0 -0
  27. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/README.md +0 -0
  28. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/pyproject.toml +0 -0
  29. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/pytrilogy.egg-info/SOURCES.txt +0 -0
  30. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/pytrilogy.egg-info/dependency_links.txt +0 -0
  31. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/pytrilogy.egg-info/entry_points.txt +0 -0
  32. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/pytrilogy.egg-info/requires.txt +0 -0
  33. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/pytrilogy.egg-info/top_level.txt +0 -0
  34. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/setup.cfg +0 -0
  35. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/setup.py +0 -0
  36. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/tests/test_datatypes.py +0 -0
  37. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/tests/test_declarations.py +0 -0
  38. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/tests/test_derived_concepts.py +0 -0
  39. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/tests/test_discovery_nodes.py +0 -0
  40. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/tests/test_functions.py +0 -0
  41. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/tests/test_imports.py +0 -0
  42. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/tests/test_metadata.py +0 -0
  43. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/tests/test_multi_join_assignments.py +0 -0
  44. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/tests/test_parsing.py +0 -0
  45. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/tests/test_partial_handling.py +0 -0
  46. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/tests/test_query_processing.py +0 -0
  47. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/tests/test_select.py +0 -0
  48. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/tests/test_statements.py +0 -0
  49. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/tests/test_undefined_concept.py +0 -0
  50. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/tests/test_where_clause.py +0 -0
  51. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/compiler.py +0 -0
  52. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/__init__.py +0 -0
  53. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/constants.py +0 -0
  54. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/env_processor.py +0 -0
  55. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/environment_helpers.py +0 -0
  56. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/ergonomics.py +0 -0
  57. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/exceptions.py +0 -0
  58. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/functions.py +0 -0
  59. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/graph_models.py +0 -0
  60. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/internal.py +0 -0
  61. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/optimization.py +0 -0
  62. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/optimizations/__init__.py +0 -0
  63. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/optimizations/base_optimization.py +0 -0
  64. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/optimizations/inline_constant.py +0 -0
  65. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/optimizations/predicate_pushdown.py +0 -0
  66. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/__init__.py +0 -0
  67. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/graph_utils.py +0 -0
  68. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/node_generators/__init__.py +0 -0
  69. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/node_generators/basic_node.py +0 -0
  70. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/node_generators/multiselect_node.py +0 -0
  71. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/node_generators/select_node.py +0 -0
  72. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/node_generators/unnest_node.py +0 -0
  73. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/node_generators/window_node.py +0 -0
  74. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/nodes/__init__.py +0 -0
  75. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/nodes/group_node.py +0 -0
  76. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/nodes/select_node_v2.py +0 -0
  77. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/nodes/unnest_node.py +0 -0
  78. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/nodes/window_node.py +0 -0
  79. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/core/processing/utility.py +0 -0
  80. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/dialect/__init__.py +0 -0
  81. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/dialect/bigquery.py +0 -0
  82. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/dialect/config.py +0 -0
  83. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/dialect/enums.py +0 -0
  84. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/dialect/postgres.py +0 -0
  85. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/dialect/snowflake.py +0 -0
  86. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/dialect/sql_server.py +0 -0
  87. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/engine.py +0 -0
  88. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/executor.py +0 -0
  89. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/hooks/__init__.py +0 -0
  90. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/hooks/base_hook.py +0 -0
  91. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/hooks/graph_hook.py +0 -0
  92. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/hooks/query_debugger.py +0 -0
  93. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/metadata/__init__.py +0 -0
  94. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/parser.py +0 -0
  95. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/parsing/__init__.py +0 -0
  96. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/parsing/config.py +0 -0
  97. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/parsing/exceptions.py +0 -0
  98. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/parsing/helpers.py +0 -0
  99. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/parsing/parse_engine.py +0 -0
  100. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/parsing/render.py +0 -0
  101. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/parsing/trilogy.lark +0 -0
  102. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/py.typed +0 -0
  103. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/scripts/__init__.py +0 -0
  104. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/scripts/trilogy.py +0 -0
  105. {pytrilogy-0.0.2.6 → pytrilogy-0.0.2.8}/trilogy/utility.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytrilogy
3
- Version: 0.0.2.6
3
+ Version: 0.0.2.8
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pytrilogy
3
- Version: 0.0.2.6
3
+ Version: 0.0.2.8
4
4
  Summary: Declarative, typed query language that compiles to SQL.
5
5
  Home-page:
6
6
  Author:
@@ -59,6 +59,13 @@ key order_id int;
59
59
 
60
60
  assert env1.concepts["order_id"] == env1.concepts["replacements.order_id"]
61
61
 
62
- order_id = env1.datasources["replacements.replacements"].columns[0]
63
- assert order_id.concept == env1.concepts["order_id"]
64
- assert order_id.modifiers == [Modifier.PARTIAL]
62
+ found = False
63
+ for x in env1.datasources["replacements.replacements"].columns:
64
+ if (
65
+ x.alias == "order_id"
66
+ and x.concept.address == env1.concepts["order_id"].address
67
+ ):
68
+ assert x.concept == env1.concepts["order_id"]
69
+ assert x.modifiers == [Modifier.PARTIAL]
70
+ found = True
71
+ assert found
@@ -10,6 +10,8 @@ from trilogy.core.models import (
10
10
  UndefinedConcept,
11
11
  BaseJoin,
12
12
  Comparison,
13
+ Join,
14
+ JoinKey,
13
15
  )
14
16
 
15
17
 
@@ -54,6 +56,8 @@ def test_cte_merge(test_environment, test_environment_graph):
54
56
  merged = a + b
55
57
  assert merged.output_columns == outputs
56
58
 
59
+ assert "Target: Grain<Abstract>." in merged.comment
60
+
57
61
 
58
62
  def test_concept(test_environment, test_environment_graph):
59
63
  test_concept = list(test_environment.concepts.values())[0]
@@ -191,3 +195,54 @@ def test_comparison():
191
195
  Comparison(left=1, right="abc", operator=ComparisonOperator.EQ)
192
196
  except Exception as exc:
193
197
  assert isinstance(exc, SyntaxError)
198
+
199
+
200
+ def test_join(test_environment: Environment):
201
+ datasource = list(test_environment.datasources.values())[0]
202
+ outputs = [c.concept for c in datasource.columns]
203
+ output_map = {
204
+ c.address: {
205
+ datasource,
206
+ }
207
+ for c in outputs
208
+ }
209
+ a = CTE(
210
+ name="test",
211
+ output_columns=[outputs[0]],
212
+ grain=Grain(),
213
+ source=QueryDatasource(
214
+ input_concepts=[outputs[0]],
215
+ output_concepts=[outputs[0]],
216
+ datasources=[datasource],
217
+ grain=Grain(),
218
+ joins=[],
219
+ source_map={outputs[0].address: {datasource}},
220
+ ),
221
+ source_map={c.address: [datasource.identifier] for c in outputs},
222
+ )
223
+
224
+ b = CTE(
225
+ name="testb",
226
+ output_columns=outputs,
227
+ grain=Grain(),
228
+ source=QueryDatasource(
229
+ input_concepts=outputs,
230
+ output_concepts=outputs,
231
+ datasources=[datasource],
232
+ grain=Grain(),
233
+ joins=[],
234
+ source_map=output_map,
235
+ ),
236
+ source_map={c.address: [datasource.identifier] for c in outputs},
237
+ )
238
+ test = Join(
239
+ left_cte=a,
240
+ right_cte=b,
241
+ joinkeys=[JoinKey(concept=x) for x in outputs],
242
+ jointype=JoinType.RIGHT_OUTER,
243
+ )
244
+
245
+ assert (
246
+ str(test)
247
+ == "right outer JOIN test and testb on local.product_id<local.product_id>,local.category_id<local.category_id>"
248
+ ), str(test)
@@ -4,6 +4,6 @@ from trilogy.executor import Executor
4
4
  from trilogy.parser import parse
5
5
  from trilogy.constants import CONFIG
6
6
 
7
- __version__ = "0.0.2.6"
7
+ __version__ = "0.0.2.8"
8
8
 
9
9
  __all__ = ["parse", "Executor", "Dialects", "Environment", "CONFIG"]
@@ -33,6 +33,7 @@ class Config:
33
33
  strict_mode: bool = True
34
34
  human_identifiers: bool = True
35
35
  validate_missing: bool = True
36
+ show_comments: bool = False
36
37
  optimizations: Optimizations = field(default_factory=Optimizations)
37
38
 
38
39
  def set_random_seed(self, seed: int):
@@ -42,5 +43,3 @@ class Config:
42
43
  CONFIG = Config()
43
44
 
44
45
  CONFIG.set_random_seed(42)
45
-
46
- CONFIG.strict_mode = True
@@ -123,6 +123,7 @@ class FunctionType(Enum):
123
123
  MAP_ACCESS = "map_access"
124
124
  ATTR_ACCESS = "attr_access"
125
125
  STRUCT = "struct"
126
+ ARRAY = "array"
126
127
 
127
128
  # TEXT AND MAYBE MORE
128
129
  SPLIT = "split"
@@ -945,12 +945,14 @@ class ColumnAssignment(BaseModel):
945
945
  )
946
946
 
947
947
  def with_merge(
948
- self, concept: Concept, modifiers: List[Modifier]
948
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
949
949
  ) -> "ColumnAssignment":
950
950
  return ColumnAssignment(
951
951
  alias=self.alias,
952
- concept=concept,
953
- modifiers=modifiers,
952
+ concept=self.concept.with_merge(source, target, modifiers),
953
+ modifiers=(
954
+ modifiers if self.concept.address == source.address else self.modifiers
955
+ ),
954
956
  )
955
957
 
956
958
 
@@ -1890,20 +1892,22 @@ class Datasource(Namespaced, BaseModel):
1890
1892
  self, source: Concept, target: Concept, modifiers: List[Modifier]
1891
1893
  ):
1892
1894
  original = [c for c in self.columns if c.concept.address == source.address]
1895
+ if len(original) != 1:
1896
+ raise ValueError(
1897
+ f"Expected exactly one column to merge, got {len(original)} for {source.address}, {[x.alias for x in original]}"
1898
+ )
1893
1899
  # map to the alias with the modifier, and the original
1894
1900
  self.columns = [
1895
- (
1896
- c.with_merge(target, modifiers)
1897
- if c.concept.address == source.address
1898
- else c
1899
- )
1901
+ c.with_merge(source, target, modifiers)
1900
1902
  for c in self.columns
1903
+ if c.concept.address != source.address
1901
1904
  ] + original
1902
1905
  self.grain = self.grain.with_merge(source, target, modifiers)
1903
1906
  self.where = (
1904
1907
  self.where.with_merge(source, target, modifiers) if self.where else None
1905
1908
  )
1906
- del self.output_lcl
1909
+
1910
+ self.add_column(target, original[0].alias, modifiers)
1907
1911
 
1908
1912
  @property
1909
1913
  def env_label(self) -> str:
@@ -1915,7 +1919,7 @@ class Datasource(Namespaced, BaseModel):
1915
1919
  def condition(self):
1916
1920
  return None
1917
1921
 
1918
- @cached_property
1922
+ @property
1919
1923
  def output_lcl(self) -> LooseConceptList:
1920
1924
  return LooseConceptList(concepts=self.output_concepts)
1921
1925
 
@@ -1923,9 +1927,9 @@ class Datasource(Namespaced, BaseModel):
1923
1927
  def can_be_inlined(self) -> bool:
1924
1928
  if isinstance(self.address, Address) and self.address.is_query:
1925
1929
  return False
1926
- for x in self.columns:
1927
- if not isinstance(x.alias, str):
1928
- return False
1930
+ # for x in self.columns:
1931
+ # if not isinstance(x.alias, str):
1932
+ # return False
1929
1933
  return True
1930
1934
 
1931
1935
  @property
@@ -1960,12 +1964,15 @@ class Datasource(Namespaced, BaseModel):
1960
1964
  )
1961
1965
  return grain
1962
1966
 
1963
- def add_column(self, concept: Concept, alias: str, modifiers=None):
1967
+ def add_column(
1968
+ self,
1969
+ concept: Concept,
1970
+ alias: str | RawColumnExpr | Function,
1971
+ modifiers: List[Modifier] | None = None,
1972
+ ):
1964
1973
  self.columns.append(
1965
- ColumnAssignment(alias=alias, concept=concept, modifiers=modifiers)
1974
+ ColumnAssignment(alias=alias, concept=concept, modifiers=modifiers or [])
1966
1975
  )
1967
- # force refresh
1968
- del self.output_lcl
1969
1976
 
1970
1977
  def __add__(self, other):
1971
1978
  if not other == self:
@@ -1998,7 +2005,7 @@ class Datasource(Namespaced, BaseModel):
1998
2005
  where=self.where.with_namespace(namespace) if self.where else None,
1999
2006
  )
2000
2007
 
2001
- @cached_property
2008
+ @property
2002
2009
  def concepts(self) -> List[Concept]:
2003
2010
  return [c.concept for c in self.columns]
2004
2011
 
@@ -2059,6 +2066,7 @@ class Datasource(Namespaced, BaseModel):
2059
2066
  class UnnestJoin(BaseModel):
2060
2067
  concept: Concept
2061
2068
  alias: str = "unnest"
2069
+ rendering_required: bool = True
2062
2070
 
2063
2071
  def __hash__(self):
2064
2072
  return (self.alias + self.concept.address).__hash__()
@@ -2149,6 +2157,12 @@ class BaseJoin(BaseModel):
2149
2157
  )
2150
2158
 
2151
2159
  def __str__(self):
2160
+ if self.concept_pairs:
2161
+ return (
2162
+ f"{self.join_type.value} JOIN {self.left_datasource.identifier} and"
2163
+ f" {self.right_datasource.identifier} on"
2164
+ f" {','.join([str(k[0])+'='+str(k[1]) for k in self.concept_pairs])}"
2165
+ )
2152
2166
  return (
2153
2167
  f"{self.join_type.value} JOIN {self.left_datasource.identifier} and"
2154
2168
  f" {self.right_datasource.identifier} on"
@@ -2447,10 +2461,17 @@ class CTE(BaseModel):
2447
2461
  for join in self.joins
2448
2462
  if not isinstance(join, Join)
2449
2463
  or (
2450
- join.right_cte.name != removed_cte
2451
- and join.left_cte.name != removed_cte
2464
+ isinstance(join, Join)
2465
+ and (
2466
+ join.right_cte.name != removed_cte
2467
+ and join.left_cte.name != removed_cte
2468
+ )
2452
2469
  )
2453
2470
  ]
2471
+ for join in self.joins:
2472
+ if isinstance(join, UnnestJoin) and join.concept == concept:
2473
+ join.rendering_required = False
2474
+
2454
2475
  self.parent_ctes = [
2455
2476
  x for x in self.parent_ctes if x.name != removed_cte
2456
2477
  ]
@@ -2460,6 +2481,19 @@ class CTE(BaseModel):
2460
2481
  self.base_alias_override = candidates[0] if candidates else None
2461
2482
  return True
2462
2483
 
2484
+ @property
2485
+ def comment(self) -> str:
2486
+ base = f"Target: {str(self.grain)}."
2487
+ if self.parent_ctes:
2488
+ base += f" References: {', '.join([x.name for x in self.parent_ctes])}."
2489
+ if self.joins:
2490
+ base += f"\n-- Joins: {', '.join([str(x) for x in self.joins])}."
2491
+ if self.partial_concepts:
2492
+ base += (
2493
+ f"\n-- Partials: {', '.join([str(x) for x in self.partial_concepts])}."
2494
+ )
2495
+ return base
2496
+
2463
2497
  def inline_parent_datasource(self, parent: CTE, force_group: bool = False) -> bool:
2464
2498
  qds_being_inlined = parent.source
2465
2499
  ds_being_inlined = qds_being_inlined.datasources[0]
@@ -2550,6 +2584,10 @@ class CTE(BaseModel):
2550
2584
  self.hidden_concepts = unique(
2551
2585
  self.hidden_concepts + other.hidden_concepts, "address"
2552
2586
  )
2587
+ self.existence_source_map = {
2588
+ **self.existence_source_map,
2589
+ **other.existence_source_map,
2590
+ }
2553
2591
  return self
2554
2592
 
2555
2593
  @property
@@ -2741,6 +2779,12 @@ class Join(BaseModel):
2741
2779
  return self.left_name + self.right_name + self.jointype.value
2742
2780
 
2743
2781
  def __str__(self):
2782
+ if self.joinkey_pairs:
2783
+ return (
2784
+ f"{self.jointype.value} JOIN {self.left_name} and"
2785
+ f" {self.right_name} on"
2786
+ f" {','.join([str(k[0])+'='+str(k[1]) for k in self.joinkey_pairs])}"
2787
+ )
2744
2788
  return (
2745
2789
  f"{self.jointype.value} JOIN {self.left_name} and"
2746
2790
  f" {self.right_name} on {','.join([str(k) for k in self.joinkeys])}"
@@ -3002,6 +3046,7 @@ class Environment(BaseModel):
3002
3046
 
3003
3047
  materialized_concepts: List[Concept] = Field(default_factory=list)
3004
3048
  alias_origin_lookup: Dict[str, Concept] = Field(default_factory=dict)
3049
+ canonical_map: Dict[str, str] = Field(default_factory=dict)
3005
3050
  _parse_count: int = 0
3006
3051
 
3007
3052
  @classmethod
@@ -3050,7 +3095,7 @@ class Environment(BaseModel):
3050
3095
  if x.address not in current_mat
3051
3096
  ]
3052
3097
  if new:
3053
- logger.info(f"Environment added new materialized concepts {new}")
3098
+ logger.debug(f"Environment added new materialized concepts {new}")
3054
3099
 
3055
3100
  def validate_concept(self, lookup: str, meta: Meta | None = None):
3056
3101
  existing: Concept = self.concepts.get(lookup) # type: ignore
@@ -3213,13 +3258,22 @@ class Environment(BaseModel):
3213
3258
  self, source: Concept, target: Concept, modifiers: List[Modifier]
3214
3259
  ):
3215
3260
  replacements = {}
3261
+ # exit early if we've run this
3262
+ if source.address in self.alias_origin_lookup:
3263
+ if self.concepts[source.address] == target:
3264
+ return
3216
3265
  self.alias_origin_lookup[source.address] = source
3217
3266
  for k, v in self.concepts.items():
3267
+
3218
3268
  if v.address == target.address:
3219
3269
  v.pseudonyms[source.address] = source
3220
3270
  if v.address == source.address:
3221
3271
  replacements[k] = target
3272
+ self.canonical_map[k] = target.address
3222
3273
  v.pseudonyms[target.address] = target
3274
+ # we need to update keys and grains of all concepts
3275
+ else:
3276
+ replacements[k] = v.with_merge(source, target, modifiers)
3223
3277
  self.concepts.update(replacements)
3224
3278
 
3225
3279
  for k, ds in self.datasources.items():
@@ -42,19 +42,18 @@ class InlineDatasource(OptimizationRule):
42
42
  self.log(f"parent {parent_cte.name} datasource is not inlineable")
43
43
  continue
44
44
  root_outputs = {x.address for x in root.output_concepts}
45
- cte_outputs = {x.address for x in cte.output_columns}
46
- inherited = {x for x, v in cte.source_map.items() if v}
47
- # cte_inherited_outputs = {x.address for x in parent_cte.output_columns if parent_cte.source_map.get(x.address)}
48
- grain_components = {x.address for x in root.grain.components}
45
+ inherited = {
46
+ x for x, v in cte.source_map.items() if v and parent_cte.name in v
47
+ }
49
48
  if not inherited.issubset(root_outputs):
50
49
  cte_missing = inherited - root_outputs
51
50
  self.log(
52
51
  f"Not all {parent_cte.name} require inputs are found on datasource, missing {cte_missing}"
53
52
  )
54
53
  continue
55
- if not grain_components.issubset(cte_outputs):
56
- self.log("Not all datasource components in cte outputs, forcing group")
57
- force_group = True
54
+ if not root.grain.issubset(parent_cte.grain):
55
+ self.log(f"Not all {parent_cte.name} is at wrong grain to inline")
56
+ continue
58
57
  to_inline.append(parent_cte)
59
58
 
60
59
  optimized = False
@@ -68,6 +67,11 @@ class InlineDatasource(OptimizationRule):
68
67
  f"Skipping inlining raw datasource {replaceable.source.name} ({replaceable.name}) due to multiple references"
69
68
  )
70
69
  continue
70
+ if not replaceable.source.datasources[0].grain.issubset(replaceable.grain):
71
+ self.log(
72
+ f"Forcing group ({parent_cte.grain} being replaced by inlined source {root.grain})"
73
+ )
74
+ force_group = True
71
75
  result = cte.inline_parent_datasource(replaceable, force_group=force_group)
72
76
  if result:
73
77
  self.log(
@@ -612,9 +612,19 @@ def _search_concepts(
612
612
  )
613
613
 
614
614
  if expanded:
615
- expanded.resolve()
615
+ # we don't need to return the entire list; just the ones we needed pre-expansion
616
+ ex_resolve = expanded.resolve()
617
+ extra = [
618
+ x
619
+ for x in ex_resolve.output_concepts
620
+ if x.address not in [y.address for y in mandatory_list]
621
+ and x not in ex_resolve.grain.components
622
+ ]
623
+ expanded.output_concepts = mandatory_list
624
+ expanded.rebuild_cache()
625
+
616
626
  logger.info(
617
- f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found connections for {[c.address for c in mandatory_list]} via concept addition;"
627
+ f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found connections for {[c.address for c in mandatory_list]} via concept addition; removing extra {[c.address for c in extra]}"
618
628
  )
619
629
  return expanded
620
630
  # if we can't find it after expanding to a merge, then
@@ -209,6 +209,7 @@ def gen_enrichment_node(
209
209
  environment=environment,
210
210
  g=g,
211
211
  parents=[enrich_node, base_node],
212
+ force_group=False,
212
213
  node_joins=[
213
214
  NodeJoin(
214
215
  left_node=enrich_node,
@@ -105,10 +105,29 @@ def gen_filter_node(
105
105
  environment=environment,
106
106
  g=g,
107
107
  parents=core_parents,
108
+ grain=Grain(
109
+ components=[immediate_parent] + parent_row_concepts,
110
+ ),
111
+ )
112
+
113
+ assert filter_node.resolve().grain == Grain(
114
+ components=[immediate_parent] + parent_row_concepts,
108
115
  )
109
116
  if not local_optional or all(
110
117
  [x.address in [y.address for y in parent_row_concepts] for x in local_optional]
111
118
  ):
119
+ outputs = [
120
+ x
121
+ for x in filter_node.output_concepts
122
+ if x.address in [y.address for y in local_optional]
123
+ ]
124
+ logger.info(
125
+ f"{padding(depth)}{LOGGER_PREFIX} no extra enrichment needed for filter node"
126
+ )
127
+ filter_node.output_concepts = [
128
+ concept,
129
+ ] + outputs
130
+ filter_node.rebuild_cache()
112
131
  return filter_node
113
132
  enrich_node = source_concepts( # this fetches the parent + join keys
114
133
  # to then connect to the rest of the query
@@ -29,7 +29,7 @@ def gen_group_node(
29
29
  resolve_function_parent_concepts(concept), "address"
30
30
  )
31
31
  logger.info(
32
- f"{padding(depth)}{LOGGER_PREFIX} parent_concepts are {[x.address for x in parent_concepts]} from group grain {concept.grain}"
32
+ f"{padding(depth)}{LOGGER_PREFIX} parent concepts are {[x.address for x in parent_concepts]} from group grain {concept.grain}"
33
33
  )
34
34
 
35
35
  # if the aggregation has a grain, we need to ensure these are the ONLY optional in the output of the select
@@ -95,5 +95,4 @@ def gen_group_to_node(
95
95
  ],
96
96
  whole_grain=True,
97
97
  depth=depth,
98
- partial_concepts=group_node.partial_concepts,
99
98
  )
@@ -260,6 +260,7 @@ def subgraphs_to_merge_node(
260
260
  source_concepts,
261
261
  history,
262
262
  conditions,
263
+ enable_early_exit: bool = True,
263
264
  ):
264
265
  parents: List[StrategyNode] = []
265
266
  logger.info(
@@ -290,6 +291,8 @@ def subgraphs_to_merge_node(
290
291
  for x in parents:
291
292
  for y in x.output_concepts:
292
293
  input_c.append(y)
294
+ if len(parents) == 1 and enable_early_exit:
295
+ return parents[0]
293
296
 
294
297
  return MergeNode(
295
298
  input_concepts=unique(input_c, "address"),
@@ -350,6 +353,7 @@ def gen_merge_node(
350
353
  source_concepts=source_concepts,
351
354
  history=history,
352
355
  conditions=conditions,
356
+ enable_early_exit=False,
353
357
  )
354
358
  if test:
355
359
  return test
@@ -74,8 +74,9 @@ def gen_rowset_node(
74
74
  if existence_parents:
75
75
  node.parents += existence_parents
76
76
  # we don't need to join to any existence parents
77
- if isinstance(node, MergeNode):
78
- node.node_joins = []
77
+ # if isinstance(node, MergeNode) and node.node_joins is None:
78
+ # # set it explicitly to empty to avoid inference
79
+ # node.node_joins = []
79
80
  for parent in existence_parents:
80
81
  for x in parent.output_concepts:
81
82
  if x.address not in node.output_lcl:
@@ -192,7 +192,7 @@ class StrategyNode:
192
192
  p.resolve() for p in self.parents
193
193
  ]
194
194
 
195
- grain = Grain(components=self.output_concepts)
195
+ grain = self.grain if self.grain else Grain(components=self.output_concepts)
196
196
  source_map = resolve_concept_map(
197
197
  parent_sources,
198
198
  self.output_concepts,
@@ -63,4 +63,5 @@ class FilterNode(StrategyNode):
63
63
  conditions=self.conditions,
64
64
  partial_concepts=list(self.partial_concepts),
65
65
  force_group=self.force_group,
66
+ grain=self.grain,
66
67
  )
@@ -78,7 +78,7 @@ def deduplicate_nodes_and_joins(
78
78
  duplicates = False
79
79
  duplicates, merged, removed = deduplicate_nodes(merged, logging_prefix)
80
80
  # filter out any removed joins
81
- if joins:
81
+ if joins is not None:
82
82
  joins = [
83
83
  j
84
84
  for j in joins
@@ -138,6 +138,16 @@ class MergeNode(StrategyNode):
138
138
  continue
139
139
  final_joins.append(join)
140
140
  self.node_joins = final_joins
141
+ partial_lookup: list[Concept] = []
142
+ non_partial: List[Concept] = []
143
+ for node in parents or []:
144
+ partial_lookup += node.partial_concepts
145
+ non_partial += [
146
+ x for x in node.output_concepts if x not in node.partial_concepts
147
+ ]
148
+
149
+ final_partial = [x for x in partial_lookup if x not in non_partial]
150
+ self.partial_concepts = final_partial
141
151
 
142
152
  def translate_node_joins(self, node_joins: List[NodeJoin]) -> List[BaseJoin]:
143
153
  joins = []
@@ -219,12 +229,13 @@ class MergeNode(StrategyNode):
219
229
  )
220
230
  joins = self.translate_node_joins(final_joins)
221
231
  else:
232
+ logger.info(
233
+ f"{self.logging_prefix}{LOGGER_PREFIX} Final joins is not null {final_joins} but is empty, skipping join generation"
234
+ )
222
235
  return []
223
236
 
224
237
  for join in joins:
225
- logger.info(
226
- f"{self.logging_prefix}{LOGGER_PREFIX} final join {join.join_type} {[str(c) for c in join.concepts]}"
227
- )
238
+ logger.info(f"{self.logging_prefix}{LOGGER_PREFIX} final join {str(join)}")
228
239
  return joins
229
240
 
230
241
  def _resolve(self) -> QueryDatasource:
@@ -249,6 +260,12 @@ class MergeNode(StrategyNode):
249
260
  # early exit if we can just return the parent
250
261
  final_datasets: List[QueryDatasource | Datasource] = list(merged.values())
251
262
 
263
+ existence_final = [
264
+ x
265
+ for x in final_datasets
266
+ if all([y in self.existence_concepts for y in x.output_concepts])
267
+ ]
268
+
252
269
  if len(merged.keys()) == 1:
253
270
  final: QueryDatasource | Datasource = list(merged.values())[0]
254
271
  if (
@@ -288,34 +305,25 @@ class MergeNode(StrategyNode):
288
305
  for source in final_datasets:
289
306
  pregrain += source.grain
290
307
 
291
- grain = (
292
- self.grain
293
- if self.grain
294
- else Grain(
295
- components=[
296
- c
297
- for c in pregrain.components
298
- if c.address in [x.address for x in self.output_concepts]
299
- ]
300
- )
301
- )
308
+ grain = self.grain if self.grain else pregrain
302
309
 
303
310
  logger.info(
304
311
  f"{self.logging_prefix}{LOGGER_PREFIX} has pre grain {pregrain} and final merge node grain {grain}"
305
312
  )
306
-
307
- if len(final_datasets) > 1:
313
+ join_candidates = [x for x in final_datasets if x not in existence_final]
314
+ if len(join_candidates) > 1:
308
315
  joins = self.generate_joins(
309
- final_datasets, final_joins, pregrain, grain, self.environment
316
+ join_candidates, final_joins, pregrain, grain, self.environment
310
317
  )
311
318
  else:
312
319
  joins = []
313
-
320
+ logger.info(
321
+ f"{self.logging_prefix}{LOGGER_PREFIX} Final join count for CTE parent count {len(join_candidates)} is {len(joins)}"
322
+ )
314
323
  full_join_concepts = []
315
324
  for join in joins:
316
325
  if join.join_type == JoinType.FULL:
317
326
  full_join_concepts += join.concepts
318
-
319
327
  if self.whole_grain:
320
328
  force_group = False
321
329
  elif self.force_group is False:
@@ -337,9 +345,6 @@ class MergeNode(StrategyNode):
337
345
  inherited_inputs=self.input_concepts + self.existence_concepts,
338
346
  full_joins=full_join_concepts,
339
347
  )
340
- logger.info(
341
- f"{self.logging_prefix}{LOGGER_PREFIX} source_map {str(source_map)}"
342
- )
343
348
  qds = QueryDatasource(
344
349
  input_concepts=unique(self.input_concepts, "address"),
345
350
  output_concepts=unique(self.output_concepts, "address"),
@@ -183,49 +183,42 @@ def generate_cte_name(full_name: str, name_map: dict[str, str]) -> str:
183
183
  return full_name.replace("<", "").replace(">", "").replace(",", "_")
184
184
 
185
185
 
186
- def resolve_cte_base_name_and_alias(
186
+ def resolve_cte_base_name_and_alias_v2(
187
187
  name: str,
188
188
  source: QueryDatasource,
189
- parents: List[CTE],
190
- joins: List[Join | InstantiatedUnnestJoin],
189
+ source_map: Dict[str, list[str]],
190
+ raw_joins: List[Join | InstantiatedUnnestJoin],
191
191
  ) -> Tuple[str | None, str | None]:
192
-
193
- valid_joins: List[Join] = [join for join in joins if isinstance(join, Join)]
194
- relevant_parent_sources = set()
195
- for k, v in source.source_map.items():
196
- if v:
197
- relevant_parent_sources.update(v)
198
- eligible = [x for x in source.datasources if x in relevant_parent_sources]
192
+ joins: List[Join] = [join for join in raw_joins if isinstance(join, Join)]
199
193
  if (
200
- len(eligible) == 1
201
- and isinstance(eligible[0], Datasource)
202
- and not eligible[0].name == CONSTANT_DATASET
194
+ len(source.datasources) == 1
195
+ and isinstance(source.datasources[0], Datasource)
196
+ and not source.datasources[0].name == CONSTANT_DATASET
203
197
  ):
204
- ds = eligible[0]
198
+ ds = source.datasources[0]
205
199
  return ds.safe_location, ds.identifier
206
200
 
207
- # if we have multiple joined CTEs, pick the base
208
- # as the root
209
- elif len(eligible) == 1 and len(parents) == 1:
210
- return parents[0].name, parents[0].name
211
- elif valid_joins and len(valid_joins) > 0:
212
- candidates = [x.left_cte.name for x in valid_joins]
213
- disallowed = [x.right_cte.name for x in valid_joins]
201
+ if joins and len(joins) > 0:
202
+ candidates = [x.left_cte.name for x in joins]
203
+ disallowed = [x.right_cte.name for x in joins]
214
204
  try:
215
205
  cte = [y for y in candidates if y not in disallowed][0]
216
206
  return cte, cte
217
207
  except IndexError:
218
208
  raise SyntaxError(
219
- f"Invalid join configuration {candidates} {disallowed} with all parents {[x.base_name for x in parents]}"
209
+ f"Invalid join configuration {candidates} {disallowed} for {name}",
220
210
  )
221
- elif eligible:
222
- matched = [x for x in parents if x.source.name == eligible[0].name]
223
- if matched:
224
- return matched[0].name, matched[0].name
225
211
 
226
- logger.info(
227
- f"Could not determine CTE base name for {name} with relevant sources {relevant_parent_sources}"
228
- )
212
+ counts: dict[str, int] = defaultdict(lambda: 0)
213
+ output_addresses = [x.address for x in source.output_concepts]
214
+ for k, v in source_map.items():
215
+ for vx in v:
216
+ if k in output_addresses:
217
+ counts[vx] = counts[vx] + 1
218
+ else:
219
+ counts[vx] = counts[vx]
220
+ if counts:
221
+ return max(counts, key=counts.get), max(counts, key=counts.get) # type: ignore
229
222
  return None, None
230
223
 
231
224
 
@@ -274,8 +267,8 @@ def datasource_to_ctes(
274
267
  for x in [base_join_to_join(join, parents) for join in query_datasource.joins]
275
268
  if x
276
269
  ]
277
- base_name, base_alias = resolve_cte_base_name_and_alias(
278
- human_id, query_datasource, parents, final_joins
270
+ base_name, base_alias = resolve_cte_base_name_and_alias_v2(
271
+ human_id, query_datasource, source_map, final_joins
279
272
  )
280
273
  cte = CTE(
281
274
  name=human_id,
@@ -51,7 +51,7 @@ from trilogy.core.models import (
51
51
  MergeStatementV2,
52
52
  )
53
53
  from trilogy.core.query_processor import process_query, process_persist
54
- from trilogy.dialect.common import render_join
54
+ from trilogy.dialect.common import render_join, render_unnest
55
55
  from trilogy.hooks.base_hook import BaseHook
56
56
  from trilogy.core.enums import UnnestMode
57
57
 
@@ -128,6 +128,7 @@ FUNCTION_MAP = {
128
128
  FunctionType.UNNEST: lambda x: f"unnest({x[0]})",
129
129
  FunctionType.ATTR_ACCESS: lambda x: f"""{x[0]}.{x[1].replace("'", "")}""",
130
130
  FunctionType.STRUCT: lambda x: f"{{{', '.join(struct_arg(x))}}}",
131
+ FunctionType.ARRAY: lambda x: f"[{', '.join(x)}]",
131
132
  # math
132
133
  FunctionType.ADD: lambda x: f"{x[0]} + {x[1]}",
133
134
  FunctionType.SUBTRACT: lambda x: f"{x[0]} - {x[1]}",
@@ -466,7 +467,7 @@ class BaseDialect:
466
467
  elif isinstance(e, MapWrapper):
467
468
  return f"MAP {{{','.join([f'{self.render_expr(k, cte=cte, cte_map=cte_map)}:{self.render_expr(v, cte=cte, cte_map=cte_map)}' for k, v in e.items()])}}}"
468
469
  elif isinstance(e, list):
469
- return f"[{','.join([self.render_expr(x, cte=cte, cte_map=cte_map) for x in e])}]"
470
+ return f"{self.FUNCTION_MAP[FunctionType.ARRAY]([self.render_expr(x, cte=cte, cte_map=cte_map) for x in e])}"
470
471
  elif isinstance(e, DataType):
471
472
  return str(e.value)
472
473
  elif isinstance(e, DatePart):
@@ -480,8 +481,12 @@ class BaseDialect:
480
481
  raise ValueError(f"Unable to render type {type(e)} {e}")
481
482
 
482
483
  def render_cte(self, cte: CTE):
483
- if self.UNNEST_MODE in (UnnestMode.CROSS_APPLY, UnnestMode.CROSS_JOIN):
484
- # for a cross apply, derviation happens in the join
484
+ if self.UNNEST_MODE in (
485
+ UnnestMode.CROSS_APPLY,
486
+ UnnestMode.CROSS_JOIN,
487
+ UnnestMode.CROSS_JOIN_ALIAS,
488
+ ):
489
+ # for a cross apply, derivation happens in the join
485
490
  # so we only use the alias to select
486
491
  select_columns = [
487
492
  self.render_concept_sql(c, cte)
@@ -499,20 +504,42 @@ class BaseDialect:
499
504
  for c in cte.output_columns
500
505
  if c.address not in [y.address for y in cte.hidden_concepts]
501
506
  ]
502
- if cte.quote_address:
503
- source = f"{self.QUOTE_CHARACTER}{cte.base_name}{self.QUOTE_CHARACTER}"
507
+ source: str | None = cte.base_name
508
+ if not cte.render_from_clause:
509
+ if len(cte.joins) > 0:
510
+ if cte.join_derived_concepts and self.UNNEST_MODE in (
511
+ UnnestMode.CROSS_JOIN_ALIAS,
512
+ UnnestMode.CROSS_JOIN,
513
+ UnnestMode.CROSS_APPLY,
514
+ ):
515
+ source = f"{render_unnest(self.UNNEST_MODE, self.QUOTE_CHARACTER, cte.join_derived_concepts[0], self.render_concept_sql, cte)}"
516
+ # direct - eg DUCK DB - can be directly selected inline
517
+ elif cte.join_derived_concepts and self.UNNEST_MODE == UnnestMode.DIRECT:
518
+ source = None
519
+ else:
520
+ raise SyntaxError("CTE has joins but no from clause")
521
+ else:
522
+ source = None
523
+ else:
524
+ if cte.quote_address:
525
+ source = f"{self.QUOTE_CHARACTER}{cte.base_name}{self.QUOTE_CHARACTER}"
526
+ else:
527
+ source = cte.base_name
528
+ if cte.base_name != cte.base_alias:
529
+ source = f"{source} as {cte.base_alias}"
530
+ if not cte.render_from_clause:
531
+ final_joins = []
504
532
  else:
505
- source = cte.base_name
506
- if cte.base_name != cte.base_alias:
507
- source = f"{source} as {cte.base_alias}"
533
+ final_joins = cte.joins or []
508
534
  return CompiledCTE(
509
535
  name=cte.name,
510
536
  statement=self.SQL_TEMPLATE.render(
511
537
  select_columns=select_columns,
512
- base=(f"{source}" if cte.render_from_clause else None),
538
+ base=f"{source}" if source else None,
513
539
  grain=cte.grain,
514
540
  limit=cte.limit,
515
541
  # some joins may not need to be rendered
542
+ comment=cte.comment if CONFIG.show_comments else None,
516
543
  joins=[
517
544
  j
518
545
  for j in [
@@ -523,7 +550,7 @@ class BaseDialect:
523
550
  cte,
524
551
  self.UNNEST_MODE,
525
552
  )
526
- for join in (cte.joins or [])
553
+ for join in final_joins
527
554
  ]
528
555
  if j
529
556
  ],
@@ -9,6 +9,18 @@ def null_wrapper(lval: str, rval: str, concept: Concept) -> str:
9
9
  return f"{lval} = {rval}"
10
10
 
11
11
 
12
+ def render_unnest(
13
+ unnest_mode: UnnestMode,
14
+ quote_character: str,
15
+ concept: Concept,
16
+ render_func: Callable[[Concept, CTE, bool], str],
17
+ cte: CTE,
18
+ ):
19
+ if unnest_mode == UnnestMode.CROSS_JOIN:
20
+ return f"{render_func(concept, cte, False)} as {quote_character}{concept.safe_address}{quote_character}"
21
+ return f"{render_func(concept, cte, False)} as unnest_wrapper ({quote_character}{concept.safe_address}{quote_character})"
22
+
23
+
12
24
  def render_join(
13
25
  join: Join | InstantiatedUnnestJoin,
14
26
  quote_character: str,
@@ -25,10 +37,10 @@ def render_join(
25
37
  if not cte:
26
38
  raise ValueError("must provide a cte to build an unnest joins")
27
39
  if unnest_mode == UnnestMode.CROSS_JOIN:
28
- return f"CROSS JOIN {render_func(join.concept, cte, False)} as {quote_character}{join.concept.safe_address}{quote_character}"
40
+ return f"CROSS JOIN {render_unnest(unnest_mode, quote_character, join.concept, render_func, cte)}"
29
41
  if unnest_mode == UnnestMode.CROSS_JOIN_ALIAS:
30
- return f"CROSS JOIN {render_func(join.concept, cte, False)} as array_unnest ({quote_character}{join.concept.safe_address}{quote_character})"
31
- return f"FULL JOIN {render_func(join.concept, cte, False)} as unnest_wrapper({quote_character}{join.concept.safe_address}{quote_character})"
42
+ return f"CROSS JOIN {render_unnest(unnest_mode, quote_character, join.concept, render_func, cte)}"
43
+ return f"FULL JOIN {render_unnest(unnest_mode, quote_character, join.concept, render_func, cte)}"
32
44
  left_name = join.left_name
33
45
  right_name = join.right_name
34
46
  right_base = join.right_ref
@@ -47,8 +47,9 @@ CREATE OR REPLACE TABLE {{ output.address.location }} AS
47
47
  {% endif %}{%- if ctes %}
48
48
  WITH {% for cte in ctes %}
49
49
  {{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
50
- {% if full_select -%}{{full_select}}
51
- {% else -%}
50
+ {%- if full_select -%}{{full_select}}
51
+ {%- else -%}{%- if comment %}
52
+ -- {{ comment }}{% endif %}
52
53
  SELECT
53
54
  {%- for select in select_columns %}
54
55
  {{ select }}{% if not loop.last %},{% endif %}{% endfor %}
@@ -56,7 +57,8 @@ SELECT
56
57
  {{ base }}{% endif %}{% if joins %}
57
58
  {%- for join in joins %}
58
59
  {{ join }}{% endfor %}{% endif %}
59
- {% if where %}WHERE
60
+ {%- if where %}
61
+ WHERE
60
62
  {{ where }}
61
63
  {% endif -%}{%- if group_by %}
62
64
  GROUP BY {% for group in group_by %}
@@ -33,6 +33,7 @@ FUNCTION_MAP = {
33
33
  FunctionType.DATE_ADD: lambda x: f"DATE_ADD('{x[1]}', {x[2]}, {x[0]})",
34
34
  FunctionType.CURRENT_DATE: lambda x: "CURRENT_DATE",
35
35
  FunctionType.CURRENT_DATETIME: lambda x: "CURRENT_TIMESTAMP",
36
+ FunctionType.ARRAY: lambda x: f"ARRAY[{', '.join(x)}]",
36
37
  }
37
38
 
38
39
  FUNCTION_GRAIN_MATCH_MAP = {
@@ -86,7 +87,7 @@ class PrestoDialect(BaseDialect):
86
87
  QUOTE_CHARACTER = '"'
87
88
  SQL_TEMPLATE = SQL_TEMPLATE
88
89
  DATATYPE_MAP = {**BaseDialect.DATATYPE_MAP, DataType.NUMERIC: "DECIMAL"}
89
- UNNEST_MODE = UnnestMode.CROSS_JOIN
90
+ UNNEST_MODE = UnnestMode.CROSS_JOIN_ALIAS
90
91
 
91
92
 
92
93
  class TrinoDialect(PrestoDialect):
@@ -112,12 +112,16 @@ def filter_item_to_concept(
112
112
  return Concept(
113
113
  name=name,
114
114
  datatype=parent.content.datatype,
115
- purpose=parent.content.purpose,
115
+ purpose=Purpose.PROPERTY,
116
116
  lineage=parent,
117
117
  metadata=fmetadata,
118
118
  namespace=namespace,
119
119
  # filtered copies cannot inherit keys
120
- keys=None,
120
+ keys=(
121
+ parent.content.keys
122
+ if parent.content.purpose == Purpose.PROPERTY
123
+ else (parent.content,)
124
+ ),
121
125
  grain=(
122
126
  parent.content.grain
123
127
  if parent.content.purpose == Purpose.PROPERTY
File without changes
File without changes
File without changes
File without changes
File without changes