pytrilogy 0.0.2.7__py3-none-any.whl → 0.0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (34)
  1. {pytrilogy-0.0.2.7.dist-info → pytrilogy-0.0.2.9.dist-info}/METADATA +1 -1
  2. {pytrilogy-0.0.2.7.dist-info → pytrilogy-0.0.2.9.dist-info}/RECORD +34 -34
  3. {pytrilogy-0.0.2.7.dist-info → pytrilogy-0.0.2.9.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +1 -1
  5. trilogy/constants.py +1 -0
  6. trilogy/core/enums.py +1 -0
  7. trilogy/core/models.py +154 -56
  8. trilogy/core/optimization.py +44 -5
  9. trilogy/core/optimizations/inline_datasource.py +14 -8
  10. trilogy/core/optimizations/predicate_pushdown.py +73 -44
  11. trilogy/core/processing/concept_strategies_v3.py +69 -28
  12. trilogy/core/processing/node_generators/common.py +42 -16
  13. trilogy/core/processing/node_generators/filter_node.py +89 -48
  14. trilogy/core/processing/node_generators/group_node.py +3 -1
  15. trilogy/core/processing/node_generators/rowset_node.py +13 -54
  16. trilogy/core/processing/node_generators/select_node.py +10 -13
  17. trilogy/core/processing/node_generators/unnest_node.py +5 -3
  18. trilogy/core/processing/node_generators/window_node.py +23 -2
  19. trilogy/core/processing/nodes/__init__.py +34 -6
  20. trilogy/core/processing/nodes/base_node.py +67 -13
  21. trilogy/core/processing/nodes/filter_node.py +3 -0
  22. trilogy/core/processing/nodes/group_node.py +3 -0
  23. trilogy/core/processing/nodes/merge_node.py +1 -11
  24. trilogy/core/processing/nodes/select_node_v2.py +1 -0
  25. trilogy/core/processing/utility.py +29 -10
  26. trilogy/core/query_processor.py +47 -20
  27. trilogy/dialect/base.py +47 -14
  28. trilogy/dialect/common.py +15 -3
  29. trilogy/dialect/presto.py +2 -1
  30. trilogy/parsing/parse_engine.py +20 -1
  31. trilogy/parsing/trilogy.lark +3 -1
  32. {pytrilogy-0.0.2.7.dist-info → pytrilogy-0.0.2.9.dist-info}/LICENSE.md +0 -0
  33. {pytrilogy-0.0.2.7.dist-info → pytrilogy-0.0.2.9.dist-info}/entry_points.txt +0 -0
  34. {pytrilogy-0.0.2.7.dist-info → pytrilogy-0.0.2.9.dist-info}/top_level.txt +0 -0
@@ -6,7 +6,7 @@ from .window_node import WindowNode
6
6
  from .base_node import StrategyNode, NodeJoin
7
7
  from .unnest_node import UnnestNode
8
8
  from pydantic import BaseModel, Field, ConfigDict
9
- from trilogy.core.models import Concept, Environment
9
+ from trilogy.core.models import Concept, Environment, WhereClause
10
10
 
11
11
 
12
12
  class History(BaseModel):
@@ -15,23 +15,42 @@ class History(BaseModel):
15
15
  started: set[str] = Field(default_factory=set)
16
16
  model_config = ConfigDict(arbitrary_types_allowed=True)
17
17
 
18
- def _concepts_to_lookup(self, search: list[Concept], accept_partial: bool) -> str:
18
+ def _concepts_to_lookup(
19
+ self,
20
+ search: list[Concept],
21
+ accept_partial: bool,
22
+ conditions: WhereClause | None = None,
23
+ ) -> str:
24
+ if conditions:
25
+ return (
26
+ "-".join([c.address for c in search])
27
+ + str(accept_partial)
28
+ + str(conditions)
29
+ )
19
30
  return "-".join([c.address for c in search]) + str(accept_partial)
20
31
 
21
32
  def search_to_history(
22
- self, search: list[Concept], accept_partial: bool, output: StrategyNode | None
33
+ self,
34
+ search: list[Concept],
35
+ accept_partial: bool,
36
+ output: StrategyNode | None,
37
+ conditions: WhereClause | None = None,
23
38
  ):
24
- self.history[self._concepts_to_lookup(search, accept_partial)] = output
39
+ self.history[
40
+ self._concepts_to_lookup(search, accept_partial, conditions=conditions)
41
+ ] = output
25
42
 
26
43
  def get_history(
27
44
  self,
28
45
  search: list[Concept],
46
+ conditions: WhereClause | None = None,
29
47
  accept_partial: bool = False,
30
48
  parent_key: str = "",
31
49
  ) -> StrategyNode | None | bool:
32
50
  key = self._concepts_to_lookup(
33
51
  search,
34
52
  accept_partial,
53
+ conditions,
35
54
  )
36
55
  if parent_key and parent_key == key:
37
56
  raise ValueError(
@@ -48,11 +67,13 @@ class History(BaseModel):
48
67
  self,
49
68
  search: list[Concept],
50
69
  accept_partial: bool = False,
70
+ conditions: WhereClause | None = None,
51
71
  ):
52
72
  self.started.add(
53
73
  self._concepts_to_lookup(
54
74
  search,
55
- accept_partial,
75
+ accept_partial=accept_partial,
76
+ conditions=conditions,
56
77
  )
57
78
  )
58
79
 
@@ -60,11 +81,13 @@ class History(BaseModel):
60
81
  self,
61
82
  search: list[Concept],
62
83
  accept_partial: bool = False,
84
+ conditions: WhereClause | None = None,
63
85
  ):
64
86
  return (
65
87
  self._concepts_to_lookup(
66
88
  search,
67
89
  accept_partial,
90
+ conditions=conditions,
68
91
  )
69
92
  in self.started
70
93
  )
@@ -76,6 +99,7 @@ class History(BaseModel):
76
99
  accept_partial: bool,
77
100
  fail_if_not_found: bool,
78
101
  accept_partial_optional: bool,
102
+ conditions: WhereClause | None = None,
79
103
  ) -> str:
80
104
  return (
81
105
  str(main.address)
@@ -84,6 +108,7 @@ class History(BaseModel):
84
108
  + str(accept_partial)
85
109
  + str(fail_if_not_found)
86
110
  + str(accept_partial_optional)
111
+ + str(conditions)
87
112
  )
88
113
 
89
114
  def gen_select_node(
@@ -97,6 +122,7 @@ class History(BaseModel):
97
122
  fail_if_not_found: bool = False,
98
123
  accept_partial: bool = False,
99
124
  accept_partial_optional: bool = False,
125
+ conditions: WhereClause | None = None,
100
126
  ) -> StrategyNode | None:
101
127
  from trilogy.core.processing.node_generators.select_node import gen_select_node
102
128
 
@@ -105,7 +131,8 @@ class History(BaseModel):
105
131
  local_optional,
106
132
  accept_partial,
107
133
  fail_if_not_found,
108
- accept_partial_optional,
134
+ accept_partial_optional=accept_partial_optional,
135
+ conditions=conditions,
109
136
  )
110
137
  if fingerprint in self.select_history:
111
138
  return self.select_history[fingerprint]
@@ -119,6 +146,7 @@ class History(BaseModel):
119
146
  accept_partial=accept_partial,
120
147
  accept_partial_optional=accept_partial_optional,
121
148
  source_concepts=source_concepts,
149
+ conditions=conditions,
122
150
  )
123
151
  self.select_history[fingerprint] = gen
124
152
  return gen
@@ -17,6 +17,7 @@ from trilogy.core.models import (
17
17
  from trilogy.core.enums import Purpose, JoinType, PurposeLineage, Granularity
18
18
  from trilogy.utility import unique
19
19
  from dataclasses import dataclass
20
+ from trilogy.core.enums import BooleanOperator
20
21
 
21
22
 
22
23
  def concept_list_to_grain(
@@ -71,7 +72,6 @@ def resolve_concept_map(
71
72
  concept_map[concept.address].add(input)
72
73
  elif concept.address not in concept_map:
73
74
  concept_map[concept.address].add(input)
74
-
75
75
  # second loop, include partials
76
76
  for input in inputs:
77
77
  for concept in input.output_concepts:
@@ -92,14 +92,31 @@ def resolve_concept_map(
92
92
  return concept_map
93
93
 
94
94
 
95
- def get_all_parent_partial(all_concepts: List[Concept], parents: List["StrategyNode"]):
96
- return [
97
- c
98
- for c in all_concepts
99
- if len([c.address in [x.address for x in p.partial_concepts] for p in parents])
100
- >= 1
101
- and all([c.address in [x.address for x in p.partial_concepts] for p in parents])
102
- ]
95
+ def get_all_parent_partial(
96
+ all_concepts: List[Concept], parents: List["StrategyNode"]
97
+ ) -> List[Concept]:
98
+ return unique(
99
+ [
100
+ c
101
+ for c in all_concepts
102
+ if len(
103
+ [
104
+ p
105
+ for p in parents
106
+ if c.address in [x.address for x in p.partial_concepts]
107
+ ]
108
+ )
109
+ >= 1
110
+ and all(
111
+ [
112
+ c.address in p.partial_lcl
113
+ for p in parents
114
+ if c.address in p.output_lcl
115
+ ]
116
+ )
117
+ ],
118
+ "address",
119
+ )
103
120
 
104
121
 
105
122
  class StrategyNode:
@@ -137,7 +154,7 @@ class StrategyNode:
137
154
  self.partial_concepts = partial_concepts or get_all_parent_partial(
138
155
  self.output_concepts, self.parents
139
156
  )
140
- self.partial_lcl = LooseConceptList(concepts=self.partial_concepts)
157
+
141
158
  self.depth = depth
142
159
  self.conditions = conditions
143
160
  self.grain = grain
@@ -146,13 +163,50 @@ class StrategyNode:
146
163
  self.hidden_concepts = hidden_concepts or []
147
164
  self.existence_concepts = existence_concepts or []
148
165
  self.virtual_output_concepts = virtual_output_concepts or []
166
+ self.validate_parents()
167
+
168
+ def add_parents(self, parents: list["StrategyNode"]):
169
+ self.parents += parents
170
+ self.validate_parents()
171
+
172
+ def add_condition(self, condition: Conditional | Comparison | Parenthetical):
173
+ if self.conditions:
174
+ self.conditions = Conditional(
175
+ left=self.conditions, right=condition, operator=BooleanOperator.AND
176
+ )
177
+ else:
178
+ self.conditions = condition
179
+
180
+ def validate_parents(self):
181
+ # validate parents exist
182
+ # assign partial values where needed
149
183
  for parent in self.parents:
150
184
  if not parent:
151
185
  raise SyntaxError("Unresolvable parent")
152
186
 
187
+ # TODO: make this accurate
188
+ if self.parents:
189
+ self.partial_concepts = get_all_parent_partial(
190
+ self.output_concepts, self.parents
191
+ )
192
+
193
+ self.partial_lcl = LooseConceptList(concepts=self.partial_concepts)
194
+
153
195
  def add_output_concepts(self, concepts: List[Concept]):
154
196
  for concept in concepts:
155
- self.output_concepts.append(concept)
197
+ if concept.address not in self.output_lcl.addresses:
198
+ self.output_concepts.append(concept)
199
+ self.output_lcl = LooseConceptList(concepts=self.output_concepts)
200
+ self.rebuild_cache()
201
+
202
+ def add_existence_concepts(self, concepts: List[Concept]):
203
+ for concept in concepts:
204
+ if concept.address not in [x.address for x in self.output_concepts]:
205
+ self.existence_concepts.append(concept)
206
+ self.rebuild_cache()
207
+
208
+ def set_output_concepts(self, concepts: List[Concept]):
209
+ self.output_concepts = concepts
156
210
  self.output_lcl = LooseConceptList(concepts=self.output_concepts)
157
211
  self.rebuild_cache()
158
212
 
@@ -195,8 +249,8 @@ class StrategyNode:
195
249
  grain = self.grain if self.grain else Grain(components=self.output_concepts)
196
250
  source_map = resolve_concept_map(
197
251
  parent_sources,
198
- self.output_concepts,
199
- self.input_concepts + self.existence_concepts,
252
+ targets=self.output_concepts,
253
+ inherited_inputs=self.input_concepts + self.existence_concepts,
200
254
  )
201
255
  return QueryDatasource(
202
256
  input_concepts=self.input_concepts,
@@ -36,6 +36,7 @@ class FilterNode(StrategyNode):
36
36
  partial_concepts: List[Concept] | None = None,
37
37
  force_group: bool | None = False,
38
38
  grain: Grain | None = None,
39
+ existence_concepts: List[Concept] | None = None,
39
40
  ):
40
41
  super().__init__(
41
42
  output_concepts=output_concepts,
@@ -49,6 +50,7 @@ class FilterNode(StrategyNode):
49
50
  partial_concepts=partial_concepts,
50
51
  force_group=force_group,
51
52
  grain=grain,
53
+ existence_concepts=existence_concepts,
52
54
  )
53
55
 
54
56
  def copy(self) -> "FilterNode":
@@ -64,4 +66,5 @@ class FilterNode(StrategyNode):
64
66
  partial_concepts=list(self.partial_concepts),
65
67
  force_group=self.force_group,
66
68
  grain=self.grain,
69
+ existence_concepts=list(self.existence_concepts),
67
70
  )
@@ -39,6 +39,7 @@ class GroupNode(StrategyNode):
39
39
  partial_concepts: Optional[List[Concept]] = None,
40
40
  force_group: bool | None = None,
41
41
  conditions: Conditional | Comparison | Parenthetical | None = None,
42
+ existence_concepts: List[Concept] | None = None,
42
43
  ):
43
44
  super().__init__(
44
45
  input_concepts=input_concepts,
@@ -51,6 +52,7 @@ class GroupNode(StrategyNode):
51
52
  partial_concepts=partial_concepts,
52
53
  force_group=force_group,
53
54
  conditions=conditions,
55
+ existence_concepts=existence_concepts,
54
56
  )
55
57
 
56
58
  def _resolve(self) -> QueryDatasource:
@@ -173,4 +175,5 @@ class GroupNode(StrategyNode):
173
175
  partial_concepts=list(self.partial_concepts),
174
176
  force_group=self.force_group,
175
177
  conditions=self.conditions,
178
+ existence_concepts=list(self.existence_concepts),
176
179
  )
@@ -138,16 +138,6 @@ class MergeNode(StrategyNode):
138
138
  continue
139
139
  final_joins.append(join)
140
140
  self.node_joins = final_joins
141
- partial_lookup: list[Concept] = []
142
- non_partial: List[Concept] = []
143
- for node in parents or []:
144
- partial_lookup += node.partial_concepts
145
- non_partial += [
146
- x for x in node.output_concepts if x not in node.partial_concepts
147
- ]
148
-
149
- final_partial = [x for x in partial_lookup if x not in non_partial]
150
- self.partial_concepts = final_partial
151
141
 
152
142
  def translate_node_joins(self, node_joins: List[NodeJoin]) -> List[BaseJoin]:
153
143
  joins = []
@@ -265,7 +255,6 @@ class MergeNode(StrategyNode):
265
255
  for x in final_datasets
266
256
  if all([y in self.existence_concepts for y in x.output_concepts])
267
257
  ]
268
-
269
258
  if len(merged.keys()) == 1:
270
259
  final: QueryDatasource | Datasource = list(merged.values())[0]
271
260
  if (
@@ -378,4 +367,5 @@ class MergeNode(StrategyNode):
378
367
  node_joins=self.node_joins,
379
368
  join_concepts=list(self.join_concepts) if self.join_concepts else None,
380
369
  force_join_type=self.force_join_type,
370
+ existence_concepts=list(self.existence_concepts),
381
371
  )
@@ -166,6 +166,7 @@ class SelectNode(StrategyNode):
166
166
  resolution = self.resolve_from_provided_datasource()
167
167
  if resolution:
168
168
  return resolution
169
+
169
170
  required = [c.address for c in self.all_concepts]
170
171
  raise NoDatasourceException(
171
172
  f"Could not find any way to resolve datasources for required concepts {required} with derivation {[x.derivation for x in self.all_concepts]}"
@@ -20,7 +20,7 @@ from trilogy.core.models import (
20
20
  DataType,
21
21
  )
22
22
 
23
- from trilogy.core.enums import Purpose, Granularity
23
+ from trilogy.core.enums import Purpose, Granularity, BooleanOperator
24
24
  from trilogy.core.constants import CONSTANT_DATASET
25
25
  from enum import Enum
26
26
  from trilogy.utility import unique
@@ -62,7 +62,7 @@ def create_log_lambda(prefix: str, depth: int, logger: Logger):
62
62
  pad = padding(depth)
63
63
 
64
64
  def log_lambda(msg: str):
65
- logger.info(f"{pad} {prefix} {msg}")
65
+ logger.info(f"{pad}{prefix} {msg}")
66
66
 
67
67
  return log_lambda
68
68
 
@@ -328,14 +328,6 @@ def get_disconnected_components(
328
328
  for datasource, concepts in concept_map.items():
329
329
  graph.add_node(datasource, type=NodeType.NODE)
330
330
  for concept in concepts:
331
- # TODO: determine if this is the right way to handle things
332
- # if concept.derivation in (PurposeLineage.FILTER, PurposeLineage.WINDOW):
333
- # if isinstance(concept.lineage, FilterItem):
334
- # graph.add_node(concept.lineage.content.address, type=NodeType.CONCEPT)
335
- # graph.add_edge(datasource, concept.lineage.content.address)
336
- # if isinstance(concept.lineage, WindowItem):
337
- # graph.add_node(concept.lineage.content.address, type=NodeType.CONCEPT)
338
- # graph.add_edge(datasource, concept.lineage.content.address)
339
331
  graph.add_node(concept.address, type=NodeType.CONCEPT)
340
332
  graph.add_edge(datasource, concept.address)
341
333
  all_concepts.add(concept)
@@ -378,3 +370,30 @@ def is_scalar_condition(
378
370
  elif isinstance(element, Conditional):
379
371
  return is_scalar_condition(element.left) and is_scalar_condition(element.right)
380
372
  return True
373
+
374
+
375
+ def decompose_condition(
376
+ conditional: Conditional,
377
+ ) -> list[SubselectComparison | Comparison | Conditional | Parenthetical]:
378
+ chunks: list[SubselectComparison | Comparison | Conditional | Parenthetical] = []
379
+ if conditional.operator == BooleanOperator.AND:
380
+ if not (
381
+ isinstance(
382
+ conditional.left,
383
+ (SubselectComparison, Comparison, Conditional, Parenthetical),
384
+ )
385
+ and isinstance(
386
+ conditional.right,
387
+ (SubselectComparison, Comparison, Conditional, Parenthetical),
388
+ )
389
+ ):
390
+ chunks.append(conditional)
391
+ else:
392
+ for val in [conditional.left, conditional.right]:
393
+ if isinstance(val, Conditional):
394
+ chunks.extend(decompose_condition(val))
395
+ else:
396
+ chunks.append(val)
397
+ else:
398
+ chunks.append(conditional)
399
+ return chunks
@@ -4,9 +4,9 @@ from trilogy.core.env_processor import generate_graph
4
4
  from trilogy.core.graph_models import ReferenceGraph
5
5
  from trilogy.core.constants import CONSTANT_DATASET
6
6
  from trilogy.core.processing.concept_strategies_v3 import source_query_concepts
7
- from trilogy.core.enums import SelectFiltering
7
+ from trilogy.core.enums import SelectFiltering, BooleanOperator
8
8
  from trilogy.constants import CONFIG, DEFAULT_NAMESPACE
9
- from trilogy.core.processing.nodes import GroupNode, SelectNode, StrategyNode
9
+ from trilogy.core.processing.nodes import GroupNode, SelectNode, StrategyNode, History
10
10
  from trilogy.core.models import (
11
11
  Concept,
12
12
  Environment,
@@ -24,6 +24,7 @@ from trilogy.core.models import (
24
24
  Datasource,
25
25
  BaseJoin,
26
26
  InstantiatedUnnestJoin,
27
+ Conditional,
27
28
  )
28
29
 
29
30
  from trilogy.utility import unique
@@ -307,7 +308,10 @@ def datasource_to_ctes(
307
308
 
308
309
 
309
310
  def append_existence_check(
310
- node: StrategyNode, environment: Environment, graph: ReferenceGraph
311
+ node: StrategyNode,
312
+ environment: Environment,
313
+ graph: ReferenceGraph,
314
+ history: History | None = None,
311
315
  ):
312
316
  # we if we have a where clause doing an existence check
313
317
  # treat that as separate subquery
@@ -318,25 +322,22 @@ def append_existence_check(
318
322
  logger.info(
319
323
  f"{LOGGER_PREFIX} fetching existance clause inputs {[str(c) for c in subselect]}"
320
324
  )
321
- eds = source_query_concepts([*subselect], environment=environment, g=graph)
322
-
323
- final_eds = eds.resolve()
324
- first_parent = node.resolve()
325
- first_parent.datasources.append(final_eds)
326
- for x in final_eds.output_concepts:
327
- if x.address not in first_parent.existence_source_map:
328
- first_parent.existence_source_map[x.address] = {final_eds}
325
+ eds = source_query_concepts(
326
+ [*subselect], environment=environment, g=graph, history=history
327
+ )
328
+ node.add_parents([eds])
329
+ node.add_existence_concepts([*subselect])
329
330
 
330
331
 
331
- def get_query_datasources(
332
+ def get_query_node(
332
333
  environment: Environment,
333
334
  statement: SelectStatement | MultiSelectStatement,
334
335
  graph: Optional[ReferenceGraph] = None,
335
- hooks: Optional[List[BaseHook]] = None,
336
- ) -> QueryDatasource:
336
+ history: History | None = None,
337
+ ) -> StrategyNode:
337
338
  graph = graph or generate_graph(environment)
338
339
  logger.info(
339
- f"{LOGGER_PREFIX} getting source datasource for query with output {[str(c) for c in statement.output_components]}"
340
+ f"{LOGGER_PREFIX} getting source datasource for query with filtering {statement.where_clause_category} and output {[str(c) for c in statement.output_components]}"
340
341
  )
341
342
  if not statement.output_components:
342
343
  raise ValueError(f"Statement has no output components {statement}")
@@ -353,22 +354,28 @@ def get_query_datasources(
353
354
  )
354
355
  nest_where = True
355
356
 
356
- ods = source_query_concepts(
357
+ ods: StrategyNode = source_query_concepts(
357
358
  search_concepts,
358
359
  environment=environment,
359
360
  g=graph,
361
+ conditions=(statement.where_clause if statement.where_clause else None),
362
+ history=history,
360
363
  )
361
- ds: GroupNode | SelectNode
364
+ if not ods:
365
+ raise ValueError(
366
+ f"Could not find source query concepts for {[x.address for x in search_concepts]}"
367
+ )
368
+ ds: StrategyNode
362
369
  if nest_where and statement.where_clause:
363
370
  if not all_aggregate:
364
371
  ods.conditions = statement.where_clause.conditional
365
- ods.output_concepts = search_concepts
372
+ ods.output_concepts = statement.output_components
366
373
  # ods.hidden_concepts = where_delta
367
374
  ods.rebuild_cache()
368
- append_existence_check(ods, environment, graph)
375
+ append_existence_check(ods, environment, graph, history)
369
376
  ds = GroupNode(
370
377
  output_concepts=statement.output_components,
371
- input_concepts=search_concepts,
378
+ input_concepts=statement.output_components,
372
379
  parents=[ods],
373
380
  environment=ods.environment,
374
381
  g=ods.g,
@@ -390,7 +397,26 @@ def get_query_datasources(
390
397
 
391
398
  else:
392
399
  ds = ods
400
+ if statement.having_clause:
401
+ if ds.conditions:
402
+ ds.conditions = Conditional(
403
+ left=ds.conditions,
404
+ right=statement.having_clause.conditional,
405
+ operator=BooleanOperator.AND,
406
+ )
407
+ else:
408
+ ds.conditions = statement.having_clause.conditional
409
+ return ds
410
+
411
+
412
+ def get_query_datasources(
413
+ environment: Environment,
414
+ statement: SelectStatement | MultiSelectStatement,
415
+ graph: Optional[ReferenceGraph] = None,
416
+ hooks: Optional[List[BaseHook]] = None,
417
+ ) -> QueryDatasource:
393
418
 
419
+ ds = get_query_node(environment, statement, graph)
394
420
  final_qds = ds.resolve()
395
421
  if hooks:
396
422
  for hook in hooks:
@@ -475,6 +501,7 @@ def process_query(
475
501
  grain=statement.grain,
476
502
  limit=statement.limit,
477
503
  where_clause=statement.where_clause,
504
+ having_clause=statement.having_clause,
478
505
  output_columns=statement.output_components,
479
506
  ctes=final_ctes,
480
507
  base=root_cte,
trilogy/dialect/base.py CHANGED
@@ -51,7 +51,7 @@ from trilogy.core.models import (
51
51
  MergeStatementV2,
52
52
  )
53
53
  from trilogy.core.query_processor import process_query, process_persist
54
- from trilogy.dialect.common import render_join
54
+ from trilogy.dialect.common import render_join, render_unnest
55
55
  from trilogy.hooks.base_hook import BaseHook
56
56
  from trilogy.core.enums import UnnestMode
57
57
 
@@ -128,6 +128,7 @@ FUNCTION_MAP = {
128
128
  FunctionType.UNNEST: lambda x: f"unnest({x[0]})",
129
129
  FunctionType.ATTR_ACCESS: lambda x: f"""{x[0]}.{x[1].replace("'", "")}""",
130
130
  FunctionType.STRUCT: lambda x: f"{{{', '.join(struct_arg(x))}}}",
131
+ FunctionType.ARRAY: lambda x: f"[{', '.join(x)}]",
131
132
  # math
132
133
  FunctionType.ADD: lambda x: f"{x[0]} + {x[1]}",
133
134
  FunctionType.SUBTRACT: lambda x: f"{x[0]} - {x[1]}",
@@ -385,14 +386,19 @@ class BaseDialect:
385
386
  e.right.address,
386
387
  [
387
388
  INVALID_REFERENCE_STRING(
388
- f"Missing source reference to {e.right.name}"
389
+ f"Missing source reference to {e.right.address}"
389
390
  )
390
391
  ],
391
392
  )
392
393
  else:
393
394
  lookup = lookup_cte.existence_source_map[e.right.address]
394
-
395
- return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} (select {lookup[0]}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} from {lookup[0]} where {lookup[0]}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} is not null)"
395
+ if len(lookup) > 0:
396
+ target = lookup[0]
397
+ else:
398
+ target = INVALID_REFERENCE_STRING(
399
+ f"Missing source CTE for {e.right.address}"
400
+ )
401
+ return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} (select {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} from {target} where {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} is not null)"
396
402
  elif isinstance(e.right, (ListWrapper, Parenthetical, list)):
397
403
  return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map)}"
398
404
 
@@ -466,7 +472,7 @@ class BaseDialect:
466
472
  elif isinstance(e, MapWrapper):
467
473
  return f"MAP {{{','.join([f'{self.render_expr(k, cte=cte, cte_map=cte_map)}:{self.render_expr(v, cte=cte, cte_map=cte_map)}' for k, v in e.items()])}}}"
468
474
  elif isinstance(e, list):
469
- return f"[{','.join([self.render_expr(x, cte=cte, cte_map=cte_map) for x in e])}]"
475
+ return f"{self.FUNCTION_MAP[FunctionType.ARRAY]([self.render_expr(x, cte=cte, cte_map=cte_map) for x in e])}"
470
476
  elif isinstance(e, DataType):
471
477
  return str(e.value)
472
478
  elif isinstance(e, DatePart):
@@ -480,8 +486,12 @@ class BaseDialect:
480
486
  raise ValueError(f"Unable to render type {type(e)} {e}")
481
487
 
482
488
  def render_cte(self, cte: CTE):
483
- if self.UNNEST_MODE in (UnnestMode.CROSS_APPLY, UnnestMode.CROSS_JOIN):
484
- # for a cross apply, derviation happens in the join
489
+ if self.UNNEST_MODE in (
490
+ UnnestMode.CROSS_APPLY,
491
+ UnnestMode.CROSS_JOIN,
492
+ UnnestMode.CROSS_JOIN_ALIAS,
493
+ ):
494
+ # for a cross apply, derivation happens in the join
485
495
  # so we only use the alias to select
486
496
  select_columns = [
487
497
  self.render_concept_sql(c, cte)
@@ -499,17 +509,40 @@ class BaseDialect:
499
509
  for c in cte.output_columns
500
510
  if c.address not in [y.address for y in cte.hidden_concepts]
501
511
  ]
502
- if cte.quote_address:
503
- source = f"{self.QUOTE_CHARACTER}{cte.base_name}{self.QUOTE_CHARACTER}"
512
+ source: str | None = cte.base_name
513
+ if not cte.render_from_clause:
514
+ if len(cte.joins) > 0:
515
+ if cte.join_derived_concepts and self.UNNEST_MODE in (
516
+ UnnestMode.CROSS_JOIN_ALIAS,
517
+ UnnestMode.CROSS_JOIN,
518
+ UnnestMode.CROSS_APPLY,
519
+ ):
520
+ source = f"{render_unnest(self.UNNEST_MODE, self.QUOTE_CHARACTER, cte.join_derived_concepts[0], self.render_concept_sql, cte)}"
521
+ # direct - eg DUCK DB - can be directly selected inline
522
+ elif (
523
+ cte.join_derived_concepts and self.UNNEST_MODE == UnnestMode.DIRECT
524
+ ):
525
+ source = None
526
+ else:
527
+ raise SyntaxError("CTE has joins but no from clause")
528
+ else:
529
+ source = None
530
+ else:
531
+ if cte.quote_address:
532
+ source = f"{self.QUOTE_CHARACTER}{cte.base_name}{self.QUOTE_CHARACTER}"
533
+ else:
534
+ source = cte.base_name
535
+ if cte.base_name != cte.base_alias:
536
+ source = f"{source} as {cte.base_alias}"
537
+ if not cte.render_from_clause:
538
+ final_joins = []
504
539
  else:
505
- source = cte.base_name
506
- if cte.base_name != cte.base_alias:
507
- source = f"{source} as {cte.base_alias}"
540
+ final_joins = cte.joins or []
508
541
  return CompiledCTE(
509
542
  name=cte.name,
510
543
  statement=self.SQL_TEMPLATE.render(
511
544
  select_columns=select_columns,
512
- base=(f"{source}" if cte.render_from_clause else None),
545
+ base=f"{source}" if source else None,
513
546
  grain=cte.grain,
514
547
  limit=cte.limit,
515
548
  # some joins may not need to be rendered
@@ -524,7 +557,7 @@ class BaseDialect:
524
557
  cte,
525
558
  self.UNNEST_MODE,
526
559
  )
527
- for join in (cte.joins or [])
560
+ for join in final_joins
528
561
  ]
529
562
  if j
530
563
  ],