pytrilogy 0.0.2.8__py3-none-any.whl → 0.0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic.

Files changed (32)
  1. {pytrilogy-0.0.2.8.dist-info → pytrilogy-0.0.2.10.dist-info}/METADATA +1 -1
  2. {pytrilogy-0.0.2.8.dist-info → pytrilogy-0.0.2.10.dist-info}/RECORD +32 -32
  3. {pytrilogy-0.0.2.8.dist-info → pytrilogy-0.0.2.10.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +1 -1
  5. trilogy/constants.py +1 -0
  6. trilogy/core/models.py +161 -59
  7. trilogy/core/optimization.py +44 -5
  8. trilogy/core/optimizations/inline_datasource.py +14 -8
  9. trilogy/core/optimizations/predicate_pushdown.py +73 -44
  10. trilogy/core/processing/concept_strategies_v3.py +69 -28
  11. trilogy/core/processing/node_generators/common.py +42 -16
  12. trilogy/core/processing/node_generators/filter_node.py +94 -48
  13. trilogy/core/processing/node_generators/group_node.py +3 -1
  14. trilogy/core/processing/node_generators/rowset_node.py +13 -54
  15. trilogy/core/processing/node_generators/select_node.py +10 -13
  16. trilogy/core/processing/node_generators/unnest_node.py +5 -3
  17. trilogy/core/processing/node_generators/window_node.py +23 -2
  18. trilogy/core/processing/nodes/__init__.py +34 -6
  19. trilogy/core/processing/nodes/base_node.py +67 -13
  20. trilogy/core/processing/nodes/filter_node.py +3 -0
  21. trilogy/core/processing/nodes/group_node.py +4 -5
  22. trilogy/core/processing/nodes/merge_node.py +1 -11
  23. trilogy/core/processing/nodes/select_node_v2.py +1 -0
  24. trilogy/core/processing/utility.py +46 -14
  25. trilogy/core/query_processor.py +48 -21
  26. trilogy/dialect/base.py +28 -15
  27. trilogy/dialect/duckdb.py +1 -1
  28. trilogy/parsing/parse_engine.py +39 -2
  29. trilogy/parsing/trilogy.lark +3 -1
  30. {pytrilogy-0.0.2.8.dist-info → pytrilogy-0.0.2.10.dist-info}/LICENSE.md +0 -0
  31. {pytrilogy-0.0.2.8.dist-info → pytrilogy-0.0.2.10.dist-info}/entry_points.txt +0 -0
  32. {pytrilogy-0.0.2.8.dist-info → pytrilogy-0.0.2.10.dist-info}/top_level.txt +0 -0
@@ -6,7 +6,7 @@ from .window_node import WindowNode
 from .base_node import StrategyNode, NodeJoin
 from .unnest_node import UnnestNode
 from pydantic import BaseModel, Field, ConfigDict
-from trilogy.core.models import Concept, Environment
+from trilogy.core.models import Concept, Environment, WhereClause


 class History(BaseModel):
@@ -15,23 +15,42 @@ class History(BaseModel):
     started: set[str] = Field(default_factory=set)
     model_config = ConfigDict(arbitrary_types_allowed=True)

-    def _concepts_to_lookup(self, search: list[Concept], accept_partial: bool) -> str:
+    def _concepts_to_lookup(
+        self,
+        search: list[Concept],
+        accept_partial: bool,
+        conditions: WhereClause | None = None,
+    ) -> str:
+        if conditions:
+            return (
+                "-".join([c.address for c in search])
+                + str(accept_partial)
+                + str(conditions)
+            )
         return "-".join([c.address for c in search]) + str(accept_partial)

     def search_to_history(
-        self, search: list[Concept], accept_partial: bool, output: StrategyNode | None
+        self,
+        search: list[Concept],
+        accept_partial: bool,
+        output: StrategyNode | None,
+        conditions: WhereClause | None = None,
     ):
-        self.history[self._concepts_to_lookup(search, accept_partial)] = output
+        self.history[
+            self._concepts_to_lookup(search, accept_partial, conditions=conditions)
+        ] = output

     def get_history(
         self,
         search: list[Concept],
+        conditions: WhereClause | None = None,
         accept_partial: bool = False,
         parent_key: str = "",
     ) -> StrategyNode | None | bool:
         key = self._concepts_to_lookup(
             search,
             accept_partial,
+            conditions,
         )
         if parent_key and parent_key == key:
             raise ValueError(
@@ -48,11 +67,13 @@ class History(BaseModel):
         self,
         search: list[Concept],
         accept_partial: bool = False,
+        conditions: WhereClause | None = None,
     ):
         self.started.add(
             self._concepts_to_lookup(
                 search,
-                accept_partial,
+                accept_partial=accept_partial,
+                conditions=conditions,
             )
         )

@@ -60,11 +81,13 @@ class History(BaseModel):
         self,
         search: list[Concept],
         accept_partial: bool = False,
+        conditions: WhereClause | None = None,
     ):
         return (
             self._concepts_to_lookup(
                 search,
                 accept_partial,
+                conditions=conditions,
             )
             in self.started
         )
@@ -76,6 +99,7 @@ class History(BaseModel):
         accept_partial: bool,
         fail_if_not_found: bool,
         accept_partial_optional: bool,
+        conditions: WhereClause | None = None,
     ) -> str:
         return (
             str(main.address)
@@ -84,6 +108,7 @@ class History(BaseModel):
             + str(accept_partial)
             + str(fail_if_not_found)
             + str(accept_partial_optional)
+            + str(conditions)
         )

     def gen_select_node(
@@ -97,6 +122,7 @@ class History(BaseModel):
         fail_if_not_found: bool = False,
         accept_partial: bool = False,
         accept_partial_optional: bool = False,
+        conditions: WhereClause | None = None,
     ) -> StrategyNode | None:
         from trilogy.core.processing.node_generators.select_node import gen_select_node

@@ -105,7 +131,8 @@ class History(BaseModel):
             local_optional,
             accept_partial,
             fail_if_not_found,
-            accept_partial_optional,
+            accept_partial_optional=accept_partial_optional,
+            conditions=conditions,
         )
         if fingerprint in self.select_history:
             return self.select_history[fingerprint]
@@ -119,6 +146,7 @@ class History(BaseModel):
             accept_partial=accept_partial,
             accept_partial_optional=accept_partial_optional,
             source_concepts=source_concepts,
+            conditions=conditions,
         )
         self.select_history[fingerprint] = gen
         return gen
@@ -17,6 +17,7 @@ from trilogy.core.models import (
 from trilogy.core.enums import Purpose, JoinType, PurposeLineage, Granularity
 from trilogy.utility import unique
 from dataclasses import dataclass
+from trilogy.core.enums import BooleanOperator


 def concept_list_to_grain(
@@ -71,7 +72,6 @@ def resolve_concept_map(
                 concept_map[concept.address].add(input)
             elif concept.address not in concept_map:
                 concept_map[concept.address].add(input)
-
     # second loop, include partials
     for input in inputs:
         for concept in input.output_concepts:
@@ -92,14 +92,31 @@ def resolve_concept_map(
     return concept_map


-def get_all_parent_partial(all_concepts: List[Concept], parents: List["StrategyNode"]):
-    return [
-        c
-        for c in all_concepts
-        if len([c.address in [x.address for x in p.partial_concepts] for p in parents])
-        >= 1
-        and all([c.address in [x.address for x in p.partial_concepts] for p in parents])
-    ]
+def get_all_parent_partial(
+    all_concepts: List[Concept], parents: List["StrategyNode"]
+) -> List[Concept]:
+    return unique(
+        [
+            c
+            for c in all_concepts
+            if len(
+                [
+                    p
+                    for p in parents
+                    if c.address in [x.address for x in p.partial_concepts]
+                ]
+            )
+            >= 1
+            and all(
+                [
+                    c.address in p.partial_lcl
+                    for p in parents
+                    if c.address in p.output_lcl
+                ]
+            )
+        ],
+        "address",
+    )


 class StrategyNode:
@@ -137,7 +154,7 @@ class StrategyNode:
         self.partial_concepts = partial_concepts or get_all_parent_partial(
             self.output_concepts, self.parents
         )
-        self.partial_lcl = LooseConceptList(concepts=self.partial_concepts)
+
         self.depth = depth
         self.conditions = conditions
         self.grain = grain
@@ -146,13 +163,50 @@ class StrategyNode:
         self.hidden_concepts = hidden_concepts or []
         self.existence_concepts = existence_concepts or []
         self.virtual_output_concepts = virtual_output_concepts or []
+        self.validate_parents()
+
+    def add_parents(self, parents: list["StrategyNode"]):
+        self.parents += parents
+        self.validate_parents()
+
+    def add_condition(self, condition: Conditional | Comparison | Parenthetical):
+        if self.conditions:
+            self.conditions = Conditional(
+                left=self.conditions, right=condition, operator=BooleanOperator.AND
+            )
+        else:
+            self.conditions = condition
+
+    def validate_parents(self):
+        # validate parents exist
+        # assign partial values where needed
         for parent in self.parents:
             if not parent:
                 raise SyntaxError("Unresolvable parent")

+        # TODO: make this accurate
+        if self.parents:
+            self.partial_concepts = get_all_parent_partial(
+                self.output_concepts, self.parents
+            )
+
+        self.partial_lcl = LooseConceptList(concepts=self.partial_concepts)
+
     def add_output_concepts(self, concepts: List[Concept]):
         for concept in concepts:
-            self.output_concepts.append(concept)
+            if concept.address not in self.output_lcl.addresses:
+                self.output_concepts.append(concept)
+        self.output_lcl = LooseConceptList(concepts=self.output_concepts)
+        self.rebuild_cache()
+
+    def add_existence_concepts(self, concepts: List[Concept]):
+        for concept in concepts:
+            if concept.address not in [x.address for x in self.output_concepts]:
+                self.existence_concepts.append(concept)
+        self.rebuild_cache()
+
+    def set_output_concepts(self, concepts: List[Concept]):
+        self.output_concepts = concepts
         self.output_lcl = LooseConceptList(concepts=self.output_concepts)
         self.rebuild_cache()

@@ -195,8 +249,8 @@ class StrategyNode:
         grain = self.grain if self.grain else Grain(components=self.output_concepts)
         source_map = resolve_concept_map(
             parent_sources,
-            self.output_concepts,
-            self.input_concepts + self.existence_concepts,
+            targets=self.output_concepts,
+            inherited_inputs=self.input_concepts + self.existence_concepts,
         )
         return QueryDatasource(
             input_concepts=self.input_concepts,
@@ -36,6 +36,7 @@ class FilterNode(StrategyNode):
         partial_concepts: List[Concept] | None = None,
         force_group: bool | None = False,
         grain: Grain | None = None,
+        existence_concepts: List[Concept] | None = None,
     ):
         super().__init__(
             output_concepts=output_concepts,
@@ -49,6 +50,7 @@ class FilterNode(StrategyNode):
             partial_concepts=partial_concepts,
             force_group=force_group,
             grain=grain,
+            existence_concepts=existence_concepts,
         )

     def copy(self) -> "FilterNode":
@@ -64,4 +66,5 @@ class FilterNode(StrategyNode):
             partial_concepts=list(self.partial_concepts),
             force_group=self.force_group,
             grain=self.grain,
+            existence_concepts=list(self.existence_concepts),
         )
@@ -39,6 +39,7 @@ class GroupNode(StrategyNode):
         partial_concepts: Optional[List[Concept]] = None,
         force_group: bool | None = None,
         conditions: Conditional | Comparison | Parenthetical | None = None,
+        existence_concepts: List[Concept] | None = None,
     ):
         super().__init__(
             input_concepts=input_concepts,
@@ -51,6 +52,7 @@ class GroupNode(StrategyNode):
             partial_concepts=partial_concepts,
             force_group=force_group,
             conditions=conditions,
+            existence_concepts=existence_concepts,
         )

     def _resolve(self) -> QueryDatasource:
@@ -128,7 +130,7 @@ class GroupNode(StrategyNode):
                     if self.conditions
                     else self.output_concepts
                 ),
-                inherited_inputs=self.input_concepts,
+                inherited_inputs=self.input_concepts + self.existence_concepts,
             ),
             joins=[],
             grain=grain,
@@ -137,10 +139,6 @@ class GroupNode(StrategyNode):
         )
         # if there is a condition on a group node and it's not scalar
         # inject an additional CTE
-        if self.conditions:
-            logger.info("CONDITIONS")
-            logger.info(str(self.conditions))
-            logger.info(is_scalar_condition(self.conditions))
         if self.conditions and not is_scalar_condition(self.conditions):
             base.condition = None
             base.output_concepts = self.output_concepts + self.conditions.row_arguments
@@ -173,4 +171,5 @@ class GroupNode(StrategyNode):
             partial_concepts=list(self.partial_concepts),
             force_group=self.force_group,
             conditions=self.conditions,
+            existence_concepts=list(self.existence_concepts),
         )
@@ -138,16 +138,6 @@ class MergeNode(StrategyNode):
                 continue
             final_joins.append(join)
         self.node_joins = final_joins
-        partial_lookup: list[Concept] = []
-        non_partial: List[Concept] = []
-        for node in parents or []:
-            partial_lookup += node.partial_concepts
-            non_partial += [
-                x for x in node.output_concepts if x not in node.partial_concepts
-            ]
-
-        final_partial = [x for x in partial_lookup if x not in non_partial]
-        self.partial_concepts = final_partial

     def translate_node_joins(self, node_joins: List[NodeJoin]) -> List[BaseJoin]:
         joins = []
@@ -265,7 +255,6 @@ class MergeNode(StrategyNode):
             for x in final_datasets
             if all([y in self.existence_concepts for y in x.output_concepts])
         ]
-
         if len(merged.keys()) == 1:
             final: QueryDatasource | Datasource = list(merged.values())[0]
             if (
@@ -378,4 +367,5 @@ class MergeNode(StrategyNode):
             node_joins=self.node_joins,
             join_concepts=list(self.join_concepts) if self.join_concepts else None,
             force_join_type=self.force_join_type,
+            existence_concepts=list(self.existence_concepts),
         )
@@ -166,6 +166,7 @@ class SelectNode(StrategyNode):
             resolution = self.resolve_from_provided_datasource()
             if resolution:
                 return resolution
+
         required = [c.address for c in self.all_concepts]
         raise NoDatasourceException(
             f"Could not find any way to resolve datasources for required concepts {required} with derivation {[x.derivation for x in self.all_concepts]}"
@@ -20,7 +20,7 @@ from trilogy.core.models import (
     DataType,
 )

-from trilogy.core.enums import Purpose, Granularity
+from trilogy.core.enums import Purpose, Granularity, BooleanOperator
 from trilogy.core.constants import CONSTANT_DATASET
 from enum import Enum
 from trilogy.utility import unique
@@ -62,7 +62,7 @@ def create_log_lambda(prefix: str, depth: int, logger: Logger):
     pad = padding(depth)

     def log_lambda(msg: str):
-        logger.info(f"{pad} {prefix} {msg}")
+        logger.info(f"{pad}{prefix} {msg}")

     return log_lambda

@@ -328,14 +328,6 @@ def get_disconnected_components(
     for datasource, concepts in concept_map.items():
         graph.add_node(datasource, type=NodeType.NODE)
         for concept in concepts:
-            # TODO: determine if this is the right way to handle things
-            # if concept.derivation in (PurposeLineage.FILTER, PurposeLineage.WINDOW):
-            # if isinstance(concept.lineage, FilterItem):
-            # graph.add_node(concept.lineage.content.address, type=NodeType.CONCEPT)
-            # graph.add_edge(datasource, concept.lineage.content.address)
-            # if isinstance(concept.lineage, WindowItem):
-            # graph.add_node(concept.lineage.content.address, type=NodeType.CONCEPT)
-            # graph.add_edge(datasource, concept.lineage.content.address)
             graph.add_node(concept.address, type=NodeType.CONCEPT)
             graph.add_edge(datasource, concept.address)
             all_concepts.add(concept)
@@ -363,18 +355,58 @@ def is_scalar_condition(
         | MagicConstants
         | DataType
     ),
+    materialized: set[str] | None = None,
 ) -> bool:
     if isinstance(element, Parenthetical):
-        return is_scalar_condition(element.content)
+        return is_scalar_condition(element.content, materialized)
     elif isinstance(element, SubselectComparison):
         return True
     elif isinstance(element, Comparison):
-        return is_scalar_condition(element.left) and is_scalar_condition(element.right)
+        return is_scalar_condition(element.left, materialized) and is_scalar_condition(
+            element.right, materialized
+        )
     elif isinstance(element, Function):
         if element.operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
             return False
+    elif isinstance(element, Concept):
+        if materialized and element.address in materialized:
+            return True
+        if element.lineage and isinstance(element.lineage, AggregateWrapper):
+            return is_scalar_condition(element.lineage, materialized)
+        return True
     elif isinstance(element, AggregateWrapper):
-        return is_scalar_condition(element.function)
+        return is_scalar_condition(element.function, materialized)
     elif isinstance(element, Conditional):
-        return is_scalar_condition(element.left) and is_scalar_condition(element.right)
+        return is_scalar_condition(element.left, materialized) and is_scalar_condition(
+            element.right, materialized
+        )
     return True
+
+
+def decompose_condition(
+    conditional: Conditional | Comparison | Parenthetical,
+) -> list[SubselectComparison | Comparison | Conditional | Parenthetical]:
+    chunks: list[SubselectComparison | Comparison | Conditional | Parenthetical] = []
+    if not isinstance(conditional, Conditional):
+        return [conditional]
+    if conditional.operator == BooleanOperator.AND:
+        if not (
+            isinstance(
+                conditional.left,
+                (SubselectComparison, Comparison, Conditional, Parenthetical),
+            )
+            and isinstance(
+                conditional.right,
+                (SubselectComparison, Comparison, Conditional, Parenthetical),
+            )
+        ):
+            chunks.append(conditional)
+        else:
+            for val in [conditional.left, conditional.right]:
+                if isinstance(val, Conditional):
+                    chunks.extend(decompose_condition(val))
+                else:
+                    chunks.append(val)
+    else:
+        chunks.append(conditional)
+    return chunks
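For context on the new helper above: decompose_condition splits an AND-chained condition into independently routable predicates, while leaving OR branches and bare comparisons intact. A minimal standalone sketch of that behavior, using toy dataclasses rather than the trilogy model classes:

from dataclasses import dataclass
from typing import Union


@dataclass
class Comparison:
    left: str
    operator: str
    right: str


@dataclass
class Conditional:
    left: Union["Conditional", Comparison]
    operator: str  # "and" or "or"
    right: Union["Conditional", Comparison]


def decompose(cond: Union[Conditional, Comparison]) -> list:
    # Only AND chains are split; anything else is returned as a single chunk.
    if not isinstance(cond, Conditional) or cond.operator != "and":
        return [cond]
    chunks: list = []
    for side in (cond.left, cond.right):
        chunks.extend(decompose(side))
    return chunks


expr = Conditional(
    left=Comparison("order_year", "=", "2024"),
    operator="and",
    right=Conditional(
        left=Comparison("revenue", ">", "0"),
        operator="or",
        right=Comparison("refunded", "=", "true"),
    ),
)
# Yields two chunks: the year comparison and the intact OR conditional.
print(decompose(expr))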
@@ -4,9 +4,9 @@ from trilogy.core.env_processor import generate_graph
 from trilogy.core.graph_models import ReferenceGraph
 from trilogy.core.constants import CONSTANT_DATASET
 from trilogy.core.processing.concept_strategies_v3 import source_query_concepts
-from trilogy.core.enums import SelectFiltering
+from trilogy.core.enums import SelectFiltering, BooleanOperator
 from trilogy.constants import CONFIG, DEFAULT_NAMESPACE
-from trilogy.core.processing.nodes import GroupNode, SelectNode, StrategyNode
+from trilogy.core.processing.nodes import GroupNode, SelectNode, StrategyNode, History
 from trilogy.core.models import (
     Concept,
     Environment,
@@ -24,6 +24,7 @@ from trilogy.core.models import (
     Datasource,
     BaseJoin,
     InstantiatedUnnestJoin,
+    Conditional,
 )

 from trilogy.utility import unique
@@ -260,7 +261,7 @@ def datasource_to_ctes(

     human_id = generate_cte_name(query_datasource.full_name, name_map)
     logger.info(
-        f"Finished building source map for {human_id} with {len(parents)} parents, have {source_map}, query_datasource had non-empty keys {[k for k, v in query_datasource.source_map.items() if v]} "
+        f"Finished building source map for {human_id} with {len(parents)} parents, have {source_map}, query_datasource had non-empty keys {[k for k, v in query_datasource.source_map.items() if v]} and existence had non-empty keys {[k for k, v in query_datasource.existence_source_map.items() if v]} "
     )
     final_joins = [
         x
@@ -307,7 +308,10 @@


 def append_existence_check(
-    node: StrategyNode, environment: Environment, graph: ReferenceGraph
+    node: StrategyNode,
+    environment: Environment,
+    graph: ReferenceGraph,
+    history: History | None = None,
 ):
     # we if we have a where clause doing an existence check
     # treat that as separate subquery
@@ -318,25 +322,22 @@
     logger.info(
         f"{LOGGER_PREFIX} fetching existance clause inputs {[str(c) for c in subselect]}"
     )
-    eds = source_query_concepts([*subselect], environment=environment, g=graph)
-
-    final_eds = eds.resolve()
-    first_parent = node.resolve()
-    first_parent.datasources.append(final_eds)
-    for x in final_eds.output_concepts:
-        if x.address not in first_parent.existence_source_map:
-            first_parent.existence_source_map[x.address] = {final_eds}
+    eds = source_query_concepts(
+        [*subselect], environment=environment, g=graph, history=history
+    )
+    node.add_parents([eds])
+    node.add_existence_concepts([*subselect])


-def get_query_datasources(
+def get_query_node(
     environment: Environment,
     statement: SelectStatement | MultiSelectStatement,
     graph: Optional[ReferenceGraph] = None,
-    hooks: Optional[List[BaseHook]] = None,
-) -> QueryDatasource:
+    history: History | None = None,
+) -> StrategyNode:
     graph = graph or generate_graph(environment)
     logger.info(
-        f"{LOGGER_PREFIX} getting source datasource for query with output {[str(c) for c in statement.output_components]}"
+        f"{LOGGER_PREFIX} getting source datasource for query with filtering {statement.where_clause_category} and output {[str(c) for c in statement.output_components]}"
     )
     if not statement.output_components:
         raise ValueError(f"Statement has no output components {statement}")
@@ -353,22 +354,28 @@
     )
     nest_where = True

-    ods = source_query_concepts(
+    ods: StrategyNode = source_query_concepts(
         search_concepts,
         environment=environment,
         g=graph,
+        conditions=(statement.where_clause if statement.where_clause else None),
+        history=history,
     )
-    ds: GroupNode | SelectNode
+    if not ods:
+        raise ValueError(
+            f"Could not find source query concepts for {[x.address for x in search_concepts]}"
+        )
+    ds: StrategyNode
     if nest_where and statement.where_clause:
         if not all_aggregate:
             ods.conditions = statement.where_clause.conditional
-            ods.output_concepts = search_concepts
+            ods.output_concepts = statement.output_components
             # ods.hidden_concepts = where_delta
             ods.rebuild_cache()
-            append_existence_check(ods, environment, graph)
+            append_existence_check(ods, environment, graph, history)
         ds = GroupNode(
             output_concepts=statement.output_components,
-            input_concepts=search_concepts,
+            input_concepts=statement.output_components,
             parents=[ods],
             environment=ods.environment,
             g=ods.g,
@@ -390,7 +397,26 @@

     else:
         ds = ods
+    if statement.having_clause:
+        if ds.conditions:
+            ds.conditions = Conditional(
+                left=ds.conditions,
+                right=statement.having_clause.conditional,
+                operator=BooleanOperator.AND,
+            )
+        else:
+            ds.conditions = statement.having_clause.conditional
+    return ds
+
+
+def get_query_datasources(
+    environment: Environment,
+    statement: SelectStatement | MultiSelectStatement,
+    graph: Optional[ReferenceGraph] = None,
+    hooks: Optional[List[BaseHook]] = None,
+) -> QueryDatasource:

+    ds = get_query_node(environment, statement, graph)
     final_qds = ds.resolve()
     if hooks:
         for hook in hooks:
@@ -475,6 +501,7 @@ def process_query(
         grain=statement.grain,
         limit=statement.limit,
         where_clause=statement.where_clause,
+        having_clause=statement.having_clause,
         output_columns=statement.output_components,
         ctes=final_ctes,
         base=root_cte,
trilogy/dialect/base.py CHANGED
@@ -2,7 +2,7 @@ from typing import List, Union, Optional, Dict, Any, Sequence, Callable

 from jinja2 import Template

-from trilogy.core.processing.utility import is_scalar_condition
+from trilogy.core.processing.utility import is_scalar_condition, decompose_condition
 from trilogy.constants import CONFIG, logger, MagicConstants
 from trilogy.core.internal import DEFAULT_CONCEPTS
 from trilogy.core.enums import (
@@ -386,14 +386,19 @@ class BaseDialect:
                     e.right.address,
                     [
                         INVALID_REFERENCE_STRING(
-                            f"Missing source reference to {e.right.name}"
+                            f"Missing source reference to {e.right.address}"
                         )
                     ],
                 )
             else:
                 lookup = lookup_cte.existence_source_map[e.right.address]
-
-            return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} (select {lookup[0]}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} from {lookup[0]} where {lookup[0]}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} is not null)"
+            if len(lookup) > 0:
+                target = lookup[0]
+            else:
+                target = INVALID_REFERENCE_STRING(
+                    f"Missing source CTE for {e.right.address}"
+                )
+            return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} (select {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} from {target} where {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} is not null)"
         elif isinstance(e.right, (ListWrapper, Parenthetical, list)):
             return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map)}"
@@ -514,7 +519,9 @@ class BaseDialect:
             ):
                 source = f"{render_unnest(self.UNNEST_MODE, self.QUOTE_CHARACTER, cte.join_derived_concepts[0], self.render_concept_sql, cte)}"
             # direct - eg DUCK DB - can be directly selected inline
-            elif cte.join_derived_concepts and self.UNNEST_MODE == UnnestMode.DIRECT:
+            elif (
+                cte.join_derived_concepts and self.UNNEST_MODE == UnnestMode.DIRECT
+            ):
                 source = None
             else:
                 raise SyntaxError("CTE has joins but no from clause")
@@ -531,6 +538,20 @@ class BaseDialect:
             final_joins = []
         else:
             final_joins = cte.joins or []
+        where: Conditional | Parenthetical | Comparison | None = None
+        having: Conditional | Parenthetical | Comparison | None = None
+        materialized = {x for x, v in cte.source_map.items() if v}
+        if cte.condition:
+            if is_scalar_condition(cte.condition, materialized=materialized):
+                where = cte.condition
+            else:
+                components = decompose_condition(cte.condition)
+                for x in components:
+                    if is_scalar_condition(x, materialized=materialized):
+                        where = where + x if where else x
+                    else:
+                        having = having + x if having else x
+
         return CompiledCTE(
             name=cte.name,
             statement=self.SQL_TEMPLATE.render(
@@ -554,16 +575,8 @@ class BaseDialect:
                     ]
                     if j
                 ],
-                where=(
-                    self.render_expr(cte.condition, cte)
-                    if cte.condition and is_scalar_condition(cte.condition)
-                    else None
-                ),
-                having=(
-                    self.render_expr(cte.condition, cte)
-                    if cte.condition and not is_scalar_condition(cte.condition)
-                    else None
-                ),
+                where=(self.render_expr(where, cte) if where else None),
+                having=(self.render_expr(having, cte) if having else None),
                 order_by=(
                     [self.render_order_item(i, cte) for i in cte.order_by.items]
                     if cte.order_by
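Illustration of the WHERE/HAVING routing introduced in the hunks above: each chunk of a decomposed condition is rendered into WHERE when it is scalar against the already-materialized columns, and into HAVING otherwise. A self-contained sketch using plain strings and a stand-in scalar check, not the real CTE or is_scalar_condition objects:

from typing import Callable, Iterable, List, Tuple


def split_condition(
    chunks: Iterable[str],
    is_scalar: Callable[[str], bool],
) -> Tuple[List[str], List[str]]:
    where: List[str] = []
    having: List[str] = []
    for chunk in chunks:
        # scalar chunks render into WHERE; aggregate-dependent chunks into HAVING
        (where if is_scalar(chunk) else having).append(chunk)
    return where, having


# e.g. a pre-decomposed condition, with aggregate predicates treated as non-scalar
where, having = split_condition(
    ["category = 'books'", "sum(revenue) > 100"],
    is_scalar=lambda chunk: "sum(" not in chunk,
)
print(where)   # ["category = 'books'"]
print(having)  # ["sum(revenue) > 100"]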
trilogy/dialect/duckdb.py CHANGED
@@ -36,7 +36,7 @@ FUNCTION_MAP = {
 # we may return a static value
 FUNCTION_GRAIN_MATCH_MAP = {
     **FUNCTION_MAP,
-    FunctionType.COUNT: lambda args: "1",
+    FunctionType.COUNT: lambda args: f"{args[0]}",
     FunctionType.SUM: lambda args: f"{args[0]}",
     FunctionType.AVG: lambda args: f"{args[0]}",
 }