pytrilogy 0.0.1.109__py3-none-any.whl → 0.0.1.111__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (34)
  1. {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/METADATA +1 -1
  2. {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/RECORD +34 -34
  3. {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +1 -1
  5. trilogy/constants.py +11 -3
  6. trilogy/core/enums.py +1 -0
  7. trilogy/core/models.py +94 -67
  8. trilogy/core/optimization.py +134 -12
  9. trilogy/core/processing/concept_strategies_v3.py +44 -19
  10. trilogy/core/processing/node_generators/basic_node.py +2 -0
  11. trilogy/core/processing/node_generators/common.py +3 -1
  12. trilogy/core/processing/node_generators/concept_merge_node.py +24 -8
  13. trilogy/core/processing/node_generators/filter_node.py +36 -6
  14. trilogy/core/processing/node_generators/node_merge_node.py +34 -23
  15. trilogy/core/processing/node_generators/rowset_node.py +37 -8
  16. trilogy/core/processing/node_generators/select_node.py +23 -9
  17. trilogy/core/processing/node_generators/unnest_node.py +24 -3
  18. trilogy/core/processing/node_generators/window_node.py +4 -2
  19. trilogy/core/processing/nodes/__init__.py +7 -6
  20. trilogy/core/processing/nodes/base_node.py +40 -6
  21. trilogy/core/processing/nodes/filter_node.py +15 -1
  22. trilogy/core/processing/nodes/group_node.py +20 -1
  23. trilogy/core/processing/nodes/merge_node.py +37 -10
  24. trilogy/core/processing/nodes/select_node_v2.py +34 -39
  25. trilogy/core/processing/nodes/unnest_node.py +12 -0
  26. trilogy/core/processing/nodes/window_node.py +11 -0
  27. trilogy/core/processing/utility.py +0 -14
  28. trilogy/core/query_processor.py +125 -29
  29. trilogy/dialect/base.py +45 -40
  30. trilogy/executor.py +31 -3
  31. trilogy/parsing/parse_engine.py +49 -17
  32. {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/LICENSE.md +0 -0
  33. {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/entry_points.txt +0 -0
  34. {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/top_level.txt +0 -0
@@ -12,6 +12,8 @@ from trilogy.core.models import (
12
12
  Concept,
13
13
  UnnestJoin,
14
14
  Conditional,
15
+ Comparison,
16
+ Parenthetical,
15
17
  )
16
18
  from trilogy.utility import unique
17
19
  from trilogy.core.processing.nodes.base_node import (
@@ -103,8 +105,9 @@ class MergeNode(StrategyNode):
103
105
  force_group: bool | None = None,
104
106
  depth: int = 0,
105
107
  grain: Grain | None = None,
106
- conditions: Conditional | None = None,
108
+ conditions: Conditional | Comparison | Parenthetical | None = None,
107
109
  hidden_concepts: List[Concept] | None = None,
110
+ virtual_output_concepts: List[Concept] | None = None,
108
111
  ):
109
112
  super().__init__(
110
113
  input_concepts=input_concepts,
@@ -119,10 +122,12 @@ class MergeNode(StrategyNode):
119
122
  grain=grain,
120
123
  conditions=conditions,
121
124
  hidden_concepts=hidden_concepts,
125
+ virtual_output_concepts=virtual_output_concepts,
122
126
  )
123
127
  self.join_concepts = join_concepts
124
128
  self.force_join_type = force_join_type
125
129
  self.node_joins = node_joins
130
+
126
131
  final_joins = []
127
132
  if self.node_joins:
128
133
  for join in self.node_joins:
@@ -282,6 +287,7 @@ class MergeNode(StrategyNode):
282
287
  if c.address in [x.address for x in self.output_concepts]
283
288
  ]
284
289
  )
290
+
285
291
  logger.info(
286
292
  f"{self.logging_prefix}{LOGGER_PREFIX} has pre grain {pregrain} and final merge node grain {grain}"
287
293
  )
@@ -307,24 +313,25 @@ class MergeNode(StrategyNode):
307
313
  f"{self.logging_prefix}{LOGGER_PREFIX} no parents include full grain {grain} and pregrain {pregrain} does not match, assume must group to grain. Have {[str(d.grain) for d in final_datasets]}"
308
314
  )
309
315
  force_group = True
310
- # Grain<returns.customer.id,returns.store.id,returns.item.id,returns.store_sales.ticket_number>
311
- # Grain<returns.customer.id,returns.store.id,returns.return_date.id,returns.item.id,returns.store_sales.ticket_number>
312
- # Grain<returns.customer.id,returns.store.id,returns.item.id,returns.store_sales.ticket_number>
313
316
  else:
314
317
  force_group = None
315
318
 
316
319
  qd_joins: List[BaseJoin | UnnestJoin] = [*joins]
320
+ source_map = resolve_concept_map(
321
+ parent_sources,
322
+ targets=self.output_concepts,
323
+ inherited_inputs=self.input_concepts + self.existence_concepts,
324
+ full_joins=full_join_concepts,
325
+ )
326
+ logger.info(
327
+ f"{self.logging_prefix}{LOGGER_PREFIX} source_map {str(source_map)}"
328
+ )
317
329
  qds = QueryDatasource(
318
330
  input_concepts=unique(self.input_concepts, "address"),
319
331
  output_concepts=unique(self.output_concepts, "address"),
320
332
  datasources=final_datasets,
321
333
  source_type=self.source_type,
322
- source_map=resolve_concept_map(
323
- parent_sources,
324
- self.output_concepts,
325
- self.input_concepts,
326
- full_joins=full_join_concepts,
327
- ),
334
+ source_map=source_map,
328
335
  joins=qd_joins,
329
336
  grain=grain,
330
337
  partial_concepts=self.partial_concepts,
@@ -333,3 +340,23 @@ class MergeNode(StrategyNode):
333
340
  hidden_concepts=self.hidden_concepts,
334
341
  )
335
342
  return qds
343
+
344
+ def copy(self) -> "MergeNode":
345
+ return MergeNode(
346
+ input_concepts=list(self.input_concepts),
347
+ output_concepts=list(self.output_concepts),
348
+ environment=self.environment,
349
+ g=self.g,
350
+ whole_grain=self.whole_grain,
351
+ parents=self.parents,
352
+ depth=self.depth,
353
+ partial_concepts=list(self.partial_concepts),
354
+ force_group=self.force_group,
355
+ grain=self.grain,
356
+ conditions=self.conditions,
357
+ hidden_concepts=list(self.hidden_concepts),
358
+ virtual_output_concepts=list(self.virtual_output_concepts),
359
+ node_joins=self.node_joins,
360
+ join_concepts=list(self.join_concepts) if self.join_concepts else None,
361
+ force_join_type=self.force_join_type,
362
+ )
@@ -5,14 +5,14 @@ from trilogy.constants import logger
5
5
  from trilogy.core.constants import CONSTANT_DATASET
6
6
  from trilogy.core.enums import Purpose, PurposeLineage
7
7
  from trilogy.core.models import (
8
- Datasource,
8
+ Function,
9
+ Grain,
9
10
  QueryDatasource,
10
11
  SourceType,
11
- Environment,
12
12
  Concept,
13
- Grain,
14
- Function,
13
+ Environment,
15
14
  UnnestJoin,
15
+ Datasource,
16
16
  )
17
17
  from trilogy.utility import unique
18
18
  from trilogy.core.processing.nodes.base_node import StrategyNode
@@ -22,39 +22,6 @@ from trilogy.core.exceptions import NoDatasourceException
22
22
  LOGGER_PREFIX = "[CONCEPT DETAIL - SELECT NODE]"
23
23
 
24
24
 
25
- class StaticSelectNode(StrategyNode):
26
- """Static select nodes."""
27
-
28
- source_type = SourceType.SELECT
29
-
30
- def __init__(
31
- self,
32
- input_concepts: List[Concept],
33
- output_concepts: List[Concept],
34
- environment: Environment,
35
- g,
36
- datasource: QueryDatasource,
37
- depth: int = 0,
38
- partial_concepts: List[Concept] | None = None,
39
- ):
40
- super().__init__(
41
- input_concepts=input_concepts,
42
- output_concepts=output_concepts,
43
- environment=environment,
44
- g=g,
45
- whole_grain=True,
46
- parents=[],
47
- depth=depth,
48
- partial_concepts=partial_concepts,
49
- )
50
- self.datasource = datasource
51
-
52
- def _resolve(self):
53
- if self.datasource.grain == Grain():
54
- raise NotImplementedError
55
- return self.datasource
56
-
57
-
58
25
  class SelectNode(StrategyNode):
59
26
  """Select nodes actually fetch raw data from a table
60
27
  Responsible for selecting the cheapest option from which to select.
@@ -75,7 +42,7 @@ class SelectNode(StrategyNode):
75
42
  partial_concepts: List[Concept] | None = None,
76
43
  accept_partial: bool = False,
77
44
  grain: Optional[Grain] = None,
78
- force_group: bool = False,
45
+ force_group: bool | None = False,
79
46
  ):
80
47
  super().__init__(
81
48
  input_concepts=input_concepts,
@@ -119,6 +86,7 @@ class SelectNode(StrategyNode):
119
86
  if x.address not in source_map and x.derivation in (
120
87
  PurposeLineage.MULTISELECT,
121
88
  PurposeLineage.MERGE,
89
+ PurposeLineage.FILTER,
122
90
  ):
123
91
  source_map[x.address] = set()
124
92
 
@@ -140,6 +108,7 @@ class SelectNode(StrategyNode):
140
108
  c.concept for c in datasource.columns if not c.is_complete
141
109
  ],
142
110
  source_type=SourceType.DIRECT_SELECT,
111
+ condition=self.conditions,
143
112
  # select nodes should never group
144
113
  force_group=self.force_group,
145
114
  )
@@ -154,6 +123,7 @@ class SelectNode(StrategyNode):
154
123
  source_map={concept.address: set() for concept in self.all_concepts},
155
124
  datasources=[datasource],
156
125
  grain=datasource.grain,
126
+ condition=self.conditions,
157
127
  joins=[],
158
128
  partial_concepts=[],
159
129
  source_type=SourceType.CONSTANT,
@@ -191,8 +161,33 @@ class SelectNode(StrategyNode):
191
161
  f"Could not find any way to associate required concepts {required}"
192
162
  )
193
163
 
164
+ def copy(self) -> "SelectNode":
165
+ return SelectNode(
166
+ input_concepts=list(self.input_concepts),
167
+ output_concepts=list(self.output_concepts),
168
+ environment=self.environment,
169
+ g=self.g,
170
+ datasource=self.datasource,
171
+ depth=self.depth,
172
+ parents=self.parents,
173
+ whole_grain=self.whole_grain,
174
+ partial_concepts=list(self.partial_concepts),
175
+ accept_partial=self.accept_partial,
176
+ grain=self.grain,
177
+ force_group=self.force_group,
178
+ )
179
+
194
180
 
195
181
  class ConstantNode(SelectNode):
196
182
  """Represents a constant value."""
197
183
 
198
- pass
184
+ def copy(self) -> "ConstantNode":
185
+ return ConstantNode(
186
+ input_concepts=list(self.input_concepts),
187
+ output_concepts=list(self.output_concepts),
188
+ environment=self.environment,
189
+ g=self.g,
190
+ datasource=self.datasource,
191
+ depth=self.depth,
192
+ partial_concepts=list(self.partial_concepts),
193
+ )
@@ -52,3 +52,15 @@ class UnnestNode(StrategyNode):
52
52
  base.source_map[self.unnest_concept.address] = {unnest}
53
53
  base.join_derived_concepts = [self.unnest_concept]
54
54
  return base
55
+
56
+ def copy(self) -> "UnnestNode":
57
+ return UnnestNode(
58
+ unnest_concept=self.unnest_concept,
59
+ input_concepts=list(self.input_concepts),
60
+ output_concepts=list(self.output_concepts),
61
+ environment=self.environment,
62
+ g=self.g,
63
+ whole_grain=self.whole_grain,
64
+ parents=self.parents,
65
+ depth=self.depth,
66
+ )
@@ -32,3 +32,14 @@ class WindowNode(StrategyNode):
32
32
  base = super()._resolve()
33
33
  base.grain = Grain(components=self.input_concepts)
34
34
  return base
35
+
36
+ def copy(self) -> "WindowNode":
37
+ return WindowNode(
38
+ input_concepts=list(self.input_concepts),
39
+ output_concepts=list(self.output_concepts),
40
+ environment=self.environment,
41
+ g=self.g,
42
+ whole_grain=self.whole_grain,
43
+ parents=self.parents,
44
+ depth=self.depth,
45
+ )
@@ -232,20 +232,6 @@ def get_node_joins(
232
232
  raise SyntaxError(
233
233
  f"Could not find join for {x.identifier} with output {[c.address for c in x.output_concepts]}, all {[z.identifier for z in datasources]}"
234
234
  )
235
- single_row = [x for x in datasources if x.grain.abstract]
236
- for x in single_row:
237
- for join in final_joins:
238
- found = False
239
- for join in final_joins:
240
- if (
241
- join.left_datasource.identifier == x.identifier
242
- or join.right_datasource.identifier == x.identifier
243
- ):
244
- found = True
245
- if not found:
246
- raise SyntaxError(
247
- f"Could not find join for {x.identifier} with output {[c.address for c in x.output_concepts]}, all {[z.identifier for z in datasources]}"
248
- )
249
235
  return final_joins
250
236
 
251
237
 
@@ -1,4 +1,4 @@
1
- from typing import List, Optional, Set, Union, Dict
1
+ from typing import List, Optional, Set, Union, Dict, Tuple
2
2
 
3
3
  from trilogy.core.env_processor import generate_graph
4
4
  from trilogy.core.graph_models import ReferenceGraph
@@ -84,19 +84,31 @@ def base_join_to_join(
84
84
 
85
85
  def generate_source_map(
86
86
  query_datasource: QueryDatasource, all_new_ctes: List[CTE]
87
- ) -> Dict[str, str | list[str]]:
87
+ ) -> Tuple[Dict[str, list[str]], Dict[str, list[str]]]:
88
88
  source_map: Dict[str, list[str]] = defaultdict(list)
89
89
  # now populate anything derived in this level
90
90
  for qdk, qdv in query_datasource.source_map.items():
91
+ unnest = [x for x in qdv if isinstance(x, UnnestJoin)]
92
+ for x in unnest:
93
+ source_map[qdk] = []
91
94
  if (
92
95
  qdk not in source_map
93
96
  and len(qdv) == 1
94
97
  and isinstance(list(qdv)[0], UnnestJoin)
95
98
  ):
96
99
  source_map[qdk] = []
100
+ basic = [x for x in qdv if isinstance(x, Datasource)]
101
+ for base in basic:
102
+ source_map[qdk].append(base.name)
97
103
 
98
- else:
99
- for cte in all_new_ctes:
104
+ ctes = [x for x in qdv if isinstance(x, QueryDatasource)]
105
+ if ctes:
106
+ names = set([x.name for x in ctes])
107
+ matches = [cte for cte in all_new_ctes if cte.source.name in names]
108
+
109
+ if not matches and names:
110
+ raise SyntaxError(query_datasource.source_map)
111
+ for cte in matches:
100
112
  output_address = [
101
113
  x.address
102
114
  for x in cte.output_columns
@@ -105,21 +117,27 @@ def generate_source_map(
105
117
  if qdk in output_address:
106
118
  source_map[qdk].append(cte.name)
107
119
  # now do a pass that accepts partials
108
- # TODO: move this into a second loop by first creationg all sub sourcdes
120
+ # TODO: move this into a second loop by first creationg all sub sources
109
121
  # then loop through this
110
- for cte in all_new_ctes:
111
- output_address = [x.address for x in cte.output_columns]
112
- if qdk in output_address:
113
- if qdk not in source_map:
114
- source_map[qdk] = [cte.name]
115
- if qdk not in source_map and not qdv:
116
- # set source to empty, as it must be derived in this element
117
- source_map[qdk] = []
122
+ for cte in matches:
123
+ if qdk not in source_map:
124
+ source_map[qdk] = [cte.name]
118
125
  if qdk not in source_map:
119
- raise ValueError(
120
- f"Missing {qdk} in {source_map}, source map {query_datasource.source_map.keys()} "
121
- )
122
- return {k: "" if not v else v for k, v in source_map.items()}
126
+ if not qdv:
127
+ source_map[qdk] = []
128
+ elif CONFIG.validate_missing:
129
+ raise ValueError(
130
+ f"Missing {qdk} in {source_map}, source map {query_datasource.source_map} "
131
+ )
132
+
133
+ # existence lookups use a separate map
134
+ # as they cannot be referenced in row resolution
135
+ existence_source_map: Dict[str, list[str]] = defaultdict(list)
136
+ for ek, ev in query_datasource.existence_source_map.items():
137
+ names = set([x.name for x in ev])
138
+ ematches = [cte.name for cte in all_new_ctes if cte.source.name in names]
139
+ existence_source_map[ek] = ematches
140
+ return {k: [] if not v else v for k, v in source_map.items()}, existence_source_map
123
141
 
124
142
 
125
143
  def datasource_to_query_datasource(datasource: Datasource) -> QueryDatasource:
@@ -158,6 +176,52 @@ def generate_cte_name(full_name: str, name_map: dict[str, str]) -> str:
158
176
  return full_name.replace("<", "").replace(">", "").replace(",", "_")
159
177
 
160
178
 
179
+ def resolve_cte_base_name_and_alias(
180
+ name: str,
181
+ source: QueryDatasource,
182
+ parents: List[CTE],
183
+ joins: List[Join | InstantiatedUnnestJoin],
184
+ ) -> Tuple[str | None, str | None]:
185
+
186
+ valid_joins: List[Join] = [join for join in joins if isinstance(join, Join)]
187
+ relevant_parent_sources = set()
188
+ for k, v in source.source_map.items():
189
+ if v:
190
+ relevant_parent_sources.update(v)
191
+ eligible = [x for x in source.datasources if x in relevant_parent_sources]
192
+ if (
193
+ len(eligible) == 1
194
+ and isinstance(eligible[0], Datasource)
195
+ and not eligible[0].name == CONSTANT_DATASET
196
+ ):
197
+ ds = eligible[0]
198
+ return ds.safe_location, ds.identifier
199
+
200
+ # if we have multiple joined CTEs, pick the base
201
+ # as the root
202
+ elif len(eligible) == 1 and len(parents) == 1:
203
+ return parents[0].name, parents[0].name
204
+ elif valid_joins and len(valid_joins) > 0:
205
+ candidates = [x.left_cte.name for x in valid_joins]
206
+ disallowed = [x.right_cte.name for x in valid_joins]
207
+ try:
208
+ cte = [y for y in candidates if y not in disallowed][0]
209
+ return cte, cte
210
+ except IndexError:
211
+ raise SyntaxError(
212
+ f"Invalid join configuration {candidates} {disallowed} with all parents {[x.base_name for x in parents]}"
213
+ )
214
+ elif eligible:
215
+ matched = [x for x in parents if x.source.name == eligible[0].name]
216
+ if matched:
217
+ return matched[0].name, matched[0].name
218
+
219
+ logger.info(
220
+ f"Could not determine CTE base name for {name} with relevant sources {relevant_parent_sources}"
221
+ )
222
+ return None, None
223
+
224
+
161
225
  def datasource_to_ctes(
162
226
  query_datasource: QueryDatasource, name_map: dict[str, str]
163
227
  ) -> List[CTE]:
@@ -176,7 +240,8 @@ def datasource_to_ctes(
176
240
  sub_cte = datasource_to_ctes(sub_datasource, name_map)
177
241
  parents += sub_cte
178
242
  all_new_ctes += sub_cte
179
- source_map = generate_source_map(query_datasource, all_new_ctes)
243
+ source_map, existence_map = generate_source_map(query_datasource, all_new_ctes)
244
+
180
245
  else:
181
246
  # source is the first datasource of the query datasource
182
247
  source = query_datasource.datasources[0]
@@ -184,13 +249,27 @@ def datasource_to_ctes(
184
249
  # render properly on initial access; since they have
185
250
  # no actual source
186
251
  if source.full_name == DEFAULT_NAMESPACE + "_" + CONSTANT_DATASET:
187
- source_map = {k: "" for k in query_datasource.source_map}
252
+ source_map = {k: [] for k in query_datasource.source_map}
253
+ existence_map = source_map
188
254
  else:
189
255
  source_map = {
190
- k: "" if not v else source.identifier
256
+ k: [] if not v else [source.identifier]
191
257
  for k, v in query_datasource.source_map.items()
192
258
  }
259
+ existence_map = source_map
260
+
193
261
  human_id = generate_cte_name(query_datasource.full_name, name_map)
262
+ logger.info(
263
+ f"Finished building source map for {human_id} with {len(parents)} parents, have {source_map}, parent had non-empty keys {[k for k, v in query_datasource.source_map.items() if v]} "
264
+ )
265
+ final_joins = [
266
+ x
267
+ for x in [base_join_to_join(join, parents) for join in query_datasource.joins]
268
+ if x
269
+ ]
270
+ base_name, base_alias = resolve_cte_base_name_and_alias(
271
+ human_id, query_datasource, parents, final_joins
272
+ )
194
273
  cte = CTE(
195
274
  name=human_id,
196
275
  source=query_datasource,
@@ -200,14 +279,9 @@ def datasource_to_ctes(
200
279
  for c in query_datasource.output_concepts
201
280
  ],
202
281
  source_map=source_map,
282
+ existence_source_map=existence_map,
203
283
  # related columns include all referenced columns, such as filtering
204
- joins=[
205
- x
206
- for x in [
207
- base_join_to_join(join, parents) for join in query_datasource.joins
208
- ]
209
- if x
210
- ],
284
+ joins=final_joins,
211
285
  grain=query_datasource.grain,
212
286
  group_to_grain=query_datasource.group_required,
213
287
  # we restrict parent_ctes to one level
@@ -217,11 +291,13 @@ def datasource_to_ctes(
217
291
  partial_concepts=query_datasource.partial_concepts,
218
292
  join_derived_concepts=query_datasource.join_derived_concepts,
219
293
  hidden_concepts=query_datasource.hidden_concepts,
294
+ base_name_override=base_name,
295
+ base_alias_override=base_alias,
220
296
  )
221
297
  if cte.grain != query_datasource.grain:
222
298
  raise ValueError("Grain was corrupted in CTE generation")
223
299
  for x in cte.output_columns:
224
- if x.address not in cte.source_map:
300
+ if x.address not in cte.source_map and CONFIG.validate_missing:
225
301
  raise ValueError(
226
302
  f"Missing {x.address} in {cte.source_map}, source map {cte.source.source_map.keys()} "
227
303
  )
@@ -243,12 +319,32 @@ def get_query_datasources(
243
319
  if not statement.output_components:
244
320
  raise ValueError(f"Statement has no output components {statement}")
245
321
  ds = source_query_concepts(
246
- statement.output_components, environment=environment, g=graph
322
+ statement.output_components,
323
+ environment=environment,
324
+ g=graph,
247
325
  )
248
326
  if hooks:
249
327
  for hook in hooks:
250
328
  hook.process_root_strategy_node(ds)
251
329
  final_qds = ds.resolve()
330
+
331
+ # we if we have a where clause doing an existence check
332
+ # treat that as separate subquery
333
+ if (where := statement.where_clause) and where.existence_arguments:
334
+ for subselect in where.existence_arguments:
335
+ if not subselect:
336
+ continue
337
+ logger.info(
338
+ f"{LOGGER_PREFIX} fetching existance clause inputs {[str(c) for c in subselect]}"
339
+ )
340
+ eds = source_query_concepts([*subselect], environment=environment, g=graph)
341
+
342
+ final_eds = eds.resolve()
343
+ first_parent = final_qds
344
+ first_parent.datasources.append(final_eds)
345
+ for x in final_eds.output_concepts:
346
+ if x.address not in first_parent.existence_source_map:
347
+ first_parent.existence_source_map[x.address] = {final_eds}
252
348
  return final_qds
253
349
 
254
350
 
trilogy/dialect/base.py CHANGED
@@ -193,27 +193,6 @@ ORDER BY {% for order in order_by %}
193
193
  )
194
194
 
195
195
 
196
- def check_lineage(c: Concept, cte: CTE) -> bool:
197
- checks = []
198
- if not c.lineage:
199
- return True
200
- for sub_c in c.lineage.concept_arguments:
201
- if not isinstance(sub_c, Concept):
202
- continue
203
- if sub_c.address in cte.source_map or (
204
- sub_c.lineage and check_lineage(sub_c, cte)
205
- ):
206
- checks.append(True)
207
- else:
208
- logger.debug(
209
- f"{LOGGER_PREFIX} [{sub_c.address}] not found in source map for"
210
- f" {cte.name}, have cte keys {[c for c in cte.source_map.keys()]} and"
211
- f" datasource keys {[c for c in cte.source.source_map.keys()]}"
212
- )
213
- checks.append(False)
214
- return all(checks)
215
-
216
-
217
196
  def safe_quote(string: str, quote_char: str):
218
197
  # split dotted identifiers
219
198
  # TODO: evaluate if we need smarter parsing for strings that could actually include .
@@ -259,7 +238,7 @@ class BaseDialect:
259
238
  f"{LOGGER_PREFIX} [{c.address}] Starting rendering loop on cte: {cte.name}"
260
239
  )
261
240
 
262
- if c.lineage and cte.source_map.get(c.address, "") == "":
241
+ if c.lineage and cte.source_map.get(c.address, []) == []:
263
242
  logger.debug(
264
243
  f"{LOGGER_PREFIX} [{c.address}] rendering concept with lineage that is not already existing"
265
244
  )
@@ -273,7 +252,11 @@ class BaseDialect:
273
252
  ]
274
253
  rval = f"{self.WINDOW_FUNCTION_MAP[c.lineage.type](concept = self.render_concept_sql(c.lineage.content, cte=cte, alias=False), window=','.join(rendered_over_components), sort=','.join(rendered_order_components))}" # noqa: E501
275
254
  elif isinstance(c.lineage, FilterItem):
276
- rval = f"CASE WHEN {self.render_expr(c.lineage.where.conditional, cte=cte)} THEN {self.render_concept_sql(c.lineage.content, cte=cte, alias=False)} ELSE NULL END"
255
+ # for cases when we've optimized this
256
+ if len(cte.output_columns) == 1:
257
+ rval = self.render_expr(c.lineage.content, cte=cte)
258
+ else:
259
+ rval = f"CASE WHEN {self.render_expr(c.lineage.where.conditional, cte=cte)} THEN {self.render_concept_sql(c.lineage.content, cte=cte, alias=False)} ELSE NULL END"
277
260
  elif isinstance(c.lineage, RowsetItem):
278
261
  rval = f"{self.render_concept_sql(c.lineage.content, cte=cte, alias=False)}"
279
262
  elif isinstance(c.lineage, MultiSelectStatement):
@@ -356,17 +339,28 @@ class BaseDialect:
356
339
  cte: Optional[CTE] = None,
357
340
  cte_map: Optional[Dict[str, CTE]] = None,
358
341
  ) -> str:
359
- # if isinstance(e, Concept):
360
- # cte = cte or cte_map.get(e.address, None)
361
342
 
362
343
  if isinstance(e, SubselectComparison):
363
- assert cte, "Subselects must be rendered with a CTE in context"
344
+
364
345
  if isinstance(e.right, Concept):
365
- return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} (select {self.render_expr(e.right, cte=cte, cte_map=cte_map)} from {cte.source_map[e.right.address][0]})"
346
+ # we won't always have an existnce map
347
+ # so fall back to the normal map
348
+ lookup_cte = cte
349
+ if cte_map and not lookup_cte:
350
+ lookup_cte = cte_map.get(e.right.address)
351
+ assert lookup_cte, "Subselects must be rendered with a CTE in context"
352
+ if e.right.address not in lookup_cte.existence_source_map:
353
+ lookup = lookup_cte.source_map[e.right.address]
354
+ else:
355
+ lookup = lookup_cte.existence_source_map[e.right.address]
356
+
357
+ return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} (select {lookup[0]}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} from {lookup[0]})"
358
+ elif isinstance(e.right, (ListWrapper, Parenthetical)):
359
+ return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map)}"
360
+ elif isinstance(e.right, (str, int, bool, float, list)):
361
+ return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} ({self.render_expr(e.right, cte=cte, cte_map=cte_map)})"
366
362
  else:
367
- raise NotImplementedError(
368
- f"Subselects must be a concept, got {e.right}"
369
- )
363
+ return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} ({self.render_expr(e.right, cte=cte, cte_map=cte_map)})"
370
364
  elif isinstance(e, Comparison):
371
365
  return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map)}"
372
366
  elif isinstance(e, Conditional):
@@ -449,15 +443,15 @@ class BaseDialect:
449
443
  for c in cte.output_columns
450
444
  if c.address not in [y.address for y in cte.hidden_concepts]
451
445
  ]
446
+ if cte.base_name == cte.base_alias:
447
+ source = cte.base_name
448
+ else:
449
+ source = f"{cte.base_name} as {cte.base_alias}"
452
450
  return CompiledCTE(
453
451
  name=cte.name,
454
452
  statement=self.SQL_TEMPLATE.render(
455
453
  select_columns=select_columns,
456
- base=(
457
- f"{cte.base_name} as {cte.base_alias}"
458
- if cte.render_from_clause
459
- else None
460
- ),
454
+ base=(f"{source}" if cte.render_from_clause else None),
461
455
  grain=cte.grain,
462
456
  limit=cte.limit,
463
457
  # some joins may not need to be rendered
@@ -513,7 +507,7 @@ class BaseDialect:
513
507
  c
514
508
  for c in cte.output_columns
515
509
  if c.purpose == Purpose.CONSTANT
516
- and cte.source_map[c.address] != ""
510
+ and cte.source_map[c.address] != []
517
511
  ],
518
512
  "address",
519
513
  )
@@ -639,7 +633,7 @@ class BaseDialect:
639
633
  filter = set(
640
634
  [
641
635
  str(x.address)
642
- for x in query.where_clause.concept_arguments
636
+ for x in query.where_clause.row_arguments
643
637
  if not x.derivation == PurposeLineage.CONSTANT
644
638
  ]
645
639
  )
@@ -650,10 +644,21 @@ class BaseDialect:
650
644
 
651
645
  if not found:
652
646
  raise NotImplementedError(
653
- f"Cannot generate query with filtering on grain {filter} that is"
654
- f" not a subset of the query output grain {query_output}. Use a"
655
- " filtered concept instead."
647
+ f"Cannot generate query with filtering on row arguments {filter} that is"
648
+ f" not a subset of the query output grain {query_output}. Try a"
649
+ " filtered concept instead, or include it in the select clause"
656
650
  )
651
+ for ex_set in query.where_clause.existence_arguments:
652
+ for c in ex_set:
653
+ if c.address not in cte_output_map:
654
+ cts = [
655
+ ct
656
+ for ct in query.ctes
657
+ if ct.name in query.base.existence_source_map[c.address]
658
+ ]
659
+ if not cts:
660
+ raise ValueError(query.base.existence_source_map[c.address])
661
+ cte_output_map[c.address] = cts[0]
657
662
 
658
663
  compiled_ctes = self.generate_ctes(query)
659
664