pytrilogy 0.0.1.109__py3-none-any.whl → 0.0.1.111__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/RECORD +34 -34
- {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/constants.py +11 -3
- trilogy/core/enums.py +1 -0
- trilogy/core/models.py +94 -67
- trilogy/core/optimization.py +134 -12
- trilogy/core/processing/concept_strategies_v3.py +44 -19
- trilogy/core/processing/node_generators/basic_node.py +2 -0
- trilogy/core/processing/node_generators/common.py +3 -1
- trilogy/core/processing/node_generators/concept_merge_node.py +24 -8
- trilogy/core/processing/node_generators/filter_node.py +36 -6
- trilogy/core/processing/node_generators/node_merge_node.py +34 -23
- trilogy/core/processing/node_generators/rowset_node.py +37 -8
- trilogy/core/processing/node_generators/select_node.py +23 -9
- trilogy/core/processing/node_generators/unnest_node.py +24 -3
- trilogy/core/processing/node_generators/window_node.py +4 -2
- trilogy/core/processing/nodes/__init__.py +7 -6
- trilogy/core/processing/nodes/base_node.py +40 -6
- trilogy/core/processing/nodes/filter_node.py +15 -1
- trilogy/core/processing/nodes/group_node.py +20 -1
- trilogy/core/processing/nodes/merge_node.py +37 -10
- trilogy/core/processing/nodes/select_node_v2.py +34 -39
- trilogy/core/processing/nodes/unnest_node.py +12 -0
- trilogy/core/processing/nodes/window_node.py +11 -0
- trilogy/core/processing/utility.py +0 -14
- trilogy/core/query_processor.py +125 -29
- trilogy/dialect/base.py +45 -40
- trilogy/executor.py +31 -3
- trilogy/parsing/parse_engine.py +49 -17
- {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.1.109.dist-info → pytrilogy-0.0.1.111.dist-info}/top_level.txt +0 -0
|
@@ -12,6 +12,8 @@ from trilogy.core.models import (
|
|
|
12
12
|
Concept,
|
|
13
13
|
UnnestJoin,
|
|
14
14
|
Conditional,
|
|
15
|
+
Comparison,
|
|
16
|
+
Parenthetical,
|
|
15
17
|
)
|
|
16
18
|
from trilogy.utility import unique
|
|
17
19
|
from trilogy.core.processing.nodes.base_node import (
|
|
@@ -103,8 +105,9 @@ class MergeNode(StrategyNode):
|
|
|
103
105
|
force_group: bool | None = None,
|
|
104
106
|
depth: int = 0,
|
|
105
107
|
grain: Grain | None = None,
|
|
106
|
-
conditions: Conditional | None = None,
|
|
108
|
+
conditions: Conditional | Comparison | Parenthetical | None = None,
|
|
107
109
|
hidden_concepts: List[Concept] | None = None,
|
|
110
|
+
virtual_output_concepts: List[Concept] | None = None,
|
|
108
111
|
):
|
|
109
112
|
super().__init__(
|
|
110
113
|
input_concepts=input_concepts,
|
|
@@ -119,10 +122,12 @@ class MergeNode(StrategyNode):
|
|
|
119
122
|
grain=grain,
|
|
120
123
|
conditions=conditions,
|
|
121
124
|
hidden_concepts=hidden_concepts,
|
|
125
|
+
virtual_output_concepts=virtual_output_concepts,
|
|
122
126
|
)
|
|
123
127
|
self.join_concepts = join_concepts
|
|
124
128
|
self.force_join_type = force_join_type
|
|
125
129
|
self.node_joins = node_joins
|
|
130
|
+
|
|
126
131
|
final_joins = []
|
|
127
132
|
if self.node_joins:
|
|
128
133
|
for join in self.node_joins:
|
|
@@ -282,6 +287,7 @@ class MergeNode(StrategyNode):
|
|
|
282
287
|
if c.address in [x.address for x in self.output_concepts]
|
|
283
288
|
]
|
|
284
289
|
)
|
|
290
|
+
|
|
285
291
|
logger.info(
|
|
286
292
|
f"{self.logging_prefix}{LOGGER_PREFIX} has pre grain {pregrain} and final merge node grain {grain}"
|
|
287
293
|
)
|
|
@@ -307,24 +313,25 @@ class MergeNode(StrategyNode):
|
|
|
307
313
|
f"{self.logging_prefix}{LOGGER_PREFIX} no parents include full grain {grain} and pregrain {pregrain} does not match, assume must group to grain. Have {[str(d.grain) for d in final_datasets]}"
|
|
308
314
|
)
|
|
309
315
|
force_group = True
|
|
310
|
-
# Grain<returns.customer.id,returns.store.id,returns.item.id,returns.store_sales.ticket_number>
|
|
311
|
-
# Grain<returns.customer.id,returns.store.id,returns.return_date.id,returns.item.id,returns.store_sales.ticket_number>
|
|
312
|
-
# Grain<returns.customer.id,returns.store.id,returns.item.id,returns.store_sales.ticket_number>
|
|
313
316
|
else:
|
|
314
317
|
force_group = None
|
|
315
318
|
|
|
316
319
|
qd_joins: List[BaseJoin | UnnestJoin] = [*joins]
|
|
320
|
+
source_map = resolve_concept_map(
|
|
321
|
+
parent_sources,
|
|
322
|
+
targets=self.output_concepts,
|
|
323
|
+
inherited_inputs=self.input_concepts + self.existence_concepts,
|
|
324
|
+
full_joins=full_join_concepts,
|
|
325
|
+
)
|
|
326
|
+
logger.info(
|
|
327
|
+
f"{self.logging_prefix}{LOGGER_PREFIX} source_map {str(source_map)}"
|
|
328
|
+
)
|
|
317
329
|
qds = QueryDatasource(
|
|
318
330
|
input_concepts=unique(self.input_concepts, "address"),
|
|
319
331
|
output_concepts=unique(self.output_concepts, "address"),
|
|
320
332
|
datasources=final_datasets,
|
|
321
333
|
source_type=self.source_type,
|
|
322
|
-
source_map=
|
|
323
|
-
parent_sources,
|
|
324
|
-
self.output_concepts,
|
|
325
|
-
self.input_concepts,
|
|
326
|
-
full_joins=full_join_concepts,
|
|
327
|
-
),
|
|
334
|
+
source_map=source_map,
|
|
328
335
|
joins=qd_joins,
|
|
329
336
|
grain=grain,
|
|
330
337
|
partial_concepts=self.partial_concepts,
|
|
@@ -333,3 +340,23 @@ class MergeNode(StrategyNode):
|
|
|
333
340
|
hidden_concepts=self.hidden_concepts,
|
|
334
341
|
)
|
|
335
342
|
return qds
|
|
343
|
+
|
|
344
|
+
def copy(self) -> "MergeNode":
|
|
345
|
+
return MergeNode(
|
|
346
|
+
input_concepts=list(self.input_concepts),
|
|
347
|
+
output_concepts=list(self.output_concepts),
|
|
348
|
+
environment=self.environment,
|
|
349
|
+
g=self.g,
|
|
350
|
+
whole_grain=self.whole_grain,
|
|
351
|
+
parents=self.parents,
|
|
352
|
+
depth=self.depth,
|
|
353
|
+
partial_concepts=list(self.partial_concepts),
|
|
354
|
+
force_group=self.force_group,
|
|
355
|
+
grain=self.grain,
|
|
356
|
+
conditions=self.conditions,
|
|
357
|
+
hidden_concepts=list(self.hidden_concepts),
|
|
358
|
+
virtual_output_concepts=list(self.virtual_output_concepts),
|
|
359
|
+
node_joins=self.node_joins,
|
|
360
|
+
join_concepts=list(self.join_concepts) if self.join_concepts else None,
|
|
361
|
+
force_join_type=self.force_join_type,
|
|
362
|
+
)
|
|
@@ -5,14 +5,14 @@ from trilogy.constants import logger
|
|
|
5
5
|
from trilogy.core.constants import CONSTANT_DATASET
|
|
6
6
|
from trilogy.core.enums import Purpose, PurposeLineage
|
|
7
7
|
from trilogy.core.models import (
|
|
8
|
-
|
|
8
|
+
Function,
|
|
9
|
+
Grain,
|
|
9
10
|
QueryDatasource,
|
|
10
11
|
SourceType,
|
|
11
|
-
Environment,
|
|
12
12
|
Concept,
|
|
13
|
-
|
|
14
|
-
Function,
|
|
13
|
+
Environment,
|
|
15
14
|
UnnestJoin,
|
|
15
|
+
Datasource,
|
|
16
16
|
)
|
|
17
17
|
from trilogy.utility import unique
|
|
18
18
|
from trilogy.core.processing.nodes.base_node import StrategyNode
|
|
@@ -22,39 +22,6 @@ from trilogy.core.exceptions import NoDatasourceException
|
|
|
22
22
|
LOGGER_PREFIX = "[CONCEPT DETAIL - SELECT NODE]"
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
class StaticSelectNode(StrategyNode):
|
|
26
|
-
"""Static select nodes."""
|
|
27
|
-
|
|
28
|
-
source_type = SourceType.SELECT
|
|
29
|
-
|
|
30
|
-
def __init__(
|
|
31
|
-
self,
|
|
32
|
-
input_concepts: List[Concept],
|
|
33
|
-
output_concepts: List[Concept],
|
|
34
|
-
environment: Environment,
|
|
35
|
-
g,
|
|
36
|
-
datasource: QueryDatasource,
|
|
37
|
-
depth: int = 0,
|
|
38
|
-
partial_concepts: List[Concept] | None = None,
|
|
39
|
-
):
|
|
40
|
-
super().__init__(
|
|
41
|
-
input_concepts=input_concepts,
|
|
42
|
-
output_concepts=output_concepts,
|
|
43
|
-
environment=environment,
|
|
44
|
-
g=g,
|
|
45
|
-
whole_grain=True,
|
|
46
|
-
parents=[],
|
|
47
|
-
depth=depth,
|
|
48
|
-
partial_concepts=partial_concepts,
|
|
49
|
-
)
|
|
50
|
-
self.datasource = datasource
|
|
51
|
-
|
|
52
|
-
def _resolve(self):
|
|
53
|
-
if self.datasource.grain == Grain():
|
|
54
|
-
raise NotImplementedError
|
|
55
|
-
return self.datasource
|
|
56
|
-
|
|
57
|
-
|
|
58
25
|
class SelectNode(StrategyNode):
|
|
59
26
|
"""Select nodes actually fetch raw data from a table
|
|
60
27
|
Responsible for selecting the cheapest option from which to select.
|
|
@@ -75,7 +42,7 @@ class SelectNode(StrategyNode):
|
|
|
75
42
|
partial_concepts: List[Concept] | None = None,
|
|
76
43
|
accept_partial: bool = False,
|
|
77
44
|
grain: Optional[Grain] = None,
|
|
78
|
-
force_group: bool = False,
|
|
45
|
+
force_group: bool | None = False,
|
|
79
46
|
):
|
|
80
47
|
super().__init__(
|
|
81
48
|
input_concepts=input_concepts,
|
|
@@ -119,6 +86,7 @@ class SelectNode(StrategyNode):
|
|
|
119
86
|
if x.address not in source_map and x.derivation in (
|
|
120
87
|
PurposeLineage.MULTISELECT,
|
|
121
88
|
PurposeLineage.MERGE,
|
|
89
|
+
PurposeLineage.FILTER,
|
|
122
90
|
):
|
|
123
91
|
source_map[x.address] = set()
|
|
124
92
|
|
|
@@ -140,6 +108,7 @@ class SelectNode(StrategyNode):
|
|
|
140
108
|
c.concept for c in datasource.columns if not c.is_complete
|
|
141
109
|
],
|
|
142
110
|
source_type=SourceType.DIRECT_SELECT,
|
|
111
|
+
condition=self.conditions,
|
|
143
112
|
# select nodes should never group
|
|
144
113
|
force_group=self.force_group,
|
|
145
114
|
)
|
|
@@ -154,6 +123,7 @@ class SelectNode(StrategyNode):
|
|
|
154
123
|
source_map={concept.address: set() for concept in self.all_concepts},
|
|
155
124
|
datasources=[datasource],
|
|
156
125
|
grain=datasource.grain,
|
|
126
|
+
condition=self.conditions,
|
|
157
127
|
joins=[],
|
|
158
128
|
partial_concepts=[],
|
|
159
129
|
source_type=SourceType.CONSTANT,
|
|
@@ -191,8 +161,33 @@ class SelectNode(StrategyNode):
|
|
|
191
161
|
f"Could not find any way to associate required concepts {required}"
|
|
192
162
|
)
|
|
193
163
|
|
|
164
|
+
def copy(self) -> "SelectNode":
|
|
165
|
+
return SelectNode(
|
|
166
|
+
input_concepts=list(self.input_concepts),
|
|
167
|
+
output_concepts=list(self.output_concepts),
|
|
168
|
+
environment=self.environment,
|
|
169
|
+
g=self.g,
|
|
170
|
+
datasource=self.datasource,
|
|
171
|
+
depth=self.depth,
|
|
172
|
+
parents=self.parents,
|
|
173
|
+
whole_grain=self.whole_grain,
|
|
174
|
+
partial_concepts=list(self.partial_concepts),
|
|
175
|
+
accept_partial=self.accept_partial,
|
|
176
|
+
grain=self.grain,
|
|
177
|
+
force_group=self.force_group,
|
|
178
|
+
)
|
|
179
|
+
|
|
194
180
|
|
|
195
181
|
class ConstantNode(SelectNode):
|
|
196
182
|
"""Represents a constant value."""
|
|
197
183
|
|
|
198
|
-
|
|
184
|
+
def copy(self) -> "ConstantNode":
|
|
185
|
+
return ConstantNode(
|
|
186
|
+
input_concepts=list(self.input_concepts),
|
|
187
|
+
output_concepts=list(self.output_concepts),
|
|
188
|
+
environment=self.environment,
|
|
189
|
+
g=self.g,
|
|
190
|
+
datasource=self.datasource,
|
|
191
|
+
depth=self.depth,
|
|
192
|
+
partial_concepts=list(self.partial_concepts),
|
|
193
|
+
)
|
|
@@ -52,3 +52,15 @@ class UnnestNode(StrategyNode):
|
|
|
52
52
|
base.source_map[self.unnest_concept.address] = {unnest}
|
|
53
53
|
base.join_derived_concepts = [self.unnest_concept]
|
|
54
54
|
return base
|
|
55
|
+
|
|
56
|
+
def copy(self) -> "UnnestNode":
|
|
57
|
+
return UnnestNode(
|
|
58
|
+
unnest_concept=self.unnest_concept,
|
|
59
|
+
input_concepts=list(self.input_concepts),
|
|
60
|
+
output_concepts=list(self.output_concepts),
|
|
61
|
+
environment=self.environment,
|
|
62
|
+
g=self.g,
|
|
63
|
+
whole_grain=self.whole_grain,
|
|
64
|
+
parents=self.parents,
|
|
65
|
+
depth=self.depth,
|
|
66
|
+
)
|
|
@@ -32,3 +32,14 @@ class WindowNode(StrategyNode):
|
|
|
32
32
|
base = super()._resolve()
|
|
33
33
|
base.grain = Grain(components=self.input_concepts)
|
|
34
34
|
return base
|
|
35
|
+
|
|
36
|
+
def copy(self) -> "WindowNode":
|
|
37
|
+
return WindowNode(
|
|
38
|
+
input_concepts=list(self.input_concepts),
|
|
39
|
+
output_concepts=list(self.output_concepts),
|
|
40
|
+
environment=self.environment,
|
|
41
|
+
g=self.g,
|
|
42
|
+
whole_grain=self.whole_grain,
|
|
43
|
+
parents=self.parents,
|
|
44
|
+
depth=self.depth,
|
|
45
|
+
)
|
|
@@ -232,20 +232,6 @@ def get_node_joins(
|
|
|
232
232
|
raise SyntaxError(
|
|
233
233
|
f"Could not find join for {x.identifier} with output {[c.address for c in x.output_concepts]}, all {[z.identifier for z in datasources]}"
|
|
234
234
|
)
|
|
235
|
-
single_row = [x for x in datasources if x.grain.abstract]
|
|
236
|
-
for x in single_row:
|
|
237
|
-
for join in final_joins:
|
|
238
|
-
found = False
|
|
239
|
-
for join in final_joins:
|
|
240
|
-
if (
|
|
241
|
-
join.left_datasource.identifier == x.identifier
|
|
242
|
-
or join.right_datasource.identifier == x.identifier
|
|
243
|
-
):
|
|
244
|
-
found = True
|
|
245
|
-
if not found:
|
|
246
|
-
raise SyntaxError(
|
|
247
|
-
f"Could not find join for {x.identifier} with output {[c.address for c in x.output_concepts]}, all {[z.identifier for z in datasources]}"
|
|
248
|
-
)
|
|
249
235
|
return final_joins
|
|
250
236
|
|
|
251
237
|
|
trilogy/core/query_processor.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List, Optional, Set, Union, Dict
|
|
1
|
+
from typing import List, Optional, Set, Union, Dict, Tuple
|
|
2
2
|
|
|
3
3
|
from trilogy.core.env_processor import generate_graph
|
|
4
4
|
from trilogy.core.graph_models import ReferenceGraph
|
|
@@ -84,19 +84,31 @@ def base_join_to_join(
|
|
|
84
84
|
|
|
85
85
|
def generate_source_map(
|
|
86
86
|
query_datasource: QueryDatasource, all_new_ctes: List[CTE]
|
|
87
|
-
) -> Dict[str, str
|
|
87
|
+
) -> Tuple[Dict[str, list[str]], Dict[str, list[str]]]:
|
|
88
88
|
source_map: Dict[str, list[str]] = defaultdict(list)
|
|
89
89
|
# now populate anything derived in this level
|
|
90
90
|
for qdk, qdv in query_datasource.source_map.items():
|
|
91
|
+
unnest = [x for x in qdv if isinstance(x, UnnestJoin)]
|
|
92
|
+
for x in unnest:
|
|
93
|
+
source_map[qdk] = []
|
|
91
94
|
if (
|
|
92
95
|
qdk not in source_map
|
|
93
96
|
and len(qdv) == 1
|
|
94
97
|
and isinstance(list(qdv)[0], UnnestJoin)
|
|
95
98
|
):
|
|
96
99
|
source_map[qdk] = []
|
|
100
|
+
basic = [x for x in qdv if isinstance(x, Datasource)]
|
|
101
|
+
for base in basic:
|
|
102
|
+
source_map[qdk].append(base.name)
|
|
97
103
|
|
|
98
|
-
|
|
99
|
-
|
|
104
|
+
ctes = [x for x in qdv if isinstance(x, QueryDatasource)]
|
|
105
|
+
if ctes:
|
|
106
|
+
names = set([x.name for x in ctes])
|
|
107
|
+
matches = [cte for cte in all_new_ctes if cte.source.name in names]
|
|
108
|
+
|
|
109
|
+
if not matches and names:
|
|
110
|
+
raise SyntaxError(query_datasource.source_map)
|
|
111
|
+
for cte in matches:
|
|
100
112
|
output_address = [
|
|
101
113
|
x.address
|
|
102
114
|
for x in cte.output_columns
|
|
@@ -105,21 +117,27 @@ def generate_source_map(
|
|
|
105
117
|
if qdk in output_address:
|
|
106
118
|
source_map[qdk].append(cte.name)
|
|
107
119
|
# now do a pass that accepts partials
|
|
108
|
-
# TODO: move this into a second loop by first creationg all sub
|
|
120
|
+
# TODO: move this into a second loop by first creating all sub sources
|
|
109
121
|
# then loop through this
|
|
110
|
-
for cte in
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
if qdk not in source_map:
|
|
114
|
-
source_map[qdk] = [cte.name]
|
|
115
|
-
if qdk not in source_map and not qdv:
|
|
116
|
-
# set source to empty, as it must be derived in this element
|
|
117
|
-
source_map[qdk] = []
|
|
122
|
+
for cte in matches:
|
|
123
|
+
if qdk not in source_map:
|
|
124
|
+
source_map[qdk] = [cte.name]
|
|
118
125
|
if qdk not in source_map:
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
126
|
+
if not qdv:
|
|
127
|
+
source_map[qdk] = []
|
|
128
|
+
elif CONFIG.validate_missing:
|
|
129
|
+
raise ValueError(
|
|
130
|
+
f"Missing {qdk} in {source_map}, source map {query_datasource.source_map} "
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
# existence lookups use a separate map
|
|
134
|
+
# as they cannot be referenced in row resolution
|
|
135
|
+
existence_source_map: Dict[str, list[str]] = defaultdict(list)
|
|
136
|
+
for ek, ev in query_datasource.existence_source_map.items():
|
|
137
|
+
names = set([x.name for x in ev])
|
|
138
|
+
ematches = [cte.name for cte in all_new_ctes if cte.source.name in names]
|
|
139
|
+
existence_source_map[ek] = ematches
|
|
140
|
+
return {k: [] if not v else v for k, v in source_map.items()}, existence_source_map
|
|
123
141
|
|
|
124
142
|
|
|
125
143
|
def datasource_to_query_datasource(datasource: Datasource) -> QueryDatasource:
|
|
@@ -158,6 +176,52 @@ def generate_cte_name(full_name: str, name_map: dict[str, str]) -> str:
|
|
|
158
176
|
return full_name.replace("<", "").replace(">", "").replace(",", "_")
|
|
159
177
|
|
|
160
178
|
|
|
179
|
+
def resolve_cte_base_name_and_alias(
|
|
180
|
+
name: str,
|
|
181
|
+
source: QueryDatasource,
|
|
182
|
+
parents: List[CTE],
|
|
183
|
+
joins: List[Join | InstantiatedUnnestJoin],
|
|
184
|
+
) -> Tuple[str | None, str | None]:
|
|
185
|
+
|
|
186
|
+
valid_joins: List[Join] = [join for join in joins if isinstance(join, Join)]
|
|
187
|
+
relevant_parent_sources = set()
|
|
188
|
+
for k, v in source.source_map.items():
|
|
189
|
+
if v:
|
|
190
|
+
relevant_parent_sources.update(v)
|
|
191
|
+
eligible = [x for x in source.datasources if x in relevant_parent_sources]
|
|
192
|
+
if (
|
|
193
|
+
len(eligible) == 1
|
|
194
|
+
and isinstance(eligible[0], Datasource)
|
|
195
|
+
and not eligible[0].name == CONSTANT_DATASET
|
|
196
|
+
):
|
|
197
|
+
ds = eligible[0]
|
|
198
|
+
return ds.safe_location, ds.identifier
|
|
199
|
+
|
|
200
|
+
# if we have multiple joined CTEs, pick the base
|
|
201
|
+
# as the root
|
|
202
|
+
elif len(eligible) == 1 and len(parents) == 1:
|
|
203
|
+
return parents[0].name, parents[0].name
|
|
204
|
+
elif valid_joins and len(valid_joins) > 0:
|
|
205
|
+
candidates = [x.left_cte.name for x in valid_joins]
|
|
206
|
+
disallowed = [x.right_cte.name for x in valid_joins]
|
|
207
|
+
try:
|
|
208
|
+
cte = [y for y in candidates if y not in disallowed][0]
|
|
209
|
+
return cte, cte
|
|
210
|
+
except IndexError:
|
|
211
|
+
raise SyntaxError(
|
|
212
|
+
f"Invalid join configuration {candidates} {disallowed} with all parents {[x.base_name for x in parents]}"
|
|
213
|
+
)
|
|
214
|
+
elif eligible:
|
|
215
|
+
matched = [x for x in parents if x.source.name == eligible[0].name]
|
|
216
|
+
if matched:
|
|
217
|
+
return matched[0].name, matched[0].name
|
|
218
|
+
|
|
219
|
+
logger.info(
|
|
220
|
+
f"Could not determine CTE base name for {name} with relevant sources {relevant_parent_sources}"
|
|
221
|
+
)
|
|
222
|
+
return None, None
|
|
223
|
+
|
|
224
|
+
|
|
161
225
|
def datasource_to_ctes(
|
|
162
226
|
query_datasource: QueryDatasource, name_map: dict[str, str]
|
|
163
227
|
) -> List[CTE]:
|
|
@@ -176,7 +240,8 @@ def datasource_to_ctes(
|
|
|
176
240
|
sub_cte = datasource_to_ctes(sub_datasource, name_map)
|
|
177
241
|
parents += sub_cte
|
|
178
242
|
all_new_ctes += sub_cte
|
|
179
|
-
source_map = generate_source_map(query_datasource, all_new_ctes)
|
|
243
|
+
source_map, existence_map = generate_source_map(query_datasource, all_new_ctes)
|
|
244
|
+
|
|
180
245
|
else:
|
|
181
246
|
# source is the first datasource of the query datasource
|
|
182
247
|
source = query_datasource.datasources[0]
|
|
@@ -184,13 +249,27 @@ def datasource_to_ctes(
|
|
|
184
249
|
# render properly on initial access; since they have
|
|
185
250
|
# no actual source
|
|
186
251
|
if source.full_name == DEFAULT_NAMESPACE + "_" + CONSTANT_DATASET:
|
|
187
|
-
source_map = {k:
|
|
252
|
+
source_map = {k: [] for k in query_datasource.source_map}
|
|
253
|
+
existence_map = source_map
|
|
188
254
|
else:
|
|
189
255
|
source_map = {
|
|
190
|
-
k:
|
|
256
|
+
k: [] if not v else [source.identifier]
|
|
191
257
|
for k, v in query_datasource.source_map.items()
|
|
192
258
|
}
|
|
259
|
+
existence_map = source_map
|
|
260
|
+
|
|
193
261
|
human_id = generate_cte_name(query_datasource.full_name, name_map)
|
|
262
|
+
logger.info(
|
|
263
|
+
f"Finished building source map for {human_id} with {len(parents)} parents, have {source_map}, parent had non-empty keys {[k for k, v in query_datasource.source_map.items() if v]} "
|
|
264
|
+
)
|
|
265
|
+
final_joins = [
|
|
266
|
+
x
|
|
267
|
+
for x in [base_join_to_join(join, parents) for join in query_datasource.joins]
|
|
268
|
+
if x
|
|
269
|
+
]
|
|
270
|
+
base_name, base_alias = resolve_cte_base_name_and_alias(
|
|
271
|
+
human_id, query_datasource, parents, final_joins
|
|
272
|
+
)
|
|
194
273
|
cte = CTE(
|
|
195
274
|
name=human_id,
|
|
196
275
|
source=query_datasource,
|
|
@@ -200,14 +279,9 @@ def datasource_to_ctes(
|
|
|
200
279
|
for c in query_datasource.output_concepts
|
|
201
280
|
],
|
|
202
281
|
source_map=source_map,
|
|
282
|
+
existence_source_map=existence_map,
|
|
203
283
|
# related columns include all referenced columns, such as filtering
|
|
204
|
-
joins=
|
|
205
|
-
x
|
|
206
|
-
for x in [
|
|
207
|
-
base_join_to_join(join, parents) for join in query_datasource.joins
|
|
208
|
-
]
|
|
209
|
-
if x
|
|
210
|
-
],
|
|
284
|
+
joins=final_joins,
|
|
211
285
|
grain=query_datasource.grain,
|
|
212
286
|
group_to_grain=query_datasource.group_required,
|
|
213
287
|
# we restrict parent_ctes to one level
|
|
@@ -217,11 +291,13 @@ def datasource_to_ctes(
|
|
|
217
291
|
partial_concepts=query_datasource.partial_concepts,
|
|
218
292
|
join_derived_concepts=query_datasource.join_derived_concepts,
|
|
219
293
|
hidden_concepts=query_datasource.hidden_concepts,
|
|
294
|
+
base_name_override=base_name,
|
|
295
|
+
base_alias_override=base_alias,
|
|
220
296
|
)
|
|
221
297
|
if cte.grain != query_datasource.grain:
|
|
222
298
|
raise ValueError("Grain was corrupted in CTE generation")
|
|
223
299
|
for x in cte.output_columns:
|
|
224
|
-
if x.address not in cte.source_map:
|
|
300
|
+
if x.address not in cte.source_map and CONFIG.validate_missing:
|
|
225
301
|
raise ValueError(
|
|
226
302
|
f"Missing {x.address} in {cte.source_map}, source map {cte.source.source_map.keys()} "
|
|
227
303
|
)
|
|
@@ -243,12 +319,32 @@ def get_query_datasources(
|
|
|
243
319
|
if not statement.output_components:
|
|
244
320
|
raise ValueError(f"Statement has no output components {statement}")
|
|
245
321
|
ds = source_query_concepts(
|
|
246
|
-
statement.output_components,
|
|
322
|
+
statement.output_components,
|
|
323
|
+
environment=environment,
|
|
324
|
+
g=graph,
|
|
247
325
|
)
|
|
248
326
|
if hooks:
|
|
249
327
|
for hook in hooks:
|
|
250
328
|
hook.process_root_strategy_node(ds)
|
|
251
329
|
final_qds = ds.resolve()
|
|
330
|
+
|
|
331
|
+
# if we have a where clause doing an existence check
|
|
332
|
+
# treat that as separate subquery
|
|
333
|
+
if (where := statement.where_clause) and where.existence_arguments:
|
|
334
|
+
for subselect in where.existence_arguments:
|
|
335
|
+
if not subselect:
|
|
336
|
+
continue
|
|
337
|
+
logger.info(
|
|
338
|
+
f"{LOGGER_PREFIX} fetching existance clause inputs {[str(c) for c in subselect]}"
|
|
339
|
+
)
|
|
340
|
+
eds = source_query_concepts([*subselect], environment=environment, g=graph)
|
|
341
|
+
|
|
342
|
+
final_eds = eds.resolve()
|
|
343
|
+
first_parent = final_qds
|
|
344
|
+
first_parent.datasources.append(final_eds)
|
|
345
|
+
for x in final_eds.output_concepts:
|
|
346
|
+
if x.address not in first_parent.existence_source_map:
|
|
347
|
+
first_parent.existence_source_map[x.address] = {final_eds}
|
|
252
348
|
return final_qds
|
|
253
349
|
|
|
254
350
|
|
trilogy/dialect/base.py
CHANGED
|
@@ -193,27 +193,6 @@ ORDER BY {% for order in order_by %}
|
|
|
193
193
|
)
|
|
194
194
|
|
|
195
195
|
|
|
196
|
-
def check_lineage(c: Concept, cte: CTE) -> bool:
|
|
197
|
-
checks = []
|
|
198
|
-
if not c.lineage:
|
|
199
|
-
return True
|
|
200
|
-
for sub_c in c.lineage.concept_arguments:
|
|
201
|
-
if not isinstance(sub_c, Concept):
|
|
202
|
-
continue
|
|
203
|
-
if sub_c.address in cte.source_map or (
|
|
204
|
-
sub_c.lineage and check_lineage(sub_c, cte)
|
|
205
|
-
):
|
|
206
|
-
checks.append(True)
|
|
207
|
-
else:
|
|
208
|
-
logger.debug(
|
|
209
|
-
f"{LOGGER_PREFIX} [{sub_c.address}] not found in source map for"
|
|
210
|
-
f" {cte.name}, have cte keys {[c for c in cte.source_map.keys()]} and"
|
|
211
|
-
f" datasource keys {[c for c in cte.source.source_map.keys()]}"
|
|
212
|
-
)
|
|
213
|
-
checks.append(False)
|
|
214
|
-
return all(checks)
|
|
215
|
-
|
|
216
|
-
|
|
217
196
|
def safe_quote(string: str, quote_char: str):
|
|
218
197
|
# split dotted identifiers
|
|
219
198
|
# TODO: evaluate if we need smarter parsing for strings that could actually include .
|
|
@@ -259,7 +238,7 @@ class BaseDialect:
|
|
|
259
238
|
f"{LOGGER_PREFIX} [{c.address}] Starting rendering loop on cte: {cte.name}"
|
|
260
239
|
)
|
|
261
240
|
|
|
262
|
-
if c.lineage and cte.source_map.get(c.address,
|
|
241
|
+
if c.lineage and cte.source_map.get(c.address, []) == []:
|
|
263
242
|
logger.debug(
|
|
264
243
|
f"{LOGGER_PREFIX} [{c.address}] rendering concept with lineage that is not already existing"
|
|
265
244
|
)
|
|
@@ -273,7 +252,11 @@ class BaseDialect:
|
|
|
273
252
|
]
|
|
274
253
|
rval = f"{self.WINDOW_FUNCTION_MAP[c.lineage.type](concept = self.render_concept_sql(c.lineage.content, cte=cte, alias=False), window=','.join(rendered_over_components), sort=','.join(rendered_order_components))}" # noqa: E501
|
|
275
254
|
elif isinstance(c.lineage, FilterItem):
|
|
276
|
-
|
|
255
|
+
# for cases when we've optimized this
|
|
256
|
+
if len(cte.output_columns) == 1:
|
|
257
|
+
rval = self.render_expr(c.lineage.content, cte=cte)
|
|
258
|
+
else:
|
|
259
|
+
rval = f"CASE WHEN {self.render_expr(c.lineage.where.conditional, cte=cte)} THEN {self.render_concept_sql(c.lineage.content, cte=cte, alias=False)} ELSE NULL END"
|
|
277
260
|
elif isinstance(c.lineage, RowsetItem):
|
|
278
261
|
rval = f"{self.render_concept_sql(c.lineage.content, cte=cte, alias=False)}"
|
|
279
262
|
elif isinstance(c.lineage, MultiSelectStatement):
|
|
@@ -356,17 +339,28 @@ class BaseDialect:
|
|
|
356
339
|
cte: Optional[CTE] = None,
|
|
357
340
|
cte_map: Optional[Dict[str, CTE]] = None,
|
|
358
341
|
) -> str:
|
|
359
|
-
# if isinstance(e, Concept):
|
|
360
|
-
# cte = cte or cte_map.get(e.address, None)
|
|
361
342
|
|
|
362
343
|
if isinstance(e, SubselectComparison):
|
|
363
|
-
|
|
344
|
+
|
|
364
345
|
if isinstance(e.right, Concept):
|
|
365
|
-
|
|
346
|
+
# we won't always have an existence map
|
|
347
|
+
# so fall back to the normal map
|
|
348
|
+
lookup_cte = cte
|
|
349
|
+
if cte_map and not lookup_cte:
|
|
350
|
+
lookup_cte = cte_map.get(e.right.address)
|
|
351
|
+
assert lookup_cte, "Subselects must be rendered with a CTE in context"
|
|
352
|
+
if e.right.address not in lookup_cte.existence_source_map:
|
|
353
|
+
lookup = lookup_cte.source_map[e.right.address]
|
|
354
|
+
else:
|
|
355
|
+
lookup = lookup_cte.existence_source_map[e.right.address]
|
|
356
|
+
|
|
357
|
+
return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} (select {lookup[0]}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} from {lookup[0]})"
|
|
358
|
+
elif isinstance(e.right, (ListWrapper, Parenthetical)):
|
|
359
|
+
return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map)}"
|
|
360
|
+
elif isinstance(e.right, (str, int, bool, float, list)):
|
|
361
|
+
return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} ({self.render_expr(e.right, cte=cte, cte_map=cte_map)})"
|
|
366
362
|
else:
|
|
367
|
-
|
|
368
|
-
f"Subselects must be a concept, got {e.right}"
|
|
369
|
-
)
|
|
363
|
+
return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} ({self.render_expr(e.right, cte=cte, cte_map=cte_map)})"
|
|
370
364
|
elif isinstance(e, Comparison):
|
|
371
365
|
return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map)}"
|
|
372
366
|
elif isinstance(e, Conditional):
|
|
@@ -449,15 +443,15 @@ class BaseDialect:
|
|
|
449
443
|
for c in cte.output_columns
|
|
450
444
|
if c.address not in [y.address for y in cte.hidden_concepts]
|
|
451
445
|
]
|
|
446
|
+
if cte.base_name == cte.base_alias:
|
|
447
|
+
source = cte.base_name
|
|
448
|
+
else:
|
|
449
|
+
source = f"{cte.base_name} as {cte.base_alias}"
|
|
452
450
|
return CompiledCTE(
|
|
453
451
|
name=cte.name,
|
|
454
452
|
statement=self.SQL_TEMPLATE.render(
|
|
455
453
|
select_columns=select_columns,
|
|
456
|
-
base=(
|
|
457
|
-
f"{cte.base_name} as {cte.base_alias}"
|
|
458
|
-
if cte.render_from_clause
|
|
459
|
-
else None
|
|
460
|
-
),
|
|
454
|
+
base=(f"{source}" if cte.render_from_clause else None),
|
|
461
455
|
grain=cte.grain,
|
|
462
456
|
limit=cte.limit,
|
|
463
457
|
# some joins may not need to be rendered
|
|
@@ -513,7 +507,7 @@ class BaseDialect:
|
|
|
513
507
|
c
|
|
514
508
|
for c in cte.output_columns
|
|
515
509
|
if c.purpose == Purpose.CONSTANT
|
|
516
|
-
and cte.source_map[c.address] !=
|
|
510
|
+
and cte.source_map[c.address] != []
|
|
517
511
|
],
|
|
518
512
|
"address",
|
|
519
513
|
)
|
|
@@ -639,7 +633,7 @@ class BaseDialect:
|
|
|
639
633
|
filter = set(
|
|
640
634
|
[
|
|
641
635
|
str(x.address)
|
|
642
|
-
for x in query.where_clause.
|
|
636
|
+
for x in query.where_clause.row_arguments
|
|
643
637
|
if not x.derivation == PurposeLineage.CONSTANT
|
|
644
638
|
]
|
|
645
639
|
)
|
|
@@ -650,10 +644,21 @@ class BaseDialect:
|
|
|
650
644
|
|
|
651
645
|
if not found:
|
|
652
646
|
raise NotImplementedError(
|
|
653
|
-
f"Cannot generate query with filtering on
|
|
654
|
-
f" not a subset of the query output grain {query_output}.
|
|
655
|
-
" filtered concept instead
|
|
647
|
+
f"Cannot generate query with filtering on row arguments {filter} that is"
|
|
648
|
+
f" not a subset of the query output grain {query_output}. Try a"
|
|
649
|
+
" filtered concept instead, or include it in the select clause"
|
|
656
650
|
)
|
|
651
|
+
for ex_set in query.where_clause.existence_arguments:
|
|
652
|
+
for c in ex_set:
|
|
653
|
+
if c.address not in cte_output_map:
|
|
654
|
+
cts = [
|
|
655
|
+
ct
|
|
656
|
+
for ct in query.ctes
|
|
657
|
+
if ct.name in query.base.existence_source_map[c.address]
|
|
658
|
+
]
|
|
659
|
+
if not cts:
|
|
660
|
+
raise ValueError(query.base.existence_source_map[c.address])
|
|
661
|
+
cte_output_map[c.address] = cts[0]
|
|
657
662
|
|
|
658
663
|
compiled_ctes = self.generate_ctes(query)
|
|
659
664
|
|