pytrilogy 0.0.2.7__py3-none-any.whl → 0.0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.2.7.dist-info → pytrilogy-0.0.2.9.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.2.7.dist-info → pytrilogy-0.0.2.9.dist-info}/RECORD +34 -34
- {pytrilogy-0.0.2.7.dist-info → pytrilogy-0.0.2.9.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/constants.py +1 -0
- trilogy/core/enums.py +1 -0
- trilogy/core/models.py +154 -56
- trilogy/core/optimization.py +44 -5
- trilogy/core/optimizations/inline_datasource.py +14 -8
- trilogy/core/optimizations/predicate_pushdown.py +73 -44
- trilogy/core/processing/concept_strategies_v3.py +69 -28
- trilogy/core/processing/node_generators/common.py +42 -16
- trilogy/core/processing/node_generators/filter_node.py +89 -48
- trilogy/core/processing/node_generators/group_node.py +3 -1
- trilogy/core/processing/node_generators/rowset_node.py +13 -54
- trilogy/core/processing/node_generators/select_node.py +10 -13
- trilogy/core/processing/node_generators/unnest_node.py +5 -3
- trilogy/core/processing/node_generators/window_node.py +23 -2
- trilogy/core/processing/nodes/__init__.py +34 -6
- trilogy/core/processing/nodes/base_node.py +67 -13
- trilogy/core/processing/nodes/filter_node.py +3 -0
- trilogy/core/processing/nodes/group_node.py +3 -0
- trilogy/core/processing/nodes/merge_node.py +1 -11
- trilogy/core/processing/nodes/select_node_v2.py +1 -0
- trilogy/core/processing/utility.py +29 -10
- trilogy/core/query_processor.py +47 -20
- trilogy/dialect/base.py +47 -14
- trilogy/dialect/common.py +15 -3
- trilogy/dialect/presto.py +2 -1
- trilogy/parsing/parse_engine.py +20 -1
- trilogy/parsing/trilogy.lark +3 -1
- {pytrilogy-0.0.2.7.dist-info → pytrilogy-0.0.2.9.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.7.dist-info → pytrilogy-0.0.2.9.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.7.dist-info → pytrilogy-0.0.2.9.dist-info}/top_level.txt +0 -0
|
@@ -9,6 +9,7 @@ from trilogy.core.models import (
|
|
|
9
9
|
FilterItem,
|
|
10
10
|
Environment,
|
|
11
11
|
LooseConceptList,
|
|
12
|
+
WhereClause,
|
|
12
13
|
)
|
|
13
14
|
from trilogy.utility import unique
|
|
14
15
|
from trilogy.core.processing.nodes.base_node import StrategyNode
|
|
@@ -45,6 +46,17 @@ def resolve_function_parent_concepts(concept: Concept) -> List[Concept]:
|
|
|
45
46
|
return unique(concept.lineage.concept_arguments, "address")
|
|
46
47
|
|
|
47
48
|
|
|
49
|
+
def resolve_condition_parent_concepts(
|
|
50
|
+
condition: WhereClause,
|
|
51
|
+
) -> Tuple[List[Concept], List[Tuple[Concept, ...]]]:
|
|
52
|
+
base_existence = []
|
|
53
|
+
base_rows = []
|
|
54
|
+
base_rows += condition.row_arguments
|
|
55
|
+
for ctuple in condition.existence_arguments:
|
|
56
|
+
base_existence.append(ctuple)
|
|
57
|
+
return unique(base_rows, "address"), base_existence
|
|
58
|
+
|
|
59
|
+
|
|
48
60
|
def resolve_filter_parent_concepts(
|
|
49
61
|
concept: Concept,
|
|
50
62
|
) -> Tuple[Concept, List[Concept], List[Tuple[Concept, ...]]]:
|
|
@@ -55,10 +67,11 @@ def resolve_filter_parent_concepts(
|
|
|
55
67
|
direct_parent = concept.lineage.content
|
|
56
68
|
base_existence = []
|
|
57
69
|
base_rows = [direct_parent]
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
70
|
+
condition_rows, condition_existence = resolve_condition_parent_concepts(
|
|
71
|
+
concept.lineage.where
|
|
72
|
+
)
|
|
73
|
+
base_rows += condition_rows
|
|
74
|
+
base_existence += condition_existence
|
|
62
75
|
if direct_parent.grain:
|
|
63
76
|
base_rows += direct_parent.grain.components_copy
|
|
64
77
|
if (
|
|
@@ -84,6 +97,7 @@ def gen_property_enrichment_node(
|
|
|
84
97
|
depth: int,
|
|
85
98
|
source_concepts,
|
|
86
99
|
history: History | None = None,
|
|
100
|
+
conditions: WhereClause | None = None,
|
|
87
101
|
):
|
|
88
102
|
required_keys: dict[str, set[str]] = defaultdict(set)
|
|
89
103
|
for x in extra_properties:
|
|
@@ -102,6 +116,7 @@ def gen_property_enrichment_node(
|
|
|
102
116
|
g=g,
|
|
103
117
|
depth=depth + 1,
|
|
104
118
|
history=history,
|
|
119
|
+
conditions=conditions,
|
|
105
120
|
)
|
|
106
121
|
final_nodes.append(enrich_node)
|
|
107
122
|
node_joins.append(
|
|
@@ -147,11 +162,12 @@ def gen_enrichment_node(
|
|
|
147
162
|
source_concepts,
|
|
148
163
|
log_lambda,
|
|
149
164
|
history: History | None = None,
|
|
165
|
+
conditions: WhereClause | None = None,
|
|
150
166
|
):
|
|
151
167
|
|
|
152
168
|
local_opts = LooseConceptList(concepts=local_optional)
|
|
153
169
|
|
|
154
|
-
if local_opts.issubset(
|
|
170
|
+
if local_opts.issubset(base_node.output_lcl):
|
|
155
171
|
log_lambda(
|
|
156
172
|
f"{str(type(base_node).__name__)} has all optional { base_node.output_lcl}, skipping enrichmennt"
|
|
157
173
|
)
|
|
@@ -180,6 +196,7 @@ def gen_enrichment_node(
|
|
|
180
196
|
depth,
|
|
181
197
|
source_concepts,
|
|
182
198
|
history=history,
|
|
199
|
+
conditions=conditions,
|
|
183
200
|
)
|
|
184
201
|
|
|
185
202
|
enrich_node: StrategyNode = source_concepts( # this fetches the parent + join keys
|
|
@@ -189,6 +206,7 @@ def gen_enrichment_node(
|
|
|
189
206
|
g=g,
|
|
190
207
|
depth=depth,
|
|
191
208
|
history=history,
|
|
209
|
+
conditions=conditions,
|
|
192
210
|
)
|
|
193
211
|
if not enrich_node:
|
|
194
212
|
log_lambda(
|
|
@@ -225,17 +243,31 @@ def gen_enrichment_node(
|
|
|
225
243
|
|
|
226
244
|
|
|
227
245
|
def resolve_join_order(joins: List[NodeJoin]) -> List[NodeJoin]:
|
|
246
|
+
if not joins:
|
|
247
|
+
return []
|
|
228
248
|
available_aliases: set[StrategyNode] = set()
|
|
229
249
|
final_joins_pre = [*joins]
|
|
230
250
|
final_joins = []
|
|
251
|
+
left = set()
|
|
252
|
+
right = set()
|
|
253
|
+
for join in joins:
|
|
254
|
+
left.add(join.left_node)
|
|
255
|
+
right.add(join.right_node)
|
|
256
|
+
|
|
257
|
+
potential_basis = left.difference(right)
|
|
258
|
+
base_candidates = [x for x in final_joins_pre if x.left_node in potential_basis]
|
|
259
|
+
if not base_candidates:
|
|
260
|
+
raise SyntaxError(
|
|
261
|
+
f"Unresolvable join dependencies, left requires {left} and right requires {right}"
|
|
262
|
+
)
|
|
263
|
+
base = base_candidates[0]
|
|
264
|
+
final_joins.append(base)
|
|
265
|
+
available_aliases.add(base.left_node)
|
|
266
|
+
available_aliases.add(base.right_node)
|
|
231
267
|
while final_joins_pre:
|
|
232
268
|
new_final_joins_pre: List[NodeJoin] = []
|
|
233
269
|
for join in final_joins_pre:
|
|
234
|
-
if
|
|
235
|
-
final_joins.append(join)
|
|
236
|
-
available_aliases.add(join.left_node)
|
|
237
|
-
available_aliases.add(join.right_node)
|
|
238
|
-
elif join.left_node in available_aliases:
|
|
270
|
+
if join.left_node in available_aliases:
|
|
239
271
|
# we don't need to join twice
|
|
240
272
|
# so whatever join we found first, works
|
|
241
273
|
if join.right_node in available_aliases:
|
|
@@ -245,11 +277,5 @@ def resolve_join_order(joins: List[NodeJoin]) -> List[NodeJoin]:
|
|
|
245
277
|
available_aliases.add(join.right_node)
|
|
246
278
|
else:
|
|
247
279
|
new_final_joins_pre.append(join)
|
|
248
|
-
if len(new_final_joins_pre) == len(final_joins_pre):
|
|
249
|
-
remaining = [join.left_node for join in new_final_joins_pre]
|
|
250
|
-
remaining_right = [join.right_node for join in new_final_joins_pre]
|
|
251
|
-
raise SyntaxError(
|
|
252
|
-
f"did not find any new joins, available {available_aliases} remaining is {remaining + remaining_right} "
|
|
253
|
-
)
|
|
254
280
|
final_joins_pre = new_final_joins_pre
|
|
255
281
|
return final_joins
|
|
@@ -2,13 +2,14 @@ from typing import List
|
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
from trilogy.core.enums import JoinType
|
|
5
|
-
from trilogy.core.models import Concept, Environment, FilterItem, Grain
|
|
5
|
+
from trilogy.core.models import Concept, Environment, FilterItem, Grain, WhereClause
|
|
6
6
|
from trilogy.core.processing.nodes import (
|
|
7
7
|
FilterNode,
|
|
8
8
|
MergeNode,
|
|
9
9
|
NodeJoin,
|
|
10
10
|
History,
|
|
11
11
|
StrategyNode,
|
|
12
|
+
SelectNode,
|
|
12
13
|
)
|
|
13
14
|
from trilogy.core.processing.node_generators.common import (
|
|
14
15
|
resolve_filter_parent_concepts,
|
|
@@ -28,6 +29,7 @@ def gen_filter_node(
|
|
|
28
29
|
depth: int,
|
|
29
30
|
source_concepts,
|
|
30
31
|
history: History | None = None,
|
|
32
|
+
conditions: WhereClause | None = None,
|
|
31
33
|
) -> StrategyNode | None:
|
|
32
34
|
immediate_parent, parent_row_concepts, parent_existence_concepts = (
|
|
33
35
|
resolve_filter_parent_concepts(concept)
|
|
@@ -37,10 +39,10 @@ def gen_filter_node(
|
|
|
37
39
|
where = concept.lineage.where
|
|
38
40
|
|
|
39
41
|
logger.info(
|
|
40
|
-
f"{padding(depth)}{LOGGER_PREFIX}
|
|
42
|
+
f"{padding(depth)}{LOGGER_PREFIX} filter {concept.address} derived from {immediate_parent.address} row parents {[x.address for x in parent_row_concepts]} and {[[y.address] for x in parent_existence_concepts for y in x]} existence parents"
|
|
41
43
|
)
|
|
42
44
|
core_parents = []
|
|
43
|
-
|
|
45
|
+
row_parent: StrategyNode = source_concepts(
|
|
44
46
|
mandatory_list=parent_row_concepts,
|
|
45
47
|
environment=environment,
|
|
46
48
|
g=g,
|
|
@@ -48,33 +50,7 @@ def gen_filter_node(
|
|
|
48
50
|
history=history,
|
|
49
51
|
)
|
|
50
52
|
|
|
51
|
-
|
|
52
|
-
logger.info(
|
|
53
|
-
f"{padding(depth)}{LOGGER_PREFIX} filter node row parents {[x.address for x in parent_row_concepts]} could not be found"
|
|
54
|
-
)
|
|
55
|
-
return None
|
|
56
|
-
|
|
57
|
-
if not local_optional and not parent_existence_concepts:
|
|
58
|
-
optimized_pushdown = True
|
|
59
|
-
else:
|
|
60
|
-
optimized_pushdown = False
|
|
61
|
-
|
|
62
|
-
if optimized_pushdown:
|
|
63
|
-
if parent.conditions:
|
|
64
|
-
parent.conditions = parent.conditions + where.conditional
|
|
65
|
-
else:
|
|
66
|
-
parent.conditions = where.conditional
|
|
67
|
-
parent.output_concepts = [concept]
|
|
68
|
-
parent.grain = Grain(components=[concept])
|
|
69
|
-
parent.rebuild_cache()
|
|
70
|
-
|
|
71
|
-
logger.info(
|
|
72
|
-
f"{padding(depth)}{LOGGER_PREFIX} returning optimized filter node with pushdown to parent with condition {where.conditional}"
|
|
73
|
-
)
|
|
74
|
-
return parent
|
|
75
|
-
|
|
76
|
-
core_parents.append(parent)
|
|
77
|
-
|
|
53
|
+
flattened_existence = [x for y in parent_existence_concepts for x in y]
|
|
78
54
|
if parent_existence_concepts:
|
|
79
55
|
for existence_tuple in parent_existence_concepts:
|
|
80
56
|
if not existence_tuple:
|
|
@@ -95,26 +71,91 @@ def gen_filter_node(
|
|
|
95
71
|
)
|
|
96
72
|
return None
|
|
97
73
|
core_parents.append(parent_existence)
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
)
|
|
111
|
-
|
|
74
|
+
if not row_parent:
|
|
75
|
+
logger.info(
|
|
76
|
+
f"{padding(depth)}{LOGGER_PREFIX} filter node row parents {[x.address for x in parent_row_concepts]} could not be found"
|
|
77
|
+
)
|
|
78
|
+
return None
|
|
79
|
+
|
|
80
|
+
optimized_pushdown = False
|
|
81
|
+
if not local_optional:
|
|
82
|
+
optimized_pushdown = True
|
|
83
|
+
elif conditions and conditions == where:
|
|
84
|
+
logger.info(
|
|
85
|
+
f"{padding(depth)}{LOGGER_PREFIX} query conditions are the same as filter conditions, can optimize across all concepts"
|
|
86
|
+
)
|
|
87
|
+
optimized_pushdown = True
|
|
88
|
+
|
|
89
|
+
if optimized_pushdown:
|
|
90
|
+
if isinstance(row_parent, SelectNode):
|
|
91
|
+
parent = StrategyNode(
|
|
92
|
+
input_concepts=row_parent.output_concepts,
|
|
93
|
+
output_concepts=[concept] + row_parent.output_concepts,
|
|
94
|
+
environment=row_parent.environment,
|
|
95
|
+
g=row_parent.g,
|
|
96
|
+
parents=[row_parent] + core_parents,
|
|
97
|
+
depth=row_parent.depth,
|
|
98
|
+
partial_concepts=row_parent.partial_concepts,
|
|
99
|
+
force_group=False,
|
|
100
|
+
conditions=(
|
|
101
|
+
row_parent.conditions + where.conditional
|
|
102
|
+
if row_parent.conditions
|
|
103
|
+
else where.conditional
|
|
104
|
+
),
|
|
105
|
+
existence_concepts=row_parent.existence_concepts,
|
|
106
|
+
)
|
|
107
|
+
else:
|
|
108
|
+
parent = row_parent
|
|
109
|
+
|
|
110
|
+
expected_output = [concept] + [
|
|
111
|
+
x
|
|
112
|
+
for x in local_optional
|
|
113
|
+
if x.address in [y.address for y in parent.output_concepts]
|
|
114
|
+
]
|
|
115
|
+
parent.add_parents(core_parents)
|
|
116
|
+
parent.add_condition(where.conditional)
|
|
117
|
+
parent.add_existence_concepts(flattened_existence)
|
|
118
|
+
parent.set_output_concepts(expected_output)
|
|
119
|
+
parent.grain = Grain(
|
|
120
|
+
components=(
|
|
121
|
+
list(immediate_parent.keys)
|
|
122
|
+
if immediate_parent.keys
|
|
123
|
+
else [immediate_parent]
|
|
124
|
+
)
|
|
125
|
+
+ [
|
|
126
|
+
x
|
|
127
|
+
for x in local_optional
|
|
128
|
+
if x.address in [y.address for y in parent.output_concepts]
|
|
129
|
+
]
|
|
130
|
+
)
|
|
131
|
+
parent.rebuild_cache()
|
|
132
|
+
|
|
133
|
+
logger.info(
|
|
134
|
+
f"{padding(depth)}{LOGGER_PREFIX} returning optimized filter node with pushdown to parent with condition {where.conditional}"
|
|
135
|
+
)
|
|
136
|
+
filter_node = parent
|
|
137
|
+
else:
|
|
138
|
+
core_parents.append(row_parent)
|
|
139
|
+
|
|
140
|
+
filter_node = FilterNode(
|
|
141
|
+
input_concepts=unique(
|
|
142
|
+
[immediate_parent] + parent_row_concepts + flattened_existence,
|
|
143
|
+
"address",
|
|
144
|
+
),
|
|
145
|
+
output_concepts=[concept, immediate_parent] + parent_row_concepts,
|
|
146
|
+
environment=environment,
|
|
147
|
+
g=g,
|
|
148
|
+
parents=core_parents,
|
|
149
|
+
grain=Grain(
|
|
150
|
+
components=[immediate_parent] + parent_row_concepts,
|
|
151
|
+
),
|
|
152
|
+
)
|
|
112
153
|
|
|
113
|
-
assert filter_node.resolve().grain == Grain(
|
|
114
|
-
components=[immediate_parent] + parent_row_concepts,
|
|
115
|
-
)
|
|
116
154
|
if not local_optional or all(
|
|
117
|
-
[
|
|
155
|
+
[
|
|
156
|
+
x.address in [y.address for y in filter_node.output_concepts]
|
|
157
|
+
for x in local_optional
|
|
158
|
+
]
|
|
118
159
|
):
|
|
119
160
|
outputs = [
|
|
120
161
|
x
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from trilogy.core.models import Concept, Environment, LooseConceptList
|
|
1
|
+
from trilogy.core.models import Concept, Environment, LooseConceptList, WhereClause
|
|
2
2
|
from trilogy.utility import unique
|
|
3
3
|
from trilogy.core.processing.nodes import GroupNode, StrategyNode, History
|
|
4
4
|
from typing import List
|
|
@@ -22,6 +22,7 @@ def gen_group_node(
|
|
|
22
22
|
depth: int,
|
|
23
23
|
source_concepts,
|
|
24
24
|
history: History | None = None,
|
|
25
|
+
conditions: WhereClause | None = None,
|
|
25
26
|
):
|
|
26
27
|
# aggregates MUST always group to the proper grain
|
|
27
28
|
# except when the
|
|
@@ -53,6 +54,7 @@ def gen_group_node(
|
|
|
53
54
|
g=g,
|
|
54
55
|
depth=depth,
|
|
55
56
|
history=history,
|
|
57
|
+
conditions=conditions,
|
|
56
58
|
)
|
|
57
59
|
if not parent:
|
|
58
60
|
logger.info(
|
|
@@ -12,7 +12,7 @@ from typing import List
|
|
|
12
12
|
|
|
13
13
|
from trilogy.core.enums import JoinType, PurposeLineage
|
|
14
14
|
from trilogy.constants import logger
|
|
15
|
-
from trilogy.core.processing.utility import padding
|
|
15
|
+
from trilogy.core.processing.utility import padding
|
|
16
16
|
from trilogy.core.processing.node_generators.common import concept_to_relevant_joins
|
|
17
17
|
|
|
18
18
|
|
|
@@ -28,6 +28,8 @@ def gen_rowset_node(
|
|
|
28
28
|
source_concepts,
|
|
29
29
|
history: History | None = None,
|
|
30
30
|
) -> StrategyNode | None:
|
|
31
|
+
from trilogy.core.query_processor import get_query_node
|
|
32
|
+
|
|
31
33
|
if not isinstance(concept.lineage, RowsetItem):
|
|
32
34
|
raise SyntaxError(
|
|
33
35
|
f"Invalid lineage passed into rowset fetch, got {type(concept.lineage)}, expected {RowsetItem}"
|
|
@@ -35,54 +37,14 @@ def gen_rowset_node(
|
|
|
35
37
|
lineage: RowsetItem = concept.lineage
|
|
36
38
|
rowset: RowsetDerivationStatement = lineage.rowset
|
|
37
39
|
select: SelectStatement | MultiSelectStatement = lineage.rowset.select
|
|
38
|
-
|
|
39
|
-
if where := select.where_clause:
|
|
40
|
-
targets = select.output_components + where.conditional.row_arguments
|
|
41
|
-
for sub_select in where.conditional.existence_arguments:
|
|
42
|
-
logger.info(
|
|
43
|
-
f"{padding(depth)}{LOGGER_PREFIX} generating parent existence node with {[x.address for x in sub_select]}"
|
|
44
|
-
)
|
|
45
|
-
parent_check = source_concepts(
|
|
46
|
-
mandatory_list=sub_select,
|
|
47
|
-
environment=environment,
|
|
48
|
-
g=g,
|
|
49
|
-
depth=depth + 1,
|
|
50
|
-
history=history,
|
|
51
|
-
)
|
|
52
|
-
if not parent_check:
|
|
53
|
-
logger.info(
|
|
54
|
-
f"{padding(depth)}{LOGGER_PREFIX} Cannot generate parent existence node for rowset node for {concept}"
|
|
55
|
-
)
|
|
56
|
-
return None
|
|
57
|
-
existence_parents.append(parent_check)
|
|
58
|
-
else:
|
|
59
|
-
targets = select.output_components
|
|
60
|
-
node: StrategyNode = source_concepts(
|
|
61
|
-
mandatory_list=unique(targets, "address"),
|
|
62
|
-
environment=environment,
|
|
63
|
-
g=g,
|
|
64
|
-
depth=depth + 1,
|
|
65
|
-
history=history,
|
|
66
|
-
)
|
|
40
|
+
node = get_query_node(environment, select, graph=g, history=history)
|
|
67
41
|
|
|
68
42
|
if not node:
|
|
69
43
|
logger.info(
|
|
70
|
-
f"{padding(depth)}{LOGGER_PREFIX} Cannot generate rowset node for {concept}"
|
|
44
|
+
f"{padding(depth)}{LOGGER_PREFIX} Cannot generate parent rowset node for {concept}"
|
|
71
45
|
)
|
|
72
46
|
return None
|
|
73
|
-
# add our existence concepts in
|
|
74
|
-
if existence_parents:
|
|
75
|
-
node.parents += existence_parents
|
|
76
|
-
# we don't need to join to any existence parents
|
|
77
|
-
# if isinstance(node, MergeNode) and node.node_joins is None:
|
|
78
|
-
# # set it explicitly to empty to avoid inference
|
|
79
|
-
# node.node_joins = []
|
|
80
|
-
for parent in existence_parents:
|
|
81
|
-
for x in parent.output_concepts:
|
|
82
|
-
if x.address not in node.output_lcl:
|
|
83
|
-
node.existence_concepts.append(x)
|
|
84
47
|
|
|
85
|
-
node.conditions = select.where_clause.conditional if select.where_clause else None
|
|
86
48
|
enrichment = set([x.address for x in local_optional])
|
|
87
49
|
rowset_relevant = [x for x in rowset.derived_concepts]
|
|
88
50
|
select_hidden = set([x.address for x in select.hidden_components])
|
|
@@ -113,26 +75,23 @@ def gen_rowset_node(
|
|
|
113
75
|
# but don't include anything aggregate at this point
|
|
114
76
|
assert node.resolution_cache
|
|
115
77
|
|
|
116
|
-
node.
|
|
78
|
+
node.grain = concept_list_to_grain(
|
|
117
79
|
node.output_concepts, parent_sources=node.resolution_cache.datasources
|
|
118
80
|
)
|
|
119
81
|
|
|
82
|
+
node.rebuild_cache()
|
|
83
|
+
|
|
120
84
|
possible_joins = concept_to_relevant_joins(additional_relevant)
|
|
121
|
-
if not local_optional
|
|
85
|
+
if not local_optional or all(
|
|
86
|
+
x.address in [y.address for y in node.output_concepts] for x in local_optional
|
|
87
|
+
):
|
|
122
88
|
logger.info(
|
|
123
|
-
f"{padding(depth)}{LOGGER_PREFIX} no
|
|
89
|
+
f"{padding(depth)}{LOGGER_PREFIX} no enrichment required for rowset node as all optional found or no optional; exiting early."
|
|
124
90
|
)
|
|
125
91
|
return node
|
|
126
92
|
if not possible_joins:
|
|
127
93
|
logger.info(
|
|
128
|
-
f"{padding(depth)}{LOGGER_PREFIX} no possible joins for rowset node;
|
|
129
|
-
)
|
|
130
|
-
return node
|
|
131
|
-
if all(
|
|
132
|
-
[x.address in [y.address for y in node.output_concepts] for x in local_optional]
|
|
133
|
-
):
|
|
134
|
-
logger.info(
|
|
135
|
-
f"{padding(depth)}{LOGGER_PREFIX} all enriched concepts returned from base rowset node; exiting early"
|
|
94
|
+
f"{padding(depth)}{LOGGER_PREFIX} no possible joins for rowset node to get {[x.address for x in local_optional]}; have {[x.address for x in node.output_concepts]}"
|
|
136
95
|
)
|
|
137
96
|
return node
|
|
138
97
|
enrich_node: MergeNode = source_concepts( # this fetches the parent + join keys
|
|
@@ -7,6 +7,7 @@ from trilogy.core.models import (
|
|
|
7
7
|
Grain,
|
|
8
8
|
LooseConceptList,
|
|
9
9
|
Datasource,
|
|
10
|
+
WhereClause,
|
|
10
11
|
)
|
|
11
12
|
from trilogy.core.processing.nodes import (
|
|
12
13
|
StrategyNode,
|
|
@@ -67,17 +68,6 @@ def dm_to_strategy_node(
|
|
|
67
68
|
f"{padding(depth)}{LOGGER_PREFIX} target grain is not subset of datasource grain {datasource.grain}, required to group"
|
|
68
69
|
)
|
|
69
70
|
force_group = True
|
|
70
|
-
# if isinstance(datasource, MergeDatasource):
|
|
71
|
-
# # if we're within a namespace, don't find merge nodes
|
|
72
|
-
# bcandidate: StrategyNode = gen_environment_merge_node(
|
|
73
|
-
# all_concepts=dm.matched.concepts,
|
|
74
|
-
# environment=environment,
|
|
75
|
-
# g=g,
|
|
76
|
-
# depth=depth,
|
|
77
|
-
# datasource=datasource,
|
|
78
|
-
# source_concepts=source_concepts,
|
|
79
|
-
# )
|
|
80
|
-
# else:
|
|
81
71
|
bcandidate: StrategyNode = SelectNode(
|
|
82
72
|
input_concepts=[c.concept for c in datasource.columns],
|
|
83
73
|
output_concepts=dm.matched.concepts,
|
|
@@ -116,6 +106,7 @@ def gen_select_nodes_from_tables_v2(
|
|
|
116
106
|
target_grain: Grain,
|
|
117
107
|
source_concepts: Callable,
|
|
118
108
|
accept_partial: bool = False,
|
|
109
|
+
conditions: WhereClause | None = None,
|
|
119
110
|
) -> tuple[bool, list[Concept], list[StrategyNode]]:
|
|
120
111
|
# if we have only constants
|
|
121
112
|
# we don't need a table
|
|
@@ -252,6 +243,7 @@ def gen_select_node_from_table(
|
|
|
252
243
|
target_grain: Grain,
|
|
253
244
|
source_concepts,
|
|
254
245
|
accept_partial: bool = False,
|
|
246
|
+
conditions: WhereClause | None = None,
|
|
255
247
|
) -> Optional[StrategyNode]:
|
|
256
248
|
# if we have only constants
|
|
257
249
|
# we don't need a table
|
|
@@ -372,6 +364,9 @@ def gen_select_node_from_table(
|
|
|
372
364
|
grain=Grain(components=all_concepts),
|
|
373
365
|
conditions=datasource.where.conditional if datasource.where else None,
|
|
374
366
|
)
|
|
367
|
+
# if conditions:
|
|
368
|
+
# for component in conditions.components:
|
|
369
|
+
# if
|
|
375
370
|
# we need to nest the group node one further
|
|
376
371
|
if force_group is True:
|
|
377
372
|
candidate: StrategyNode = GroupNode(
|
|
@@ -407,6 +402,7 @@ def gen_select_node(
|
|
|
407
402
|
fail_if_not_found: bool = True,
|
|
408
403
|
accept_partial_optional: bool = True,
|
|
409
404
|
target_grain: Grain | None = None,
|
|
405
|
+
conditions: WhereClause | None = None,
|
|
410
406
|
) -> StrategyNode | None:
|
|
411
407
|
all_concepts = [concept] + local_optional
|
|
412
408
|
all_lcl = LooseConceptList(concepts=all_concepts)
|
|
@@ -445,6 +441,7 @@ def gen_select_node(
|
|
|
445
441
|
accept_partial=accept_partial,
|
|
446
442
|
target_grain=target_grain,
|
|
447
443
|
source_concepts=source_concepts,
|
|
444
|
+
conditions=conditions,
|
|
448
445
|
)
|
|
449
446
|
if ds:
|
|
450
447
|
logger.info(
|
|
@@ -461,6 +458,7 @@ def gen_select_node(
|
|
|
461
458
|
target_grain=target_grain,
|
|
462
459
|
accept_partial=accept_partial,
|
|
463
460
|
source_concepts=source_concepts,
|
|
461
|
+
conditions=conditions,
|
|
464
462
|
)
|
|
465
463
|
if parents and (all_found or accept_partial_optional):
|
|
466
464
|
all_partial = [
|
|
@@ -488,7 +486,6 @@ def gen_select_node(
|
|
|
488
486
|
if len(parents) == 1:
|
|
489
487
|
candidate = parents[0]
|
|
490
488
|
else:
|
|
491
|
-
|
|
492
489
|
candidate = MergeNode(
|
|
493
490
|
output_concepts=[concept] + found,
|
|
494
491
|
input_concepts=[concept] + found,
|
|
@@ -499,8 +496,8 @@ def gen_select_node(
|
|
|
499
496
|
partial_concepts=all_partial,
|
|
500
497
|
grain=inferred_grain,
|
|
501
498
|
)
|
|
499
|
+
|
|
502
500
|
candidate.depth += 1
|
|
503
|
-
# source_grain = candidate.grain
|
|
504
501
|
if force_group:
|
|
505
502
|
logger.info(
|
|
506
503
|
f"{padding(depth)}{LOGGER_PREFIX} datasource grain {inferred_grain} does not match target grain {target_grain} for select, adding group node"
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
from typing import List
|
|
2
2
|
|
|
3
3
|
|
|
4
|
-
from trilogy.core.models import Concept, Function
|
|
5
|
-
from trilogy.core.processing.nodes import
|
|
4
|
+
from trilogy.core.models import Concept, Function, WhereClause
|
|
5
|
+
from trilogy.core.processing.nodes import UnnestNode, History, StrategyNode
|
|
6
6
|
from trilogy.core.processing.utility import padding
|
|
7
7
|
from trilogy.constants import logger
|
|
8
8
|
|
|
@@ -17,6 +17,7 @@ def gen_unnest_node(
|
|
|
17
17
|
depth: int,
|
|
18
18
|
source_concepts,
|
|
19
19
|
history: History | None = None,
|
|
20
|
+
conditions: WhereClause | None = None,
|
|
20
21
|
) -> StrategyNode | None:
|
|
21
22
|
arguments = []
|
|
22
23
|
if isinstance(concept.lineage, Function):
|
|
@@ -28,6 +29,7 @@ def gen_unnest_node(
|
|
|
28
29
|
g=g,
|
|
29
30
|
depth=depth + 1,
|
|
30
31
|
history=history,
|
|
32
|
+
conditions=conditions,
|
|
31
33
|
)
|
|
32
34
|
if not parent:
|
|
33
35
|
logger.info(
|
|
@@ -46,7 +48,7 @@ def gen_unnest_node(
|
|
|
46
48
|
# we need to sometimes nest an unnest node,
|
|
47
49
|
# as unnest operations are not valid in all situations
|
|
48
50
|
# TODO: inline this node when we can detect it's safe
|
|
49
|
-
new =
|
|
51
|
+
new = StrategyNode(
|
|
50
52
|
input_concepts=[concept] + local_optional,
|
|
51
53
|
output_concepts=[concept] + local_optional,
|
|
52
54
|
environment=environment,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import List
|
|
2
2
|
|
|
3
3
|
|
|
4
|
-
from trilogy.core.models import Concept, WindowItem, Environment
|
|
4
|
+
from trilogy.core.models import Concept, WindowItem, Environment, WhereClause
|
|
5
5
|
from trilogy.utility import unique
|
|
6
6
|
from trilogy.core.processing.nodes import (
|
|
7
7
|
WindowNode,
|
|
@@ -38,19 +38,36 @@ def gen_window_node(
|
|
|
38
38
|
depth: int,
|
|
39
39
|
source_concepts,
|
|
40
40
|
history: History | None = None,
|
|
41
|
+
conditions: WhereClause | None = None,
|
|
41
42
|
) -> WindowNode | MergeNode | None:
|
|
42
43
|
parent_concepts = resolve_window_parent_concepts(concept)
|
|
43
|
-
|
|
44
44
|
parent_node = source_concepts(
|
|
45
45
|
mandatory_list=parent_concepts,
|
|
46
46
|
environment=environment,
|
|
47
47
|
g=g,
|
|
48
48
|
depth=depth + 1,
|
|
49
49
|
history=history,
|
|
50
|
+
conditions=conditions,
|
|
50
51
|
)
|
|
51
52
|
if not parent_node:
|
|
52
53
|
logger.info(f"{padding(depth)}{LOGGER_PREFIX} window node parents unresolvable")
|
|
53
54
|
return None
|
|
55
|
+
parent_node.resolve()
|
|
56
|
+
if not all(
|
|
57
|
+
[
|
|
58
|
+
x.address in [y.address for y in parent_node.output_concepts]
|
|
59
|
+
for x in parent_concepts
|
|
60
|
+
]
|
|
61
|
+
):
|
|
62
|
+
missing = [
|
|
63
|
+
x
|
|
64
|
+
for x in parent_concepts
|
|
65
|
+
if x.address not in [y.address for y in parent_node.output_concepts]
|
|
66
|
+
]
|
|
67
|
+
logger.info(
|
|
68
|
+
f"{padding(depth)}{LOGGER_PREFIX} window node parents unresolvable, missing {missing}"
|
|
69
|
+
)
|
|
70
|
+
raise SyntaxError
|
|
54
71
|
_window_node = WindowNode(
|
|
55
72
|
input_concepts=parent_concepts,
|
|
56
73
|
output_concepts=[concept] + parent_concepts,
|
|
@@ -61,6 +78,8 @@ def gen_window_node(
|
|
|
61
78
|
],
|
|
62
79
|
depth=depth,
|
|
63
80
|
)
|
|
81
|
+
_window_node.rebuild_cache()
|
|
82
|
+
_window_node.resolve()
|
|
64
83
|
window_node = MergeNode(
|
|
65
84
|
parents=[_window_node],
|
|
66
85
|
environment=environment,
|
|
@@ -71,6 +90,7 @@ def gen_window_node(
|
|
|
71
90
|
force_group=False,
|
|
72
91
|
depth=depth,
|
|
73
92
|
)
|
|
93
|
+
window_node.resolve()
|
|
74
94
|
if not local_optional:
|
|
75
95
|
return window_node
|
|
76
96
|
logger.info(f"{padding(depth)}{LOGGER_PREFIX} window node requires enrichment")
|
|
@@ -84,4 +104,5 @@ def gen_window_node(
|
|
|
84
104
|
source_concepts=source_concepts,
|
|
85
105
|
log_lambda=create_log_lambda(LOGGER_PREFIX, depth, logger),
|
|
86
106
|
history=history,
|
|
107
|
+
conditions=conditions,
|
|
87
108
|
)
|