pytrilogy 0.0.1.103__py3-none-any.whl → 0.0.1.105__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.1.103.dist-info → pytrilogy-0.0.1.105.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.1.103.dist-info → pytrilogy-0.0.1.105.dist-info}/RECORD +18 -18
- {pytrilogy-0.0.1.103.dist-info → pytrilogy-0.0.1.105.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/core/models.py +176 -45
- trilogy/core/processing/concept_strategies_v3.py +6 -3
- trilogy/core/processing/node_generators/common.py +19 -7
- trilogy/core/processing/node_generators/filter_node.py +39 -10
- trilogy/core/processing/node_generators/merge_node.py +11 -1
- trilogy/core/processing/node_generators/select_node.py +275 -53
- trilogy/core/processing/nodes/__init__.py +54 -1
- trilogy/dialect/base.py +12 -3
- trilogy/parsing/common.py +30 -0
- trilogy/parsing/parse_engine.py +65 -94
- trilogy/parsing/render.py +0 -122
- {pytrilogy-0.0.1.103.dist-info → pytrilogy-0.0.1.105.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.1.103.dist-info → pytrilogy-0.0.1.105.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.1.103.dist-info → pytrilogy-0.0.1.105.dist-info}/top_level.txt +0 -0
|
@@ -11,7 +11,7 @@ from trilogy.core.processing.node_generators.common import (
|
|
|
11
11
|
resolve_filter_parent_concepts,
|
|
12
12
|
)
|
|
13
13
|
from trilogy.constants import logger
|
|
14
|
-
from trilogy.core.processing.utility import padding
|
|
14
|
+
from trilogy.core.processing.utility import padding, unique
|
|
15
15
|
from trilogy.core.processing.node_generators.common import concept_to_relevant_joins
|
|
16
16
|
|
|
17
17
|
LOGGER_PREFIX = "[GEN_FILTER_NODE]"
|
|
@@ -26,35 +26,64 @@ def gen_filter_node(
|
|
|
26
26
|
source_concepts,
|
|
27
27
|
history: History | None = None,
|
|
28
28
|
) -> MergeNode | FilterNode | None:
|
|
29
|
-
immediate_parent,
|
|
29
|
+
immediate_parent, parent_row_concepts, parent_existence_concepts = (
|
|
30
|
+
resolve_filter_parent_concepts(concept)
|
|
31
|
+
)
|
|
30
32
|
|
|
31
|
-
logger.info(
|
|
33
|
+
logger.info(
|
|
34
|
+
f"{padding(depth)}{LOGGER_PREFIX} fetching filter node row parents {[x.address for x in parent_row_concepts]}"
|
|
35
|
+
)
|
|
36
|
+
core_parents = []
|
|
32
37
|
parent = source_concepts(
|
|
33
|
-
mandatory_list=
|
|
38
|
+
mandatory_list=parent_row_concepts,
|
|
34
39
|
environment=environment,
|
|
35
40
|
g=g,
|
|
36
41
|
depth=depth + 1,
|
|
37
42
|
history=history,
|
|
38
43
|
)
|
|
44
|
+
|
|
39
45
|
if not parent:
|
|
40
46
|
return None
|
|
47
|
+
core_parents.append(parent)
|
|
48
|
+
if parent_existence_concepts:
|
|
49
|
+
logger.info(
|
|
50
|
+
f"{padding(depth)}{LOGGER_PREFIX} fetching filter node existence parents {[x.address for x in parent_existence_concepts]}"
|
|
51
|
+
)
|
|
52
|
+
parent_existence = source_concepts(
|
|
53
|
+
mandatory_list=parent_existence_concepts,
|
|
54
|
+
environment=environment,
|
|
55
|
+
g=g,
|
|
56
|
+
depth=depth + 1,
|
|
57
|
+
history=history,
|
|
58
|
+
)
|
|
59
|
+
if not parent_existence:
|
|
60
|
+
return None
|
|
61
|
+
core_parents.append(parent_existence)
|
|
62
|
+
|
|
41
63
|
filter_node = FilterNode(
|
|
42
|
-
input_concepts=
|
|
43
|
-
|
|
64
|
+
input_concepts=unique(
|
|
65
|
+
[immediate_parent] + parent_row_concepts + parent_existence_concepts,
|
|
66
|
+
"address",
|
|
67
|
+
),
|
|
68
|
+
output_concepts=[concept, immediate_parent] + parent_row_concepts,
|
|
44
69
|
environment=environment,
|
|
45
70
|
g=g,
|
|
46
|
-
parents=
|
|
71
|
+
parents=core_parents,
|
|
47
72
|
)
|
|
48
|
-
if not local_optional
|
|
73
|
+
if not local_optional or all(
|
|
74
|
+
[x.address in [y.address for y in parent_row_concepts] for x in local_optional]
|
|
75
|
+
):
|
|
49
76
|
return filter_node
|
|
50
77
|
enrich_node = source_concepts( # this fetches the parent + join keys
|
|
51
78
|
# to then connect to the rest of the query
|
|
52
|
-
mandatory_list=[immediate_parent] +
|
|
79
|
+
mandatory_list=[immediate_parent] + parent_row_concepts + local_optional,
|
|
53
80
|
environment=environment,
|
|
54
81
|
g=g,
|
|
55
82
|
depth=depth + 1,
|
|
56
83
|
history=history,
|
|
57
84
|
)
|
|
85
|
+
if not enrich_node:
|
|
86
|
+
return filter_node
|
|
58
87
|
x = MergeNode(
|
|
59
88
|
input_concepts=[concept, immediate_parent] + local_optional,
|
|
60
89
|
output_concepts=[
|
|
@@ -73,7 +102,7 @@ def gen_filter_node(
|
|
|
73
102
|
left_node=enrich_node,
|
|
74
103
|
right_node=filter_node,
|
|
75
104
|
concepts=concept_to_relevant_joins(
|
|
76
|
-
[immediate_parent] +
|
|
105
|
+
[immediate_parent] + parent_row_concepts
|
|
77
106
|
),
|
|
78
107
|
join_type=JoinType.LEFT_OUTER,
|
|
79
108
|
filter_to_mutual=False,
|
|
@@ -87,8 +87,18 @@ def gen_merge_node(
|
|
|
87
87
|
) -> Optional[MergeNode]:
|
|
88
88
|
join_candidates: List[PathInfo] = []
|
|
89
89
|
# anchor on datasources
|
|
90
|
+
final_all_concepts = []
|
|
91
|
+
# implicit_upstream = {}
|
|
92
|
+
for x in all_concepts:
|
|
93
|
+
# if x.derivation in (PurposeLineage.AGGREGATE, PurposeLineage.BASIC):
|
|
94
|
+
# final_all_concepts +=resolve_function_parent_concepts(x)
|
|
95
|
+
# elif x.derivation == PurposeLineage.FILTER:
|
|
96
|
+
# final_all_concepts +=resolve_filter_parent_concepts(x)
|
|
97
|
+
# else:
|
|
98
|
+
# final_all_concepts.append(x)
|
|
99
|
+
final_all_concepts.append(x)
|
|
90
100
|
for datasource in environment.datasources.values():
|
|
91
|
-
path = identify_ds_join_paths(
|
|
101
|
+
path = identify_ds_join_paths(final_all_concepts, g, datasource, accept_partial)
|
|
92
102
|
if path and path.reduced_concepts:
|
|
93
103
|
join_candidates.append(path)
|
|
94
104
|
join_candidates.sort(key=lambda x: sum([len(v) for v in x.paths.values()]))
|
|
@@ -2,7 +2,13 @@ from itertools import combinations
|
|
|
2
2
|
from typing import List, Optional
|
|
3
3
|
|
|
4
4
|
from trilogy.core.enums import PurposeLineage
|
|
5
|
-
from trilogy.core.models import
|
|
5
|
+
from trilogy.core.models import (
|
|
6
|
+
Concept,
|
|
7
|
+
Environment,
|
|
8
|
+
Grain,
|
|
9
|
+
LooseConceptList,
|
|
10
|
+
Datasource,
|
|
11
|
+
)
|
|
6
12
|
from trilogy.core.processing.nodes import (
|
|
7
13
|
StrategyNode,
|
|
8
14
|
SelectNode,
|
|
@@ -15,10 +21,211 @@ import networkx as nx
|
|
|
15
21
|
from trilogy.core.graph_models import concept_to_node, datasource_to_node
|
|
16
22
|
from trilogy.constants import logger
|
|
17
23
|
from trilogy.core.processing.utility import padding
|
|
24
|
+
from dataclasses import dataclass
|
|
18
25
|
|
|
19
26
|
LOGGER_PREFIX = "[GEN_SELECT_NODE]"
|
|
20
27
|
|
|
21
28
|
|
|
29
|
+
@dataclass
|
|
30
|
+
class DatasourceMatch:
|
|
31
|
+
key: str
|
|
32
|
+
datasource: Datasource
|
|
33
|
+
matched: LooseConceptList
|
|
34
|
+
partial: LooseConceptList
|
|
35
|
+
|
|
36
|
+
def __repr__(self):
|
|
37
|
+
return f"DatasourceMatch({self.key}, {self.datasource.identifier}, {str(self.matched)}, {str(self.partial)})"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def dm_to_strategy_node(
|
|
41
|
+
dm: DatasourceMatch,
|
|
42
|
+
target_grain: Grain,
|
|
43
|
+
environment: Environment,
|
|
44
|
+
g: nx.DiGraph,
|
|
45
|
+
depth: int,
|
|
46
|
+
accept_partial: bool = False,
|
|
47
|
+
) -> StrategyNode:
|
|
48
|
+
datasource = dm.datasource
|
|
49
|
+
if target_grain and target_grain.issubset(datasource.grain):
|
|
50
|
+
if all([x in dm.matched for x in target_grain.components]):
|
|
51
|
+
force_group = False
|
|
52
|
+
# if we are not returning the grain
|
|
53
|
+
# we have to group
|
|
54
|
+
else:
|
|
55
|
+
logger.info(
|
|
56
|
+
f"{padding(depth)}{LOGGER_PREFIX} not all grain components are in output {str(dm.matched)}, group to actual grain"
|
|
57
|
+
)
|
|
58
|
+
force_group = True
|
|
59
|
+
elif all([x in dm.matched for x in datasource.grain.components]):
|
|
60
|
+
logger.info(
|
|
61
|
+
f"{padding(depth)}{LOGGER_PREFIX} query output includes all grain components, no reason to group further"
|
|
62
|
+
)
|
|
63
|
+
force_group = False
|
|
64
|
+
else:
|
|
65
|
+
logger.info(
|
|
66
|
+
f"{padding(depth)}{LOGGER_PREFIX} target grain is not subset of datasource grain {datasource.grain}, required to group"
|
|
67
|
+
)
|
|
68
|
+
force_group = True
|
|
69
|
+
bcandidate: StrategyNode = SelectNode(
|
|
70
|
+
input_concepts=[c.concept for c in datasource.columns],
|
|
71
|
+
output_concepts=dm.matched.concepts,
|
|
72
|
+
environment=environment,
|
|
73
|
+
g=g,
|
|
74
|
+
parents=[],
|
|
75
|
+
depth=depth,
|
|
76
|
+
partial_concepts=dm.partial.concepts,
|
|
77
|
+
accept_partial=accept_partial,
|
|
78
|
+
datasource=datasource,
|
|
79
|
+
grain=Grain(components=dm.matched.concepts),
|
|
80
|
+
)
|
|
81
|
+
# we need to nest the group node one further
|
|
82
|
+
if force_group is True:
|
|
83
|
+
candidate: StrategyNode = GroupNode(
|
|
84
|
+
output_concepts=dm.matched.concepts,
|
|
85
|
+
input_concepts=dm.matched.concepts,
|
|
86
|
+
environment=environment,
|
|
87
|
+
g=g,
|
|
88
|
+
parents=[bcandidate],
|
|
89
|
+
depth=depth,
|
|
90
|
+
partial_concepts=bcandidate.partial_concepts,
|
|
91
|
+
)
|
|
92
|
+
else:
|
|
93
|
+
candidate = bcandidate
|
|
94
|
+
return candidate
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def gen_select_nodes_from_tables_v2(
|
|
98
|
+
mandatory_concept: Concept,
|
|
99
|
+
all_concepts: List[Concept],
|
|
100
|
+
g: nx.DiGraph,
|
|
101
|
+
environment: Environment,
|
|
102
|
+
depth: int,
|
|
103
|
+
target_grain: Grain,
|
|
104
|
+
accept_partial: bool = False,
|
|
105
|
+
) -> tuple[bool, list[Concept], list[StrategyNode]]:
|
|
106
|
+
# if we have only constants
|
|
107
|
+
# we don't need a table
|
|
108
|
+
# so verify nothing, select node will render
|
|
109
|
+
all_lcl = LooseConceptList(concepts=all_concepts)
|
|
110
|
+
if all([c.derivation == PurposeLineage.CONSTANT for c in all_lcl]):
|
|
111
|
+
logger.info(
|
|
112
|
+
f"{padding(depth)}{LOGGER_PREFIX} All concepts {[x.address for x in all_lcl]} are constants, returning constant node"
|
|
113
|
+
)
|
|
114
|
+
return (
|
|
115
|
+
True,
|
|
116
|
+
all_lcl.concepts,
|
|
117
|
+
[
|
|
118
|
+
ConstantNode(
|
|
119
|
+
output_concepts=all_lcl.concepts,
|
|
120
|
+
input_concepts=[],
|
|
121
|
+
environment=environment,
|
|
122
|
+
g=g,
|
|
123
|
+
parents=[],
|
|
124
|
+
depth=depth,
|
|
125
|
+
# no partial for constants
|
|
126
|
+
partial_concepts=[],
|
|
127
|
+
force_group=False,
|
|
128
|
+
)
|
|
129
|
+
],
|
|
130
|
+
)
|
|
131
|
+
# otherwise, we need to look for a table
|
|
132
|
+
nodes_to_find = [concept_to_node(x.with_default_grain()) for x in all_lcl.concepts]
|
|
133
|
+
matches: dict[str, DatasourceMatch] = {}
|
|
134
|
+
for k, datasource in environment.datasources.items():
|
|
135
|
+
matched = []
|
|
136
|
+
matched_paths = []
|
|
137
|
+
for idx, req_concept in enumerate(nodes_to_find):
|
|
138
|
+
try:
|
|
139
|
+
path = nx.shortest_path(
|
|
140
|
+
g,
|
|
141
|
+
source=datasource_to_node(datasource),
|
|
142
|
+
target=req_concept,
|
|
143
|
+
)
|
|
144
|
+
ds_valid = (
|
|
145
|
+
sum(
|
|
146
|
+
[
|
|
147
|
+
1 if g.nodes[node]["type"] == "datasource" else 0
|
|
148
|
+
for node in path
|
|
149
|
+
]
|
|
150
|
+
)
|
|
151
|
+
== 1
|
|
152
|
+
)
|
|
153
|
+
address_valid = (
|
|
154
|
+
sum(
|
|
155
|
+
[
|
|
156
|
+
(
|
|
157
|
+
1
|
|
158
|
+
if g.nodes[node]["type"] == "concept"
|
|
159
|
+
and g.nodes[node]["concept"].address
|
|
160
|
+
!= all_lcl.concepts[idx].address
|
|
161
|
+
else 0
|
|
162
|
+
)
|
|
163
|
+
for node in path
|
|
164
|
+
]
|
|
165
|
+
)
|
|
166
|
+
== 0
|
|
167
|
+
)
|
|
168
|
+
if ds_valid and address_valid:
|
|
169
|
+
matched_paths.append(path)
|
|
170
|
+
matched.append(all_lcl.concepts[idx])
|
|
171
|
+
except nx.NodeNotFound:
|
|
172
|
+
continue
|
|
173
|
+
except nx.exception.NetworkXNoPath:
|
|
174
|
+
continue
|
|
175
|
+
dm = DatasourceMatch(
|
|
176
|
+
key=k,
|
|
177
|
+
datasource=datasource,
|
|
178
|
+
matched=LooseConceptList(concepts=matched),
|
|
179
|
+
partial=LooseConceptList(
|
|
180
|
+
concepts=[
|
|
181
|
+
c.concept
|
|
182
|
+
for c in datasource.columns
|
|
183
|
+
if not c.is_complete and c.concept.address in all_lcl
|
|
184
|
+
]
|
|
185
|
+
),
|
|
186
|
+
)
|
|
187
|
+
if not matched:
|
|
188
|
+
continue
|
|
189
|
+
if mandatory_concept.address not in dm.matched:
|
|
190
|
+
continue
|
|
191
|
+
if not accept_partial and dm.partial.addresses:
|
|
192
|
+
continue
|
|
193
|
+
matches[k] = dm
|
|
194
|
+
found: set[str] = set()
|
|
195
|
+
all_found = False
|
|
196
|
+
all_checked = False
|
|
197
|
+
to_return: list[StrategyNode] = []
|
|
198
|
+
if not matches:
|
|
199
|
+
return False, [], []
|
|
200
|
+
while not all_found and not all_checked:
|
|
201
|
+
final_key: str = max(
|
|
202
|
+
matches,
|
|
203
|
+
key=lambda x: len(
|
|
204
|
+
[m for m in matches[x].matched.addresses if m not in found]
|
|
205
|
+
)
|
|
206
|
+
- 0.1 * len(matches[x].partial.addresses),
|
|
207
|
+
)
|
|
208
|
+
final: DatasourceMatch = matches[final_key]
|
|
209
|
+
candidate = dm_to_strategy_node(
|
|
210
|
+
final,
|
|
211
|
+
target_grain=Grain(
|
|
212
|
+
components=[
|
|
213
|
+
x for x in target_grain.components if x.address in final.matched
|
|
214
|
+
]
|
|
215
|
+
),
|
|
216
|
+
environment=environment,
|
|
217
|
+
g=g,
|
|
218
|
+
depth=depth,
|
|
219
|
+
accept_partial=accept_partial,
|
|
220
|
+
)
|
|
221
|
+
to_return.append(candidate)
|
|
222
|
+
del matches[final_key]
|
|
223
|
+
found = found.union(final.matched.addresses)
|
|
224
|
+
all_found = all_lcl.addresses.issubset(found)
|
|
225
|
+
all_checked = len(matches) == 0
|
|
226
|
+
return all_found, [x for x in all_concepts if x.address in found], to_return
|
|
227
|
+
|
|
228
|
+
|
|
22
229
|
def gen_select_node_from_table(
|
|
23
230
|
target_concept: Concept,
|
|
24
231
|
all_concepts: List[Concept],
|
|
@@ -166,58 +373,15 @@ def gen_select_node_from_table(
|
|
|
166
373
|
return candidates[final]
|
|
167
374
|
|
|
168
375
|
|
|
169
|
-
def
|
|
170
|
-
concept: Concept,
|
|
376
|
+
def gen_select_nodes_from_tables(
|
|
171
377
|
local_optional: List[Concept],
|
|
172
|
-
environment: Environment,
|
|
173
|
-
g,
|
|
174
378
|
depth: int,
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
all_lcl = LooseConceptList(concepts=all_concepts)
|
|
182
|
-
materialized_lcl = LooseConceptList(
|
|
183
|
-
concepts=[
|
|
184
|
-
x
|
|
185
|
-
for x in all_concepts
|
|
186
|
-
if x.address in [z.address for z in environment.materialized_concepts]
|
|
187
|
-
or x.derivation == PurposeLineage.CONSTANT
|
|
188
|
-
]
|
|
189
|
-
)
|
|
190
|
-
if not target_grain:
|
|
191
|
-
target_grain = Grain()
|
|
192
|
-
for ac in all_concepts:
|
|
193
|
-
target_grain += ac.grain
|
|
194
|
-
if materialized_lcl != all_lcl:
|
|
195
|
-
logger.info(
|
|
196
|
-
f"{padding(depth)}{LOGGER_PREFIX} Skipping select node generation for {concept.address} "
|
|
197
|
-
f" as it + optional (looking for all {all_lcl}) includes non-materialized concepts {all_lcl.difference(materialized_lcl)} vs materialized: {materialized_lcl}"
|
|
198
|
-
)
|
|
199
|
-
if fail_if_not_found:
|
|
200
|
-
raise NoDatasourceException(f"No datasource exists for {concept}")
|
|
201
|
-
return None
|
|
202
|
-
|
|
203
|
-
ds: StrategyNode | None = None
|
|
204
|
-
|
|
205
|
-
# attempt to select all concepts from table
|
|
206
|
-
ds = gen_select_node_from_table(
|
|
207
|
-
concept,
|
|
208
|
-
[concept] + local_optional,
|
|
209
|
-
g=g,
|
|
210
|
-
environment=environment,
|
|
211
|
-
depth=depth,
|
|
212
|
-
accept_partial=accept_partial,
|
|
213
|
-
target_grain=target_grain,
|
|
214
|
-
)
|
|
215
|
-
if ds:
|
|
216
|
-
logger.info(
|
|
217
|
-
f"{padding(depth)}{LOGGER_PREFIX} Found select node with all target concepts, force group is {ds.force_group}, target grain {target_grain}"
|
|
218
|
-
)
|
|
219
|
-
return ds
|
|
220
|
-
# if we cannot find a match
|
|
379
|
+
concept: Concept,
|
|
380
|
+
environment: Environment,
|
|
381
|
+
g: nx.DiGraph,
|
|
382
|
+
accept_partial: bool,
|
|
383
|
+
all_concepts: List[Concept],
|
|
384
|
+
) -> tuple[bool, list[Concept], list[StrategyNode]]:
|
|
221
385
|
parents: List[StrategyNode] = []
|
|
222
386
|
found: List[Concept] = []
|
|
223
387
|
logger.info(
|
|
@@ -238,7 +402,7 @@ def gen_select_node(
|
|
|
238
402
|
)
|
|
239
403
|
if not ds:
|
|
240
404
|
unreachable.append(opt_con.address)
|
|
241
|
-
|
|
405
|
+
all_found = False
|
|
242
406
|
for x in reversed(range(1, len(local_optional) + 1)):
|
|
243
407
|
if all_found:
|
|
244
408
|
break
|
|
@@ -275,6 +439,64 @@ def gen_select_node(
|
|
|
275
439
|
f"{padding(depth)}{LOGGER_PREFIX} found all optional {[c.address for c in local_optional]}"
|
|
276
440
|
)
|
|
277
441
|
all_found = True
|
|
442
|
+
return all_found, found, parents
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def gen_select_node(
|
|
446
|
+
concept: Concept,
|
|
447
|
+
local_optional: List[Concept],
|
|
448
|
+
environment: Environment,
|
|
449
|
+
g,
|
|
450
|
+
depth: int,
|
|
451
|
+
accept_partial: bool = False,
|
|
452
|
+
fail_if_not_found: bool = True,
|
|
453
|
+
accept_partial_optional: bool = True,
|
|
454
|
+
target_grain: Grain | None = None,
|
|
455
|
+
) -> StrategyNode | None:
|
|
456
|
+
all_concepts = [concept] + local_optional
|
|
457
|
+
all_lcl = LooseConceptList(concepts=all_concepts)
|
|
458
|
+
materialized_lcl = LooseConceptList(
|
|
459
|
+
concepts=[
|
|
460
|
+
x
|
|
461
|
+
for x in all_concepts
|
|
462
|
+
if x.address in [z.address for z in environment.materialized_concepts]
|
|
463
|
+
or x.derivation == PurposeLineage.CONSTANT
|
|
464
|
+
]
|
|
465
|
+
)
|
|
466
|
+
if not target_grain:
|
|
467
|
+
target_grain = Grain()
|
|
468
|
+
for ac in all_concepts:
|
|
469
|
+
target_grain += ac.grain
|
|
470
|
+
if materialized_lcl != all_lcl:
|
|
471
|
+
logger.info(
|
|
472
|
+
f"{padding(depth)}{LOGGER_PREFIX} Skipping select node generation for {concept.address} "
|
|
473
|
+
f" as it + optional (looking for all {all_lcl}) includes non-materialized concepts {all_lcl.difference(materialized_lcl)} vs materialized: {materialized_lcl}"
|
|
474
|
+
)
|
|
475
|
+
if fail_if_not_found:
|
|
476
|
+
raise NoDatasourceException(f"No datasource exists for {concept}")
|
|
477
|
+
return None
|
|
478
|
+
|
|
479
|
+
ds: StrategyNode | None = None
|
|
480
|
+
|
|
481
|
+
# attempt to select all concepts from table
|
|
482
|
+
ds = gen_select_node_from_table(
|
|
483
|
+
concept,
|
|
484
|
+
[concept] + local_optional,
|
|
485
|
+
g=g,
|
|
486
|
+
environment=environment,
|
|
487
|
+
depth=depth,
|
|
488
|
+
accept_partial=accept_partial,
|
|
489
|
+
target_grain=target_grain,
|
|
490
|
+
)
|
|
491
|
+
if ds:
|
|
492
|
+
logger.info(
|
|
493
|
+
f"{padding(depth)}{LOGGER_PREFIX} Found select node with all target concepts, force group is {ds.force_group}, target grain {target_grain}"
|
|
494
|
+
)
|
|
495
|
+
return ds
|
|
496
|
+
# if we cannot find a match
|
|
497
|
+
all_found, found, parents = gen_select_nodes_from_tables_v2(
|
|
498
|
+
concept, all_concepts, g, environment, depth, target_grain, accept_partial
|
|
499
|
+
)
|
|
278
500
|
if parents and (all_found or accept_partial_optional):
|
|
279
501
|
if all_found:
|
|
280
502
|
logger.info(
|
|
@@ -282,7 +504,7 @@ def gen_select_node(
|
|
|
282
504
|
)
|
|
283
505
|
else:
|
|
284
506
|
logger.info(
|
|
285
|
-
f"{padding(depth)}{LOGGER_PREFIX} found some optional
|
|
507
|
+
f"{padding(depth)}{LOGGER_PREFIX} found some optional, returning"
|
|
286
508
|
)
|
|
287
509
|
all_partial = [
|
|
288
510
|
c
|
|
@@ -6,11 +6,12 @@ from .window_node import WindowNode
|
|
|
6
6
|
from .base_node import StrategyNode, NodeJoin
|
|
7
7
|
from .unnest_node import UnnestNode
|
|
8
8
|
from pydantic import BaseModel, Field, ConfigDict
|
|
9
|
-
from trilogy.core.models import Concept
|
|
9
|
+
from trilogy.core.models import Concept, Environment
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class History(BaseModel):
|
|
13
13
|
history: dict[str, StrategyNode | None] = Field(default_factory=dict)
|
|
14
|
+
select_history: dict[str, StrategyNode | None] = Field(default_factory=dict)
|
|
14
15
|
started: set[str] = Field(default_factory=set)
|
|
15
16
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
16
17
|
|
|
@@ -60,6 +61,58 @@ class History(BaseModel):
|
|
|
60
61
|
in self.started
|
|
61
62
|
)
|
|
62
63
|
|
|
64
|
+
def _select_concepts_to_lookup(
|
|
65
|
+
self,
|
|
66
|
+
main: Concept,
|
|
67
|
+
search: list[Concept],
|
|
68
|
+
accept_partial: bool,
|
|
69
|
+
fail_if_not_found: bool,
|
|
70
|
+
accept_partial_optional: bool,
|
|
71
|
+
) -> str:
|
|
72
|
+
return (
|
|
73
|
+
str(main.address)
|
|
74
|
+
+ "|"
|
|
75
|
+
+ "-".join([c.address for c in search])
|
|
76
|
+
+ str(accept_partial)
|
|
77
|
+
+ str(fail_if_not_found)
|
|
78
|
+
+ str(accept_partial_optional)
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
def gen_select_node(
|
|
82
|
+
self,
|
|
83
|
+
concept: Concept,
|
|
84
|
+
local_optional: list[Concept],
|
|
85
|
+
environment: Environment,
|
|
86
|
+
g,
|
|
87
|
+
depth: int,
|
|
88
|
+
fail_if_not_found: bool = False,
|
|
89
|
+
accept_partial: bool = False,
|
|
90
|
+
accept_partial_optional: bool = False,
|
|
91
|
+
) -> StrategyNode | None:
|
|
92
|
+
from trilogy.core.processing.node_generators.select_node import gen_select_node
|
|
93
|
+
|
|
94
|
+
fingerprint = self._select_concepts_to_lookup(
|
|
95
|
+
concept,
|
|
96
|
+
local_optional,
|
|
97
|
+
accept_partial,
|
|
98
|
+
fail_if_not_found,
|
|
99
|
+
accept_partial_optional,
|
|
100
|
+
)
|
|
101
|
+
if fingerprint in self.select_history:
|
|
102
|
+
return self.select_history[fingerprint]
|
|
103
|
+
gen = gen_select_node(
|
|
104
|
+
concept,
|
|
105
|
+
local_optional,
|
|
106
|
+
environment,
|
|
107
|
+
g,
|
|
108
|
+
depth + 1,
|
|
109
|
+
fail_if_not_found=fail_if_not_found,
|
|
110
|
+
accept_partial=accept_partial,
|
|
111
|
+
accept_partial_optional=accept_partial_optional,
|
|
112
|
+
)
|
|
113
|
+
self.select_history[fingerprint] = gen
|
|
114
|
+
return gen
|
|
115
|
+
|
|
63
116
|
|
|
64
117
|
__all__ = [
|
|
65
118
|
"FilterNode",
|
trilogy/dialect/base.py
CHANGED
|
@@ -22,6 +22,7 @@ from trilogy.core.models import (
|
|
|
22
22
|
CompiledCTE,
|
|
23
23
|
Conditional,
|
|
24
24
|
Comparison,
|
|
25
|
+
SubselectComparison,
|
|
25
26
|
OrderItem,
|
|
26
27
|
WindowItem,
|
|
27
28
|
FilterItem,
|
|
@@ -273,14 +274,13 @@ class BaseDialect:
|
|
|
273
274
|
]
|
|
274
275
|
rval = f"{self.WINDOW_FUNCTION_MAP[c.lineage.type](concept = self.render_concept_sql(c.lineage.content, cte=cte, alias=False), window=','.join(rendered_over_components), sort=','.join(rendered_order_components))}" # noqa: E501
|
|
275
276
|
elif isinstance(c.lineage, FilterItem):
|
|
276
|
-
rval = f"CASE WHEN {self.render_expr(c.lineage.where.conditional)} THEN {self.render_concept_sql(c.lineage.content, cte=cte, alias=False)} ELSE NULL END"
|
|
277
|
+
rval = f"CASE WHEN {self.render_expr(c.lineage.where.conditional, cte=cte)} THEN {self.render_concept_sql(c.lineage.content, cte=cte, alias=False)} ELSE NULL END"
|
|
277
278
|
elif isinstance(c.lineage, RowsetItem):
|
|
278
279
|
rval = f"{self.render_concept_sql(c.lineage.content, cte=cte, alias=False)}"
|
|
279
280
|
elif isinstance(c.lineage, MultiSelectStatement):
|
|
280
281
|
rval = f"{self.render_concept_sql(c.lineage.find_source(c, cte), cte=cte, alias=False)}"
|
|
281
282
|
elif isinstance(c.lineage, MergeStatement):
|
|
282
283
|
rval = f"{self.render_concept_sql(c.lineage.find_source(c, cte), cte=cte, alias=False)}"
|
|
283
|
-
# rval = f"{self.FUNCTION_MAP[FunctionType.COALESCE](*[self.render_concept_sql(parent, cte=cte, alias=False) for parent in c.lineage.find_sources(c, cte)])}"
|
|
284
284
|
elif isinstance(c.lineage, AggregateWrapper):
|
|
285
285
|
args = [
|
|
286
286
|
self.render_expr(v, cte) # , alias=False)
|
|
@@ -330,6 +330,7 @@ class BaseDialect:
|
|
|
330
330
|
Function,
|
|
331
331
|
Conditional,
|
|
332
332
|
Comparison,
|
|
333
|
+
SubselectComparison,
|
|
333
334
|
Concept,
|
|
334
335
|
str,
|
|
335
336
|
int,
|
|
@@ -358,7 +359,15 @@ class BaseDialect:
|
|
|
358
359
|
# if isinstance(e, Concept):
|
|
359
360
|
# cte = cte or cte_map.get(e.address, None)
|
|
360
361
|
|
|
361
|
-
if isinstance(e,
|
|
362
|
+
if isinstance(e, SubselectComparison):
|
|
363
|
+
assert cte, "Subselects must be rendered with a CTE in context"
|
|
364
|
+
if isinstance(e.right, Concept):
|
|
365
|
+
return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} (select {self.render_expr(e.right, cte=cte, cte_map=cte_map)} from {cte.source_map[e.right.address][0]})"
|
|
366
|
+
else:
|
|
367
|
+
raise NotImplementedError(
|
|
368
|
+
f"Subselects must be a concept, got {e.right}"
|
|
369
|
+
)
|
|
370
|
+
elif isinstance(e, Comparison):
|
|
362
371
|
return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map)}"
|
|
363
372
|
elif isinstance(e, Conditional):
|
|
364
373
|
# conditions need to be nested in parentheses
|
trilogy/parsing/common.py
CHANGED
|
@@ -174,3 +174,33 @@ def agg_wrapper_to_concept(
|
|
|
174
174
|
keys=tuple(parent.by) if parent.by else keys,
|
|
175
175
|
)
|
|
176
176
|
return out
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def arbitrary_to_concept(
|
|
180
|
+
parent: (
|
|
181
|
+
AggregateWrapper
|
|
182
|
+
| WindowItem
|
|
183
|
+
| FilterItem
|
|
184
|
+
| Function
|
|
185
|
+
| ListWrapper
|
|
186
|
+
| int
|
|
187
|
+
| float
|
|
188
|
+
| str
|
|
189
|
+
),
|
|
190
|
+
namespace: str,
|
|
191
|
+
name: str,
|
|
192
|
+
metadata: Metadata | None = None,
|
|
193
|
+
purpose: Purpose | None = None,
|
|
194
|
+
) -> Concept:
|
|
195
|
+
if isinstance(parent, AggregateWrapper):
|
|
196
|
+
return agg_wrapper_to_concept(parent, namespace, name, metadata, purpose)
|
|
197
|
+
elif isinstance(parent, WindowItem):
|
|
198
|
+
return window_item_to_concept(parent, name, namespace, purpose, metadata)
|
|
199
|
+
elif isinstance(parent, FilterItem):
|
|
200
|
+
return filter_item_to_concept(parent, name, namespace, purpose, metadata)
|
|
201
|
+
elif isinstance(parent, Function):
|
|
202
|
+
return function_to_concept(parent, name, namespace)
|
|
203
|
+
elif isinstance(parent, ListWrapper):
|
|
204
|
+
return constant_to_concept(parent, name, namespace, purpose, metadata)
|
|
205
|
+
else:
|
|
206
|
+
return constant_to_concept(parent, name, namespace, purpose, metadata)
|