pytrilogy 0.0.2.7__py3-none-any.whl → 0.0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.2.7.dist-info → pytrilogy-0.0.2.9.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.2.7.dist-info → pytrilogy-0.0.2.9.dist-info}/RECORD +34 -34
- {pytrilogy-0.0.2.7.dist-info → pytrilogy-0.0.2.9.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/constants.py +1 -0
- trilogy/core/enums.py +1 -0
- trilogy/core/models.py +154 -56
- trilogy/core/optimization.py +44 -5
- trilogy/core/optimizations/inline_datasource.py +14 -8
- trilogy/core/optimizations/predicate_pushdown.py +73 -44
- trilogy/core/processing/concept_strategies_v3.py +69 -28
- trilogy/core/processing/node_generators/common.py +42 -16
- trilogy/core/processing/node_generators/filter_node.py +89 -48
- trilogy/core/processing/node_generators/group_node.py +3 -1
- trilogy/core/processing/node_generators/rowset_node.py +13 -54
- trilogy/core/processing/node_generators/select_node.py +10 -13
- trilogy/core/processing/node_generators/unnest_node.py +5 -3
- trilogy/core/processing/node_generators/window_node.py +23 -2
- trilogy/core/processing/nodes/__init__.py +34 -6
- trilogy/core/processing/nodes/base_node.py +67 -13
- trilogy/core/processing/nodes/filter_node.py +3 -0
- trilogy/core/processing/nodes/group_node.py +3 -0
- trilogy/core/processing/nodes/merge_node.py +1 -11
- trilogy/core/processing/nodes/select_node_v2.py +1 -0
- trilogy/core/processing/utility.py +29 -10
- trilogy/core/query_processor.py +47 -20
- trilogy/dialect/base.py +47 -14
- trilogy/dialect/common.py +15 -3
- trilogy/dialect/presto.py +2 -1
- trilogy/parsing/parse_engine.py +20 -1
- trilogy/parsing/trilogy.lark +3 -1
- {pytrilogy-0.0.2.7.dist-info → pytrilogy-0.0.2.9.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.7.dist-info → pytrilogy-0.0.2.9.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.7.dist-info → pytrilogy-0.0.2.9.dist-info}/top_level.txt +0 -0
|
@@ -6,7 +6,7 @@ from .window_node import WindowNode
|
|
|
6
6
|
from .base_node import StrategyNode, NodeJoin
|
|
7
7
|
from .unnest_node import UnnestNode
|
|
8
8
|
from pydantic import BaseModel, Field, ConfigDict
|
|
9
|
-
from trilogy.core.models import Concept, Environment
|
|
9
|
+
from trilogy.core.models import Concept, Environment, WhereClause
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class History(BaseModel):
|
|
@@ -15,23 +15,42 @@ class History(BaseModel):
|
|
|
15
15
|
started: set[str] = Field(default_factory=set)
|
|
16
16
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
17
17
|
|
|
18
|
-
def _concepts_to_lookup(
|
|
18
|
+
def _concepts_to_lookup(
|
|
19
|
+
self,
|
|
20
|
+
search: list[Concept],
|
|
21
|
+
accept_partial: bool,
|
|
22
|
+
conditions: WhereClause | None = None,
|
|
23
|
+
) -> str:
|
|
24
|
+
if conditions:
|
|
25
|
+
return (
|
|
26
|
+
"-".join([c.address for c in search])
|
|
27
|
+
+ str(accept_partial)
|
|
28
|
+
+ str(conditions)
|
|
29
|
+
)
|
|
19
30
|
return "-".join([c.address for c in search]) + str(accept_partial)
|
|
20
31
|
|
|
21
32
|
def search_to_history(
|
|
22
|
-
self,
|
|
33
|
+
self,
|
|
34
|
+
search: list[Concept],
|
|
35
|
+
accept_partial: bool,
|
|
36
|
+
output: StrategyNode | None,
|
|
37
|
+
conditions: WhereClause | None = None,
|
|
23
38
|
):
|
|
24
|
-
self.history[
|
|
39
|
+
self.history[
|
|
40
|
+
self._concepts_to_lookup(search, accept_partial, conditions=conditions)
|
|
41
|
+
] = output
|
|
25
42
|
|
|
26
43
|
def get_history(
|
|
27
44
|
self,
|
|
28
45
|
search: list[Concept],
|
|
46
|
+
conditions: WhereClause | None = None,
|
|
29
47
|
accept_partial: bool = False,
|
|
30
48
|
parent_key: str = "",
|
|
31
49
|
) -> StrategyNode | None | bool:
|
|
32
50
|
key = self._concepts_to_lookup(
|
|
33
51
|
search,
|
|
34
52
|
accept_partial,
|
|
53
|
+
conditions,
|
|
35
54
|
)
|
|
36
55
|
if parent_key and parent_key == key:
|
|
37
56
|
raise ValueError(
|
|
@@ -48,11 +67,13 @@ class History(BaseModel):
|
|
|
48
67
|
self,
|
|
49
68
|
search: list[Concept],
|
|
50
69
|
accept_partial: bool = False,
|
|
70
|
+
conditions: WhereClause | None = None,
|
|
51
71
|
):
|
|
52
72
|
self.started.add(
|
|
53
73
|
self._concepts_to_lookup(
|
|
54
74
|
search,
|
|
55
|
-
accept_partial,
|
|
75
|
+
accept_partial=accept_partial,
|
|
76
|
+
conditions=conditions,
|
|
56
77
|
)
|
|
57
78
|
)
|
|
58
79
|
|
|
@@ -60,11 +81,13 @@ class History(BaseModel):
|
|
|
60
81
|
self,
|
|
61
82
|
search: list[Concept],
|
|
62
83
|
accept_partial: bool = False,
|
|
84
|
+
conditions: WhereClause | None = None,
|
|
63
85
|
):
|
|
64
86
|
return (
|
|
65
87
|
self._concepts_to_lookup(
|
|
66
88
|
search,
|
|
67
89
|
accept_partial,
|
|
90
|
+
conditions=conditions,
|
|
68
91
|
)
|
|
69
92
|
in self.started
|
|
70
93
|
)
|
|
@@ -76,6 +99,7 @@ class History(BaseModel):
|
|
|
76
99
|
accept_partial: bool,
|
|
77
100
|
fail_if_not_found: bool,
|
|
78
101
|
accept_partial_optional: bool,
|
|
102
|
+
conditions: WhereClause | None = None,
|
|
79
103
|
) -> str:
|
|
80
104
|
return (
|
|
81
105
|
str(main.address)
|
|
@@ -84,6 +108,7 @@ class History(BaseModel):
|
|
|
84
108
|
+ str(accept_partial)
|
|
85
109
|
+ str(fail_if_not_found)
|
|
86
110
|
+ str(accept_partial_optional)
|
|
111
|
+
+ str(conditions)
|
|
87
112
|
)
|
|
88
113
|
|
|
89
114
|
def gen_select_node(
|
|
@@ -97,6 +122,7 @@ class History(BaseModel):
|
|
|
97
122
|
fail_if_not_found: bool = False,
|
|
98
123
|
accept_partial: bool = False,
|
|
99
124
|
accept_partial_optional: bool = False,
|
|
125
|
+
conditions: WhereClause | None = None,
|
|
100
126
|
) -> StrategyNode | None:
|
|
101
127
|
from trilogy.core.processing.node_generators.select_node import gen_select_node
|
|
102
128
|
|
|
@@ -105,7 +131,8 @@ class History(BaseModel):
|
|
|
105
131
|
local_optional,
|
|
106
132
|
accept_partial,
|
|
107
133
|
fail_if_not_found,
|
|
108
|
-
accept_partial_optional,
|
|
134
|
+
accept_partial_optional=accept_partial_optional,
|
|
135
|
+
conditions=conditions,
|
|
109
136
|
)
|
|
110
137
|
if fingerprint in self.select_history:
|
|
111
138
|
return self.select_history[fingerprint]
|
|
@@ -119,6 +146,7 @@ class History(BaseModel):
|
|
|
119
146
|
accept_partial=accept_partial,
|
|
120
147
|
accept_partial_optional=accept_partial_optional,
|
|
121
148
|
source_concepts=source_concepts,
|
|
149
|
+
conditions=conditions,
|
|
122
150
|
)
|
|
123
151
|
self.select_history[fingerprint] = gen
|
|
124
152
|
return gen
|
|
@@ -17,6 +17,7 @@ from trilogy.core.models import (
|
|
|
17
17
|
from trilogy.core.enums import Purpose, JoinType, PurposeLineage, Granularity
|
|
18
18
|
from trilogy.utility import unique
|
|
19
19
|
from dataclasses import dataclass
|
|
20
|
+
from trilogy.core.enums import BooleanOperator
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
def concept_list_to_grain(
|
|
@@ -71,7 +72,6 @@ def resolve_concept_map(
|
|
|
71
72
|
concept_map[concept.address].add(input)
|
|
72
73
|
elif concept.address not in concept_map:
|
|
73
74
|
concept_map[concept.address].add(input)
|
|
74
|
-
|
|
75
75
|
# second loop, include partials
|
|
76
76
|
for input in inputs:
|
|
77
77
|
for concept in input.output_concepts:
|
|
@@ -92,14 +92,31 @@ def resolve_concept_map(
|
|
|
92
92
|
return concept_map
|
|
93
93
|
|
|
94
94
|
|
|
95
|
-
def get_all_parent_partial(
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
95
|
+
def get_all_parent_partial(
|
|
96
|
+
all_concepts: List[Concept], parents: List["StrategyNode"]
|
|
97
|
+
) -> List[Concept]:
|
|
98
|
+
return unique(
|
|
99
|
+
[
|
|
100
|
+
c
|
|
101
|
+
for c in all_concepts
|
|
102
|
+
if len(
|
|
103
|
+
[
|
|
104
|
+
p
|
|
105
|
+
for p in parents
|
|
106
|
+
if c.address in [x.address for x in p.partial_concepts]
|
|
107
|
+
]
|
|
108
|
+
)
|
|
109
|
+
>= 1
|
|
110
|
+
and all(
|
|
111
|
+
[
|
|
112
|
+
c.address in p.partial_lcl
|
|
113
|
+
for p in parents
|
|
114
|
+
if c.address in p.output_lcl
|
|
115
|
+
]
|
|
116
|
+
)
|
|
117
|
+
],
|
|
118
|
+
"address",
|
|
119
|
+
)
|
|
103
120
|
|
|
104
121
|
|
|
105
122
|
class StrategyNode:
|
|
@@ -137,7 +154,7 @@ class StrategyNode:
|
|
|
137
154
|
self.partial_concepts = partial_concepts or get_all_parent_partial(
|
|
138
155
|
self.output_concepts, self.parents
|
|
139
156
|
)
|
|
140
|
-
|
|
157
|
+
|
|
141
158
|
self.depth = depth
|
|
142
159
|
self.conditions = conditions
|
|
143
160
|
self.grain = grain
|
|
@@ -146,13 +163,50 @@ class StrategyNode:
|
|
|
146
163
|
self.hidden_concepts = hidden_concepts or []
|
|
147
164
|
self.existence_concepts = existence_concepts or []
|
|
148
165
|
self.virtual_output_concepts = virtual_output_concepts or []
|
|
166
|
+
self.validate_parents()
|
|
167
|
+
|
|
168
|
+
def add_parents(self, parents: list["StrategyNode"]):
|
|
169
|
+
self.parents += parents
|
|
170
|
+
self.validate_parents()
|
|
171
|
+
|
|
172
|
+
def add_condition(self, condition: Conditional | Comparison | Parenthetical):
|
|
173
|
+
if self.conditions:
|
|
174
|
+
self.conditions = Conditional(
|
|
175
|
+
left=self.conditions, right=condition, operator=BooleanOperator.AND
|
|
176
|
+
)
|
|
177
|
+
else:
|
|
178
|
+
self.conditions = condition
|
|
179
|
+
|
|
180
|
+
def validate_parents(self):
|
|
181
|
+
# validate parents exist
|
|
182
|
+
# assign partial values where needed
|
|
149
183
|
for parent in self.parents:
|
|
150
184
|
if not parent:
|
|
151
185
|
raise SyntaxError("Unresolvable parent")
|
|
152
186
|
|
|
187
|
+
# TODO: make this accurate
|
|
188
|
+
if self.parents:
|
|
189
|
+
self.partial_concepts = get_all_parent_partial(
|
|
190
|
+
self.output_concepts, self.parents
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
self.partial_lcl = LooseConceptList(concepts=self.partial_concepts)
|
|
194
|
+
|
|
153
195
|
def add_output_concepts(self, concepts: List[Concept]):
|
|
154
196
|
for concept in concepts:
|
|
155
|
-
self.
|
|
197
|
+
if concept.address not in self.output_lcl.addresses:
|
|
198
|
+
self.output_concepts.append(concept)
|
|
199
|
+
self.output_lcl = LooseConceptList(concepts=self.output_concepts)
|
|
200
|
+
self.rebuild_cache()
|
|
201
|
+
|
|
202
|
+
def add_existence_concepts(self, concepts: List[Concept]):
|
|
203
|
+
for concept in concepts:
|
|
204
|
+
if concept.address not in [x.address for x in self.output_concepts]:
|
|
205
|
+
self.existence_concepts.append(concept)
|
|
206
|
+
self.rebuild_cache()
|
|
207
|
+
|
|
208
|
+
def set_output_concepts(self, concepts: List[Concept]):
|
|
209
|
+
self.output_concepts = concepts
|
|
156
210
|
self.output_lcl = LooseConceptList(concepts=self.output_concepts)
|
|
157
211
|
self.rebuild_cache()
|
|
158
212
|
|
|
@@ -195,8 +249,8 @@ class StrategyNode:
|
|
|
195
249
|
grain = self.grain if self.grain else Grain(components=self.output_concepts)
|
|
196
250
|
source_map = resolve_concept_map(
|
|
197
251
|
parent_sources,
|
|
198
|
-
self.output_concepts,
|
|
199
|
-
self.input_concepts + self.existence_concepts,
|
|
252
|
+
targets=self.output_concepts,
|
|
253
|
+
inherited_inputs=self.input_concepts + self.existence_concepts,
|
|
200
254
|
)
|
|
201
255
|
return QueryDatasource(
|
|
202
256
|
input_concepts=self.input_concepts,
|
|
@@ -36,6 +36,7 @@ class FilterNode(StrategyNode):
|
|
|
36
36
|
partial_concepts: List[Concept] | None = None,
|
|
37
37
|
force_group: bool | None = False,
|
|
38
38
|
grain: Grain | None = None,
|
|
39
|
+
existence_concepts: List[Concept] | None = None,
|
|
39
40
|
):
|
|
40
41
|
super().__init__(
|
|
41
42
|
output_concepts=output_concepts,
|
|
@@ -49,6 +50,7 @@ class FilterNode(StrategyNode):
|
|
|
49
50
|
partial_concepts=partial_concepts,
|
|
50
51
|
force_group=force_group,
|
|
51
52
|
grain=grain,
|
|
53
|
+
existence_concepts=existence_concepts,
|
|
52
54
|
)
|
|
53
55
|
|
|
54
56
|
def copy(self) -> "FilterNode":
|
|
@@ -64,4 +66,5 @@ class FilterNode(StrategyNode):
|
|
|
64
66
|
partial_concepts=list(self.partial_concepts),
|
|
65
67
|
force_group=self.force_group,
|
|
66
68
|
grain=self.grain,
|
|
69
|
+
existence_concepts=list(self.existence_concepts),
|
|
67
70
|
)
|
|
@@ -39,6 +39,7 @@ class GroupNode(StrategyNode):
|
|
|
39
39
|
partial_concepts: Optional[List[Concept]] = None,
|
|
40
40
|
force_group: bool | None = None,
|
|
41
41
|
conditions: Conditional | Comparison | Parenthetical | None = None,
|
|
42
|
+
existence_concepts: List[Concept] | None = None,
|
|
42
43
|
):
|
|
43
44
|
super().__init__(
|
|
44
45
|
input_concepts=input_concepts,
|
|
@@ -51,6 +52,7 @@ class GroupNode(StrategyNode):
|
|
|
51
52
|
partial_concepts=partial_concepts,
|
|
52
53
|
force_group=force_group,
|
|
53
54
|
conditions=conditions,
|
|
55
|
+
existence_concepts=existence_concepts,
|
|
54
56
|
)
|
|
55
57
|
|
|
56
58
|
def _resolve(self) -> QueryDatasource:
|
|
@@ -173,4 +175,5 @@ class GroupNode(StrategyNode):
|
|
|
173
175
|
partial_concepts=list(self.partial_concepts),
|
|
174
176
|
force_group=self.force_group,
|
|
175
177
|
conditions=self.conditions,
|
|
178
|
+
existence_concepts=list(self.existence_concepts),
|
|
176
179
|
)
|
|
@@ -138,16 +138,6 @@ class MergeNode(StrategyNode):
|
|
|
138
138
|
continue
|
|
139
139
|
final_joins.append(join)
|
|
140
140
|
self.node_joins = final_joins
|
|
141
|
-
partial_lookup: list[Concept] = []
|
|
142
|
-
non_partial: List[Concept] = []
|
|
143
|
-
for node in parents or []:
|
|
144
|
-
partial_lookup += node.partial_concepts
|
|
145
|
-
non_partial += [
|
|
146
|
-
x for x in node.output_concepts if x not in node.partial_concepts
|
|
147
|
-
]
|
|
148
|
-
|
|
149
|
-
final_partial = [x for x in partial_lookup if x not in non_partial]
|
|
150
|
-
self.partial_concepts = final_partial
|
|
151
141
|
|
|
152
142
|
def translate_node_joins(self, node_joins: List[NodeJoin]) -> List[BaseJoin]:
|
|
153
143
|
joins = []
|
|
@@ -265,7 +255,6 @@ class MergeNode(StrategyNode):
|
|
|
265
255
|
for x in final_datasets
|
|
266
256
|
if all([y in self.existence_concepts for y in x.output_concepts])
|
|
267
257
|
]
|
|
268
|
-
|
|
269
258
|
if len(merged.keys()) == 1:
|
|
270
259
|
final: QueryDatasource | Datasource = list(merged.values())[0]
|
|
271
260
|
if (
|
|
@@ -378,4 +367,5 @@ class MergeNode(StrategyNode):
|
|
|
378
367
|
node_joins=self.node_joins,
|
|
379
368
|
join_concepts=list(self.join_concepts) if self.join_concepts else None,
|
|
380
369
|
force_join_type=self.force_join_type,
|
|
370
|
+
existence_concepts=list(self.existence_concepts),
|
|
381
371
|
)
|
|
@@ -166,6 +166,7 @@ class SelectNode(StrategyNode):
|
|
|
166
166
|
resolution = self.resolve_from_provided_datasource()
|
|
167
167
|
if resolution:
|
|
168
168
|
return resolution
|
|
169
|
+
|
|
169
170
|
required = [c.address for c in self.all_concepts]
|
|
170
171
|
raise NoDatasourceException(
|
|
171
172
|
f"Could not find any way to resolve datasources for required concepts {required} with derivation {[x.derivation for x in self.all_concepts]}"
|
|
@@ -20,7 +20,7 @@ from trilogy.core.models import (
|
|
|
20
20
|
DataType,
|
|
21
21
|
)
|
|
22
22
|
|
|
23
|
-
from trilogy.core.enums import Purpose, Granularity
|
|
23
|
+
from trilogy.core.enums import Purpose, Granularity, BooleanOperator
|
|
24
24
|
from trilogy.core.constants import CONSTANT_DATASET
|
|
25
25
|
from enum import Enum
|
|
26
26
|
from trilogy.utility import unique
|
|
@@ -62,7 +62,7 @@ def create_log_lambda(prefix: str, depth: int, logger: Logger):
|
|
|
62
62
|
pad = padding(depth)
|
|
63
63
|
|
|
64
64
|
def log_lambda(msg: str):
|
|
65
|
-
logger.info(f"{pad}
|
|
65
|
+
logger.info(f"{pad}{prefix} {msg}")
|
|
66
66
|
|
|
67
67
|
return log_lambda
|
|
68
68
|
|
|
@@ -328,14 +328,6 @@ def get_disconnected_components(
|
|
|
328
328
|
for datasource, concepts in concept_map.items():
|
|
329
329
|
graph.add_node(datasource, type=NodeType.NODE)
|
|
330
330
|
for concept in concepts:
|
|
331
|
-
# TODO: determine if this is the right way to handle things
|
|
332
|
-
# if concept.derivation in (PurposeLineage.FILTER, PurposeLineage.WINDOW):
|
|
333
|
-
# if isinstance(concept.lineage, FilterItem):
|
|
334
|
-
# graph.add_node(concept.lineage.content.address, type=NodeType.CONCEPT)
|
|
335
|
-
# graph.add_edge(datasource, concept.lineage.content.address)
|
|
336
|
-
# if isinstance(concept.lineage, WindowItem):
|
|
337
|
-
# graph.add_node(concept.lineage.content.address, type=NodeType.CONCEPT)
|
|
338
|
-
# graph.add_edge(datasource, concept.lineage.content.address)
|
|
339
331
|
graph.add_node(concept.address, type=NodeType.CONCEPT)
|
|
340
332
|
graph.add_edge(datasource, concept.address)
|
|
341
333
|
all_concepts.add(concept)
|
|
@@ -378,3 +370,30 @@ def is_scalar_condition(
|
|
|
378
370
|
elif isinstance(element, Conditional):
|
|
379
371
|
return is_scalar_condition(element.left) and is_scalar_condition(element.right)
|
|
380
372
|
return True
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def decompose_condition(
|
|
376
|
+
conditional: Conditional,
|
|
377
|
+
) -> list[SubselectComparison | Comparison | Conditional | Parenthetical]:
|
|
378
|
+
chunks: list[SubselectComparison | Comparison | Conditional | Parenthetical] = []
|
|
379
|
+
if conditional.operator == BooleanOperator.AND:
|
|
380
|
+
if not (
|
|
381
|
+
isinstance(
|
|
382
|
+
conditional.left,
|
|
383
|
+
(SubselectComparison, Comparison, Conditional, Parenthetical),
|
|
384
|
+
)
|
|
385
|
+
and isinstance(
|
|
386
|
+
conditional.right,
|
|
387
|
+
(SubselectComparison, Comparison, Conditional, Parenthetical),
|
|
388
|
+
)
|
|
389
|
+
):
|
|
390
|
+
chunks.append(conditional)
|
|
391
|
+
else:
|
|
392
|
+
for val in [conditional.left, conditional.right]:
|
|
393
|
+
if isinstance(val, Conditional):
|
|
394
|
+
chunks.extend(decompose_condition(val))
|
|
395
|
+
else:
|
|
396
|
+
chunks.append(val)
|
|
397
|
+
else:
|
|
398
|
+
chunks.append(conditional)
|
|
399
|
+
return chunks
|
trilogy/core/query_processor.py
CHANGED
|
@@ -4,9 +4,9 @@ from trilogy.core.env_processor import generate_graph
|
|
|
4
4
|
from trilogy.core.graph_models import ReferenceGraph
|
|
5
5
|
from trilogy.core.constants import CONSTANT_DATASET
|
|
6
6
|
from trilogy.core.processing.concept_strategies_v3 import source_query_concepts
|
|
7
|
-
from trilogy.core.enums import SelectFiltering
|
|
7
|
+
from trilogy.core.enums import SelectFiltering, BooleanOperator
|
|
8
8
|
from trilogy.constants import CONFIG, DEFAULT_NAMESPACE
|
|
9
|
-
from trilogy.core.processing.nodes import GroupNode, SelectNode, StrategyNode
|
|
9
|
+
from trilogy.core.processing.nodes import GroupNode, SelectNode, StrategyNode, History
|
|
10
10
|
from trilogy.core.models import (
|
|
11
11
|
Concept,
|
|
12
12
|
Environment,
|
|
@@ -24,6 +24,7 @@ from trilogy.core.models import (
|
|
|
24
24
|
Datasource,
|
|
25
25
|
BaseJoin,
|
|
26
26
|
InstantiatedUnnestJoin,
|
|
27
|
+
Conditional,
|
|
27
28
|
)
|
|
28
29
|
|
|
29
30
|
from trilogy.utility import unique
|
|
@@ -307,7 +308,10 @@ def datasource_to_ctes(
|
|
|
307
308
|
|
|
308
309
|
|
|
309
310
|
def append_existence_check(
|
|
310
|
-
node: StrategyNode,
|
|
311
|
+
node: StrategyNode,
|
|
312
|
+
environment: Environment,
|
|
313
|
+
graph: ReferenceGraph,
|
|
314
|
+
history: History | None = None,
|
|
311
315
|
):
|
|
312
316
|
# we if we have a where clause doing an existence check
|
|
313
317
|
# treat that as separate subquery
|
|
@@ -318,25 +322,22 @@ def append_existence_check(
|
|
|
318
322
|
logger.info(
|
|
319
323
|
f"{LOGGER_PREFIX} fetching existance clause inputs {[str(c) for c in subselect]}"
|
|
320
324
|
)
|
|
321
|
-
eds = source_query_concepts(
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
for x in final_eds.output_concepts:
|
|
327
|
-
if x.address not in first_parent.existence_source_map:
|
|
328
|
-
first_parent.existence_source_map[x.address] = {final_eds}
|
|
325
|
+
eds = source_query_concepts(
|
|
326
|
+
[*subselect], environment=environment, g=graph, history=history
|
|
327
|
+
)
|
|
328
|
+
node.add_parents([eds])
|
|
329
|
+
node.add_existence_concepts([*subselect])
|
|
329
330
|
|
|
330
331
|
|
|
331
|
-
def
|
|
332
|
+
def get_query_node(
|
|
332
333
|
environment: Environment,
|
|
333
334
|
statement: SelectStatement | MultiSelectStatement,
|
|
334
335
|
graph: Optional[ReferenceGraph] = None,
|
|
335
|
-
|
|
336
|
-
) ->
|
|
336
|
+
history: History | None = None,
|
|
337
|
+
) -> StrategyNode:
|
|
337
338
|
graph = graph or generate_graph(environment)
|
|
338
339
|
logger.info(
|
|
339
|
-
f"{LOGGER_PREFIX} getting source datasource for query with output {[str(c) for c in statement.output_components]}"
|
|
340
|
+
f"{LOGGER_PREFIX} getting source datasource for query with filtering {statement.where_clause_category} and output {[str(c) for c in statement.output_components]}"
|
|
340
341
|
)
|
|
341
342
|
if not statement.output_components:
|
|
342
343
|
raise ValueError(f"Statement has no output components {statement}")
|
|
@@ -353,22 +354,28 @@ def get_query_datasources(
|
|
|
353
354
|
)
|
|
354
355
|
nest_where = True
|
|
355
356
|
|
|
356
|
-
ods = source_query_concepts(
|
|
357
|
+
ods: StrategyNode = source_query_concepts(
|
|
357
358
|
search_concepts,
|
|
358
359
|
environment=environment,
|
|
359
360
|
g=graph,
|
|
361
|
+
conditions=(statement.where_clause if statement.where_clause else None),
|
|
362
|
+
history=history,
|
|
360
363
|
)
|
|
361
|
-
|
|
364
|
+
if not ods:
|
|
365
|
+
raise ValueError(
|
|
366
|
+
f"Could not find source query concepts for {[x.address for x in search_concepts]}"
|
|
367
|
+
)
|
|
368
|
+
ds: StrategyNode
|
|
362
369
|
if nest_where and statement.where_clause:
|
|
363
370
|
if not all_aggregate:
|
|
364
371
|
ods.conditions = statement.where_clause.conditional
|
|
365
|
-
ods.output_concepts =
|
|
372
|
+
ods.output_concepts = statement.output_components
|
|
366
373
|
# ods.hidden_concepts = where_delta
|
|
367
374
|
ods.rebuild_cache()
|
|
368
|
-
append_existence_check(ods, environment, graph)
|
|
375
|
+
append_existence_check(ods, environment, graph, history)
|
|
369
376
|
ds = GroupNode(
|
|
370
377
|
output_concepts=statement.output_components,
|
|
371
|
-
input_concepts=
|
|
378
|
+
input_concepts=statement.output_components,
|
|
372
379
|
parents=[ods],
|
|
373
380
|
environment=ods.environment,
|
|
374
381
|
g=ods.g,
|
|
@@ -390,7 +397,26 @@ def get_query_datasources(
|
|
|
390
397
|
|
|
391
398
|
else:
|
|
392
399
|
ds = ods
|
|
400
|
+
if statement.having_clause:
|
|
401
|
+
if ds.conditions:
|
|
402
|
+
ds.conditions = Conditional(
|
|
403
|
+
left=ds.conditions,
|
|
404
|
+
right=statement.having_clause.conditional,
|
|
405
|
+
operator=BooleanOperator.AND,
|
|
406
|
+
)
|
|
407
|
+
else:
|
|
408
|
+
ds.conditions = statement.having_clause.conditional
|
|
409
|
+
return ds
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def get_query_datasources(
|
|
413
|
+
environment: Environment,
|
|
414
|
+
statement: SelectStatement | MultiSelectStatement,
|
|
415
|
+
graph: Optional[ReferenceGraph] = None,
|
|
416
|
+
hooks: Optional[List[BaseHook]] = None,
|
|
417
|
+
) -> QueryDatasource:
|
|
393
418
|
|
|
419
|
+
ds = get_query_node(environment, statement, graph)
|
|
394
420
|
final_qds = ds.resolve()
|
|
395
421
|
if hooks:
|
|
396
422
|
for hook in hooks:
|
|
@@ -475,6 +501,7 @@ def process_query(
|
|
|
475
501
|
grain=statement.grain,
|
|
476
502
|
limit=statement.limit,
|
|
477
503
|
where_clause=statement.where_clause,
|
|
504
|
+
having_clause=statement.having_clause,
|
|
478
505
|
output_columns=statement.output_components,
|
|
479
506
|
ctes=final_ctes,
|
|
480
507
|
base=root_cte,
|
trilogy/dialect/base.py
CHANGED
|
@@ -51,7 +51,7 @@ from trilogy.core.models import (
|
|
|
51
51
|
MergeStatementV2,
|
|
52
52
|
)
|
|
53
53
|
from trilogy.core.query_processor import process_query, process_persist
|
|
54
|
-
from trilogy.dialect.common import render_join
|
|
54
|
+
from trilogy.dialect.common import render_join, render_unnest
|
|
55
55
|
from trilogy.hooks.base_hook import BaseHook
|
|
56
56
|
from trilogy.core.enums import UnnestMode
|
|
57
57
|
|
|
@@ -128,6 +128,7 @@ FUNCTION_MAP = {
|
|
|
128
128
|
FunctionType.UNNEST: lambda x: f"unnest({x[0]})",
|
|
129
129
|
FunctionType.ATTR_ACCESS: lambda x: f"""{x[0]}.{x[1].replace("'", "")}""",
|
|
130
130
|
FunctionType.STRUCT: lambda x: f"{{{', '.join(struct_arg(x))}}}",
|
|
131
|
+
FunctionType.ARRAY: lambda x: f"[{', '.join(x)}]",
|
|
131
132
|
# math
|
|
132
133
|
FunctionType.ADD: lambda x: f"{x[0]} + {x[1]}",
|
|
133
134
|
FunctionType.SUBTRACT: lambda x: f"{x[0]} - {x[1]}",
|
|
@@ -385,14 +386,19 @@ class BaseDialect:
|
|
|
385
386
|
e.right.address,
|
|
386
387
|
[
|
|
387
388
|
INVALID_REFERENCE_STRING(
|
|
388
|
-
f"Missing source reference to {e.right.
|
|
389
|
+
f"Missing source reference to {e.right.address}"
|
|
389
390
|
)
|
|
390
391
|
],
|
|
391
392
|
)
|
|
392
393
|
else:
|
|
393
394
|
lookup = lookup_cte.existence_source_map[e.right.address]
|
|
394
|
-
|
|
395
|
-
|
|
395
|
+
if len(lookup) > 0:
|
|
396
|
+
target = lookup[0]
|
|
397
|
+
else:
|
|
398
|
+
target = INVALID_REFERENCE_STRING(
|
|
399
|
+
f"Missing source CTE for {e.right.address}"
|
|
400
|
+
)
|
|
401
|
+
return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} (select {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} from {target} where {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} is not null)"
|
|
396
402
|
elif isinstance(e.right, (ListWrapper, Parenthetical, list)):
|
|
397
403
|
return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map)}"
|
|
398
404
|
|
|
@@ -466,7 +472,7 @@ class BaseDialect:
|
|
|
466
472
|
elif isinstance(e, MapWrapper):
|
|
467
473
|
return f"MAP {{{','.join([f'{self.render_expr(k, cte=cte, cte_map=cte_map)}:{self.render_expr(v, cte=cte, cte_map=cte_map)}' for k, v in e.items()])}}}"
|
|
468
474
|
elif isinstance(e, list):
|
|
469
|
-
return f"
|
|
475
|
+
return f"{self.FUNCTION_MAP[FunctionType.ARRAY]([self.render_expr(x, cte=cte, cte_map=cte_map) for x in e])}"
|
|
470
476
|
elif isinstance(e, DataType):
|
|
471
477
|
return str(e.value)
|
|
472
478
|
elif isinstance(e, DatePart):
|
|
@@ -480,8 +486,12 @@ class BaseDialect:
|
|
|
480
486
|
raise ValueError(f"Unable to render type {type(e)} {e}")
|
|
481
487
|
|
|
482
488
|
def render_cte(self, cte: CTE):
|
|
483
|
-
if self.UNNEST_MODE in (
|
|
484
|
-
|
|
489
|
+
if self.UNNEST_MODE in (
|
|
490
|
+
UnnestMode.CROSS_APPLY,
|
|
491
|
+
UnnestMode.CROSS_JOIN,
|
|
492
|
+
UnnestMode.CROSS_JOIN_ALIAS,
|
|
493
|
+
):
|
|
494
|
+
# for a cross apply, derivation happens in the join
|
|
485
495
|
# so we only use the alias to select
|
|
486
496
|
select_columns = [
|
|
487
497
|
self.render_concept_sql(c, cte)
|
|
@@ -499,17 +509,40 @@ class BaseDialect:
|
|
|
499
509
|
for c in cte.output_columns
|
|
500
510
|
if c.address not in [y.address for y in cte.hidden_concepts]
|
|
501
511
|
]
|
|
502
|
-
|
|
503
|
-
|
|
512
|
+
source: str | None = cte.base_name
|
|
513
|
+
if not cte.render_from_clause:
|
|
514
|
+
if len(cte.joins) > 0:
|
|
515
|
+
if cte.join_derived_concepts and self.UNNEST_MODE in (
|
|
516
|
+
UnnestMode.CROSS_JOIN_ALIAS,
|
|
517
|
+
UnnestMode.CROSS_JOIN,
|
|
518
|
+
UnnestMode.CROSS_APPLY,
|
|
519
|
+
):
|
|
520
|
+
source = f"{render_unnest(self.UNNEST_MODE, self.QUOTE_CHARACTER, cte.join_derived_concepts[0], self.render_concept_sql, cte)}"
|
|
521
|
+
# direct - eg DUCK DB - can be directly selected inline
|
|
522
|
+
elif (
|
|
523
|
+
cte.join_derived_concepts and self.UNNEST_MODE == UnnestMode.DIRECT
|
|
524
|
+
):
|
|
525
|
+
source = None
|
|
526
|
+
else:
|
|
527
|
+
raise SyntaxError("CTE has joins but no from clause")
|
|
528
|
+
else:
|
|
529
|
+
source = None
|
|
530
|
+
else:
|
|
531
|
+
if cte.quote_address:
|
|
532
|
+
source = f"{self.QUOTE_CHARACTER}{cte.base_name}{self.QUOTE_CHARACTER}"
|
|
533
|
+
else:
|
|
534
|
+
source = cte.base_name
|
|
535
|
+
if cte.base_name != cte.base_alias:
|
|
536
|
+
source = f"{source} as {cte.base_alias}"
|
|
537
|
+
if not cte.render_from_clause:
|
|
538
|
+
final_joins = []
|
|
504
539
|
else:
|
|
505
|
-
|
|
506
|
-
if cte.base_name != cte.base_alias:
|
|
507
|
-
source = f"{source} as {cte.base_alias}"
|
|
540
|
+
final_joins = cte.joins or []
|
|
508
541
|
return CompiledCTE(
|
|
509
542
|
name=cte.name,
|
|
510
543
|
statement=self.SQL_TEMPLATE.render(
|
|
511
544
|
select_columns=select_columns,
|
|
512
|
-
base=
|
|
545
|
+
base=f"{source}" if source else None,
|
|
513
546
|
grain=cte.grain,
|
|
514
547
|
limit=cte.limit,
|
|
515
548
|
# some joins may not need to be rendered
|
|
@@ -524,7 +557,7 @@ class BaseDialect:
|
|
|
524
557
|
cte,
|
|
525
558
|
self.UNNEST_MODE,
|
|
526
559
|
)
|
|
527
|
-
for join in
|
|
560
|
+
for join in final_joins
|
|
528
561
|
]
|
|
529
562
|
if j
|
|
530
563
|
],
|