pytrilogy 0.0.2.8__py3-none-any.whl → 0.0.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pytrilogy-0.0.2.8.dist-info → pytrilogy-0.0.2.10.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.2.8.dist-info → pytrilogy-0.0.2.10.dist-info}/RECORD +32 -32
- {pytrilogy-0.0.2.8.dist-info → pytrilogy-0.0.2.10.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/constants.py +1 -0
- trilogy/core/models.py +161 -59
- trilogy/core/optimization.py +44 -5
- trilogy/core/optimizations/inline_datasource.py +14 -8
- trilogy/core/optimizations/predicate_pushdown.py +73 -44
- trilogy/core/processing/concept_strategies_v3.py +69 -28
- trilogy/core/processing/node_generators/common.py +42 -16
- trilogy/core/processing/node_generators/filter_node.py +94 -48
- trilogy/core/processing/node_generators/group_node.py +3 -1
- trilogy/core/processing/node_generators/rowset_node.py +13 -54
- trilogy/core/processing/node_generators/select_node.py +10 -13
- trilogy/core/processing/node_generators/unnest_node.py +5 -3
- trilogy/core/processing/node_generators/window_node.py +23 -2
- trilogy/core/processing/nodes/__init__.py +34 -6
- trilogy/core/processing/nodes/base_node.py +67 -13
- trilogy/core/processing/nodes/filter_node.py +3 -0
- trilogy/core/processing/nodes/group_node.py +4 -5
- trilogy/core/processing/nodes/merge_node.py +1 -11
- trilogy/core/processing/nodes/select_node_v2.py +1 -0
- trilogy/core/processing/utility.py +46 -14
- trilogy/core/query_processor.py +48 -21
- trilogy/dialect/base.py +28 -15
- trilogy/dialect/duckdb.py +1 -1
- trilogy/parsing/parse_engine.py +39 -2
- trilogy/parsing/trilogy.lark +3 -1
- {pytrilogy-0.0.2.8.dist-info → pytrilogy-0.0.2.10.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.8.dist-info → pytrilogy-0.0.2.10.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.8.dist-info → pytrilogy-0.0.2.10.dist-info}/top_level.txt +0 -0
@@ -6,7 +6,7 @@ from .window_node import WindowNode
 from .base_node import StrategyNode, NodeJoin
 from .unnest_node import UnnestNode
 from pydantic import BaseModel, Field, ConfigDict
-from trilogy.core.models import Concept, Environment
+from trilogy.core.models import Concept, Environment, WhereClause
 
 
 class History(BaseModel):
@@ -15,23 +15,42 @@ class History(BaseModel):
     started: set[str] = Field(default_factory=set)
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
-    def _concepts_to_lookup(
+    def _concepts_to_lookup(
+        self,
+        search: list[Concept],
+        accept_partial: bool,
+        conditions: WhereClause | None = None,
+    ) -> str:
+        if conditions:
+            return (
+                "-".join([c.address for c in search])
+                + str(accept_partial)
+                + str(conditions)
+            )
         return "-".join([c.address for c in search]) + str(accept_partial)
 
     def search_to_history(
-        self,
+        self,
+        search: list[Concept],
+        accept_partial: bool,
+        output: StrategyNode | None,
+        conditions: WhereClause | None = None,
     ):
-        self.history[
+        self.history[
+            self._concepts_to_lookup(search, accept_partial, conditions=conditions)
+        ] = output
 
     def get_history(
         self,
         search: list[Concept],
+        conditions: WhereClause | None = None,
         accept_partial: bool = False,
         parent_key: str = "",
     ) -> StrategyNode | None | bool:
         key = self._concepts_to_lookup(
             search,
             accept_partial,
+            conditions,
         )
         if parent_key and parent_key == key:
             raise ValueError(
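Note: this and the following hunks appear to be the History cache in trilogy/core/processing/nodes/__init__.py. The cache key now folds the active where-clause into the lookup, so the same concept search under two different filters no longer resolves to one shared cached node. A minimal stand-alone sketch of the keying behaviour, using plain strings instead of the real Concept/WhereClause objects (names here are illustrative, not the library API):

    # Simplified stand-in for History._concepts_to_lookup: the key is the concept
    # addresses plus the accept_partial flag, plus the rendered condition when set.
    def cache_key(addresses: list[str], accept_partial: bool, conditions: str | None = None) -> str:
        if conditions:
            return "-".join(addresses) + str(accept_partial) + str(conditions)
        return "-".join(addresses) + str(accept_partial)

    # Same search, different filters -> different cache entries.
    assert cache_key(["orders.id"], False) != cache_key(["orders.id"], False, "revenue > 100")
    assert cache_key(["orders.id"], False, "revenue > 100") != cache_key(["orders.id"], False, "revenue > 200")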
@@ -48,11 +67,13 @@ class History(BaseModel):
         self,
         search: list[Concept],
         accept_partial: bool = False,
+        conditions: WhereClause | None = None,
     ):
         self.started.add(
             self._concepts_to_lookup(
                 search,
-                accept_partial,
+                accept_partial=accept_partial,
+                conditions=conditions,
             )
         )
 
@@ -60,11 +81,13 @@ class History(BaseModel):
         self,
         search: list[Concept],
         accept_partial: bool = False,
+        conditions: WhereClause | None = None,
     ):
         return (
             self._concepts_to_lookup(
                 search,
                 accept_partial,
+                conditions=conditions,
             )
             in self.started
         )
@@ -76,6 +99,7 @@ class History(BaseModel):
         accept_partial: bool,
         fail_if_not_found: bool,
         accept_partial_optional: bool,
+        conditions: WhereClause | None = None,
     ) -> str:
         return (
             str(main.address)
@@ -84,6 +108,7 @@ class History(BaseModel):
             + str(accept_partial)
             + str(fail_if_not_found)
             + str(accept_partial_optional)
+            + str(conditions)
         )
 
     def gen_select_node(
@@ -97,6 +122,7 @@ class History(BaseModel):
         fail_if_not_found: bool = False,
         accept_partial: bool = False,
         accept_partial_optional: bool = False,
+        conditions: WhereClause | None = None,
     ) -> StrategyNode | None:
         from trilogy.core.processing.node_generators.select_node import gen_select_node
 
@@ -105,7 +131,8 @@ class History(BaseModel):
             local_optional,
             accept_partial,
             fail_if_not_found,
-            accept_partial_optional,
+            accept_partial_optional=accept_partial_optional,
+            conditions=conditions,
         )
         if fingerprint in self.select_history:
             return self.select_history[fingerprint]
@@ -119,6 +146,7 @@ class History(BaseModel):
             accept_partial=accept_partial,
             accept_partial_optional=accept_partial_optional,
             source_concepts=source_concepts,
+            conditions=conditions,
         )
         self.select_history[fingerprint] = gen
         return gen
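Note: the select-node cache fingerprint and gen_select_node gain the same conditions parameter, so cached select nodes are also partitioned by filter. A rough sketch of the fingerprint idea (stand-in types and names, not the real signatures):

    # Hypothetical, simplified fingerprint: the filter is appended so a select node
    # generated under one where-clause is never reused under another.
    def select_fingerprint(
        main: str,
        optional: list[str],
        accept_partial: bool,
        fail_if_not_found: bool,
        accept_partial_optional: bool,
        conditions: str | None = None,
    ) -> str:
        return (
            main
            + ",".join(sorted(optional))
            + str(accept_partial)
            + str(fail_if_not_found)
            + str(accept_partial_optional)
            + str(conditions)
        )

    assert select_fingerprint("a", ["b"], False, False, False, "x > 1") != select_fingerprint("a", ["b"], False, False, False, None)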
@@ -17,6 +17,7 @@ from trilogy.core.models import (
 from trilogy.core.enums import Purpose, JoinType, PurposeLineage, Granularity
 from trilogy.utility import unique
 from dataclasses import dataclass
+from trilogy.core.enums import BooleanOperator
 
 
 def concept_list_to_grain(
@@ -71,7 +72,6 @@ def resolve_concept_map(
                 concept_map[concept.address].add(input)
             elif concept.address not in concept_map:
                 concept_map[concept.address].add(input)
-
     # second loop, include partials
     for input in inputs:
         for concept in input.output_concepts:
@@ -92,14 +92,31 @@ def resolve_concept_map(
     return concept_map
 
 
-def get_all_parent_partial(
-
-
-
-
-
-
-
+def get_all_parent_partial(
+    all_concepts: List[Concept], parents: List["StrategyNode"]
+) -> List[Concept]:
+    return unique(
+        [
+            c
+            for c in all_concepts
+            if len(
+                [
+                    p
+                    for p in parents
+                    if c.address in [x.address for x in p.partial_concepts]
+                ]
+            )
+            >= 1
+            and all(
+                [
+                    c.address in p.partial_lcl
+                    for p in parents
+                    if c.address in p.output_lcl
+                ]
+            )
+        ],
+        "address",
+    )
 
 
 class StrategyNode:
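Note: get_all_parent_partial is fleshed out: a concept is carried forward as partial only when at least one parent reports it as partial and every parent that actually outputs it reports it as partial. A stand-alone sketch of that rule with plain dicts in place of StrategyNode/LooseConceptList (illustrative names only):

    # Each "parent" is reduced to the addresses it outputs and the subset it only
    # covers partially. A concept stays partial in the child when some parent marks
    # it partial and no parent that outputs it has it in full.
    def parent_partials(all_addresses: list[str], parents: list[dict]) -> list[str]:
        result = []
        for address in all_addresses:
            flagged = [p for p in parents if address in p["partial"]]
            owners = [p for p in parents if address in p["outputs"]]
            if flagged and all(address in p["partial"] for p in owners):
                result.append(address)
        return result

    parents = [
        {"outputs": {"order.id", "order.status"}, "partial": {"order.status"}},
        {"outputs": {"order.status"}, "partial": {"order.status"}},
    ]
    assert parent_partials(["order.id", "order.status"], parents) == ["order.status"]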
@@ -137,7 +154,7 @@ class StrategyNode:
         self.partial_concepts = partial_concepts or get_all_parent_partial(
             self.output_concepts, self.parents
         )
-
+
         self.depth = depth
         self.conditions = conditions
         self.grain = grain
@@ -146,13 +163,50 @@ class StrategyNode:
         self.hidden_concepts = hidden_concepts or []
         self.existence_concepts = existence_concepts or []
         self.virtual_output_concepts = virtual_output_concepts or []
+        self.validate_parents()
+
+    def add_parents(self, parents: list["StrategyNode"]):
+        self.parents += parents
+        self.validate_parents()
+
+    def add_condition(self, condition: Conditional | Comparison | Parenthetical):
+        if self.conditions:
+            self.conditions = Conditional(
+                left=self.conditions, right=condition, operator=BooleanOperator.AND
+            )
+        else:
+            self.conditions = condition
+
+    def validate_parents(self):
+        # validate parents exist
+        # assign partial values where needed
         for parent in self.parents:
             if not parent:
                 raise SyntaxError("Unresolvable parent")
 
+        # TODO: make this accurate
+        if self.parents:
+            self.partial_concepts = get_all_parent_partial(
+                self.output_concepts, self.parents
+            )
+
+        self.partial_lcl = LooseConceptList(concepts=self.partial_concepts)
+
     def add_output_concepts(self, concepts: List[Concept]):
         for concept in concepts:
-            self.
+            if concept.address not in self.output_lcl.addresses:
+                self.output_concepts.append(concept)
+        self.output_lcl = LooseConceptList(concepts=self.output_concepts)
+        self.rebuild_cache()
+
+    def add_existence_concepts(self, concepts: List[Concept]):
+        for concept in concepts:
+            if concept.address not in [x.address for x in self.output_concepts]:
+                self.existence_concepts.append(concept)
+        self.rebuild_cache()
+
+    def set_output_concepts(self, concepts: List[Concept]):
+        self.output_concepts = concepts
         self.output_lcl = LooseConceptList(concepts=self.output_concepts)
         self.rebuild_cache()
 
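Note: StrategyNode now re-derives partials whenever its parents change (validate_parents / add_parents) and can accumulate filters via add_condition, which ANDs a new condition onto any existing one. A minimal sketch of the accumulation pattern, using a throwaway And class rather than the real Conditional model:

    from dataclasses import dataclass

    @dataclass
    class And:
        left: object
        right: object

    class Node:
        def __init__(self) -> None:
            self.conditions = None

        # Mirrors StrategyNode.add_condition: conditions compose with AND instead of
        # being overwritten, so later callers cannot silently drop an earlier filter.
        def add_condition(self, condition) -> None:
            self.conditions = And(self.conditions, condition) if self.conditions else condition

    n = Node()
    n.add_condition("status = 'open'")
    n.add_condition("amount > 10")
    assert n.conditions == And("status = 'open'", "amount > 10")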
@@ -195,8 +249,8 @@ class StrategyNode:
         grain = self.grain if self.grain else Grain(components=self.output_concepts)
         source_map = resolve_concept_map(
             parent_sources,
-            self.output_concepts,
-            self.input_concepts + self.existence_concepts,
+            targets=self.output_concepts,
+            inherited_inputs=self.input_concepts + self.existence_concepts,
         )
         return QueryDatasource(
             input_concepts=self.input_concepts,
@@ -36,6 +36,7 @@ class FilterNode(StrategyNode):
         partial_concepts: List[Concept] | None = None,
         force_group: bool | None = False,
         grain: Grain | None = None,
+        existence_concepts: List[Concept] | None = None,
     ):
         super().__init__(
             output_concepts=output_concepts,
@@ -49,6 +50,7 @@ class FilterNode(StrategyNode):
             partial_concepts=partial_concepts,
             force_group=force_group,
             grain=grain,
+            existence_concepts=existence_concepts,
         )
 
     def copy(self) -> "FilterNode":
@@ -64,4 +66,5 @@ class FilterNode(StrategyNode):
             partial_concepts=list(self.partial_concepts),
             force_group=self.force_group,
             grain=self.grain,
+            existence_concepts=list(self.existence_concepts),
         )
@@ -39,6 +39,7 @@ class GroupNode(StrategyNode):
         partial_concepts: Optional[List[Concept]] = None,
         force_group: bool | None = None,
         conditions: Conditional | Comparison | Parenthetical | None = None,
+        existence_concepts: List[Concept] | None = None,
     ):
         super().__init__(
             input_concepts=input_concepts,
@@ -51,6 +52,7 @@ class GroupNode(StrategyNode):
             partial_concepts=partial_concepts,
             force_group=force_group,
             conditions=conditions,
+            existence_concepts=existence_concepts,
         )
 
     def _resolve(self) -> QueryDatasource:
@@ -128,7 +130,7 @@ class GroupNode(StrategyNode):
                     if self.conditions
                     else self.output_concepts
                 ),
-                inherited_inputs=self.input_concepts,
+                inherited_inputs=self.input_concepts + self.existence_concepts,
             ),
             joins=[],
             grain=grain,
@@ -137,10 +139,6 @@ class GroupNode(StrategyNode):
         )
         # if there is a condition on a group node and it's not scalar
         # inject an additional CTE
-        if self.conditions:
-            logger.info("CONDITIONS")
-            logger.info(str(self.conditions))
-            logger.info(is_scalar_condition(self.conditions))
         if self.conditions and not is_scalar_condition(self.conditions):
             base.condition = None
             base.output_concepts = self.output_concepts + self.conditions.row_arguments
@@ -173,4 +171,5 @@ class GroupNode(StrategyNode):
             partial_concepts=list(self.partial_concepts),
             force_group=self.force_group,
             conditions=self.conditions,
+            existence_concepts=list(self.existence_concepts),
         )
@@ -138,16 +138,6 @@ class MergeNode(StrategyNode):
                 continue
             final_joins.append(join)
         self.node_joins = final_joins
-        partial_lookup: list[Concept] = []
-        non_partial: List[Concept] = []
-        for node in parents or []:
-            partial_lookup += node.partial_concepts
-            non_partial += [
-                x for x in node.output_concepts if x not in node.partial_concepts
-            ]
-
-        final_partial = [x for x in partial_lookup if x not in non_partial]
-        self.partial_concepts = final_partial
 
     def translate_node_joins(self, node_joins: List[NodeJoin]) -> List[BaseJoin]:
         joins = []
@@ -265,7 +255,6 @@ class MergeNode(StrategyNode):
             for x in final_datasets
             if all([y in self.existence_concepts for y in x.output_concepts])
         ]
-
         if len(merged.keys()) == 1:
             final: QueryDatasource | Datasource = list(merged.values())[0]
             if (
@@ -378,4 +367,5 @@ class MergeNode(StrategyNode):
             node_joins=self.node_joins,
             join_concepts=list(self.join_concepts) if self.join_concepts else None,
             force_join_type=self.force_join_type,
+            existence_concepts=list(self.existence_concepts),
         )
@@ -166,6 +166,7 @@ class SelectNode(StrategyNode):
         resolution = self.resolve_from_provided_datasource()
         if resolution:
             return resolution
+
         required = [c.address for c in self.all_concepts]
         raise NoDatasourceException(
             f"Could not find any way to resolve datasources for required concepts {required} with derivation {[x.derivation for x in self.all_concepts]}"
@@ -20,7 +20,7 @@ from trilogy.core.models import (
     DataType,
 )
 
-from trilogy.core.enums import Purpose, Granularity
+from trilogy.core.enums import Purpose, Granularity, BooleanOperator
 from trilogy.core.constants import CONSTANT_DATASET
 from enum import Enum
 from trilogy.utility import unique
@@ -62,7 +62,7 @@ def create_log_lambda(prefix: str, depth: int, logger: Logger):
     pad = padding(depth)
 
     def log_lambda(msg: str):
-        logger.info(f"{pad}
+        logger.info(f"{pad}{prefix} {msg}")
 
     return log_lambda
 
@@ -328,14 +328,6 @@ def get_disconnected_components(
     for datasource, concepts in concept_map.items():
         graph.add_node(datasource, type=NodeType.NODE)
         for concept in concepts:
-            # TODO: determine if this is the right way to handle things
-            # if concept.derivation in (PurposeLineage.FILTER, PurposeLineage.WINDOW):
-            # if isinstance(concept.lineage, FilterItem):
-            # graph.add_node(concept.lineage.content.address, type=NodeType.CONCEPT)
-            # graph.add_edge(datasource, concept.lineage.content.address)
-            # if isinstance(concept.lineage, WindowItem):
-            # graph.add_node(concept.lineage.content.address, type=NodeType.CONCEPT)
-            # graph.add_edge(datasource, concept.lineage.content.address)
             graph.add_node(concept.address, type=NodeType.CONCEPT)
             graph.add_edge(datasource, concept.address)
             all_concepts.add(concept)
@@ -363,18 +355,58 @@ def is_scalar_condition(
         | MagicConstants
         | DataType
     ),
+    materialized: set[str] | None = None,
 ) -> bool:
     if isinstance(element, Parenthetical):
-        return is_scalar_condition(element.content)
+        return is_scalar_condition(element.content, materialized)
     elif isinstance(element, SubselectComparison):
         return True
     elif isinstance(element, Comparison):
-        return is_scalar_condition(element.left) and is_scalar_condition(
+        return is_scalar_condition(element.left, materialized) and is_scalar_condition(
+            element.right, materialized
+        )
     elif isinstance(element, Function):
         if element.operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
             return False
+    elif isinstance(element, Concept):
+        if materialized and element.address in materialized:
+            return True
+        if element.lineage and isinstance(element.lineage, AggregateWrapper):
+            return is_scalar_condition(element.lineage, materialized)
+        return True
     elif isinstance(element, AggregateWrapper):
-        return is_scalar_condition(element.function)
+        return is_scalar_condition(element.function, materialized)
     elif isinstance(element, Conditional):
-        return is_scalar_condition(element.left) and is_scalar_condition(
+        return is_scalar_condition(element.left, materialized) and is_scalar_condition(
+            element.right, materialized
+        )
     return True
+
+
+def decompose_condition(
+    conditional: Conditional | Comparison | Parenthetical,
+) -> list[SubselectComparison | Comparison | Conditional | Parenthetical]:
+    chunks: list[SubselectComparison | Comparison | Conditional | Parenthetical] = []
+    if not isinstance(conditional, Conditional):
+        return [conditional]
+    if conditional.operator == BooleanOperator.AND:
+        if not (
+            isinstance(
+                conditional.left,
+                (SubselectComparison, Comparison, Conditional, Parenthetical),
+            )
+            and isinstance(
+                conditional.right,
+                (SubselectComparison, Comparison, Conditional, Parenthetical),
+            )
+        ):
+            chunks.append(conditional)
+        else:
+            for val in [conditional.left, conditional.right]:
+                if isinstance(val, Conditional):
+                    chunks.extend(decompose_condition(val))
+                else:
+                    chunks.append(val)
+    else:
+        chunks.append(conditional)
+    return chunks
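Note: is_scalar_condition now accepts a materialized set (concept addresses already present in a CTE's source map count as scalar even when their lineage is an aggregate), and the new decompose_condition flattens an AND chain into independent clauses while keeping OR expressions intact, so each clause can later be routed to WHERE or HAVING on its own. A stand-alone sketch of the flattening shape with a toy class (not the trilogy models):

    from dataclasses import dataclass

    @dataclass
    class Cond:
        left: object
        right: object
        operator: str = "AND"

    # AND trees flatten into their leaf clauses; anything joined by OR is kept as a
    # single unit so it cannot be split across WHERE and HAVING.
    def decompose(c) -> list:
        if not isinstance(c, Cond) or c.operator != "AND":
            return [c]
        out = []
        for side in (c.left, c.right):
            out.extend(decompose(side))
        return out

    tree = Cond(Cond("a = 1", "b = 2"), Cond("c = 3", "d = 4", operator="OR"))
    assert decompose(tree) == ["a = 1", "b = 2", Cond("c = 3", "d = 4", operator="OR")]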
trilogy/core/query_processor.py
CHANGED
@@ -4,9 +4,9 @@ from trilogy.core.env_processor import generate_graph
 from trilogy.core.graph_models import ReferenceGraph
 from trilogy.core.constants import CONSTANT_DATASET
 from trilogy.core.processing.concept_strategies_v3 import source_query_concepts
-from trilogy.core.enums import SelectFiltering
+from trilogy.core.enums import SelectFiltering, BooleanOperator
 from trilogy.constants import CONFIG, DEFAULT_NAMESPACE
-from trilogy.core.processing.nodes import GroupNode, SelectNode, StrategyNode
+from trilogy.core.processing.nodes import GroupNode, SelectNode, StrategyNode, History
 from trilogy.core.models import (
     Concept,
     Environment,
@@ -24,6 +24,7 @@ from trilogy.core.models import (
     Datasource,
     BaseJoin,
     InstantiatedUnnestJoin,
+    Conditional,
 )
 
 from trilogy.utility import unique
@@ -260,7 +261,7 @@ def datasource_to_ctes(
 
     human_id = generate_cte_name(query_datasource.full_name, name_map)
     logger.info(
-        f"Finished building source map for {human_id} with {len(parents)} parents, have {source_map}, query_datasource had non-empty keys {[k for k, v in query_datasource.source_map.items() if v]} "
+        f"Finished building source map for {human_id} with {len(parents)} parents, have {source_map}, query_datasource had non-empty keys {[k for k, v in query_datasource.source_map.items() if v]} and existence had non-empty keys {[k for k, v in query_datasource.existence_source_map.items() if v]} "
     )
     final_joins = [
         x
@@ -307,7 +308,10 @@ def datasource_to_ctes(
 
 
 def append_existence_check(
-    node: StrategyNode,
+    node: StrategyNode,
+    environment: Environment,
+    graph: ReferenceGraph,
+    history: History | None = None,
 ):
     # we if we have a where clause doing an existence check
     # treat that as separate subquery
@@ -318,25 +322,22 @@ def append_existence_check(
     logger.info(
         f"{LOGGER_PREFIX} fetching existance clause inputs {[str(c) for c in subselect]}"
     )
-    eds = source_query_concepts(
-
-
-
-
-    for x in final_eds.output_concepts:
-        if x.address not in first_parent.existence_source_map:
-            first_parent.existence_source_map[x.address] = {final_eds}
+    eds = source_query_concepts(
+        [*subselect], environment=environment, g=graph, history=history
+    )
+    node.add_parents([eds])
+    node.add_existence_concepts([*subselect])
 
 
-def
+def get_query_node(
     environment: Environment,
     statement: SelectStatement | MultiSelectStatement,
     graph: Optional[ReferenceGraph] = None,
-
-) ->
+    history: History | None = None,
+) -> StrategyNode:
     graph = graph or generate_graph(environment)
     logger.info(
-        f"{LOGGER_PREFIX} getting source datasource for query with output {[str(c) for c in statement.output_components]}"
+        f"{LOGGER_PREFIX} getting source datasource for query with filtering {statement.where_clause_category} and output {[str(c) for c in statement.output_components]}"
     )
     if not statement.output_components:
         raise ValueError(f"Statement has no output components {statement}")
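Note: append_existence_check now builds the existence subselect through the regular source_query_concepts pipeline and attaches it to the main node via the new add_parents / add_existence_concepts helpers, instead of patching the first parent's existence_source_map directly. A rough sketch of the resulting shape (toy node class, not the real StrategyNode):

    class ToyNode:
        # Minimal stand-in: a node tracks its parents plus concepts that exist only
        # to drive an EXISTS/IN check and are never emitted as outputs.
        def __init__(self, outputs: list[str]) -> None:
            self.outputs = outputs
            self.parents: list["ToyNode"] = []
            self.existence: list[str] = []

        def add_parents(self, parents: list["ToyNode"]) -> None:
            self.parents += parents

        def add_existence_concepts(self, concepts: list[str]) -> None:
            self.existence += [c for c in concepts if c not in self.outputs]

    main = ToyNode(outputs=["customer.id", "customer.name"])
    subselect = ToyNode(outputs=["orders.customer_id"])
    main.add_parents([subselect])                         # subquery becomes a parent CTE
    main.add_existence_concepts(["orders.customer_id"])   # referenced only in the filter
    assert main.existence == ["orders.customer_id"]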
@@ -353,22 +354,28 @@ def get_query_datasources(
     )
     nest_where = True
 
-    ods = source_query_concepts(
+    ods: StrategyNode = source_query_concepts(
         search_concepts,
         environment=environment,
         g=graph,
+        conditions=(statement.where_clause if statement.where_clause else None),
+        history=history,
     )
-
+    if not ods:
+        raise ValueError(
+            f"Could not find source query concepts for {[x.address for x in search_concepts]}"
+        )
+    ds: StrategyNode
     if nest_where and statement.where_clause:
         if not all_aggregate:
             ods.conditions = statement.where_clause.conditional
-            ods.output_concepts =
+            ods.output_concepts = statement.output_components
             # ods.hidden_concepts = where_delta
             ods.rebuild_cache()
-            append_existence_check(ods, environment, graph)
+            append_existence_check(ods, environment, graph, history)
         ds = GroupNode(
             output_concepts=statement.output_components,
-            input_concepts=
+            input_concepts=statement.output_components,
             parents=[ods],
             environment=ods.environment,
             g=ods.g,
@@ -390,7 +397,26 @@ def get_query_datasources(
 
     else:
         ds = ods
+    if statement.having_clause:
+        if ds.conditions:
+            ds.conditions = Conditional(
+                left=ds.conditions,
+                right=statement.having_clause.conditional,
+                operator=BooleanOperator.AND,
+            )
+        else:
+            ds.conditions = statement.having_clause.conditional
+    return ds
+
+
+def get_query_datasources(
+    environment: Environment,
+    statement: SelectStatement | MultiSelectStatement,
+    graph: Optional[ReferenceGraph] = None,
+    hooks: Optional[List[BaseHook]] = None,
+) -> QueryDatasource:
 
+    ds = get_query_node(environment, statement, graph)
     final_qds = ds.resolve()
     if hooks:
         for hook in hooks:
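Note: node building is split into get_query_node (returns a StrategyNode, now aware of history and the where clause, with any HAVING clause ANDed onto the node's conditions) and a thin get_query_datasources wrapper that resolves it before hooks run. A small stand-alone sketch of that pipeline shape with plain dicts (hypothetical names, not the trilogy API):

    # Hypothetical mini-pipeline mirroring the new split: build a plan node first,
    # then resolve it into a datasource; hooks only ever see the resolved object.
    def get_node(statement: dict) -> dict:
        conditions = statement.get("where")
        if statement.get("having"):
            conditions = ("AND", conditions, statement["having"]) if conditions else statement["having"]
        return {"outputs": statement["select"], "conditions": conditions}

    def get_datasources(statement: dict, hooks: list | None = None) -> dict:
        node = get_node(statement)
        resolved = {"columns": node["outputs"], "condition": node["conditions"]}  # stand-in for .resolve()
        for hook in hooks or []:
            hook(resolved)
        return resolved

    q = {"select": ["region", "total"], "where": "year = 2024", "having": "total > 10"}
    assert get_datasources(q)["condition"] == ("AND", "year = 2024", "total > 10")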
@@ -475,6 +501,7 @@ def process_query(
         grain=statement.grain,
         limit=statement.limit,
         where_clause=statement.where_clause,
+        having_clause=statement.having_clause,
         output_columns=statement.output_components,
         ctes=final_ctes,
         base=root_cte,
trilogy/dialect/base.py
CHANGED
@@ -2,7 +2,7 @@ from typing import List, Union, Optional, Dict, Any, Sequence, Callable
 
 from jinja2 import Template
 
-from trilogy.core.processing.utility import is_scalar_condition
+from trilogy.core.processing.utility import is_scalar_condition, decompose_condition
 from trilogy.constants import CONFIG, logger, MagicConstants
 from trilogy.core.internal import DEFAULT_CONCEPTS
 from trilogy.core.enums import (
@@ -386,14 +386,19 @@ class BaseDialect:
                 e.right.address,
                 [
                     INVALID_REFERENCE_STRING(
-                        f"Missing source reference to {e.right.
+                        f"Missing source reference to {e.right.address}"
                     )
                 ],
             )
         else:
             lookup = lookup_cte.existence_source_map[e.right.address]
-
-
+            if len(lookup) > 0:
+                target = lookup[0]
+            else:
+                target = INVALID_REFERENCE_STRING(
+                    f"Missing source CTE for {e.right.address}"
+                )
+            return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} (select {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} from {target} where {target}.{self.QUOTE_CHARACTER}{e.right.safe_address}{self.QUOTE_CHARACTER} is not null)"
         elif isinstance(e.right, (ListWrapper, Parenthetical, list)):
             return f"{self.render_expr(e.left, cte=cte, cte_map=cte_map)} {e.operator.value} {self.render_expr(e.right, cte=cte, cte_map=cte_map)}"
 
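Note: when the right-hand side of a comparison is an existence concept, the dialect now resolves its source CTE from existence_source_map (falling back to an INVALID_REFERENCE_STRING marker) and renders the check as an inline subselect. Under those assumptions, the emitted SQL for an IN-style check looks roughly like the string below (CTE and column names are illustrative):

    # Shape of the rendered comparison, assuming a CTE named "cte_orders" feeds the
    # existence concept orders.customer_id and the dialect quotes with double quotes.
    rendered = (
        'customers."id" in '
        '(select cte_orders."orders_customer_id" from cte_orders '
        'where cte_orders."orders_customer_id" is not null)'
    )
    print(rendered)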
@@ -514,7 +519,9 @@ class BaseDialect:
         ):
             source = f"{render_unnest(self.UNNEST_MODE, self.QUOTE_CHARACTER, cte.join_derived_concepts[0], self.render_concept_sql, cte)}"
         # direct - eg DUCK DB - can be directly selected inline
-        elif
+        elif (
+            cte.join_derived_concepts and self.UNNEST_MODE == UnnestMode.DIRECT
+        ):
             source = None
         else:
             raise SyntaxError("CTE has joins but no from clause")
@@ -531,6 +538,20 @@ class BaseDialect:
             final_joins = []
         else:
             final_joins = cte.joins or []
+        where: Conditional | Parenthetical | Comparison | None = None
+        having: Conditional | Parenthetical | Comparison | None = None
+        materialized = {x for x, v in cte.source_map.items() if v}
+        if cte.condition:
+            if is_scalar_condition(cte.condition, materialized=materialized):
+                where = cte.condition
+            else:
+                components = decompose_condition(cte.condition)
+                for x in components:
+                    if is_scalar_condition(x, materialized=materialized):
+                        where = where + x if where else x
+                    else:
+                        having = having + x if having else x
+
         return CompiledCTE(
             name=cte.name,
             statement=self.SQL_TEMPLATE.render(
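Note: this is the consumer of decompose_condition: a CTE's single condition is split clause by clause into a WHERE part (scalar with respect to the concepts already materialized in the CTE) and a HAVING part (anything still aggregate-valued), replacing the earlier all-or-nothing choice. A compact stand-alone sketch of the routing loop with toy clause strings (not trilogy models):

    # Toy version of the split: clauses over already-materialized columns go to
    # WHERE; clauses that still need an aggregate computed in this CTE go to HAVING.
    def split_condition(clauses: list[str], materialized: set[str]) -> tuple[list[str], list[str]]:
        where: list[str] = []
        having: list[str] = []
        for clause in clauses:
            target = where if not clause.startswith("agg:") or clause in materialized else having
            target.append(clause)
        return where, having

    clauses = ["region = 'EU'", "agg:sum(revenue) > 100"]
    assert split_condition(clauses, materialized=set()) == (["region = 'EU'"], ["agg:sum(revenue) > 100"])
    # If the aggregate was produced by an upstream CTE it is just a column here,
    # so the same clause can safely live in WHERE.
    assert split_condition(clauses, materialized={"agg:sum(revenue) > 100"}) == (clauses, [])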
@@ -554,16 +575,8 @@ class BaseDialect:
                 ]
                 if j
             ],
-            where=(
-
-                if cte.condition and is_scalar_condition(cte.condition)
-                else None
-            ),
-            having=(
-                self.render_expr(cte.condition, cte)
-                if cte.condition and not is_scalar_condition(cte.condition)
-                else None
-            ),
+            where=(self.render_expr(where, cte) if where else None),
+            having=(self.render_expr(having, cte) if having else None),
             order_by=(
                 [self.render_order_item(i, cte) for i in cte.order_by.items]
                 if cte.order_by
trilogy/dialect/duckdb.py
CHANGED
@@ -36,7 +36,7 @@ FUNCTION_MAP = {
 # we may return a static value
 FUNCTION_GRAIN_MATCH_MAP = {
     **FUNCTION_MAP,
-    FunctionType.COUNT: lambda args: "
+    FunctionType.COUNT: lambda args: f"{args[0]}",
     FunctionType.SUM: lambda args: f"{args[0]}",
     FunctionType.AVG: lambda args: f"{args[0]}",
 }
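Note: FUNCTION_GRAIN_MATCH_MAP appears to hold the render overrides used when a query's grain already matches the aggregation grain; COUNT now follows SUM and AVG in collapsing to a plain reference to its argument instead of the previously truncated literal. A minimal sketch of the lambda-map pattern (stand-in enum and renderer, not the DuckDB dialect itself):

    from enum import Enum

    class Fn(Enum):
        COUNT = "count"
        SUM = "sum"

    # Normal rendering wraps the argument in the aggregate...
    FUNCTION_MAP = {
        Fn.COUNT: lambda args: f"count({args[0]})",
        Fn.SUM: lambda args: f"sum({args[0]})",
    }
    # ...but when the row grain already matches, the override map passes the
    # column straight through.
    GRAIN_MATCH_MAP = {
        **FUNCTION_MAP,
        Fn.COUNT: lambda args: f"{args[0]}",
        Fn.SUM: lambda args: f"{args[0]}",
    }

    assert FUNCTION_MAP[Fn.COUNT](["orders"]) == "count(orders)"
    assert GRAIN_MATCH_MAP[Fn.COUNT](["orders"]) == "orders"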