pytrilogy 0.0.2.57__py3-none-any.whl → 0.0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/METADATA +9 -2
- pytrilogy-0.0.3.0.dist-info/RECORD +99 -0
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +2 -2
- trilogy/core/enums.py +1 -7
- trilogy/core/env_processor.py +17 -5
- trilogy/core/environment_helpers.py +11 -25
- trilogy/core/exceptions.py +4 -0
- trilogy/core/functions.py +695 -261
- trilogy/core/graph_models.py +10 -10
- trilogy/core/internal.py +11 -2
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2110 -0
- trilogy/core/models/build.py +1845 -0
- trilogy/core/models/build_environment.py +151 -0
- trilogy/core/models/core.py +370 -0
- trilogy/core/models/datasource.py +297 -0
- trilogy/core/models/environment.py +696 -0
- trilogy/core/models/execute.py +931 -0
- trilogy/core/optimization.py +17 -22
- trilogy/core/optimizations/base_optimization.py +1 -1
- trilogy/core/optimizations/inline_constant.py +6 -6
- trilogy/core/optimizations/inline_datasource.py +17 -11
- trilogy/core/optimizations/predicate_pushdown.py +17 -16
- trilogy/core/processing/concept_strategies_v3.py +181 -146
- trilogy/core/processing/graph_utils.py +1 -1
- trilogy/core/processing/node_generators/basic_node.py +19 -18
- trilogy/core/processing/node_generators/common.py +51 -45
- trilogy/core/processing/node_generators/filter_node.py +26 -13
- trilogy/core/processing/node_generators/group_node.py +26 -21
- trilogy/core/processing/node_generators/group_to_node.py +13 -10
- trilogy/core/processing/node_generators/multiselect_node.py +60 -43
- trilogy/core/processing/node_generators/node_merge_node.py +76 -38
- trilogy/core/processing/node_generators/rowset_node.py +59 -36
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
- trilogy/core/processing/node_generators/select_merge_node.py +161 -64
- trilogy/core/processing/node_generators/select_node.py +13 -13
- trilogy/core/processing/node_generators/union_node.py +12 -11
- trilogy/core/processing/node_generators/unnest_node.py +9 -7
- trilogy/core/processing/node_generators/window_node.py +19 -16
- trilogy/core/processing/nodes/__init__.py +21 -18
- trilogy/core/processing/nodes/base_node.py +92 -77
- trilogy/core/processing/nodes/filter_node.py +19 -13
- trilogy/core/processing/nodes/group_node.py +55 -40
- trilogy/core/processing/nodes/merge_node.py +47 -38
- trilogy/core/processing/nodes/select_node_v2.py +54 -40
- trilogy/core/processing/nodes/union_node.py +5 -7
- trilogy/core/processing/nodes/unnest_node.py +7 -11
- trilogy/core/processing/nodes/window_node.py +9 -4
- trilogy/core/processing/utility.py +108 -80
- trilogy/core/query_processor.py +67 -49
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +413 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +30 -0
- trilogy/core/statements/execute.py +42 -0
- trilogy/dialect/base.py +152 -111
- trilogy/dialect/common.py +9 -10
- trilogy/dialect/duckdb.py +1 -1
- trilogy/dialect/enums.py +4 -2
- trilogy/dialect/presto.py +1 -1
- trilogy/dialect/sql_server.py +1 -1
- trilogy/executor.py +44 -32
- trilogy/hooks/base_hook.py +6 -4
- trilogy/hooks/query_debugger.py +110 -93
- trilogy/parser.py +1 -1
- trilogy/parsing/common.py +303 -64
- trilogy/parsing/parse_engine.py +263 -617
- trilogy/parsing/render.py +50 -26
- trilogy/scripts/trilogy.py +2 -1
- pytrilogy-0.0.2.57.dist-info/RECORD +0 -87
- trilogy/core/models.py +0 -4960
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/top_level.txt +0 -0
trilogy/core/optimization.py
CHANGED
|
@@ -1,12 +1,9 @@
|
|
|
1
1
|
from trilogy.constants import CONFIG, logger
|
|
2
|
-
from trilogy.core.enums import BooleanOperator,
|
|
3
|
-
from trilogy.core.models import (
|
|
4
|
-
|
|
5
|
-
Conditional,
|
|
6
|
-
MultiSelectStatement,
|
|
7
|
-
SelectStatement,
|
|
8
|
-
UnionCTE,
|
|
2
|
+
from trilogy.core.enums import BooleanOperator, Derivation
|
|
3
|
+
from trilogy.core.models.build import (
|
|
4
|
+
BuildConditional,
|
|
9
5
|
)
|
|
6
|
+
from trilogy.core.models.execute import CTE, UnionCTE
|
|
10
7
|
from trilogy.core.optimizations import (
|
|
11
8
|
InlineConstant,
|
|
12
9
|
InlineDatasource,
|
|
@@ -15,6 +12,7 @@ from trilogy.core.optimizations import (
|
|
|
15
12
|
PredicatePushdownRemove,
|
|
16
13
|
)
|
|
17
14
|
from trilogy.core.processing.utility import sort_select_output
|
|
15
|
+
from trilogy.core.statements.author import MultiSelectStatement, SelectStatement
|
|
18
16
|
|
|
19
17
|
MAX_OPTIMIZATION_LOOPS = 100
|
|
20
18
|
|
|
@@ -136,33 +134,29 @@ def is_direct_return_eligible(cte: CTE | UnionCTE) -> CTE | UnionCTE | None:
|
|
|
136
134
|
|
|
137
135
|
assert isinstance(cte, CTE)
|
|
138
136
|
derived_concepts = [
|
|
139
|
-
c
|
|
140
|
-
for c in cte.source.output_concepts + cte.source.hidden_concepts
|
|
141
|
-
if c not in cte.source.input_concepts
|
|
137
|
+
c for c in cte.source.output_concepts if c not in cte.source.input_concepts
|
|
142
138
|
]
|
|
143
139
|
|
|
144
140
|
parent_derived_concepts = [
|
|
145
141
|
c
|
|
146
142
|
for c in direct_parent.source.output_concepts
|
|
147
|
-
+ direct_parent.source.hidden_concepts
|
|
148
143
|
if c not in direct_parent.source.input_concepts
|
|
149
144
|
]
|
|
150
145
|
condition_arguments = cte.condition.row_arguments if cte.condition else []
|
|
151
146
|
for x in derived_concepts:
|
|
152
|
-
if x.derivation ==
|
|
147
|
+
if x.derivation == Derivation.WINDOW:
|
|
153
148
|
return None
|
|
154
|
-
if x.derivation ==
|
|
149
|
+
if x.derivation == Derivation.UNNEST:
|
|
155
150
|
return None
|
|
156
|
-
if x.derivation ==
|
|
151
|
+
if x.derivation == Derivation.AGGREGATE:
|
|
157
152
|
return None
|
|
158
153
|
for x in parent_derived_concepts:
|
|
159
154
|
if x.address not in condition_arguments:
|
|
160
155
|
continue
|
|
161
|
-
if x.derivation ==
|
|
156
|
+
if x.derivation == Derivation.UNNEST:
|
|
162
157
|
return None
|
|
163
|
-
if x.derivation ==
|
|
158
|
+
if x.derivation == Derivation.WINDOW:
|
|
164
159
|
return None
|
|
165
|
-
|
|
166
160
|
logger.info(
|
|
167
161
|
f"[Optimization][EarlyReturn] Removing redundant output CTE with derived_concepts {[x.address for x in derived_concepts]}"
|
|
168
162
|
)
|
|
@@ -180,12 +174,12 @@ def optimize_ctes(
|
|
|
180
174
|
):
|
|
181
175
|
direct_parent.order_by = root_cte.order_by
|
|
182
176
|
direct_parent.limit = root_cte.limit
|
|
183
|
-
direct_parent.hidden_concepts = (
|
|
184
|
-
|
|
177
|
+
direct_parent.hidden_concepts = root_cte.hidden_concepts.union(
|
|
178
|
+
direct_parent.hidden_concepts
|
|
185
179
|
)
|
|
186
180
|
if root_cte.condition:
|
|
187
181
|
if direct_parent.condition:
|
|
188
|
-
direct_parent.condition =
|
|
182
|
+
direct_parent.condition = BuildConditional(
|
|
189
183
|
left=direct_parent.condition,
|
|
190
184
|
operator=BooleanOperator.AND,
|
|
191
185
|
right=root_cte.condition,
|
|
@@ -197,14 +191,15 @@ def optimize_ctes(
|
|
|
197
191
|
sort_select_output(root_cte, select)
|
|
198
192
|
|
|
199
193
|
REGISTERED_RULES: list["OptimizationRule"] = []
|
|
200
|
-
|
|
201
|
-
REGISTERED_RULES.append(InlineConstant())
|
|
194
|
+
|
|
202
195
|
if CONFIG.optimizations.datasource_inlining:
|
|
203
196
|
REGISTERED_RULES.append(InlineDatasource())
|
|
204
197
|
if CONFIG.optimizations.predicate_pushdown:
|
|
205
198
|
REGISTERED_RULES.append(PredicatePushdown())
|
|
206
199
|
if CONFIG.optimizations.predicate_pushdown:
|
|
207
200
|
REGISTERED_RULES.append(PredicatePushdownRemove())
|
|
201
|
+
if CONFIG.optimizations.constant_inlining:
|
|
202
|
+
REGISTERED_RULES.append(InlineConstant())
|
|
208
203
|
for rule in REGISTERED_RULES:
|
|
209
204
|
loops = 0
|
|
210
205
|
complete = False
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
from trilogy.core.enums import
|
|
2
|
-
from trilogy.core.models import
|
|
1
|
+
from trilogy.core.enums import Derivation
|
|
2
|
+
from trilogy.core.models.build import BuildConcept
|
|
3
|
+
from trilogy.core.models.execute import (
|
|
3
4
|
CTE,
|
|
4
|
-
Concept,
|
|
5
5
|
UnionCTE,
|
|
6
6
|
)
|
|
7
7
|
from trilogy.core.optimizations.base_optimization import OptimizationRule
|
|
@@ -14,11 +14,11 @@ class InlineConstant(OptimizationRule):
|
|
|
14
14
|
if isinstance(cte, UnionCTE):
|
|
15
15
|
return any(self.optimize(x, inverse_map) for x in cte.internal_ctes)
|
|
16
16
|
|
|
17
|
-
to_inline: list[
|
|
17
|
+
to_inline: list[BuildConcept] = []
|
|
18
18
|
for x in cte.source.input_concepts:
|
|
19
19
|
if x.address not in cte.source_map:
|
|
20
20
|
continue
|
|
21
|
-
if x.derivation ==
|
|
21
|
+
if x.derivation == Derivation.CONSTANT:
|
|
22
22
|
self.log(f"Found constant {x.address} on {cte.name}")
|
|
23
23
|
to_inline.append(x)
|
|
24
24
|
if to_inline:
|
|
@@ -27,7 +27,7 @@ class InlineConstant(OptimizationRule):
|
|
|
27
27
|
self.log(f"Attempting to inline constant {c.address} on {cte.name}")
|
|
28
28
|
test = cte.inline_constant(c)
|
|
29
29
|
if test:
|
|
30
|
-
self.log(f"Successfully inlined constant to {cte.name}")
|
|
30
|
+
self.log(f"Successfully inlined constant {c.address} to {cte.name}")
|
|
31
31
|
inlined = True
|
|
32
32
|
else:
|
|
33
33
|
self.log(f"Could not inline constant to {cte.name}")
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
from collections import defaultdict
|
|
2
2
|
|
|
3
3
|
from trilogy.constants import CONFIG
|
|
4
|
-
|
|
4
|
+
|
|
5
|
+
# from trilogy.core.models.datasource import Datasource
|
|
6
|
+
from trilogy.core.models.build import BuildDatasource
|
|
7
|
+
from trilogy.core.models.execute import (
|
|
5
8
|
CTE,
|
|
6
|
-
Datasource,
|
|
7
9
|
UnionCTE,
|
|
8
10
|
)
|
|
9
11
|
from trilogy.core.optimizations.base_optimization import OptimizationRule
|
|
@@ -35,21 +37,25 @@ class InlineDatasource(OptimizationRule):
|
|
|
35
37
|
if isinstance(parent_cte, UnionCTE):
|
|
36
38
|
continue
|
|
37
39
|
if not parent_cte.is_root_datasource:
|
|
38
|
-
self.debug(f"parent {parent_cte.name} is not root")
|
|
40
|
+
self.debug(f"Cannot inline: parent {parent_cte.name} is not root")
|
|
39
41
|
continue
|
|
40
42
|
if parent_cte.parent_ctes:
|
|
41
|
-
self.debug(f"parent {parent_cte.name} has parents")
|
|
43
|
+
self.debug(f"Cannot inline: parent {parent_cte.name} has parents")
|
|
42
44
|
continue
|
|
43
45
|
if parent_cte.condition:
|
|
44
|
-
self.debug(
|
|
46
|
+
self.debug(
|
|
47
|
+
f"Cannot inline: parent {parent_cte.name} has condition, cannot be inlined"
|
|
48
|
+
)
|
|
45
49
|
continue
|
|
46
50
|
raw_root = parent_cte.source.datasources[0]
|
|
47
|
-
if not isinstance(raw_root,
|
|
48
|
-
self.debug(f"Parent {parent_cte.name} is not datasource")
|
|
51
|
+
if not isinstance(raw_root, BuildDatasource):
|
|
52
|
+
self.debug(f"Cannot inline: Parent {parent_cte.name} is not datasource")
|
|
49
53
|
continue
|
|
50
|
-
root:
|
|
54
|
+
root: BuildDatasource = raw_root
|
|
51
55
|
if not root.can_be_inlined:
|
|
52
|
-
self.debug(
|
|
56
|
+
self.debug(
|
|
57
|
+
f"Cannot inline: Parent {parent_cte.name} datasource is not inlineable"
|
|
58
|
+
)
|
|
53
59
|
continue
|
|
54
60
|
root_outputs = {x.address for x in root.output_concepts}
|
|
55
61
|
inherited = {
|
|
@@ -58,12 +64,12 @@ class InlineDatasource(OptimizationRule):
|
|
|
58
64
|
if not inherited.issubset(root_outputs):
|
|
59
65
|
cte_missing = inherited - root_outputs
|
|
60
66
|
self.log(
|
|
61
|
-
f"Not all {parent_cte.name}
|
|
67
|
+
f"Cannot inline: Not all required inputs to {parent_cte.name} are found on datasource, missing {cte_missing}"
|
|
62
68
|
)
|
|
63
69
|
continue
|
|
64
70
|
if not root.grain.issubset(parent_cte.grain):
|
|
65
71
|
self.log(
|
|
66
|
-
f"{parent_cte.name} is at wrong grain to inline ({root.grain} vs {parent_cte.grain})"
|
|
72
|
+
f"Cannot inline: {parent_cte.name} is at wrong grain to inline ({root.grain} vs {parent_cte.grain})"
|
|
67
73
|
)
|
|
68
74
|
continue
|
|
69
75
|
to_inline.append(parent_cte)
|
|
@@ -1,14 +1,15 @@
|
|
|
1
|
-
from trilogy.core.
|
|
2
|
-
CTE,
|
|
1
|
+
from trilogy.core.enums import (
|
|
3
2
|
BooleanOperator,
|
|
4
|
-
Comparison,
|
|
5
|
-
ConceptArgs,
|
|
6
|
-
Conditional,
|
|
7
|
-
Datasource,
|
|
8
|
-
Parenthetical,
|
|
9
|
-
UnionCTE,
|
|
10
|
-
WindowItem,
|
|
11
3
|
)
|
|
4
|
+
from trilogy.core.models.build import (
|
|
5
|
+
BuildComparison,
|
|
6
|
+
BuildConceptArgs,
|
|
7
|
+
BuildConditional,
|
|
8
|
+
BuildDatasource,
|
|
9
|
+
BuildParenthetical,
|
|
10
|
+
BuildWindowItem,
|
|
11
|
+
)
|
|
12
|
+
from trilogy.core.models.execute import CTE, UnionCTE
|
|
12
13
|
from trilogy.core.optimizations.base_optimization import OptimizationRule
|
|
13
14
|
from trilogy.core.processing.utility import is_scalar_condition
|
|
14
15
|
from trilogy.utility import unique
|
|
@@ -18,7 +19,7 @@ def is_child_of(a, comparison):
|
|
|
18
19
|
base = comparison == a
|
|
19
20
|
if base:
|
|
20
21
|
return True
|
|
21
|
-
if isinstance(comparison,
|
|
22
|
+
if isinstance(comparison, BuildConditional):
|
|
22
23
|
return (
|
|
23
24
|
is_child_of(a, comparison.left) or is_child_of(a, comparison.right)
|
|
24
25
|
) and comparison.operator == BooleanOperator.AND
|
|
@@ -34,10 +35,10 @@ class PredicatePushdown(OptimizationRule):
|
|
|
34
35
|
self,
|
|
35
36
|
cte: CTE | UnionCTE,
|
|
36
37
|
parent_cte: CTE | UnionCTE,
|
|
37
|
-
candidate:
|
|
38
|
+
candidate: BuildConditional | BuildComparison | BuildParenthetical | None,
|
|
38
39
|
inverse_map: dict[str, list[CTE | UnionCTE]],
|
|
39
40
|
):
|
|
40
|
-
if not isinstance(candidate,
|
|
41
|
+
if not isinstance(candidate, BuildConceptArgs):
|
|
41
42
|
return False
|
|
42
43
|
if not isinstance(parent_cte, CTE):
|
|
43
44
|
return False
|
|
@@ -52,7 +53,7 @@ class PredicatePushdown(OptimizationRule):
|
|
|
52
53
|
concrete = [
|
|
53
54
|
x for x in parent_cte.output_columns if x.address in non_materialized
|
|
54
55
|
]
|
|
55
|
-
if any(isinstance(x.lineage,
|
|
56
|
+
if any(isinstance(x.lineage, BuildWindowItem) for x in concrete):
|
|
56
57
|
self.debug(
|
|
57
58
|
f"CTE {parent_cte.name} has window clause calculation, cannot push up to this without changing results"
|
|
58
59
|
)
|
|
@@ -88,7 +89,7 @@ class PredicatePushdown(OptimizationRule):
|
|
|
88
89
|
self.log("Parent condition is not scalar, not safe to push up")
|
|
89
90
|
return False
|
|
90
91
|
if parent_cte.condition:
|
|
91
|
-
parent_cte.condition =
|
|
92
|
+
parent_cte.condition = BuildConditional(
|
|
92
93
|
left=parent_cte.condition,
|
|
93
94
|
operator=BooleanOperator.AND,
|
|
94
95
|
right=candidate,
|
|
@@ -137,7 +138,7 @@ class PredicatePushdown(OptimizationRule):
|
|
|
137
138
|
self.debug(
|
|
138
139
|
f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
|
|
139
140
|
)
|
|
140
|
-
if isinstance(cte.condition,
|
|
141
|
+
if isinstance(cte.condition, BuildConditional):
|
|
141
142
|
candidates = cte.condition.decompose()
|
|
142
143
|
else:
|
|
143
144
|
candidates = [cte.condition]
|
|
@@ -216,7 +217,7 @@ class PredicatePushdownRemove(OptimizationRule):
|
|
|
216
217
|
for key, value in parent_filter_status.items()
|
|
217
218
|
if key not in existence_only
|
|
218
219
|
]
|
|
219
|
-
) and not any([isinstance(x,
|
|
220
|
+
) and not any([isinstance(x, BuildDatasource) for x in cte.source.datasources]):
|
|
220
221
|
self.log(
|
|
221
222
|
f"All parents of {cte.name} have same filter or are existence only inputs, removing filter from {cte.name}"
|
|
222
223
|
)
|