pytrilogy 0.0.1.115__tar.gz → 0.0.1.116__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pytrilogy has been flagged as potentially problematic.
- {pytrilogy-0.0.1.115/pytrilogy.egg-info → pytrilogy-0.0.1.116}/PKG-INFO +1 -1
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116/pytrilogy.egg-info}/PKG-INFO +1 -1
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/pytrilogy.egg-info/SOURCES.txt +5 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/tests/test_parsing.py +12 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/__init__.py +1 -1
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/constants.py +1 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/models.py +51 -0
- pytrilogy-0.0.1.116/trilogy/core/optimization.py +131 -0
- pytrilogy-0.0.1.116/trilogy/core/optimizations/__init__.py +11 -0
- pytrilogy-0.0.1.116/trilogy/core/optimizations/base_optimization.py +17 -0
- pytrilogy-0.0.1.116/trilogy/core/optimizations/inline_constant.py +29 -0
- pytrilogy-0.0.1.116/trilogy/core/optimizations/inline_datasource.py +54 -0
- pytrilogy-0.0.1.116/trilogy/core/optimizations/predicate_pushdown.py +88 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/dialect/base.py +24 -3
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/dialect/sql_server.py +8 -1
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/executor.py +25 -5
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/parsing/parse_engine.py +22 -4
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/parsing/render.py +5 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/parsing/trilogy.lark +4 -0
- pytrilogy-0.0.1.115/trilogy/core/optimization.py +0 -262
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/LICENSE.md +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/README.md +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/pyproject.toml +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/pytrilogy.egg-info/dependency_links.txt +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/pytrilogy.egg-info/entry_points.txt +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/pytrilogy.egg-info/requires.txt +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/pytrilogy.egg-info/top_level.txt +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/setup.cfg +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/setup.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/tests/test_declarations.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/tests/test_derived_concepts.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/tests/test_discovery_nodes.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/tests/test_environment.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/tests/test_functions.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/tests/test_imports.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/tests/test_metadata.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/tests/test_models.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/tests/test_multi_join_assignments.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/tests/test_partial_handling.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/tests/test_query_processing.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/tests/test_select.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/tests/test_statements.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/tests/test_undefined_concept.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/tests/test_where_clause.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/compiler.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/__init__.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/constants.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/enums.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/env_processor.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/environment_helpers.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/ergonomics.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/exceptions.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/functions.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/graph_models.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/internal.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/__init__.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/concept_strategies_v3.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/graph_utils.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/__init__.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/basic_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/common.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/concept_merge_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/filter_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/group_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/group_to_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/multiselect_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/node_merge_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/rowset_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/select_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/unnest_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/window_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/nodes/__init__.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/nodes/base_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/nodes/filter_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/nodes/group_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/nodes/merge_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/nodes/select_node_v2.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/nodes/unnest_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/nodes/window_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/processing/utility.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/core/query_processor.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/dialect/__init__.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/dialect/bigquery.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/dialect/common.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/dialect/config.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/dialect/duckdb.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/dialect/enums.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/dialect/postgres.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/dialect/presto.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/dialect/snowflake.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/engine.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/hooks/__init__.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/hooks/base_hook.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/hooks/graph_hook.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/hooks/query_debugger.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/metadata/__init__.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/parser.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/parsing/__init__.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/parsing/common.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/parsing/config.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/parsing/exceptions.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/parsing/helpers.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/py.typed +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/scripts/__init__.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/scripts/trilogy.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.116}/trilogy/utility.py +0 -0
--- pytrilogy-0.0.1.115/pytrilogy.egg-info/SOURCES.txt
+++ pytrilogy-0.0.1.116/pytrilogy.egg-info/SOURCES.txt
@@ -45,6 +45,11 @@ trilogy/core/internal.py
 trilogy/core/models.py
 trilogy/core/optimization.py
 trilogy/core/query_processor.py
+trilogy/core/optimizations/__init__.py
+trilogy/core/optimizations/base_optimization.py
+trilogy/core/optimizations/inline_constant.py
+trilogy/core/optimizations/inline_datasource.py
+trilogy/core/optimizations/predicate_pushdown.py
 trilogy/core/processing/__init__.py
 trilogy/core/processing/concept_strategies_v3.py
 trilogy/core/processing/graph_utils.py
--- pytrilogy-0.0.1.115/tests/test_parsing.py
+++ pytrilogy-0.0.1.116/tests/test_parsing.py
@@ -292,3 +292,15 @@ CASE WHEN dates.year BETWEEN 1883 AND 1900 THEN 'Lost Generation'
     )
 
     assert env2.concepts["dates.generation"].purpose == Purpose.PROPERTY
+
+
+def test_rawsql():
+    env, parsed = parse_text(
+        """
+raw_sql('''select 1''');
+
+select 1 as test;
+
+"""
+    )
+    assert parsed[0].text == "select 1"
--- pytrilogy-0.0.1.115/trilogy/core/models.py
+++ pytrilogy-0.0.1.116/trilogy/core/models.py
@@ -1165,11 +1165,17 @@ class OrderBy(Namespaced, BaseModel):
         return OrderBy(items=[x.with_namespace(namespace) for x in self.items])
 
 
+class RawSQLStatement(BaseModel):
+    text: str
+    meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
+
+
 class SelectStatement(Namespaced, BaseModel):
     selection: List[SelectItem]
     where_clause: Optional["WhereClause"] = None
     order_by: Optional[OrderBy] = None
     limit: Optional[int] = None
+    meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
 
     def __str__(self):
         from trilogy.parsing.render import render_query
@@ -1371,6 +1377,7 @@ class MultiSelectStatement(Namespaced, BaseModel):
     where_clause: Optional["WhereClause"] = None
     order_by: Optional[OrderBy] = None
     limit: Optional[int] = None
+    meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
 
     def __repr__(self):
         return "MultiSelect<" + " MERGE ".join([str(s) for s in self.selects]) + ">"
@@ -2038,6 +2045,40 @@ class CTE(BaseModel):
     def validate_output_columns(cls, v):
         return unique(v, "address")
 
+    def inline_constant(self, concept: Concept):
+        if not concept.derivation == PurposeLineage.CONSTANT:
+            return False
+        if not isinstance(concept.lineage, Function):
+            return False
+        if not concept.lineage.operator == FunctionType.CONSTANT:
+            return False
+        # remove the constant
+        removed: set = set()
+        if concept.address in self.source_map:
+            removed = removed.union(self.source_map[concept.address])
+            del self.source_map[concept.address]
+        # if we've entirely removed the need to join to someplace to get the concept
+        # drop the join as well.
+        for removed_cte in removed:
+            still_required = any([removed_cte in x for x in self.source_map.values()])
+            if not still_required:
+                self.joins = [
+                    join
+                    for join in self.joins
+                    if not isinstance(join, Join)
+                    or (
+                        join.right_cte.name != removed_cte
+                        and join.left_cte.name != removed_cte
+                    )
+                ]
+                self.parent_ctes = [
+                    x for x in self.parent_ctes if x.name != removed_cte
+                ]
+                if removed_cte == self.base_name_override:
+                    candidates = [x.name for x in self.parent_ctes]
+                    self.base_name_override = candidates[0] if candidates else None
+                    self.base_alias_override = candidates[0] if candidates else None
+
     def inline_parent_datasource(self, parent: CTE, force_group: bool = False) -> bool:
         qds_being_inlined = parent.source
         ds_being_inlined = qds_being_inlined.datasources[0]
@@ -2186,6 +2227,11 @@ class CTE(BaseModel):
             and not self.group_to_grain
         ):
             return False
+        # if we don't need to source any concepts from anywhere
+        # render without from
+        # most likely to happen from inlining constants
+        if not any([v for v in self.source_map.values()]):
+            return False
         if (
             len(self.source.datasources) == 1
             and self.source.datasources[0].name == CONSTANT_DATASET
@@ -3184,6 +3230,10 @@ class ProcessedShowStatement(BaseModel):
     output_values: List[Union[Concept, Datasource, ProcessedQuery]]
 
 
+class ProcessedRawSQLStatement(BaseModel):
+    text: str
+
+
 class Limit(BaseModel):
     count: int
 
@@ -3386,6 +3436,7 @@ class Parenthetical(ConceptArgs, Namespaced, SelectGrain, BaseModel):
 class PersistStatement(BaseModel):
     datasource: Datasource
     select: SelectStatement
+    meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
 
     @property
     def identifier(self):
--- /dev/null
+++ pytrilogy-0.0.1.116/trilogy/core/optimization.py
@@ -0,0 +1,131 @@
+from trilogy.core.models import (
+    CTE,
+    SelectStatement,
+    PersistStatement,
+    MultiSelectStatement,
+    Conditional,
+    BooleanOperator,
+)
+from trilogy.core.enums import PurposeLineage
+from trilogy.constants import logger, CONFIG
+from trilogy.core.optimizations import (
+    OptimizationRule,
+    InlineConstant,
+    PredicatePushdown,
+    InlineDatasource,
+)
+
+
+MAX_OPTIMIZATION_LOOPS = 100
+
+
+def filter_irrelevant_ctes(
+    input: list[CTE],
+    root_cte: CTE,
+):
+    relevant_ctes = set()
+
+    def recurse(cte: CTE):
+        relevant_ctes.add(cte.name)
+        for cte in cte.parent_ctes:
+            recurse(cte)
+
+    recurse(root_cte)
+    return [cte for cte in input if cte.name in relevant_ctes]
+
+
+def gen_inverse_map(input: list[CTE]) -> dict[str, list[CTE]]:
+    inverse_map: dict[str, list[CTE]] = {}
+    for cte in input:
+        for parent in cte.parent_ctes:
+            if parent.name not in inverse_map:
+                inverse_map[parent.name] = []
+            inverse_map[parent.name].append(cte)
+    return inverse_map
+
+
+def is_direct_return_eligible(
+    cte: CTE, select: SelectStatement | PersistStatement | MultiSelectStatement
+) -> bool:
+    if isinstance(select, (PersistStatement, MultiSelectStatement)):
+        return False
+    derived_concepts = [
+        c for c in cte.source.output_concepts if c not in cte.source.input_concepts
+    ]
+    eligible = True
+    conditions = (
+        set(x.address for x in select.where_clause.concept_arguments)
+        if select.where_clause
+        else set()
+    )
+    if conditions and select.limit:
+        return False
+    for x in derived_concepts:
+        if x.derivation == PurposeLineage.WINDOW:
+            return False
+        if x.derivation == PurposeLineage.UNNEST:
+            return False
+        if x.derivation == PurposeLineage.AGGREGATE:
+            if x.address in conditions:
+                return False
+    logger.info(
+        f"Upleveling output select to final CTE with derived_concepts {[x.address for x in derived_concepts]}"
+    )
+    return eligible
+
+
+def sort_select_output(cte: CTE, query: SelectStatement | MultiSelectStatement):
+    hidden_addresses = [c.address for c in query.hidden_components]
+    output_addresses = [
+        c.address for c in query.output_components if c.address not in hidden_addresses
+    ]
+
+    mapping = {x.address: x for x in cte.output_columns}
+
+    new_output = []
+    for x in output_addresses:
+        new_output.append(mapping[x])
+    cte.output_columns = new_output
+
+
+def optimize_ctes(
+    input: list[CTE], root_cte: CTE, select: SelectStatement | MultiSelectStatement
+):
+    complete = False
+    REGISTERED_RULES: list["OptimizationRule"] = []
+
+    if CONFIG.optimizations.datasource_inlining:
+        REGISTERED_RULES.append(InlineDatasource())
+    if CONFIG.optimizations.predicate_pushdown:
+        REGISTERED_RULES.append(PredicatePushdown())
+    if CONFIG.optimizations.constant_inlining:
+        REGISTERED_RULES.append(InlineConstant())
+    loops = 0
+    while not complete and (loops <= MAX_OPTIMIZATION_LOOPS):
+        actions_taken = False
+        for rule in REGISTERED_RULES:
+            for cte in input:
+                inverse_map = gen_inverse_map(input)
+                actions_taken = rule.optimize(cte, inverse_map)
+        complete = not actions_taken
+        loops += 1
+
+    if CONFIG.optimizations.direct_return and is_direct_return_eligible(
+        root_cte, select
+    ):
+        root_cte.order_by = select.order_by
+        root_cte.limit = select.limit
+        if select.where_clause:
+
+            if root_cte.condition:
+                root_cte.condition = Conditional(
+                    left=root_cte.condition,
+                    operator=BooleanOperator.AND,
+                    right=select.where_clause.conditional,
+                )
+            else:
+                root_cte.condition = select.where_clause.conditional
+        root_cte.requires_nesting = False
+        sort_select_output(root_cte, select)
+
+    return filter_irrelevant_ctes(input, root_cte)
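Each pass registered above is gated by a flag read from CONFIG.optimizations (datasource_inlining, predicate_pushdown, constant_inlining, direct_return). A rough sketch of toggling one of them follows; the flag names come from the conditionals in optimize_ctes, but whether they are plain writable attributes at runtime is an assumption this diff does not confirm.

# Sketch only: assumes CONFIG.optimizations flags can be reassigned directly.
from trilogy.constants import CONFIG

CONFIG.optimizations.predicate_pushdown = False  # assumed: disable one pass before compiling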
--- /dev/null
+++ pytrilogy-0.0.1.116/trilogy/core/optimizations/__init__.py
@@ -0,0 +1,11 @@
+from .inline_constant import InlineConstant
+from .inline_datasource import InlineDatasource
+from .predicate_pushdown import PredicatePushdown
+from .base_optimization import OptimizationRule
+
+__all__ = [
+    "OptimizationRule",
+    "InlineConstant",
+    "InlineDatasource",
+    "PredicatePushdown",
+]
--- /dev/null
+++ pytrilogy-0.0.1.116/trilogy/core/optimizations/base_optimization.py
@@ -0,0 +1,17 @@
+from trilogy.core.models import (
+    CTE,
+)
+from trilogy.constants import logger
+from abc import ABC
+
+
+class OptimizationRule(ABC):
+
+    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
+        raise NotImplementedError
+
+    def log(self, message: str):
+        logger.info(f"[Optimization][{self.__class__.__name__}] {message}")
+
+    def debug(self, message: str):
+        logger.debug(f"[Optimization][{self.__class__.__name__}] {message}")
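For orientation, a minimal sketch of a rule written against this interface; the class name and no-op body are illustrative only and not part of the release. optimize() returns True when it mutated the CTE, which is what keeps the loop in optimize_ctes iterating.

# Illustrative sketch: a do-nothing rule following the OptimizationRule interface above.
from trilogy.core.models import CTE
from trilogy.core.optimizations.base_optimization import OptimizationRule


class NoOpRule(OptimizationRule):  # hypothetical example rule, not in this package
    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
        self.debug(f"visited {cte.name}; no rewrite applied")
        return False  # False signals "no action taken" to the optimize_ctes loop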
--- /dev/null
+++ pytrilogy-0.0.1.116/trilogy/core/optimizations/inline_constant.py
@@ -0,0 +1,29 @@
+from trilogy.core.models import (
+    CTE,
+    Concept,
+)
+from trilogy.core.enums import PurposeLineage
+
+from trilogy.core.optimizations.base_optimization import OptimizationRule
+
+
+class InlineConstant(OptimizationRule):
+
+    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
+
+        to_inline: list[Concept] = []
+        for x in cte.source.input_concepts:
+            if x.address not in cte.source_map:
+                continue
+            if x.derivation == PurposeLineage.CONSTANT:
+                self.log(f"Found constant {x.address} on {cte.name}")
+                to_inline.append(x)
+        if to_inline:
+            inlined = False
+            for c in to_inline:
+                self.log(f"Inlining constant {c.address} on {cte.name}")
+                test = cte.inline_constant(c)
+                if test:
+                    inlined = True
+            return inlined
+        return False
--- /dev/null
+++ pytrilogy-0.0.1.116/trilogy/core/optimizations/inline_datasource.py
@@ -0,0 +1,54 @@
+from trilogy.core.models import (
+    CTE,
+    Datasource,
+)
+
+from trilogy.core.optimizations.base_optimization import OptimizationRule
+
+
+class InlineDatasource(OptimizationRule):
+
+    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
+        if not cte.parent_ctes:
+            return False
+
+        optimized = False
+        self.log(
+            f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
+        )
+        to_inline: list[CTE] = []
+        force_group = False
+        for parent_cte in cte.parent_ctes:
+            if not parent_cte.is_root_datasource:
+                self.log(f"parent {parent_cte.name} is not root")
+                continue
+            if parent_cte.parent_ctes:
+                self.log(f"parent {parent_cte.name} has parents")
+                continue
+            raw_root = parent_cte.source.datasources[0]
+            if not isinstance(raw_root, Datasource):
+                self.log(f"parent {parent_cte.name} is not datasource")
+                continue
+            root: Datasource = raw_root
+            if not root.can_be_inlined:
+                self.log(f"parent {parent_cte.name} datasource is not inlineable")
+                continue
+            root_outputs = {x.address for x in root.output_concepts}
+            cte_outputs = {x.address for x in parent_cte.output_columns}
+            grain_components = {x.address for x in root.grain.components}
+            if not cte_outputs.issubset(root_outputs):
+                self.log(f"Not all {parent_cte.name} outputs are found on datasource")
+                continue
+            if not grain_components.issubset(cte_outputs):
+                self.log("Not all datasource components in cte outputs, forcing group")
+                force_group = True
+            to_inline.append(parent_cte)
+
+        for replaceable in to_inline:
+
+            result = cte.inline_parent_datasource(replaceable, force_group=force_group)
+            if result:
+                self.log(f"Inlined parent {replaceable.name}")
+            else:
+                self.log(f"Failed to inline {replaceable.name}")
+        return optimized
--- /dev/null
+++ pytrilogy-0.0.1.116/trilogy/core/optimizations/predicate_pushdown.py
@@ -0,0 +1,88 @@
+from trilogy.core.models import (
+    CTE,
+    Conditional,
+    BooleanOperator,
+)
+from trilogy.constants import logger
+from trilogy.core.optimizations.base_optimization import OptimizationRule
+
+
+def decompose_condition(conditional: Conditional):
+    chunks = []
+    if conditional.operator == BooleanOperator.AND:
+        for val in [conditional.left, conditional.right]:
+            if isinstance(val, Conditional):
+                chunks.extend(decompose_condition(val))
+            else:
+                chunks.append(val)
+    else:
+        chunks.append(conditional)
+    return chunks
+
+
+def is_child_of(a, comparison):
+    if isinstance(comparison, Conditional):
+        return (
+            is_child_of(a, comparison.left) or is_child_of(a, comparison.right)
+        ) and comparison.operator == BooleanOperator.AND
+    return comparison == a
+
+
+class PredicatePushdown(OptimizationRule):
+
+    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
+
+        if not cte.parent_ctes:
+            self.debug(f"No parent CTEs for {cte.name}")
+
+            return False
+
+        optimized = False
+        if not cte.condition:
+            self.debug(f"No CTE condition for {cte.name}")
+            return False
+        self.log(
+            f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
+        )
+        if isinstance(cte.condition, Conditional):
+            candidates = cte.condition.decompose()
+        else:
+            candidates = [cte.condition]
+        logger.info(f"Have {len(candidates)} candidates to try to push down")
+        for candidate in candidates:
+            conditions = {x.address for x in candidate.concept_arguments}
+            for parent_cte in cte.parent_ctes:
+                materialized = {k for k, v in parent_cte.source_map.items() if v != []}
+                if conditions.issubset(materialized):
+                    if all(
+                        [
+                            is_child_of(candidate, child.condition)
+                            for child in inverse_map[parent_cte.name]
+                        ]
+                    ):
+                        self.log(
+                            f"All concepts are found on {parent_cte.name} and all it's children include same filter; pushing up filter"
+                        )
+                        if parent_cte.condition:
+                            parent_cte.condition = Conditional(
+                                left=parent_cte.condition,
+                                operator=BooleanOperator.AND,
+                                right=candidate,
+                            )
+                        else:
+                            parent_cte.condition = candidate
+                        optimized = True
+                else:
+                    logger.info("conditions not subset of parent materialized")
+
+        if all(
+            [
+                is_child_of(cte.condition, parent_cte.condition)
+                for parent_cte in cte.parent_ctes
+            ]
+        ):
+            self.log("All parents have same filter, removing filter")
+            cte.condition = None
+            optimized = True
+
+        return optimized
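A quick reading aid for the two helpers above; a, b and c stand for arbitrary leaf comparisons and are not identifiers from the package.

# Schematic behaviour of the helpers above:
#   decompose_condition(a AND (b AND c)) -> [a, b, c]   AND chains flatten into a list of leaves
#   decompose_condition(a OR b)          -> [a OR b]    anything else is returned whole
#   is_child_of(b, a AND b)              -> True        b is an AND-ed member of the tree
#   is_child_of(b, a OR b)               -> False       OR parents never qualify for pushdown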
--- pytrilogy-0.0.1.115/trilogy/dialect/base.py
+++ pytrilogy-0.0.1.116/trilogy/dialect/base.py
@@ -44,6 +44,8 @@ from trilogy.core.models import (
     RowsetDerivationStatement,
     ConceptDeclarationStatement,
     ImportStatement,
+    RawSQLStatement,
+    ProcessedRawSQLStatement,
 )
 from trilogy.core.query_processor import process_query, process_persist
 from trilogy.dialect.common import render_join
@@ -558,11 +560,20 @@ class BaseDialect:
             | RowsetDerivationStatement
             | MergeStatement
             | ImportStatement
+            | RawSQLStatement
         ],
         hooks: Optional[List[BaseHook]] = None,
-    ) -> List[
+    ) -> List[
+        ProcessedQuery
+        | ProcessedQueryPersist
+        | ProcessedShowStatement
+        | ProcessedRawSQLStatement
+    ]:
         output: List[
-            ProcessedQuery
+            ProcessedQuery
+            | ProcessedQueryPersist
+            | ProcessedShowStatement
+            | ProcessedRawSQLStatement
         ] = []
         for statement in statements:
             if isinstance(statement, PersistStatement):
@@ -604,6 +615,8 @@ class BaseDialect:
                     )
                 else:
                     raise NotImplementedError(type(statement))
+            elif isinstance(statement, RawSQLStatement):
+                output.append(ProcessedRawSQLStatement(text=statement.text))
             elif isinstance(
                 statement,
                 (
@@ -619,10 +632,18 @@ class BaseDialect:
         return output
 
     def compile_statement(
-        self,
+        self,
+        query: (
+            ProcessedQuery
+            | ProcessedQueryPersist
+            | ProcessedShowStatement
+            | ProcessedRawSQLStatement
+        ),
     ) -> str:
         if isinstance(query, ProcessedShowStatement):
             return ";\n".join([str(x) for x in query.output_values])
+        elif isinstance(query, ProcessedRawSQLStatement):
+            return query.text
         select_columns: Dict[str, str] = {}
         cte_output_map = {}
         selected = set()
--- pytrilogy-0.0.1.115/trilogy/dialect/sql_server.py
+++ pytrilogy-0.0.1.116/trilogy/dialect/sql_server.py
@@ -9,6 +9,7 @@ from trilogy.core.models import (
     ProcessedQuery,
     ProcessedQueryPersist,
     ProcessedShowStatement,
+    ProcessedRawSQLStatement,
 )
 from trilogy.dialect.base import BaseDialect
 
@@ -81,7 +82,13 @@ class SqlServerDialect(BaseDialect):
     SQL_TEMPLATE = TSQL_TEMPLATE
 
     def compile_statement(
-        self,
+        self,
+        query: (
+            ProcessedQuery
+            | ProcessedQueryPersist
+            | ProcessedShowStatement
+            | ProcessedRawSQLStatement
+        ),
     ) -> str:
         base = super().compile_statement(query)
         if isinstance(base, (ProcessedQuery, ProcessedQueryPersist)):
--- pytrilogy-0.0.1.115/trilogy/executor.py
+++ pytrilogy-0.0.1.116/trilogy/executor.py
@@ -9,6 +9,8 @@ from trilogy.core.models import (
     ProcessedQuery,
     ProcessedShowStatement,
     ProcessedQueryPersist,
+    ProcessedRawSQLStatement,
+    RawSQLStatement,
     MultiSelectStatement,
     SelectStatement,
     PersistStatement,
@@ -112,6 +114,10 @@ class Executor(object):
         )
         return self.execute_query(sql[0])
 
+    @execute_query.register
+    def _(self, query: RawSQLStatement) -> CursorResult:
+        return self.execute_raw_sql(query.text)
+
     @execute_query.register
     def _(self, query: ProcessedShowStatement) -> CursorResult:
         return generate_result_set(
@@ -123,6 +129,10 @@ class Executor(object):
             ],
         )
 
+    @execute_query.register
+    def _(self, query: ProcessedRawSQLStatement) -> CursorResult:
+        return self.execute_raw_sql(query.text)
+
     @execute_query.register
     def _(self, query: ProcessedQuery) -> CursorResult:
         sql = self.generator.compile_statement(query)
@@ -195,7 +205,12 @@ class Executor(object):
 
     def parse_text(
         self, command: str, persist: bool = False
-    ) -> List[
+    ) -> List[
+        ProcessedQuery
+        | ProcessedQueryPersist
+        | ProcessedShowStatement
+        | ProcessedRawSQLStatement
+    ]:
         """Process a preql text command"""
         _, parsed = parse_text(command, self.environment)
         generatable = [
@@ -208,6 +223,7 @@ class Executor(object):
                     PersistStatement,
                     MultiSelectStatement,
                     ShowStatement,
+                    RawSQLStatement,
                 ),
             )
         ]
@@ -222,10 +238,13 @@ class Executor(object):
                 sql.append(x)
         return sql
 
-    def parse_text_generator(
-
-
-
+    def parse_text_generator(self, command: str, persist: bool = False) -> Generator[
+        ProcessedQuery
+        | ProcessedQueryPersist
+        | ProcessedShowStatement
+        | ProcessedRawSQLStatement,
+        None,
+        None,
     ]:
         """Process a preql text command"""
         _, parsed = parse_text(command, self.environment)
@@ -239,6 +258,7 @@ class Executor(object):
                     PersistStatement,
                     MultiSelectStatement,
                     ShowStatement,
+                    RawSQLStatement,
                 ),
             )
         ]