pytrilogy 0.0.1.114__tar.gz → 0.0.1.116__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pytrilogy has been flagged as potentially problematic by the registry diff service (see the registry advisory for details).
- {pytrilogy-0.0.1.114/pytrilogy.egg-info → pytrilogy-0.0.1.116}/PKG-INFO +1 -1
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116/pytrilogy.egg-info}/PKG-INFO +1 -1
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/pytrilogy.egg-info/SOURCES.txt +5 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/tests/test_parsing.py +41 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/__init__.py +1 -1
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/constants.py +1 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/models.py +54 -1
- pytrilogy-0.0.1.116/trilogy/core/optimization.py +131 -0
- pytrilogy-0.0.1.116/trilogy/core/optimizations/__init__.py +11 -0
- pytrilogy-0.0.1.116/trilogy/core/optimizations/base_optimization.py +17 -0
- pytrilogy-0.0.1.116/trilogy/core/optimizations/inline_constant.py +29 -0
- pytrilogy-0.0.1.116/trilogy/core/optimizations/inline_datasource.py +54 -0
- pytrilogy-0.0.1.116/trilogy/core/optimizations/predicate_pushdown.py +88 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/dialect/base.py +24 -3
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/dialect/sql_server.py +8 -1
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/executor.py +25 -5
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/parsing/parse_engine.py +28 -7
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/parsing/render.py +5 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/parsing/trilogy.lark +4 -0
- pytrilogy-0.0.1.114/trilogy/core/optimization.py +0 -262
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/LICENSE.md +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/README.md +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/pyproject.toml +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/pytrilogy.egg-info/dependency_links.txt +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/pytrilogy.egg-info/entry_points.txt +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/pytrilogy.egg-info/requires.txt +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/pytrilogy.egg-info/top_level.txt +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/setup.cfg +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/setup.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/tests/test_declarations.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/tests/test_derived_concepts.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/tests/test_discovery_nodes.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/tests/test_environment.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/tests/test_functions.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/tests/test_imports.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/tests/test_metadata.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/tests/test_models.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/tests/test_multi_join_assignments.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/tests/test_partial_handling.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/tests/test_query_processing.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/tests/test_select.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/tests/test_statements.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/tests/test_undefined_concept.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/tests/test_where_clause.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/compiler.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/__init__.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/constants.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/enums.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/env_processor.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/environment_helpers.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/ergonomics.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/exceptions.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/functions.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/graph_models.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/internal.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/__init__.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/concept_strategies_v3.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/graph_utils.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/__init__.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/basic_node.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/common.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/concept_merge_node.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/filter_node.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/group_node.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/group_to_node.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/multiselect_node.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/node_merge_node.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/rowset_node.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/select_node.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/unnest_node.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/node_generators/window_node.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/nodes/__init__.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/nodes/base_node.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/nodes/filter_node.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/nodes/group_node.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/nodes/merge_node.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/nodes/select_node_v2.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/nodes/unnest_node.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/nodes/window_node.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/processing/utility.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/query_processor.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/dialect/__init__.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/dialect/bigquery.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/dialect/common.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/dialect/config.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/dialect/duckdb.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/dialect/enums.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/dialect/postgres.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/dialect/presto.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/dialect/snowflake.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/engine.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/hooks/__init__.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/hooks/base_hook.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/hooks/graph_hook.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/hooks/query_debugger.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/metadata/__init__.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/parser.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/parsing/__init__.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/parsing/common.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/parsing/config.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/parsing/exceptions.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/parsing/helpers.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/py.typed +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/scripts/__init__.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/scripts/trilogy.py +0 -0
- {pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/utility.py +0 -0

{pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/pytrilogy.egg-info/SOURCES.txt

@@ -45,6 +45,11 @@ trilogy/core/internal.py
 trilogy/core/models.py
 trilogy/core/optimization.py
 trilogy/core/query_processor.py
+trilogy/core/optimizations/__init__.py
+trilogy/core/optimizations/base_optimization.py
+trilogy/core/optimizations/inline_constant.py
+trilogy/core/optimizations/inline_datasource.py
+trilogy/core/optimizations/predicate_pushdown.py
 trilogy/core/processing/__init__.py
 trilogy/core/processing/concept_strategies_v3.py
 trilogy/core/processing/graph_utils.py

{pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/tests/test_parsing.py

@@ -263,3 +263,44 @@ def test_the_comments():
     assert isinstance(right, MagicConstants), type(right)
     rendered = BaseDialect().render_expr(right)
     assert rendered == "null"
+
+
+def test_purpose_nesting():
+
+    env, parsed = parse_text(
+        """key year int;
+    """
+    )
+
+    env2: Environment = Environment()
+    env2.add_import("dates", env)
+
+    env2, _ = parse_text(
+        """
+    property <dates.year>.generation <-
+    CASE WHEN dates.year BETWEEN 1883 AND 1900 THEN 'Lost Generation'
+    WHEN dates.year BETWEEN 1901 AND 1927 THEN 'The Greatest Generation'
+    WHEN dates.year BETWEEN 1928 AND 1945 THEN 'The Silent Generation'
+    WHEN dates.year BETWEEN 1946 AND 1964 THEN 'Baby Boomer'
+    WHEN dates.year BETWEEN 1965 AND 1980 THEN 'Generation X'
+    WHEN dates.year BETWEEN 1981 AND 1996 THEN 'Millennials'
+    WHEN dates.year BETWEEN 1997 AND 2012 THEN 'Generation Z'
+    ELSE 'Unknown'
+    END;
+    """,
+        env2,
+    )
+
+    assert env2.concepts["dates.generation"].purpose == Purpose.PROPERTY
+
+
+def test_rawsql():
+    env, parsed = parse_text(
+        """
+    raw_sql('''select 1''');
+
+    select 1 as test;
+
+    """
+    )
+    assert parsed[0].text == "select 1"

{pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/core/models.py

@@ -1165,11 +1165,17 @@ class OrderBy(Namespaced, BaseModel):
         return OrderBy(items=[x.with_namespace(namespace) for x in self.items])


+class RawSQLStatement(BaseModel):
+    text: str
+    meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
+
+
 class SelectStatement(Namespaced, BaseModel):
     selection: List[SelectItem]
     where_clause: Optional["WhereClause"] = None
     order_by: Optional[OrderBy] = None
     limit: Optional[int] = None
+    meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())

     def __str__(self):
         from trilogy.parsing.render import render_query

@@ -1371,6 +1377,7 @@ class MultiSelectStatement(Namespaced, BaseModel):
     where_clause: Optional["WhereClause"] = None
     order_by: Optional[OrderBy] = None
     limit: Optional[int] = None
+    meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())

     def __repr__(self):
         return "MultiSelect<" + " MERGE ".join([str(s) for s in self.selects]) + ">"

@@ -2038,6 +2045,40 @@ class CTE(BaseModel):
     def validate_output_columns(cls, v):
         return unique(v, "address")

+    def inline_constant(self, concept: Concept):
+        if not concept.derivation == PurposeLineage.CONSTANT:
+            return False
+        if not isinstance(concept.lineage, Function):
+            return False
+        if not concept.lineage.operator == FunctionType.CONSTANT:
+            return False
+        # remove the constant
+        removed: set = set()
+        if concept.address in self.source_map:
+            removed = removed.union(self.source_map[concept.address])
+            del self.source_map[concept.address]
+        # if we've entirely removed the need to join to someplace to get the concept
+        # drop the join as well.
+        for removed_cte in removed:
+            still_required = any([removed_cte in x for x in self.source_map.values()])
+            if not still_required:
+                self.joins = [
+                    join
+                    for join in self.joins
+                    if not isinstance(join, Join)
+                    or (
+                        join.right_cte.name != removed_cte
+                        and join.left_cte.name != removed_cte
+                    )
+                ]
+                self.parent_ctes = [
+                    x for x in self.parent_ctes if x.name != removed_cte
+                ]
+                if removed_cte == self.base_name_override:
+                    candidates = [x.name for x in self.parent_ctes]
+                    self.base_name_override = candidates[0] if candidates else None
+                    self.base_alias_override = candidates[0] if candidates else None
+
     def inline_parent_datasource(self, parent: CTE, force_group: bool = False) -> bool:
         qds_being_inlined = parent.source
         ds_being_inlined = qds_being_inlined.datasources[0]

@@ -2186,6 +2227,11 @@ class CTE(BaseModel):
             and not self.group_to_grain
         ):
             return False
+        # if we don't need to source any concepts from anywhere
+        # render without from
+        # most likely to happen from inlining constants
+        if not any([v for v in self.source_map.values()]):
+            return False
         if (
             len(self.source.datasources) == 1
             and self.source.datasources[0].name == CONSTANT_DATASET

@@ -2669,7 +2715,8 @@ class Environment(BaseModel):


 class LazyEnvironment(Environment):
-    """Variant of environment to defer parsing of a path
+    """Variant of environment to defer parsing of a path
+    until relevant attributes accessed."""

     load_path: Path
     loaded: bool = False

@@ -2681,6 +2728,7 @@ class LazyEnvironment(Environment):
             "working_path",
             "model_config",
             "model_fields",
+            "model_post_init",
         ) or name.startswith("_"):
             return super().__getattribute__(name)
         if not self.loaded:

@@ -3182,6 +3230,10 @@ class ProcessedShowStatement(BaseModel):
     output_values: List[Union[Concept, Datasource, ProcessedQuery]]


+class ProcessedRawSQLStatement(BaseModel):
+    text: str
+
+
 class Limit(BaseModel):
     count: int


@@ -3384,6 +3436,7 @@ class Parenthetical(ConceptArgs, Namespaced, SelectGrain, BaseModel):
 class PersistStatement(BaseModel):
     datasource: Datasource
     select: SelectStatement
+    meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())

     @property
     def identifier(self):
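
The new RawSQLStatement and ProcessedRawSQLStatement models above are small Pydantic containers around a SQL string, alongside the meta field now attached to select and persist statements. A minimal sketch of how they behave, assuming only the fields shown in this diff:

from trilogy.core.models import RawSQLStatement, ProcessedRawSQLStatement

stmt = RawSQLStatement(text="select 1")
assert stmt.text == "select 1"
assert stmt.meta is not None  # Metadata() is attached via the default_factory

processed = ProcessedRawSQLStatement(text=stmt.text)
assert processed.text == "select 1"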

pytrilogy-0.0.1.116/trilogy/core/optimization.py (new file)

@@ -0,0 +1,131 @@
+from trilogy.core.models import (
+    CTE,
+    SelectStatement,
+    PersistStatement,
+    MultiSelectStatement,
+    Conditional,
+    BooleanOperator,
+)
+from trilogy.core.enums import PurposeLineage
+from trilogy.constants import logger, CONFIG
+from trilogy.core.optimizations import (
+    OptimizationRule,
+    InlineConstant,
+    PredicatePushdown,
+    InlineDatasource,
+)
+
+
+MAX_OPTIMIZATION_LOOPS = 100
+
+
+def filter_irrelevant_ctes(
+    input: list[CTE],
+    root_cte: CTE,
+):
+    relevant_ctes = set()
+
+    def recurse(cte: CTE):
+        relevant_ctes.add(cte.name)
+        for cte in cte.parent_ctes:
+            recurse(cte)
+
+    recurse(root_cte)
+    return [cte for cte in input if cte.name in relevant_ctes]
+
+
+def gen_inverse_map(input: list[CTE]) -> dict[str, list[CTE]]:
+    inverse_map: dict[str, list[CTE]] = {}
+    for cte in input:
+        for parent in cte.parent_ctes:
+            if parent.name not in inverse_map:
+                inverse_map[parent.name] = []
+            inverse_map[parent.name].append(cte)
+    return inverse_map
+
+
+def is_direct_return_eligible(
+    cte: CTE, select: SelectStatement | PersistStatement | MultiSelectStatement
+) -> bool:
+    if isinstance(select, (PersistStatement, MultiSelectStatement)):
+        return False
+    derived_concepts = [
+        c for c in cte.source.output_concepts if c not in cte.source.input_concepts
+    ]
+    eligible = True
+    conditions = (
+        set(x.address for x in select.where_clause.concept_arguments)
+        if select.where_clause
+        else set()
+    )
+    if conditions and select.limit:
+        return False
+    for x in derived_concepts:
+        if x.derivation == PurposeLineage.WINDOW:
+            return False
+        if x.derivation == PurposeLineage.UNNEST:
+            return False
+        if x.derivation == PurposeLineage.AGGREGATE:
+            if x.address in conditions:
+                return False
+    logger.info(
+        f"Upleveling output select to final CTE with derived_concepts {[x.address for x in derived_concepts]}"
+    )
+    return eligible
+
+
+def sort_select_output(cte: CTE, query: SelectStatement | MultiSelectStatement):
+    hidden_addresses = [c.address for c in query.hidden_components]
+    output_addresses = [
+        c.address for c in query.output_components if c.address not in hidden_addresses
+    ]
+
+    mapping = {x.address: x for x in cte.output_columns}
+
+    new_output = []
+    for x in output_addresses:
+        new_output.append(mapping[x])
+    cte.output_columns = new_output
+
+
+def optimize_ctes(
+    input: list[CTE], root_cte: CTE, select: SelectStatement | MultiSelectStatement
+):
+    complete = False
+    REGISTERED_RULES: list["OptimizationRule"] = []
+
+    if CONFIG.optimizations.datasource_inlining:
+        REGISTERED_RULES.append(InlineDatasource())
+    if CONFIG.optimizations.predicate_pushdown:
+        REGISTERED_RULES.append(PredicatePushdown())
+    if CONFIG.optimizations.constant_inlining:
+        REGISTERED_RULES.append(InlineConstant())
+    loops = 0
+    while not complete and (loops <= MAX_OPTIMIZATION_LOOPS):
+        actions_taken = False
+        for rule in REGISTERED_RULES:
+            for cte in input:
+                inverse_map = gen_inverse_map(input)
+                actions_taken = rule.optimize(cte, inverse_map)
+        complete = not actions_taken
+        loops += 1
+
+    if CONFIG.optimizations.direct_return and is_direct_return_eligible(
+        root_cte, select
+    ):
+        root_cte.order_by = select.order_by
+        root_cte.limit = select.limit
+        if select.where_clause:
+
+            if root_cte.condition:
+                root_cte.condition = Conditional(
+                    left=root_cte.condition,
+                    operator=BooleanOperator.AND,
+                    right=select.where_clause.conditional,
+                )
+            else:
+                root_cte.condition = select.where_clause.conditional
+        root_cte.requires_nesting = False
+        sort_select_output(root_cte, select)
+
+    return filter_irrelevant_ctes(input, root_cte)
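
optimize_ctes applies every enabled rule to every CTE, rebuilding the parent-to-children lookup on each pass, and stops once a full pass reports no changes or MAX_OPTIMIZATION_LOOPS is reached. A self-contained sketch of the gen_inverse_map inversion, using a hypothetical FakeCTE stand-in rather than the real CTE model:

from dataclasses import dataclass, field


@dataclass
class FakeCTE:
    # hypothetical stand-in for trilogy.core.models.CTE; only the fields
    # gen_inverse_map touches (name, parent_ctes) are modeled here
    name: str
    parent_ctes: list["FakeCTE"] = field(default_factory=list)


def inverse_map(ctes: list[FakeCTE]) -> dict[str, list[FakeCTE]]:
    # same shape as gen_inverse_map in optimization.py: parent name -> child CTEs
    result: dict[str, list[FakeCTE]] = {}
    for cte in ctes:
        for parent in cte.parent_ctes:
            result.setdefault(parent.name, []).append(cte)
    return result


base = FakeCTE("base")
filtered = FakeCTE("filtered", parent_ctes=[base])
final = FakeCTE("final", parent_ctes=[filtered, base])

children = inverse_map([base, filtered, final])
assert [c.name for c in children["base"]] == ["filtered", "final"]
assert [c.name for c in children["filtered"]] == ["final"]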

pytrilogy-0.0.1.116/trilogy/core/optimizations/__init__.py (new file)

@@ -0,0 +1,11 @@
+from .inline_constant import InlineConstant
+from .inline_datasource import InlineDatasource
+from .predicate_pushdown import PredicatePushdown
+from .base_optimization import OptimizationRule
+
+__all__ = [
+    "OptimizationRule",
+    "InlineConstant",
+    "InlineDatasource",
+    "PredicatePushdown",
+]

pytrilogy-0.0.1.116/trilogy/core/optimizations/base_optimization.py (new file)

@@ -0,0 +1,17 @@
+from trilogy.core.models import (
+    CTE,
+)
+from trilogy.constants import logger
+from abc import ABC
+
+
+class OptimizationRule(ABC):
+
+    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
+        raise NotImplementedError
+
+    def log(self, message: str):
+        logger.info(f"[Optimization][{self.__class__.__name__}] {message}")
+
+    def debug(self, message: str):
+        logger.debug(f"[Optimization][{self.__class__.__name__}] {message}")
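
OptimizationRule is the extension point the modules below build on: a rule receives a single CTE plus the parent-to-children map and should return True only when it actually mutated something, which is what keeps the optimize_ctes loop iterating. A hypothetical skeleton for an additional rule (not part of this release) would follow the same shape:

from trilogy.core.models import CTE
from trilogy.core.optimizations import OptimizationRule


class NoOpRule(OptimizationRule):
    # hypothetical example rule, not part of the package; it only reports what it sees
    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
        children = inverse_map.get(cte.name, [])
        self.debug(f"visited {cte.name} with {len(children)} child CTEs")
        return False  # False means "nothing mutated", letting the optimizer loop converge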

pytrilogy-0.0.1.116/trilogy/core/optimizations/inline_constant.py (new file)

@@ -0,0 +1,29 @@
+from trilogy.core.models import (
+    CTE,
+    Concept,
+)
+from trilogy.core.enums import PurposeLineage
+
+from trilogy.core.optimizations.base_optimization import OptimizationRule
+
+
+class InlineConstant(OptimizationRule):
+
+    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
+
+        to_inline: list[Concept] = []
+        for x in cte.source.input_concepts:
+            if x.address not in cte.source_map:
+                continue
+            if x.derivation == PurposeLineage.CONSTANT:
+                self.log(f"Found constant {x.address} on {cte.name}")
+                to_inline.append(x)
+        if to_inline:
+            inlined = False
+            for c in to_inline:
+                self.log(f"Inlining constant {c.address} on {cte.name}")
+                test = cte.inline_constant(c)
+                if test:
+                    inlined = True
+            return inlined
+        return False

pytrilogy-0.0.1.116/trilogy/core/optimizations/inline_datasource.py (new file)

@@ -0,0 +1,54 @@
+from trilogy.core.models import (
+    CTE,
+    Datasource,
+)
+
+from trilogy.core.optimizations.base_optimization import OptimizationRule
+
+
+class InlineDatasource(OptimizationRule):
+
+    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
+        if not cte.parent_ctes:
+            return False
+
+        optimized = False
+        self.log(
+            f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
+        )
+        to_inline: list[CTE] = []
+        force_group = False
+        for parent_cte in cte.parent_ctes:
+            if not parent_cte.is_root_datasource:
+                self.log(f"parent {parent_cte.name} is not root")
+                continue
+            if parent_cte.parent_ctes:
+                self.log(f"parent {parent_cte.name} has parents")
+                continue
+            raw_root = parent_cte.source.datasources[0]
+            if not isinstance(raw_root, Datasource):
+                self.log(f"parent {parent_cte.name} is not datasource")
+                continue
+            root: Datasource = raw_root
+            if not root.can_be_inlined:
+                self.log(f"parent {parent_cte.name} datasource is not inlineable")
+                continue
+            root_outputs = {x.address for x in root.output_concepts}
+            cte_outputs = {x.address for x in parent_cte.output_columns}
+            grain_components = {x.address for x in root.grain.components}
+            if not cte_outputs.issubset(root_outputs):
+                self.log(f"Not all {parent_cte.name} outputs are found on datasource")
+                continue
+            if not grain_components.issubset(cte_outputs):
+                self.log("Not all datasource components in cte outputs, forcing group")
+                force_group = True
+            to_inline.append(parent_cte)
+
+        for replaceable in to_inline:
+
+            result = cte.inline_parent_datasource(replaceable, force_group=force_group)
+            if result:
+                self.log(f"Inlined parent {replaceable.name}")
+            else:
+                self.log(f"Failed to inline {replaceable.name}")
+        return optimized
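
The eligibility logic above reduces to set checks over concept addresses: every column the parent CTE exposes must exist on the raw datasource, and if the datasource's grain is not fully covered by those columns the inline is still allowed but a group-by is forced. A small illustration with hypothetical addresses:

# hypothetical concept addresses, not taken from the package
root_outputs = {"orders.id", "orders.customer_id", "orders.amount"}
cte_outputs = {"orders.customer_id", "orders.amount"}
grain_components = {"orders.id"}

can_inline = cte_outputs.issubset(root_outputs)           # True: datasource covers the CTE outputs
force_group = not grain_components.issubset(cte_outputs)  # True: grain key missing, so group

assert can_inline and force_group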

pytrilogy-0.0.1.116/trilogy/core/optimizations/predicate_pushdown.py (new file)

@@ -0,0 +1,88 @@
+from trilogy.core.models import (
+    CTE,
+    Conditional,
+    BooleanOperator,
+)
+from trilogy.constants import logger
+from trilogy.core.optimizations.base_optimization import OptimizationRule
+
+
+def decompose_condition(conditional: Conditional):
+    chunks = []
+    if conditional.operator == BooleanOperator.AND:
+        for val in [conditional.left, conditional.right]:
+            if isinstance(val, Conditional):
+                chunks.extend(decompose_condition(val))
+            else:
+                chunks.append(val)
+    else:
+        chunks.append(conditional)
+    return chunks
+
+
+def is_child_of(a, comparison):
+    if isinstance(comparison, Conditional):
+        return (
+            is_child_of(a, comparison.left) or is_child_of(a, comparison.right)
+        ) and comparison.operator == BooleanOperator.AND
+    return comparison == a
+
+
+class PredicatePushdown(OptimizationRule):
+
+    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
+
+        if not cte.parent_ctes:
+            self.debug(f"No parent CTEs for {cte.name}")
+
+            return False
+
+        optimized = False
+        if not cte.condition:
+            self.debug(f"No CTE condition for {cte.name}")
+            return False
+        self.log(
+            f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
+        )
+        if isinstance(cte.condition, Conditional):
+            candidates = cte.condition.decompose()
+        else:
+            candidates = [cte.condition]
+        logger.info(f"Have {len(candidates)} candidates to try to push down")
+        for candidate in candidates:
+            conditions = {x.address for x in candidate.concept_arguments}
+            for parent_cte in cte.parent_ctes:
+                materialized = {k for k, v in parent_cte.source_map.items() if v != []}
+                if conditions.issubset(materialized):
+                    if all(
+                        [
+                            is_child_of(candidate, child.condition)
+                            for child in inverse_map[parent_cte.name]
+                        ]
+                    ):
+                        self.log(
+                            f"All concepts are found on {parent_cte.name} and all it's children include same filter; pushing up filter"
+                        )
+                        if parent_cte.condition:
+                            parent_cte.condition = Conditional(
+                                left=parent_cte.condition,
+                                operator=BooleanOperator.AND,
+                                right=candidate,
+                            )
+                        else:
+                            parent_cte.condition = candidate
+                        optimized = True
+                else:
+                    logger.info("conditions not subset of parent materialized")
+
+        if all(
+            [
+                is_child_of(cte.condition, parent_cte.condition)
+                for parent_cte in cte.parent_ctes
+            ]
+        ):
+            self.log("All parents have same filter, removing filter")
+            cte.condition = None
+            optimized = True
+
+        return optimized
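
decompose_condition only flattens conjunctions: an AND tree is split into leaf predicates that can be pushed down independently, while anything joined by OR stays intact. A stand-alone sketch of that behaviour, using a hypothetical stand-in for Conditional rather than the real model:

from dataclasses import dataclass
from typing import Any


@dataclass
class FakeConditional:
    # hypothetical stand-in for trilogy.core.models.Conditional
    left: Any
    operator: str  # "AND" or "OR" in place of BooleanOperator
    right: Any


def decompose(cond: FakeConditional) -> list:
    # mirrors decompose_condition: recurse through AND nodes, keep OR nodes intact
    if cond.operator != "AND":
        return [cond]
    chunks = []
    for val in (cond.left, cond.right):
        chunks.extend(decompose(val) if isinstance(val, FakeConditional) else [val])
    return chunks


tree = FakeConditional(
    left="a = 1",
    operator="AND",
    right=FakeConditional(left="b = 2", operator="OR", right="c = 3"),
)
parts = decompose(tree)
assert parts[0] == "a = 1" and isinstance(parts[1], FakeConditional)  # OR branch stays whole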

{pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/dialect/base.py

@@ -44,6 +44,8 @@ from trilogy.core.models import (
     RowsetDerivationStatement,
     ConceptDeclarationStatement,
     ImportStatement,
+    RawSQLStatement,
+    ProcessedRawSQLStatement,
 )
 from trilogy.core.query_processor import process_query, process_persist
 from trilogy.dialect.common import render_join

@@ -558,11 +560,20 @@ class BaseDialect:
             | RowsetDerivationStatement
             | MergeStatement
             | ImportStatement
+            | RawSQLStatement
         ],
         hooks: Optional[List[BaseHook]] = None,
-    ) -> List[
+    ) -> List[
+        ProcessedQuery
+        | ProcessedQueryPersist
+        | ProcessedShowStatement
+        | ProcessedRawSQLStatement
+    ]:
         output: List[
-            ProcessedQuery
+            ProcessedQuery
+            | ProcessedQueryPersist
+            | ProcessedShowStatement
+            | ProcessedRawSQLStatement
         ] = []
         for statement in statements:
             if isinstance(statement, PersistStatement):

@@ -604,6 +615,8 @@ class BaseDialect:
                     )
                 else:
                     raise NotImplementedError(type(statement))
+            elif isinstance(statement, RawSQLStatement):
+                output.append(ProcessedRawSQLStatement(text=statement.text))
             elif isinstance(
                 statement,
                 (

@@ -619,10 +632,18 @@ class BaseDialect:
         return output

     def compile_statement(
-        self,
+        self,
+        query: (
+            ProcessedQuery
+            | ProcessedQueryPersist
+            | ProcessedShowStatement
+            | ProcessedRawSQLStatement
+        ),
     ) -> str:
         if isinstance(query, ProcessedShowStatement):
             return ";\n".join([str(x) for x in query.output_values])
+        elif isinstance(query, ProcessedRawSQLStatement):
+            return query.text
         select_columns: Dict[str, str] = {}
         cte_output_map = {}
         selected = set()
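
With these changes a ProcessedRawSQLStatement short-circuits compilation: compile_statement returns the stored text as-is instead of rendering CTEs. A minimal sketch, assuming only what this diff shows:

from trilogy.core.models import ProcessedRawSQLStatement
from trilogy.dialect.base import BaseDialect

# the raw text is passed through untouched by the dialect
stmt = ProcessedRawSQLStatement(text="select 1")
assert BaseDialect().compile_statement(stmt) == "select 1"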

{pytrilogy-0.0.1.114 → pytrilogy-0.0.1.116}/trilogy/dialect/sql_server.py

@@ -9,6 +9,7 @@ from trilogy.core.models import (
     ProcessedQuery,
     ProcessedQueryPersist,
     ProcessedShowStatement,
+    ProcessedRawSQLStatement,
 )
 from trilogy.dialect.base import BaseDialect

@@ -81,7 +82,13 @@ class SqlServerDialect(BaseDialect):
     SQL_TEMPLATE = TSQL_TEMPLATE

     def compile_statement(
-        self,
+        self,
+        query: (
+            ProcessedQuery
+            | ProcessedQueryPersist
+            | ProcessedShowStatement
+            | ProcessedRawSQLStatement
+        ),
     ) -> str:
         base = super().compile_statement(query)
         if isinstance(base, (ProcessedQuery, ProcessedQueryPersist)):