pytrilogy 0.0.1.115__tar.gz → 0.0.1.117__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.1.115/pytrilogy.egg-info → pytrilogy-0.0.1.117}/PKG-INFO +1 -1
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117/pytrilogy.egg-info}/PKG-INFO +1 -1
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/pytrilogy.egg-info/SOURCES.txt +6 -0
- pytrilogy-0.0.1.117/tests/test_datatypes.py +13 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/tests/test_parsing.py +12 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/__init__.py +1 -1
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/constants.py +1 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/functions.py +5 -4
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/models.py +79 -8
- pytrilogy-0.0.1.117/trilogy/core/optimization.py +131 -0
- pytrilogy-0.0.1.117/trilogy/core/optimizations/__init__.py +11 -0
- pytrilogy-0.0.1.117/trilogy/core/optimizations/base_optimization.py +17 -0
- pytrilogy-0.0.1.117/trilogy/core/optimizations/inline_constant.py +29 -0
- pytrilogy-0.0.1.117/trilogy/core/optimizations/inline_datasource.py +54 -0
- pytrilogy-0.0.1.117/trilogy/core/optimizations/predicate_pushdown.py +105 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/dialect/base.py +29 -3
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/dialect/presto.py +3 -1
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/dialect/sql_server.py +8 -1
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/executor.py +25 -5
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/parsing/parse_engine.py +34 -6
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/parsing/render.py +10 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/parsing/trilogy.lark +9 -3
- pytrilogy-0.0.1.115/trilogy/core/optimization.py +0 -262
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/LICENSE.md +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/README.md +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/pyproject.toml +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/pytrilogy.egg-info/dependency_links.txt +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/pytrilogy.egg-info/entry_points.txt +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/pytrilogy.egg-info/requires.txt +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/pytrilogy.egg-info/top_level.txt +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/setup.cfg +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/setup.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/tests/test_declarations.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/tests/test_derived_concepts.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/tests/test_discovery_nodes.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/tests/test_environment.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/tests/test_functions.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/tests/test_imports.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/tests/test_metadata.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/tests/test_models.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/tests/test_multi_join_assignments.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/tests/test_partial_handling.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/tests/test_query_processing.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/tests/test_select.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/tests/test_statements.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/tests/test_undefined_concept.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/tests/test_where_clause.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/compiler.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/__init__.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/constants.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/enums.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/env_processor.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/environment_helpers.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/ergonomics.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/exceptions.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/graph_models.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/internal.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/__init__.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/concept_strategies_v3.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/graph_utils.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/node_generators/__init__.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/node_generators/basic_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/node_generators/common.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/node_generators/concept_merge_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/node_generators/filter_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/node_generators/group_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/node_generators/group_to_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/node_generators/multiselect_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/node_generators/node_merge_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/node_generators/rowset_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/node_generators/select_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/node_generators/unnest_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/node_generators/window_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/nodes/__init__.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/nodes/base_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/nodes/filter_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/nodes/group_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/nodes/merge_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/nodes/select_node_v2.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/nodes/unnest_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/nodes/window_node.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/processing/utility.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/core/query_processor.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/dialect/__init__.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/dialect/bigquery.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/dialect/common.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/dialect/config.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/dialect/duckdb.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/dialect/enums.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/dialect/postgres.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/dialect/snowflake.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/engine.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/hooks/__init__.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/hooks/base_hook.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/hooks/graph_hook.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/hooks/query_debugger.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/metadata/__init__.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/parser.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/parsing/__init__.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/parsing/common.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/parsing/config.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/parsing/exceptions.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/parsing/helpers.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/py.typed +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/scripts/__init__.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/scripts/trilogy.py +0 -0
- {pytrilogy-0.0.1.115 → pytrilogy-0.0.1.117}/trilogy/utility.py +0 -0
|
@@ -8,6 +8,7 @@ pytrilogy.egg-info/dependency_links.txt
|
|
|
8
8
|
pytrilogy.egg-info/entry_points.txt
|
|
9
9
|
pytrilogy.egg-info/requires.txt
|
|
10
10
|
pytrilogy.egg-info/top_level.txt
|
|
11
|
+
tests/test_datatypes.py
|
|
11
12
|
tests/test_declarations.py
|
|
12
13
|
tests/test_derived_concepts.py
|
|
13
14
|
tests/test_discovery_nodes.py
|
|
@@ -45,6 +46,11 @@ trilogy/core/internal.py
|
|
|
45
46
|
trilogy/core/models.py
|
|
46
47
|
trilogy/core/optimization.py
|
|
47
48
|
trilogy/core/query_processor.py
|
|
49
|
+
trilogy/core/optimizations/__init__.py
|
|
50
|
+
trilogy/core/optimizations/base_optimization.py
|
|
51
|
+
trilogy/core/optimizations/inline_constant.py
|
|
52
|
+
trilogy/core/optimizations/inline_datasource.py
|
|
53
|
+
trilogy/core/optimizations/predicate_pushdown.py
|
|
48
54
|
trilogy/core/processing/__init__.py
|
|
49
55
|
trilogy/core/processing/concept_strategies_v3.py
|
|
50
56
|
trilogy/core/processing/graph_utils.py
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from trilogy.core.models import (
|
|
2
|
+
NumericType,
|
|
3
|
+
)
|
|
4
|
+
from trilogy.parsing.parse_engine import (
|
|
5
|
+
parse_text,
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_numeric():
    """A ``numeric(12,2)`` constant keeps its declared precision and scale."""
    query = "const order_id numeric(12,2); const rounded <- cast(order_id as numeric(15,2));"
    environment, _ = parse_text(query)

    expected = NumericType(precision=12, scale=2)
    assert environment.concepts["order_id"].datatype == expected
|
|
@@ -292,3 +292,15 @@ CASE WHEN dates.year BETWEEN 1883 AND 1900 THEN 'Lost Generation'
|
|
|
292
292
|
)
|
|
293
293
|
|
|
294
294
|
assert env2.concepts["dates.generation"].purpose == Purpose.PROPERTY
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def test_rawsql():
    """The first parsed statement of a ``raw_sql`` script exposes the SQL text."""
    script = """
raw_sql('''select 1''');

select 1 as test;

"""
    _, statements = parse_text(script)
    first_statement = statements[0]
    assert first_statement.text == "select 1"
|
|
@@ -9,6 +9,7 @@ from trilogy.core.models import (
|
|
|
9
9
|
ListType,
|
|
10
10
|
StructType,
|
|
11
11
|
MapType,
|
|
12
|
+
NumericType,
|
|
12
13
|
)
|
|
13
14
|
from trilogy.core.enums import FunctionType, Purpose, Granularity, DatePart
|
|
14
15
|
from trilogy.core.exceptions import InvalidSyntaxException
|
|
@@ -21,7 +22,9 @@ def create_function_derived_concept(
|
|
|
21
22
|
namespace: str,
|
|
22
23
|
operator: FunctionType,
|
|
23
24
|
arguments: list[Concept],
|
|
24
|
-
output_type: Optional[
|
|
25
|
+
output_type: Optional[
|
|
26
|
+
DataType | ListType | StructType | MapType | NumericType
|
|
27
|
+
] = None,
|
|
25
28
|
output_purpose: Optional[Purpose] = None,
|
|
26
29
|
) -> Concept:
|
|
27
30
|
purpose = (
|
|
@@ -56,9 +59,7 @@ def argument_to_purpose(arg) -> Purpose:
|
|
|
56
59
|
return Purpose.PROPERTY
|
|
57
60
|
elif isinstance(arg, Concept):
|
|
58
61
|
return arg.purpose
|
|
59
|
-
elif isinstance(arg, (int, float, str, bool, list)):
|
|
60
|
-
return Purpose.CONSTANT
|
|
61
|
-
elif isinstance(arg, DataType):
|
|
62
|
+
elif isinstance(arg, (int, float, str, bool, list, NumericType, DataType)):
|
|
62
63
|
return Purpose.CONSTANT
|
|
63
64
|
elif isinstance(arg, DatePart):
|
|
64
65
|
return Purpose.CONSTANT
|
|
@@ -105,7 +105,9 @@ def get_concept_arguments(expr) -> List["Concept"]:
|
|
|
105
105
|
return output
|
|
106
106
|
|
|
107
107
|
|
|
108
|
-
ALL_TYPES = Union[
|
|
108
|
+
ALL_TYPES = Union[
|
|
109
|
+
"DataType", "MapType", "ListType", "NumericType", "StructType", "Concept"
|
|
110
|
+
]
|
|
109
111
|
|
|
110
112
|
NAMESPACED_TYPES = Union[
|
|
111
113
|
"WindowItem",
|
|
@@ -176,6 +178,19 @@ class DataType(Enum):
|
|
|
176
178
|
return self
|
|
177
179
|
|
|
178
180
|
|
|
181
|
+
class NumericType(BaseModel):
    """Fixed-point numeric type described by a precision and a scale.

    Defaults to ``precision=20`` and ``scale=5`` when not given explicitly.
    """

    # Frozen so that instances are immutable and hashable: this type is
    # declared as a member of Set[...] annotations (e.g. Function.valid_inputs),
    # and a non-frozen pydantic model cannot be placed in a set.
    model_config = ConfigDict(frozen=True)
    precision: int = 20
    scale: int = 5

    @property
    def data_type(self):
        """The base ``DataType`` of this type; always ``DataType.NUMERIC``."""
        return DataType.NUMERIC

    @property
    def value(self):
        """The enum value of the base data type."""
        return self.data_type.value
|
|
192
|
+
|
|
193
|
+
|
|
179
194
|
class ListType(BaseModel):
|
|
180
195
|
model_config = ConfigDict(frozen=True)
|
|
181
196
|
type: ALL_TYPES
|
|
@@ -192,7 +207,9 @@ class ListType(BaseModel):
|
|
|
192
207
|
return self.data_type.value
|
|
193
208
|
|
|
194
209
|
@property
|
|
195
|
-
def value_data_type(
|
|
210
|
+
def value_data_type(
|
|
211
|
+
self,
|
|
212
|
+
) -> DataType | StructType | MapType | ListType | NumericType:
|
|
196
213
|
if isinstance(self.type, Concept):
|
|
197
214
|
return self.type.datatype
|
|
198
215
|
return self.type
|
|
@@ -270,7 +287,7 @@ def empty_grain() -> Grain:
|
|
|
270
287
|
|
|
271
288
|
class Concept(Namespaced, SelectGrain, BaseModel):
|
|
272
289
|
name: str
|
|
273
|
-
datatype: DataType | ListType | StructType | MapType
|
|
290
|
+
datatype: DataType | ListType | StructType | MapType | NumericType
|
|
274
291
|
purpose: Purpose
|
|
275
292
|
metadata: Optional[Metadata] = Field(
|
|
276
293
|
default_factory=lambda: Metadata(description=None, line_number=None),
|
|
@@ -790,12 +807,12 @@ class LooseConceptList(BaseModel):
|
|
|
790
807
|
class Function(Namespaced, SelectGrain, BaseModel):
|
|
791
808
|
operator: FunctionType
|
|
792
809
|
arg_count: int = Field(default=1)
|
|
793
|
-
output_datatype: DataType | ListType | StructType | MapType
|
|
810
|
+
output_datatype: DataType | ListType | StructType | MapType | NumericType
|
|
794
811
|
output_purpose: Purpose
|
|
795
812
|
valid_inputs: Optional[
|
|
796
813
|
Union[
|
|
797
|
-
Set[DataType | ListType | StructType],
|
|
798
|
-
List[Set[DataType | ListType | StructType]],
|
|
814
|
+
Set[DataType | ListType | StructType | NumericType],
|
|
815
|
+
List[Set[DataType | ListType | StructType] | NumericType],
|
|
799
816
|
]
|
|
800
817
|
] = None
|
|
801
818
|
arguments: Sequence[
|
|
@@ -808,6 +825,7 @@ class Function(Namespaced, SelectGrain, BaseModel):
|
|
|
808
825
|
str,
|
|
809
826
|
DataType,
|
|
810
827
|
ListType,
|
|
828
|
+
NumericType,
|
|
811
829
|
DatePart,
|
|
812
830
|
"Parenthetical",
|
|
813
831
|
CaseWhen,
|
|
@@ -1165,11 +1183,17 @@ class OrderBy(Namespaced, BaseModel):
|
|
|
1165
1183
|
return OrderBy(items=[x.with_namespace(namespace) for x in self.items])
|
|
1166
1184
|
|
|
1167
1185
|
|
|
1186
|
+
class RawSQLStatement(BaseModel):
    """Statement holding a SQL string together with optional metadata."""

    # SQL text of the statement.
    text: str
    # A new Metadata() is created per instance when none is supplied.
    meta: Optional[Metadata] = Field(default_factory=Metadata)
|
|
1189
|
+
|
|
1190
|
+
|
|
1168
1191
|
class SelectStatement(Namespaced, BaseModel):
|
|
1169
1192
|
selection: List[SelectItem]
|
|
1170
1193
|
where_clause: Optional["WhereClause"] = None
|
|
1171
1194
|
order_by: Optional[OrderBy] = None
|
|
1172
1195
|
limit: Optional[int] = None
|
|
1196
|
+
meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
|
|
1173
1197
|
|
|
1174
1198
|
def __str__(self):
|
|
1175
1199
|
from trilogy.parsing.render import render_query
|
|
@@ -1371,6 +1395,7 @@ class MultiSelectStatement(Namespaced, BaseModel):
|
|
|
1371
1395
|
where_clause: Optional["WhereClause"] = None
|
|
1372
1396
|
order_by: Optional[OrderBy] = None
|
|
1373
1397
|
limit: Optional[int] = None
|
|
1398
|
+
meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
|
|
1374
1399
|
|
|
1375
1400
|
def __repr__(self):
|
|
1376
1401
|
return "MultiSelect<" + " MERGE ".join([str(s) for s in self.selects]) + ">"
|
|
@@ -1492,7 +1517,7 @@ class DatasourceMetadata(BaseModel):
|
|
|
1492
1517
|
|
|
1493
1518
|
class MergeStatement(Namespaced, BaseModel):
|
|
1494
1519
|
concepts: List[Concept]
|
|
1495
|
-
datatype: DataType | ListType | StructType | MapType
|
|
1520
|
+
datatype: DataType | ListType | StructType | MapType | NumericType
|
|
1496
1521
|
|
|
1497
1522
|
@cached_property
|
|
1498
1523
|
def concepts_lcl(self):
|
|
@@ -2038,6 +2063,40 @@ class CTE(BaseModel):
|
|
|
2038
2063
|
def validate_output_columns(cls, v):
|
|
2039
2064
|
return unique(v, "address")
|
|
2040
2065
|
|
|
2066
|
+
def inline_constant(self, concept: Concept) -> bool:
    """Stop sourcing a constant concept from other CTEs.

    The concept's entry is removed from ``source_map``. Any CTE that was only
    referenced for this concept is then dropped from ``joins`` and
    ``parent_ctes``, and the base name/alias overrides are re-pointed if they
    referred to it.

    Args:
        concept: The concept to inline.

    Returns:
        False when the concept is not derived from a ``CONSTANT`` function
        (nothing is changed); True once the concept has been processed.
    """
    if concept.derivation != PurposeLineage.CONSTANT:
        return False
    lineage = concept.lineage
    if not isinstance(lineage, Function):
        return False
    if lineage.operator != FunctionType.CONSTANT:
        return False

    # remove the constant
    removed: set = set()
    if concept.address in self.source_map:
        removed = removed.union(self.source_map[concept.address])
        del self.source_map[concept.address]

    # if we've entirely removed the need to join to someplace to get the
    # concept, drop the join as well.
    for removed_cte in removed:
        still_required = any(
            removed_cte in sources for sources in self.source_map.values()
        )
        if still_required:
            continue
        self.joins = [
            join
            for join in self.joins
            if not isinstance(join, Join)
            or (
                join.right_cte.name != removed_cte
                and join.left_cte.name != removed_cte
            )
        ]
        self.parent_ctes = [x for x in self.parent_ctes if x.name != removed_cte]
        if removed_cte == self.base_name_override:
            # The base pointed at the dropped CTE; fall back to the first
            # remaining parent, or clear the override when none is left.
            candidates = [x.name for x in self.parent_ctes]
            self.base_name_override = candidates[0] if candidates else None
            self.base_alias_override = candidates[0] if candidates else None

    # Report success explicitly: callers test the result for truthiness, and
    # falling off the end returned None, which reads as "nothing inlined".
    return True
|
|
2099
|
+
|
|
2041
2100
|
def inline_parent_datasource(self, parent: CTE, force_group: bool = False) -> bool:
|
|
2042
2101
|
qds_being_inlined = parent.source
|
|
2043
2102
|
ds_being_inlined = qds_being_inlined.datasources[0]
|
|
@@ -2186,6 +2245,11 @@ class CTE(BaseModel):
|
|
|
2186
2245
|
and not self.group_to_grain
|
|
2187
2246
|
):
|
|
2188
2247
|
return False
|
|
2248
|
+
# if we don't need to source any concepts from anywhere
|
|
2249
|
+
# render without from
|
|
2250
|
+
# most likely to happen from inlining constants
|
|
2251
|
+
if not any([v for v in self.source_map.values()]):
|
|
2252
|
+
return False
|
|
2189
2253
|
if (
|
|
2190
2254
|
len(self.source.datasources) == 1
|
|
2191
2255
|
and self.source.datasources[0].name == CONSTANT_DATASET
|
|
@@ -3184,6 +3248,10 @@ class ProcessedShowStatement(BaseModel):
|
|
|
3184
3248
|
output_values: List[Union[Concept, Datasource, ProcessedQuery]]
|
|
3185
3249
|
|
|
3186
3250
|
|
|
3251
|
+
class ProcessedRawSQLStatement(BaseModel):
    """Processed form of a raw SQL statement; carries only the SQL text."""

    text: str
|
|
3253
|
+
|
|
3254
|
+
|
|
3187
3255
|
class Limit(BaseModel):
|
|
3188
3256
|
count: int
|
|
3189
3257
|
|
|
@@ -3386,6 +3454,7 @@ class Parenthetical(ConceptArgs, Namespaced, SelectGrain, BaseModel):
|
|
|
3386
3454
|
class PersistStatement(BaseModel):
|
|
3387
3455
|
datasource: Datasource
|
|
3388
3456
|
select: SelectStatement
|
|
3457
|
+
meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
|
|
3389
3458
|
|
|
3390
3459
|
@property
|
|
3391
3460
|
def identifier(self):
|
|
@@ -3447,7 +3516,7 @@ def list_to_wrapper(args):
|
|
|
3447
3516
|
return ListWrapper(args, type=types[0])
|
|
3448
3517
|
|
|
3449
3518
|
|
|
3450
|
-
def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
|
|
3519
|
+
def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType | NumericType:
|
|
3451
3520
|
if isinstance(arg, Function):
|
|
3452
3521
|
return arg.output_datatype
|
|
3453
3522
|
elif isinstance(arg, Concept):
|
|
@@ -3460,6 +3529,8 @@ def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
|
|
|
3460
3529
|
return DataType.STRING
|
|
3461
3530
|
elif isinstance(arg, float):
|
|
3462
3531
|
return DataType.FLOAT
|
|
3532
|
+
elif isinstance(arg, NumericType):
|
|
3533
|
+
return arg
|
|
3463
3534
|
elif isinstance(arg, ListWrapper):
|
|
3464
3535
|
return ListType(type=arg.type)
|
|
3465
3536
|
elif isinstance(arg, AggregateWrapper):
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
from trilogy.core.models import (
|
|
2
|
+
CTE,
|
|
3
|
+
SelectStatement,
|
|
4
|
+
PersistStatement,
|
|
5
|
+
MultiSelectStatement,
|
|
6
|
+
Conditional,
|
|
7
|
+
BooleanOperator,
|
|
8
|
+
)
|
|
9
|
+
from trilogy.core.enums import PurposeLineage
|
|
10
|
+
from trilogy.constants import logger, CONFIG
|
|
11
|
+
from trilogy.core.optimizations import (
|
|
12
|
+
OptimizationRule,
|
|
13
|
+
InlineConstant,
|
|
14
|
+
PredicatePushdown,
|
|
15
|
+
InlineDatasource,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
MAX_OPTIMIZATION_LOOPS = 100
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def filter_irrelevant_ctes(
    input: list[CTE],
    root_cte: CTE,
):
    """Keep only the CTEs reachable from ``root_cte`` through ``parent_ctes``.

    Args:
        input: Candidate CTEs; their relative order is preserved in the result.
        root_cte: The CTE whose ancestry decides what is relevant.

    Returns:
        The members of ``input`` whose name belongs to ``root_cte`` or to one
        of its (transitive) parents.
    """
    relevant_ctes: set[str] = set()
    # Track visited objects so an ancestor shared by several children is
    # walked once instead of once per path, and use an explicit worklist so
    # deep parent chains cannot hit the recursion limit.
    visited: set[int] = set()
    pending = [root_cte]
    while pending:
        current = pending.pop()
        if id(current) in visited:
            continue
        visited.add(id(current))
        relevant_ctes.add(current.name)
        pending.extend(current.parent_ctes)
    return [cte for cte in input if cte.name in relevant_ctes]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def gen_inverse_map(input: list[CTE]) -> dict[str, list[CTE]]:
    """Build a parent-name -> children lookup.

    For every CTE in ``input``, the CTE is appended to the list stored under
    the name of each of its ``parent_ctes``.
    """
    children_by_parent: dict[str, list[CTE]] = {}
    for child in input:
        for parent in child.parent_ctes:
            children_by_parent.setdefault(parent.name, []).append(child)
    return children_by_parent
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def is_direct_return_eligible(
    cte: CTE, select: SelectStatement | PersistStatement | MultiSelectStatement
) -> bool:
    """Decide whether the select's final clauses can be applied on ``cte`` itself.

    Returns False for persist and multi-select statements, when the select
    has both a where clause with concept arguments and a limit, when the CTE
    derives a window or unnest concept, or when it derives an aggregate that
    the where clause references. Otherwise logs the decision and returns True.
    """
    if isinstance(select, (PersistStatement, MultiSelectStatement)):
        return False

    source = cte.source
    # Concepts the CTE produces (visible or hidden) that are not its inputs.
    derived_concepts = [
        c
        for c in source.output_concepts + source.hidden_concepts
        if c not in source.input_concepts
    ]

    where = select.where_clause
    conditions = {arg.address for arg in where.concept_arguments} if where else set()
    if conditions and select.limit:
        return False

    for concept in derived_concepts:
        derivation = concept.derivation
        if derivation == PurposeLineage.WINDOW:
            return False
        if derivation == PurposeLineage.UNNEST:
            return False
        if derivation == PurposeLineage.AGGREGATE and concept.address in conditions:
            return False

    logger.info(
        f"Upleveling output select to final CTE with derived_concepts {[x.address for x in derived_concepts]}"
    )
    return True
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def sort_select_output(cte: CTE, query: SelectStatement | MultiSelectStatement):
    """Reorder ``cte.output_columns`` in place to follow the query's output order.

    Hidden components of the query are left out. Raises ``KeyError`` if a
    visible output component has no matching column on the CTE.
    """
    hidden = [component.address for component in query.hidden_components]
    columns_by_address = {column.address: column for column in cte.output_columns}
    cte.output_columns = [
        columns_by_address[component.address]
        for component in query.output_components
        if component.address not in hidden
    ]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def optimize_ctes(
    input: list[CTE], root_cte: CTE, select: SelectStatement | MultiSelectStatement
) -> list[CTE]:
    """Run the enabled optimization rules over ``input`` until nothing changes.

    When direct return is enabled and eligible, the select's ordering, limit
    and where clause are first applied to ``root_cte`` itself. The rules
    enabled in ``CONFIG.optimizations`` are then applied to every CTE,
    repeating until a full pass makes no change or the loop cap is reached.

    Args:
        input: All CTEs of the query; they are mutated in place by the rules.
        root_cte: The final CTE of the query.
        select: The statement the CTEs were generated for.

    Returns:
        The CTEs from ``input`` still reachable from ``root_cte``.
    """
    if CONFIG.optimizations.direct_return and is_direct_return_eligible(
        root_cte, select
    ):
        root_cte.order_by = select.order_by
        root_cte.limit = select.limit
        if select.where_clause:
            if root_cte.condition:
                root_cte.condition = Conditional(
                    left=root_cte.condition,
                    operator=BooleanOperator.AND,
                    right=select.where_clause.conditional,
                )
            else:
                root_cte.condition = select.where_clause.conditional
        root_cte.requires_nesting = False
        sort_select_output(root_cte, select)

    registered_rules: list["OptimizationRule"] = []
    if CONFIG.optimizations.datasource_inlining:
        registered_rules.append(InlineDatasource())
    if CONFIG.optimizations.predicate_pushdown:
        registered_rules.append(PredicatePushdown())
    if CONFIG.optimizations.constant_inlining:
        registered_rules.append(InlineConstant())

    complete = False
    loops = 0
    while not complete and (loops <= MAX_OPTIMIZATION_LOOPS):
        actions_taken = False
        for rule in registered_rules:
            for cte in input:
                # Rebuilt per CTE because rules may alter parent links.
                inverse_map = gen_inverse_map(input)
                # Call the rule before folding its result in: writing
                # `actions_taken or rule.optimize(...)` short-circuits and
                # skips every remaining rule/CTE once one action has fired.
                changed = rule.optimize(cte, inverse_map)
                actions_taken = actions_taken or changed
        complete = not actions_taken
        loops += 1

    return filter_irrelevant_ctes(input, root_cte)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from .inline_constant import InlineConstant
|
|
2
|
+
from .inline_datasource import InlineDatasource
|
|
3
|
+
from .predicate_pushdown import PredicatePushdown
|
|
4
|
+
from .base_optimization import OptimizationRule
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"OptimizationRule",
|
|
8
|
+
"InlineConstant",
|
|
9
|
+
"InlineDatasource",
|
|
10
|
+
"PredicatePushdown",
|
|
11
|
+
]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from trilogy.core.models import (
|
|
2
|
+
CTE,
|
|
3
|
+
)
|
|
4
|
+
from trilogy.constants import logger
|
|
5
|
+
from abc import ABC
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class OptimizationRule(ABC):
    """Base class for CTE optimization rules, with prefixed logging helpers."""

    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
        """Apply the rule to ``cte``; must be overridden by subclasses.

        ``inverse_map`` maps a CTE name to a list of CTEs. The base
        implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

    def _prefixed(self, message: str) -> str:
        # Tag every message with the concrete rule's class name.
        return f"[Optimization][{self.__class__.__name__}] {message}"

    def log(self, message: str):
        """Emit ``message`` at INFO level, prefixed with the rule name."""
        logger.info(self._prefixed(message))

    def debug(self, message: str):
        """Emit ``message`` at DEBUG level, prefixed with the rule name."""
        logger.debug(self._prefixed(message))
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from trilogy.core.models import (
|
|
2
|
+
CTE,
|
|
3
|
+
Concept,
|
|
4
|
+
)
|
|
5
|
+
from trilogy.core.enums import PurposeLineage
|
|
6
|
+
|
|
7
|
+
from trilogy.core.optimizations.base_optimization import OptimizationRule
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class InlineConstant(OptimizationRule):
    """Rule that inlines constant input concepts of a CTE."""

    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
        """Inline every constant input concept that ``cte`` still sources.

        Returns True if at least one ``cte.inline_constant`` call returned a
        truthy value, False otherwise. ``inverse_map`` is not used.
        """
        # Collect first: inline_constant edits cte.source_map, which the
        # selection below reads.
        to_inline: list[Concept] = []
        for x in cte.source.input_concepts:
            is_sourced = x.address in cte.source_map
            if is_sourced and x.derivation == PurposeLineage.CONSTANT:
                self.log(f"Found constant {x.address} on {cte.name}")
                to_inline.append(x)

        inlined = False
        for c in to_inline:
            self.log(f"Inlining constant {c.address} on {cte.name}")
            if cte.inline_constant(c):
                inlined = True
        return inlined
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from trilogy.core.models import (
|
|
2
|
+
CTE,
|
|
3
|
+
Datasource,
|
|
4
|
+
)
|
|
5
|
+
|
|
6
|
+
from trilogy.core.optimizations.base_optimization import OptimizationRule
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class InlineDatasource(OptimizationRule):
    """Rule that inlines eligible root-datasource parent CTEs into a CTE."""

    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
        """Inline every eligible parent of ``cte``.

        A parent is eligible when it is a root datasource with no parents of
        its own, its first datasource is a ``Datasource`` that can be inlined,
        and all of the parent's output columns exist on that datasource.

        Returns:
            True if at least one parent was inlined, False otherwise.
            ``inverse_map`` is not used.
        """
        if not cte.parent_ctes:
            return False

        optimized = False
        self.log(
            f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
        )
        to_inline: list[CTE] = []
        force_group = False
        for parent_cte in cte.parent_ctes:
            if not parent_cte.is_root_datasource:
                self.log(f"parent {parent_cte.name} is not root")
                continue
            if parent_cte.parent_ctes:
                self.log(f"parent {parent_cte.name} has parents")
                continue
            raw_root = parent_cte.source.datasources[0]
            if not isinstance(raw_root, Datasource):
                self.log(f"parent {parent_cte.name} is not datasource")
                continue
            root: Datasource = raw_root
            if not root.can_be_inlined:
                self.log(f"parent {parent_cte.name} datasource is not inlineable")
                continue
            root_outputs = {x.address for x in root.output_concepts}
            cte_outputs = {x.address for x in parent_cte.output_columns}
            grain_components = {x.address for x in root.grain.components}
            if not cte_outputs.issubset(root_outputs):
                self.log(f"Not all {parent_cte.name} outputs are found on datasource")
                continue
            if not grain_components.issubset(cte_outputs):
                # Once set, the flag applies to every parent inlined below.
                self.log("Not all datasource components in cte outputs, forcing group")
                force_group = True
            to_inline.append(parent_cte)

        for replaceable in to_inline:
            result = cte.inline_parent_datasource(replaceable, force_group=force_group)
            if result:
                self.log(f"Inlined parent {replaceable.name}")
                # Report the change; previously this flag was never set, so
                # the rule always claimed to have done nothing.
                optimized = True
            else:
                self.log(f"Failed to inline {replaceable.name}")
        return optimized
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
from trilogy.core.models import (
|
|
2
|
+
CTE,
|
|
3
|
+
Conditional,
|
|
4
|
+
BooleanOperator,
|
|
5
|
+
Datasource,
|
|
6
|
+
)
|
|
7
|
+
from trilogy.core.optimizations.base_optimization import OptimizationRule
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def decompose_condition(conditional: Conditional):
    """Flatten a tree of AND-ed conditionals into a list of its parts.

    A conditional whose operator is not AND is returned as a single-element
    list. For an AND, the left side is expanded before the right; nested
    ``Conditional`` operands are expanded recursively and any other operand
    is kept as-is.
    """
    if conditional.operator != BooleanOperator.AND:
        return [conditional]

    parts = []
    for operand in (conditional.left, conditional.right):
        if isinstance(operand, Conditional):
            parts.extend(decompose_condition(operand))
        else:
            parts.append(operand)
    return parts
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def is_child_of(a, comparison):
    """Report whether ``a`` appears as an AND-ed component of ``comparison``.

    A non-``Conditional`` ``comparison`` matches only when it equals ``a``.
    A ``Conditional`` matches only when its operator is AND and ``a`` is a
    child of its left or right side.
    """
    if not isinstance(comparison, Conditional):
        return comparison == a
    if comparison.operator != BooleanOperator.AND:
        return False
    return is_child_of(a, comparison.left) or is_child_of(a, comparison.right)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class PredicatePushdown(OptimizationRule):
    """Rule that copies a CTE's filter predicates onto its parent CTEs."""

    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
        """Push the parts of ``cte.condition`` up to parents that can apply them.

        A candidate predicate is added to a parent when every concept it
        references is available on that parent and every child of the parent
        (per ``inverse_map``) already includes the predicate. If afterwards
        every parent carries the CTE's whole condition and the CTE reads no
        ``Datasource`` directly, the CTE's own condition is removed.

        Returns:
            True if any parent condition was extended or the CTE's condition
            was removed; False otherwise.
        """
        if not cte.parent_ctes:
            self.debug(f"No parent CTEs for {cte.name}")

            return False

        optimized = False
        if not cte.condition:
            self.debug(f"No CTE condition for {cte.name}")
            return False
        self.log(
            f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
        )
        if isinstance(cte.condition, Conditional):
            candidates = cte.condition.decompose()
        else:
            candidates = [cte.condition]
        self.log(f"Have {len(candidates)} candidates to try to push down")
        for candidate in candidates:
            conditions = {x.address for x in candidate.concept_arguments}
            for parent_cte in cte.parent_ctes:
                # Skip parents that already carry this predicate. The check
                # must use the candidate, not the whole cte.condition: with a
                # compound condition the whole never matches the parent, and
                # the same candidate would be AND-ed on again every pass.
                if is_child_of(candidate, parent_cte.condition):
                    continue
                materialized = {k for k, v in parent_cte.source_map.items() if v != []}
                # if it's a root datasource, we can filter on _any_ of the output concepts
                if parent_cte.is_root_datasource:
                    extra_check = {
                        x.address
                        for x in parent_cte.source.datasources[0].output_concepts
                    }
                    if conditions.issubset(extra_check):
                        for x in conditions:
                            if x not in materialized:
                                materialized.add(x)
                                parent_cte.source_map[x] = [
                                    parent_cte.source.datasources[0].name
                                ]
                if conditions.issubset(materialized):
                    # Only safe when every consumer of the parent applies the
                    # same predicate.
                    if all(
                        is_child_of(candidate, child.condition)
                        for child in inverse_map.get(parent_cte.name, [])
                    ):
                        self.log(
                            f"All concepts are found on {parent_cte.name} and all it's children include same filter; pushing up filter"
                        )
                        if parent_cte.condition:
                            parent_cte.condition = Conditional(
                                left=parent_cte.condition,
                                operator=BooleanOperator.AND,
                                right=candidate,
                            )
                        else:
                            parent_cte.condition = candidate
                        optimized = True
                else:
                    self.log(
                        f"conditions {conditions} not subset of parent {parent_cte.name} parent has {materialized} "
                    )

        if all(
            is_child_of(cte.condition, parent_cte.condition)
            for parent_cte in cte.parent_ctes
        ) and not any(isinstance(x, Datasource) for x in cte.source.datasources):
            self.log("All parents have same filter, removing filter")
            cte.condition = None
            optimized = True

        return optimized
|