pytrilogy 0.0.2.8__tar.gz → 0.0.2.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.2.8/pytrilogy.egg-info → pytrilogy-0.0.2.10}/PKG-INFO +1 -1
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10/pytrilogy.egg-info}/PKG-INFO +1 -1
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/__init__.py +1 -1
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/constants.py +1 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/models.py +161 -59
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/optimization.py +44 -5
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/optimizations/inline_datasource.py +14 -8
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/optimizations/predicate_pushdown.py +73 -44
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/concept_strategies_v3.py +69 -28
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/node_generators/common.py +42 -16
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/node_generators/filter_node.py +94 -48
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/node_generators/group_node.py +3 -1
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/node_generators/rowset_node.py +13 -54
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/node_generators/select_node.py +10 -13
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/node_generators/unnest_node.py +5 -3
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/node_generators/window_node.py +23 -2
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/nodes/__init__.py +34 -6
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/nodes/base_node.py +67 -13
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/nodes/filter_node.py +3 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/nodes/group_node.py +4 -5
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/nodes/merge_node.py +1 -11
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/nodes/select_node_v2.py +1 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/utility.py +46 -14
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/query_processor.py +48 -21
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/dialect/base.py +28 -15
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/dialect/duckdb.py +1 -1
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/parsing/parse_engine.py +39 -2
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/parsing/trilogy.lark +3 -1
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/README.md +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/pyproject.toml +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/pytrilogy.egg-info/SOURCES.txt +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/pytrilogy.egg-info/dependency_links.txt +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/pytrilogy.egg-info/entry_points.txt +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/pytrilogy.egg-info/requires.txt +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/pytrilogy.egg-info/top_level.txt +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/setup.cfg +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/setup.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/tests/test_datatypes.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/tests/test_declarations.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/tests/test_derived_concepts.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/tests/test_discovery_nodes.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/tests/test_environment.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/tests/test_functions.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/tests/test_imports.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/tests/test_metadata.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/tests/test_models.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/tests/test_multi_join_assignments.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/tests/test_parsing.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/tests/test_partial_handling.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/tests/test_query_processing.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/tests/test_select.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/tests/test_statements.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/tests/test_undefined_concept.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/tests/test_where_clause.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/compiler.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/__init__.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/constants.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/enums.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/env_processor.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/environment_helpers.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/ergonomics.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/exceptions.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/functions.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/graph_models.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/internal.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/optimizations/__init__.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/optimizations/base_optimization.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/optimizations/inline_constant.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/__init__.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/graph_utils.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/node_generators/__init__.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/node_generators/basic_node.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/node_generators/group_to_node.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/node_generators/multiselect_node.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/node_generators/node_merge_node.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/nodes/unnest_node.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/core/processing/nodes/window_node.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/dialect/__init__.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/dialect/bigquery.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/dialect/common.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/dialect/config.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/dialect/enums.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/dialect/postgres.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/dialect/presto.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/dialect/snowflake.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/dialect/sql_server.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/engine.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/executor.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/hooks/__init__.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/hooks/base_hook.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/hooks/graph_hook.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/hooks/query_debugger.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/metadata/__init__.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/parser.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/parsing/__init__.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/parsing/common.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/parsing/config.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/parsing/exceptions.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/parsing/helpers.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/parsing/render.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/py.typed +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/scripts/__init__.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/scripts/trilogy.py +0 -0
- {pytrilogy-0.0.2.8 → pytrilogy-0.0.2.10}/trilogy/utility.py +0 -0
|
@@ -70,7 +70,7 @@ from trilogy.utility import unique
|
|
|
70
70
|
from collections import UserList, UserDict
|
|
71
71
|
from functools import cached_property
|
|
72
72
|
from abc import ABC
|
|
73
|
-
|
|
73
|
+
from collections import defaultdict
|
|
74
74
|
|
|
75
75
|
LOGGER_PREFIX = "[MODELS]"
|
|
76
76
|
|
|
@@ -154,7 +154,10 @@ class ConceptArgs(ABC):
|
|
|
154
154
|
class SelectContext(ABC):
|
|
155
155
|
|
|
156
156
|
def with_select_context(
|
|
157
|
-
self,
|
|
157
|
+
self,
|
|
158
|
+
grain: Grain,
|
|
159
|
+
conditional: Conditional | Comparison | Parenthetical | None,
|
|
160
|
+
environment: Environment | None = None,
|
|
158
161
|
):
|
|
159
162
|
raise NotImplementedError
|
|
160
163
|
|
|
@@ -166,6 +169,7 @@ class ConstantInlineable(ABC):
|
|
|
166
169
|
|
|
167
170
|
class SelectTypeMixin(BaseModel):
|
|
168
171
|
where_clause: Union["WhereClause", None] = Field(default=None)
|
|
172
|
+
having_clause: Union["HavingClause", None] = Field(default=None)
|
|
169
173
|
|
|
170
174
|
@property
|
|
171
175
|
def output_components(self) -> List[Concept]:
|
|
@@ -595,13 +599,16 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
|
|
|
595
599
|
self,
|
|
596
600
|
grain: Optional["Grain"] = None,
|
|
597
601
|
conditional: Conditional | Comparison | Parenthetical | None = None,
|
|
602
|
+
environment: Environment | None = None,
|
|
598
603
|
) -> "Concept":
|
|
599
604
|
if not all([isinstance(x, Concept) for x in self.keys or []]):
|
|
600
605
|
raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")
|
|
601
606
|
new_grain = grain or self.grain
|
|
602
607
|
new_lineage = self.lineage
|
|
603
608
|
if isinstance(self.lineage, SelectContext):
|
|
604
|
-
new_lineage = self.lineage.with_select_context(
|
|
609
|
+
new_lineage = self.lineage.with_select_context(
|
|
610
|
+
new_grain, conditional, environment=environment
|
|
611
|
+
)
|
|
605
612
|
return self.__class__(
|
|
606
613
|
name=self.name,
|
|
607
614
|
datatype=self.datatype,
|
|
@@ -788,7 +795,9 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
|
|
|
788
795
|
return Granularity.MULTI_ROW
|
|
789
796
|
|
|
790
797
|
def with_filter(
|
|
791
|
-
self,
|
|
798
|
+
self,
|
|
799
|
+
condition: "Conditional | Comparison | Parenthetical",
|
|
800
|
+
environment: Environment | None = None,
|
|
792
801
|
) -> "Concept":
|
|
793
802
|
from trilogy.utility import string_to_hash
|
|
794
803
|
|
|
@@ -805,12 +814,15 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
|
|
|
805
814
|
modifiers=self.modifiers,
|
|
806
815
|
pseudonyms=self.pseudonyms,
|
|
807
816
|
)
|
|
817
|
+
if environment:
|
|
818
|
+
environment.add_concept(new)
|
|
808
819
|
return new
|
|
809
820
|
|
|
810
821
|
|
|
811
822
|
class Grain(Mergeable, BaseModel):
|
|
812
823
|
nested: bool = False
|
|
813
824
|
components: List[Concept] = Field(default_factory=list, validate_default=True)
|
|
825
|
+
where_clause: Optional[WhereClause] = Field(default=None)
|
|
814
826
|
|
|
815
827
|
@field_validator("components")
|
|
816
828
|
def component_validator(cls, v, info: ValidationInfo):
|
|
@@ -836,10 +848,12 @@ class Grain(Mergeable, BaseModel):
|
|
|
836
848
|
|
|
837
849
|
def __str__(self):
|
|
838
850
|
if self.abstract:
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
)
|
|
842
|
-
|
|
851
|
+
base = "Grain<Abstract>"
|
|
852
|
+
else:
|
|
853
|
+
base = "Grain<" + ",".join([c.address for c in self.components]) + ">"
|
|
854
|
+
if self.where_clause:
|
|
855
|
+
base += f"|{str(self.where_clause)}"
|
|
856
|
+
return base
|
|
843
857
|
|
|
844
858
|
def with_namespace(self, namespace: str) -> "Grain":
|
|
845
859
|
return Grain(
|
|
@@ -1046,12 +1060,15 @@ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
|
|
|
1046
1060
|
return self.output_datatype
|
|
1047
1061
|
|
|
1048
1062
|
def with_select_context(
|
|
1049
|
-
self,
|
|
1063
|
+
self,
|
|
1064
|
+
grain: Grain,
|
|
1065
|
+
conditional: Conditional | Comparison | Parenthetical | None,
|
|
1066
|
+
environment: Environment | None = None,
|
|
1050
1067
|
) -> Function:
|
|
1051
1068
|
if self.operator in FunctionClass.AGGREGATE_FUNCTIONS.value and conditional:
|
|
1052
1069
|
base = [
|
|
1053
1070
|
(
|
|
1054
|
-
c.with_select_context(grain, conditional)
|
|
1071
|
+
c.with_select_context(grain, conditional, environment)
|
|
1055
1072
|
if isinstance(
|
|
1056
1073
|
c,
|
|
1057
1074
|
SelectContext,
|
|
@@ -1061,7 +1078,7 @@ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
|
|
|
1061
1078
|
for c in self.arguments
|
|
1062
1079
|
]
|
|
1063
1080
|
final = [
|
|
1064
|
-
c.with_filter(conditional) if isinstance(c, Concept) else c
|
|
1081
|
+
c.with_filter(conditional, environment) if isinstance(c, Concept) else c
|
|
1065
1082
|
for c in base
|
|
1066
1083
|
]
|
|
1067
1084
|
return Function(
|
|
@@ -1077,7 +1094,7 @@ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
|
|
|
1077
1094
|
operator=self.operator,
|
|
1078
1095
|
arguments=[
|
|
1079
1096
|
(
|
|
1080
|
-
c.with_select_context(grain, conditional)
|
|
1097
|
+
c.with_select_context(grain, conditional, environment)
|
|
1081
1098
|
if isinstance(
|
|
1082
1099
|
c,
|
|
1083
1100
|
SelectContext,
|
|
@@ -1293,13 +1310,22 @@ class WindowItem(Mergeable, Namespaced, SelectContext, BaseModel):
|
|
|
1293
1310
|
)
|
|
1294
1311
|
|
|
1295
1312
|
def with_select_context(
|
|
1296
|
-
self,
|
|
1313
|
+
self,
|
|
1314
|
+
grain: Grain,
|
|
1315
|
+
conditional: Conditional | Comparison | Parenthetical | None,
|
|
1316
|
+
environment: Environment | None = None,
|
|
1297
1317
|
) -> "WindowItem":
|
|
1298
1318
|
return WindowItem(
|
|
1299
1319
|
type=self.type,
|
|
1300
|
-
content=self.content.with_select_context(grain, conditional),
|
|
1301
|
-
over=[
|
|
1302
|
-
|
|
1320
|
+
content=self.content.with_select_context(grain, conditional, environment),
|
|
1321
|
+
over=[
|
|
1322
|
+
x.with_select_context(grain, conditional, environment)
|
|
1323
|
+
for x in self.over
|
|
1324
|
+
],
|
|
1325
|
+
order_by=[
|
|
1326
|
+
x.with_select_context(grain, conditional, environment)
|
|
1327
|
+
for x in self.order_by
|
|
1328
|
+
],
|
|
1303
1329
|
)
|
|
1304
1330
|
|
|
1305
1331
|
@property
|
|
@@ -1368,11 +1394,14 @@ class FilterItem(Namespaced, SelectContext, BaseModel):
|
|
|
1368
1394
|
)
|
|
1369
1395
|
|
|
1370
1396
|
def with_select_context(
|
|
1371
|
-
self,
|
|
1397
|
+
self,
|
|
1398
|
+
grain: Grain,
|
|
1399
|
+
conditional: Conditional | Comparison | Parenthetical | None,
|
|
1400
|
+
environment: Environment | None = None,
|
|
1372
1401
|
) -> FilterItem:
|
|
1373
1402
|
return FilterItem(
|
|
1374
|
-
content=self.content.with_select_context(grain, conditional),
|
|
1375
|
-
where=self.where.with_select_context(grain, conditional),
|
|
1403
|
+
content=self.content.with_select_context(grain, conditional, environment),
|
|
1404
|
+
where=self.where.with_select_context(grain, conditional, environment),
|
|
1376
1405
|
)
|
|
1377
1406
|
|
|
1378
1407
|
@property
|
|
@@ -1452,9 +1481,17 @@ class OrderItem(Mergeable, SelectContext, Namespaced, BaseModel):
|
|
|
1452
1481
|
return OrderItem(expr=self.expr.with_namespace(namespace), order=self.order)
|
|
1453
1482
|
|
|
1454
1483
|
def with_select_context(
|
|
1455
|
-
self,
|
|
1484
|
+
self,
|
|
1485
|
+
grain: Grain,
|
|
1486
|
+
conditional: Conditional | Comparison | Parenthetical | None,
|
|
1487
|
+
environment: Environment | None = None,
|
|
1456
1488
|
) -> "OrderItem":
|
|
1457
|
-
return OrderItem(
|
|
1489
|
+
return OrderItem(
|
|
1490
|
+
expr=self.expr.with_select_context(
|
|
1491
|
+
grain, conditional=conditional, environment=environment
|
|
1492
|
+
),
|
|
1493
|
+
order=self.order,
|
|
1494
|
+
)
|
|
1458
1495
|
|
|
1459
1496
|
def with_merge(
|
|
1460
1497
|
self, source: Concept, target: Concept, modifiers: List[Modifier]
|
|
@@ -1643,7 +1680,9 @@ class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
|
|
|
1643
1680
|
)
|
|
1644
1681
|
):
|
|
1645
1682
|
output.append(item)
|
|
1646
|
-
return Grain(
|
|
1683
|
+
return Grain(
|
|
1684
|
+
components=unique(output, "address"), where_clause=self.where_clause
|
|
1685
|
+
)
|
|
1647
1686
|
|
|
1648
1687
|
def with_namespace(self, namespace: str) -> "SelectStatement":
|
|
1649
1688
|
return SelectStatement(
|
|
@@ -1988,7 +2027,7 @@ class Datasource(Namespaced, BaseModel):
|
|
|
1988
2027
|
return self.__repr__()
|
|
1989
2028
|
|
|
1990
2029
|
def __hash__(self):
|
|
1991
|
-
return
|
|
2030
|
+
return self.full_name.__hash__()
|
|
1992
2031
|
|
|
1993
2032
|
def with_namespace(self, namespace: str):
|
|
1994
2033
|
new_namespace = (
|
|
@@ -2173,9 +2212,9 @@ class BaseJoin(BaseModel):
|
|
|
2173
2212
|
class QueryDatasource(BaseModel):
|
|
2174
2213
|
input_concepts: List[Concept]
|
|
2175
2214
|
output_concepts: List[Concept]
|
|
2215
|
+
datasources: List[Union[Datasource, "QueryDatasource"]]
|
|
2176
2216
|
source_map: Dict[str, Set[Union[Datasource, "QueryDatasource", "UnnestJoin"]]]
|
|
2177
2217
|
|
|
2178
|
-
datasources: List[Union[Datasource, "QueryDatasource"]]
|
|
2179
2218
|
grain: Grain
|
|
2180
2219
|
joins: List[BaseJoin | UnnestJoin]
|
|
2181
2220
|
limit: Optional[int] = None
|
|
@@ -2227,19 +2266,16 @@ class QueryDatasource(BaseModel):
|
|
|
2227
2266
|
|
|
2228
2267
|
@field_validator("source_map")
|
|
2229
2268
|
@classmethod
|
|
2230
|
-
def validate_source_map(cls, v, info: ValidationInfo):
|
|
2269
|
+
def validate_source_map(cls, v: dict, info: ValidationInfo):
|
|
2231
2270
|
values = info.data
|
|
2232
|
-
|
|
2233
|
-
|
|
2234
|
-
|
|
2235
|
-
|
|
2236
|
-
|
|
2237
|
-
|
|
2238
|
-
|
|
2239
|
-
|
|
2240
|
-
raise SyntaxError(
|
|
2241
|
-
f"source map missing {x} on (expected {expected}, have {seen})"
|
|
2242
|
-
)
|
|
2271
|
+
for key in ("input_concepts", "output_concepts"):
|
|
2272
|
+
if not values.get(key):
|
|
2273
|
+
continue
|
|
2274
|
+
for concept in values[key]:
|
|
2275
|
+
if concept.address not in v and CONFIG.validate_missing:
|
|
2276
|
+
raise SyntaxError(
|
|
2277
|
+
f"Missing source map for {concept.address} on {key}, have {v}"
|
|
2278
|
+
)
|
|
2243
2279
|
return v
|
|
2244
2280
|
|
|
2245
2281
|
def __str__(self):
|
|
@@ -2308,11 +2344,23 @@ class QueryDatasource(BaseModel):
|
|
|
2308
2344
|
)
|
|
2309
2345
|
|
|
2310
2346
|
merged_datasources = {}
|
|
2347
|
+
|
|
2311
2348
|
for ds in [*self.datasources, *other.datasources]:
|
|
2312
2349
|
if ds.full_name in merged_datasources:
|
|
2313
2350
|
merged_datasources[ds.full_name] = merged_datasources[ds.full_name] + ds
|
|
2314
2351
|
else:
|
|
2315
2352
|
merged_datasources[ds.full_name] = ds
|
|
2353
|
+
|
|
2354
|
+
final_source_map = defaultdict(set)
|
|
2355
|
+
for key in self.source_map:
|
|
2356
|
+
final_source_map[key] = self.source_map[key].union(
|
|
2357
|
+
other.source_map.get(key, set())
|
|
2358
|
+
)
|
|
2359
|
+
for key in other.source_map:
|
|
2360
|
+
if key not in final_source_map:
|
|
2361
|
+
final_source_map[key] = other.source_map[key]
|
|
2362
|
+
for k, v in final_source_map.items():
|
|
2363
|
+
final_source_map[k] = set(merged_datasources[x.full_name] for x in list(v))
|
|
2316
2364
|
qds = QueryDatasource(
|
|
2317
2365
|
input_concepts=unique(
|
|
2318
2366
|
self.input_concepts + other.input_concepts, "address"
|
|
@@ -2320,7 +2368,7 @@ class QueryDatasource(BaseModel):
|
|
|
2320
2368
|
output_concepts=unique(
|
|
2321
2369
|
self.output_concepts + other.output_concepts, "address"
|
|
2322
2370
|
),
|
|
2323
|
-
source_map=
|
|
2371
|
+
source_map=final_source_map,
|
|
2324
2372
|
datasources=list(merged_datasources.values()),
|
|
2325
2373
|
grain=self.grain,
|
|
2326
2374
|
joins=unique(self.joins + other.joins, "unique_id"),
|
|
@@ -2849,6 +2897,7 @@ class UndefinedConcept(Concept, Mergeable, Namespaced):
|
|
|
2849
2897
|
self,
|
|
2850
2898
|
grain: Optional["Grain"] = None,
|
|
2851
2899
|
conditional: Conditional | Comparison | Parenthetical | None = None,
|
|
2900
|
+
environment: Environment | None = None,
|
|
2852
2901
|
) -> "UndefinedConcept":
|
|
2853
2902
|
if not all([isinstance(x, Concept) for x in self.keys or []]):
|
|
2854
2903
|
raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")
|
|
@@ -2856,7 +2905,9 @@ class UndefinedConcept(Concept, Mergeable, Namespaced):
|
|
|
2856
2905
|
if self.lineage:
|
|
2857
2906
|
new_lineage = self.lineage
|
|
2858
2907
|
if isinstance(self.lineage, SelectContext):
|
|
2859
|
-
new_lineage = self.lineage.with_select_context(
|
|
2908
|
+
new_lineage = self.lineage.with_select_context(
|
|
2909
|
+
new_grain, conditional, environment
|
|
2910
|
+
)
|
|
2860
2911
|
else:
|
|
2861
2912
|
new_lineage = None
|
|
2862
2913
|
return self.__class__(
|
|
@@ -3299,7 +3350,9 @@ class LazyEnvironment(Environment):
|
|
|
3299
3350
|
) or name.startswith("_"):
|
|
3300
3351
|
return super().__getattribute__(name)
|
|
3301
3352
|
if not self.loaded:
|
|
3302
|
-
|
|
3353
|
+
logger.info(
|
|
3354
|
+
f"lazily evaluating load path {self.load_path} to access {name}"
|
|
3355
|
+
)
|
|
3303
3356
|
from trilogy import parse
|
|
3304
3357
|
|
|
3305
3358
|
env = Environment(working_path=str(self.working_path))
|
|
@@ -3441,16 +3494,23 @@ class Comparison(
|
|
|
3441
3494
|
)
|
|
3442
3495
|
|
|
3443
3496
|
def with_select_context(
|
|
3444
|
-
self,
|
|
3497
|
+
self,
|
|
3498
|
+
grain: Grain,
|
|
3499
|
+
conditional: Conditional | Comparison | Parenthetical | None,
|
|
3500
|
+
environment: Environment | None = None,
|
|
3445
3501
|
):
|
|
3446
3502
|
return self.__class__(
|
|
3447
3503
|
left=(
|
|
3448
|
-
self.left.with_select_context(grain, conditional)
|
|
3504
|
+
self.left.with_select_context(grain, conditional, environment)
|
|
3449
3505
|
if isinstance(self.left, SelectContext)
|
|
3450
3506
|
else self.left
|
|
3451
3507
|
),
|
|
3452
3508
|
# the right side does NOT need to inherit select grain
|
|
3453
|
-
right=
|
|
3509
|
+
right=(
|
|
3510
|
+
self.right.with_select_context(grain, conditional, environment)
|
|
3511
|
+
if isinstance(self.right, SelectContext)
|
|
3512
|
+
else self.right
|
|
3513
|
+
),
|
|
3454
3514
|
operator=self.operator,
|
|
3455
3515
|
)
|
|
3456
3516
|
|
|
@@ -3534,12 +3594,15 @@ class SubselectComparison(Comparison):
|
|
|
3534
3594
|
return [tuple(get_concept_arguments(self.right))]
|
|
3535
3595
|
|
|
3536
3596
|
def with_select_context(
|
|
3537
|
-
self,
|
|
3597
|
+
self,
|
|
3598
|
+
grain: Grain,
|
|
3599
|
+
conditional: Conditional | Comparison | Parenthetical | None,
|
|
3600
|
+
environment: Environment | None = None,
|
|
3538
3601
|
):
|
|
3539
|
-
# there's no need to pass the select grain through to a subselect comparison
|
|
3602
|
+
# there's no need to pass the select grain through to a subselect comparison on the right
|
|
3540
3603
|
return self.__class__(
|
|
3541
3604
|
left=(
|
|
3542
|
-
self.left.with_select_context(grain, conditional)
|
|
3605
|
+
self.left.with_select_context(grain, conditional, environment)
|
|
3543
3606
|
if isinstance(self.left, SelectContext)
|
|
3544
3607
|
else self.left
|
|
3545
3608
|
),
|
|
@@ -3570,12 +3633,17 @@ class CaseWhen(Namespaced, SelectContext, BaseModel):
|
|
|
3570
3633
|
)
|
|
3571
3634
|
|
|
3572
3635
|
def with_select_context(
|
|
3573
|
-
self,
|
|
3636
|
+
self,
|
|
3637
|
+
grain: Grain,
|
|
3638
|
+
conditional: Conditional | Comparison | Parenthetical | None,
|
|
3639
|
+
environment: Environment | None = None,
|
|
3574
3640
|
) -> CaseWhen:
|
|
3575
3641
|
return CaseWhen(
|
|
3576
|
-
comparison=self.comparison.with_select_context(
|
|
3642
|
+
comparison=self.comparison.with_select_context(
|
|
3643
|
+
grain, conditional, environment
|
|
3644
|
+
),
|
|
3577
3645
|
expr=(
|
|
3578
|
-
(self.expr.with_select_context(grain, conditional))
|
|
3646
|
+
(self.expr.with_select_context(grain, conditional, environment))
|
|
3579
3647
|
if isinstance(self.expr, SelectContext)
|
|
3580
3648
|
else self.expr
|
|
3581
3649
|
),
|
|
@@ -3592,12 +3660,15 @@ class CaseElse(Namespaced, SelectContext, BaseModel):
|
|
|
3592
3660
|
return get_concept_arguments(self.expr)
|
|
3593
3661
|
|
|
3594
3662
|
def with_select_context(
|
|
3595
|
-
self,
|
|
3663
|
+
self,
|
|
3664
|
+
grain: Grain,
|
|
3665
|
+
conditional: Conditional | Comparison | Parenthetical | None,
|
|
3666
|
+
environment: Environment | None = None,
|
|
3596
3667
|
) -> CaseElse:
|
|
3597
3668
|
return CaseElse(
|
|
3598
3669
|
discriminant=self.discriminant,
|
|
3599
3670
|
expr=(
|
|
3600
|
-
self.expr.with_select_context(grain, conditional)
|
|
3671
|
+
self.expr.with_select_context(grain, conditional, environment)
|
|
3601
3672
|
if isinstance(
|
|
3602
3673
|
self.expr,
|
|
3603
3674
|
SelectContext,
|
|
@@ -3737,16 +3808,19 @@ class Conditional(
|
|
|
3737
3808
|
)
|
|
3738
3809
|
|
|
3739
3810
|
def with_select_context(
|
|
3740
|
-
self,
|
|
3811
|
+
self,
|
|
3812
|
+
grain: Grain,
|
|
3813
|
+
conditional: Conditional | Comparison | Parenthetical | None,
|
|
3814
|
+
environment: Environment | None = None,
|
|
3741
3815
|
):
|
|
3742
3816
|
return Conditional(
|
|
3743
3817
|
left=(
|
|
3744
|
-
self.left.with_select_context(grain, conditional)
|
|
3818
|
+
self.left.with_select_context(grain, conditional, environment)
|
|
3745
3819
|
if isinstance(self.left, SelectContext)
|
|
3746
3820
|
else self.left
|
|
3747
3821
|
),
|
|
3748
3822
|
right=(
|
|
3749
|
-
self.right.with_select_context(grain, conditional)
|
|
3823
|
+
self.right.with_select_context(grain, conditional, environment)
|
|
3750
3824
|
if isinstance(self.right, SelectContext)
|
|
3751
3825
|
else self.right
|
|
3752
3826
|
),
|
|
@@ -3855,13 +3929,16 @@ class AggregateWrapper(Mergeable, Namespaced, SelectContext, BaseModel):
|
|
|
3855
3929
|
)
|
|
3856
3930
|
|
|
3857
3931
|
def with_select_context(
|
|
3858
|
-
self,
|
|
3932
|
+
self,
|
|
3933
|
+
grain: Grain,
|
|
3934
|
+
conditional: Conditional | Comparison | Parenthetical | None,
|
|
3935
|
+
environment: Environment | None = None,
|
|
3859
3936
|
) -> AggregateWrapper:
|
|
3860
3937
|
if not self.by:
|
|
3861
3938
|
by = grain.components_copy
|
|
3862
3939
|
else:
|
|
3863
3940
|
by = self.by
|
|
3864
|
-
parent = self.function.with_select_context(grain, conditional)
|
|
3941
|
+
parent = self.function.with_select_context(grain, conditional, environment)
|
|
3865
3942
|
return AggregateWrapper(function=parent, by=by)
|
|
3866
3943
|
|
|
3867
3944
|
|
|
@@ -3893,10 +3970,15 @@ class WhereClause(Mergeable, ConceptArgs, Namespaced, SelectContext, BaseModel):
|
|
|
3893
3970
|
return WhereClause(conditional=self.conditional.with_namespace(namespace))
|
|
3894
3971
|
|
|
3895
3972
|
def with_select_context(
|
|
3896
|
-
self,
|
|
3973
|
+
self,
|
|
3974
|
+
grain: Grain,
|
|
3975
|
+
conditional: Conditional | Comparison | Parenthetical | None,
|
|
3976
|
+
environment: Environment | None = None,
|
|
3897
3977
|
) -> WhereClause:
|
|
3898
3978
|
return WhereClause(
|
|
3899
|
-
conditional=self.conditional.with_select_context(
|
|
3979
|
+
conditional=self.conditional.with_select_context(
|
|
3980
|
+
grain, conditional, environment
|
|
3981
|
+
)
|
|
3900
3982
|
)
|
|
3901
3983
|
|
|
3902
3984
|
@property
|
|
@@ -3909,6 +3991,22 @@ class WhereClause(Mergeable, ConceptArgs, Namespaced, SelectContext, BaseModel):
|
|
|
3909
3991
|
output += item.grain.components if item.grain else []
|
|
3910
3992
|
return Grain(components=list(set(output)))
|
|
3911
3993
|
|
|
3994
|
+
@property
|
|
3995
|
+
def components(self):
|
|
3996
|
+
from trilogy.core.processing.utility import decompose_condition
|
|
3997
|
+
|
|
3998
|
+
return decompose_condition(self.conditional)
|
|
3999
|
+
|
|
4000
|
+
@property
|
|
4001
|
+
def is_scalar(self):
|
|
4002
|
+
from trilogy.core.processing.utility import is_scalar_condition
|
|
4003
|
+
|
|
4004
|
+
return is_scalar_condition(self.conditional)
|
|
4005
|
+
|
|
4006
|
+
|
|
4007
|
+
class HavingClause(WhereClause):
|
|
4008
|
+
pass
|
|
4009
|
+
|
|
3912
4010
|
|
|
3913
4011
|
class MaterializedDataset(BaseModel):
|
|
3914
4012
|
address: Address
|
|
@@ -3928,6 +4026,7 @@ class ProcessedQuery(BaseModel):
|
|
|
3928
4026
|
hidden_columns: List[Concept] = Field(default_factory=list)
|
|
3929
4027
|
limit: Optional[int] = None
|
|
3930
4028
|
where_clause: Optional[WhereClause] = None
|
|
4029
|
+
having_clause: Optional[HavingClause] = None
|
|
3931
4030
|
order_by: Optional[OrderBy] = None
|
|
3932
4031
|
|
|
3933
4032
|
|
|
@@ -4130,11 +4229,14 @@ class Parenthetical(
|
|
|
4130
4229
|
)
|
|
4131
4230
|
|
|
4132
4231
|
def with_select_context(
|
|
4133
|
-
self,
|
|
4232
|
+
self,
|
|
4233
|
+
grain: Grain,
|
|
4234
|
+
conditional: Conditional | Comparison | Parenthetical | None,
|
|
4235
|
+
environment: Environment | None = None,
|
|
4134
4236
|
):
|
|
4135
4237
|
return Parenthetical(
|
|
4136
4238
|
content=(
|
|
4137
|
-
self.content.with_select_context(grain, conditional)
|
|
4239
|
+
self.content.with_select_context(grain, conditional, environment)
|
|
4138
4240
|
if isinstance(self.content, SelectContext)
|
|
4139
4241
|
else self.content
|
|
4140
4242
|
)
|
|
@@ -17,19 +17,58 @@ from trilogy.core.optimizations import (
|
|
|
17
17
|
MAX_OPTIMIZATION_LOOPS = 100
|
|
18
18
|
|
|
19
19
|
|
|
20
|
+
# other optimizations may make a CTE a pure passthrough
|
|
21
|
+
# remove those
|
|
22
|
+
# def is_locally_irrelevant(cte: CTE) -> CTE | bool:
|
|
23
|
+
# if not len(cte.parent_ctes) == 1:
|
|
24
|
+
# return False
|
|
25
|
+
# parent = cte.parent_ctes[0]
|
|
26
|
+
# if not parent.output_columns == cte.output_columns:
|
|
27
|
+
# return False
|
|
28
|
+
# if cte.condition is not None:
|
|
29
|
+
# return False
|
|
30
|
+
# if cte.group_to_grain:
|
|
31
|
+
# return False
|
|
32
|
+
# if len(cte.joins)>1:
|
|
33
|
+
# return False
|
|
34
|
+
# return parent
|
|
35
|
+
|
|
36
|
+
|
|
20
37
|
def filter_irrelevant_ctes(
|
|
21
38
|
input: list[CTE],
|
|
22
39
|
root_cte: CTE,
|
|
23
40
|
):
|
|
24
41
|
relevant_ctes = set()
|
|
25
42
|
|
|
26
|
-
def recurse(cte: CTE):
|
|
43
|
+
def recurse(cte: CTE, inverse_map: dict[str, list[CTE]]):
|
|
44
|
+
# TODO: revisit this
|
|
45
|
+
# if parent := is_locally_irrelevant(cte):
|
|
46
|
+
# logger.info(
|
|
47
|
+
# f"[Optimization][Irrelevent CTE filtering] Removing redundant CTE {cte.name} and replacing with {parent.name}"
|
|
48
|
+
# )
|
|
49
|
+
# for child in inverse_map.get(cte.name, []):
|
|
50
|
+
# child.parent_ctes = [
|
|
51
|
+
# x for x in child.parent_ctes if x.name != cte.name
|
|
52
|
+
# ] + [parent]
|
|
53
|
+
# for x in child.source_map:
|
|
54
|
+
# if cte.name in child.source_map[x]:
|
|
55
|
+
# child.source_map[x].remove(cte.name)
|
|
56
|
+
# child.source_map[x].append(parent.name)
|
|
57
|
+
# for x2 in child.existence_source_map:
|
|
58
|
+
# if cte.name in child.existence_source_map[x2]:
|
|
59
|
+
# child.existence_source_map[x2].remove(cte.name)
|
|
60
|
+
# child.existence_source_map[x2].append(parent.name)
|
|
61
|
+
# else:
|
|
27
62
|
relevant_ctes.add(cte.name)
|
|
28
63
|
for cte in cte.parent_ctes:
|
|
29
|
-
recurse(cte)
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
64
|
+
recurse(cte, inverse_map)
|
|
65
|
+
|
|
66
|
+
inverse_map = gen_inverse_map(input)
|
|
67
|
+
recurse(root_cte, inverse_map)
|
|
68
|
+
final = [cte for cte in input if cte.name in relevant_ctes]
|
|
69
|
+
if len(final) == len(input):
|
|
70
|
+
return input
|
|
71
|
+
return filter_irrelevant_ctes(final, root_cte)
|
|
33
72
|
|
|
34
73
|
|
|
35
74
|
def gen_inverse_map(input: list[CTE]) -> dict[str, list[CTE]]:
|
|
@@ -5,6 +5,7 @@ from trilogy.core.models import (
|
|
|
5
5
|
|
|
6
6
|
from trilogy.core.optimizations.base_optimization import OptimizationRule
|
|
7
7
|
from collections import defaultdict
|
|
8
|
+
from trilogy.constants import CONFIG
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
class InlineDatasource(OptimizationRule):
|
|
@@ -18,28 +19,28 @@ class InlineDatasource(OptimizationRule):
|
|
|
18
19
|
if not cte.parent_ctes:
|
|
19
20
|
return False
|
|
20
21
|
|
|
21
|
-
self.
|
|
22
|
+
self.debug(
|
|
22
23
|
f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
|
|
23
24
|
)
|
|
24
25
|
to_inline: list[CTE] = []
|
|
25
26
|
force_group = False
|
|
26
27
|
for parent_cte in cte.parent_ctes:
|
|
27
28
|
if not parent_cte.is_root_datasource:
|
|
28
|
-
self.
|
|
29
|
+
self.debug(f"parent {parent_cte.name} is not root")
|
|
29
30
|
continue
|
|
30
31
|
if parent_cte.parent_ctes:
|
|
31
|
-
self.
|
|
32
|
+
self.debug(f"parent {parent_cte.name} has parents")
|
|
32
33
|
continue
|
|
33
34
|
if parent_cte.condition:
|
|
34
|
-
self.
|
|
35
|
+
self.debug(f"parent {parent_cte.name} has condition, cannot be inlined")
|
|
35
36
|
continue
|
|
36
37
|
raw_root = parent_cte.source.datasources[0]
|
|
37
38
|
if not isinstance(raw_root, Datasource):
|
|
38
|
-
self.
|
|
39
|
+
self.debug(f"Parent {parent_cte.name} is not datasource")
|
|
39
40
|
continue
|
|
40
41
|
root: Datasource = raw_root
|
|
41
42
|
if not root.can_be_inlined:
|
|
42
|
-
self.
|
|
43
|
+
self.debug(f"Parent {parent_cte.name} datasource is not inlineable")
|
|
43
44
|
continue
|
|
44
45
|
root_outputs = {x.address for x in root.output_concepts}
|
|
45
46
|
inherited = {
|
|
@@ -52,7 +53,9 @@ class InlineDatasource(OptimizationRule):
|
|
|
52
53
|
)
|
|
53
54
|
continue
|
|
54
55
|
if not root.grain.issubset(parent_cte.grain):
|
|
55
|
-
self.log(
|
|
56
|
+
self.log(
|
|
57
|
+
f"{parent_cte.name} is at wrong grain to inline ({root.grain} vs {parent_cte.grain})"
|
|
58
|
+
)
|
|
56
59
|
continue
|
|
57
60
|
to_inline.append(parent_cte)
|
|
58
61
|
|
|
@@ -62,7 +65,10 @@ class InlineDatasource(OptimizationRule):
|
|
|
62
65
|
self.candidates[cte.name].add(replaceable.name)
|
|
63
66
|
self.count[replaceable.source.name] += 1
|
|
64
67
|
return True
|
|
65
|
-
if
|
|
68
|
+
if (
|
|
69
|
+
self.count[replaceable.source.name]
|
|
70
|
+
> CONFIG.optimizations.constant_inline_cutoff
|
|
71
|
+
):
|
|
66
72
|
self.log(
|
|
67
73
|
f"Skipping inlining raw datasource {replaceable.source.name} ({replaceable.name}) due to multiple references"
|
|
68
74
|
)
|