pytrilogy 0.0.1.110__tar.gz → 0.0.1.111__tar.gz
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.1.110/pytrilogy.egg-info → pytrilogy-0.0.1.111}/PKG-INFO +1 -1
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111/pytrilogy.egg-info}/PKG-INFO +1 -1
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_models.py +2 -2
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_parsing.py +35 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/__init__.py +1 -1
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/constants.py +1 -1
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/models.py +85 -67
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/optimization.py +23 -8
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/concept_strategies_v3.py +44 -19
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/basic_node.py +2 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/common.py +3 -1
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/concept_merge_node.py +24 -8
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/filter_node.py +36 -6
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/node_merge_node.py +34 -23
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/rowset_node.py +30 -6
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/select_node.py +23 -9
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/unnest_node.py +24 -3
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/window_node.py +4 -2
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/__init__.py +7 -6
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/base_node.py +40 -6
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/filter_node.py +15 -1
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/group_node.py +20 -1
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/merge_node.py +36 -7
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/select_node_v2.py +34 -39
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/unnest_node.py +12 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/window_node.py +11 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/utility.py +0 -14
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/query_processor.py +125 -29
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/base.py +45 -40
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/executor.py +31 -3
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/parsing/parse_engine.py +49 -17
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/LICENSE.md +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/README.md +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/pyproject.toml +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/SOURCES.txt +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/dependency_links.txt +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/entry_points.txt +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/requires.txt +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/top_level.txt +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/setup.cfg +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/setup.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_declarations.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_derived_concepts.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_discovery_nodes.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_environment.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_functions.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_imports.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_metadata.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_multi_join_assignments.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_partial_handling.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_query_processing.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_select.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_statements.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_undefined_concept.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/tests/test_where_clause.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/compiler.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/__init__.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/constants.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/enums.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/env_processor.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/environment_helpers.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/ergonomics.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/exceptions.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/functions.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/graph_models.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/internal.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/__init__.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/graph_utils.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/__init__.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/group_node.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/group_to_node.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/multiselect_node.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/__init__.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/bigquery.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/common.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/config.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/duckdb.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/enums.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/postgres.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/presto.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/snowflake.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/dialect/sql_server.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/engine.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/hooks/__init__.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/hooks/base_hook.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/hooks/graph_hook.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/hooks/query_debugger.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/metadata/__init__.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/parser.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/parsing/__init__.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/parsing/common.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/parsing/config.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/parsing/exceptions.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/parsing/helpers.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/parsing/render.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/py.typed +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/scripts/__init__.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/scripts/trilogy.py +0 -0
- {pytrilogy-0.0.1.110 → pytrilogy-0.0.1.111}/trilogy/utility.py +0 -0
|
@@ -34,7 +34,7 @@ def test_cte_merge(test_environment, test_environment_graph):
|
|
|
34
34
|
joins=[],
|
|
35
35
|
source_map={outputs[0].address: {datasource}},
|
|
36
36
|
),
|
|
37
|
-
source_map={c.address: datasource.identifier for c in outputs},
|
|
37
|
+
source_map={c.address: [datasource.identifier] for c in outputs},
|
|
38
38
|
)
|
|
39
39
|
b = CTE(
|
|
40
40
|
name="testb",
|
|
@@ -48,7 +48,7 @@ def test_cte_merge(test_environment, test_environment_graph):
|
|
|
48
48
|
joins=[],
|
|
49
49
|
source_map=output_map,
|
|
50
50
|
),
|
|
51
|
-
source_map={c.address: datasource.identifier for c in outputs},
|
|
51
|
+
source_map={c.address: [datasource.identifier] for c in outputs},
|
|
52
52
|
)
|
|
53
53
|
|
|
54
54
|
merged = a + b
|
|
@@ -30,6 +30,19 @@ def test_in():
|
|
|
30
30
|
rendered = BaseDialect().render_expr(right)
|
|
31
31
|
assert rendered.strip() == "( 1,2,3 )".strip()
|
|
32
32
|
|
|
33
|
+
_, parsed = parse_text(
|
|
34
|
+
"const order_id <- 3; SELECT order_id WHERE order_id IN (1);"
|
|
35
|
+
)
|
|
36
|
+
query = parsed[-1]
|
|
37
|
+
right = query.where_clause.conditional.right
|
|
38
|
+
assert isinstance(
|
|
39
|
+
right,
|
|
40
|
+
Parenthetical,
|
|
41
|
+
), type(right)
|
|
42
|
+
assert right.content == 1
|
|
43
|
+
rendered = BaseDialect().render_expr(right)
|
|
44
|
+
assert rendered.strip() == "( 1 )".strip()
|
|
45
|
+
|
|
33
46
|
|
|
34
47
|
def test_not_in():
|
|
35
48
|
_, parsed = parse_text(
|
|
@@ -160,6 +173,28 @@ select
|
|
|
160
173
|
assert env.concepts[name].keys == (env.concepts["id"],)
|
|
161
174
|
|
|
162
175
|
|
|
176
|
+
def test_purpose_and_derivation():
|
|
177
|
+
env, parsed = parse_text(
|
|
178
|
+
"""key id int;
|
|
179
|
+
key other_id int;
|
|
180
|
+
property <id, other_id>.join_id <- id*10+other_id;
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
select
|
|
184
|
+
join_id
|
|
185
|
+
;
|
|
186
|
+
"""
|
|
187
|
+
)
|
|
188
|
+
|
|
189
|
+
for name in ["join_id"]:
|
|
190
|
+
assert name in env.concepts
|
|
191
|
+
assert env.concepts[name].purpose == Purpose.PROPERTY
|
|
192
|
+
assert env.concepts[name].keys == (
|
|
193
|
+
env.concepts["id"],
|
|
194
|
+
env.concepts["other_id"],
|
|
195
|
+
)
|
|
196
|
+
|
|
197
|
+
|
|
163
198
|
def test_output_purpose():
|
|
164
199
|
|
|
165
200
|
env, parsed = parse_text(
|
|
@@ -33,7 +33,13 @@ from pydantic import (
|
|
|
33
33
|
)
|
|
34
34
|
from lark.tree import Meta
|
|
35
35
|
from pathlib import Path
|
|
36
|
-
from trilogy.constants import
|
|
36
|
+
from trilogy.constants import (
|
|
37
|
+
logger,
|
|
38
|
+
DEFAULT_NAMESPACE,
|
|
39
|
+
ENV_CACHE_NAME,
|
|
40
|
+
MagicConstants,
|
|
41
|
+
CONFIG,
|
|
42
|
+
)
|
|
37
43
|
from trilogy.core.constants import (
|
|
38
44
|
ALL_ROWS_CONCEPT,
|
|
39
45
|
INTERNAL_NAMESPACE,
|
|
@@ -61,7 +67,6 @@ from trilogy.core.enums import (
|
|
|
61
67
|
from trilogy.core.exceptions import UndefinedConceptException, InvalidSyntaxException
|
|
62
68
|
from trilogy.utility import unique
|
|
63
69
|
from collections import UserList
|
|
64
|
-
from trilogy.utility import string_to_hash
|
|
65
70
|
from functools import cached_property
|
|
66
71
|
from abc import ABC
|
|
67
72
|
|
|
@@ -129,7 +134,7 @@ class ConceptArgs(ABC):
|
|
|
129
134
|
raise NotImplementedError
|
|
130
135
|
|
|
131
136
|
@property
|
|
132
|
-
def existence_arguments(self) ->
|
|
137
|
+
def existence_arguments(self) -> list[tuple["Concept", ...]]:
|
|
133
138
|
return []
|
|
134
139
|
|
|
135
140
|
@property
|
|
@@ -281,9 +286,6 @@ class Concept(Namespaced, SelectGrain, BaseModel):
|
|
|
281
286
|
MultiSelectStatement | MergeStatement,
|
|
282
287
|
]
|
|
283
288
|
] = None
|
|
284
|
-
# lineage: Annotated[Optional[
|
|
285
|
-
# Union[Function, WindowItem, FilterItem, AggregateWrapper]
|
|
286
|
-
# ], WrapValidator(lineage_validator)] = None
|
|
287
289
|
namespace: Optional[str] = Field(default=DEFAULT_NAMESPACE, validate_default=True)
|
|
288
290
|
keys: Optional[Tuple["Concept", ...]] = None
|
|
289
291
|
grain: "Grain" = Field(default=None, validate_default=True)
|
|
@@ -621,6 +623,12 @@ class Grain(BaseModel):
|
|
|
621
623
|
if sub.purpose in (Purpose.PROPERTY, Purpose.METRIC) and sub.keys:
|
|
622
624
|
if all([c in v2 for c in sub.keys]):
|
|
623
625
|
continue
|
|
626
|
+
elif sub.derivation == PurposeLineage.MERGE and isinstance(
|
|
627
|
+
sub.lineage, MergeStatement
|
|
628
|
+
):
|
|
629
|
+
parents = sub.lineage.concepts
|
|
630
|
+
if any([p in v2 for p in parents]):
|
|
631
|
+
continue
|
|
624
632
|
final.append(sub)
|
|
625
633
|
v2 = sorted(final, key=lambda x: x.name)
|
|
626
634
|
return v2
|
|
@@ -966,23 +974,6 @@ class ConceptTransform(Namespaced, BaseModel):
|
|
|
966
974
|
modifiers=self.modifiers,
|
|
967
975
|
)
|
|
968
976
|
|
|
969
|
-
def with_filter(self, where: "WhereClause") -> "ConceptTransform":
|
|
970
|
-
id_hash = string_to_hash(str(where))
|
|
971
|
-
new_parent_concept = Concept(
|
|
972
|
-
name=f"_anon_concept_transform_filter_input_{id_hash}",
|
|
973
|
-
datatype=self.output.datatype,
|
|
974
|
-
purpose=self.output.purpose,
|
|
975
|
-
lineage=self.output.lineage,
|
|
976
|
-
namespace=DEFAULT_NAMESPACE,
|
|
977
|
-
grain=self.output.grain,
|
|
978
|
-
keys=self.output.keys,
|
|
979
|
-
)
|
|
980
|
-
new_parent = FilterItem(content=new_parent_concept, where=where)
|
|
981
|
-
self.output.lineage = new_parent
|
|
982
|
-
return ConceptTransform(
|
|
983
|
-
function=new_parent, output=self.output, modifiers=self.modifiers
|
|
984
|
-
)
|
|
985
|
-
|
|
986
977
|
|
|
987
978
|
class Window(BaseModel):
|
|
988
979
|
count: int
|
|
@@ -1611,13 +1602,15 @@ class Datasource(Namespaced, BaseModel):
|
|
|
1611
1602
|
def __add__(self, other):
|
|
1612
1603
|
if not other == self:
|
|
1613
1604
|
raise ValueError(
|
|
1614
|
-
"Attempted to add two datasources that are not identical, this
|
|
1615
|
-
" never happen"
|
|
1605
|
+
"Attempted to add two datasources that are not identical, this is not a valid operation"
|
|
1616
1606
|
)
|
|
1617
1607
|
return self
|
|
1618
1608
|
|
|
1609
|
+
def __repr__(self):
|
|
1610
|
+
return f"Datasource<{self.namespace}.{self.identifier}@<{self.grain}>"
|
|
1611
|
+
|
|
1619
1612
|
def __str__(self):
|
|
1620
|
-
return
|
|
1613
|
+
return self.__repr__()
|
|
1621
1614
|
|
|
1622
1615
|
def __hash__(self):
|
|
1623
1616
|
return (self.namespace + self.identifier).__hash__()
|
|
@@ -1786,6 +1779,7 @@ class QueryDatasource(BaseModel):
|
|
|
1786
1779
|
input_concepts: List[Concept]
|
|
1787
1780
|
output_concepts: List[Concept]
|
|
1788
1781
|
source_map: Dict[str, Set[Union[Datasource, "QueryDatasource", "UnnestJoin"]]]
|
|
1782
|
+
|
|
1789
1783
|
datasources: List[Union[Datasource, "QueryDatasource"]]
|
|
1790
1784
|
grain: Grain
|
|
1791
1785
|
joins: List[BaseJoin | UnnestJoin]
|
|
@@ -1799,6 +1793,12 @@ class QueryDatasource(BaseModel):
|
|
|
1799
1793
|
join_derived_concepts: List[Concept] = Field(default_factory=list)
|
|
1800
1794
|
hidden_concepts: List[Concept] = Field(default_factory=list)
|
|
1801
1795
|
force_group: bool | None = None
|
|
1796
|
+
existence_source_map: Dict[str, Set[Union[Datasource, "QueryDatasource"]]] = Field(
|
|
1797
|
+
default_factory=dict
|
|
1798
|
+
)
|
|
1799
|
+
|
|
1800
|
+
def __repr__(self):
|
|
1801
|
+
return f"{self.identifier}@<{self.grain}>"
|
|
1802
1802
|
|
|
1803
1803
|
@property
|
|
1804
1804
|
def non_partial_concept_addresses(self) -> List[str]:
|
|
@@ -1841,14 +1841,14 @@ class QueryDatasource(BaseModel):
|
|
|
1841
1841
|
for k, _ in v.items():
|
|
1842
1842
|
seen.add(k)
|
|
1843
1843
|
for x in expected:
|
|
1844
|
-
if x not in seen:
|
|
1844
|
+
if x not in seen and CONFIG.validate_missing:
|
|
1845
1845
|
raise SyntaxError(
|
|
1846
1846
|
f"source map missing {x} on (expected {expected}, have {seen})"
|
|
1847
1847
|
)
|
|
1848
1848
|
return v
|
|
1849
1849
|
|
|
1850
1850
|
def __str__(self):
|
|
1851
|
-
return
|
|
1851
|
+
return self.__repr__()
|
|
1852
1852
|
|
|
1853
1853
|
def __hash__(self):
|
|
1854
1854
|
return (self.identifier).__hash__()
|
|
@@ -2010,10 +2010,11 @@ class CTE(BaseModel):
|
|
|
2010
2010
|
name: str
|
|
2011
2011
|
source: "QueryDatasource"
|
|
2012
2012
|
output_columns: List[Concept]
|
|
2013
|
-
source_map: Dict[str,
|
|
2013
|
+
source_map: Dict[str, list[str]]
|
|
2014
2014
|
grain: Grain
|
|
2015
2015
|
base: bool = False
|
|
2016
2016
|
group_to_grain: bool = False
|
|
2017
|
+
existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
|
|
2017
2018
|
parent_ctes: List["CTE"] = Field(default_factory=list)
|
|
2018
2019
|
joins: List[Union["Join", "InstantiatedUnnestJoin"]] = Field(default_factory=list)
|
|
2019
2020
|
condition: Optional[Union["Conditional", "Comparison", "Parenthetical"]] = None
|
|
@@ -2024,6 +2025,7 @@ class CTE(BaseModel):
|
|
|
2024
2025
|
limit: Optional[int] = None
|
|
2025
2026
|
requires_nesting: bool = True
|
|
2026
2027
|
base_name_override: Optional[str] = None
|
|
2028
|
+
base_alias_override: Optional[str] = None
|
|
2027
2029
|
|
|
2028
2030
|
@computed_field # type: ignore
|
|
2029
2031
|
@property
|
|
@@ -2034,7 +2036,7 @@ class CTE(BaseModel):
|
|
|
2034
2036
|
def validate_output_columns(cls, v):
|
|
2035
2037
|
return unique(v, "address")
|
|
2036
2038
|
|
|
2037
|
-
def inline_parent_datasource(self, parent: CTE) -> bool:
|
|
2039
|
+
def inline_parent_datasource(self, parent: CTE, force_group: bool = False) -> bool:
|
|
2038
2040
|
qds_being_inlined = parent.source
|
|
2039
2041
|
ds_being_inlined = qds_being_inlined.datasources[0]
|
|
2040
2042
|
if not isinstance(ds_being_inlined, Datasource):
|
|
@@ -2050,6 +2052,7 @@ class CTE(BaseModel):
|
|
|
2050
2052
|
# need to identify this before updating joins
|
|
2051
2053
|
if self.base_name == parent.name:
|
|
2052
2054
|
self.base_name_override = ds_being_inlined.safe_location
|
|
2055
|
+
self.base_alias_override = ds_being_inlined.identifier
|
|
2053
2056
|
|
|
2054
2057
|
for join in self.joins:
|
|
2055
2058
|
if isinstance(join, InstantiatedUnnestJoin):
|
|
@@ -2066,6 +2069,8 @@ class CTE(BaseModel):
|
|
|
2066
2069
|
elif v == parent.name:
|
|
2067
2070
|
self.source_map[k] = ds_being_inlined.name
|
|
2068
2071
|
self.parent_ctes = [x for x in self.parent_ctes if x.name != parent.name]
|
|
2072
|
+
if force_group:
|
|
2073
|
+
self.group_to_grain = True
|
|
2069
2074
|
return True
|
|
2070
2075
|
|
|
2071
2076
|
def __add__(self, other: "CTE"):
|
|
@@ -2126,9 +2131,6 @@ class CTE(BaseModel):
|
|
|
2126
2131
|
if self.base_name_override:
|
|
2127
2132
|
return self.base_name_override
|
|
2128
2133
|
# if this cte selects from a single datasource, select right from it
|
|
2129
|
-
valid_joins: List[Join] = [
|
|
2130
|
-
join for join in self.joins if isinstance(join, Join)
|
|
2131
|
-
]
|
|
2132
2134
|
if self.is_root_datasource:
|
|
2133
2135
|
return self.source.datasources[0].safe_location
|
|
2134
2136
|
|
|
@@ -2136,33 +2138,16 @@ class CTE(BaseModel):
|
|
|
2136
2138
|
# as the root
|
|
2137
2139
|
elif len(self.source.datasources) == 1 and len(self.parent_ctes) == 1:
|
|
2138
2140
|
return self.parent_ctes[0].name
|
|
2139
|
-
elif valid_joins and len(valid_joins) > 0:
|
|
2140
|
-
candidates = [x.left_cte.name for x in valid_joins]
|
|
2141
|
-
disallowed = [x.right_cte.name for x in valid_joins]
|
|
2142
|
-
try:
|
|
2143
|
-
return [y for y in candidates if y not in disallowed][0]
|
|
2144
|
-
except IndexError:
|
|
2145
|
-
raise SyntaxError(
|
|
2146
|
-
f"Invalid join configuration {candidates} {disallowed} with all parents {[x.base_name for x in self.parent_ctes]}"
|
|
2147
|
-
)
|
|
2148
2141
|
elif self.relevant_base_ctes:
|
|
2149
2142
|
return self.relevant_base_ctes[0].name
|
|
2150
|
-
elif self.parent_ctes:
|
|
2151
|
-
raise SyntaxError(
|
|
2152
|
-
f"{self.name} has no relevant base CTEs, {self.source_map},"
|
|
2153
|
-
f" {[x.name for x in self.parent_ctes]}, outputs"
|
|
2154
|
-
f" {[x.address for x in self.output_columns]}"
|
|
2155
|
-
)
|
|
2156
2143
|
return self.source.name
|
|
2157
2144
|
|
|
2158
2145
|
@property
|
|
2159
2146
|
def base_alias(self) -> str:
|
|
2160
|
-
|
|
2147
|
+
if self.base_alias_override:
|
|
2148
|
+
return self.base_alias_override
|
|
2161
2149
|
if self.is_root_datasource:
|
|
2162
2150
|
return self.source.datasources[0].identifier
|
|
2163
|
-
relevant_joins = [j for j in self.joins if isinstance(j, Join)]
|
|
2164
|
-
if relevant_joins:
|
|
2165
|
-
return relevant_joins[0].left_cte.name
|
|
2166
2151
|
elif self.relevant_base_ctes:
|
|
2167
2152
|
return self.relevant_base_ctes[0].name
|
|
2168
2153
|
elif self.parent_ctes:
|
|
@@ -2492,9 +2477,17 @@ class Environment(BaseModel):
|
|
|
2492
2477
|
for datasource in self.datasources.values():
|
|
2493
2478
|
for concept in datasource.output_concepts:
|
|
2494
2479
|
concrete_addresses.add(concept.address)
|
|
2480
|
+
current_mat = [x.address for x in self.materialized_concepts]
|
|
2495
2481
|
self.materialized_concepts = [
|
|
2496
2482
|
c for c in self.concepts.values() if c.address in concrete_addresses
|
|
2497
2483
|
]
|
|
2484
|
+
new = [
|
|
2485
|
+
x.address
|
|
2486
|
+
for x in self.materialized_concepts
|
|
2487
|
+
if x.address not in current_mat
|
|
2488
|
+
]
|
|
2489
|
+
if new:
|
|
2490
|
+
logger.info(f"Environment added new materialized concepts {new}")
|
|
2498
2491
|
for concept in self.concepts.values():
|
|
2499
2492
|
if concept.derivation == PurposeLineage.MERGE:
|
|
2500
2493
|
ms = concept.lineage
|
|
@@ -2653,6 +2646,17 @@ class Environment(BaseModel):
|
|
|
2653
2646
|
self.gen_concept_list_caches()
|
|
2654
2647
|
return datasource
|
|
2655
2648
|
|
|
2649
|
+
def delete_datasource(
|
|
2650
|
+
self,
|
|
2651
|
+
address: str,
|
|
2652
|
+
meta: Meta | None = None,
|
|
2653
|
+
) -> bool:
|
|
2654
|
+
if address in self.datasources:
|
|
2655
|
+
del self.datasources[address]
|
|
2656
|
+
self.gen_concept_list_caches()
|
|
2657
|
+
return True
|
|
2658
|
+
return False
|
|
2659
|
+
|
|
2656
2660
|
|
|
2657
2661
|
class LazyEnvironment(Environment):
|
|
2658
2662
|
"""Variant of environment to defer parsing of a path"""
|
|
@@ -2759,11 +2763,8 @@ class Comparison(ConceptArgs, Namespaced, SelectGrain, BaseModel):
|
|
|
2759
2763
|
if isinstance(self.left, SelectGrain)
|
|
2760
2764
|
else self.left
|
|
2761
2765
|
),
|
|
2762
|
-
right
|
|
2763
|
-
|
|
2764
|
-
if isinstance(self.right, SelectGrain)
|
|
2765
|
-
else self.right
|
|
2766
|
-
),
|
|
2766
|
+
# the right side does NOT need to inherit select grain
|
|
2767
|
+
right=self.right,
|
|
2767
2768
|
operator=self.operator,
|
|
2768
2769
|
)
|
|
2769
2770
|
|
|
@@ -2809,8 +2810,8 @@ class SubselectComparison(Comparison):
|
|
|
2809
2810
|
return get_concept_arguments(self.left)
|
|
2810
2811
|
|
|
2811
2812
|
@property
|
|
2812
|
-
def existence_arguments(self) ->
|
|
2813
|
-
return get_concept_arguments(self.right)
|
|
2813
|
+
def existence_arguments(self) -> list[tuple["Concept", ...]]:
|
|
2814
|
+
return [tuple(get_concept_arguments(self.right))]
|
|
2814
2815
|
|
|
2815
2816
|
def with_select_grain(self, grain: Grain):
|
|
2816
2817
|
# there's no need to pass the select grain through to a subselect comparison
|
|
@@ -3002,18 +3003,26 @@ class Conditional(ConceptArgs, Namespaced, SelectGrain, BaseModel):
|
|
|
3002
3003
|
return output
|
|
3003
3004
|
|
|
3004
3005
|
@property
|
|
3005
|
-
def existence_arguments(self) ->
|
|
3006
|
+
def existence_arguments(self) -> list[tuple["Concept", ...]]:
|
|
3006
3007
|
output = []
|
|
3007
3008
|
if isinstance(self.left, ConceptArgs):
|
|
3008
3009
|
output += self.left.existence_arguments
|
|
3009
|
-
else:
|
|
3010
|
-
output += get_concept_arguments(self.left)
|
|
3011
3010
|
if isinstance(self.right, ConceptArgs):
|
|
3012
3011
|
output += self.right.existence_arguments
|
|
3013
|
-
else:
|
|
3014
|
-
output += get_concept_arguments(self.right)
|
|
3015
3012
|
return output
|
|
3016
3013
|
|
|
3014
|
+
def decompose(self):
|
|
3015
|
+
chunks = []
|
|
3016
|
+
if self.operator == BooleanOperator.AND:
|
|
3017
|
+
for val in [self.left, self.right]:
|
|
3018
|
+
if isinstance(val, Conditional):
|
|
3019
|
+
chunks.extend(val.decompose())
|
|
3020
|
+
else:
|
|
3021
|
+
chunks.append(val)
|
|
3022
|
+
else:
|
|
3023
|
+
chunks.append(self)
|
|
3024
|
+
return chunks
|
|
3025
|
+
|
|
3017
3026
|
|
|
3018
3027
|
class AggregateWrapper(Namespaced, SelectGrain, BaseModel):
|
|
3019
3028
|
function: Function
|
|
@@ -3073,7 +3082,7 @@ class WhereClause(ConceptArgs, Namespaced, SelectGrain, BaseModel):
|
|
|
3073
3082
|
return self.conditional.row_arguments
|
|
3074
3083
|
|
|
3075
3084
|
@property
|
|
3076
|
-
def existence_arguments(self) ->
|
|
3085
|
+
def existence_arguments(self) -> list[tuple["Concept", ...]]:
|
|
3077
3086
|
return self.conditional.existence_arguments
|
|
3078
3087
|
|
|
3079
3088
|
def with_namespace(self, namespace: str) -> WhereClause:
|
|
@@ -3314,10 +3323,10 @@ class Parenthetical(ConceptArgs, Namespaced, SelectGrain, BaseModel):
|
|
|
3314
3323
|
return self.concept_arguments
|
|
3315
3324
|
|
|
3316
3325
|
@property
|
|
3317
|
-
def existence_arguments(self) ->
|
|
3326
|
+
def existence_arguments(self) -> list[tuple["Concept", ...]]:
|
|
3318
3327
|
if isinstance(self.content, ConceptArgs):
|
|
3319
3328
|
return self.content.existence_arguments
|
|
3320
|
-
return
|
|
3329
|
+
return []
|
|
3321
3330
|
|
|
3322
3331
|
@property
|
|
3323
3332
|
def input(self):
|
|
@@ -3386,6 +3395,12 @@ Function.model_rebuild()
|
|
|
3386
3395
|
Grain.model_rebuild()
|
|
3387
3396
|
|
|
3388
3397
|
|
|
3398
|
+
def list_to_wrapper(args):
|
|
3399
|
+
types = [arg_to_datatype(arg) for arg in args]
|
|
3400
|
+
assert len(set(types)) == 1
|
|
3401
|
+
return ListWrapper(args, type=types[0])
|
|
3402
|
+
|
|
3403
|
+
|
|
3389
3404
|
def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
|
|
3390
3405
|
if isinstance(arg, Function):
|
|
3391
3406
|
return arg.output_datatype
|
|
@@ -3409,5 +3424,8 @@ def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
|
|
|
3409
3424
|
if arg.type in (WindowType.RANK, WindowType.ROW_NUMBER):
|
|
3410
3425
|
return DataType.INTEGER
|
|
3411
3426
|
return arg_to_datatype(arg.content)
|
|
3427
|
+
elif isinstance(arg, list):
|
|
3428
|
+
wrapper = list_to_wrapper(arg)
|
|
3429
|
+
return ListType(type=wrapper.type)
|
|
3412
3430
|
else:
|
|
3413
3431
|
raise ValueError(f"Cannot parse arg datatype for arg of raw type {type(arg)}")
|
|
@@ -38,6 +38,7 @@ class InlineDatasource(OptimizationRule):
|
|
|
38
38
|
f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
|
|
39
39
|
)
|
|
40
40
|
to_inline: list[CTE] = []
|
|
41
|
+
force_group = False
|
|
41
42
|
for parent_cte in cte.parent_ctes:
|
|
42
43
|
if not parent_cte.is_root_datasource:
|
|
43
44
|
self.log(f"parent {parent_cte.name} is not root")
|
|
@@ -55,15 +56,18 @@ class InlineDatasource(OptimizationRule):
|
|
|
55
56
|
continue
|
|
56
57
|
root_outputs = {x.address for x in root.output_concepts}
|
|
57
58
|
cte_outputs = {x.address for x in parent_cte.output_columns}
|
|
59
|
+
grain_components = {x.address for x in root.grain.components}
|
|
58
60
|
if not cte_outputs.issubset(root_outputs):
|
|
59
61
|
self.log(f"Not all {parent_cte.name} outputs are found on datasource")
|
|
60
62
|
continue
|
|
61
|
-
|
|
63
|
+
if not grain_components.issubset(cte_outputs):
|
|
64
|
+
self.log("Not all datasource components in cte outputs, forcing group")
|
|
65
|
+
force_group = True
|
|
62
66
|
to_inline.append(parent_cte)
|
|
63
67
|
|
|
64
68
|
for replaceable in to_inline:
|
|
65
69
|
self.log(f"Inlining parent {replaceable.name}")
|
|
66
|
-
cte.inline_parent_datasource(replaceable)
|
|
70
|
+
cte.inline_parent_datasource(replaceable, force_group=force_group)
|
|
67
71
|
|
|
68
72
|
return optimized
|
|
69
73
|
|
|
@@ -107,14 +111,14 @@ class PredicatePushdown(OptimizationRule):
|
|
|
107
111
|
f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
|
|
108
112
|
)
|
|
109
113
|
if isinstance(cte.condition, Conditional):
|
|
110
|
-
candidates =
|
|
114
|
+
candidates = cte.condition.decompose()
|
|
111
115
|
else:
|
|
112
116
|
candidates = [cte.condition]
|
|
113
117
|
logger.info(f"Have {len(candidates)} candidates to try to push down")
|
|
114
118
|
for candidate in candidates:
|
|
115
119
|
conditions = {x.address for x in candidate.concept_arguments}
|
|
116
120
|
for parent_cte in cte.parent_ctes:
|
|
117
|
-
materialized = {k for k, v in parent_cte.source_map.items() if v !=
|
|
121
|
+
materialized = {k for k, v in parent_cte.source_map.items() if v != []}
|
|
118
122
|
if conditions.issubset(materialized):
|
|
119
123
|
if all(
|
|
120
124
|
[
|
|
@@ -200,6 +204,8 @@ def is_direct_return_eligible(
|
|
|
200
204
|
for x in derived_concepts:
|
|
201
205
|
if x.derivation == PurposeLineage.WINDOW:
|
|
202
206
|
return False
|
|
207
|
+
if x.derivation == PurposeLineage.UNNEST:
|
|
208
|
+
return False
|
|
203
209
|
if x.derivation == PurposeLineage.AGGREGATE:
|
|
204
210
|
if x.address in conditions:
|
|
205
211
|
return False
|
|
@@ -236,12 +242,21 @@ def optimize_ctes(
|
|
|
236
242
|
actions_taken = rule.optimize(cte, inverse_map)
|
|
237
243
|
complete = not actions_taken
|
|
238
244
|
|
|
239
|
-
if is_direct_return_eligible(
|
|
245
|
+
if CONFIG.optimizations.direct_return and is_direct_return_eligible(
|
|
246
|
+
root_cte, select
|
|
247
|
+
):
|
|
240
248
|
root_cte.order_by = select.order_by
|
|
241
249
|
root_cte.limit = select.limit
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
250
|
+
if select.where_clause:
|
|
251
|
+
|
|
252
|
+
if root_cte.condition:
|
|
253
|
+
root_cte.condition = Conditional(
|
|
254
|
+
left=root_cte.condition,
|
|
255
|
+
operator=BooleanOperator.AND,
|
|
256
|
+
right=select.where_clause.conditional,
|
|
257
|
+
)
|
|
258
|
+
else:
|
|
259
|
+
root_cte.condition = select.where_clause.conditional
|
|
245
260
|
root_cte.requires_nesting = False
|
|
246
261
|
sort_select_output(root_cte, select)
|
|
247
262
|
|