pytrilogy 0.0.1.109__tar.gz → 0.0.1.111__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.1.109/pytrilogy.egg-info → pytrilogy-0.0.1.111}/PKG-INFO +1 -1
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111/pytrilogy.egg-info}/PKG-INFO +1 -1
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_models.py +2 -2
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_parsing.py +35 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/__init__.py +1 -1
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/constants.py +11 -3
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/enums.py +1 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/models.py +94 -67
- pytrilogy-0.0.1.111/trilogy/core/optimization.py +263 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/concept_strategies_v3.py +44 -19
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/basic_node.py +2 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/common.py +3 -1
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/concept_merge_node.py +24 -8
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/filter_node.py +36 -6
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/node_merge_node.py +34 -23
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/rowset_node.py +37 -8
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/select_node.py +23 -9
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/unnest_node.py +24 -3
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/window_node.py +4 -2
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/__init__.py +7 -6
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/base_node.py +40 -6
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/filter_node.py +15 -1
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/group_node.py +20 -1
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/merge_node.py +37 -10
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/select_node_v2.py +34 -39
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/unnest_node.py +12 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/nodes/window_node.py +11 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/utility.py +0 -14
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/query_processor.py +125 -29
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/base.py +45 -40
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/executor.py +31 -3
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/parsing/parse_engine.py +49 -17
- pytrilogy-0.0.1.109/trilogy/core/optimization.py +0 -141
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/LICENSE.md +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/README.md +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/pyproject.toml +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/SOURCES.txt +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/dependency_links.txt +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/entry_points.txt +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/requires.txt +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/pytrilogy.egg-info/top_level.txt +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/setup.cfg +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/setup.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_declarations.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_derived_concepts.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_discovery_nodes.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_environment.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_functions.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_imports.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_metadata.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_multi_join_assignments.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_partial_handling.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_query_processing.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_select.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_statements.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_undefined_concept.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/tests/test_where_clause.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/compiler.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/__init__.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/constants.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/env_processor.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/environment_helpers.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/ergonomics.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/exceptions.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/functions.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/graph_models.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/internal.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/__init__.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/graph_utils.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/__init__.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/group_node.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/group_to_node.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/core/processing/node_generators/multiselect_node.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/__init__.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/bigquery.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/common.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/config.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/duckdb.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/enums.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/postgres.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/presto.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/snowflake.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/dialect/sql_server.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/engine.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/hooks/__init__.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/hooks/base_hook.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/hooks/graph_hook.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/hooks/query_debugger.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/metadata/__init__.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/parser.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/parsing/__init__.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/parsing/common.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/parsing/config.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/parsing/exceptions.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/parsing/helpers.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/parsing/render.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/py.typed +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/scripts/__init__.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/scripts/trilogy.py +0 -0
- {pytrilogy-0.0.1.109 → pytrilogy-0.0.1.111}/trilogy/utility.py +0 -0
|
@@ -34,7 +34,7 @@ def test_cte_merge(test_environment, test_environment_graph):
|
|
|
34
34
|
joins=[],
|
|
35
35
|
source_map={outputs[0].address: {datasource}},
|
|
36
36
|
),
|
|
37
|
-
source_map={c.address: datasource.identifier for c in outputs},
|
|
37
|
+
source_map={c.address: [datasource.identifier] for c in outputs},
|
|
38
38
|
)
|
|
39
39
|
b = CTE(
|
|
40
40
|
name="testb",
|
|
@@ -48,7 +48,7 @@ def test_cte_merge(test_environment, test_environment_graph):
|
|
|
48
48
|
joins=[],
|
|
49
49
|
source_map=output_map,
|
|
50
50
|
),
|
|
51
|
-
source_map={c.address: datasource.identifier for c in outputs},
|
|
51
|
+
source_map={c.address: [datasource.identifier] for c in outputs},
|
|
52
52
|
)
|
|
53
53
|
|
|
54
54
|
merged = a + b
|
|
@@ -30,6 +30,19 @@ def test_in():
|
|
|
30
30
|
rendered = BaseDialect().render_expr(right)
|
|
31
31
|
assert rendered.strip() == "( 1,2,3 )".strip()
|
|
32
32
|
|
|
33
|
+
_, parsed = parse_text(
|
|
34
|
+
"const order_id <- 3; SELECT order_id WHERE order_id IN (1);"
|
|
35
|
+
)
|
|
36
|
+
query = parsed[-1]
|
|
37
|
+
right = query.where_clause.conditional.right
|
|
38
|
+
assert isinstance(
|
|
39
|
+
right,
|
|
40
|
+
Parenthetical,
|
|
41
|
+
), type(right)
|
|
42
|
+
assert right.content == 1
|
|
43
|
+
rendered = BaseDialect().render_expr(right)
|
|
44
|
+
assert rendered.strip() == "( 1 )".strip()
|
|
45
|
+
|
|
33
46
|
|
|
34
47
|
def test_not_in():
|
|
35
48
|
_, parsed = parse_text(
|
|
@@ -160,6 +173,28 @@ select
|
|
|
160
173
|
assert env.concepts[name].keys == (env.concepts["id"],)
|
|
161
174
|
|
|
162
175
|
|
|
176
|
+
def test_purpose_and_derivation():
|
|
177
|
+
env, parsed = parse_text(
|
|
178
|
+
"""key id int;
|
|
179
|
+
key other_id int;
|
|
180
|
+
property <id, other_id>.join_id <- id*10+other_id;
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
select
|
|
184
|
+
join_id
|
|
185
|
+
;
|
|
186
|
+
"""
|
|
187
|
+
)
|
|
188
|
+
|
|
189
|
+
for name in ["join_id"]:
|
|
190
|
+
assert name in env.concepts
|
|
191
|
+
assert env.concepts[name].purpose == Purpose.PROPERTY
|
|
192
|
+
assert env.concepts[name].keys == (
|
|
193
|
+
env.concepts["id"],
|
|
194
|
+
env.concepts["other_id"],
|
|
195
|
+
)
|
|
196
|
+
|
|
197
|
+
|
|
163
198
|
def test_output_purpose():
|
|
164
199
|
|
|
165
200
|
env, parsed = parse_text(
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
from logging import getLogger
|
|
2
|
-
from dataclasses import dataclass
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
3
|
from enum import Enum
|
|
4
4
|
|
|
5
|
-
logger = getLogger("
|
|
5
|
+
logger = getLogger("trilogy")
|
|
6
6
|
|
|
7
7
|
DEFAULT_NAMESPACE = "local"
|
|
8
8
|
|
|
@@ -18,12 +18,20 @@ class MagicConstants(Enum):
|
|
|
18
18
|
NULL_VALUE = MagicConstants.NULL
|
|
19
19
|
|
|
20
20
|
|
|
21
|
+
@dataclass
|
|
22
|
+
class Optimizations:
|
|
23
|
+
predicate_pushdown: bool = True
|
|
24
|
+
datasource_inlining: bool = True
|
|
25
|
+
direct_return: bool = True
|
|
26
|
+
|
|
27
|
+
|
|
21
28
|
# TODO: support loading from environments
|
|
22
29
|
@dataclass
|
|
23
30
|
class Config:
|
|
24
31
|
strict_mode: bool = True
|
|
25
32
|
human_identifiers: bool = True
|
|
26
|
-
|
|
33
|
+
validate_missing: bool = True
|
|
34
|
+
optimizations: Optimizations = field(default_factory=Optimizations)
|
|
27
35
|
|
|
28
36
|
|
|
29
37
|
CONFIG = Config()
|
|
@@ -33,7 +33,13 @@ from pydantic import (
|
|
|
33
33
|
)
|
|
34
34
|
from lark.tree import Meta
|
|
35
35
|
from pathlib import Path
|
|
36
|
-
from trilogy.constants import
|
|
36
|
+
from trilogy.constants import (
|
|
37
|
+
logger,
|
|
38
|
+
DEFAULT_NAMESPACE,
|
|
39
|
+
ENV_CACHE_NAME,
|
|
40
|
+
MagicConstants,
|
|
41
|
+
CONFIG,
|
|
42
|
+
)
|
|
37
43
|
from trilogy.core.constants import (
|
|
38
44
|
ALL_ROWS_CONCEPT,
|
|
39
45
|
INTERNAL_NAMESPACE,
|
|
@@ -61,7 +67,6 @@ from trilogy.core.enums import (
|
|
|
61
67
|
from trilogy.core.exceptions import UndefinedConceptException, InvalidSyntaxException
|
|
62
68
|
from trilogy.utility import unique
|
|
63
69
|
from collections import UserList
|
|
64
|
-
from trilogy.utility import string_to_hash
|
|
65
70
|
from functools import cached_property
|
|
66
71
|
from abc import ABC
|
|
67
72
|
|
|
@@ -129,7 +134,7 @@ class ConceptArgs(ABC):
|
|
|
129
134
|
raise NotImplementedError
|
|
130
135
|
|
|
131
136
|
@property
|
|
132
|
-
def existence_arguments(self) ->
|
|
137
|
+
def existence_arguments(self) -> list[tuple["Concept", ...]]:
|
|
133
138
|
return []
|
|
134
139
|
|
|
135
140
|
@property
|
|
@@ -281,9 +286,6 @@ class Concept(Namespaced, SelectGrain, BaseModel):
|
|
|
281
286
|
MultiSelectStatement | MergeStatement,
|
|
282
287
|
]
|
|
283
288
|
] = None
|
|
284
|
-
# lineage: Annotated[Optional[
|
|
285
|
-
# Union[Function, WindowItem, FilterItem, AggregateWrapper]
|
|
286
|
-
# ], WrapValidator(lineage_validator)] = None
|
|
287
289
|
namespace: Optional[str] = Field(default=DEFAULT_NAMESPACE, validate_default=True)
|
|
288
290
|
keys: Optional[Tuple["Concept", ...]] = None
|
|
289
291
|
grain: "Grain" = Field(default=None, validate_default=True)
|
|
@@ -621,6 +623,12 @@ class Grain(BaseModel):
|
|
|
621
623
|
if sub.purpose in (Purpose.PROPERTY, Purpose.METRIC) and sub.keys:
|
|
622
624
|
if all([c in v2 for c in sub.keys]):
|
|
623
625
|
continue
|
|
626
|
+
elif sub.derivation == PurposeLineage.MERGE and isinstance(
|
|
627
|
+
sub.lineage, MergeStatement
|
|
628
|
+
):
|
|
629
|
+
parents = sub.lineage.concepts
|
|
630
|
+
if any([p in v2 for p in parents]):
|
|
631
|
+
continue
|
|
624
632
|
final.append(sub)
|
|
625
633
|
v2 = sorted(final, key=lambda x: x.name)
|
|
626
634
|
return v2
|
|
@@ -966,23 +974,6 @@ class ConceptTransform(Namespaced, BaseModel):
|
|
|
966
974
|
modifiers=self.modifiers,
|
|
967
975
|
)
|
|
968
976
|
|
|
969
|
-
def with_filter(self, where: "WhereClause") -> "ConceptTransform":
|
|
970
|
-
id_hash = string_to_hash(str(where))
|
|
971
|
-
new_parent_concept = Concept(
|
|
972
|
-
name=f"_anon_concept_transform_filter_input_{id_hash}",
|
|
973
|
-
datatype=self.output.datatype,
|
|
974
|
-
purpose=self.output.purpose,
|
|
975
|
-
lineage=self.output.lineage,
|
|
976
|
-
namespace=DEFAULT_NAMESPACE,
|
|
977
|
-
grain=self.output.grain,
|
|
978
|
-
keys=self.output.keys,
|
|
979
|
-
)
|
|
980
|
-
new_parent = FilterItem(content=new_parent_concept, where=where)
|
|
981
|
-
self.output.lineage = new_parent
|
|
982
|
-
return ConceptTransform(
|
|
983
|
-
function=new_parent, output=self.output, modifiers=self.modifiers
|
|
984
|
-
)
|
|
985
|
-
|
|
986
977
|
|
|
987
978
|
class Window(BaseModel):
|
|
988
979
|
count: int
|
|
@@ -1611,13 +1602,15 @@ class Datasource(Namespaced, BaseModel):
|
|
|
1611
1602
|
def __add__(self, other):
|
|
1612
1603
|
if not other == self:
|
|
1613
1604
|
raise ValueError(
|
|
1614
|
-
"Attempted to add two datasources that are not identical, this
|
|
1615
|
-
" never happen"
|
|
1605
|
+
"Attempted to add two datasources that are not identical, this is not a valid operation"
|
|
1616
1606
|
)
|
|
1617
1607
|
return self
|
|
1618
1608
|
|
|
1609
|
+
def __repr__(self):
|
|
1610
|
+
return f"Datasource<{self.namespace}.{self.identifier}@<{self.grain}>"
|
|
1611
|
+
|
|
1619
1612
|
def __str__(self):
|
|
1620
|
-
return
|
|
1613
|
+
return self.__repr__()
|
|
1621
1614
|
|
|
1622
1615
|
def __hash__(self):
|
|
1623
1616
|
return (self.namespace + self.identifier).__hash__()
|
|
@@ -1786,6 +1779,7 @@ class QueryDatasource(BaseModel):
|
|
|
1786
1779
|
input_concepts: List[Concept]
|
|
1787
1780
|
output_concepts: List[Concept]
|
|
1788
1781
|
source_map: Dict[str, Set[Union[Datasource, "QueryDatasource", "UnnestJoin"]]]
|
|
1782
|
+
|
|
1789
1783
|
datasources: List[Union[Datasource, "QueryDatasource"]]
|
|
1790
1784
|
grain: Grain
|
|
1791
1785
|
joins: List[BaseJoin | UnnestJoin]
|
|
@@ -1799,6 +1793,12 @@ class QueryDatasource(BaseModel):
|
|
|
1799
1793
|
join_derived_concepts: List[Concept] = Field(default_factory=list)
|
|
1800
1794
|
hidden_concepts: List[Concept] = Field(default_factory=list)
|
|
1801
1795
|
force_group: bool | None = None
|
|
1796
|
+
existence_source_map: Dict[str, Set[Union[Datasource, "QueryDatasource"]]] = Field(
|
|
1797
|
+
default_factory=dict
|
|
1798
|
+
)
|
|
1799
|
+
|
|
1800
|
+
def __repr__(self):
|
|
1801
|
+
return f"{self.identifier}@<{self.grain}>"
|
|
1802
1802
|
|
|
1803
1803
|
@property
|
|
1804
1804
|
def non_partial_concept_addresses(self) -> List[str]:
|
|
@@ -1841,14 +1841,14 @@ class QueryDatasource(BaseModel):
|
|
|
1841
1841
|
for k, _ in v.items():
|
|
1842
1842
|
seen.add(k)
|
|
1843
1843
|
for x in expected:
|
|
1844
|
-
if x not in seen:
|
|
1844
|
+
if x not in seen and CONFIG.validate_missing:
|
|
1845
1845
|
raise SyntaxError(
|
|
1846
1846
|
f"source map missing {x} on (expected {expected}, have {seen})"
|
|
1847
1847
|
)
|
|
1848
1848
|
return v
|
|
1849
1849
|
|
|
1850
1850
|
def __str__(self):
|
|
1851
|
-
return
|
|
1851
|
+
return self.__repr__()
|
|
1852
1852
|
|
|
1853
1853
|
def __hash__(self):
|
|
1854
1854
|
return (self.identifier).__hash__()
|
|
@@ -1941,6 +1941,9 @@ class QueryDatasource(BaseModel):
|
|
|
1941
1941
|
),
|
|
1942
1942
|
join_derived_concepts=self.join_derived_concepts,
|
|
1943
1943
|
force_group=self.force_group,
|
|
1944
|
+
hidden_concepts=unique(
|
|
1945
|
+
self.hidden_concepts + other.hidden_concepts, "address"
|
|
1946
|
+
),
|
|
1944
1947
|
)
|
|
1945
1948
|
|
|
1946
1949
|
return qds
|
|
@@ -2007,10 +2010,11 @@ class CTE(BaseModel):
|
|
|
2007
2010
|
name: str
|
|
2008
2011
|
source: "QueryDatasource"
|
|
2009
2012
|
output_columns: List[Concept]
|
|
2010
|
-
source_map: Dict[str,
|
|
2013
|
+
source_map: Dict[str, list[str]]
|
|
2011
2014
|
grain: Grain
|
|
2012
2015
|
base: bool = False
|
|
2013
2016
|
group_to_grain: bool = False
|
|
2017
|
+
existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
|
|
2014
2018
|
parent_ctes: List["CTE"] = Field(default_factory=list)
|
|
2015
2019
|
joins: List[Union["Join", "InstantiatedUnnestJoin"]] = Field(default_factory=list)
|
|
2016
2020
|
condition: Optional[Union["Conditional", "Comparison", "Parenthetical"]] = None
|
|
@@ -2021,6 +2025,7 @@ class CTE(BaseModel):
|
|
|
2021
2025
|
limit: Optional[int] = None
|
|
2022
2026
|
requires_nesting: bool = True
|
|
2023
2027
|
base_name_override: Optional[str] = None
|
|
2028
|
+
base_alias_override: Optional[str] = None
|
|
2024
2029
|
|
|
2025
2030
|
@computed_field # type: ignore
|
|
2026
2031
|
@property
|
|
@@ -2031,7 +2036,7 @@ class CTE(BaseModel):
|
|
|
2031
2036
|
def validate_output_columns(cls, v):
|
|
2032
2037
|
return unique(v, "address")
|
|
2033
2038
|
|
|
2034
|
-
def inline_parent_datasource(self, parent: CTE) -> bool:
|
|
2039
|
+
def inline_parent_datasource(self, parent: CTE, force_group: bool = False) -> bool:
|
|
2035
2040
|
qds_being_inlined = parent.source
|
|
2036
2041
|
ds_being_inlined = qds_being_inlined.datasources[0]
|
|
2037
2042
|
if not isinstance(ds_being_inlined, Datasource):
|
|
@@ -2047,6 +2052,7 @@ class CTE(BaseModel):
|
|
|
2047
2052
|
# need to identify this before updating joins
|
|
2048
2053
|
if self.base_name == parent.name:
|
|
2049
2054
|
self.base_name_override = ds_being_inlined.safe_location
|
|
2055
|
+
self.base_alias_override = ds_being_inlined.identifier
|
|
2050
2056
|
|
|
2051
2057
|
for join in self.joins:
|
|
2052
2058
|
if isinstance(join, InstantiatedUnnestJoin):
|
|
@@ -2063,6 +2069,8 @@ class CTE(BaseModel):
|
|
|
2063
2069
|
elif v == parent.name:
|
|
2064
2070
|
self.source_map[k] = ds_being_inlined.name
|
|
2065
2071
|
self.parent_ctes = [x for x in self.parent_ctes if x.name != parent.name]
|
|
2072
|
+
if force_group:
|
|
2073
|
+
self.group_to_grain = True
|
|
2066
2074
|
return True
|
|
2067
2075
|
|
|
2068
2076
|
def __add__(self, other: "CTE"):
|
|
@@ -2101,6 +2109,9 @@ class CTE(BaseModel):
|
|
|
2101
2109
|
self.source.output_concepts = unique(
|
|
2102
2110
|
self.source.output_concepts + other.source.output_concepts, "address"
|
|
2103
2111
|
)
|
|
2112
|
+
self.hidden_concepts = unique(
|
|
2113
|
+
self.hidden_concepts + other.hidden_concepts, "address"
|
|
2114
|
+
)
|
|
2104
2115
|
return self
|
|
2105
2116
|
|
|
2106
2117
|
@property
|
|
@@ -2120,9 +2131,6 @@ class CTE(BaseModel):
|
|
|
2120
2131
|
if self.base_name_override:
|
|
2121
2132
|
return self.base_name_override
|
|
2122
2133
|
# if this cte selects from a single datasource, select right from it
|
|
2123
|
-
valid_joins: List[Join] = [
|
|
2124
|
-
join for join in self.joins if isinstance(join, Join)
|
|
2125
|
-
]
|
|
2126
2134
|
if self.is_root_datasource:
|
|
2127
2135
|
return self.source.datasources[0].safe_location
|
|
2128
2136
|
|
|
@@ -2130,33 +2138,16 @@ class CTE(BaseModel):
|
|
|
2130
2138
|
# as the root
|
|
2131
2139
|
elif len(self.source.datasources) == 1 and len(self.parent_ctes) == 1:
|
|
2132
2140
|
return self.parent_ctes[0].name
|
|
2133
|
-
elif valid_joins and len(valid_joins) > 0:
|
|
2134
|
-
candidates = [x.left_cte.name for x in valid_joins]
|
|
2135
|
-
disallowed = [x.right_cte.name for x in valid_joins]
|
|
2136
|
-
try:
|
|
2137
|
-
return [y for y in candidates if y not in disallowed][0]
|
|
2138
|
-
except IndexError:
|
|
2139
|
-
raise SyntaxError(
|
|
2140
|
-
f"Invalid join configuration {candidates} {disallowed} with all parents {[x.base_name for x in self.parent_ctes]}"
|
|
2141
|
-
)
|
|
2142
2141
|
elif self.relevant_base_ctes:
|
|
2143
2142
|
return self.relevant_base_ctes[0].name
|
|
2144
|
-
elif self.parent_ctes:
|
|
2145
|
-
raise SyntaxError(
|
|
2146
|
-
f"{self.name} has no relevant base CTEs, {self.source_map},"
|
|
2147
|
-
f" {[x.name for x in self.parent_ctes]}, outputs"
|
|
2148
|
-
f" {[x.address for x in self.output_columns]}"
|
|
2149
|
-
)
|
|
2150
2143
|
return self.source.name
|
|
2151
2144
|
|
|
2152
2145
|
@property
|
|
2153
2146
|
def base_alias(self) -> str:
|
|
2154
|
-
|
|
2147
|
+
if self.base_alias_override:
|
|
2148
|
+
return self.base_alias_override
|
|
2155
2149
|
if self.is_root_datasource:
|
|
2156
2150
|
return self.source.datasources[0].identifier
|
|
2157
|
-
relevant_joins = [j for j in self.joins if isinstance(j, Join)]
|
|
2158
|
-
if relevant_joins:
|
|
2159
|
-
return relevant_joins[0].left_cte.name
|
|
2160
2151
|
elif self.relevant_base_ctes:
|
|
2161
2152
|
return self.relevant_base_ctes[0].name
|
|
2162
2153
|
elif self.parent_ctes:
|
|
@@ -2486,9 +2477,17 @@ class Environment(BaseModel):
|
|
|
2486
2477
|
for datasource in self.datasources.values():
|
|
2487
2478
|
for concept in datasource.output_concepts:
|
|
2488
2479
|
concrete_addresses.add(concept.address)
|
|
2480
|
+
current_mat = [x.address for x in self.materialized_concepts]
|
|
2489
2481
|
self.materialized_concepts = [
|
|
2490
2482
|
c for c in self.concepts.values() if c.address in concrete_addresses
|
|
2491
2483
|
]
|
|
2484
|
+
new = [
|
|
2485
|
+
x.address
|
|
2486
|
+
for x in self.materialized_concepts
|
|
2487
|
+
if x.address not in current_mat
|
|
2488
|
+
]
|
|
2489
|
+
if new:
|
|
2490
|
+
logger.info(f"Environment added new materialized concepts {new}")
|
|
2492
2491
|
for concept in self.concepts.values():
|
|
2493
2492
|
if concept.derivation == PurposeLineage.MERGE:
|
|
2494
2493
|
ms = concept.lineage
|
|
@@ -2647,6 +2646,17 @@ class Environment(BaseModel):
|
|
|
2647
2646
|
self.gen_concept_list_caches()
|
|
2648
2647
|
return datasource
|
|
2649
2648
|
|
|
2649
|
+
def delete_datasource(
|
|
2650
|
+
self,
|
|
2651
|
+
address: str,
|
|
2652
|
+
meta: Meta | None = None,
|
|
2653
|
+
) -> bool:
|
|
2654
|
+
if address in self.datasources:
|
|
2655
|
+
del self.datasources[address]
|
|
2656
|
+
self.gen_concept_list_caches()
|
|
2657
|
+
return True
|
|
2658
|
+
return False
|
|
2659
|
+
|
|
2650
2660
|
|
|
2651
2661
|
class LazyEnvironment(Environment):
|
|
2652
2662
|
"""Variant of environment to defer parsing of a path"""
|
|
@@ -2728,6 +2738,9 @@ class Comparison(ConceptArgs, Namespaced, SelectGrain, BaseModel):
|
|
|
2728
2738
|
def __repr__(self):
|
|
2729
2739
|
return f"{str(self.left)} {self.operator.value} {str(self.right)}"
|
|
2730
2740
|
|
|
2741
|
+
def __str__(self):
|
|
2742
|
+
return self.__repr__()
|
|
2743
|
+
|
|
2731
2744
|
def with_namespace(self, namespace: str):
|
|
2732
2745
|
return self.__class__(
|
|
2733
2746
|
left=(
|
|
@@ -2750,11 +2763,8 @@ class Comparison(ConceptArgs, Namespaced, SelectGrain, BaseModel):
|
|
|
2750
2763
|
if isinstance(self.left, SelectGrain)
|
|
2751
2764
|
else self.left
|
|
2752
2765
|
),
|
|
2753
|
-
right
|
|
2754
|
-
|
|
2755
|
-
if isinstance(self.right, SelectGrain)
|
|
2756
|
-
else self.right
|
|
2757
|
-
),
|
|
2766
|
+
# the right side does NOT need to inherit select grain
|
|
2767
|
+
right=self.right,
|
|
2758
2768
|
operator=self.operator,
|
|
2759
2769
|
)
|
|
2760
2770
|
|
|
@@ -2800,8 +2810,8 @@ class SubselectComparison(Comparison):
|
|
|
2800
2810
|
return get_concept_arguments(self.left)
|
|
2801
2811
|
|
|
2802
2812
|
@property
|
|
2803
|
-
def existence_arguments(self) ->
|
|
2804
|
-
return get_concept_arguments(self.right)
|
|
2813
|
+
def existence_arguments(self) -> list[tuple["Concept", ...]]:
|
|
2814
|
+
return [tuple(get_concept_arguments(self.right))]
|
|
2805
2815
|
|
|
2806
2816
|
def with_select_grain(self, grain: Grain):
|
|
2807
2817
|
# there's no need to pass the select grain through to a subselect comparison
|
|
@@ -2993,18 +3003,26 @@ class Conditional(ConceptArgs, Namespaced, SelectGrain, BaseModel):
|
|
|
2993
3003
|
return output
|
|
2994
3004
|
|
|
2995
3005
|
@property
|
|
2996
|
-
def existence_arguments(self) ->
|
|
3006
|
+
def existence_arguments(self) -> list[tuple["Concept", ...]]:
|
|
2997
3007
|
output = []
|
|
2998
3008
|
if isinstance(self.left, ConceptArgs):
|
|
2999
3009
|
output += self.left.existence_arguments
|
|
3000
|
-
else:
|
|
3001
|
-
output += get_concept_arguments(self.left)
|
|
3002
3010
|
if isinstance(self.right, ConceptArgs):
|
|
3003
3011
|
output += self.right.existence_arguments
|
|
3004
|
-
else:
|
|
3005
|
-
output += get_concept_arguments(self.right)
|
|
3006
3012
|
return output
|
|
3007
3013
|
|
|
3014
|
+
def decompose(self):
|
|
3015
|
+
chunks = []
|
|
3016
|
+
if self.operator == BooleanOperator.AND:
|
|
3017
|
+
for val in [self.left, self.right]:
|
|
3018
|
+
if isinstance(val, Conditional):
|
|
3019
|
+
chunks.extend(val.decompose())
|
|
3020
|
+
else:
|
|
3021
|
+
chunks.append(val)
|
|
3022
|
+
else:
|
|
3023
|
+
chunks.append(self)
|
|
3024
|
+
return chunks
|
|
3025
|
+
|
|
3008
3026
|
|
|
3009
3027
|
class AggregateWrapper(Namespaced, SelectGrain, BaseModel):
|
|
3010
3028
|
function: Function
|
|
@@ -3064,7 +3082,7 @@ class WhereClause(ConceptArgs, Namespaced, SelectGrain, BaseModel):
|
|
|
3064
3082
|
return self.conditional.row_arguments
|
|
3065
3083
|
|
|
3066
3084
|
@property
|
|
3067
|
-
def existence_arguments(self) ->
|
|
3085
|
+
def existence_arguments(self) -> list[tuple["Concept", ...]]:
|
|
3068
3086
|
return self.conditional.existence_arguments
|
|
3069
3087
|
|
|
3070
3088
|
def with_namespace(self, namespace: str) -> WhereClause:
|
|
@@ -3305,10 +3323,10 @@ class Parenthetical(ConceptArgs, Namespaced, SelectGrain, BaseModel):
|
|
|
3305
3323
|
return self.concept_arguments
|
|
3306
3324
|
|
|
3307
3325
|
@property
|
|
3308
|
-
def existence_arguments(self) ->
|
|
3326
|
+
def existence_arguments(self) -> list[tuple["Concept", ...]]:
|
|
3309
3327
|
if isinstance(self.content, ConceptArgs):
|
|
3310
3328
|
return self.content.existence_arguments
|
|
3311
|
-
return
|
|
3329
|
+
return []
|
|
3312
3330
|
|
|
3313
3331
|
@property
|
|
3314
3332
|
def input(self):
|
|
@@ -3377,6 +3395,12 @@ Function.model_rebuild()
|
|
|
3377
3395
|
Grain.model_rebuild()
|
|
3378
3396
|
|
|
3379
3397
|
|
|
3398
|
+
def list_to_wrapper(args):
|
|
3399
|
+
types = [arg_to_datatype(arg) for arg in args]
|
|
3400
|
+
assert len(set(types)) == 1
|
|
3401
|
+
return ListWrapper(args, type=types[0])
|
|
3402
|
+
|
|
3403
|
+
|
|
3380
3404
|
def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
|
|
3381
3405
|
if isinstance(arg, Function):
|
|
3382
3406
|
return arg.output_datatype
|
|
@@ -3400,5 +3424,8 @@ def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
|
|
|
3400
3424
|
if arg.type in (WindowType.RANK, WindowType.ROW_NUMBER):
|
|
3401
3425
|
return DataType.INTEGER
|
|
3402
3426
|
return arg_to_datatype(arg.content)
|
|
3427
|
+
elif isinstance(arg, list):
|
|
3428
|
+
wrapper = list_to_wrapper(arg)
|
|
3429
|
+
return ListType(type=wrapper.type)
|
|
3403
3430
|
else:
|
|
3404
3431
|
raise ValueError(f"Cannot parse arg datatype for arg of raw type {type(arg)}")
|