pytrilogy 0.0.2.10__tar.gz → 0.0.2.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic.
- {pytrilogy-0.0.2.10/pytrilogy.egg-info → pytrilogy-0.0.2.12}/PKG-INFO +1 -1
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12/pytrilogy.egg-info}/PKG-INFO +1 -1
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_models.py +18 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_parsing.py +24 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/__init__.py +1 -1
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/enums.py +0 -1
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/environment_helpers.py +44 -6
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/models.py +47 -26
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/optimization.py +31 -3
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/optimizations/__init__.py +2 -1
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/optimizations/predicate_pushdown.py +60 -42
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/concept_strategies_v3.py +8 -4
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/node_generators/basic_node.py +15 -9
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/node_generators/filter_node.py +20 -3
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/node_generators/group_node.py +2 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/node_generators/node_merge_node.py +28 -2
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/node_generators/unnest_node.py +10 -3
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/nodes/base_node.py +7 -2
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/nodes/group_node.py +0 -1
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/nodes/merge_node.py +11 -4
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/nodes/unnest_node.py +13 -9
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/utility.py +3 -1
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/query_processor.py +20 -5
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/dialect/base.py +96 -56
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/dialect/common.py +3 -3
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/parsing/common.py +58 -1
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/parsing/parse_engine.py +111 -136
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/parsing/trilogy.lark +5 -1
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/README.md +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/pyproject.toml +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/pytrilogy.egg-info/SOURCES.txt +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/pytrilogy.egg-info/dependency_links.txt +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/pytrilogy.egg-info/entry_points.txt +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/pytrilogy.egg-info/requires.txt +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/pytrilogy.egg-info/top_level.txt +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/setup.cfg +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/setup.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_datatypes.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_declarations.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_derived_concepts.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_discovery_nodes.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_environment.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_functions.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_imports.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_metadata.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_multi_join_assignments.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_partial_handling.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_query_processing.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_select.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_statements.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_undefined_concept.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_where_clause.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/compiler.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/constants.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/__init__.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/constants.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/env_processor.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/ergonomics.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/exceptions.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/functions.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/graph_models.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/internal.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/optimizations/base_optimization.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/optimizations/inline_constant.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/optimizations/inline_datasource.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/__init__.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/graph_utils.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/node_generators/__init__.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/node_generators/common.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/node_generators/group_to_node.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/node_generators/multiselect_node.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/node_generators/rowset_node.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/node_generators/select_node.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/node_generators/window_node.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/nodes/__init__.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/nodes/filter_node.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/nodes/select_node_v2.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/nodes/window_node.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/dialect/__init__.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/dialect/bigquery.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/dialect/config.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/dialect/duckdb.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/dialect/enums.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/dialect/postgres.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/dialect/presto.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/dialect/snowflake.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/dialect/sql_server.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/engine.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/executor.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/hooks/__init__.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/hooks/base_hook.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/hooks/graph_hook.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/hooks/query_debugger.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/metadata/__init__.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/parser.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/parsing/__init__.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/parsing/config.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/parsing/exceptions.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/parsing/helpers.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/parsing/render.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/py.typed +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/scripts/__init__.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/scripts/trilogy.py +0 -0
- {pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/utility.py +0 -0
{pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_models.py
RENAMED
@@ -12,6 +12,7 @@ from trilogy.core.models import (
     Comparison,
     Join,
     JoinKey,
+    Concept,
 )
 
 
@@ -69,6 +70,23 @@ def test_concept(test_environment, test_environment_graph):
     )
 
 
+def test_concept_filter(test_environment, test_environment_graph):
+    test_concept: Concept = list(test_environment.concepts.values())[0]
+    new = test_concept.with_filter(
+        Comparison(left=1, right="abc", operator=ComparisonOperator.EQ)
+    )
+    new2 = test_concept.with_filter(
+        Comparison(left=1, right="abc", operator=ComparisonOperator.EQ)
+    )
+
+    assert new.name == new2.name != test_concept.name
+
+    new3 = new.with_filter(
+        Comparison(left=1, right="abc", operator=ComparisonOperator.EQ)
+    )
+    assert new3 == new
+
+
 def test_conditional(test_environment, test_environment_graph):
     test_concept = list(test_environment.concepts.values())[-1]
 
{pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/tests/test_parsing.py
RENAMED
@@ -492,3 +492,27 @@ select x;
     results = Dialects.DUCK_DB.default_executor().generate_sql(text)[0]
 
     assert "abcdef as test" in results, results
+
+
+def test_filter_concise():
+
+    text = """
+key x int;
+key y int;
+
+datasource test (
+    x:x,
+    y:y)
+grain(x)
+address `abc:def`
+;
+
+auto filtered_test <- x ? y > 10;
+
+select filtered_test;
+"""
+    env, parsed = parse_text(text)
+
+    results = Dialects.DUCK_DB.default_executor().generate_sql(text)[0]
+
+    assert "filtered_test" in results, results
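For reference, the new test drives the public pipeline end to end: parse a model that uses the concise `?` filter shorthand, then render DuckDB SQL. A minimal driver sketch using only the calls that appear in the test itself; the top-level Dialects import path is an assumption about the package surface:

    from trilogy import Dialects  # assumed top-level export

    text = """
    key x int;
    key y int;

    datasource test (
        x:x,
        y:y)
    grain(x)
    address `abc:def`
    ;

    auto filtered_test <- x ? y > 10;

    select filtered_test;
    """

    sql = Dialects.DUCK_DB.default_executor().generate_sql(text)[0]
    assert "filtered_test" in sql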
{pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/environment_helpers.py
RENAMED
@@ -1,6 +1,15 @@
-from trilogy.core.models import
+from trilogy.core.models import (
+    DataType,
+    Concept,
+    Environment,
+    Function,
+    Metadata,
+    StructType,
+)
+from trilogy.core.functions import AttrAccess
 from trilogy.core.enums import Purpose, FunctionType, ConceptSource
 from trilogy.constants import DEFAULT_NAMESPACE
+from trilogy.parsing.common import process_function_args, arg_to_datatype, Meta
 
 
 def generate_date_concepts(concept: Concept, environment: Environment):
@@ -142,15 +151,44 @@ def generate_key_concepts(concept: Concept, environment: Environment):
     environment.add_concept(new_concept, add_derived=False)
 
 
-def generate_related_concepts(concept: Concept, environment: Environment):
+def generate_related_concepts(
+    concept: Concept,
+    environment: Environment,
+    meta: Meta | None = None,
+    add_derived: bool = False,
+):
     """Auto populate common derived concepts on types"""
-    if concept.purpose == Purpose.KEY:
+    if concept.purpose == Purpose.KEY and add_derived:
         generate_key_concepts(concept, environment)
-    if concept.datatype == DataType.DATE:
+
+    # datatype types
+    if concept.datatype == DataType.DATE and add_derived:
         generate_date_concepts(concept, environment)
-    elif concept.datatype == DataType.DATETIME:
+    elif concept.datatype == DataType.DATETIME and add_derived:
         generate_date_concepts(concept, environment)
         generate_datetime_concepts(concept, environment)
-    elif concept.datatype == DataType.TIMESTAMP:
+    elif concept.datatype == DataType.TIMESTAMP and add_derived:
         generate_date_concepts(concept, environment)
         generate_datetime_concepts(concept, environment)
+
+    if isinstance(concept.datatype, StructType):
+        for key, value in concept.datatype.fields_map.items():
+            args = process_function_args(
+                [concept, key], meta=meta, environment=environment
+            )
+            auto = Concept(
+                name=key,
+                datatype=arg_to_datatype(value),
+                purpose=Purpose.PROPERTY,
+                namespace=(
+                    environment.namespace + "." + concept.name
+                    if environment.namespace
+                    and environment.namespace != DEFAULT_NAMESPACE
+                    else concept.name
+                ),
+                lineage=AttrAccess(args),
+            )
+            environment.add_concept(auto, meta=meta)
+            if isinstance(value, Concept):
+                environment.merge_concept(auto, value, modifiers=[])
+                assert value.pseudonyms is not None
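The struct branch above is the substantive addition: each field of a StructType concept is now auto-registered as a property concept namespaced under its parent. A pure-Python sketch of just the namespace rule, using plain strings rather than trilogy models and assuming DEFAULT_NAMESPACE resolves to "local":

    DEFAULT_NAMESPACE = "local"  # assumed value of trilogy.constants.DEFAULT_NAMESPACE

    def derived_namespace(env_namespace, concept_name):
        # Struct-field concepts nest under "<env namespace>.<parent concept>"
        # unless the environment sits in the default namespace.
        if env_namespace and env_namespace != DEFAULT_NAMESPACE:
            return env_namespace + "." + concept_name
        return concept_name

    assert derived_namespace("local", "order") == "order"
    assert derived_namespace("sales", "order") == "sales.order"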
{pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/models.py
RENAMED
@@ -300,7 +300,7 @@ class MapType(BaseModel):
 
 class StructType(BaseModel):
     fields: List[ALL_TYPES]
-    fields_map: Dict[str, Concept | int | float | str]
+    fields_map: Dict[str, Concept | int | float | str]
 
     @property
     def data_type(self):
@@ -801,15 +801,18 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
     ) -> "Concept":
         from trilogy.utility import string_to_hash
 
-
+        if self.lineage and isinstance(self.lineage, FilterItem):
+            if self.lineage.where.conditional == condition:
+                return self
+        hash = string_to_hash(self.name + str(condition))
         new = Concept(
-            name=f"{self.name}
+            name=f"{self.name}_filter_{hash}",
             datatype=self.datatype,
             purpose=self.purpose,
             metadata=self.metadata,
             lineage=FilterItem(content=self, where=WhereClause(conditional=condition)),
-            keys=None,
-            grain=
+            keys=(self.keys if self.purpose == Purpose.PROPERTY else None),
+            grain=self.grain if self.grain else Grain(components=[]),
             namespace=self.namespace,
             modifiers=self.modifiers,
             pseudonyms=self.pseudonyms,
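Two behaviors fall out of this hunk: with_filter is now idempotent (re-filtering on an identical condition returns self), and the hash suffix makes the generated name deterministic for a given condition, which is exactly what the new test_concept_filter asserts. A stand-in sketch of the same control flow, with toy dataclasses rather than trilogy's real Concept/FilterItem:

    from dataclasses import dataclass

    @dataclass
    class FakeFilter:
        condition: str

    @dataclass
    class FakeConcept:
        name: str
        lineage: "FakeFilter | None" = None

        def with_filter(self, condition: str) -> "FakeConcept":
            # Re-applying the identical filter is a no-op, as in the guard above.
            if self.lineage and self.lineage.condition == condition:
                return self
            # Stand-in for string_to_hash; stable within a single run.
            suffix = abs(hash(self.name + condition)) % 10000
            return FakeConcept(f"{self.name}_filter_{suffix}", FakeFilter(condition))

    c = FakeConcept("revenue")
    filtered = c.with_filter("year > 2020")
    assert filtered.with_filter("year > 2020") is filtered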
@@ -842,6 +845,16 @@ class Grain(Mergeable, BaseModel):
         v2 = sorted(final, key=lambda x: x.name)
         return v2
 
+    def with_filter(
+        self,
+        condition: "Conditional | Comparison | Parenthetical",
+        environment: Environment | None = None,
+    ) -> "Grain":
+        return Grain(
+            components=[c.with_filter(condition, environment) for c in self.components],
+            nested=self.nested,
+        )
+
     @property
     def components_copy(self) -> List[Concept]:
         return [*self.components]
@@ -1680,6 +1693,9 @@ class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
             )
         ):
             output.append(item)
+        # TODO: explore implicit filtering more
+        # if self.where_clause.conditional and self.where_clause_category == SelectFiltering.IMPLICIT:
+        # output =[x.with_filter(self.where_clause.conditional) for x in output]
         return Grain(
             components=unique(output, "address"), where_clause=self.where_clause
         )
@@ -2103,16 +2119,19 @@ class Datasource(Namespaced, BaseModel):
 
 
 class UnnestJoin(BaseModel):
-
+    concepts: list[Concept]
+    parent: Function
     alias: str = "unnest"
     rendering_required: bool = True
 
     def __hash__(self):
-        return (
+        return (
+            self.alias + "".join([str(s.address) for s in self.concepts])
+        ).__hash__()
 
 
 class InstantiatedUnnestJoin(BaseModel):
-
+    concept_to_unnest: Concept
     alias: str = "unnest"
 
 
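The revised __hash__ keys an UnnestJoin on its alias plus the address of every unnested concept, rather than a single concept, so equivalent joins over the same concept set collapse to one. Illustratively, with invented addresses:

    alias = "unnest"
    addresses = ["local.x", "local.y"]
    # Mirrors the __hash__ body above: one key per (alias, concept set).
    key = hash(alias + "".join(addresses))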
@@ -2252,6 +2271,7 @@ class QueryDatasource(BaseModel):
                 raise SyntaxError(
                     f"Cannot join a datasource to itself, joining {join.left_datasource}"
                 )
+
         return v
 
     @field_validator("input_concepts")
@@ -2271,8 +2291,13 @@ class QueryDatasource(BaseModel):
         for key in ("input_concepts", "output_concepts"):
             if not values.get(key):
                 continue
+            concept: Concept
             for concept in values[key]:
-                if concept.address not in v:
+                if (
+                    concept.address not in v
+                    and not any(x in v for x in concept.pseudonyms)
+                    and CONFIG.validate_missing
+                ):
                     raise SyntaxError(
                         f"Missing source map for {concept.address} on {key}, have {v}"
                     )
@@ -2517,7 +2542,7 @@ class CTE(BaseModel):
             )
         ]
         for join in self.joins:
-            if isinstance(join, UnnestJoin) and
+            if isinstance(join, UnnestJoin) and concept in join.concepts:
                 join.rendering_required = False
 
         self.parent_ctes = [
@@ -2980,8 +3005,8 @@ class EnvironmentDatasourceDict(dict):
         except KeyError:
             if DEFAULT_NAMESPACE + "." + key in self:
                 return self.__getitem__(DEFAULT_NAMESPACE + "." + key)
-            if "." in key and key.split(".")[0] == DEFAULT_NAMESPACE:
-                return self.__getitem__(key.split(".")[1])
+            if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
+                return self.__getitem__(key.split(".", 1)[1])
             raise
 
     def values(self) -> ValuesView[Datasource]:  # type: ignore
@@ -3011,8 +3036,8 @@ class EnvironmentConceptDict(dict):
             return super(EnvironmentConceptDict, self).__getitem__(key)
 
         except KeyError:
-            if "." in key and key.split(".")[0] == DEFAULT_NAMESPACE:
-                return self.__getitem__(key.split(".")[1], line_no)
+            if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
+                return self.__getitem__(key.split(".", 1)[1], line_no)
             if DEFAULT_NAMESPACE + "." + key in self:
                 return self.__getitem__(DEFAULT_NAMESPACE + "." + key, line_no)
             if not self.fail_on_missing:
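Both dictionary fixes are the same one-liner: passing maxsplit=1 preserves nested namespaces when stripping the default prefix. Concretely:

    key = "local.orders.id"
    assert key.split(".")[1] == "orders"        # old lookup: drops the ".id" tail
    assert key.split(".", 1)[1] == "orders.id"  # new lookup: keeps the full remainder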
@@ -3277,10 +3302,9 @@ class Environment(BaseModel):
             self.concepts[concept.name] = concept
         else:
             self.concepts[concept.address] = concept
-
-        from trilogy.core.environment_helpers import generate_related_concepts
+        from trilogy.core.environment_helpers import generate_related_concepts
 
-
+        generate_related_concepts(concept, self, meta=meta, add_derived=add_derived)
         self.gen_concept_list_caches()
         return concept
 
@@ -3407,14 +3431,6 @@ class Comparison(
                 raise SyntaxError(
                     f"Cannot compare {self.left} and {self.right} of different types"
                 )
-        if self.operator == ComparisonOperator.BETWEEN:
-            if (
-                not isinstance(self.right, ComparisonOperator)
-                and self.right.operator == BooleanOperator.AND
-            ):
-                raise SyntaxError(
-                    f"Between operator must have two operands with and, not {self.right}"
-                )
 
     def __add__(self, other):
         if other is None:
@@ -4074,8 +4090,13 @@ class RowsetDerivationStatement(Namespaced, BaseModel):
         output: list[Concept] = []
         orig: dict[str, Concept] = {}
         for orig_concept in self.select.output_components:
+            name = orig_concept.name
+            if isinstance(orig_concept.lineage, FilterItem):
+                if orig_concept.lineage.where == self.select.where_clause:
+                    name = orig_concept.lineage.content.name
+
             new_concept = Concept(
-                name=orig_concept.name,
+                name=name,
                 datatype=orig_concept.datatype,
                 purpose=orig_concept.purpose,
                 lineage=RowsetItem(
{pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/optimization.py
RENAMED
@@ -10,6 +10,7 @@ from trilogy.core.optimizations import (
     OptimizationRule,
     InlineConstant,
     PredicatePushdown,
+    PredicatePushdownRemove,
     InlineDatasource,
 )
 
@@ -34,6 +35,31 @@ MAX_OPTIMIZATION_LOOPS = 100
 #     return parent
 
 
+def reorder_ctes(
+    input: list[CTE],
+):
+    import networkx as nx
+
+    # Create a directed graph
+    G = nx.DiGraph()
+    mapping: dict[str, CTE] = {}
+    for cte in input:
+        mapping[cte.name] = cte
+        for parent in cte.parent_ctes:
+            G.add_edge(parent.name, cte.name)
+    # Perform topological sort (only works for DAGs)
+    try:
+        topological_order = list(nx.topological_sort(G))
+        if not topological_order:
+            return input
+        return [mapping[x] for x in topological_order]
+    except nx.NetworkXUnfeasible as e:
+        print(
+            "The graph is not a DAG (contains cycles) and cannot be topologically sorted."
+        )
+        raise e
+
+
 def filter_irrelevant_ctes(
     input: list[CTE],
     root_cte: CTE,
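reorder_ctes is a plain topological sort over parent edges, so every CTE is emitted after the CTEs it reads from; note that only CTEs participating in at least one parent/child edge enter the graph. A standalone illustration with invented CTE names:

    import networkx as nx

    G = nx.DiGraph()
    # Edges run parent -> child, so parents sort ahead of their dependents.
    G.add_edge("base_orders", "orders_agg")
    G.add_edge("base_customers", "orders_agg")
    G.add_edge("orders_agg", "final")

    print(list(nx.topological_sort(G)))
    # e.g. ['base_orders', 'base_customers', 'orders_agg', 'final']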
@@ -169,20 +195,22 @@ def optimize_ctes(
         REGISTERED_RULES.append(InlineDatasource())
     if CONFIG.optimizations.predicate_pushdown:
         REGISTERED_RULES.append(PredicatePushdown())
-
+    if CONFIG.optimizations.predicate_pushdown:
+        REGISTERED_RULES.append(PredicatePushdownRemove())
     for rule in REGISTERED_RULES:
         loops = 0
         complete = False
         while not complete and (loops <= MAX_OPTIMIZATION_LOOPS):
             actions_taken = False
             # assume we go through all CTEs once
-            look_at = [root_cte, *input]
+            look_at = [root_cte, *reversed(input)]
             inverse_map = gen_inverse_map(look_at)
             for cte in look_at:
                 opt = rule.optimize(cte, inverse_map)
                 actions_taken = actions_taken or opt
             complete = not actions_taken
             loops += 1
+            input = reorder_ctes(filter_irrelevant_ctes(input, root_cte))
         logger.info(f"finished checking for {type(rule).__name__} in {loops} loops")
 
-    return filter_irrelevant_ctes(input, root_cte)
+    return reorder_ctes(filter_irrelevant_ctes(input, root_cte))
{pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/optimizations/__init__.py
RENAMED
@@ -1,6 +1,6 @@
 from .inline_constant import InlineConstant
 from .inline_datasource import InlineDatasource
-from .predicate_pushdown import PredicatePushdown
+from .predicate_pushdown import PredicatePushdown, PredicatePushdownRemove
 from .base_optimization import OptimizationRule
 
 __all__ = [
@@ -8,4 +8,5 @@ __all__ = [
     "InlineConstant",
     "InlineDatasource",
     "PredicatePushdown",
+    "PredicatePushdownRemove",
 ]
{pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/optimizations/predicate_pushdown.py
RENAMED
@@ -114,48 +114,6 @@ class PredicatePushdown(OptimizationRule):
         if not cte.condition:
             self.debug(f"No CTE condition for {cte.name}")
             return False
-
-        parent_filter_status = {
-            parent.name: is_child_of(cte.condition, parent.condition)
-            for parent in cte.parent_ctes
-        }
-        # flatten existnce argument tuples to a list
-
-        flattened_existence = [
-            x.address for y in cte.condition.existence_arguments for x in y
-        ]
-
-        existence_only = [
-            parent.name
-            for parent in cte.parent_ctes
-            if all([x.address in flattened_existence for x in parent.output_columns])
-            and len(flattened_existence) > 0
-        ]
-        if all(
-            [
-                value
-                for key, value in parent_filter_status.items()
-                if key not in existence_only
-            ]
-        ) and not any([isinstance(x, Datasource) for x in cte.source.datasources]):
-            self.log(
-                f"All parents of {cte.name} have same filter or are existence only inputs, removing filter from {cte.name}"
-            )
-            cte.condition = None
-            # remove any "parent" CTEs that provided only existence inputs
-            if existence_only:
-                original = [y.name for y in cte.parent_ctes]
-                cte.parent_ctes = [
-                    x for x in cte.parent_ctes if x.name not in existence_only
-                ]
-                self.log(
-                    f"new parents for {cte.name} are {[x.name for x in cte.parent_ctes]}, vs {original}"
-                )
-            return True
-        else:
-            self.log(
-                f"Could not remove filter from {cte.name}, as not all parents have the same filter: {parent_filter_status}"
-            )
         if self.complete.get(cte.name):
             self.debug("Have done this CTE before")
             return False
@@ -197,3 +155,63 @@
 
         self.complete[cte.name] = True
         return optimized
+
+
+class PredicatePushdownRemove(OptimizationRule):
+
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.complete: dict[str, bool] = {}
+
+    def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
+        optimized = False
+
+        if not cte.parent_ctes:
+            self.debug(f"No parent CTEs for {cte.name}")
+
+            return False
+
+        if not cte.condition:
+            self.debug(f"No CTE condition for {cte.name}")
+            return False
+
+        parent_filter_status = {
+            parent.name: is_child_of(cte.condition, parent.condition)
+            for parent in cte.parent_ctes
+        }
+        # flatten existnce argument tuples to a list
+
+        flattened_existence = [
+            x.address for y in cte.condition.existence_arguments for x in y
+        ]
+
+        existence_only = [
+            parent.name
+            for parent in cte.parent_ctes
+            if all([x.address in flattened_existence for x in parent.output_columns])
+            and len(flattened_existence) > 0
+        ]
+        if all(
+            [
+                value
+                for key, value in parent_filter_status.items()
+                if key not in existence_only
+            ]
+        ) and not any([isinstance(x, Datasource) for x in cte.source.datasources]):
+            self.log(
+                f"All parents of {cte.name} have same filter or are existence only inputs, removing filter from {cte.name}"
+            )
+            cte.condition = None
+            # remove any "parent" CTEs that provided only existence inputs
+            if existence_only:
+                original = [y.name for y in cte.parent_ctes]
+                cte.parent_ctes = [
+                    x for x in cte.parent_ctes if x.name not in existence_only
+                ]
+                self.log(
+                    f"new parents for {cte.name} are {[x.name for x in cte.parent_ctes]}, vs {original}"
+                )
+            return True
+
+        self.complete[cte.name] = True
+        return optimized
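Net effect: the condition-removal pass that previously ran inline inside PredicatePushdown is now its own rule, registered after pushdown, so a CTE's filter is only dropped once pushdown has had the chance to replicate it into the parents. A toy sketch of the removal criterion, with dicts standing in for CTEs (trilogy's real is_child_of performs structural condition containment):

    def can_drop_filter(child_condition, parents, existence_only):
        # Drop the child's filter only when every parent that feeds real rows
        # (not just EXISTS-style inputs) already enforces the same condition.
        return all(
            p["condition"] == child_condition
            for p in parents
            if p["name"] not in existence_only
        )

    parents = [
        {"name": "p1", "condition": "x > 1"},
        {"name": "p2", "condition": "x > 1"},
    ]
    assert can_drop_filter("x > 1", parents, existence_only=set())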
{pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/concept_strategies_v3.py
RENAMED
@@ -180,7 +180,10 @@ def generate_candidates_restrictive(
     local_candidates = [
         x
         for x in list(candidates)
-        if x.address not in exhausted
+        if x.address not in exhausted
+        and x.granularity != Granularity.SINGLE_ROW
+        and x.address not in priority_concept.pseudonyms
+        and priority_concept.address not in x.pseudonyms
     ]
     combos: list[list[Concept]] = []
     grain_check = Grain(components=[*local_candidates]).components_copy
@@ -191,6 +194,7 @@ def generate_candidates_restrictive(
     ):
         combos.append(local_candidates)
     combos.append(grain_check)
+    # combos.append(local_candidates)
     # append the empty set for sourcing concept by itself last
     combos.append([])
     return combos
@@ -607,7 +611,7 @@
     if len(stack) == 1:
         output = stack[0]
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)} with output {[x.address for x in output.output_concepts]}"
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Source stack has single node, returning that {type(output)} with output {[x.address for x in output.output_concepts]} and {output.resolve().source_map}"
         )
         return output
 
@@ -645,6 +649,7 @@
             depth=depth,
             source_concepts=search_concepts,
             history=history,
+            search_conditions=conditions,
         )
 
         if expanded:
@@ -656,8 +661,7 @@
                 if x.address not in [y.address for y in mandatory_list]
                 and x not in ex_resolve.grain.components
             ]
-            expanded.output_concepts = mandatory_list
-            expanded.rebuild_cache()
+            expanded.set_output_concepts(mandatory_list)
 
             logger.info(
                 f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found connections for {[c.address for c in mandatory_list]} via concept addition; removing extra {[c.address for c in extra]}"
{pytrilogy-0.0.2.10 → pytrilogy-0.0.2.12}/trilogy/core/processing/node_generators/basic_node.py
RENAMED
@@ -10,6 +10,7 @@ from trilogy.core.processing.node_generators.common import (
 )
 from trilogy.utility import unique
 from trilogy.constants import logger
+from itertools import combinations
 
 LOGGER_PREFIX = "[GEN_BASIC_NODE]"
 
@@ -31,12 +32,17 @@ def gen_basic_node(
     )
 
     local_optional_redundant = [x for x in local_optional if x in parent_concepts]
-    attempts = [(parent_concepts, [concept] + local_optional_redundant)]
-
+    attempts: List[tuple[list[Concept], list[Concept]]] = [
+        (parent_concepts, [concept] + local_optional_redundant)
+    ]
+    equivalent_optional = [x for x in local_optional if x.lineage == concept.lineage]
+    non_equivalent_optional = [
+        x for x in local_optional if x not in equivalent_optional
+    ]
 
     if local_optional:
-        for combo in range(1, len(local_optional) + 1):
-            combos = combinations(local_optional, combo)
+        for combo in range(1, len(non_equivalent_optional) + 1):
+            combos = combinations(non_equivalent_optional, combo)
             for optional_set in combos:
                 attempts.append(
                     (
@@ -64,13 +70,10 @@ def gen_basic_node(
                 continue
             if all(x in source.partial_concepts for source in sources):
                 partials.append(x)
-        outputs = parent_node.output_concepts + [concept]
-        logger.info(
-            f"{depth_prefix}{LOGGER_PREFIX} Returning basic select for {concept} with attempted extra {[x.address for x in attempt]}, output {[x.address for x in outputs]}"
-        )
-        # parents.resolve()
 
         parent_node.add_output_concept(concept)
+        for x in equivalent_optional:
+            parent_node.add_output_concept(x)
 
         parent_node.remove_output_concepts(
             [
|
|
|
79
82
|
if x.address not in [y.address for y in basic_output]
|
|
80
83
|
]
|
|
81
84
|
)
|
|
85
|
+
logger.info(
|
|
86
|
+
f"{depth_prefix}{LOGGER_PREFIX} Returning basic select for {concept} with attempted extra {[x.address for x in attempt]}, output {[x.address for x in parent_node.output_concepts]}"
|
|
87
|
+
)
|
|
82
88
|
return parent_node
|
|
83
89
|
logger.info(
|
|
84
90
|
f"{depth_prefix}{LOGGER_PREFIX} No basic node could be generated for {concept}"
|