pytrilogy 0.0.1.110__py3-none-any.whl → 0.0.1.112__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.112.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.112.dist-info}/RECORD +33 -33
- {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.112.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/constants.py +1 -1
- trilogy/core/models.py +93 -67
- trilogy/core/optimization.py +33 -19
- trilogy/core/processing/concept_strategies_v3.py +44 -19
- trilogy/core/processing/node_generators/basic_node.py +2 -0
- trilogy/core/processing/node_generators/common.py +3 -1
- trilogy/core/processing/node_generators/concept_merge_node.py +24 -8
- trilogy/core/processing/node_generators/filter_node.py +36 -6
- trilogy/core/processing/node_generators/node_merge_node.py +34 -23
- trilogy/core/processing/node_generators/rowset_node.py +30 -6
- trilogy/core/processing/node_generators/select_node.py +23 -9
- trilogy/core/processing/node_generators/unnest_node.py +24 -3
- trilogy/core/processing/node_generators/window_node.py +4 -2
- trilogy/core/processing/nodes/__init__.py +7 -6
- trilogy/core/processing/nodes/base_node.py +40 -6
- trilogy/core/processing/nodes/filter_node.py +15 -1
- trilogy/core/processing/nodes/group_node.py +20 -1
- trilogy/core/processing/nodes/merge_node.py +36 -7
- trilogy/core/processing/nodes/select_node_v2.py +34 -39
- trilogy/core/processing/nodes/unnest_node.py +12 -0
- trilogy/core/processing/nodes/window_node.py +11 -0
- trilogy/core/processing/utility.py +0 -14
- trilogy/core/query_processor.py +125 -29
- trilogy/dialect/base.py +45 -40
- trilogy/executor.py +31 -3
- trilogy/parsing/parse_engine.py +65 -18
- {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.112.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.112.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.1.110.dist-info → pytrilogy-0.0.1.112.dist-info}/top_level.txt +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
trilogy/__init__.py,sha256=
|
|
1
|
+
trilogy/__init__.py,sha256=DnY_8Giv8_IhdMGbFdiIMOk0nDhb3uLiFVA0tLyveas,292
|
|
2
2
|
trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
-
trilogy/constants.py,sha256=
|
|
3
|
+
trilogy/constants.py,sha256=DJi3ESttmvqgy6fPRXiaQzqJVye6jYwf6XM89NHv0_M,735
|
|
4
4
|
trilogy/engine.py,sha256=R5ubIxYyrxRExz07aZCUfrTsoXCHQ8DKFTDsobXdWdA,1102
|
|
5
|
-
trilogy/executor.py,sha256=
|
|
5
|
+
trilogy/executor.py,sha256=_ZbjrKsUdWL52tWgpxqZnmccAuPXcIPEPN_dDSLNeAQ,9696
|
|
6
6
|
trilogy/parser.py,sha256=UtuqSiGiCjpMAYgo1bvNq-b7NSzCA5hzbUW31RXaMII,281
|
|
7
7
|
trilogy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
trilogy/utility.py,sha256=zM__8r29EsyDW7K9VOHz8yvZC2bXFzh7xKy3cL7GKsk,707
|
|
@@ -16,36 +16,36 @@ trilogy/core/exceptions.py,sha256=NvV_4qLOgKXbpotgRf7c8BANDEvHxlqRPaA53IThQ2o,56
|
|
|
16
16
|
trilogy/core/functions.py,sha256=zkRReytiotOBAW-a3Ri5eoejZDYTt2-7Op80ZxZxUmw,9129
|
|
17
17
|
trilogy/core/graph_models.py,sha256=oJUMSpmYhqXlavckHLpR07GJxuQ8dZ1VbB1fB0KaS8c,2036
|
|
18
18
|
trilogy/core/internal.py,sha256=jNGFHKENnbMiMCtAgsnLZYVSENDK4b5ALecXFZpTDzQ,1075
|
|
19
|
-
trilogy/core/models.py,sha256=
|
|
20
|
-
trilogy/core/optimization.py,sha256=
|
|
21
|
-
trilogy/core/query_processor.py,sha256=
|
|
19
|
+
trilogy/core/models.py,sha256=r8Wn-cvcSgkZ399TGqAYlJURqdiWi5zxzHMY2CUnbP4,110175
|
|
20
|
+
trilogy/core/optimization.py,sha256=942MnGRzscAHcG9LsfMslIRRQBslbIiPHnAvJ3w8YRg,9157
|
|
21
|
+
trilogy/core/query_processor.py,sha256=clIRJ6IcsqIVBPKFsxt8bqCLsLyajvAu02MUIcKQhTo,15713
|
|
22
22
|
trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
|
-
trilogy/core/processing/concept_strategies_v3.py,sha256=
|
|
23
|
+
trilogy/core/processing/concept_strategies_v3.py,sha256=MYrpNMidqvPOg123RekOcqVTjcj03i_538gBo0MzoWE,23432
|
|
24
24
|
trilogy/core/processing/graph_utils.py,sha256=ulCJ4hYAISbUxLD6VM2fah9RBPGIXSEHEPeRBSFl0Rs,1197
|
|
25
|
-
trilogy/core/processing/utility.py,sha256=
|
|
25
|
+
trilogy/core/processing/utility.py,sha256=acxH5448-j8JXqxMRibyAxjz1Wqu7QudbR0PfMuucww,9902
|
|
26
26
|
trilogy/core/processing/node_generators/__init__.py,sha256=LIs6uBEum8LDc-26zjyAwjxa-ay2ok9tKtPjDNvbVkE,757
|
|
27
|
-
trilogy/core/processing/node_generators/basic_node.py,sha256=
|
|
28
|
-
trilogy/core/processing/node_generators/common.py,sha256=
|
|
29
|
-
trilogy/core/processing/node_generators/concept_merge_node.py,sha256=
|
|
30
|
-
trilogy/core/processing/node_generators/filter_node.py,sha256=
|
|
27
|
+
trilogy/core/processing/node_generators/basic_node.py,sha256=HJnIhZLgkUdorKYcofe-QnKSM3Lf_3QO91cbSJhsqf4,2242
|
|
28
|
+
trilogy/core/processing/node_generators/common.py,sha256=liZDth7mvhkF_sUFXK7JitJsiaKD132w3ySLbF7l-nE,8956
|
|
29
|
+
trilogy/core/processing/node_generators/concept_merge_node.py,sha256=x4M8VVZZmBcqHDY1uq7M9KGKCBwjU6mcE_x2BOEk2Mg,7328
|
|
30
|
+
trilogy/core/processing/node_generators/filter_node.py,sha256=y_tqYe2So18vWHASMwVPLzDO-PnyQCO-MAlI4B-rY3Y,4526
|
|
31
31
|
trilogy/core/processing/node_generators/group_node.py,sha256=xWI1xNIXEOj6jlRGD9hcv2_vVNvY6lpzJl6pQ8HuFBE,2988
|
|
32
32
|
trilogy/core/processing/node_generators/group_to_node.py,sha256=BzPdYwzoo8gRMH7BDffTTXq4z-mjfCEzvfB5I-P0_nw,2941
|
|
33
33
|
trilogy/core/processing/node_generators/multiselect_node.py,sha256=vP84dnLQy6dtypi6mUbt9sMAcmmrTgQ1Oz4GI6X1IEo,6421
|
|
34
|
-
trilogy/core/processing/node_generators/node_merge_node.py,sha256=
|
|
35
|
-
trilogy/core/processing/node_generators/rowset_node.py,sha256=
|
|
36
|
-
trilogy/core/processing/node_generators/select_node.py,sha256=
|
|
37
|
-
trilogy/core/processing/node_generators/unnest_node.py,sha256=
|
|
38
|
-
trilogy/core/processing/node_generators/window_node.py,sha256=
|
|
39
|
-
trilogy/core/processing/nodes/__init__.py,sha256=
|
|
40
|
-
trilogy/core/processing/nodes/base_node.py,sha256=
|
|
41
|
-
trilogy/core/processing/nodes/filter_node.py,sha256=
|
|
42
|
-
trilogy/core/processing/nodes/group_node.py,sha256=
|
|
43
|
-
trilogy/core/processing/nodes/merge_node.py,sha256=
|
|
44
|
-
trilogy/core/processing/nodes/select_node_v2.py,sha256=
|
|
45
|
-
trilogy/core/processing/nodes/unnest_node.py,sha256=
|
|
46
|
-
trilogy/core/processing/nodes/window_node.py,sha256=
|
|
34
|
+
trilogy/core/processing/node_generators/node_merge_node.py,sha256=wNDHAbRrKSjsns-EROM_G12mRyOMjbcWpYav2uefXOE,6045
|
|
35
|
+
trilogy/core/processing/node_generators/rowset_node.py,sha256=eNG6rfLifUKraoRGxE8pesQMy5cKT6R5XNIaa3Wuiwk,6081
|
|
36
|
+
trilogy/core/processing/node_generators/select_node.py,sha256=Qb00Kizsv-877UMkGfusl5jXKXMZtZTtLks5pxU07SU,20698
|
|
37
|
+
trilogy/core/processing/node_generators/unnest_node.py,sha256=6CH66eGwpadNX7TzUhWZ8aqIisOtQeHINbLV6X3QBUk,1779
|
|
38
|
+
trilogy/core/processing/node_generators/window_node.py,sha256=9nXUXUgQrNczU1gaOqhOZPNzCUxw-lkxt0R7HORI6ss,2582
|
|
39
|
+
trilogy/core/processing/nodes/__init__.py,sha256=baODkJfvUoWEEbu843GEd7snubwLeOG5FQ8l-CwIaC8,3928
|
|
40
|
+
trilogy/core/processing/nodes/base_node.py,sha256=yhjmsAUmhHDqgbQjz_9YdfP-M5pj4xbrPRDF6Y4XVuw,10498
|
|
41
|
+
trilogy/core/processing/nodes/filter_node.py,sha256=rDw4vfE6tqWxuKT0arihVmIOoOWDDCyzRA-2yONX_Ek,1860
|
|
42
|
+
trilogy/core/processing/nodes/group_node.py,sha256=vzeU9J4xMhRrPj4-KPJTgNbH-KFu2ZS8b57SOynsdw0,4448
|
|
43
|
+
trilogy/core/processing/nodes/merge_node.py,sha256=FvSiTWKOzaUsXBkf6wJD8QQqQxp_aphS_I5VzNRw8Yo,13600
|
|
44
|
+
trilogy/core/processing/nodes/select_node_v2.py,sha256=ERCflBFzKpD5SzweMevnJLyQnxmF_-IQ6VRu5yVeiBg,6552
|
|
45
|
+
trilogy/core/processing/nodes/unnest_node.py,sha256=JFtm90IVM-46aCYkTNIaJah6v9ApAfonjVhcVM1HmDE,1903
|
|
46
|
+
trilogy/core/processing/nodes/window_node.py,sha256=X7qxLUKd3tekjUUsmH_4vz5b-U89gMnGd04VBxuu2Ns,1280
|
|
47
47
|
trilogy/dialect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
48
|
-
trilogy/dialect/base.py,sha256=
|
|
48
|
+
trilogy/dialect/base.py,sha256=ii9P_OO8BhKsQVAr9A13rhx_dzRZd4wxnkL-Ul5OS74,30398
|
|
49
49
|
trilogy/dialect/bigquery.py,sha256=9vxQn2BMv_oTGQSWQpoN5ho_OgqMWaHH9e-5vQVf44c,2906
|
|
50
50
|
trilogy/dialect/common.py,sha256=zWrYmvevlXznocw9uGHmY5Ws1rp_kICm9zA_ulTe4eg,2165
|
|
51
51
|
trilogy/dialect/config.py,sha256=tLVEMctaTDhUgARKXUNfHUcIolGaALkQ0RavUvXAY4w,2994
|
|
@@ -65,13 +65,13 @@ trilogy/parsing/common.py,sha256=lz0IyVA8v-u-DGFgzkmdb4_00I--Kegmo9HNF7CrajI,579
|
|
|
65
65
|
trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
|
|
66
66
|
trilogy/parsing/exceptions.py,sha256=92E5i2frv5hj9wxObJZsZqj5T6bglvPzvdvco_vW1Zk,38
|
|
67
67
|
trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
68
|
-
trilogy/parsing/parse_engine.py,sha256=
|
|
68
|
+
trilogy/parsing/parse_engine.py,sha256=LdxdYQpv-9pBNXXs1QdnD08fflnQ82tG6H6mVgpVeIY,65532
|
|
69
69
|
trilogy/parsing/render.py,sha256=fxjpq2FZLgllw_d4cru-t_IXNPAz2DmYkT7v9ED0XRI,11540
|
|
70
70
|
trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
71
71
|
trilogy/scripts/trilogy.py,sha256=PHxvv6f2ODv0esyyhWxlARgra8dVhqQhYl0lTrSyVNo,3729
|
|
72
|
-
pytrilogy-0.0.1.
|
|
73
|
-
pytrilogy-0.0.1.
|
|
74
|
-
pytrilogy-0.0.1.
|
|
75
|
-
pytrilogy-0.0.1.
|
|
76
|
-
pytrilogy-0.0.1.
|
|
77
|
-
pytrilogy-0.0.1.
|
|
72
|
+
pytrilogy-0.0.1.112.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
|
|
73
|
+
pytrilogy-0.0.1.112.dist-info/METADATA,sha256=jhGh-RxeT78DDSvj9YY4K_ww6-C00dhYHyD5NMs1EFY,7882
|
|
74
|
+
pytrilogy-0.0.1.112.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
|
|
75
|
+
pytrilogy-0.0.1.112.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
|
|
76
|
+
pytrilogy-0.0.1.112.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
|
|
77
|
+
pytrilogy-0.0.1.112.dist-info/RECORD,,
|
trilogy/__init__.py
CHANGED
trilogy/constants.py
CHANGED
trilogy/core/models.py
CHANGED
|
@@ -33,7 +33,13 @@ from pydantic import (
|
|
|
33
33
|
)
|
|
34
34
|
from lark.tree import Meta
|
|
35
35
|
from pathlib import Path
|
|
36
|
-
from trilogy.constants import
|
|
36
|
+
from trilogy.constants import (
|
|
37
|
+
logger,
|
|
38
|
+
DEFAULT_NAMESPACE,
|
|
39
|
+
ENV_CACHE_NAME,
|
|
40
|
+
MagicConstants,
|
|
41
|
+
CONFIG,
|
|
42
|
+
)
|
|
37
43
|
from trilogy.core.constants import (
|
|
38
44
|
ALL_ROWS_CONCEPT,
|
|
39
45
|
INTERNAL_NAMESPACE,
|
|
@@ -61,7 +67,6 @@ from trilogy.core.enums import (
|
|
|
61
67
|
from trilogy.core.exceptions import UndefinedConceptException, InvalidSyntaxException
|
|
62
68
|
from trilogy.utility import unique
|
|
63
69
|
from collections import UserList
|
|
64
|
-
from trilogy.utility import string_to_hash
|
|
65
70
|
from functools import cached_property
|
|
66
71
|
from abc import ABC
|
|
67
72
|
|
|
@@ -129,7 +134,7 @@ class ConceptArgs(ABC):
|
|
|
129
134
|
raise NotImplementedError
|
|
130
135
|
|
|
131
136
|
@property
|
|
132
|
-
def existence_arguments(self) ->
|
|
137
|
+
def existence_arguments(self) -> list[tuple["Concept", ...]]:
|
|
133
138
|
return []
|
|
134
139
|
|
|
135
140
|
@property
|
|
@@ -281,9 +286,6 @@ class Concept(Namespaced, SelectGrain, BaseModel):
|
|
|
281
286
|
MultiSelectStatement | MergeStatement,
|
|
282
287
|
]
|
|
283
288
|
] = None
|
|
284
|
-
# lineage: Annotated[Optional[
|
|
285
|
-
# Union[Function, WindowItem, FilterItem, AggregateWrapper]
|
|
286
|
-
# ], WrapValidator(lineage_validator)] = None
|
|
287
289
|
namespace: Optional[str] = Field(default=DEFAULT_NAMESPACE, validate_default=True)
|
|
288
290
|
keys: Optional[Tuple["Concept", ...]] = None
|
|
289
291
|
grain: "Grain" = Field(default=None, validate_default=True)
|
|
@@ -621,6 +623,12 @@ class Grain(BaseModel):
|
|
|
621
623
|
if sub.purpose in (Purpose.PROPERTY, Purpose.METRIC) and sub.keys:
|
|
622
624
|
if all([c in v2 for c in sub.keys]):
|
|
623
625
|
continue
|
|
626
|
+
elif sub.derivation == PurposeLineage.MERGE and isinstance(
|
|
627
|
+
sub.lineage, MergeStatement
|
|
628
|
+
):
|
|
629
|
+
parents = sub.lineage.concepts
|
|
630
|
+
if any([p in v2 for p in parents]):
|
|
631
|
+
continue
|
|
624
632
|
final.append(sub)
|
|
625
633
|
v2 = sorted(final, key=lambda x: x.name)
|
|
626
634
|
return v2
|
|
@@ -966,23 +974,6 @@ class ConceptTransform(Namespaced, BaseModel):
|
|
|
966
974
|
modifiers=self.modifiers,
|
|
967
975
|
)
|
|
968
976
|
|
|
969
|
-
def with_filter(self, where: "WhereClause") -> "ConceptTransform":
|
|
970
|
-
id_hash = string_to_hash(str(where))
|
|
971
|
-
new_parent_concept = Concept(
|
|
972
|
-
name=f"_anon_concept_transform_filter_input_{id_hash}",
|
|
973
|
-
datatype=self.output.datatype,
|
|
974
|
-
purpose=self.output.purpose,
|
|
975
|
-
lineage=self.output.lineage,
|
|
976
|
-
namespace=DEFAULT_NAMESPACE,
|
|
977
|
-
grain=self.output.grain,
|
|
978
|
-
keys=self.output.keys,
|
|
979
|
-
)
|
|
980
|
-
new_parent = FilterItem(content=new_parent_concept, where=where)
|
|
981
|
-
self.output.lineage = new_parent
|
|
982
|
-
return ConceptTransform(
|
|
983
|
-
function=new_parent, output=self.output, modifiers=self.modifiers
|
|
984
|
-
)
|
|
985
|
-
|
|
986
977
|
|
|
987
978
|
class Window(BaseModel):
|
|
988
979
|
count: int
|
|
@@ -1611,13 +1602,15 @@ class Datasource(Namespaced, BaseModel):
|
|
|
1611
1602
|
def __add__(self, other):
|
|
1612
1603
|
if not other == self:
|
|
1613
1604
|
raise ValueError(
|
|
1614
|
-
"Attempted to add two datasources that are not identical, this
|
|
1615
|
-
" never happen"
|
|
1605
|
+
"Attempted to add two datasources that are not identical, this is not a valid operation"
|
|
1616
1606
|
)
|
|
1617
1607
|
return self
|
|
1618
1608
|
|
|
1609
|
+
def __repr__(self):
|
|
1610
|
+
return f"Datasource<{self.namespace}.{self.identifier}@<{self.grain}>"
|
|
1611
|
+
|
|
1619
1612
|
def __str__(self):
|
|
1620
|
-
return
|
|
1613
|
+
return self.__repr__()
|
|
1621
1614
|
|
|
1622
1615
|
def __hash__(self):
|
|
1623
1616
|
return (self.namespace + self.identifier).__hash__()
|
|
@@ -1786,6 +1779,7 @@ class QueryDatasource(BaseModel):
|
|
|
1786
1779
|
input_concepts: List[Concept]
|
|
1787
1780
|
output_concepts: List[Concept]
|
|
1788
1781
|
source_map: Dict[str, Set[Union[Datasource, "QueryDatasource", "UnnestJoin"]]]
|
|
1782
|
+
|
|
1789
1783
|
datasources: List[Union[Datasource, "QueryDatasource"]]
|
|
1790
1784
|
grain: Grain
|
|
1791
1785
|
joins: List[BaseJoin | UnnestJoin]
|
|
@@ -1799,6 +1793,12 @@ class QueryDatasource(BaseModel):
|
|
|
1799
1793
|
join_derived_concepts: List[Concept] = Field(default_factory=list)
|
|
1800
1794
|
hidden_concepts: List[Concept] = Field(default_factory=list)
|
|
1801
1795
|
force_group: bool | None = None
|
|
1796
|
+
existence_source_map: Dict[str, Set[Union[Datasource, "QueryDatasource"]]] = Field(
|
|
1797
|
+
default_factory=dict
|
|
1798
|
+
)
|
|
1799
|
+
|
|
1800
|
+
def __repr__(self):
|
|
1801
|
+
return f"{self.identifier}@<{self.grain}>"
|
|
1802
1802
|
|
|
1803
1803
|
@property
|
|
1804
1804
|
def non_partial_concept_addresses(self) -> List[str]:
|
|
@@ -1841,14 +1841,14 @@ class QueryDatasource(BaseModel):
|
|
|
1841
1841
|
for k, _ in v.items():
|
|
1842
1842
|
seen.add(k)
|
|
1843
1843
|
for x in expected:
|
|
1844
|
-
if x not in seen:
|
|
1844
|
+
if x not in seen and CONFIG.validate_missing:
|
|
1845
1845
|
raise SyntaxError(
|
|
1846
1846
|
f"source map missing {x} on (expected {expected}, have {seen})"
|
|
1847
1847
|
)
|
|
1848
1848
|
return v
|
|
1849
1849
|
|
|
1850
1850
|
def __str__(self):
|
|
1851
|
-
return
|
|
1851
|
+
return self.__repr__()
|
|
1852
1852
|
|
|
1853
1853
|
def __hash__(self):
|
|
1854
1854
|
return (self.identifier).__hash__()
|
|
@@ -2010,10 +2010,11 @@ class CTE(BaseModel):
|
|
|
2010
2010
|
name: str
|
|
2011
2011
|
source: "QueryDatasource"
|
|
2012
2012
|
output_columns: List[Concept]
|
|
2013
|
-
source_map: Dict[str,
|
|
2013
|
+
source_map: Dict[str, list[str]]
|
|
2014
2014
|
grain: Grain
|
|
2015
2015
|
base: bool = False
|
|
2016
2016
|
group_to_grain: bool = False
|
|
2017
|
+
existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
|
|
2017
2018
|
parent_ctes: List["CTE"] = Field(default_factory=list)
|
|
2018
2019
|
joins: List[Union["Join", "InstantiatedUnnestJoin"]] = Field(default_factory=list)
|
|
2019
2020
|
condition: Optional[Union["Conditional", "Comparison", "Parenthetical"]] = None
|
|
@@ -2024,6 +2025,7 @@ class CTE(BaseModel):
|
|
|
2024
2025
|
limit: Optional[int] = None
|
|
2025
2026
|
requires_nesting: bool = True
|
|
2026
2027
|
base_name_override: Optional[str] = None
|
|
2028
|
+
base_alias_override: Optional[str] = None
|
|
2027
2029
|
|
|
2028
2030
|
@computed_field # type: ignore
|
|
2029
2031
|
@property
|
|
@@ -2034,11 +2036,19 @@ class CTE(BaseModel):
|
|
|
2034
2036
|
def validate_output_columns(cls, v):
|
|
2035
2037
|
return unique(v, "address")
|
|
2036
2038
|
|
|
2037
|
-
def inline_parent_datasource(self, parent: CTE) -> bool:
|
|
2039
|
+
def inline_parent_datasource(self, parent: CTE, force_group: bool = False) -> bool:
|
|
2038
2040
|
qds_being_inlined = parent.source
|
|
2039
2041
|
ds_being_inlined = qds_being_inlined.datasources[0]
|
|
2040
2042
|
if not isinstance(ds_being_inlined, Datasource):
|
|
2041
2043
|
return False
|
|
2044
|
+
if any(
|
|
2045
|
+
[
|
|
2046
|
+
x.identifier == ds_being_inlined.identifier
|
|
2047
|
+
for x in self.source.datasources
|
|
2048
|
+
]
|
|
2049
|
+
):
|
|
2050
|
+
return False
|
|
2051
|
+
|
|
2042
2052
|
self.source.datasources = [
|
|
2043
2053
|
ds_being_inlined,
|
|
2044
2054
|
*[
|
|
@@ -2050,6 +2060,7 @@ class CTE(BaseModel):
|
|
|
2050
2060
|
# need to identify this before updating joins
|
|
2051
2061
|
if self.base_name == parent.name:
|
|
2052
2062
|
self.base_name_override = ds_being_inlined.safe_location
|
|
2063
|
+
self.base_alias_override = ds_being_inlined.identifier
|
|
2053
2064
|
|
|
2054
2065
|
for join in self.joins:
|
|
2055
2066
|
if isinstance(join, InstantiatedUnnestJoin):
|
|
@@ -2066,6 +2077,8 @@ class CTE(BaseModel):
|
|
|
2066
2077
|
elif v == parent.name:
|
|
2067
2078
|
self.source_map[k] = ds_being_inlined.name
|
|
2068
2079
|
self.parent_ctes = [x for x in self.parent_ctes if x.name != parent.name]
|
|
2080
|
+
if force_group:
|
|
2081
|
+
self.group_to_grain = True
|
|
2069
2082
|
return True
|
|
2070
2083
|
|
|
2071
2084
|
def __add__(self, other: "CTE"):
|
|
@@ -2126,9 +2139,6 @@ class CTE(BaseModel):
|
|
|
2126
2139
|
if self.base_name_override:
|
|
2127
2140
|
return self.base_name_override
|
|
2128
2141
|
# if this cte selects from a single datasource, select right from it
|
|
2129
|
-
valid_joins: List[Join] = [
|
|
2130
|
-
join for join in self.joins if isinstance(join, Join)
|
|
2131
|
-
]
|
|
2132
2142
|
if self.is_root_datasource:
|
|
2133
2143
|
return self.source.datasources[0].safe_location
|
|
2134
2144
|
|
|
@@ -2136,33 +2146,16 @@ class CTE(BaseModel):
|
|
|
2136
2146
|
# as the root
|
|
2137
2147
|
elif len(self.source.datasources) == 1 and len(self.parent_ctes) == 1:
|
|
2138
2148
|
return self.parent_ctes[0].name
|
|
2139
|
-
elif valid_joins and len(valid_joins) > 0:
|
|
2140
|
-
candidates = [x.left_cte.name for x in valid_joins]
|
|
2141
|
-
disallowed = [x.right_cte.name for x in valid_joins]
|
|
2142
|
-
try:
|
|
2143
|
-
return [y for y in candidates if y not in disallowed][0]
|
|
2144
|
-
except IndexError:
|
|
2145
|
-
raise SyntaxError(
|
|
2146
|
-
f"Invalid join configuration {candidates} {disallowed} with all parents {[x.base_name for x in self.parent_ctes]}"
|
|
2147
|
-
)
|
|
2148
2149
|
elif self.relevant_base_ctes:
|
|
2149
2150
|
return self.relevant_base_ctes[0].name
|
|
2150
|
-
elif self.parent_ctes:
|
|
2151
|
-
raise SyntaxError(
|
|
2152
|
-
f"{self.name} has no relevant base CTEs, {self.source_map},"
|
|
2153
|
-
f" {[x.name for x in self.parent_ctes]}, outputs"
|
|
2154
|
-
f" {[x.address for x in self.output_columns]}"
|
|
2155
|
-
)
|
|
2156
2151
|
return self.source.name
|
|
2157
2152
|
|
|
2158
2153
|
@property
|
|
2159
2154
|
def base_alias(self) -> str:
|
|
2160
|
-
|
|
2155
|
+
if self.base_alias_override:
|
|
2156
|
+
return self.base_alias_override
|
|
2161
2157
|
if self.is_root_datasource:
|
|
2162
2158
|
return self.source.datasources[0].identifier
|
|
2163
|
-
relevant_joins = [j for j in self.joins if isinstance(j, Join)]
|
|
2164
|
-
if relevant_joins:
|
|
2165
|
-
return relevant_joins[0].left_cte.name
|
|
2166
2159
|
elif self.relevant_base_ctes:
|
|
2167
2160
|
return self.relevant_base_ctes[0].name
|
|
2168
2161
|
elif self.parent_ctes:
|
|
@@ -2492,9 +2485,17 @@ class Environment(BaseModel):
|
|
|
2492
2485
|
for datasource in self.datasources.values():
|
|
2493
2486
|
for concept in datasource.output_concepts:
|
|
2494
2487
|
concrete_addresses.add(concept.address)
|
|
2488
|
+
current_mat = [x.address for x in self.materialized_concepts]
|
|
2495
2489
|
self.materialized_concepts = [
|
|
2496
2490
|
c for c in self.concepts.values() if c.address in concrete_addresses
|
|
2497
2491
|
]
|
|
2492
|
+
new = [
|
|
2493
|
+
x.address
|
|
2494
|
+
for x in self.materialized_concepts
|
|
2495
|
+
if x.address not in current_mat
|
|
2496
|
+
]
|
|
2497
|
+
if new:
|
|
2498
|
+
logger.info(f"Environment added new materialized concepts {new}")
|
|
2498
2499
|
for concept in self.concepts.values():
|
|
2499
2500
|
if concept.derivation == PurposeLineage.MERGE:
|
|
2500
2501
|
ms = concept.lineage
|
|
@@ -2653,6 +2654,17 @@ class Environment(BaseModel):
|
|
|
2653
2654
|
self.gen_concept_list_caches()
|
|
2654
2655
|
return datasource
|
|
2655
2656
|
|
|
2657
|
+
def delete_datasource(
|
|
2658
|
+
self,
|
|
2659
|
+
address: str,
|
|
2660
|
+
meta: Meta | None = None,
|
|
2661
|
+
) -> bool:
|
|
2662
|
+
if address in self.datasources:
|
|
2663
|
+
del self.datasources[address]
|
|
2664
|
+
self.gen_concept_list_caches()
|
|
2665
|
+
return True
|
|
2666
|
+
return False
|
|
2667
|
+
|
|
2656
2668
|
|
|
2657
2669
|
class LazyEnvironment(Environment):
|
|
2658
2670
|
"""Variant of environment to defer parsing of a path"""
|
|
@@ -2759,11 +2771,8 @@ class Comparison(ConceptArgs, Namespaced, SelectGrain, BaseModel):
|
|
|
2759
2771
|
if isinstance(self.left, SelectGrain)
|
|
2760
2772
|
else self.left
|
|
2761
2773
|
),
|
|
2762
|
-
right
|
|
2763
|
-
|
|
2764
|
-
if isinstance(self.right, SelectGrain)
|
|
2765
|
-
else self.right
|
|
2766
|
-
),
|
|
2774
|
+
# the right side does NOT need to inherit select grain
|
|
2775
|
+
right=self.right,
|
|
2767
2776
|
operator=self.operator,
|
|
2768
2777
|
)
|
|
2769
2778
|
|
|
@@ -2809,8 +2818,8 @@ class SubselectComparison(Comparison):
|
|
|
2809
2818
|
return get_concept_arguments(self.left)
|
|
2810
2819
|
|
|
2811
2820
|
@property
|
|
2812
|
-
def existence_arguments(self) ->
|
|
2813
|
-
return get_concept_arguments(self.right)
|
|
2821
|
+
def existence_arguments(self) -> list[tuple["Concept", ...]]:
|
|
2822
|
+
return [tuple(get_concept_arguments(self.right))]
|
|
2814
2823
|
|
|
2815
2824
|
def with_select_grain(self, grain: Grain):
|
|
2816
2825
|
# there's no need to pass the select grain through to a subselect comparison
|
|
@@ -3002,18 +3011,26 @@ class Conditional(ConceptArgs, Namespaced, SelectGrain, BaseModel):
|
|
|
3002
3011
|
return output
|
|
3003
3012
|
|
|
3004
3013
|
@property
|
|
3005
|
-
def existence_arguments(self) ->
|
|
3014
|
+
def existence_arguments(self) -> list[tuple["Concept", ...]]:
|
|
3006
3015
|
output = []
|
|
3007
3016
|
if isinstance(self.left, ConceptArgs):
|
|
3008
3017
|
output += self.left.existence_arguments
|
|
3009
|
-
else:
|
|
3010
|
-
output += get_concept_arguments(self.left)
|
|
3011
3018
|
if isinstance(self.right, ConceptArgs):
|
|
3012
3019
|
output += self.right.existence_arguments
|
|
3013
|
-
else:
|
|
3014
|
-
output += get_concept_arguments(self.right)
|
|
3015
3020
|
return output
|
|
3016
3021
|
|
|
3022
|
+
def decompose(self):
|
|
3023
|
+
chunks = []
|
|
3024
|
+
if self.operator == BooleanOperator.AND:
|
|
3025
|
+
for val in [self.left, self.right]:
|
|
3026
|
+
if isinstance(val, Conditional):
|
|
3027
|
+
chunks.extend(val.decompose())
|
|
3028
|
+
else:
|
|
3029
|
+
chunks.append(val)
|
|
3030
|
+
else:
|
|
3031
|
+
chunks.append(self)
|
|
3032
|
+
return chunks
|
|
3033
|
+
|
|
3017
3034
|
|
|
3018
3035
|
class AggregateWrapper(Namespaced, SelectGrain, BaseModel):
|
|
3019
3036
|
function: Function
|
|
@@ -3073,7 +3090,7 @@ class WhereClause(ConceptArgs, Namespaced, SelectGrain, BaseModel):
|
|
|
3073
3090
|
return self.conditional.row_arguments
|
|
3074
3091
|
|
|
3075
3092
|
@property
|
|
3076
|
-
def existence_arguments(self) ->
|
|
3093
|
+
def existence_arguments(self) -> list[tuple["Concept", ...]]:
|
|
3077
3094
|
return self.conditional.existence_arguments
|
|
3078
3095
|
|
|
3079
3096
|
def with_namespace(self, namespace: str) -> WhereClause:
|
|
@@ -3314,10 +3331,10 @@ class Parenthetical(ConceptArgs, Namespaced, SelectGrain, BaseModel):
|
|
|
3314
3331
|
return self.concept_arguments
|
|
3315
3332
|
|
|
3316
3333
|
@property
|
|
3317
|
-
def existence_arguments(self) ->
|
|
3334
|
+
def existence_arguments(self) -> list[tuple["Concept", ...]]:
|
|
3318
3335
|
if isinstance(self.content, ConceptArgs):
|
|
3319
3336
|
return self.content.existence_arguments
|
|
3320
|
-
return
|
|
3337
|
+
return []
|
|
3321
3338
|
|
|
3322
3339
|
@property
|
|
3323
3340
|
def input(self):
|
|
@@ -3386,6 +3403,12 @@ Function.model_rebuild()
|
|
|
3386
3403
|
Grain.model_rebuild()
|
|
3387
3404
|
|
|
3388
3405
|
|
|
3406
|
+
def list_to_wrapper(args):
|
|
3407
|
+
types = [arg_to_datatype(arg) for arg in args]
|
|
3408
|
+
assert len(set(types)) == 1
|
|
3409
|
+
return ListWrapper(args, type=types[0])
|
|
3410
|
+
|
|
3411
|
+
|
|
3389
3412
|
def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
|
|
3390
3413
|
if isinstance(arg, Function):
|
|
3391
3414
|
return arg.output_datatype
|
|
@@ -3409,5 +3432,8 @@ def arg_to_datatype(arg) -> DataType | ListType | StructType | MapType:
|
|
|
3409
3432
|
if arg.type in (WindowType.RANK, WindowType.ROW_NUMBER):
|
|
3410
3433
|
return DataType.INTEGER
|
|
3411
3434
|
return arg_to_datatype(arg.content)
|
|
3435
|
+
elif isinstance(arg, list):
|
|
3436
|
+
wrapper = list_to_wrapper(arg)
|
|
3437
|
+
return ListType(type=wrapper.type)
|
|
3412
3438
|
else:
|
|
3413
3439
|
raise ValueError(f"Cannot parse arg datatype for arg of raw type {type(arg)}")
|
trilogy/core/optimization.py
CHANGED
|
@@ -12,9 +12,6 @@ from trilogy.constants import logger, CONFIG
|
|
|
12
12
|
from abc import ABC
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
REGISTERED_RULES: list["OptimizationRule"] = []
|
|
16
|
-
|
|
17
|
-
|
|
18
15
|
class OptimizationRule(ABC):
|
|
19
16
|
|
|
20
17
|
def optimize(self, cte: CTE, inverse_map: dict[str, list[CTE]]) -> bool:
|
|
@@ -38,6 +35,7 @@ class InlineDatasource(OptimizationRule):
|
|
|
38
35
|
f"Checking {cte.name} for consolidating inline tables with {len(cte.parent_ctes)} parents"
|
|
39
36
|
)
|
|
40
37
|
to_inline: list[CTE] = []
|
|
38
|
+
force_group = False
|
|
41
39
|
for parent_cte in cte.parent_ctes:
|
|
42
40
|
if not parent_cte.is_root_datasource:
|
|
43
41
|
self.log(f"parent {parent_cte.name} is not root")
|
|
@@ -55,20 +53,25 @@ class InlineDatasource(OptimizationRule):
|
|
|
55
53
|
continue
|
|
56
54
|
root_outputs = {x.address for x in root.output_concepts}
|
|
57
55
|
cte_outputs = {x.address for x in parent_cte.output_columns}
|
|
56
|
+
grain_components = {x.address for x in root.grain.components}
|
|
58
57
|
if not cte_outputs.issubset(root_outputs):
|
|
59
58
|
self.log(f"Not all {parent_cte.name} outputs are found on datasource")
|
|
60
59
|
continue
|
|
61
|
-
|
|
60
|
+
if not grain_components.issubset(cte_outputs):
|
|
61
|
+
self.log("Not all datasource components in cte outputs, forcing group")
|
|
62
|
+
force_group = True
|
|
62
63
|
to_inline.append(parent_cte)
|
|
63
64
|
|
|
64
65
|
for replaceable in to_inline:
|
|
65
|
-
self.log(f"Inlining parent {replaceable.name}")
|
|
66
|
-
cte.inline_parent_datasource(replaceable)
|
|
67
66
|
|
|
67
|
+
result = cte.inline_parent_datasource(replaceable, force_group=force_group)
|
|
68
|
+
if result:
|
|
69
|
+
self.log(f"Inlined parent {replaceable.name}")
|
|
70
|
+
else:
|
|
71
|
+
self.log(f"Failed to inline {replaceable.name}")
|
|
68
72
|
return optimized
|
|
69
73
|
|
|
70
74
|
|
|
71
|
-
# This will be used in the future for more complex condition decomposition
|
|
72
75
|
def decompose_condition(conditional: Conditional):
|
|
73
76
|
chunks = []
|
|
74
77
|
if conditional.operator == BooleanOperator.AND:
|
|
@@ -107,14 +110,14 @@ class PredicatePushdown(OptimizationRule):
|
|
|
107
110
|
f"Checking {cte.name} for predicate pushdown with {len(cte.parent_ctes)} parents"
|
|
108
111
|
)
|
|
109
112
|
if isinstance(cte.condition, Conditional):
|
|
110
|
-
candidates =
|
|
113
|
+
candidates = cte.condition.decompose()
|
|
111
114
|
else:
|
|
112
115
|
candidates = [cte.condition]
|
|
113
116
|
logger.info(f"Have {len(candidates)} candidates to try to push down")
|
|
114
117
|
for candidate in candidates:
|
|
115
118
|
conditions = {x.address for x in candidate.concept_arguments}
|
|
116
119
|
for parent_cte in cte.parent_ctes:
|
|
117
|
-
materialized = {k for k, v in parent_cte.source_map.items() if v !=
|
|
120
|
+
materialized = {k for k, v in parent_cte.source_map.items() if v != []}
|
|
118
121
|
if conditions.issubset(materialized):
|
|
119
122
|
if all(
|
|
120
123
|
[
|
|
@@ -150,12 +153,6 @@ class PredicatePushdown(OptimizationRule):
|
|
|
150
153
|
return optimized
|
|
151
154
|
|
|
152
155
|
|
|
153
|
-
if CONFIG.optimizations.datasource_inlining:
|
|
154
|
-
REGISTERED_RULES.append(InlineDatasource())
|
|
155
|
-
if CONFIG.optimizations.predicate_pushdown:
|
|
156
|
-
REGISTERED_RULES.append(PredicatePushdown())
|
|
157
|
-
|
|
158
|
-
|
|
159
156
|
def filter_irrelevant_ctes(
|
|
160
157
|
input: list[CTE],
|
|
161
158
|
root_cte: CTE,
|
|
@@ -200,6 +197,8 @@ def is_direct_return_eligible(
|
|
|
200
197
|
for x in derived_concepts:
|
|
201
198
|
if x.derivation == PurposeLineage.WINDOW:
|
|
202
199
|
return False
|
|
200
|
+
if x.derivation == PurposeLineage.UNNEST:
|
|
201
|
+
return False
|
|
203
202
|
if x.derivation == PurposeLineage.AGGREGATE:
|
|
204
203
|
if x.address in conditions:
|
|
205
204
|
return False
|
|
@@ -227,6 +226,12 @@ def optimize_ctes(
|
|
|
227
226
|
input: list[CTE], root_cte: CTE, select: SelectStatement | MultiSelectStatement
|
|
228
227
|
):
|
|
229
228
|
complete = False
|
|
229
|
+
REGISTERED_RULES: list["OptimizationRule"] = []
|
|
230
|
+
|
|
231
|
+
if CONFIG.optimizations.datasource_inlining:
|
|
232
|
+
REGISTERED_RULES.append(InlineDatasource())
|
|
233
|
+
if CONFIG.optimizations.predicate_pushdown:
|
|
234
|
+
REGISTERED_RULES.append(PredicatePushdown())
|
|
230
235
|
|
|
231
236
|
while not complete:
|
|
232
237
|
actions_taken = False
|
|
@@ -236,12 +241,21 @@ def optimize_ctes(
|
|
|
236
241
|
actions_taken = rule.optimize(cte, inverse_map)
|
|
237
242
|
complete = not actions_taken
|
|
238
243
|
|
|
239
|
-
if is_direct_return_eligible(
|
|
244
|
+
if CONFIG.optimizations.direct_return and is_direct_return_eligible(
|
|
245
|
+
root_cte, select
|
|
246
|
+
):
|
|
240
247
|
root_cte.order_by = select.order_by
|
|
241
248
|
root_cte.limit = select.limit
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
249
|
+
if select.where_clause:
|
|
250
|
+
|
|
251
|
+
if root_cte.condition:
|
|
252
|
+
root_cte.condition = Conditional(
|
|
253
|
+
left=root_cte.condition,
|
|
254
|
+
operator=BooleanOperator.AND,
|
|
255
|
+
right=select.where_clause.conditional,
|
|
256
|
+
)
|
|
257
|
+
else:
|
|
258
|
+
root_cte.condition = select.where_clause.conditional
|
|
245
259
|
root_cte.requires_nesting = False
|
|
246
260
|
sort_select_output(root_cte, select)
|
|
247
261
|
|