pytrilogy 0.0.2.6__py3-none-any.whl → 0.0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic.
- {pytrilogy-0.0.2.6.dist-info → pytrilogy-0.0.2.7.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.2.6.dist-info → pytrilogy-0.0.2.7.dist-info}/RECORD +24 -24
- {pytrilogy-0.0.2.6.dist-info → pytrilogy-0.0.2.7.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/constants.py +1 -2
- trilogy/core/models.py +65 -19
- trilogy/core/optimizations/inline_datasource.py +11 -7
- trilogy/core/processing/concept_strategies_v3.py +12 -2
- trilogy/core/processing/node_generators/common.py +1 -0
- trilogy/core/processing/node_generators/filter_node.py +19 -0
- trilogy/core/processing/node_generators/group_node.py +1 -1
- trilogy/core/processing/node_generators/group_to_node.py +0 -1
- trilogy/core/processing/node_generators/node_merge_node.py +4 -0
- trilogy/core/processing/node_generators/rowset_node.py +3 -2
- trilogy/core/processing/nodes/base_node.py +1 -1
- trilogy/core/processing/nodes/filter_node.py +1 -0
- trilogy/core/processing/nodes/merge_node.py +28 -23
- trilogy/core/query_processor.py +24 -31
- trilogy/dialect/base.py +1 -0
- trilogy/dialect/duckdb.py +5 -3
- trilogy/parsing/common.py +6 -2
- {pytrilogy-0.0.2.6.dist-info → pytrilogy-0.0.2.7.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.6.dist-info → pytrilogy-0.0.2.7.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.6.dist-info → pytrilogy-0.0.2.7.dist-info}/top_level.txt +0 -0
{pytrilogy-0.0.2.6.dist-info → pytrilogy-0.0.2.7.dist-info}/RECORD
RENAMED
@@ -1,6 +1,6 @@
-trilogy/__init__.py,sha256=
+trilogy/__init__.py,sha256=Tmie80hiyGWAbZBCPHnj73ajJrgy-wgEl2dB5EZ5_Is,290
 trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/constants.py,sha256=
+trilogy/constants.py,sha256=3YKp-xvS7yB5z9t9YDcGofkrdZ6v13iePDnn4O5mwTE,886
 trilogy/engine.py,sha256=R5ubIxYyrxRExz07aZCUfrTsoXCHQ8DKFTDsobXdWdA,1102
 trilogy/executor.py,sha256=5cRbU4Rj7p1pNV76rfp1pz704Hx_0q8_O8HFURjgXxQ,11016
 trilogy/parser.py,sha256=UtuqSiGiCjpMAYgo1bvNq-b7NSzCA5hzbUW31RXaMII,281
@@ -16,44 +16,44 @@ trilogy/core/exceptions.py,sha256=NvV_4qLOgKXbpotgRf7c8BANDEvHxlqRPaA53IThQ2o,56
 trilogy/core/functions.py,sha256=ARJAyBjeS415-54k3G_bx807rkPZonEulMaLRxSP7vU,10371
 trilogy/core/graph_models.py,sha256=oJUMSpmYhqXlavckHLpR07GJxuQ8dZ1VbB1fB0KaS8c,2036
 trilogy/core/internal.py,sha256=jNGFHKENnbMiMCtAgsnLZYVSENDK4b5ALecXFZpTDzQ,1075
-trilogy/core/models.py,sha256=
+trilogy/core/models.py,sha256=xhEpN8MIF5HW41E5aBB70xeeH0sbwuc-xFbDH9frGDk,139559
 trilogy/core/optimization.py,sha256=A8S9C9H5RcQcFSQLYtEEBnm-r1CW_e9GEWlLK7q3MqA,4930
-trilogy/core/query_processor.py,sha256=
+trilogy/core/query_processor.py,sha256=fIRJWLBfMeKZ_o1-QB2rsQ8jZwsd9zxYkZ5QQzkGBus,17678
 trilogy/core/optimizations/__init__.py,sha256=pxRzNzd2g8oRMy4f_ub5va6bNS2pd4hnyp9JBzTKc1E,300
 trilogy/core/optimizations/base_optimization.py,sha256=tWWT-xnTbnEU-mNi_isMNbywm8B9WTRsNFwGpeh3rqE,468
 trilogy/core/optimizations/inline_constant.py,sha256=kHNyc2UoaPVdYfVAPAFwnWuk4sJ_IF5faRtVcDOrBtw,1110
-trilogy/core/optimizations/inline_datasource.py,sha256=
+trilogy/core/optimizations/inline_datasource.py,sha256=PyThSsQeZ6fi5sM2mUprC8kXXKo1sP8qLVwaGqpFDJI,3428
 trilogy/core/optimizations/predicate_pushdown.py,sha256=iVZV_BVaL3I6wlELPOuJAiuF0heOM7bCUnNqyCxxRVw,6370
 trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/core/processing/concept_strategies_v3.py,sha256=
+trilogy/core/processing/concept_strategies_v3.py,sha256=A0-RVwoqD06gUapGRD4SANj3g9hniKPWs7LvqyeHtbg,24445
 trilogy/core/processing/graph_utils.py,sha256=aq-kqk4Iado2HywDxWEejWc-7PGO6Oa-ZQLAM6XWPHw,1199
 trilogy/core/processing/utility.py,sha256=rhnkn3KgB1MW17mZYSEZ8XFEbC__B7PPmZgpDql4Myg,13614
 trilogy/core/processing/node_generators/__init__.py,sha256=-mzYkRsaRNa_dfTckYkKVFSR8h8a3ihEiPJDU_tAmDo,672
 trilogy/core/processing/node_generators/basic_node.py,sha256=4242PNGTCm2tklqMIkqVu5Iv4m_IeTnOYXxDveuCDZM,2856
-trilogy/core/processing/node_generators/common.py,sha256=
-trilogy/core/processing/node_generators/filter_node.py,sha256=
-trilogy/core/processing/node_generators/group_node.py,sha256=
-trilogy/core/processing/node_generators/group_to_node.py,sha256=
+trilogy/core/processing/node_generators/common.py,sha256=Z3R5Ucb0s09pzs3lkz-t9sI2A5R-SIum1UdwGvqKpXA,8975
+trilogy/core/processing/node_generators/filter_node.py,sha256=0zey11fmXhI-ihkv-cK43tUKHlwG10cHYogByOnGbiw,5433
+trilogy/core/processing/node_generators/group_node.py,sha256=u9X0gAEY7AAWwYsK4g3jPgxcTwSSlI1uDQqNIY6vfG8,2988
+trilogy/core/processing/node_generators/group_to_node.py,sha256=nzITnhaALIT7FMonyo16nNo-kSrLfefa9sZBYecrvkU,2887
 trilogy/core/processing/node_generators/multiselect_node.py,sha256=vP84dnLQy6dtypi6mUbt9sMAcmmrTgQ1Oz4GI6X1IEo,6421
-trilogy/core/processing/node_generators/node_merge_node.py,sha256=
-trilogy/core/processing/node_generators/rowset_node.py,sha256=
+trilogy/core/processing/node_generators/node_merge_node.py,sha256=yZ75lsl8RtM8kr-XQxqZEU6fm-JN5DTJxNbneayu36c,12319
+trilogy/core/processing/node_generators/rowset_node.py,sha256=wFkzQtXfudvAGifySvobgc9Qo1usTEEKpSbjkzchNy0,6282
 trilogy/core/processing/node_generators/select_node.py,sha256=XGcz74XsWv5POWhV2gGC_jbi7T9g7sY-VtP3sjlppFc,19057
 trilogy/core/processing/node_generators/unnest_node.py,sha256=6CH66eGwpadNX7TzUhWZ8aqIisOtQeHINbLV6X3QBUk,1779
 trilogy/core/processing/node_generators/window_node.py,sha256=9nXUXUgQrNczU1gaOqhOZPNzCUxw-lkxt0R7HORI6ss,2582
 trilogy/core/processing/nodes/__init__.py,sha256=-8ONG7Nqa6sNLpst5wNNyMigyaHRfUKGdE3xce8E3mM,3998
-trilogy/core/processing/nodes/base_node.py,sha256=
-trilogy/core/processing/nodes/filter_node.py,sha256=
+trilogy/core/processing/nodes/base_node.py,sha256=7AeDm9WK3uiDGUL6QefeZ8Ok9OcWhbh5h1f9K0hr8IQ,11191
+trilogy/core/processing/nodes/filter_node.py,sha256=stY0tkppSdfCD43ZKRfCxfX9XxdLJ9gEtDA2_wPDcGE,1962
 trilogy/core/processing/nodes/group_node.py,sha256=j3vtFQIn_t4EfCke8dGULlwVkG2KKPYfgGwo2rvgcww,6322
-trilogy/core/processing/nodes/merge_node.py,sha256=
+trilogy/core/processing/nodes/merge_node.py,sha256=RAuBPHPIQcGHqyU_R4HT6Kq9E40fWjQoOUCIngLEGQI,14629
 trilogy/core/processing/nodes/select_node_v2.py,sha256=COu-WPuyabGAc3HTkJB-_7eNzR4OgxIL4YDuZIRrST0,7179
 trilogy/core/processing/nodes/unnest_node.py,sha256=JFtm90IVM-46aCYkTNIaJah6v9ApAfonjVhcVM1HmDE,1903
 trilogy/core/processing/nodes/window_node.py,sha256=X7qxLUKd3tekjUUsmH_4vz5b-U89gMnGd04VBxuu2Ns,1280
 trilogy/dialect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/dialect/base.py,sha256=
+trilogy/dialect/base.py,sha256=Ry5uGD-r1ht9e9tZRJ9LciZ1Qz2ZCMcli9p-xv1XDHE,28960
 trilogy/dialect/bigquery.py,sha256=15KJ-cOpBlk9O7FPviPgmg8xIydJeKx7WfmL3SSsPE8,2953
 trilogy/dialect/common.py,sha256=1oBJ44P4Dw1bMeek-wmZVwcflVYHbAiKb1ByN-iFXVk,2988
 trilogy/dialect/config.py,sha256=tLVEMctaTDhUgARKXUNfHUcIolGaALkQ0RavUvXAY4w,2994
-trilogy/dialect/duckdb.py,sha256=
+trilogy/dialect/duckdb.py,sha256=2ERqiG2owb3uvmU90Bdo1e2fq4ICWC3we2RUQpHVPwg,3173
 trilogy/dialect/enums.py,sha256=4NdpsydBpDn6jnh0JzFz5VvQEtnShErWtWHVyT6TNpw,3948
 trilogy/dialect/postgres.py,sha256=ev1RJZsC8BB3vJSxJ4q-TTYqZ4Hk1NXUtuRkLrQEBX0,3254
 trilogy/dialect/presto.py,sha256=ohWpG7-AWEUGQEpmnJbqVeGRDISX1u18DI-6GjYjwRA,3350
@@ -65,7 +65,7 @@ trilogy/hooks/graph_hook.py,sha256=onHvMQPwj_KOS3HOTpRFiy7QLLKAiycq2MzJ_Q0Oh5Y,2
 trilogy/hooks/query_debugger.py,sha256=NDChfkPmmW-KINa4TaQmDe_adGiwsKFdGLDSYpbodeU,4282
 trilogy/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-trilogy/parsing/common.py,sha256=
+trilogy/parsing/common.py,sha256=zNd5buKxK4z9WSszOk4zOI0GexDFukzZtdNfQxg3kVw,6218
 trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
 trilogy/parsing/exceptions.py,sha256=92E5i2frv5hj9wxObJZsZqj5T6bglvPzvdvco_vW1Zk,38
 trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
@@ -74,9 +74,9 @@ trilogy/parsing/render.py,sha256=Gy_6wVYPwYLf35Iota08sbqveuWILtUhI8MYStcvtJM,121
 trilogy/parsing/trilogy.lark,sha256=cUcwxUTlxU7jKFzYEXYARLTsPHG5cVLk-Xhltw6m2lY,11357
 trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 trilogy/scripts/trilogy.py,sha256=PHxvv6f2ODv0esyyhWxlARgra8dVhqQhYl0lTrSyVNo,3729
-pytrilogy-0.0.2.
-pytrilogy-0.0.2.
-pytrilogy-0.0.2.
-pytrilogy-0.0.2.
-pytrilogy-0.0.2.
-pytrilogy-0.0.2.
+pytrilogy-0.0.2.7.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
+pytrilogy-0.0.2.7.dist-info/METADATA,sha256=iloiI0JW-NDtGOmQU-hJ5jd1XhDMRD_-QkDymPuYKNA,7906
+pytrilogy-0.0.2.7.dist-info/WHEEL,sha256=ixB2d4u7mugx_bCBycvM9OzZ5yD7NmPXFRtKlORZS2Y,91
+pytrilogy-0.0.2.7.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
+pytrilogy-0.0.2.7.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
+pytrilogy-0.0.2.7.dist-info/RECORD,,
trilogy/__init__.py
CHANGED
trilogy/constants.py
CHANGED
@@ -33,6 +33,7 @@ class Config:
     strict_mode: bool = True
     human_identifiers: bool = True
     validate_missing: bool = True
+    show_comments: bool = False
     optimizations: Optimizations = field(default_factory=Optimizations)
 
     def set_random_seed(self, seed: int):
@@ -42,5 +43,3 @@ class Config:
 CONFIG = Config()
 
 CONFIG.set_random_seed(42)
-
-CONFIG.strict_mode = True
trilogy/core/models.py
CHANGED
@@ -945,12 +945,14 @@ class ColumnAssignment(BaseModel):
     )
 
     def with_merge(
-        self,
+        self, source: Concept, target: Concept, modifiers: List[Modifier]
     ) -> "ColumnAssignment":
         return ColumnAssignment(
             alias=self.alias,
-            concept=concept,
-            modifiers=
+            concept=self.concept.with_merge(source, target, modifiers),
+            modifiers=(
+                modifiers if self.concept.address == source.address else self.modifiers
+            ),
         )
 
 
@@ -1890,20 +1892,22 @@ class Datasource(Namespaced, BaseModel):
         self, source: Concept, target: Concept, modifiers: List[Modifier]
     ):
         original = [c for c in self.columns if c.concept.address == source.address]
+        if len(original) != 1:
+            raise ValueError(
+                f"Expected exactly one column to merge, got {len(original)} for {source.address}, {[x.alias for x in original]}"
+            )
         # map to the alias with the modifier, and the original
         self.columns = [
-            (
-                c.with_merge(target, modifiers)
-                if c.concept.address == source.address
-                else c
-            )
+            c.with_merge(source, target, modifiers)
             for c in self.columns
+            if c.concept.address != source.address
         ] + original
         self.grain = self.grain.with_merge(source, target, modifiers)
         self.where = (
             self.where.with_merge(source, target, modifiers) if self.where else None
         )
-
+
+        self.add_column(target, original[0].alias, modifiers)
 
     @property
     def env_label(self) -> str:
@@ -1915,7 +1919,7 @@ class Datasource(Namespaced, BaseModel):
     def condition(self):
         return None
 
-    @
+    @property
    def output_lcl(self) -> LooseConceptList:
        return LooseConceptList(concepts=self.output_concepts)
 
@@ -1923,9 +1927,9 @@ class Datasource(Namespaced, BaseModel):
     def can_be_inlined(self) -> bool:
         if isinstance(self.address, Address) and self.address.is_query:
             return False
-        for x in self.columns:
-
-
+        # for x in self.columns:
+        #     if not isinstance(x.alias, str):
+        #         return False
         return True
 
     @property
@@ -1960,12 +1964,15 @@ class Datasource(Namespaced, BaseModel):
         )
         return grain
 
-    def add_column(
+    def add_column(
+        self,
+        concept: Concept,
+        alias: str | RawColumnExpr | Function,
+        modifiers: List[Modifier] | None = None,
+    ):
         self.columns.append(
-            ColumnAssignment(alias=alias, concept=concept, modifiers=modifiers)
+            ColumnAssignment(alias=alias, concept=concept, modifiers=modifiers or [])
         )
-        # force refresh
-        del self.output_lcl
 
     def __add__(self, other):
         if not other == self:
@@ -1998,7 +2005,7 @@ class Datasource(Namespaced, BaseModel):
             where=self.where.with_namespace(namespace) if self.where else None,
         )
 
-    @
+    @property
     def concepts(self) -> List[Concept]:
         return [c.concept for c in self.columns]
 
@@ -2149,6 +2156,12 @@ class BaseJoin(BaseModel):
         )
 
     def __str__(self):
+        if self.concept_pairs:
+            return (
+                f"{self.join_type.value} JOIN {self.left_datasource.identifier} and"
+                f" {self.right_datasource.identifier} on"
+                f" {','.join([str(k[0])+'='+str(k[1]) for k in self.concept_pairs])}"
+            )
         return (
             f"{self.join_type.value} JOIN {self.left_datasource.identifier} and"
             f" {self.right_datasource.identifier} on"
@@ -2460,6 +2473,19 @@ class CTE(BaseModel):
         self.base_alias_override = candidates[0] if candidates else None
         return True
 
+    @property
+    def comment(self) -> str:
+        base = f"Target: {str(self.grain)}."
+        if self.parent_ctes:
+            base += f" References: {', '.join([x.name for x in self.parent_ctes])}."
+        if self.joins:
+            base += f"\n-- Joins: {', '.join([str(x) for x in self.joins])}."
+        if self.partial_concepts:
+            base += (
+                f"\n-- Partials: {', '.join([str(x) for x in self.partial_concepts])}."
+            )
+        return base
+
     def inline_parent_datasource(self, parent: CTE, force_group: bool = False) -> bool:
         qds_being_inlined = parent.source
         ds_being_inlined = qds_being_inlined.datasources[0]
@@ -2550,6 +2576,10 @@ class CTE(BaseModel):
         self.hidden_concepts = unique(
             self.hidden_concepts + other.hidden_concepts, "address"
         )
+        self.existence_source_map = {
+            **self.existence_source_map,
+            **other.existence_source_map,
+        }
         return self
 
     @property
@@ -2741,6 +2771,12 @@ class Join(BaseModel):
         return self.left_name + self.right_name + self.jointype.value
 
     def __str__(self):
+        if self.joinkey_pairs:
+            return (
+                f"{self.jointype.value} JOIN {self.left_name} and"
+                f" {self.right_name} on"
+                f" {','.join([str(k[0])+'='+str(k[1]) for k in self.joinkey_pairs])}"
+            )
         return (
             f"{self.jointype.value} JOIN {self.left_name} and"
             f" {self.right_name} on {','.join([str(k) for k in self.joinkeys])}"
@@ -3002,6 +3038,7 @@ class Environment(BaseModel):
 
     materialized_concepts: List[Concept] = Field(default_factory=list)
     alias_origin_lookup: Dict[str, Concept] = Field(default_factory=dict)
+    canonical_map: Dict[str, str] = Field(default_factory=dict)
     _parse_count: int = 0
 
     @classmethod
@@ -3050,7 +3087,7 @@ class Environment(BaseModel):
             if x.address not in current_mat
         ]
         if new:
-            logger.
+            logger.debug(f"Environment added new materialized concepts {new}")
 
     def validate_concept(self, lookup: str, meta: Meta | None = None):
         existing: Concept = self.concepts.get(lookup)  # type: ignore
@@ -3213,13 +3250,22 @@ class Environment(BaseModel):
         self, source: Concept, target: Concept, modifiers: List[Modifier]
     ):
         replacements = {}
+        # exit early if we've run this
+        if source.address in self.alias_origin_lookup:
+            if self.concepts[source.address] == target:
+                return
         self.alias_origin_lookup[source.address] = source
         for k, v in self.concepts.items():
+
             if v.address == target.address:
                 v.pseudonyms[source.address] = source
             if v.address == source.address:
                 replacements[k] = target
+                self.canonical_map[k] = target.address
                 v.pseudonyms[target.address] = target
+            # we need to update keys and grains of all concepts
+            else:
+                replacements[k] = v.with_merge(source, target, modifiers)
         self.concepts.update(replacements)
 
         for k, ds in self.datasources.items():
|
|
|
42
42
|
self.log(f"parent {parent_cte.name} datasource is not inlineable")
|
|
43
43
|
continue
|
|
44
44
|
root_outputs = {x.address for x in root.output_concepts}
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
grain_components = {x.address for x in root.grain.components}
|
|
45
|
+
inherited = {
|
|
46
|
+
x for x, v in cte.source_map.items() if v and parent_cte.name in v
|
|
47
|
+
}
|
|
49
48
|
if not inherited.issubset(root_outputs):
|
|
50
49
|
cte_missing = inherited - root_outputs
|
|
51
50
|
self.log(
|
|
52
51
|
f"Not all {parent_cte.name} require inputs are found on datasource, missing {cte_missing}"
|
|
53
52
|
)
|
|
54
53
|
continue
|
|
55
|
-
if not
|
|
56
|
-
self.log("Not all
|
|
57
|
-
|
|
54
|
+
if not root.grain.issubset(parent_cte.grain):
|
|
55
|
+
self.log(f"Not all {parent_cte.name} is at wrong grain to inline")
|
|
56
|
+
continue
|
|
58
57
|
to_inline.append(parent_cte)
|
|
59
58
|
|
|
60
59
|
optimized = False
|
|
@@ -68,6 +67,11 @@ class InlineDatasource(OptimizationRule):
|
|
|
68
67
|
f"Skipping inlining raw datasource {replaceable.source.name} ({replaceable.name}) due to multiple references"
|
|
69
68
|
)
|
|
70
69
|
continue
|
|
70
|
+
if not replaceable.source.datasources[0].grain.issubset(replaceable.grain):
|
|
71
|
+
self.log(
|
|
72
|
+
f"Forcing group ({parent_cte.grain} being replaced by inlined source {root.grain})"
|
|
73
|
+
)
|
|
74
|
+
force_group = True
|
|
71
75
|
result = cte.inline_parent_datasource(replaceable, force_group=force_group)
|
|
72
76
|
if result:
|
|
73
77
|
self.log(
|
|
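
Note on the inline_datasource.py change: inlining a raw datasource now checks grains in two places; the parent is only inlineable as-is when the datasource grain is a subset of the consuming grain, and otherwise a GROUP BY is forced. A hedged sketch of the guard, with a simplified `Grain` standing in for trilogy's:

class Grain:
    def __init__(self, *components: str) -> None:
        self.components = set(components)

    def issubset(self, other: "Grain") -> bool:
        return self.components.issubset(other.components)

source_grain = Grain("order_id", "line_id")
cte_grain = Grain("order_id")

force_group = not source_grain.issubset(cte_grain)
print(force_group)  # True: the finer-grained source needs re-aggregation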
trilogy/core/processing/concept_strategies_v3.py
CHANGED
@@ -612,9 +612,19 @@ def _search_concepts(
         )
 
     if expanded:
-
+        # we don't need to return the entire list; just the ones we needed pre-expansion
+        ex_resolve = expanded.resolve()
+        extra = [
+            x
+            for x in ex_resolve.output_concepts
+            if x.address not in [y.address for y in mandatory_list]
+            and x not in ex_resolve.grain.components
+        ]
+        expanded.output_concepts = mandatory_list
+        expanded.rebuild_cache()
+
         logger.info(
-            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found connections for {[c.address for c in mandatory_list]} via concept addition;"
+            f"{depth_to_prefix(depth)}{LOGGER_PREFIX} Found connections for {[c.address for c in mandatory_list]} via concept addition; removing extra {[c.address for c in extra]}"
         )
         return expanded
     # if we can't find it after expanding to a merge, then
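
Note on the concept_strategies_v3.py change: concepts pulled in only to make the merge-expansion connect are now pruned from the node's outputs after resolution, and the removed set is logged. A toy sketch of the pruning, with string addresses standing in for Concepts:

def prune_expanded(outputs: list[str], mandatory: list[str], grain: set[str]):
    # Concepts added only to stitch the expansion together are dropped from
    # the final node; `extra` is what gets logged as removed.
    extra = [x for x in outputs if x not in mandatory and x not in grain]
    return mandatory, extra

print(prune_expanded(
    ["orders.id", "orders.revenue", "customers.region"],
    ["orders.id", "orders.revenue"],
    {"orders.id"},
))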
trilogy/core/processing/node_generators/filter_node.py
CHANGED
@@ -105,10 +105,29 @@ def gen_filter_node(
         environment=environment,
         g=g,
         parents=core_parents,
+        grain=Grain(
+            components=[immediate_parent] + parent_row_concepts,
+        ),
+    )
+
+    assert filter_node.resolve().grain == Grain(
+        components=[immediate_parent] + parent_row_concepts,
     )
     if not local_optional or all(
         [x.address in [y.address for y in parent_row_concepts] for x in local_optional]
     ):
+        outputs = [
+            x
+            for x in filter_node.output_concepts
+            if x.address in [y.address for y in local_optional]
+        ]
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} no extra enrichment needed for filter node"
+        )
+        filter_node.output_concepts = [
+            concept,
+        ] + outputs
+        filter_node.rebuild_cache()
         return filter_node
     enrich_node = source_concepts(  # this fetches the parent + join keys
         # to then connect to the rest of the query
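
Note on the filter_node.py generator change: the filter node is now constructed with an explicit grain (the filtered parent plus its row concepts), asserted after resolution, and when no enrichment join is required, its outputs are trimmed to the filtered concept plus only the optional concepts the caller asked for. A sketch of the trimming, with strings standing in for Concept addresses:

def trim_outputs(concept: str, node_outputs: list[str], local_optional: list[str]) -> list[str]:
    # Keep the filtered concept itself plus only requested row concepts.
    outputs = [x for x in node_outputs if x in local_optional]
    return [concept] + outputs

print(trim_outputs(
    "orders.high_value_id",
    ["orders.id", "orders.amount", "orders.ts"],
    ["orders.amount"],
))  # ['orders.high_value_id', 'orders.amount']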
trilogy/core/processing/node_generators/group_node.py
CHANGED
@@ -29,7 +29,7 @@ def gen_group_node(
     resolve_function_parent_concepts(concept), "address"
     )
     logger.info(
-        f"{padding(depth)}{LOGGER_PREFIX}
+        f"{padding(depth)}{LOGGER_PREFIX} parent concepts are {[x.address for x in parent_concepts]} from group grain {concept.grain}"
     )
 
     # if the aggregation has a grain, we need to ensure these are the ONLY optional in the output of the select
trilogy/core/processing/node_generators/node_merge_node.py
CHANGED
@@ -260,6 +260,7 @@ def subgraphs_to_merge_node(
     source_concepts,
     history,
     conditions,
+    enable_early_exit: bool = True,
 ):
     parents: List[StrategyNode] = []
     logger.info(
@@ -290,6 +291,8 @@ def subgraphs_to_merge_node(
     for x in parents:
         for y in x.output_concepts:
             input_c.append(y)
+    if len(parents) == 1 and enable_early_exit:
+        return parents[0]
 
     return MergeNode(
         input_concepts=unique(input_c, "address"),
@@ -350,6 +353,7 @@ def gen_merge_node(
         source_concepts=source_concepts,
         history=history,
         conditions=conditions,
+        enable_early_exit=False,
     )
     if test:
         return test
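
Note on the node_merge_node.py change: when only one subgraph resolves, there is nothing to merge, so the parent node is now returned directly instead of being wrapped in a one-input MergeNode; `gen_merge_node` opts out via the new flag because it relies on getting a MergeNode back. A minimal sketch of the control flow (the tuple is a stand-in for constructing a MergeNode):

def merge_or_passthrough(parents: list, enable_early_exit: bool = True):
    if len(parents) == 1 and enable_early_exit:
        return parents[0]  # skip the degenerate single-parent merge
    return ("MergeNode", parents)

print(merge_or_passthrough(["node_a"]))         # 'node_a'
print(merge_or_passthrough(["node_a"], False))  # ('MergeNode', ['node_a'])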
trilogy/core/processing/node_generators/rowset_node.py
CHANGED
@@ -74,8 +74,9 @@ def gen_rowset_node(
     if existence_parents:
         node.parents += existence_parents
         # we don't need to join to any existence parents
-        if isinstance(node, MergeNode):
-
+        # if isinstance(node, MergeNode) and node.node_joins is None:
+        #     # set it explicitly to empty to avoid inference
+        #     node.node_joins = []
         for parent in existence_parents:
             for x in parent.output_concepts:
                 if x.address not in node.output_lcl:
trilogy/core/processing/nodes/base_node.py
CHANGED
@@ -192,7 +192,7 @@ class StrategyNode:
             p.resolve() for p in self.parents
         ]
 
-        grain = Grain(components=self.output_concepts)
+        grain = self.grain if self.grain else Grain(components=self.output_concepts)
         source_map = resolve_concept_map(
             parent_sources,
             self.output_concepts,
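
Note on the base_node.py change: resolution now honors an explicitly assigned grain (such as the one gen_filter_node sets above) and only falls back to deriving a grain from the output concepts. A one-function sketch of the fallback:

def resolve_grain(explicit: set[str] | None, outputs: list[str]) -> set[str]:
    # Explicit grain wins; otherwise derive one from the outputs.
    return explicit if explicit else set(outputs)

print(resolve_grain(None, ["a", "b"]))  # {'a', 'b'} (derived)
print(resolve_grain({"a"}, ["a", "b"]))  # {'a'} (explicit wins)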
trilogy/core/processing/nodes/merge_node.py
CHANGED
@@ -78,7 +78,7 @@ def deduplicate_nodes_and_joins(
     duplicates = False
     duplicates, merged, removed = deduplicate_nodes(merged, logging_prefix)
     # filter out any removed joins
-    if joins:
+    if joins is not None:
         joins = [
             j
             for j in joins
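
Note on the `is not None` fix: an empty join list is falsy, so `if joins:` skipped the filtering branch entirely and an explicitly empty list never flowed through it. A sketch of the difference:

def filter_joins(joins: list[str] | None, removed: set[str]) -> list[str] | None:
    if joins is not None:  # `if joins:` would wrongly skip joins == []
        joins = [j for j in joins if j not in removed]
    return joins

print(filter_joins(None, {"x"}))        # None preserved
print(filter_joins([], {"x"}))          # [] preserved, branch still taken
print(filter_joins(["x", "y"], {"x"}))  # ['y']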
@@ -138,6 +138,16 @@ class MergeNode(StrategyNode):
                 continue
             final_joins.append(join)
         self.node_joins = final_joins
+        partial_lookup: list[Concept] = []
+        non_partial: List[Concept] = []
+        for node in parents or []:
+            partial_lookup += node.partial_concepts
+            non_partial += [
+                x for x in node.output_concepts if x not in node.partial_concepts
+            ]
+
+        final_partial = [x for x in partial_lookup if x not in non_partial]
+        self.partial_concepts = final_partial
 
     def translate_node_joins(self, node_joins: List[NodeJoin]) -> List[BaseJoin]:
         joins = []
@@ -219,12 +229,13 @@ class MergeNode(StrategyNode):
             )
             joins = self.translate_node_joins(final_joins)
         else:
+            logger.info(
+                f"{self.logging_prefix}{LOGGER_PREFIX} Final joins is not null {final_joins} but is empty, skipping join generation"
+            )
             return []
 
         for join in joins:
-            logger.info(
-                f"{self.logging_prefix}{LOGGER_PREFIX} final join {join.join_type} {[str(c) for c in join.concepts]}"
-            )
+            logger.info(f"{self.logging_prefix}{LOGGER_PREFIX} final join {str(join)}")
         return joins
 
     def _resolve(self) -> QueryDatasource:
@@ -249,6 +260,12 @@ class MergeNode(StrategyNode):
         # early exit if we can just return the parent
         final_datasets: List[QueryDatasource | Datasource] = list(merged.values())
 
+        existence_final = [
+            x
+            for x in final_datasets
+            if all([y in self.existence_concepts for y in x.output_concepts])
+        ]
+
         if len(merged.keys()) == 1:
             final: QueryDatasource | Datasource = list(merged.values())[0]
             if (
@@ -288,34 +305,25 @@ class MergeNode(StrategyNode):
         for source in final_datasets:
             pregrain += source.grain
 
-        grain =
-            self.grain
-            if self.grain
-            else Grain(
-                components=[
-                    c
-                    for c in pregrain.components
-                    if c.address in [x.address for x in self.output_concepts]
-                ]
-            )
-        )
+        grain = self.grain if self.grain else pregrain
 
         logger.info(
             f"{self.logging_prefix}{LOGGER_PREFIX} has pre grain {pregrain} and final merge node grain {grain}"
         )
-
-        if len(
+        join_candidates = [x for x in final_datasets if x not in existence_final]
+        if len(join_candidates) > 1:
             joins = self.generate_joins(
-
+                join_candidates, final_joins, pregrain, grain, self.environment
             )
         else:
             joins = []
-
+        logger.info(
+            f"{self.logging_prefix}{LOGGER_PREFIX} Final join count for CTE parent count {len(join_candidates)} is {len(joins)}"
        )
         full_join_concepts = []
         for join in joins:
             if join.join_type == JoinType.FULL:
                 full_join_concepts += join.concepts
-
         if self.whole_grain:
             force_group = False
         elif self.force_group is False:
@@ -337,9 +345,6 @@ class MergeNode(StrategyNode):
             inherited_inputs=self.input_concepts + self.existence_concepts,
             full_joins=full_join_concepts,
         )
-        logger.info(
-            f"{self.logging_prefix}{LOGGER_PREFIX} source_map {str(source_map)}"
-        )
         qds = QueryDatasource(
             input_concepts=unique(self.input_concepts, "address"),
             output_concepts=unique(self.output_concepts, "address"),
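
Note on the MergeNode changes: existence-only parents are now excluded from join candidates, and `partial_concepts` is derived from the parents; a concept stays partial on the merge node only when no parent outputs it in full. A sketch of that propagation, with dicts standing in for parent nodes:

def merged_partials(parents: list[dict]) -> list[str]:
    partial_lookup: list[str] = []
    non_partial: list[str] = []
    for node in parents:
        partial_lookup += node["partial"]
        non_partial += [x for x in node["outputs"] if x not in node["partial"]]
    # Partial only if no parent supplies a full copy.
    return [x for x in partial_lookup if x not in non_partial]

parents = [
    {"outputs": ["id", "region"], "partial": ["region"]},
    {"outputs": ["id", "region"], "partial": []},  # provides region fully
]
print(merged_partials(parents))  # []: the full copy wins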
trilogy/core/query_processor.py
CHANGED
@@ -183,49 +183,42 @@ def generate_cte_name(full_name: str, name_map: dict[str, str]) -> str:
     return full_name.replace("<", "").replace(">", "").replace(",", "_")
 
 
-def
+def resolve_cte_base_name_and_alias_v2(
     name: str,
     source: QueryDatasource,
-
-
+    source_map: Dict[str, list[str]],
+    raw_joins: List[Join | InstantiatedUnnestJoin],
 ) -> Tuple[str | None, str | None]:
-
-    valid_joins: List[Join] = [join for join in joins if isinstance(join, Join)]
-    relevant_parent_sources = set()
-    for k, v in source.source_map.items():
-        if v:
-            relevant_parent_sources.update(v)
-    eligible = [x for x in source.datasources if x in relevant_parent_sources]
+    joins: List[Join] = [join for join in raw_joins if isinstance(join, Join)]
     if (
-        len(
-        and isinstance(
-        and not
+        len(source.datasources) == 1
+        and isinstance(source.datasources[0], Datasource)
+        and not source.datasources[0].name == CONSTANT_DATASET
     ):
-        ds =
+        ds = source.datasources[0]
         return ds.safe_location, ds.identifier
 
-
-
-
-        return parents[0].name, parents[0].name
-    elif valid_joins and len(valid_joins) > 0:
-        candidates = [x.left_cte.name for x in valid_joins]
-        disallowed = [x.right_cte.name for x in valid_joins]
+    if joins and len(joins) > 0:
+        candidates = [x.left_cte.name for x in joins]
+        disallowed = [x.right_cte.name for x in joins]
         try:
             cte = [y for y in candidates if y not in disallowed][0]
             return cte, cte
         except IndexError:
             raise SyntaxError(
-                f"Invalid join configuration {candidates} {disallowed}
+                f"Invalid join configuration {candidates} {disallowed} for {name}",
             )
-    elif eligible:
-        matched = [x for x in parents if x.source.name == eligible[0].name]
-        if matched:
-            return matched[0].name, matched[0].name
 
-
-
-    )
+    counts: dict[str, int] = defaultdict(lambda: 0)
+    output_addresses = [x.address for x in source.output_concepts]
+    for k, v in source_map.items():
+        for vx in v:
+            if k in output_addresses:
+                counts[vx] = counts[vx] + 1
+            else:
+                counts[vx] = counts[vx]
+    if counts:
+        return max(counts, key=counts.get), max(counts, key=counts.get)  # type: ignore
     return None, None
 
 
@@ -274,8 +267,8 @@ def datasource_to_ctes(
         for x in [base_join_to_join(join, parents) for join in query_datasource.joins]
         if x
     ]
-    base_name, base_alias =
-        human_id, query_datasource,
+    base_name, base_alias = resolve_cte_base_name_and_alias_v2(
+        human_id, query_datasource, source_map, final_joins
     )
     cte = CTE(
         name=human_id,
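
Note on the query_processor.py change: the rewritten `resolve_cte_base_name_and_alias_v2` keeps the single-datasource and join-anchor shortcuts, then falls back to a frequency count: the parent source feeding the most output concepts in the source map becomes the base. A sketch of that fallback, with string addresses standing in for concepts:

from collections import defaultdict

def pick_base(source_map: dict[str, list[str]], output_addresses: list[str]) -> str | None:
    counts: dict[str, int] = defaultdict(int)
    for concept, sources in source_map.items():
        for src in sources:
            # Non-output concepts still materialize the key with a zero count,
            # matching the `counts[vx] = counts[vx]` branch above.
            counts[src] += 1 if concept in output_addresses else 0
    return max(counts, key=counts.get) if counts else None  # type: ignore

source_map = {"a": ["cte_x"], "b": ["cte_x", "cte_y"], "c": ["cte_y"]}
print(pick_base(source_map, ["a", "b"]))  # 'cte_x' (feeds two outputs)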
trilogy/dialect/base.py
CHANGED
trilogy/dialect/duckdb.py
CHANGED
@@ -47,8 +47,9 @@ CREATE OR REPLACE TABLE {{ output.address.location }} AS
 {% endif %}{%- if ctes %}
 WITH {% for cte in ctes %}
 {{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
-{
-{
+{%- if full_select -%}{{full_select}}
+{%- else -%}{%- if comment %}
+-- {{ comment }}{% endif %}
 SELECT
 {%- for select in select_columns %}
     {{ select }}{% if not loop.last %},{% endif %}{% endfor %}
@@ -56,7 +57,8 @@ SELECT
 {{ base }}{% endif %}{% if joins %}
 {%- for join in joins %}
     {{ join }}{% endfor %}{% endif %}
-{
+{%- if where %}
+WHERE
     {{ where }}
 {% endif -%}{%- if group_by %}
 GROUP BY {% for group in group_by %}
trilogy/parsing/common.py
CHANGED
@@ -112,12 +112,16 @@ def filter_item_to_concept(
     return Concept(
         name=name,
         datatype=parent.content.datatype,
-        purpose=
+        purpose=Purpose.PROPERTY,
         lineage=parent,
         metadata=fmetadata,
         namespace=namespace,
         # filtered copies cannot inherit keys
-        keys=
+        keys=(
+            parent.content.keys
+            if parent.content.purpose == Purpose.PROPERTY
+            else (parent.content,)
+        ),
         grain=(
             parent.content.grain
             if parent.content.purpose == Purpose.PROPERTY
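
Note on the parsing/common.py change: a filtered copy of a property now inherits the property's keys, while a filtered copy of anything else is keyed on the parent concept itself. A simplified sketch of the key derivation (dataclasses stand in for trilogy's Concept):

from dataclasses import dataclass

@dataclass(frozen=True)
class C:
    name: str
    purpose: str  # 'property' or 'key'
    keys: tuple = ()

def filtered_keys(parent: C) -> tuple:
    # Properties pass their keys through; other purposes key on the parent.
    return parent.keys if parent.purpose == "property" else (parent,)

order_id = C("order_id", "key")
amount = C("amount", "property", keys=(order_id,))
print(filtered_keys(amount))    # inherits (order_id,)
print(filtered_keys(order_id))  # keyed on itself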
{pytrilogy-0.0.2.6.dist-info → pytrilogy-0.0.2.7.dist-info}/LICENSE.md
File without changes
{pytrilogy-0.0.2.6.dist-info → pytrilogy-0.0.2.7.dist-info}/entry_points.txt
File without changes
{pytrilogy-0.0.2.6.dist-info → pytrilogy-0.0.2.7.dist-info}/top_level.txt
File without changes