pytrilogy 0.0.2.25__py3-none-any.whl → 0.0.2.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.2.25.dist-info → pytrilogy-0.0.2.26.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.2.25.dist-info → pytrilogy-0.0.2.26.dist-info}/RECORD +21 -21
- trilogy/__init__.py +1 -1
- trilogy/constants.py +1 -1
- trilogy/core/models.py +106 -67
- trilogy/core/processing/node_generators/common.py +0 -1
- trilogy/core/processing/node_generators/select_merge_node.py +49 -22
- trilogy/core/processing/nodes/merge_node.py +2 -2
- trilogy/core/processing/utility.py +236 -257
- trilogy/core/query_processor.py +47 -39
- trilogy/dialect/base.py +1 -0
- trilogy/dialect/common.py +4 -25
- trilogy/executor.py +12 -3
- trilogy/parsing/common.py +4 -6
- trilogy/parsing/parse_engine.py +3 -2
- trilogy/parsing/render.py +41 -17
- trilogy/parsing/trilogy.lark +2 -2
- {pytrilogy-0.0.2.25.dist-info → pytrilogy-0.0.2.26.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.25.dist-info → pytrilogy-0.0.2.26.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.2.25.dist-info → pytrilogy-0.0.2.26.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.25.dist-info → pytrilogy-0.0.2.26.dist-info}/top_level.txt +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
trilogy/__init__.py,sha256=
|
|
1
|
+
trilogy/__init__.py,sha256=Cc2rIa67kJZaRPTgItA7vo9mKdPdLIskVp1BDwgHWbc,291
|
|
2
2
|
trilogy/compiler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
-
trilogy/constants.py,sha256=
|
|
3
|
+
trilogy/constants.py,sha256=KiyYnctoZen4Hzv8WG2jeN-IE-dfQbWHdVCUeTZYjBg,1270
|
|
4
4
|
trilogy/engine.py,sha256=R5ubIxYyrxRExz07aZCUfrTsoXCHQ8DKFTDsobXdWdA,1102
|
|
5
|
-
trilogy/executor.py,sha256=
|
|
5
|
+
trilogy/executor.py,sha256=Gd9KRT1rNAQyF1oDtKMcidg6XWqGMBhPnErrzFpf7Ew,12139
|
|
6
6
|
trilogy/parser.py,sha256=UtuqSiGiCjpMAYgo1bvNq-b7NSzCA5hzbUW31RXaMII,281
|
|
7
7
|
trilogy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
trilogy/utility.py,sha256=zM__8r29EsyDW7K9VOHz8yvZC2bXFzh7xKy3cL7GKsk,707
|
|
@@ -16,9 +16,9 @@ trilogy/core/exceptions.py,sha256=NvV_4qLOgKXbpotgRf7c8BANDEvHxlqRPaA53IThQ2o,56
|
|
|
16
16
|
trilogy/core/functions.py,sha256=IhVpt3n6wEanKHnGu3oA2w6-hKIlxWpEyz7fHN66mpo,10720
|
|
17
17
|
trilogy/core/graph_models.py,sha256=oJUMSpmYhqXlavckHLpR07GJxuQ8dZ1VbB1fB0KaS8c,2036
|
|
18
18
|
trilogy/core/internal.py,sha256=jNGFHKENnbMiMCtAgsnLZYVSENDK4b5ALecXFZpTDzQ,1075
|
|
19
|
-
trilogy/core/models.py,sha256=
|
|
19
|
+
trilogy/core/models.py,sha256=ZPMOWmN4vDvXyLZvyiaN-WZnMDukDwr2nJYFIe6vJKo,158251
|
|
20
20
|
trilogy/core/optimization.py,sha256=od_60A9F8J8Nj24MHgrxl4vwRwmBFH13TMdoMQvgVKs,7717
|
|
21
|
-
trilogy/core/query_processor.py,sha256
|
|
21
|
+
trilogy/core/query_processor.py,sha256=-fKPlygk3aX1cY60dl4tKNQofKRFl3zhqz5klRIbtq0,17683
|
|
22
22
|
trilogy/core/optimizations/__init__.py,sha256=bWQecbeiwiDx9LJnLsa7dkWxdbl2wcnkcTN69JyP8iI,356
|
|
23
23
|
trilogy/core/optimizations/base_optimization.py,sha256=tWWT-xnTbnEU-mNi_isMNbywm8B9WTRsNFwGpeh3rqE,468
|
|
24
24
|
trilogy/core/optimizations/inline_constant.py,sha256=kHNyc2UoaPVdYfVAPAFwnWuk4sJ_IF5faRtVcDOrBtw,1110
|
|
@@ -27,17 +27,17 @@ trilogy/core/optimizations/predicate_pushdown.py,sha256=1l9WnFOSv79e341typG3tTdk
|
|
|
27
27
|
trilogy/core/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
28
28
|
trilogy/core/processing/concept_strategies_v3.py,sha256=7MT_x6QFHrbSDmjz21pYdQB5ux419ES4QS-8lO16eyw,36091
|
|
29
29
|
trilogy/core/processing/graph_utils.py,sha256=aq-kqk4Iado2HywDxWEejWc-7PGO6Oa-ZQLAM6XWPHw,1199
|
|
30
|
-
trilogy/core/processing/utility.py,sha256=
|
|
30
|
+
trilogy/core/processing/utility.py,sha256=xNl310JhBda0Vv1SSwg5EtMPmMDAWucxfWd7M53Sb9k,17422
|
|
31
31
|
trilogy/core/processing/node_generators/__init__.py,sha256=-mzYkRsaRNa_dfTckYkKVFSR8h8a3ihEiPJDU_tAmDo,672
|
|
32
32
|
trilogy/core/processing/node_generators/basic_node.py,sha256=WQNgJ1MwrMS_BQ-b3XwGGB6eToDykelAVj_fesJuqe0,2069
|
|
33
|
-
trilogy/core/processing/node_generators/common.py,sha256=
|
|
33
|
+
trilogy/core/processing/node_generators/common.py,sha256=eslHTTPFTkmwHwKIuUsbFn54jxj-Avtt-QScqtNwzdg,8945
|
|
34
34
|
trilogy/core/processing/node_generators/filter_node.py,sha256=Vz9Rb67e1dfZgnliekwwLeDPVkthMbdrnrKRdz7J1ik,7654
|
|
35
35
|
trilogy/core/processing/node_generators/group_node.py,sha256=r54IVEhXW-tzod6uEHIQObrxgQt6aNySk5emWkWyqCU,4938
|
|
36
36
|
trilogy/core/processing/node_generators/group_to_node.py,sha256=R9i_wHipxjXJyfYEwfeTw2EPpuanXVA327XyfcP2tBg,2537
|
|
37
37
|
trilogy/core/processing/node_generators/multiselect_node.py,sha256=_KO9lqzHQoy4VAviO0ttQlmK0tjaqrJj4SJPhmoIYm8,6229
|
|
38
38
|
trilogy/core/processing/node_generators/node_merge_node.py,sha256=dIEv5P2MTViAES2MzqJgccYzM3HldjHrQYFwH00cqyc,14003
|
|
39
39
|
trilogy/core/processing/node_generators/rowset_node.py,sha256=KtdN6t2xM8CJxobc4aQX4W8uX98U6IabeuBF_FtBLR4,4583
|
|
40
|
-
trilogy/core/processing/node_generators/select_merge_node.py,sha256=
|
|
40
|
+
trilogy/core/processing/node_generators/select_merge_node.py,sha256=z1sF04MQsMYbLjE__co5Nwi5hTvMeTZACzTnuBe7lsk,11341
|
|
41
41
|
trilogy/core/processing/node_generators/select_node.py,sha256=nwXHQF6C-aQUIelx9dyxN2pK3muL-4-6RIqnqQqNwtw,1808
|
|
42
42
|
trilogy/core/processing/node_generators/unnest_node.py,sha256=cZ26CN338CBnd6asML1OBUtNcDzmNlFpY0Vnade4yrc,2256
|
|
43
43
|
trilogy/core/processing/node_generators/window_node.py,sha256=jy3FF8uN0VA7yyrBeR40B9CAqR_5qBP4PiS6Gr-f-7w,2590
|
|
@@ -45,14 +45,14 @@ trilogy/core/processing/nodes/__init__.py,sha256=qS5EJDRwwIrCEfS7ibCA2ESE0RPzsAI
|
|
|
45
45
|
trilogy/core/processing/nodes/base_node.py,sha256=sc3HrXkWk-xpsAQ7B7ltX1ZejYAkqFiv8Ei8Jg5VGkQ,15579
|
|
46
46
|
trilogy/core/processing/nodes/filter_node.py,sha256=GfZ9eghpFDI-s7iQP2UqTljCmn25LT_T5TAxDlh7PkQ,2343
|
|
47
47
|
trilogy/core/processing/nodes/group_node.py,sha256=PrBHaGq_f8RmokUw9lXLGJ5YbjdP77P7Ag0pgR6e2cU,7293
|
|
48
|
-
trilogy/core/processing/nodes/merge_node.py,sha256=
|
|
48
|
+
trilogy/core/processing/nodes/merge_node.py,sha256=2BjE2bTyoMHLfn_pnl1fioJkm1AfWtVKnuzzL4aWS5I,14799
|
|
49
49
|
trilogy/core/processing/nodes/select_node_v2.py,sha256=gS9OQgS2TSEK59BQ9R0i83pTHfGJUxv7AkAmT21sYxI,8067
|
|
50
50
|
trilogy/core/processing/nodes/unnest_node.py,sha256=mAmFluzm2yeeiQ6NfIB7BU_8atRGh-UJfPf9ROwbhr8,2152
|
|
51
51
|
trilogy/core/processing/nodes/window_node.py,sha256=ro0QfMFi4ZmIn5Q4D0M_vJWfnHH_C0MN7XkVkx8Gygg,1214
|
|
52
52
|
trilogy/dialect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
|
-
trilogy/dialect/base.py,sha256=
|
|
53
|
+
trilogy/dialect/base.py,sha256=JpC-ig7iRzZxcuHmKRInKfFAoNTzFOcclAvmvUJiWOo,34087
|
|
54
54
|
trilogy/dialect/bigquery.py,sha256=15KJ-cOpBlk9O7FPviPgmg8xIydJeKx7WfmL3SSsPE8,2953
|
|
55
|
-
trilogy/dialect/common.py,sha256=
|
|
55
|
+
trilogy/dialect/common.py,sha256=Pm9naWL5eo-BfZTYFMWhX9lpsGuxEyvvn_EuXbFeos0,3817
|
|
56
56
|
trilogy/dialect/config.py,sha256=tLVEMctaTDhUgARKXUNfHUcIolGaALkQ0RavUvXAY4w,2994
|
|
57
57
|
trilogy/dialect/duckdb.py,sha256=_0a5HBU8zRNtZj7YED3ju4fHXRYG9jNeKwnlZwUDvwI,3419
|
|
58
58
|
trilogy/dialect/enums.py,sha256=4NdpsydBpDn6jnh0JzFz5VvQEtnShErWtWHVyT6TNpw,3948
|
|
@@ -66,18 +66,18 @@ trilogy/hooks/graph_hook.py,sha256=onHvMQPwj_KOS3HOTpRFiy7QLLKAiycq2MzJ_Q0Oh5Y,2
|
|
|
66
66
|
trilogy/hooks/query_debugger.py,sha256=Pe-Kw1JGngeLqQOMQb0E3-24jXEavqnPCQ-KOfTfjP8,4357
|
|
67
67
|
trilogy/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
68
68
|
trilogy/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
69
|
-
trilogy/parsing/common.py,sha256=
|
|
69
|
+
trilogy/parsing/common.py,sha256=t7yiL_3f6rz_rouF9et84v5orAgs-EprV4V9ghQ6ql4,10024
|
|
70
70
|
trilogy/parsing/config.py,sha256=Z-DaefdKhPDmSXLgg5V4pebhSB0h590vI0_VtHnlukI,111
|
|
71
71
|
trilogy/parsing/exceptions.py,sha256=92E5i2frv5hj9wxObJZsZqj5T6bglvPzvdvco_vW1Zk,38
|
|
72
72
|
trilogy/parsing/helpers.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
73
|
-
trilogy/parsing/parse_engine.py,sha256=
|
|
74
|
-
trilogy/parsing/render.py,sha256=
|
|
75
|
-
trilogy/parsing/trilogy.lark,sha256=
|
|
73
|
+
trilogy/parsing/parse_engine.py,sha256=nyNyClknlHVYeHHGrSXWDAuV_E_XQSvFLUZqZ2q97kE,64513
|
|
74
|
+
trilogy/parsing/render.py,sha256=_Jb1or0XFmrj2mHHv7My1VNdWkcpOAYWnRwFW2sh4U0,14052
|
|
75
|
+
trilogy/parsing/trilogy.lark,sha256=NZgFchImZsQ3fyyBh8kwq8esTQOR5QlZ9n6k-F5H8nI,12184
|
|
76
76
|
trilogy/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
77
|
trilogy/scripts/trilogy.py,sha256=PHxvv6f2ODv0esyyhWxlARgra8dVhqQhYl0lTrSyVNo,3729
|
|
78
|
-
pytrilogy-0.0.2.
|
|
79
|
-
pytrilogy-0.0.2.
|
|
80
|
-
pytrilogy-0.0.2.
|
|
81
|
-
pytrilogy-0.0.2.
|
|
82
|
-
pytrilogy-0.0.2.
|
|
83
|
-
pytrilogy-0.0.2.
|
|
78
|
+
pytrilogy-0.0.2.26.dist-info/LICENSE.md,sha256=5ZRvtTyCCFwz1THxDTjAu3Lidds9WjPvvzgVwPSYNDo,1042
|
|
79
|
+
pytrilogy-0.0.2.26.dist-info/METADATA,sha256=1tD8kmlqzPcPdU5SiPHkiiSvXXwSqEm4sBvjp1LIDY4,8403
|
|
80
|
+
pytrilogy-0.0.2.26.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
|
|
81
|
+
pytrilogy-0.0.2.26.dist-info/entry_points.txt,sha256=0petKryjvvtEfTlbZC1AuMFumH_WQ9v8A19LvoS6G6c,54
|
|
82
|
+
pytrilogy-0.0.2.26.dist-info/top_level.txt,sha256=cAy__NW_eMAa_yT9UnUNlZLFfxcg6eimUAZ184cdNiE,8
|
|
83
|
+
pytrilogy-0.0.2.26.dist-info/RECORD,,
|
trilogy/__init__.py
CHANGED
trilogy/constants.py
CHANGED
trilogy/core/models.py
CHANGED
|
@@ -73,6 +73,7 @@ from collections import UserList, UserDict
|
|
|
73
73
|
from functools import cached_property
|
|
74
74
|
from abc import ABC
|
|
75
75
|
from collections import defaultdict
|
|
76
|
+
import hashlib
|
|
76
77
|
|
|
77
78
|
LOGGER_PREFIX = "[MODELS]"
|
|
78
79
|
|
|
@@ -190,6 +191,13 @@ class ConstantInlineable(ABC):
|
|
|
190
191
|
raise NotImplementedError
|
|
191
192
|
|
|
192
193
|
|
|
194
|
+
class HasUUID(ABC):
|
|
195
|
+
|
|
196
|
+
@property
|
|
197
|
+
def uuid(self) -> str:
|
|
198
|
+
return hashlib.md5(str(self).encode()).hexdigest()
|
|
199
|
+
|
|
200
|
+
|
|
193
201
|
class SelectTypeMixin(BaseModel):
|
|
194
202
|
where_clause: Union["WhereClause", None] = Field(default=None)
|
|
195
203
|
having_clause: Union["HavingClause", None] = Field(default=None)
|
|
@@ -1606,7 +1614,7 @@ class RawSQLStatement(BaseModel):
|
|
|
1606
1614
|
meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
|
|
1607
1615
|
|
|
1608
1616
|
|
|
1609
|
-
class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
|
|
1617
|
+
class SelectStatement(HasUUID, Mergeable, Namespaced, SelectTypeMixin, BaseModel):
|
|
1610
1618
|
selection: List[SelectItem]
|
|
1611
1619
|
order_by: Optional[OrderBy] = None
|
|
1612
1620
|
limit: Optional[int] = None
|
|
@@ -1724,19 +1732,34 @@ class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
|
|
|
1724
1732
|
# if the concept is a locally derived concept, it cannot ever be partial
|
|
1725
1733
|
# but if it's a concept pulled in from upstream and we have a where clause, it should be partial
|
|
1726
1734
|
ColumnAssignment(
|
|
1727
|
-
alias=
|
|
1735
|
+
alias=(
|
|
1736
|
+
c.name.replace(".", "_")
|
|
1737
|
+
if c.namespace == DEFAULT_NAMESPACE
|
|
1738
|
+
else c.address.replace(".", "_")
|
|
1739
|
+
),
|
|
1728
1740
|
concept=c,
|
|
1729
1741
|
modifiers=modifiers if c.address not in self.locally_derived else [],
|
|
1730
1742
|
)
|
|
1731
1743
|
for c in self.output_components
|
|
1732
1744
|
]
|
|
1733
1745
|
|
|
1746
|
+
condition = None
|
|
1747
|
+
if self.where_clause:
|
|
1748
|
+
condition = self.where_clause.conditional
|
|
1749
|
+
if self.having_clause:
|
|
1750
|
+
if condition:
|
|
1751
|
+
condition = self.having_clause.conditional + condition
|
|
1752
|
+
else:
|
|
1753
|
+
condition = self.having_clause.conditional
|
|
1754
|
+
|
|
1734
1755
|
new_datasource = Datasource(
|
|
1735
1756
|
identifier=identifier,
|
|
1736
1757
|
address=address,
|
|
1737
1758
|
grain=grain or self.grain,
|
|
1738
1759
|
columns=columns,
|
|
1739
1760
|
namespace=namespace,
|
|
1761
|
+
where=WhereClause(conditional=condition) if condition else None,
|
|
1762
|
+
non_partial_for=WhereClause(conditional=condition) if condition else None,
|
|
1740
1763
|
)
|
|
1741
1764
|
for column in columns:
|
|
1742
1765
|
column.concept = column.concept.with_grain(new_datasource.grain)
|
|
@@ -1859,7 +1882,7 @@ class AlignClause(Namespaced, BaseModel):
|
|
|
1859
1882
|
return AlignClause(items=[x.with_namespace(namespace) for x in self.items])
|
|
1860
1883
|
|
|
1861
1884
|
|
|
1862
|
-
class MultiSelectStatement(SelectTypeMixin, Mergeable, Namespaced, BaseModel):
|
|
1885
|
+
class MultiSelectStatement(HasUUID, SelectTypeMixin, Mergeable, Namespaced, BaseModel):
|
|
1863
1886
|
selects: List[SelectStatement]
|
|
1864
1887
|
align: AlignClause
|
|
1865
1888
|
namespace: str
|
|
@@ -2021,7 +2044,7 @@ class DatasourceMetadata(BaseModel):
|
|
|
2021
2044
|
line_no: int | None = None
|
|
2022
2045
|
|
|
2023
2046
|
|
|
2024
|
-
class MergeStatementV2(Namespaced, BaseModel):
|
|
2047
|
+
class MergeStatementV2(HasUUID, Namespaced, BaseModel):
|
|
2025
2048
|
source: Concept
|
|
2026
2049
|
target: Concept
|
|
2027
2050
|
modifiers: List[Modifier] = Field(default_factory=list)
|
|
@@ -2035,7 +2058,7 @@ class MergeStatementV2(Namespaced, BaseModel):
|
|
|
2035
2058
|
return new
|
|
2036
2059
|
|
|
2037
2060
|
|
|
2038
|
-
class Datasource(Namespaced, BaseModel):
|
|
2061
|
+
class Datasource(HasUUID, Namespaced, BaseModel):
|
|
2039
2062
|
identifier: str
|
|
2040
2063
|
columns: List[ColumnAssignment]
|
|
2041
2064
|
address: Union[Address, str]
|
|
@@ -2047,6 +2070,7 @@ class Datasource(Namespaced, BaseModel):
|
|
|
2047
2070
|
default_factory=lambda: DatasourceMetadata(freshness_concept=None)
|
|
2048
2071
|
)
|
|
2049
2072
|
where: Optional[WhereClause] = None
|
|
2073
|
+
non_partial_for: Optional[WhereClause] = None
|
|
2050
2074
|
|
|
2051
2075
|
def merge_concept(
|
|
2052
2076
|
self, source: Concept, target: Concept, modifiers: List[Modifier]
|
|
@@ -2247,6 +2271,7 @@ class InstantiatedUnnestJoin(BaseModel):
|
|
|
2247
2271
|
class ConceptPair(BaseModel):
|
|
2248
2272
|
left: Concept
|
|
2249
2273
|
right: Concept
|
|
2274
|
+
existing_datasource: Union[Datasource, "QueryDatasource"]
|
|
2250
2275
|
modifiers: List[Modifier] = Field(default_factory=list)
|
|
2251
2276
|
|
|
2252
2277
|
@property
|
|
@@ -2258,17 +2283,23 @@ class ConceptPair(BaseModel):
|
|
|
2258
2283
|
return Modifier.NULLABLE in self.modifiers
|
|
2259
2284
|
|
|
2260
2285
|
|
|
2286
|
+
class CTEConceptPair(ConceptPair):
|
|
2287
|
+
cte: CTE
|
|
2288
|
+
|
|
2289
|
+
|
|
2261
2290
|
class BaseJoin(BaseModel):
|
|
2262
|
-
left_datasource: Union[Datasource, "QueryDatasource"]
|
|
2263
2291
|
right_datasource: Union[Datasource, "QueryDatasource"]
|
|
2264
|
-
concepts: List[Concept]
|
|
2265
2292
|
join_type: JoinType
|
|
2266
|
-
|
|
2293
|
+
concepts: Optional[List[Concept]] = None
|
|
2294
|
+
left_datasource: Optional[Union[Datasource, "QueryDatasource"]] = None
|
|
2267
2295
|
concept_pairs: list[ConceptPair] | None = None
|
|
2268
2296
|
|
|
2269
2297
|
def __init__(self, **data: Any):
|
|
2270
2298
|
super().__init__(**data)
|
|
2271
|
-
if
|
|
2299
|
+
if (
|
|
2300
|
+
self.left_datasource
|
|
2301
|
+
and self.left_datasource.full_name == self.right_datasource.full_name
|
|
2302
|
+
):
|
|
2272
2303
|
raise SyntaxError(
|
|
2273
2304
|
f"Cannot join a dataself to itself, joining {self.left_datasource} and"
|
|
2274
2305
|
f" {self.right_datasource}"
|
|
@@ -2278,8 +2309,10 @@ class BaseJoin(BaseModel):
|
|
|
2278
2309
|
# if we have a list of concept pairs
|
|
2279
2310
|
if self.concept_pairs:
|
|
2280
2311
|
return
|
|
2281
|
-
|
|
2282
|
-
|
|
2312
|
+
if self.concepts == []:
|
|
2313
|
+
return
|
|
2314
|
+
assert self.left_datasource and self.right_datasource
|
|
2315
|
+
for concept in self.concepts or []:
|
|
2283
2316
|
include = True
|
|
2284
2317
|
for ds in [self.left_datasource, self.right_datasource]:
|
|
2285
2318
|
synonyms = []
|
|
@@ -2289,13 +2322,10 @@ class BaseJoin(BaseModel):
|
|
|
2289
2322
|
concept.address not in [c.address for c in ds.output_concepts]
|
|
2290
2323
|
and concept.address not in synonyms
|
|
2291
2324
|
):
|
|
2292
|
-
|
|
2293
|
-
|
|
2294
|
-
|
|
2295
|
-
|
|
2296
|
-
f"Invalid join, missing {concept} on {ds.name}, have"
|
|
2297
|
-
f" {[c.address for c in ds.output_concepts]}"
|
|
2298
|
-
)
|
|
2325
|
+
raise SyntaxError(
|
|
2326
|
+
f"Invalid join, missing {concept} on {ds.name}, have"
|
|
2327
|
+
f" {[c.address for c in ds.output_concepts]}"
|
|
2328
|
+
)
|
|
2299
2329
|
if include:
|
|
2300
2330
|
final_concepts.append(concept)
|
|
2301
2331
|
if not final_concepts and self.concepts:
|
|
@@ -2312,7 +2342,7 @@ class BaseJoin(BaseModel):
|
|
|
2312
2342
|
self.concepts = []
|
|
2313
2343
|
return
|
|
2314
2344
|
# if everything is at abstract grain, we can skip joins
|
|
2315
|
-
if all([c.grain
|
|
2345
|
+
if all([c.grain.abstract for c in ds.output_concepts]):
|
|
2316
2346
|
self.concepts = []
|
|
2317
2347
|
return
|
|
2318
2348
|
|
|
@@ -2330,21 +2360,27 @@ class BaseJoin(BaseModel):
|
|
|
2330
2360
|
|
|
2331
2361
|
@property
|
|
2332
2362
|
def unique_id(self) -> str:
|
|
2333
|
-
|
|
2334
|
-
|
|
2335
|
-
|
|
2336
|
-
|
|
2337
|
-
|
|
2338
|
-
|
|
2363
|
+
return str(self)
|
|
2364
|
+
|
|
2365
|
+
@property
|
|
2366
|
+
def input_concepts(self) -> List[Concept]:
|
|
2367
|
+
base = []
|
|
2368
|
+
if self.concept_pairs:
|
|
2369
|
+
for pair in self.concept_pairs:
|
|
2370
|
+
base += [pair.left, pair.right]
|
|
2371
|
+
elif self.concepts:
|
|
2372
|
+
base += self.concepts
|
|
2373
|
+
return base
|
|
2339
2374
|
|
|
2340
2375
|
def __str__(self):
|
|
2341
2376
|
if self.concept_pairs:
|
|
2342
2377
|
return (
|
|
2343
|
-
f"{self.join_type.value} on"
|
|
2344
|
-
f" {','.join([str(k.left)+'='+str(k.right) for k in self.concept_pairs])}"
|
|
2378
|
+
f"{self.join_type.value} {self.right_datasource.name} on"
|
|
2379
|
+
f" {','.join([str(k.existing_datasource.name) + '.'+ str(k.left)+'='+str(k.right) for k in self.concept_pairs])}"
|
|
2345
2380
|
)
|
|
2346
2381
|
return (
|
|
2347
|
-
f"{self.join_type.value}
|
|
2382
|
+
f"{self.join_type.value} {self.right_datasource.name} on"
|
|
2383
|
+
f" {','.join([str(k) for k in self.concepts])}"
|
|
2348
2384
|
)
|
|
2349
2385
|
|
|
2350
2386
|
|
|
@@ -2389,19 +2425,9 @@ class QueryDatasource(BaseModel):
|
|
|
2389
2425
|
for join in v:
|
|
2390
2426
|
if not isinstance(join, BaseJoin):
|
|
2391
2427
|
continue
|
|
2392
|
-
|
|
2393
|
-
raise SyntaxError(
|
|
2394
|
-
f"Cannot join a datasource to itself, joining {join.left_datasource}"
|
|
2395
|
-
)
|
|
2396
|
-
pairing = "".join(
|
|
2397
|
-
sorted(
|
|
2398
|
-
[join.left_datasource.identifier, join.right_datasource.identifier]
|
|
2399
|
-
)
|
|
2400
|
-
)
|
|
2428
|
+
pairing = str(join)
|
|
2401
2429
|
if pairing in unique_pairs:
|
|
2402
|
-
raise SyntaxError(
|
|
2403
|
-
f"Duplicate join {join.left_datasource.identifier} and {join.right_datasource.identifier}"
|
|
2404
|
-
)
|
|
2430
|
+
raise SyntaxError(f"Duplicate join {str(join)}")
|
|
2405
2431
|
unique_pairs.add(pairing)
|
|
2406
2432
|
return v
|
|
2407
2433
|
|
|
@@ -2666,7 +2692,12 @@ class CTE(BaseModel):
|
|
|
2666
2692
|
isinstance(join, Join)
|
|
2667
2693
|
and (
|
|
2668
2694
|
join.right_cte.name != removed_cte
|
|
2669
|
-
and
|
|
2695
|
+
and any(
|
|
2696
|
+
[
|
|
2697
|
+
x.cte.name != removed_cte
|
|
2698
|
+
for x in (join.joinkey_pairs or [])
|
|
2699
|
+
]
|
|
2700
|
+
)
|
|
2670
2701
|
)
|
|
2671
2702
|
)
|
|
2672
2703
|
]
|
|
@@ -2737,8 +2768,12 @@ class CTE(BaseModel):
|
|
|
2737
2768
|
for join in self.joins:
|
|
2738
2769
|
if isinstance(join, InstantiatedUnnestJoin):
|
|
2739
2770
|
continue
|
|
2740
|
-
if join.left_cte.name == parent.name:
|
|
2771
|
+
if join.left_cte and join.left_cte.name == parent.name:
|
|
2741
2772
|
join.inline_cte(parent)
|
|
2773
|
+
if join.joinkey_pairs:
|
|
2774
|
+
for pair in join.joinkey_pairs:
|
|
2775
|
+
if pair.cte and pair.cte.name == parent.name:
|
|
2776
|
+
join.inline_cte(parent)
|
|
2742
2777
|
if join.right_cte.name == parent.name:
|
|
2743
2778
|
join.inline_cte(parent)
|
|
2744
2779
|
for k, v in self.source_map.items():
|
|
@@ -2961,21 +2996,26 @@ class JoinKey(BaseModel):
|
|
|
2961
2996
|
|
|
2962
2997
|
|
|
2963
2998
|
class Join(BaseModel):
|
|
2964
|
-
|
|
2999
|
+
|
|
2965
3000
|
right_cte: CTE
|
|
2966
3001
|
jointype: JoinType
|
|
2967
|
-
|
|
2968
|
-
joinkey_pairs: List[
|
|
3002
|
+
left_cte: CTE | None = None
|
|
3003
|
+
joinkey_pairs: List[CTEConceptPair] | None = None
|
|
2969
3004
|
inlined_ctes: set[str] = Field(default_factory=set)
|
|
2970
3005
|
|
|
2971
3006
|
def inline_cte(self, cte: CTE):
|
|
2972
3007
|
self.inlined_ctes.add(cte.name)
|
|
2973
3008
|
|
|
2974
|
-
@property
|
|
2975
|
-
def left_name(self) -> str:
|
|
2976
|
-
|
|
2977
|
-
|
|
2978
|
-
|
|
3009
|
+
# @property
|
|
3010
|
+
# def left_name(self) -> str:
|
|
3011
|
+
# if self.left_cte.name in self.inlined_ctes:
|
|
3012
|
+
# return self.left_cte.source.datasources[0].identifier
|
|
3013
|
+
# return self.left_cte.name
|
|
3014
|
+
|
|
3015
|
+
def get_name(self, cte: CTE):
|
|
3016
|
+
if cte.name in self.inlined_ctes:
|
|
3017
|
+
return cte.source.datasources[0].identifier
|
|
3018
|
+
return cte.name
|
|
2979
3019
|
|
|
2980
3020
|
@property
|
|
2981
3021
|
def right_name(self) -> str:
|
|
@@ -2983,12 +3023,6 @@ class Join(BaseModel):
|
|
|
2983
3023
|
return self.right_cte.source.datasources[0].identifier
|
|
2984
3024
|
return self.right_cte.name
|
|
2985
3025
|
|
|
2986
|
-
@property
|
|
2987
|
-
def left_ref(self) -> str:
|
|
2988
|
-
if self.left_cte.name in self.inlined_ctes:
|
|
2989
|
-
return f"{self.left_cte.source.datasources[0].safe_location} as {self.left_cte.source.datasources[0].identifier}"
|
|
2990
|
-
return self.left_cte.name
|
|
2991
|
-
|
|
2992
3026
|
@property
|
|
2993
3027
|
def right_ref(self) -> str:
|
|
2994
3028
|
if self.right_cte.name in self.inlined_ctes:
|
|
@@ -2997,19 +3031,21 @@ class Join(BaseModel):
|
|
|
2997
3031
|
|
|
2998
3032
|
@property
|
|
2999
3033
|
def unique_id(self) -> str:
|
|
3000
|
-
return self
|
|
3034
|
+
return str(self)
|
|
3001
3035
|
|
|
3002
3036
|
def __str__(self):
|
|
3003
3037
|
if self.joinkey_pairs:
|
|
3004
3038
|
return (
|
|
3005
|
-
f"{self.jointype.value}
|
|
3039
|
+
f"{self.jointype.value} join"
|
|
3006
3040
|
f" {self.right_name} on"
|
|
3007
|
-
f" {','.join([str(k.left)+'='+str(k.right
|
|
3041
|
+
f" {','.join([k.cte.name + '.'+str(k.left.address)+'='+str(k.right.address) for k in self.joinkey_pairs])}"
|
|
3008
3042
|
)
|
|
3009
|
-
|
|
3010
|
-
|
|
3011
|
-
|
|
3012
|
-
|
|
3043
|
+
elif self.left_cte:
|
|
3044
|
+
return (
|
|
3045
|
+
f"{self.jointype.value} JOIN {self.left_cte.name} and"
|
|
3046
|
+
f" {self.right_name} on {','.join([str(k) for k in self.joinkey_pairs])}"
|
|
3047
|
+
)
|
|
3048
|
+
return f"{self.jointype.value} JOIN {self.right_name} on {','.join([str(k) for k in self.joinkey_pairs])}"
|
|
3013
3049
|
|
|
3014
3050
|
|
|
3015
3051
|
class UndefinedConcept(Concept, Mergeable, Namespaced):
|
|
@@ -3227,7 +3263,7 @@ class EnvironmentConceptDict(dict):
|
|
|
3227
3263
|
return super().items()
|
|
3228
3264
|
|
|
3229
3265
|
|
|
3230
|
-
class ImportStatement(BaseModel):
|
|
3266
|
+
class ImportStatement(HasUUID, BaseModel):
|
|
3231
3267
|
alias: str
|
|
3232
3268
|
path: Path
|
|
3233
3269
|
environment: Union["Environment", None] = None
|
|
@@ -4223,6 +4259,9 @@ class AggregateWrapper(Mergeable, Namespaced, SelectContext, BaseModel):
|
|
|
4223
4259
|
class WhereClause(Mergeable, ConceptArgs, Namespaced, SelectContext, BaseModel):
|
|
4224
4260
|
conditional: Union[SubselectComparison, Comparison, Conditional, "Parenthetical"]
|
|
4225
4261
|
|
|
4262
|
+
def __repr__(self):
|
|
4263
|
+
return str(self.conditional)
|
|
4264
|
+
|
|
4226
4265
|
@property
|
|
4227
4266
|
def input(self) -> List[Concept]:
|
|
4228
4267
|
return self.conditional.input
|
|
@@ -4341,7 +4380,7 @@ class Limit(BaseModel):
|
|
|
4341
4380
|
count: int
|
|
4342
4381
|
|
|
4343
4382
|
|
|
4344
|
-
class ConceptDeclarationStatement(BaseModel):
|
|
4383
|
+
class ConceptDeclarationStatement(HasUUID, BaseModel):
|
|
4345
4384
|
concept: Concept
|
|
4346
4385
|
|
|
4347
4386
|
|
|
@@ -4349,7 +4388,7 @@ class ConceptDerivation(BaseModel):
|
|
|
4349
4388
|
concept: Concept
|
|
4350
4389
|
|
|
4351
4390
|
|
|
4352
|
-
class RowsetDerivationStatement(Namespaced, BaseModel):
|
|
4391
|
+
class RowsetDerivationStatement(HasUUID, Namespaced, BaseModel):
|
|
4353
4392
|
name: str
|
|
4354
4393
|
select: SelectStatement | MultiSelectStatement
|
|
4355
4394
|
namespace: str
|
|
@@ -4614,7 +4653,7 @@ class TupleWrapper(Generic[VT], tuple):
|
|
|
4614
4653
|
return cls(v, type=arg_to_datatype(v[0]))
|
|
4615
4654
|
|
|
4616
4655
|
|
|
4617
|
-
class PersistStatement(BaseModel):
|
|
4656
|
+
class PersistStatement(HasUUID, BaseModel):
|
|
4618
4657
|
datasource: Datasource
|
|
4619
4658
|
select: SelectStatement
|
|
4620
4659
|
meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
|
|
@@ -28,14 +28,18 @@ def extract_address(node: str):
|
|
|
28
28
|
return node.split("~")[1].split("@")[0]
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
def get_graph_partial_nodes(
|
|
31
|
+
def get_graph_partial_nodes(
|
|
32
|
+
g: nx.DiGraph, conditions: WhereClause | None
|
|
33
|
+
) -> dict[str, list[str]]:
|
|
32
34
|
datasources: dict[str, Datasource] = nx.get_node_attributes(g, "datasource")
|
|
33
35
|
partial: dict[str, list[str]] = {}
|
|
34
36
|
for node in g.nodes:
|
|
35
37
|
if node in datasources:
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
38
|
+
ds = datasources[node]
|
|
39
|
+
partial[node] = [concept_to_node(c) for c in ds.partial_concepts]
|
|
40
|
+
if ds.non_partial_for and conditions == ds.non_partial_for:
|
|
41
|
+
partial[node] = []
|
|
42
|
+
|
|
39
43
|
return partial
|
|
40
44
|
|
|
41
45
|
|
|
@@ -49,7 +53,10 @@ def get_graph_grain_length(g: nx.DiGraph) -> dict[str, int]:
|
|
|
49
53
|
|
|
50
54
|
|
|
51
55
|
def create_pruned_concept_graph(
|
|
52
|
-
g: nx.DiGraph,
|
|
56
|
+
g: nx.DiGraph,
|
|
57
|
+
all_concepts: List[Concept],
|
|
58
|
+
accept_partial: bool = False,
|
|
59
|
+
conditions: WhereClause | None = None,
|
|
53
60
|
) -> nx.DiGraph:
|
|
54
61
|
orig_g = g
|
|
55
62
|
g = g.copy()
|
|
@@ -66,11 +73,7 @@ def create_pruned_concept_graph(
|
|
|
66
73
|
relevent_datasets: list[str] = []
|
|
67
74
|
if not accept_partial:
|
|
68
75
|
partial = {}
|
|
69
|
-
|
|
70
|
-
if node in datasources:
|
|
71
|
-
partial[node] = [
|
|
72
|
-
concept_to_node(c) for c in datasources[node].partial_concepts
|
|
73
|
-
]
|
|
76
|
+
partial = get_graph_partial_nodes(g, conditions)
|
|
74
77
|
to_remove = []
|
|
75
78
|
for edge in g.edges:
|
|
76
79
|
if (
|
|
@@ -133,31 +136,53 @@ def create_pruned_concept_graph(
|
|
|
133
136
|
return g
|
|
134
137
|
|
|
135
138
|
|
|
136
|
-
def resolve_subgraphs(
|
|
139
|
+
def resolve_subgraphs(
|
|
140
|
+
g: nx.DiGraph, conditions: WhereClause | None
|
|
141
|
+
) -> dict[str, list[str]]:
|
|
137
142
|
datasources = [n for n in g.nodes if n.startswith("ds~")]
|
|
138
|
-
subgraphs
|
|
139
|
-
|
|
143
|
+
subgraphs: dict[str, list[str]] = {
|
|
144
|
+
ds: list(set(list(nx.all_neighbors(g, ds)))) for ds in datasources
|
|
145
|
+
}
|
|
146
|
+
partial_map = get_graph_partial_nodes(g, conditions)
|
|
140
147
|
grain_length = get_graph_grain_length(g)
|
|
141
|
-
|
|
142
|
-
|
|
148
|
+
concepts: dict[str, Concept] = nx.get_node_attributes(g, "concept")
|
|
149
|
+
non_partial_map = {
|
|
150
|
+
ds: [concepts[c].address for c in subgraphs[ds] if c not in partial_map[ds]]
|
|
151
|
+
for ds in datasources
|
|
152
|
+
}
|
|
153
|
+
concept_map = {
|
|
154
|
+
ds: [concepts[c].address for c in subgraphs[ds]] for ds in datasources
|
|
143
155
|
}
|
|
144
156
|
pruned_subgraphs = {}
|
|
145
|
-
for key,
|
|
157
|
+
for key, nodes in subgraphs.items():
|
|
158
|
+
value = non_partial_map[key]
|
|
159
|
+
all_concepts = concept_map[key]
|
|
146
160
|
is_subset = False
|
|
147
161
|
matches = set()
|
|
148
162
|
# Compare current list with other lists
|
|
149
|
-
for other_key,
|
|
150
|
-
|
|
163
|
+
for other_key, other_all_concepts in concept_map.items():
|
|
164
|
+
other_value = non_partial_map[other_key]
|
|
165
|
+
# needs to be a subset of non partial and a subset of all
|
|
166
|
+
if (
|
|
167
|
+
key != other_key
|
|
168
|
+
and set(value).issubset(set(other_value))
|
|
169
|
+
and set(all_concepts).issubset(set(other_all_concepts))
|
|
170
|
+
):
|
|
151
171
|
if len(value) < len(other_value):
|
|
152
172
|
is_subset = True
|
|
173
|
+
logger.debug(
|
|
174
|
+
f"Dropping subgraph {key} with {value} as it is a subset of {other_key} with {other_value}"
|
|
175
|
+
)
|
|
153
176
|
break
|
|
154
|
-
elif len(value) == len(other_value)
|
|
177
|
+
elif len(value) == len(other_value) and len(all_concepts) == len(
|
|
178
|
+
other_all_concepts
|
|
179
|
+
):
|
|
155
180
|
matches.add(other_key)
|
|
156
181
|
matches.add(key)
|
|
157
182
|
if matches:
|
|
158
183
|
is_subset = key is not min(matches, key=lambda x: (grain_length[x], x))
|
|
159
184
|
if not is_subset:
|
|
160
|
-
pruned_subgraphs[key] =
|
|
185
|
+
pruned_subgraphs[key] = nodes
|
|
161
186
|
return pruned_subgraphs
|
|
162
187
|
|
|
163
188
|
|
|
@@ -261,7 +286,9 @@ def gen_select_merge_node(
|
|
|
261
286
|
force_group=False,
|
|
262
287
|
)
|
|
263
288
|
for attempt in [False, True]:
|
|
264
|
-
pruned_concept_graph = create_pruned_concept_graph(
|
|
289
|
+
pruned_concept_graph = create_pruned_concept_graph(
|
|
290
|
+
g, non_constant, attempt, conditions
|
|
291
|
+
)
|
|
265
292
|
if pruned_concept_graph:
|
|
266
293
|
logger.info(
|
|
267
294
|
f"{padding(depth)}{LOGGER_PREFIX} found covering graph w/ partial flag {attempt}"
|
|
@@ -274,7 +301,7 @@ def gen_select_merge_node(
|
|
|
274
301
|
)
|
|
275
302
|
return None
|
|
276
303
|
|
|
277
|
-
sub_nodes = resolve_subgraphs(pruned_concept_graph)
|
|
304
|
+
sub_nodes = resolve_subgraphs(pruned_concept_graph, conditions)
|
|
278
305
|
|
|
279
306
|
logger.info(f"{padding(depth)}{LOGGER_PREFIX} fetching subgraphs {sub_nodes}")
|
|
280
307
|
parents = [
|
|
@@ -215,7 +215,7 @@ class MergeNode(StrategyNode):
|
|
|
215
215
|
logger.info(
|
|
216
216
|
f"{self.logging_prefix}{LOGGER_PREFIX} inferring node joins to target grain {str(grain)}"
|
|
217
217
|
)
|
|
218
|
-
joins = get_node_joins(dataset_list,
|
|
218
|
+
joins = get_node_joins(dataset_list, environment=environment)
|
|
219
219
|
elif final_joins:
|
|
220
220
|
logger.info(
|
|
221
221
|
f"{self.logging_prefix}{LOGGER_PREFIX} translating provided node joins {len(final_joins)}"
|
|
@@ -314,7 +314,7 @@ class MergeNode(StrategyNode):
|
|
|
314
314
|
full_join_concepts = []
|
|
315
315
|
for join in joins:
|
|
316
316
|
if isinstance(join, BaseJoin) and join.join_type == JoinType.FULL:
|
|
317
|
-
full_join_concepts += join.
|
|
317
|
+
full_join_concepts += join.input_concepts
|
|
318
318
|
if self.whole_grain:
|
|
319
319
|
force_group = False
|
|
320
320
|
elif self.force_group is False:
|