pytrilogy 0.0.2.58__py3-none-any.whl → 0.0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/METADATA +9 -2
- pytrilogy-0.0.3.0.dist-info/RECORD +99 -0
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +2 -2
- trilogy/core/enums.py +1 -7
- trilogy/core/env_processor.py +17 -5
- trilogy/core/environment_helpers.py +11 -25
- trilogy/core/exceptions.py +4 -0
- trilogy/core/functions.py +695 -261
- trilogy/core/graph_models.py +10 -10
- trilogy/core/internal.py +11 -2
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2110 -0
- trilogy/core/models/build.py +1845 -0
- trilogy/core/models/build_environment.py +151 -0
- trilogy/core/models/core.py +370 -0
- trilogy/core/models/datasource.py +297 -0
- trilogy/core/models/environment.py +696 -0
- trilogy/core/models/execute.py +931 -0
- trilogy/core/optimization.py +14 -16
- trilogy/core/optimizations/base_optimization.py +1 -1
- trilogy/core/optimizations/inline_constant.py +6 -6
- trilogy/core/optimizations/inline_datasource.py +17 -11
- trilogy/core/optimizations/predicate_pushdown.py +17 -16
- trilogy/core/processing/concept_strategies_v3.py +180 -145
- trilogy/core/processing/graph_utils.py +1 -1
- trilogy/core/processing/node_generators/basic_node.py +19 -18
- trilogy/core/processing/node_generators/common.py +50 -44
- trilogy/core/processing/node_generators/filter_node.py +26 -13
- trilogy/core/processing/node_generators/group_node.py +26 -21
- trilogy/core/processing/node_generators/group_to_node.py +11 -8
- trilogy/core/processing/node_generators/multiselect_node.py +60 -43
- trilogy/core/processing/node_generators/node_merge_node.py +76 -38
- trilogy/core/processing/node_generators/rowset_node.py +57 -36
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
- trilogy/core/processing/node_generators/select_merge_node.py +161 -64
- trilogy/core/processing/node_generators/select_node.py +13 -13
- trilogy/core/processing/node_generators/union_node.py +12 -11
- trilogy/core/processing/node_generators/unnest_node.py +9 -7
- trilogy/core/processing/node_generators/window_node.py +19 -16
- trilogy/core/processing/nodes/__init__.py +21 -18
- trilogy/core/processing/nodes/base_node.py +82 -66
- trilogy/core/processing/nodes/filter_node.py +19 -13
- trilogy/core/processing/nodes/group_node.py +50 -35
- trilogy/core/processing/nodes/merge_node.py +45 -36
- trilogy/core/processing/nodes/select_node_v2.py +53 -39
- trilogy/core/processing/nodes/union_node.py +5 -7
- trilogy/core/processing/nodes/unnest_node.py +7 -11
- trilogy/core/processing/nodes/window_node.py +9 -4
- trilogy/core/processing/utility.py +103 -75
- trilogy/core/query_processor.py +65 -47
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +413 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +30 -0
- trilogy/core/statements/execute.py +42 -0
- trilogy/dialect/base.py +146 -106
- trilogy/dialect/common.py +9 -10
- trilogy/dialect/duckdb.py +1 -1
- trilogy/dialect/enums.py +4 -2
- trilogy/dialect/presto.py +1 -1
- trilogy/dialect/sql_server.py +1 -1
- trilogy/executor.py +44 -32
- trilogy/hooks/base_hook.py +6 -4
- trilogy/hooks/query_debugger.py +110 -93
- trilogy/parser.py +1 -1
- trilogy/parsing/common.py +303 -64
- trilogy/parsing/parse_engine.py +263 -617
- trilogy/parsing/render.py +50 -26
- trilogy/scripts/trilogy.py +2 -1
- pytrilogy-0.0.2.58.dist-info/RECORD +0 -87
- trilogy/core/models.py +0 -4960
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.0.dist-info}/top_level.txt +0 -0
trilogy/core/models/execute.py
@@ -0,0 +1,931 @@
+from __future__ import annotations
+
+from collections import defaultdict
+from typing import Any, Dict, List, Optional, Set, Union
+
+from pydantic import BaseModel, Field, ValidationInfo, computed_field, field_validator
+
+from trilogy.constants import CONFIG, logger
+from trilogy.core.constants import CONSTANT_DATASET
+from trilogy.core.enums import (
+    Derivation,
+    FunctionType,
+    Granularity,
+    JoinType,
+    Modifier,
+    Purpose,
+    SourceType,
+)
+from trilogy.core.models.build import (
+    BuildComparison,
+    BuildConcept,
+    BuildConditional,
+    BuildDatasource,
+    BuildFunction,
+    BuildGrain,
+    BuildOrderBy,
+    BuildParenthetical,
+    BuildRowsetItem,
+    LooseBuildConceptList,
+)
+from trilogy.core.models.datasource import Address
+from trilogy.utility import unique
+
+LOGGER_PREFIX = "[MODELS_EXECUTE]"
+
+DATASOURCE_TYPES = (BuildDatasource, BuildDatasource)
+
+
+class CTE(BaseModel):
+    name: str
+    source: "QueryDatasource"
+    output_columns: List[BuildConcept]
+    source_map: Dict[str, list[str]]
+    grain: BuildGrain
+    base: bool = False
+    group_to_grain: bool = False
+    existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
+    parent_ctes: List[Union["CTE", "UnionCTE"]] = Field(default_factory=list)
+    joins: List[Union["Join", "InstantiatedUnnestJoin"]] = Field(default_factory=list)
+    condition: Optional[
+        Union[BuildComparison, BuildConditional, BuildParenthetical]
+    ] = None
+    partial_concepts: List[BuildConcept] = Field(default_factory=list)
+    nullable_concepts: List[BuildConcept] = Field(default_factory=list)
+    join_derived_concepts: List[BuildConcept] = Field(default_factory=list)
+    hidden_concepts: set[str] = Field(default_factory=set)
+    order_by: Optional[BuildOrderBy] = None
+    limit: Optional[int] = None
+    base_name_override: Optional[str] = None
+    base_alias_override: Optional[str] = None
+
+    @property
+    def identifier(self):
+        return self.name
+
+    @property
+    def safe_identifier(self):
+        return self.name
+
+    @computed_field  # type: ignore
+    @property
+    def output_lcl(self) -> LooseBuildConceptList:
+        return LooseBuildConceptList(concepts=self.output_columns)
+
+    @field_validator("output_columns")
+    def validate_output_columns(cls, v):
+        return unique(v, "address")
+
+    def inline_constant(self, concept: BuildConcept):
+        if not concept.derivation == Derivation.CONSTANT:
+            return False
+        if not isinstance(concept.lineage, BuildFunction):
+            return False
+        if not concept.lineage.operator == FunctionType.CONSTANT:
+            return False
+        # remove the constant
+        removed: set = set()
+        if concept.address in self.source_map:
+            removed = removed.union(self.source_map[concept.address])
+            del self.source_map[concept.address]
+
+        if self.condition:
+            self.condition = self.condition.inline_constant(concept)
+        # if we've entirely removed the need to join to someplace to get the concept
+        # drop the join as well.
+        for removed_cte in removed:
+            still_required = any(
+                [
+                    removed_cte in x
+                    for x in self.source_map.values()
+                    or self.existence_source_map.values()
+                ]
+            )
+            if not still_required:
+                self.joins = [
+                    join
+                    for join in self.joins
+                    if not isinstance(join, Join)
+                    or (
+                        isinstance(join, Join)
+                        and (
+                            join.right_cte.name != removed_cte
+                            and any(
+                                [
+                                    x.cte.name != removed_cte
+                                    for x in (join.joinkey_pairs or [])
+                                ]
+                            )
+                        )
+                    )
+                ]
+                for join in self.joins:
+                    if isinstance(join, UnnestJoin) and concept in join.concepts:
+                        join.rendering_required = False
+
+                self.parent_ctes = [
+                    x for x in self.parent_ctes if x.name != removed_cte
+                ]
+                if removed_cte == self.base_name_override:
+                    candidates = [x.name for x in self.parent_ctes]
+                    self.base_name_override = candidates[0] if candidates else None
+                    self.base_alias_override = candidates[0] if candidates else None
+        return True
+
+    @property
+    def comment(self) -> str:
+        base = f"Target: {str(self.grain)}. Group: {self.group_to_grain}"
+        base += f" Source: {self.source.source_type}."
+        if self.parent_ctes:
+            base += f" References: {', '.join([x.name for x in self.parent_ctes])}."
+        if self.joins:
+            base += f"\n-- Joins: {', '.join([str(x) for x in self.joins])}."
+        if self.partial_concepts:
+            base += (
+                f"\n-- Partials: {', '.join([str(x) for x in self.partial_concepts])}."
+            )
+        base += f"\n-- Source Map: {self.source_map}."
+        base += f"\n-- Output: {', '.join([str(x) for x in self.output_columns])}."
+        if self.source.input_concepts:
+            base += f"\n-- Inputs: {', '.join([str(x) for x in self.source.input_concepts])}."
+        if self.hidden_concepts:
+            base += f"\n-- Hidden: {', '.join([str(x) for x in self.hidden_concepts])}."
+        if self.nullable_concepts:
+            base += (
+                f"\n-- Nullable: {', '.join([str(x) for x in self.nullable_concepts])}."
+            )
+
+        return base
+
+    def inline_parent_datasource(
+        self, parent: "CTE", force_group: bool = False
+    ) -> bool:
+        qds_being_inlined = parent.source
+        ds_being_inlined = qds_being_inlined.datasources[0]
+        if not isinstance(ds_being_inlined, DATASOURCE_TYPES):
+            return False
+        if any(
+            [
+                x.safe_identifier == ds_being_inlined.safe_identifier
+                for x in self.source.datasources
+            ]
+        ):
+            return False
+
+        self.source.datasources = [
+            ds_being_inlined,
+            *[
+                x
+                for x in self.source.datasources
+                if x.safe_identifier != qds_being_inlined.safe_identifier
+            ],
+        ]
+        # need to identify this before updating joins
+        if self.base_name == parent.name:
+            self.base_name_override = ds_being_inlined.safe_location
+            self.base_alias_override = ds_being_inlined.safe_identifier
+
+        for join in self.joins:
+            if isinstance(join, InstantiatedUnnestJoin):
+                continue
+            if (
+                join.left_cte
+                and join.left_cte.safe_identifier == parent.safe_identifier
+            ):
+                join.inline_cte(parent)
+            if join.joinkey_pairs:
+                for pair in join.joinkey_pairs:
+                    if pair.cte and pair.cte.safe_identifier == parent.safe_identifier:
+                        join.inline_cte(parent)
+            if join.right_cte.safe_identifier == parent.safe_identifier:
+                join.inline_cte(parent)
+        for k, v in self.source_map.items():
+            if isinstance(v, list):
+                self.source_map[k] = [
+                    (
+                        ds_being_inlined.safe_identifier
+                        if x == parent.safe_identifier
+                        else x
+                    )
+                    for x in v
+                ]
+            elif v == parent.safe_identifier:
+                self.source_map[k] = [ds_being_inlined.safe_identifier]
+
+        # zip in any required values for lookups
+        for k in ds_being_inlined.output_lcl.addresses:
+            if k in self.source_map and self.source_map[k]:
+                continue
+            self.source_map[k] = [ds_being_inlined.safe_identifier]
+        self.parent_ctes = [
+            x for x in self.parent_ctes if x.safe_identifier != parent.safe_identifier
+        ]
+        if force_group:
+            self.group_to_grain = True
+        return True
+
+    def __add__(self, other: "CTE" | "UnionCTE"):
+        if isinstance(other, UnionCTE):
+            raise ValueError("cannot merge CTE and union CTE")
+        logger.info('Merging two copies of CTE "%s"', self.name)
+        if not self.grain == other.grain:
+            error = (
+                "Attempting to merge two ctes of different grains"
+                f" {self.name} {other.name} grains {self.grain} {other.grain}| {self.group_to_grain} {other.group_to_grain}| {self.output_lcl} {other.output_lcl}"
+            )
+            raise ValueError(error)
+        if not self.condition == other.condition:
+            error = (
+                "Attempting to merge two ctes with different conditions"
+                f" {self.name} {other.name} conditions {self.condition} {other.condition}"
+            )
+            raise ValueError(error)
+        mutually_hidden = set()
+        for concept in self.hidden_concepts:
+            if concept in other.hidden_concepts:
+                mutually_hidden.add(concept)
+        self.partial_concepts = unique(
+            self.partial_concepts + other.partial_concepts, "address"
+        )
+        self.parent_ctes = merge_ctes(self.parent_ctes + other.parent_ctes)
+
+        self.source_map = {**self.source_map, **other.source_map}
+
+        self.output_columns = unique(
+            self.output_columns + other.output_columns, "address"
+        )
+        self.joins = unique(self.joins + other.joins, "unique_id")
+        self.partial_concepts = unique(
+            self.partial_concepts + other.partial_concepts, "address"
+        )
+        self.join_derived_concepts = unique(
+            self.join_derived_concepts + other.join_derived_concepts, "address"
+        )
+
+        self.source.source_map = {**self.source.source_map, **other.source.source_map}
+        self.source.output_concepts = unique(
+            self.source.output_concepts + other.source.output_concepts, "address"
+        )
+        self.nullable_concepts = unique(
+            self.nullable_concepts + other.nullable_concepts, "address"
+        )
+        self.hidden_concepts = mutually_hidden
+        self.existence_source_map = {
+            **self.existence_source_map,
+            **other.existence_source_map,
+        }
+        return self
+
+    @property
+    def relevant_base_ctes(self):
+        return self.parent_ctes
+
+    @property
+    def is_root_datasource(self) -> bool:
+        return (
+            len(self.source.datasources) == 1
+            and isinstance(self.source.datasources[0], DATASOURCE_TYPES)
+            and not self.source.datasources[0].name == CONSTANT_DATASET
+        )
+
+    @property
+    def base_name(self) -> str:
+        if self.base_name_override:
+            return self.base_name_override
+        # if this cte selects from a single datasource, select right from it
+        if self.is_root_datasource:
+            return self.source.datasources[0].safe_location
+
+        # if we have multiple joined CTEs, pick the base
+        # as the root
+        elif len(self.source.datasources) == 1 and len(self.parent_ctes) == 1:
+            return self.parent_ctes[0].name
+        elif self.relevant_base_ctes:
+            return self.relevant_base_ctes[0].name
+        return self.source.name
+
+    @property
+    def quote_address(self) -> bool:
+        if self.is_root_datasource:
+            candidate = self.source.datasources[0]
+            if isinstance(candidate, DATASOURCE_TYPES) and isinstance(
+                candidate.address, Address
+            ):
+                return candidate.address.quoted
+        return False
+
+    @property
+    def base_alias(self) -> str:
+        if self.base_alias_override:
+            return self.base_alias_override
+        if self.is_root_datasource:
+            return self.source.datasources[0].identifier
+        elif self.relevant_base_ctes:
+            return self.relevant_base_ctes[0].name
+        elif self.parent_ctes:
+            return self.parent_ctes[0].name
+        return self.name
+
+    def get_concept(self, address: str) -> BuildConcept | None:
+        for cte in self.parent_ctes:
+            if address in cte.output_columns:
+                match = [x for x in cte.output_columns if x.address == address].pop()
+                if match:
+                    return match
+
+        for array in [self.source.input_concepts, self.source.output_concepts]:
+            match_list = [x for x in array if x.address == address]
+            if match_list:
+                return match_list.pop()
+        match_list = [x for x in self.output_columns if x.address == address]
+        if match_list:
+            return match_list.pop()
+        return None
+
+    def get_alias(self, concept: BuildConcept, source: str | None = None) -> str:
+        for cte in self.parent_ctes:
+            if concept.address in cte.output_columns:
+                if source and source != cte.name:
+                    continue
+                return concept.safe_address
+
+        try:
+            source = self.source.get_alias(concept, source=source)
+
+            if not source:
+                raise ValueError("No source found")
+            return source
+        except ValueError as e:
+            return f"INVALID_ALIAS: {str(e)}"
+
+    @property
+    def group_concepts(self) -> List[BuildConcept]:
+        def check_is_not_in_group(c: BuildConcept):
+            if len(self.source_map.get(c.address, [])) > 0:
+                return False
+            if c.derivation == Derivation.ROWSET:
+                assert isinstance(c.lineage, BuildRowsetItem)
+                return check_is_not_in_group(c.lineage.content)
+            if c.derivation == Derivation.CONSTANT:
+                return True
+            if c.purpose == Purpose.METRIC:
+                return True
+
+            if c.derivation == Derivation.BASIC and c.lineage:
+                if all([check_is_not_in_group(x) for x in c.lineage.concept_arguments]):
+                    return True
+                if (
+                    isinstance(c.lineage, BuildFunction)
+                    and c.lineage.operator == FunctionType.GROUP
+                ):
+                    return check_is_not_in_group(c.lineage.concept_arguments[0])
+            return False
+
+        return (
+            unique(
+                [c for c in self.output_columns if not check_is_not_in_group(c)],
+                "address",
+            )
+            if self.group_to_grain
+            else []
+        )
+
+    @property
+    def render_from_clause(self) -> bool:
+        if (
+            all([c.derivation == Derivation.CONSTANT for c in self.output_columns])
+            and not self.parent_ctes
+            and not self.group_to_grain
+        ):
+            return False
+        # if we don't need to source any concepts from anywhere
+        # render without from
+        # most likely to happen from inlining constants
+        if not any([v for v in self.source_map.values()]):
+            return False
+        if (
+            len(self.source.datasources) == 1
+            and self.source.datasources[0].name == CONSTANT_DATASET
+        ):
+            return False
+        return True
+
+    @property
+    def sourced_concepts(self) -> List[BuildConcept]:
+        return [c for c in self.output_columns if c.address in self.source_map]
+
+
+class ConceptPair(BaseModel):
+    left: BuildConcept
+    right: BuildConcept
+    existing_datasource: Union[BuildDatasource, "QueryDatasource"]
+    modifiers: List[Modifier] = Field(default_factory=list)
+
+    @property
+    def is_partial(self):
+        return Modifier.PARTIAL in self.modifiers
+
+    @property
+    def is_nullable(self):
+        return Modifier.NULLABLE in self.modifiers
+
+
+class CTEConceptPair(ConceptPair):
+    cte: CTE
+
+
+class InstantiatedUnnestJoin(BaseModel):
+    concept_to_unnest: BuildConcept
+    alias: str = "unnest"
+
+
+class UnnestJoin(BaseModel):
+    concepts: list[BuildConcept]
+    parent: BuildFunction
+    alias: str = "unnest"
+    rendering_required: bool = True
+
+    def __hash__(self):
+        return self.safe_identifier.__hash__()
+
+    @property
+    def safe_identifier(self) -> str:
+        return self.alias + "".join([str(s.address) for s in self.concepts])
+
+
+class BaseJoin(BaseModel):
+    right_datasource: Union[BuildDatasource, "QueryDatasource"]
+    join_type: JoinType
+    concepts: Optional[List[BuildConcept]] = None
+    left_datasource: Optional[Union[BuildDatasource, "QueryDatasource"]] = None
+    concept_pairs: list[ConceptPair] | None = None
+
+    def __init__(self, **data: Any):
+        super().__init__(**data)
+        if (
+            self.left_datasource
+            and self.left_datasource.identifier == self.right_datasource.identifier
+        ):
+            raise SyntaxError(
+                f"Cannot join a dataself to itself, joining {self.left_datasource} and"
+                f" {self.right_datasource}"
+            )
+        final_concepts = []
+
+        # if we have a list of concept pairs
+        if self.concept_pairs:
+            return
+        if self.concepts == []:
+            return
+        assert self.left_datasource and self.right_datasource
+        for concept in self.concepts or []:
+            include = True
+            for ds in [self.left_datasource, self.right_datasource]:
+                synonyms = []
+                for c in ds.output_concepts:
+                    synonyms += list(c.pseudonyms)
+                if (
+                    concept.address not in [c.address for c in ds.output_concepts]
+                    and concept.address not in synonyms
+                ):
+                    raise SyntaxError(
+                        f"Invalid join, missing {concept} on {ds.name}, have"
+                        f" {[c.address for c in ds.output_concepts]}"
+                    )
+            if include:
+                final_concepts.append(concept)
+        if not final_concepts and self.concepts:
+            # if one datasource only has constants
+            # we can join on 1=1
+            for ds in [self.left_datasource, self.right_datasource]:
+                # single rows
+                if all(
+                    [
+                        c.granularity == Granularity.SINGLE_ROW
+                        for c in ds.output_concepts
+                    ]
+                ):
+                    self.concepts = []
+                    return
+                # if everything is at abstract grain, we can skip joins
+                if all([c.grain.abstract for c in ds.output_concepts]):
+                    self.concepts = []
+                    return
+
+            left_keys = [c.address for c in self.left_datasource.output_concepts]
+            right_keys = [c.address for c in self.right_datasource.output_concepts]
+            match_concepts = [c.address for c in self.concepts]
+            raise SyntaxError(
+                "No mutual join keys found between"
+                f" {self.left_datasource.identifier} and"
+                f" {self.right_datasource.identifier}, left_keys {left_keys},"
+                f" right_keys {right_keys},"
+                f" provided join concepts {match_concepts}"
+            )
+        self.concepts = final_concepts
+
+    @property
+    def unique_id(self) -> str:
+        return str(self)
+
+    @property
+    def input_concepts(self) -> List[BuildConcept]:
+        base = []
+        if self.concept_pairs:
+            for pair in self.concept_pairs:
+                base += [pair.left, pair.right]
+        elif self.concepts:
+            base += self.concepts
+        return base
+
+    def __str__(self):
+        if self.concept_pairs:
+            return (
+                f"{self.join_type.value} {self.right_datasource.name} on"
+                f" {','.join([str(k.existing_datasource.name) + '.'+ str(k.left)+'='+str(k.right) for k in self.concept_pairs])}"
+            )
+        return (
+            f"{self.join_type.value} {self.right_datasource.name} on"
+            f" {','.join([str(k) for k in self.concepts])}"
+        )
+
+
+class QueryDatasource(BaseModel):
+    input_concepts: List[BuildConcept]
+    output_concepts: List[BuildConcept]
+    datasources: List[Union[BuildDatasource, "QueryDatasource"]]
+    source_map: Dict[str, Set[Union[BuildDatasource, "QueryDatasource", "UnnestJoin"]]]
+
+    grain: BuildGrain
+    joins: List[BaseJoin | UnnestJoin]
+    limit: Optional[int] = None
+    condition: Optional[
+        Union[BuildConditional, BuildComparison, BuildParenthetical]
+    ] = Field(default=None)
+    source_type: SourceType = SourceType.SELECT
+    partial_concepts: List[BuildConcept] = Field(default_factory=list)
+    hidden_concepts: set[str] = Field(default_factory=set)
+    nullable_concepts: List[BuildConcept] = Field(default_factory=list)
+    join_derived_concepts: List[BuildConcept] = Field(default_factory=list)
+    force_group: bool | None = None
+    existence_source_map: Dict[str, Set[Union[BuildDatasource, "QueryDatasource"]]] = (
+        Field(default_factory=dict)
+    )
+    ordering: BuildOrderBy | None = None
+
+    def __repr__(self):
+        return f"{self.identifier}@<{self.grain}>"
+
+    @property
+    def safe_identifier(self):
+        return self.identifier.replace(".", "_")
+
+    @property
+    def full_concepts(self) -> List[BuildConcept]:
+        return [
+            c
+            for c in self.output_concepts
+            if c.address not in [z.address for z in self.partial_concepts]
+        ]
+
+    @field_validator("joins")
+    @classmethod
+    def validate_joins(cls, v):
+        unique_pairs = set()
+        for join in v:
+            if not isinstance(join, BaseJoin):
+                continue
+            pairing = str(join)
+            if pairing in unique_pairs:
+                raise SyntaxError(f"Duplicate join {str(join)}")
+            unique_pairs.add(pairing)
+        return v
+
+    @field_validator("input_concepts")
+    @classmethod
+    def validate_inputs(cls, v):
+        return unique(v, "address")
+
+    @field_validator("output_concepts")
+    @classmethod
+    def validate_outputs(cls, v):
+        return unique(v, "address")
+
+    @field_validator("source_map")
+    @classmethod
+    def validate_source_map(cls, v: dict, info: ValidationInfo):
+        values = info.data
+        for key in ("input_concepts", "output_concepts"):
+            if not values.get(key):
+                continue
+            concept: BuildConcept
+            for concept in values[key]:
+                if (
+                    concept.address not in v
+                    and not any(x in v for x in concept.pseudonyms)
+                    and CONFIG.validate_missing
+                ):
+                    raise SyntaxError(
+                        f"On query datasource missing source map for {concept.address} on {key}, have {v}"
+                    )
+        return v
+
+    def __str__(self):
+        return self.__repr__()
+
+    def __hash__(self):
+        return (self.identifier).__hash__()
+
+    @property
+    def concepts(self):
+        return self.output_concepts
+
+    @property
+    def name(self):
+        return self.identifier
+
+    @property
+    def group_required(self) -> bool:
+        if self.force_group is True:
+            return True
+        if self.force_group is False:
+            return False
+        if self.source_type:
+            if self.source_type in [
+                SourceType.FILTER,
+            ]:
+                return False
+            elif self.source_type in [
+                SourceType.GROUP,
+            ]:
+                return True
+        return False
+
+    def __add__(self, other) -> "QueryDatasource":
+        # these are syntax errors to avoid being caught by current
+        if not isinstance(other, QueryDatasource):
+            raise SyntaxError("Can only merge two query datasources")
+        if not other.grain == self.grain:
+            raise SyntaxError(
+                "Can only merge two query datasources with identical grain"
+            )
+        if not self.group_required == other.group_required:
+            raise SyntaxError(
+                "can only merge two datasources if the group required flag is the same"
+            )
+        if not self.join_derived_concepts == other.join_derived_concepts:
+            raise SyntaxError(
+                "can only merge two datasources if the join derived concepts are the same"
+            )
+        if not self.force_group == other.force_group:
+            raise SyntaxError(
+                "can only merge two datasources if the force_group flag is the same"
+            )
+        logger.debug(
+            f"{LOGGER_PREFIX} merging {self.name} with"
+            f" {[c.address for c in self.output_concepts]} concepts and"
+            f" {other.name} with {[c.address for c in other.output_concepts]} concepts"
+        )
+
+        merged_datasources: dict[str, Union[BuildDatasource, "QueryDatasource"]] = {}
+
+        for ds in [*self.datasources, *other.datasources]:
+            if ds.safe_identifier in merged_datasources:
+                merged_datasources[ds.safe_identifier] = (
+                    merged_datasources[ds.safe_identifier] + ds
+                )
+            else:
+                merged_datasources[ds.safe_identifier] = ds
+
+        final_source_map: defaultdict[
+            str, Set[Union[BuildDatasource, QueryDatasource, UnnestJoin]]
+        ] = defaultdict(set)
+
+        # add our sources
+        for key in self.source_map:
+            final_source_map[key] = self.source_map[key].union(
+                other.source_map.get(key, set())
+            )
+        # add their sources
+        for key in other.source_map:
+            if key not in final_source_map:
+                final_source_map[key] = other.source_map[key]
+
+        # if a ds was merged (to combine columns), we need to update the source map
+        # to use the merged item
+        for k, v in final_source_map.items():
+            final_source_map[k] = set(
+                merged_datasources.get(x.safe_identifier, x) for x in list(v)
+            )
+        self_hidden: set[str] = self.hidden_concepts or set()
+        other_hidden: set[str] = other.hidden_concepts or set()
+        # hidden is the minimum overlapping set
+        hidden = self_hidden.intersection(other_hidden)
+        qds = QueryDatasource(
+            input_concepts=unique(
+                self.input_concepts + other.input_concepts, "address"
+            ),
+            output_concepts=unique(
+                self.output_concepts + other.output_concepts, "address"
+            ),
+            source_map=final_source_map,
+            datasources=list(merged_datasources.values()),
+            grain=self.grain,
+            joins=unique(self.joins + other.joins, "unique_id"),
+            condition=(
+                self.condition + other.condition
+                if self.condition and other.condition
+                else self.condition or other.condition
+            ),
+            source_type=self.source_type,
+            partial_concepts=unique(
+                self.partial_concepts + other.partial_concepts, "address"
+            ),
+            join_derived_concepts=self.join_derived_concepts,
+            force_group=self.force_group,
+            hidden_concepts=hidden,
+            ordering=self.ordering,
+        )
+
+        return qds
+
+    @property
+    def identifier(self) -> str:
+        filters = abs(hash(str(self.condition))) if self.condition else ""
+        grain = "_".join([str(c).replace(".", "_") for c in self.grain.components])
+        return (
+            "_join_".join([d.identifier for d in self.datasources])
+            + (f"_at_{grain}" if grain else "_at_abstract")
+            + (f"_filtered_by_{filters}" if filters else "")
+        )
+
+    def get_alias(
+        self,
+        concept: BuildConcept,
+        use_raw_name: bool = False,
+        force_alias: bool = False,
+        source: str | None = None,
+    ):
+        for x in self.datasources:
+            # query datasources should be referenced by their alias, always
+            force_alias = isinstance(x, QueryDatasource)
+            #
+            use_raw_name = isinstance(x, DATASOURCE_TYPES) and not force_alias
+            if source and x.safe_identifier != source:
+                continue
+            try:
+                return x.get_alias(
+                    concept.with_grain(self.grain),
+                    use_raw_name,
+                    force_alias=force_alias,
+                )
+            except ValueError as e:
+                from trilogy.constants import logger
+
+                logger.debug(e)
+                continue
+        existing = [c.with_grain(self.grain) for c in self.output_concepts]
+        if concept in existing:
+            return concept.name
+
+        existing_str = [str(c) for c in existing]
+        datasources = [ds.identifier for ds in self.datasources]
+        raise ValueError(
+            f"{LOGGER_PREFIX} Concept {str(concept)} not found on {self.identifier};"
+            f" have {existing_str} from {datasources}."
+        )
+
+    @property
+    def safe_location(self):
+        return self.identifier
+
+
+class UnionCTE(BaseModel):
+    name: str
+    source: QueryDatasource
+    parent_ctes: list[CTE | UnionCTE]
+    internal_ctes: list[CTE | UnionCTE]
+    output_columns: List[BuildConcept]
+    grain: BuildGrain
+    operator: str = "UNION ALL"
+    order_by: Optional[BuildOrderBy] = None
+    limit: Optional[int] = None
+    hidden_concepts: set[str] = Field(default_factory=set)
+    partial_concepts: list[BuildConcept] = Field(default_factory=list)
+    existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
+
+    @computed_field  # type: ignore
+    @property
+    def output_lcl(self) -> LooseBuildConceptList:
+        return LooseBuildConceptList(concepts=self.output_columns)
+
+    def get_alias(self, concept: BuildConcept, source: str | None = None) -> str:
+        for cte in self.parent_ctes:
+            if concept.address in cte.output_columns:
+                if source and source != cte.name:
+                    continue
+                return concept.safe_address
+        return "INVALID_ALIAS"
+
+    def get_concept(self, address: str) -> BuildConcept | None:
+        for cte in self.internal_ctes:
+            if address in cte.output_columns:
+                match = [x for x in cte.output_columns if x.address == address].pop()
+                return match
+
+        match_list = [x for x in self.output_columns if x.address == address]
+        if match_list:
+            return match_list.pop()
+        return None
+
+    @property
+    def source_map(self):
+        return {x.address: [] for x in self.output_columns}
+
+    @property
+    def condition(self):
+        return None
+
+    @condition.setter
+    def condition(self, value):
+        raise NotImplementedError
+
+    @property
+    def safe_identifier(self):
+        return self.name
+
+    @property
+    def group_to_grain(self) -> bool:
+        return False
+
+    def __add__(self, other):
+        if not isinstance(other, UnionCTE) or not other.name == self.name:
+            raise SyntaxError("Cannot merge union CTEs")
+        return self
+
+
+class Join(BaseModel):
+    right_cte: CTE
+    jointype: JoinType
+    left_cte: CTE | None = None
+    joinkey_pairs: List[CTEConceptPair] | None = None
+    inlined_ctes: set[str] = Field(default_factory=set)
+
+    def inline_cte(self, cte: CTE):
+        self.inlined_ctes.add(cte.name)
+
+    def get_name(self, cte: CTE):
+        if cte.identifier in self.inlined_ctes:
+            return cte.source.datasources[0].safe_identifier
+        return cte.safe_identifier
+
+    @property
+    def right_name(self) -> str:
+        if self.right_cte.identifier in self.inlined_ctes:
+            return self.right_cte.source.datasources[0].safe_identifier
+        return self.right_cte.safe_identifier
+
+    @property
+    def right_ref(self) -> str:
+        if self.right_cte.identifier in self.inlined_ctes:
+            return f"{self.right_cte.source.datasources[0].safe_location} as {self.right_cte.source.datasources[0].safe_identifier}"
+        return self.right_cte.safe_identifier
+
+    @property
+    def unique_id(self) -> str:
+        return str(self)
+
+    def __str__(self):
+        if self.joinkey_pairs:
+            return (
+                f"{self.jointype.value} join"
+                f" {self.right_name} on"
+                f" {','.join([k.cte.name + '.'+str(k.left.address)+'='+str(k.right.address) for k in self.joinkey_pairs])}"
+            )
+        elif self.left_cte:
+            return (
+                f"{self.jointype.value} JOIN {self.left_cte.name} and"
+                f" {self.right_name} on {','.join([str(k) for k in self.joinkey_pairs])}"
+            )
+        return f"{self.jointype.value} JOIN {self.right_name} on {','.join([str(k) for k in self.joinkey_pairs])}"
+
+
+def merge_ctes(ctes: List[CTE | UnionCTE]) -> List[CTE | UnionCTE]:
+    final_ctes_dict: Dict[str, CTE | UnionCTE] = {}
+    # merge CTEs
+    for cte in ctes:
+        if cte.name not in final_ctes_dict:
+            final_ctes_dict[cte.name] = cte
+        else:
+            final_ctes_dict[cte.name] = final_ctes_dict[cte.name] + cte
+
+    final_ctes = list(final_ctes_dict.values())
+    return final_ctes
+
+
+class CompiledCTE(BaseModel):
+    name: str
+    statement: str
+
+
+UnionCTE.model_rebuild()
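
The new execute models above share one merge pattern: CTE.__add__, QueryDatasource.__add__, and merge_ctes all deduplicate objects by a stable name or identifier and fold duplicates together with the class's __add__ overload. The snippet below is a minimal, standalone sketch of that pattern only; Node and merge_nodes are hypothetical stand-ins, since constructing real CTE or QueryDatasource instances requires the full build models from trilogy.core.models.build.

# Illustrative sketch only: "Node" and "merge_nodes" are hypothetical stand-ins
# for CTE / QueryDatasource and merge_ctes from the diff above.
from dataclasses import dataclass, field
from typing import Dict, List


@dataclass
class Node:
    name: str
    outputs: List[str] = field(default_factory=list)

    def __add__(self, other: "Node") -> "Node":
        # mirrors CTE.__add__: refuse mismatched merges, then union the outputs
        if other.name != self.name:
            raise ValueError("can only merge nodes with the same name")
        merged = list(dict.fromkeys(self.outputs + other.outputs))
        return Node(name=self.name, outputs=merged)


def merge_nodes(nodes: List[Node]) -> List[Node]:
    # same shape as merge_ctes: key by name, fold duplicates in with __add__
    final: Dict[str, Node] = {}
    for node in nodes:
        if node.name not in final:
            final[node.name] = node
        else:
            final[node.name] = final[node.name] + node
    return list(final.values())


if __name__ == "__main__":
    merged = merge_nodes([Node("orders", ["order_id"]), Node("orders", ["customer_id"])])
    print(merged)  # [Node(name='orders', outputs=['order_id', 'customer_id'])]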