pytrilogy 0.3.138__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-311-x86_64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.138.dist-info/METADATA +525 -0
  5. pytrilogy-0.3.138.dist-info/RECORD +182 -0
  6. pytrilogy-0.3.138.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.138.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.138.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +9 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +87 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +143 -0
  26. trilogy/constants.py +113 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +443 -0
  31. trilogy/core/env_processor.py +120 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1227 -0
  36. trilogy/core/graph_models.py +139 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2672 -0
  40. trilogy/core/models/build.py +2521 -0
  41. trilogy/core/models/build_environment.py +180 -0
  42. trilogy/core/models/core.py +494 -0
  43. trilogy/core/models/datasource.py +322 -0
  44. trilogy/core/models/environment.py +748 -0
  45. trilogy/core/models/execute.py +1177 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +517 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +268 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +205 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +653 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +748 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +519 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +596 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +106 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1359 -0
  112. trilogy/dialect/bigquery.py +256 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +144 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +177 -0
  117. trilogy/dialect/enums.py +147 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +91 -0
  121. trilogy/dialect/presto.py +104 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +90 -0
  124. trilogy/dialect/sql_server.py +92 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/config.py +75 -0
  127. trilogy/executor.py +568 -0
  128. trilogy/hooks/__init__.py +4 -0
  129. trilogy/hooks/base_hook.py +40 -0
  130. trilogy/hooks/graph_hook.py +139 -0
  131. trilogy/hooks/query_debugger.py +166 -0
  132. trilogy/metadata/__init__.py +0 -0
  133. trilogy/parser.py +10 -0
  134. trilogy/parsing/README.md +21 -0
  135. trilogy/parsing/__init__.py +0 -0
  136. trilogy/parsing/common.py +1069 -0
  137. trilogy/parsing/config.py +5 -0
  138. trilogy/parsing/exceptions.py +8 -0
  139. trilogy/parsing/helpers.py +1 -0
  140. trilogy/parsing/parse_engine.py +2813 -0
  141. trilogy/parsing/render.py +750 -0
  142. trilogy/parsing/trilogy.lark +540 -0
  143. trilogy/py.typed +0 -0
  144. trilogy/render.py +42 -0
  145. trilogy/scripts/README.md +7 -0
  146. trilogy/scripts/__init__.py +0 -0
  147. trilogy/scripts/dependency/Cargo.lock +617 -0
  148. trilogy/scripts/dependency/Cargo.toml +39 -0
  149. trilogy/scripts/dependency/README.md +131 -0
  150. trilogy/scripts/dependency/build.sh +25 -0
  151. trilogy/scripts/dependency/src/directory_resolver.rs +162 -0
  152. trilogy/scripts/dependency/src/lib.rs +16 -0
  153. trilogy/scripts/dependency/src/main.rs +770 -0
  154. trilogy/scripts/dependency/src/parser.rs +435 -0
  155. trilogy/scripts/dependency/src/preql.pest +208 -0
  156. trilogy/scripts/dependency/src/python_bindings.rs +289 -0
  157. trilogy/scripts/dependency/src/resolver.rs +716 -0
  158. trilogy/scripts/dependency/tests/base.preql +3 -0
  159. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  160. trilogy/scripts/dependency/tests/customer.preql +6 -0
  161. trilogy/scripts/dependency/tests/main.preql +9 -0
  162. trilogy/scripts/dependency/tests/orders.preql +7 -0
  163. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  164. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  165. trilogy/scripts/dependency.py +323 -0
  166. trilogy/scripts/display.py +460 -0
  167. trilogy/scripts/environment.py +46 -0
  168. trilogy/scripts/parallel_execution.py +483 -0
  169. trilogy/scripts/single_execution.py +131 -0
  170. trilogy/scripts/trilogy.py +772 -0
  171. trilogy/std/__init__.py +0 -0
  172. trilogy/std/color.preql +3 -0
  173. trilogy/std/date.preql +13 -0
  174. trilogy/std/display.preql +18 -0
  175. trilogy/std/geography.preql +22 -0
  176. trilogy/std/metric.preql +15 -0
  177. trilogy/std/money.preql +67 -0
  178. trilogy/std/net.preql +14 -0
  179. trilogy/std/ranking.preql +7 -0
  180. trilogy/std/report.preql +5 -0
  181. trilogy/std/semantic.preql +6 -0
  182. trilogy/utility.py +34 -0
@@ -0,0 +1,1177 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import defaultdict
4
+ from typing import Dict, List, Optional, Set, Union
5
+
6
+ from pydantic import (
7
+ BaseModel,
8
+ Field,
9
+ ValidationInfo,
10
+ computed_field,
11
+ field_validator,
12
+ model_validator,
13
+ )
14
+
15
+ from trilogy.constants import (
16
+ CONFIG,
17
+ DEFAULT_NAMESPACE,
18
+ RECURSIVE_GATING_CONCEPT,
19
+ MagicConstants,
20
+ logger,
21
+ )
22
+ from trilogy.core.constants import CONSTANT_DATASET
23
+ from trilogy.core.enums import (
24
+ ComparisonOperator,
25
+ Derivation,
26
+ FunctionClass,
27
+ FunctionType,
28
+ JoinType,
29
+ Modifier,
30
+ Purpose,
31
+ SourceType,
32
+ )
33
+ from trilogy.core.exceptions import InvalidSyntaxException
34
+ from trilogy.core.models.build import (
35
+ BuildCaseElse,
36
+ BuildCaseWhen,
37
+ BuildComparison,
38
+ BuildConcept,
39
+ BuildConditional,
40
+ BuildDatasource,
41
+ BuildExpr,
42
+ BuildFunction,
43
+ BuildGrain,
44
+ BuildOrderBy,
45
+ BuildParamaterizedConceptReference,
46
+ BuildParenthetical,
47
+ BuildRowsetItem,
48
+ DataType,
49
+ LooseBuildConceptList,
50
+ )
51
+ from trilogy.core.models.datasource import Address
52
+ from trilogy.core.utility import safe_quote
53
+ from trilogy.utility import unique
54
+
55
# Prefix used to tag log lines emitted from this module.
LOGGER_PREFIX = "[MODELS_EXECUTE]"

# Classes accepted by isinstance() checks when deciding whether a source is a
# physical datasource (e.g. whether a parent CTE can be inlined).
# The original tuple listed BuildDatasource twice; the duplicate is redundant
# for isinstance checks, so a single-element tuple is behavior-identical.
DATASOURCE_TYPES = (BuildDatasource,)
58
+
59
+
60
class InlinedCTE(BaseModel):
    """Record of a CTE that was collapsed into a direct table reference.

    Kept on the consuming CTE (see ``CTE.inlined_ctes``) so renderers can
    map the CTE's original alias to the new table alias and address.
    """

    # alias the CTE had before inlining (the parent CTE's name)
    original_alias: str
    # alias used after inlining (the datasource's safe identifier)
    new_alias: str
    # physical location (safe_location) the alias now points at
    new_base: str
64
+
65
+
66
class CTE(BaseModel):
    """A single common table expression in a generated query.

    Wraps a :class:`QueryDatasource` with the rendering metadata needed to
    emit SQL: output columns, the map of concept address -> source aliases,
    joins, filters, grouping, and references to parent CTEs. Instances are
    mutated during optimization (see ``inline_parent_datasource`` and
    ``__add__``).
    """

    name: str
    source: "QueryDatasource"
    # concepts this CTE exposes to downstream CTEs / the final select
    output_columns: List[BuildConcept]
    # concept address -> list of source aliases the concept can be read from
    source_map: Dict[str, list[str]]
    grain: BuildGrain
    base: bool = False
    # when True, rendering must GROUP BY to reach the target grain
    group_to_grain: bool = False
    # like source_map, but for concepts used only in EXISTS-style subqueries
    existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
    parent_ctes: List[Union["CTE", "UnionCTE"]] = Field(default_factory=list)
    joins: List[Union["Join", "InstantiatedUnnestJoin"]] = Field(default_factory=list)
    condition: Optional[
        Union[BuildComparison, BuildConditional, BuildParenthetical]
    ] = None
    partial_concepts: List[BuildConcept] = Field(default_factory=list)
    nullable_concepts: List[BuildConcept] = Field(default_factory=list)
    join_derived_concepts: List[BuildConcept] = Field(default_factory=list)
    hidden_concepts: set[str] = Field(default_factory=set)
    order_by: Optional[BuildOrderBy] = None
    limit: Optional[int] = None
    # overrides set when a parent CTE is inlined; see inline_parent_datasource
    base_name_override: Optional[str] = None
    base_alias_override: Optional[str] = None
    # safe_identifier -> InlinedCTE record for every parent collapsed into us
    inlined_ctes: dict[str, InlinedCTE] = Field(default_factory=dict)

    @field_validator("join_derived_concepts")
    def validate_join_derived_concepts(cls, v):
        """Deduplicate by address; only a single join-derived concept is supported."""
        if len(v) > 1:
            raise NotImplementedError(
                "Multiple join derived concepts not yet supported."
            )
        return unique(v, "address")

    @property
    def identifier(self):
        """The CTE's name (alias parity with datasource objects)."""
        return self.name

    @property
    def safe_identifier(self):
        """Same as name; CTE names are already SQL-safe."""
        return self.name

    @computed_field  # type: ignore
    @property
    def output_lcl(self) -> LooseBuildConceptList:
        """Output columns wrapped for loose (address-based) comparison."""
        return LooseBuildConceptList(concepts=self.output_columns)

    @field_validator("output_columns")
    def validate_output_columns(cls, v):
        """Deduplicate output columns by address."""
        return unique(v, "address")

    @property
    def comment(self) -> str:
        """Human-readable SQL comment describing this CTE.

        Sections beyond the grain/source summary are gated by the
        CONFIG.comments flags.
        """
        base = f"Target: {str(self.grain)}. Group: {self.group_to_grain}"
        base += f" Source: {self.source.source_type}."
        if self.parent_ctes:
            base += f" References: {', '.join([x.name for x in self.parent_ctes])}."
        if self.joins and CONFIG.comments.joins:
            base += f"\n-- Joins: {', '.join([str(x) for x in self.joins])}."
        if self.partial_concepts and CONFIG.comments.partial:
            base += (
                f"\n-- Partials: {', '.join([str(x) for x in self.partial_concepts])}."
            )
        if CONFIG.comments.source_map:
            base += f"\n-- Source Map: {self.source_map}."
        base += f"\n-- Output: {', '.join([str(x) for x in self.output_columns])}."
        if self.source.input_concepts:
            base += f"\n-- Inputs: {', '.join([str(x) for x in self.source.input_concepts])}."
        if self.hidden_concepts:
            base += f"\n-- Hidden: {', '.join([str(x) for x in self.hidden_concepts])}."
        if self.nullable_concepts and CONFIG.comments.nullable:
            base += (
                f"\n-- Nullable: {', '.join([str(x) for x in self.nullable_concepts])}."
            )
        base += "\n"
        return base

    def inline_parent_datasource(
        self, parent: "CTE", force_group: bool = False
    ) -> bool:
        """Collapse ``parent`` (a CTE over a single raw datasource) into this CTE.

        Replaces references to the parent CTE with direct references to the
        underlying table: swaps the datasource list, rewrites the source maps
        and joins, removes the parent from parent_ctes, and records the
        rename in ``inlined_ctes``. Returns True if the inline happened,
        False if the parent is not a plain datasource or would duplicate an
        existing one. Mutates self in place.
        """
        qds_being_inlined = parent.source
        ds_being_inlined = qds_being_inlined.datasources[0]
        if not isinstance(ds_being_inlined, DATASOURCE_TYPES):
            return False
        if any(
            [
                x.safe_identifier == ds_being_inlined.safe_identifier
                for x in self.source.datasources
            ]
        ):
            return False
        self.source.datasources = [
            ds_being_inlined,
            *[
                x
                for x in self.source.datasources
                if x.safe_identifier != qds_being_inlined.safe_identifier
            ],
        ]
        # need to identify this before updating joins
        if self.base_name == parent.name:
            self.base_name_override = ds_being_inlined.safe_location
            self.base_alias_override = ds_being_inlined.safe_identifier

        # if we have a join to the parent, we need to remove it
        for join in self.joins:
            if isinstance(join, InstantiatedUnnestJoin):
                continue
            if (
                join.left_cte
                and join.left_cte.safe_identifier == parent.safe_identifier
            ):
                join.inline_cte(parent)
            if join.joinkey_pairs:
                for pair in join.joinkey_pairs:
                    if pair.cte and pair.cte.safe_identifier == parent.safe_identifier:
                        join.inline_cte(parent)
            if join.right_cte.safe_identifier == parent.safe_identifier:
                join.inline_cte(parent)
        # repoint every source-map entry that referenced the parent CTE
        # NOTE(review): the elif branches handle a bare-string value form of
        # the map; the field is typed Dict[str, list[str]] — presumably a
        # legacy shape, confirm whether the str case still occurs
        for k, v in self.source_map.items():
            if isinstance(v, list):
                self.source_map[k] = [
                    (
                        ds_being_inlined.safe_identifier
                        if x == parent.safe_identifier
                        else x
                    )
                    for x in v
                ]
            elif v == parent.safe_identifier:
                self.source_map[k] = [ds_being_inlined.safe_identifier]
        for k, v in self.existence_source_map.items():
            if isinstance(v, list):
                self.existence_source_map[k] = [
                    (
                        ds_being_inlined.safe_identifier
                        if x == parent.safe_identifier
                        else x
                    )
                    for x in v
                ]
            elif v == parent.safe_identifier:
                self.existence_source_map[k] = [ds_being_inlined.safe_identifier]
        # zip in any required values for lookups
        for k in ds_being_inlined.output_lcl.addresses:
            if k in self.source_map and self.source_map[k]:
                continue
            self.source_map[k] = [ds_being_inlined.safe_identifier]
        self.parent_ctes = [
            x for x in self.parent_ctes if x.safe_identifier != parent.safe_identifier
        ]
        if force_group:
            self.group_to_grain = True
        self.inlined_ctes[ds_being_inlined.safe_identifier] = InlinedCTE(
            original_alias=parent.name,
            new_alias=ds_being_inlined.safe_identifier,
            new_base=ds_being_inlined.safe_location,
        )
        return True

    def __add__(self, other: "CTE" | "UnionCTE"):
        """Merge another CTE with the same grain and condition into this one.

        Used to combine duplicate CTEs: unions columns, joins, parents and
        source maps; hidden concepts are intersected (only concepts hidden in
        both stay hidden). Mutates and returns self.

        Raises:
            ValueError: if other is a UnionCTE, or grains/conditions differ.
        """
        if isinstance(other, UnionCTE):
            raise ValueError("cannot merge CTE and union CTE")
        logger.info('Merging two copies of CTE "%s"', self.name)
        if not self.grain == other.grain:
            error = (
                "Attempting to merge two ctes of different grains"
                f" {self.name} {other.name} grains {self.grain} {other.grain}| {self.group_to_grain} {other.group_to_grain}| {self.output_lcl} {other.output_lcl}"
            )
            raise ValueError(error)
        if not self.condition == other.condition:
            error = (
                "Attempting to merge two ctes with different conditions"
                f" {self.name} {other.name} conditions {self.condition} {other.condition}"
            )
            raise ValueError(error)
        # only concepts hidden on BOTH sides remain hidden after the merge
        mutually_hidden = set()
        for concept in self.hidden_concepts:
            if concept in other.hidden_concepts:
                mutually_hidden.add(concept)
        self.partial_concepts = unique(
            self.partial_concepts + other.partial_concepts, "address"
        )
        self.parent_ctes = merge_ctes(self.parent_ctes + other.parent_ctes)

        self.source_map = {**self.source_map, **other.source_map}

        self.output_columns = unique(
            self.output_columns + other.output_columns, "address"
        )
        self.joins = unique(self.joins + other.joins, "unique_id")
        # NOTE(review): partial_concepts is re-merged a second time here —
        # harmless (idempotent) but likely an editing leftover
        self.partial_concepts = unique(
            self.partial_concepts + other.partial_concepts, "address"
        )
        self.join_derived_concepts = unique(
            self.join_derived_concepts + other.join_derived_concepts, "address"
        )

        self.source.source_map = {**self.source.source_map, **other.source.source_map}
        self.source.output_concepts = unique(
            self.source.output_concepts + other.source.output_concepts, "address"
        )
        self.nullable_concepts = unique(
            self.nullable_concepts + other.nullable_concepts, "address"
        )
        self.hidden_concepts = mutually_hidden
        self.existence_source_map = {
            **self.existence_source_map,
            **other.existence_source_map,
        }
        self.inlined_ctes = {
            **self.inlined_ctes,
            **other.inlined_ctes,
        }

        return self

    @property
    def relevant_base_ctes(self):
        """Parent CTEs eligible to serve as the FROM base (currently all of them)."""
        return self.parent_ctes

    @property
    def is_root_datasource(self) -> bool:
        """True when this CTE selects from exactly one raw (non-constant) datasource."""
        return (
            len(self.source.datasources) == 1
            and isinstance(self.source.datasources[0], DATASOURCE_TYPES)
            and not self.source.datasources[0].name == CONSTANT_DATASET
        )

    @property
    def base_name(self) -> str:
        """The name rendered in the FROM clause for this CTE."""
        if self.base_name_override:
            return self.base_name_override
        # if this cte selects from a single datasource, select right from it
        if self.is_root_datasource:
            return self.source.datasources[0].safe_location

        # if we have multiple joined CTEs, pick the base
        # as the root
        elif len(self.source.datasources) == 1 and len(self.parent_ctes) == 1:
            return self.parent_ctes[0].name
        elif self.relevant_base_ctes:
            return self.relevant_base_ctes[0].name
        return self.source.name

    @property
    def quote_address(self) -> bool:
        """Whether the FROM target should be identifier-quoted.

        Query-style addresses (inline SQL) must not be quoted; everything
        else defaults to True.
        """
        if self.is_root_datasource:
            root = self.source.datasources[0]
            if isinstance(root, BuildDatasource) and isinstance(root.address, Address):
                return not root.address.is_query
            return True
        elif not self.source.datasources:
            return False
        base = self.source.datasources[0]
        if isinstance(base, BuildDatasource):
            if isinstance(base.address, Address):
                return not base.address.is_query
            return True
        return True

    @property
    def base_alias(self) -> str:
        """The alias used for the FROM target in rendered SQL."""
        if self.base_alias_override:
            return self.base_alias_override
        if self.is_root_datasource:
            return self.source.datasources[0].identifier
        elif self.relevant_base_ctes:
            return self.relevant_base_ctes[0].name
        elif self.parent_ctes:
            return self.parent_ctes[0].name
        return self.name

    def get_concept(self, address: str) -> BuildConcept | None:
        """Resolve a concept by address from parents, source, or outputs.

        Checks parent CTE outputs first, then this source's input/output
        concepts, then our own output columns; None if nowhere found.
        """
        for cte in self.parent_ctes:
            # NOTE(review): membership test compares a str address against
            # BuildConcept items — relies on BuildConcept equality accepting
            # address strings; confirm against BuildConcept.__eq__
            if address in cte.output_columns:
                match = [x for x in cte.output_columns if x.address == address].pop()
                if match:
                    return match

        for array in [self.source.input_concepts, self.source.output_concepts]:
            match_list = [x for x in array if x.address == address]
            if match_list:
                return match_list.pop()
        match_list = [x for x in self.output_columns if x.address == address]
        if match_list:
            return match_list.pop()
        return None

    def get_alias(self, concept: BuildConcept, source: str | None = None) -> str:
        """Return the column alias to use for ``concept`` in this CTE.

        If a parent CTE (optionally constrained to ``source``) already
        exposes it, use the concept's safe address; otherwise delegate to the
        underlying QueryDatasource. Failures are surfaced inline as an
        'INVALID_ALIAS: ...' string rather than raising.
        """
        for cte in self.parent_ctes:
            if concept.address in cte.output_columns:
                if source and source != cte.name:
                    continue
                return concept.safe_address

        try:
            source = self.source.get_alias(concept, source=source)

            if not source:
                raise ValueError("No source found")
            return source
        except ValueError as e:
            return f"INVALID_ALIAS: {str(e)}"

    @property
    def group_concepts(self) -> List[BuildConcept]:
        """Output concepts that must appear in the GROUP BY clause.

        Empty unless group_to_grain is set. A concept is excluded from the
        group when it is derived purely from constants/aggregates (checked
        recursively through rowset and basic-function lineage).
        """

        def check_is_not_in_group(c: BuildConcept):
            # concepts that are actually sourced from somewhere must be grouped
            if len(self.source_map.get(c.address, [])) > 0:
                return False
            if c.derivation == Derivation.ROWSET:
                assert isinstance(c.lineage, BuildRowsetItem)
                return check_is_not_in_group(c.lineage.content)
            if c.derivation == Derivation.CONSTANT:
                return True
            if (
                c.purpose == Purpose.CONSTANT
                and isinstance(c.lineage, BuildFunction)
                and c.lineage.operator in FunctionClass.AGGREGATE_FUNCTIONS.value
            ):
                return True

            if c.derivation == Derivation.BASIC and c.lineage:
                # a basic derivation is groupable-free only if every argument is
                if all(
                    [
                        check_is_not_in_group(x)
                        for x in c.lineage.rendered_concept_arguments
                    ]
                ):
                    return True
                if (
                    isinstance(c.lineage, BuildFunction)
                    and c.lineage.operator == FunctionType.GROUP
                ):
                    return check_is_not_in_group(c.lineage.concept_arguments[0])
                return False
            if c.purpose == Purpose.METRIC:
                return True

            return False

        return (
            unique(
                [c for c in self.output_columns if not check_is_not_in_group(c)],
                "address",
            )
            if self.group_to_grain
            else []
        )

    @property
    def render_from_clause(self) -> bool:
        """Whether this CTE needs a FROM clause at all.

        Pure-constant selects, selects with nothing to source, and selects
        over the synthetic constant dataset render without FROM.
        """
        if (
            all([c.derivation == Derivation.CONSTANT for c in self.output_columns])
            and not self.parent_ctes
            and not self.group_to_grain
        ):
            return False
        # if we don't need to source any concepts from anywhere
        # render without from
        # most likely to happen from inlining constants
        if not any([v for v in self.source_map.values()]):
            return False
        if (
            len(self.source.datasources) == 1
            and self.source.datasources[0].name == CONSTANT_DATASET
        ):
            return False
        return True

    @property
    def sourced_concepts(self) -> List[BuildConcept]:
        """Output concepts that have at least one entry in the source map."""
        return [c for c in self.output_columns if c.address in self.source_map]
437
+
438
+
439
class ConceptPair(BaseModel):
    """A matched pair of join concepts, plus the datasource the left side
    already resolves against. Modifiers mark the pair partial/nullable."""

    left: BuildConcept
    right: BuildConcept
    existing_datasource: Union[BuildDatasource, "QueryDatasource"]
    modifiers: List[Modifier] = Field(default_factory=list)

    @property
    def is_partial(self) -> bool:
        """Whether this pair carries the PARTIAL modifier."""
        return any(modifier == Modifier.PARTIAL for modifier in self.modifiers)

    @property
    def is_nullable(self) -> bool:
        """Whether this pair carries the NULLABLE modifier."""
        return any(modifier == Modifier.NULLABLE for modifier in self.modifiers)
452
+
453
+
454
class CTEConceptPair(ConceptPair):
    """A ConceptPair whose left side is resolved through a specific CTE
    rather than a raw datasource."""

    # the CTE the pair joins through
    cte: CTE | UnionCTE
456
+
457
+
458
class InstantiatedUnnestJoin(BaseModel):
    """An unnest join bound to the concrete object being unnested,
    ready for rendering in a CTE's join list."""

    # the expression whose elements are expanded into rows
    object_to_unnest: BuildConcept | BuildParamaterizedConceptReference | BuildFunction
    # SQL alias for the unnested relation
    alias: str = "unnest"
461
+
462
+
463
class UnnestJoin(BaseModel):
    """An unnest join at the QueryDatasource level: the function being
    unnested, the concepts it yields, and the alias used to render it."""

    concepts: list[BuildConcept]
    parent: BuildFunction
    alias: str = "unnest"
    rendering_required: bool = True

    def __hash__(self):
        """Hash on the synthetic identifier so equivalent unnests dedupe."""
        return hash(self.safe_identifier)

    @property
    def safe_identifier(self) -> str:
        """Alias concatenated with every unnested concept address."""
        parts = [self.alias]
        for concept in self.concepts:
            parts.append(str(concept.address))
        return "".join(parts)
475
+
476
+
477
def raise_helpful_join_validation_error(
    concepts: List[BuildConcept],
    left_datasource: BuildDatasource | QueryDatasource | None,
    right_datasource: BuildDatasource | QueryDatasource | None,
):
    """Raise an InvalidSyntaxException explaining why no join keys matched.

    Args:
        concepts: the join concepts that were requested.
        left_datasource / right_datasource: the two sides of the attempted
            join; either may be None.

    Raises:
        InvalidSyntaxException: always. A short message when either side is
            missing; otherwise a detailed message listing both key sets and
            the requested concepts.
    """
    if not left_datasource or not right_datasource:
        raise InvalidSyntaxException(
            "No mutual keys found, and not two valid datasources"
        )
    # Both datasources are guaranteed non-None past the guard above, so the
    # original post-hoc `assert left_datasource` / `assert right_datasource`
    # were dead code (and stripped under -O); they have been removed.
    left_keys = [c.address for c in left_datasource.output_concepts]
    right_keys = [c.address for c in right_datasource.output_concepts]
    match_concepts = [c.address for c in concepts]
    raise InvalidSyntaxException(
        "No mutual join keys found between"
        f" {left_datasource.identifier} and"
        f" {right_datasource.identifier}, left_keys {left_keys},"
        f" right_keys {right_keys},"
        f" provided join concepts {match_concepts}"
    )
499
+
500
+
501
class BaseJoin(BaseModel):
    """A join between two datasources/query-datasources.

    Join keys are expressed either as a shared ``concepts`` list or as
    explicit ``concept_pairs``; validation narrows ``concepts`` to the keys
    both sides actually expose.
    """

    right_datasource: Union[BuildDatasource, "QueryDatasource"]
    join_type: JoinType
    concepts: Optional[List[BuildConcept]] = None
    left_datasource: Optional[Union[BuildDatasource, "QueryDatasource"]] = None
    concept_pairs: list[ConceptPair] | None = None
    modifiers: List[Modifier] = Field(default_factory=list)

    @model_validator(mode="after")
    def validate_join(self) -> "BaseJoin":
        """Validate the join and reduce ``concepts`` to usable keys.

        Rejects self-joins; skips further checks when explicit pairs or an
        empty concept list were provided; otherwise verifies every concept is
        available (directly or via pseudonyms) on both sides.

        Raises:
            SyntaxError: when joining a datasource to itself.
            InvalidSyntaxException: when a join concept is missing from a side.
        """
        if (
            self.left_datasource
            and self.left_datasource.identifier == self.right_datasource.identifier
        ):
            raise SyntaxError(
                f"Cannot join a dataself to itself, joining {self.left_datasource} and"
                f" {self.right_datasource}"
            )
        # Early returns maintained as in original code
        if self.concept_pairs or self.concepts == []:
            return self

        # reduce concept list to just the mutual keys
        final_concepts = []
        for concept in self.concepts or []:
            # NOTE(review): `include` is never set to False — a missing
            # concept raises instead — so every surviving concept is kept;
            # the flag appears vestigial
            include = True
            for ds in [self.left_datasource, self.right_datasource]:
                synonyms = []
                if not ds:
                    continue
                for c in ds.output_concepts:
                    synonyms += list(c.pseudonyms)
                if (
                    concept.address not in ds.output_concepts
                    and concept.address not in synonyms
                ):
                    raise InvalidSyntaxException(
                        f"Invalid join, missing {concept} on {ds.name}, have"
                        f" {[c.address for c in ds.output_concepts]}"
                    )
            if include:
                final_concepts.append(concept)

        # NOTE(review): given the raise above, this branch appears reachable
        # only if pseudonym matching let concepts through but none survived —
        # confirm intended semantics
        if not final_concepts and self.concepts:
            raise_helpful_join_validation_error(
                self.concepts,
                self.left_datasource,
                self.right_datasource,
            )

        # normalizes concepts=None to [] as a side effect
        self.concepts = final_concepts
        return self

    @property
    def unique_id(self) -> str:
        """String form doubles as the dedupe key (see unique(..., 'unique_id'))."""
        return str(self)

    @property
    def input_concepts(self) -> List[BuildConcept]:
        """All concepts participating in the join condition."""
        base = []
        if self.concept_pairs:
            for pair in self.concept_pairs:
                base += [pair.left, pair.right]
        elif self.concepts:
            base += self.concepts
        return base

    def __str__(self):
        if self.concept_pairs:
            return (
                f"{self.join_type.value} {self.right_datasource.name} on"
                f" {','.join([str(k.existing_datasource.name) + '.'+ str(k.left)+'='+str(k.right) for k in self.concept_pairs])}"
            )
        return (
            f"{self.join_type.value} {self.right_datasource.name} on"
            f" {','.join([str(k) for k in self.concepts])}"
        )
578
+
579
+
580
+ class QueryDatasource(BaseModel):
581
+ input_concepts: List[BuildConcept]
582
+ output_concepts: List[BuildConcept]
583
+ datasources: List[Union[BuildDatasource, "QueryDatasource"]]
584
+ source_map: Dict[str, Set[Union[BuildDatasource, "QueryDatasource", "UnnestJoin"]]]
585
+
586
+ grain: BuildGrain
587
+ joins: List[BaseJoin | UnnestJoin]
588
+ limit: Optional[int] = None
589
+ condition: Optional[
590
+ Union[BuildConditional, BuildComparison, BuildParenthetical]
591
+ ] = Field(default=None)
592
+ source_type: SourceType = SourceType.SELECT
593
+ partial_concepts: List[BuildConcept] = Field(default_factory=list)
594
+ hidden_concepts: set[str] = Field(default_factory=set)
595
+ nullable_concepts: List[BuildConcept] = Field(default_factory=list)
596
+ join_derived_concepts: List[BuildConcept] = Field(default_factory=list)
597
+ force_group: bool | None = None
598
+ existence_source_map: Dict[str, Set[Union[BuildDatasource, "QueryDatasource"]]] = (
599
+ Field(default_factory=dict)
600
+ )
601
+ ordering: BuildOrderBy | None = None
602
+
603
+ def __repr__(self):
604
+ return f"{self.identifier}@<{self.grain}>"
605
+
606
    @property
    def safe_identifier(self):
        """Identifier with dots replaced by underscores for use as a SQL alias."""
        return self.identifier.replace(".", "_")
609
+
610
+ @property
611
+ def full_concepts(self) -> List[BuildConcept]:
612
+ return [
613
+ c
614
+ for c in self.output_concepts
615
+ if c.address not in [z.address for z in self.partial_concepts]
616
+ ]
617
+
618
+ @field_validator("joins")
619
+ @classmethod
620
+ def validate_joins(cls, v):
621
+ unique_pairs = set()
622
+ for join in v:
623
+ if not isinstance(join, BaseJoin):
624
+ continue
625
+ pairing = str(join)
626
+ if pairing in unique_pairs:
627
+ raise SyntaxError(f"Duplicate join {str(join)}")
628
+ unique_pairs.add(pairing)
629
+ return v
630
+
631
    @field_validator("input_concepts")
    @classmethod
    def validate_inputs(cls, v):
        """Deduplicate input concepts by address."""
        return unique(v, "address")
635
+
636
    @field_validator("output_concepts")
    @classmethod
    def validate_outputs(cls, v):
        """Deduplicate output concepts by address."""
        return unique(v, "address")
640
+
641
    @field_validator("source_map")
    @classmethod
    def validate_source_map(cls, v: dict, info: ValidationInfo):
        """Ensure every non-hidden input/output concept has a source map entry.

        A concept is satisfied by a direct entry or by any of its pseudonyms.
        Enforcement is gated on CONFIG.validate_missing.

        Raises:
            SyntaxError: when a required concept is absent from the map.
        """
        values = info.data
        # NOTE(review): hidden_concepts is declared AFTER source_map on this
        # model, so it is presumably never present in info.data here and this
        # always falls back to set() — confirm field ordering intent
        hidden_concepts = values.get("hidden_concepts", set())
        for key in ("input_concepts", "output_concepts"):
            if not values.get(key):
                continue
            concept: BuildConcept
            for concept in values[key]:
                if concept.address in hidden_concepts:
                    continue
                if (
                    concept.address not in v
                    and not any(x in v for x in concept.pseudonyms)
                    and CONFIG.validate_missing
                ):
                    raise SyntaxError(
                        f"Missing source map entry for {concept.address} on {key} with pseudonyms {concept.pseudonyms}, have map: {v}"
                    )
        return v
662
+
663
    def __str__(self):
        """Delegate to __repr__ for a single canonical rendering."""
        return self.__repr__()
665
+
666
    def __hash__(self):
        """Hash by identifier so datasources dedupe in sets/maps."""
        return (self.identifier).__hash__()
668
+
669
    @property
    def concepts(self):
        """Alias for output_concepts."""
        return self.output_concepts
672
+
673
    @property
    def name(self):
        """Alias for identifier."""
        return self.identifier
676
+
677
+ @property
678
+ def group_required(self) -> bool:
679
+ if self.force_group is True:
680
+ return True
681
+ if self.force_group is False:
682
+ return False
683
+ if self.source_type:
684
+ if self.source_type in [
685
+ SourceType.FILTER,
686
+ ]:
687
+ return False
688
+ elif self.source_type in [
689
+ SourceType.GROUP,
690
+ ]:
691
+ return True
692
+ return False
693
+
694
+ def __add__(self, other) -> "QueryDatasource":
695
+ # these are syntax errors to avoid being caught by current
696
+ if not isinstance(other, QueryDatasource):
697
+ raise SyntaxError("Can only merge two query datasources")
698
+ if not other.grain == self.grain:
699
+ raise SyntaxError(
700
+ "Can only merge two query datasources with identical grain"
701
+ )
702
+ if not self.group_required == other.group_required:
703
+ raise SyntaxError(
704
+ "can only merge two datasources if the group required flag is the same"
705
+ )
706
+ if not self.join_derived_concepts == other.join_derived_concepts:
707
+ raise SyntaxError(
708
+ "can only merge two datasources if the join derived concepts are the same"
709
+ )
710
+ if not self.force_group == other.force_group:
711
+ raise SyntaxError(
712
+ "can only merge two datasources if the force_group flag is the same"
713
+ )
714
+ logger.debug(
715
+ f"[Query Datasource] merging {self.name} with"
716
+ f" {[c.address for c in self.output_concepts]} concepts and"
717
+ f" {other.name} with {[c.address for c in other.output_concepts]} concepts"
718
+ )
719
+
720
+ merged_datasources: dict[str, Union[BuildDatasource, "QueryDatasource"]] = {}
721
+
722
+ for ds in [*self.datasources, *other.datasources]:
723
+ if ds.safe_identifier in merged_datasources:
724
+ merged_datasources[ds.safe_identifier] = (
725
+ merged_datasources[ds.safe_identifier] + ds
726
+ )
727
+ else:
728
+ merged_datasources[ds.safe_identifier] = ds
729
+
730
+ final_source_map: defaultdict[
731
+ str, Set[Union[BuildDatasource, QueryDatasource, UnnestJoin]]
732
+ ] = defaultdict(set)
733
+
734
+ # add our sources
735
+ for key in self.source_map:
736
+ final_source_map[key] = self.source_map[key].union(
737
+ other.source_map.get(key, set())
738
+ )
739
+ # add their sources
740
+ for key in other.source_map:
741
+ if key not in final_source_map:
742
+ final_source_map[key] = other.source_map[key]
743
+
744
+ # if a ds was merged (to combine columns), we need to update the source map
745
+ # to use the merged item
746
+ for k, v in final_source_map.items():
747
+ final_source_map[k] = set(
748
+ merged_datasources.get(x.safe_identifier, x) for x in list(v)
749
+ )
750
+ self_hidden: set[str] = self.hidden_concepts or set()
751
+ other_hidden: set[str] = other.hidden_concepts or set()
752
+ # hidden is the minimum overlapping set
753
+ hidden = self_hidden.intersection(other_hidden)
754
+ qds = QueryDatasource(
755
+ input_concepts=unique(
756
+ self.input_concepts + other.input_concepts, "address"
757
+ ),
758
+ output_concepts=unique(
759
+ self.output_concepts + other.output_concepts, "address"
760
+ ),
761
+ source_map=final_source_map,
762
+ datasources=list(merged_datasources.values()),
763
+ grain=self.grain,
764
+ joins=unique(self.joins + other.joins, "unique_id"),
765
+ condition=(
766
+ self.condition + other.condition
767
+ if self.condition and other.condition
768
+ else self.condition or other.condition
769
+ ),
770
+ source_type=self.source_type,
771
+ partial_concepts=unique(
772
+ self.partial_concepts + other.partial_concepts, "address"
773
+ ),
774
+ join_derived_concepts=self.join_derived_concepts,
775
+ force_group=self.force_group,
776
+ hidden_concepts=hidden,
777
+ ordering=self.ordering,
778
+ )
779
+ logger.debug(
780
+ f"[Query Datasource] merged with {[c.address for c in qds.output_concepts]} concepts"
781
+ )
782
+ logger.debug(qds.source_map)
783
+ return qds
784
+
785
+ @property
786
+ def identifier(self) -> str:
787
+ filters = abs(hash(str(self.condition))) if self.condition else ""
788
+ grain = "_".join([str(c).replace(".", "_") for c in self.grain.components])
789
+ group = ""
790
+ if self.group_required:
791
+ keys = [
792
+ x.address for x in self.output_concepts if x.purpose != Purpose.METRIC
793
+ ]
794
+ group = "_grouped_by_" + "_".join(keys)
795
+ return (
796
+ "_join_".join([d.identifier for d in self.datasources])
797
+ + group
798
+ + (f"_at_{grain}" if grain else "_at_abstract")
799
+ + (f"_filtered_by_{filters}" if filters else "")
800
+ )
801
+
802
    def get_alias(
        self,
        concept: BuildConcept,
        use_raw_name: bool = False,
        force_alias: bool = False,
        source: str | None = None,
    ):
        """Resolve the rendered alias for `concept` within this datasource.

        Tries each child datasource in turn (optionally restricted to the one
        whose safe_identifier matches `source`), then falls back to the
        concept's own name when it appears in the outputs at this grain.

        Raises:
            ValueError: if the concept cannot be located anywhere.

        NOTE(review): the `use_raw_name` and `force_alias` parameters are
        overwritten per child datasource inside the loop, so caller-supplied
        values are effectively ignored — confirm this is intentional.
        """
        for x in self.datasources:
            # query datasources should be referenced by their alias, always
            force_alias = isinstance(x, QueryDatasource)
            # raw names apply only to plain datasource types that are not aliased
            use_raw_name = isinstance(x, DATASOURCE_TYPES) and not force_alias
            if source and x.safe_identifier != source:
                continue
            try:
                return x.get_alias(
                    concept.with_grain(self.grain),
                    use_raw_name,
                    force_alias=force_alias,
                )
            except ValueError:
                # not found on this child; try the next one
                continue
        existing = [c.with_grain(self.grain) for c in self.output_concepts]
        if concept in existing:
            return concept.name

        existing_str = [str(c) for c in existing]
        datasources = [ds.identifier for ds in self.datasources]
        raise ValueError(
            f"{LOGGER_PREFIX} Concept {str(concept)} not found on {self.identifier};"
            f" have {existing_str} from {datasources}."
        )
834
+
835
    @property
    def safe_location(self):
        # The renderable location of a query datasource is its identifier.
        return self.identifier
838
+
839
+
840
class AliasedExpression(BaseModel):
    """A build expression paired with the alias it is exposed under."""

    expr: BuildExpr
    alias: str
843
+
844
+
845
class RecursiveCTE(CTE):
    """A CTE that expands into a recursive (anchor + recursive-step) pair.

    The recursion edge is taken from the single output column whose
    derivation is Derivation.RECURSIVE; the first two concept arguments of
    its lineage are treated as the left/right sides of that edge.
    """

    def generate_loop_functions(
        self,
        recursive_derived: BuildConcept,
        left_recurse_concept: BuildConcept,
        right_recurse_concept: BuildConcept,
    ) -> tuple[BuildConcept, BuildConcept, BuildConcept]:
        """Build the synthetic concepts used to gate and close the loop.

        Returns (bottom_derivation, join_gate, bottom_join_gate):
        - join_gate / bottom_join_gate: BOOL CASE concepts that are True when
          `right_recurse_concept` IS NULL (i.e. the recursion has bottomed out).
        - bottom_derivation: a CASE that keeps `recursive_derived` when the
          right side is NULL and otherwise advances to the right side.

        NOTE(review): `left_recurse_concept` is accepted but never referenced
        here, and join_gate/bottom_join_gate have identical lineage apart from
        their names — confirm whether that duplication is intentional.
        """
        join_gate = BuildConcept(
            name=RECURSIVE_GATING_CONCEPT,
            canonical_name=RECURSIVE_GATING_CONCEPT,
            namespace=DEFAULT_NAMESPACE,
            grain=recursive_derived.grain,
            build_is_aggregate=False,
            datatype=DataType.BOOL,
            purpose=Purpose.KEY,
            derivation=Derivation.BASIC,
            lineage=BuildFunction(
                operator=FunctionType.CASE,
                arguments=[
                    BuildCaseWhen(
                        comparison=BuildComparison(
                            left=right_recurse_concept,
                            right=MagicConstants.NULL,
                            operator=ComparisonOperator.IS,
                        ),
                        expr=True,
                    ),
                    BuildCaseElse(expr=False),
                ],
                output_data_type=DataType.BOOL,
                output_purpose=Purpose.KEY,
            ),
        )
        bottom_join_gate = BuildConcept(
            name=f"{RECURSIVE_GATING_CONCEPT}_two",
            canonical_name=f"{RECURSIVE_GATING_CONCEPT}_two",
            namespace=DEFAULT_NAMESPACE,
            grain=recursive_derived.grain,
            build_is_aggregate=False,
            datatype=DataType.BOOL,
            purpose=Purpose.KEY,
            derivation=Derivation.BASIC,
            lineage=BuildFunction(
                operator=FunctionType.CASE,
                arguments=[
                    BuildCaseWhen(
                        comparison=BuildComparison(
                            left=right_recurse_concept,
                            right=MagicConstants.NULL,
                            operator=ComparisonOperator.IS,
                        ),
                        expr=True,
                    ),
                    BuildCaseElse(expr=False),
                ],
                output_data_type=DataType.BOOL,
                output_purpose=Purpose.KEY,
            ),
        )
        # CASE: keep the derived value once the right side is NULL, otherwise
        # step forward to the right-hand side of the recursion edge.
        bottom_derivation = BuildConcept(
            name=recursive_derived.name + "_bottom",
            canonical_name=recursive_derived.canonical_name + "_bottom",
            namespace=recursive_derived.namespace,
            grain=recursive_derived.grain,
            build_is_aggregate=False,
            datatype=recursive_derived.datatype,
            purpose=recursive_derived.purpose,
            derivation=Derivation.RECURSIVE,
            lineage=BuildFunction(
                operator=FunctionType.CASE,
                arguments=[
                    BuildCaseWhen(
                        comparison=BuildComparison(
                            left=right_recurse_concept,
                            right=MagicConstants.NULL,
                            operator=ComparisonOperator.IS,
                        ),
                        expr=recursive_derived,
                    ),
                    BuildCaseElse(expr=right_recurse_concept),
                ],
                output_data_type=recursive_derived.datatype,
                output_purpose=recursive_derived.purpose,
            ),
        )
        return bottom_derivation, join_gate, bottom_join_gate

    @property
    def internal_ctes(self) -> List[CTE]:
        """Expand this recursive CTE into its [anchor, recursive-step] pair.

        Raises:
            SyntaxError: if the recursive concept's lineage is not a
                BuildFunction, or there is no parent CTE to anchor on.
        """
        # The gating concept is internal bookkeeping; strip it from the
        # user-visible output columns.
        filtered_output = [
            x for x in self.output_columns if x.name != RECURSIVE_GATING_CONCEPT
        ]
        recursive_derived = [
            x for x in self.output_columns if x.derivation == Derivation.RECURSIVE
        ][0]
        if not isinstance(recursive_derived.lineage, BuildFunction):
            raise SyntaxError(
                "Recursive CTEs must have a function lineage, found"
                f" {recursive_derived.lineage}"
            )
        left_recurse_concept = recursive_derived.lineage.concept_arguments[0]
        right_recurse_concept = recursive_derived.lineage.concept_arguments[1]
        parent_ctes: List[CTE | UnionCTE]
        if self.parent_ctes:
            # the first parent serves as both the anchor input and loop input
            base = self.parent_ctes[0]
            loop_input_cte = base
            parent_ctes = [base]
            parent_identifier = base.identifier
        else:
            raise SyntaxError("Recursive CTEs must have a parent CTE currently")
        bottom_derivation, join_gate, bottom_join_gate = self.generate_loop_functions(
            recursive_derived, left_recurse_concept, right_recurse_concept
        )
        # Anchor outputs include the join gate; the recursive step swaps in the
        # bottom derivation for the recursive concept.
        base_output = [*filtered_output, join_gate]
        bottom_output = []
        for x in filtered_output:
            if x.address == recursive_derived.address:
                bottom_output.append(bottom_derivation)
            else:
                bottom_output.append(x)

        bottom_output = [*bottom_output, bottom_join_gate]
        # Anchor member: this CTE's own configuration plus the gate column.
        top = CTE(
            name=self.name,
            source=self.source,
            output_columns=base_output,
            source_map=self.source_map,
            grain=self.grain,
            existence_source_map=self.existence_source_map,
            parent_ctes=self.parent_ctes,
            joins=self.joins,
            condition=self.condition,
            partial_concepts=self.partial_concepts,
            hidden_concepts=self.hidden_concepts,
            nullable_concepts=self.nullable_concepts,
            join_derived_concepts=self.join_derived_concepts,
            group_to_grain=self.group_to_grain,
            order_by=self.order_by,
            limit=self.limit,
        )
        top_cte_array: list[CTE | UnionCTE] = [top]
        bottom_source_map = {
            left_recurse_concept.address: [top.identifier],
            right_recurse_concept.address: [parent_identifier],
            # recursive_derived.address: self.source_map[recursive_derived.address],
            join_gate.address: [top.identifier],
            recursive_derived.address: [top.identifier],
        }
        # Recursive member: join the anchor back to the loop input on
        # recursive_derived = left_recurse_concept, continuing only while the
        # gate is not True.
        bottom = CTE(
            name=self.name,
            source=self.source,
            output_columns=bottom_output,
            source_map=bottom_source_map,
            grain=self.grain,
            existence_source_map=self.existence_source_map,
            parent_ctes=top_cte_array + parent_ctes,
            joins=[
                Join(
                    right_cte=loop_input_cte,
                    jointype=JoinType.INNER,
                    joinkey_pairs=[
                        CTEConceptPair(
                            left=recursive_derived,
                            right=left_recurse_concept,
                            existing_datasource=loop_input_cte.source,
                            modifiers=[],
                            cte=top,
                        )
                    ],
                    condition=BuildComparison(
                        left=join_gate, right=True, operator=ComparisonOperator.IS_NOT
                    ),
                )
            ],
            partial_concepts=self.partial_concepts,
            hidden_concepts=self.hidden_concepts,
            nullable_concepts=self.nullable_concepts,
            join_derived_concepts=self.join_derived_concepts,
            group_to_grain=self.group_to_grain,
            order_by=self.order_by,
            limit=self.limit,
        )
        return [top, bottom]
1030
+
1031
+
1032
class UnionCTE(BaseModel):
    """A CTE composed of multiple parent CTEs combined with a set operator
    (UNION ALL by default)."""

    name: str
    source: QueryDatasource
    parent_ctes: list[CTE | UnionCTE]
    internal_ctes: list[CTE | UnionCTE]
    output_columns: List[BuildConcept]
    grain: BuildGrain
    operator: str = "UNION ALL"
    order_by: Optional[BuildOrderBy] = None
    limit: Optional[int] = None
    hidden_concepts: set[str] = Field(default_factory=set)
    partial_concepts: list[BuildConcept] = Field(default_factory=list)
    existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
    inlined_ctes: Dict[str, InlinedCTE] = Field(default_factory=dict)

    @computed_field  # type: ignore
    @property
    def output_lcl(self) -> LooseBuildConceptList:
        # Loose wrapper over the output columns for address-based comparisons.
        return LooseBuildConceptList(concepts=self.output_columns)

    def get_alias(self, concept: BuildConcept, source: str | None = None) -> str:
        """Return the concept's safe address if any parent CTE (optionally
        restricted to `source` by name) outputs it; otherwise a sentinel.

        NOTE(review): `concept.address in cte.output_columns` compares a
        string against BuildConcept instances — assumes BuildConcept equality
        supports string addresses; confirm.
        """
        for cte in self.parent_ctes:
            if concept.address in cte.output_columns:
                if source and source != cte.name:
                    continue
                return concept.safe_address
        # sentinel rather than an exception; callers must treat this as a miss
        return "INVALID_ALIAS"

    def get_concept(self, address: str) -> BuildConcept | None:
        """Look up a concept by address, preferring internal CTE outputs and
        falling back to this CTE's own output columns. Returns None on miss."""
        for cte in self.internal_ctes:
            if address in cte.output_columns:
                match = [x for x in cte.output_columns if x.address == address].pop()
                return match

        match_list = [x for x in self.output_columns if x.address == address]
        if match_list:
            return match_list.pop()
        return None

    @property
    def source_map(self):
        # Union CTEs have no per-concept sources; every output maps to nothing.
        return {x.address: [] for x in self.output_columns}

    @property
    def condition(self):
        # Union CTEs never carry a filter condition.
        return None

    @condition.setter
    def condition(self, value):
        # Setting a condition on a union is unsupported by design.
        raise NotImplementedError

    @property
    def identifier(self) -> str:
        return self.name

    @property
    def safe_identifier(self):
        return self.name

    @property
    def group_to_grain(self) -> bool:
        # Union output is never re-grouped here.
        return False

    @property
    def group_concepts(self) -> List[BuildConcept]:
        # unions should always be on unique sets
        return []

    def __add__(self, other):
        # Same-named union CTEs are treated as identical; anything else is an error.
        if not isinstance(other, UnionCTE) or not other.name == self.name:
            raise SyntaxError("Cannot merge union CTEs")
        return self
1104
+
1105
+
1106
class Join(BaseModel):
    """A join onto a right-hand CTE, optionally from an explicit left CTE,
    expressed via concept key pairs and/or an arbitrary condition."""

    right_cte: CTE | UnionCTE
    jointype: JoinType
    left_cte: CTE | UnionCTE | None = None
    joinkey_pairs: List[CTEConceptPair] | None = None
    # names of CTEs rendered inline as their backing datasource rather than
    # referenced by CTE name
    inlined_ctes: set[str] = Field(default_factory=set)
    quote: str | None = None
    condition: BuildConditional | BuildComparison | BuildParenthetical | None = None
    modifiers: List[Modifier] = Field(default_factory=list)

    def inline_cte(self, cte: CTE):
        # Mark this CTE as inlined for rendering purposes.
        self.inlined_ctes.add(cte.name)

    def get_name(self, cte: CTE | UnionCTE) -> str:
        # Inlined CTEs are referenced by their first datasource's identifier.
        if cte.identifier in self.inlined_ctes:
            return cte.source.datasources[0].safe_identifier
        return cte.safe_identifier

    @property
    def right_name(self) -> str:
        if self.right_cte.identifier in self.inlined_ctes:
            return self.right_cte.source.datasources[0].safe_identifier
        return self.right_cte.safe_identifier

    @property
    def right_ref(self) -> str:
        """Rendered SQL reference for the right side, honoring quoting and
        inlining (inlined CTEs render as `location as identifier`)."""
        if self.quote:
            if self.right_cte.identifier in self.inlined_ctes:
                return f"{safe_quote(self.right_cte.source.datasources[0].safe_location, self.quote)} as {self.quote}{self.right_cte.source.datasources[0].safe_identifier}{self.quote}"
            return f"{self.quote}{self.right_cte.safe_identifier}{self.quote}"
        if self.right_cte.identifier in self.inlined_ctes:
            return f"{self.right_cte.source.datasources[0].safe_location} as {self.right_cte.source.datasources[0].safe_identifier}"
        return self.right_cte.safe_identifier

    @property
    def unique_id(self) -> str:
        # Joins are deduplicated by their string rendering (see merge logic).
        return str(self)

    def __str__(self):
        if self.joinkey_pairs:
            return (
                f"{self.jointype.value} join"
                f" {self.right_name} on"
                f" {','.join([k.cte.name + '.'+str(k.left.address)+'='+str(k.right.address) for k in self.joinkey_pairs])}"
            )
        # BUGFIX: the branches below are only reachable when joinkey_pairs is
        # falsy (None or empty); iterating None raised TypeError, so fall back
        # to an empty list to render an empty key list instead of crashing.
        elif self.left_cte:
            return (
                f"{self.jointype.value} JOIN {self.left_cte.name} and"
                f" {self.right_name} on {','.join([str(k) for k in (self.joinkey_pairs or [])])}"
            )
        return f"{self.jointype.value} JOIN {self.right_name} on {','.join([str(k) for k in (self.joinkey_pairs or [])])}"
1157
+
1158
+
1159
def merge_ctes(ctes: List[CTE | UnionCTE]) -> List[CTE | UnionCTE]:
    """Collapse CTEs that share a name by merging them pairwise.

    CTEs with distinct names pass through untouched; same-named CTEs are
    combined with their `__add__` implementation. First-seen order of names
    is preserved in the returned list.
    """
    merged: Dict[str, CTE | UnionCTE] = {}
    for candidate in ctes:
        existing = merged.get(candidate.name)
        merged[candidate.name] = (
            candidate if existing is None else existing + candidate
        )
    return list(merged.values())
1170
+
1171
+
1172
class CompiledCTE(BaseModel):
    """A CTE rendered down to its final statement text, keyed by name."""

    name: str
    statement: str
1175
+
1176
+
1177
+ UnionCTE.model_rebuild()