pytrilogy 0.3.149__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cp313-win_amd64.pyd +0 -0
  4. pytrilogy-0.3.149.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.149.dist-info/RECORD +207 -0
  6. pytrilogy-0.3.149.dist-info/WHEEL +4 -0
  7. pytrilogy-0.3.149.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.149.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +27 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +119 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +454 -0
  31. trilogy/core/env_processor.py +239 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1240 -0
  36. trilogy/core/graph_models.py +142 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2670 -0
  40. trilogy/core/models/build.py +2603 -0
  41. trilogy/core/models/build_environment.py +165 -0
  42. trilogy/core/models/core.py +506 -0
  43. trilogy/core/models/datasource.py +436 -0
  44. trilogy/core/models/environment.py +756 -0
  45. trilogy/core/models/execute.py +1213 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +270 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +207 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +695 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +846 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +522 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +604 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1432 -0
  112. trilogy/dialect/bigquery.py +314 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +159 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +397 -0
  117. trilogy/dialect/enums.py +151 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/__init__.py +17 -0
  127. trilogy/execution/config.py +119 -0
  128. trilogy/execution/state/__init__.py +0 -0
  129. trilogy/execution/state/exceptions.py +26 -0
  130. trilogy/execution/state/file_state_store.py +0 -0
  131. trilogy/execution/state/sqllite_state_store.py +0 -0
  132. trilogy/execution/state/state_store.py +406 -0
  133. trilogy/executor.py +692 -0
  134. trilogy/hooks/__init__.py +4 -0
  135. trilogy/hooks/base_hook.py +40 -0
  136. trilogy/hooks/graph_hook.py +135 -0
  137. trilogy/hooks/query_debugger.py +166 -0
  138. trilogy/metadata/__init__.py +0 -0
  139. trilogy/parser.py +10 -0
  140. trilogy/parsing/README.md +21 -0
  141. trilogy/parsing/__init__.py +0 -0
  142. trilogy/parsing/common.py +1069 -0
  143. trilogy/parsing/config.py +5 -0
  144. trilogy/parsing/exceptions.py +8 -0
  145. trilogy/parsing/helpers.py +1 -0
  146. trilogy/parsing/parse_engine.py +2876 -0
  147. trilogy/parsing/render.py +775 -0
  148. trilogy/parsing/trilogy.lark +546 -0
  149. trilogy/py.typed +0 -0
  150. trilogy/render.py +45 -0
  151. trilogy/scripts/README.md +9 -0
  152. trilogy/scripts/__init__.py +0 -0
  153. trilogy/scripts/agent.py +41 -0
  154. trilogy/scripts/agent_info.py +306 -0
  155. trilogy/scripts/common.py +432 -0
  156. trilogy/scripts/dependency/Cargo.lock +617 -0
  157. trilogy/scripts/dependency/Cargo.toml +39 -0
  158. trilogy/scripts/dependency/README.md +131 -0
  159. trilogy/scripts/dependency/build.sh +25 -0
  160. trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
  161. trilogy/scripts/dependency/src/lib.rs +16 -0
  162. trilogy/scripts/dependency/src/main.rs +770 -0
  163. trilogy/scripts/dependency/src/parser.rs +435 -0
  164. trilogy/scripts/dependency/src/preql.pest +208 -0
  165. trilogy/scripts/dependency/src/python_bindings.rs +311 -0
  166. trilogy/scripts/dependency/src/resolver.rs +716 -0
  167. trilogy/scripts/dependency/tests/base.preql +3 -0
  168. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  169. trilogy/scripts/dependency/tests/customer.preql +6 -0
  170. trilogy/scripts/dependency/tests/main.preql +9 -0
  171. trilogy/scripts/dependency/tests/orders.preql +7 -0
  172. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  173. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  174. trilogy/scripts/dependency.py +323 -0
  175. trilogy/scripts/display.py +555 -0
  176. trilogy/scripts/environment.py +59 -0
  177. trilogy/scripts/fmt.py +32 -0
  178. trilogy/scripts/ingest.py +487 -0
  179. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  180. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  181. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  182. trilogy/scripts/ingest_helpers/typing.py +161 -0
  183. trilogy/scripts/init.py +105 -0
  184. trilogy/scripts/parallel_execution.py +762 -0
  185. trilogy/scripts/plan.py +189 -0
  186. trilogy/scripts/refresh.py +161 -0
  187. trilogy/scripts/run.py +79 -0
  188. trilogy/scripts/serve.py +202 -0
  189. trilogy/scripts/serve_helpers/__init__.py +41 -0
  190. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  191. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  192. trilogy/scripts/serve_helpers/models.py +38 -0
  193. trilogy/scripts/single_execution.py +131 -0
  194. trilogy/scripts/testing.py +143 -0
  195. trilogy/scripts/trilogy.py +75 -0
  196. trilogy/std/__init__.py +0 -0
  197. trilogy/std/color.preql +3 -0
  198. trilogy/std/date.preql +13 -0
  199. trilogy/std/display.preql +18 -0
  200. trilogy/std/geography.preql +22 -0
  201. trilogy/std/metric.preql +15 -0
  202. trilogy/std/money.preql +67 -0
  203. trilogy/std/net.preql +14 -0
  204. trilogy/std/ranking.preql +7 -0
  205. trilogy/std/report.preql +5 -0
  206. trilogy/std/semantic.preql +6 -0
  207. trilogy/utility.py +34 -0
@@ -0,0 +1,1213 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import defaultdict
4
+ from typing import Dict, List, Optional, Set, Union
5
+
6
+ from pydantic import (
7
+ BaseModel,
8
+ Field,
9
+ ValidationInfo,
10
+ computed_field,
11
+ field_validator,
12
+ model_validator,
13
+ )
14
+
15
+ from trilogy.constants import (
16
+ CONFIG,
17
+ DEFAULT_NAMESPACE,
18
+ RECURSIVE_GATING_CONCEPT,
19
+ MagicConstants,
20
+ logger,
21
+ )
22
+ from trilogy.core.constants import CONSTANT_DATASET
23
+ from trilogy.core.enums import (
24
+ ComparisonOperator,
25
+ Derivation,
26
+ FunctionClass,
27
+ FunctionType,
28
+ JoinType,
29
+ Modifier,
30
+ Purpose,
31
+ SourceType,
32
+ )
33
+ from trilogy.core.exceptions import InvalidSyntaxException
34
+ from trilogy.core.models.build import (
35
+ BuildCaseElse,
36
+ BuildCaseWhen,
37
+ BuildComparison,
38
+ BuildConcept,
39
+ BuildConditional,
40
+ BuildDatasource,
41
+ BuildExpr,
42
+ BuildFunction,
43
+ BuildGrain,
44
+ BuildOrderBy,
45
+ BuildParamaterizedConceptReference,
46
+ BuildParenthetical,
47
+ BuildRowsetItem,
48
+ DataType,
49
+ LooseBuildConceptList,
50
+ )
51
+ from trilogy.core.models.datasource import Address
52
+ from trilogy.core.utility import safe_quote
53
+ from trilogy.utility import unique
54
+
55
+ LOGGER_PREFIX = "[MODELS_EXECUTE]"  # tag for log messages from this module — presumably consumed by logging calls later in the file; confirm
56
+
57
+
58
class InlinedCTE(BaseModel):
    """Record of a parent CTE that was collapsed into a direct table reference.

    Created by ``CTE.inline_parent_datasource`` so rendering can map the
    original CTE alias to the physical table that replaced it.
    """

    # alias the CTE had before it was inlined
    original_alias: str
    # alias used after inlining (the datasource's safe identifier)
    new_alias: str
    # physical location (safe_location of the datasource) that replaces the CTE
    new_base: str
63
+
64
class CTE(BaseModel):
    """A common table expression in the compiled query plan.

    Wraps a ``QueryDatasource`` plus everything needed to render it as SQL:
    output columns, where each concept is sourced from (``source_map``),
    parent CTEs it references, joins, filter condition, grouping behavior,
    and bookkeeping for parents that have been inlined into raw table scans.
    """

    name: str
    source: "QueryDatasource"
    output_columns: List[BuildConcept]
    # concept address -> list of source aliases the concept can be read from
    source_map: Dict[str, list[str]]
    grain: BuildGrain
    base: bool = False
    # whether rendering must emit a GROUP BY to reach the target grain
    group_to_grain: bool = False
    existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
    parent_ctes: List[Union["CTE", "UnionCTE"]] = Field(default_factory=list)
    joins: List[Union["Join", "InstantiatedUnnestJoin"]] = Field(default_factory=list)
    condition: Optional[
        Union[BuildComparison, BuildConditional, BuildParenthetical]
    ] = None
    partial_concepts: List[BuildConcept] = Field(default_factory=list)
    nullable_concepts: List[BuildConcept] = Field(default_factory=list)
    join_derived_concepts: List[BuildConcept] = Field(default_factory=list)
    hidden_concepts: set[str] = Field(default_factory=set)
    order_by: Optional[BuildOrderBy] = None
    limit: Optional[int] = None
    # overrides set when a parent CTE is inlined to a raw table reference
    base_name_override: Optional[Union["Address", str]] = None
    base_alias_override: Optional[str] = None
    # safe_identifier -> record of each parent CTE inlined into this one
    inlined_ctes: dict[str, InlinedCTE] = Field(default_factory=dict)

    @classmethod
    def from_datasource(cls, datasource: BuildDatasource) -> "CTE":
        """Build a passthrough CTE that selects directly from ``datasource``.

        Every output concept is mapped to the datasource's safe identifier.
        """
        qds = QueryDatasource.from_datasource(datasource)
        return cls(
            name=datasource.name,
            source=qds,
            output_columns=qds.output_concepts,
            source_map={
                c.address: [datasource.safe_identifier] for c in qds.output_concepts
            },
            grain=datasource.grain,
            partial_concepts=datasource.partial_concepts,
            nullable_concepts=datasource.nullable_concepts,
            hidden_concepts={c.address for c in datasource.hidden_concepts},
            base_alias_override=datasource.safe_identifier,
        )

    @field_validator("join_derived_concepts")
    def validate_join_derived_concepts(cls, v):
        """Dedupe by address; more than one join-derived concept is unsupported."""
        if len(v) > 1:
            raise NotImplementedError(
                "Multiple join derived concepts not yet supported."
            )
        return unique(v, "address")

    @property
    def identifier(self):
        # identifier and safe_identifier are both just the name for a CTE
        return self.name

    @property
    def safe_identifier(self):
        return self.name

    @computed_field  # type: ignore
    @property
    def output_lcl(self) -> LooseBuildConceptList:
        """Loose concept-list view of the output columns, for set-style checks."""
        return LooseBuildConceptList(concepts=self.output_columns)

    @field_validator("output_columns")
    def validate_output_columns(cls, v):
        # dedupe outputs by address
        return unique(v, "address")

    @property
    def comment(self) -> str:
        """Render a multi-line SQL comment summarizing this CTE.

        Sections (joins, partials, source map, nullable) are gated on the
        corresponding ``CONFIG.comments`` flags.
        """
        base = f"Target: {str(self.grain)}. Group: {self.group_to_grain}"
        base += f" Source: {self.source.source_type}."
        if self.parent_ctes:
            base += f" References: {', '.join([x.name for x in self.parent_ctes])}."
        if self.joins and CONFIG.comments.joins:
            base += f"\n-- Joins: {', '.join([str(x) for x in self.joins])}."
        if self.partial_concepts and CONFIG.comments.partial:
            base += (
                f"\n-- Partials: {', '.join([str(x) for x in self.partial_concepts])}."
            )
        if CONFIG.comments.source_map:
            base += f"\n-- Source Map: {self.source_map}."
        base += f"\n-- Output: {', '.join([str(x) for x in self.output_columns])}."
        if self.source.input_concepts:
            base += f"\n-- Inputs: {', '.join([str(x) for x in self.source.input_concepts])}."
        if self.hidden_concepts:
            base += f"\n-- Hidden: {', '.join([str(x) for x in self.hidden_concepts])}."
        if self.nullable_concepts and CONFIG.comments.nullable:
            base += (
                f"\n-- Nullable: {', '.join([str(x) for x in self.nullable_concepts])}."
            )
        base += "\n"
        return base

    def inline_parent_datasource(
        self, parent: "CTE", force_group: bool = False
    ) -> bool:
        """Replace a reference to ``parent`` with a direct scan of its datasource.

        Rewrites this CTE's datasource list, base name/alias, joins, source
        maps, and parent list so ``parent`` no longer needs to be rendered.
        Returns True if inlining was performed; False when the parent is not
        backed by a concrete BuildDatasource or that datasource is already
        present here.
        """
        qds_being_inlined = parent.source
        ds_being_inlined = qds_being_inlined.datasources[0]
        if not isinstance(ds_being_inlined, BuildDatasource):
            return False
        if any(
            [
                x.safe_identifier == ds_being_inlined.safe_identifier
                for x in self.source.datasources
            ]
        ):
            return False
        self.source.datasources = [
            ds_being_inlined,
            *[
                x
                for x in self.source.datasources
                if x.safe_identifier != qds_being_inlined.safe_identifier
            ],
        ]
        # need to identify this before updating joins
        if self.base_name == parent.name:
            self.base_name_override = ds_being_inlined.safe_location
            self.base_alias_override = ds_being_inlined.safe_identifier

        # if we have a join to the parent, we need to remove it
        for join in self.joins:
            if isinstance(join, InstantiatedUnnestJoin):
                continue
            if (
                join.left_cte
                and join.left_cte.safe_identifier == parent.safe_identifier
            ):
                join.inline_cte(parent)
            if join.joinkey_pairs:
                for pair in join.joinkey_pairs:
                    if pair.cte and pair.cte.safe_identifier == parent.safe_identifier:
                        join.inline_cte(parent)
            if join.right_cte.safe_identifier == parent.safe_identifier:
                join.inline_cte(parent)
        # repoint source_map entries from the parent CTE to the raw datasource
        for k, v in self.source_map.items():
            if isinstance(v, list):
                self.source_map[k] = [
                    (
                        ds_being_inlined.safe_identifier
                        if x == parent.safe_identifier
                        else x
                    )
                    for x in v
                ]
            elif v == parent.safe_identifier:
                self.source_map[k] = [ds_being_inlined.safe_identifier]
        # same repointing for the existence source map
        for k, v in self.existence_source_map.items():
            if isinstance(v, list):
                self.existence_source_map[k] = [
                    (
                        ds_being_inlined.safe_identifier
                        if x == parent.safe_identifier
                        else x
                    )
                    for x in v
                ]
            elif v == parent.safe_identifier:
                self.existence_source_map[k] = [ds_being_inlined.safe_identifier]
        # zip in any required values for lookups
        for k in ds_being_inlined.output_lcl.addresses:
            if k in self.source_map and self.source_map[k]:
                continue
            self.source_map[k] = [ds_being_inlined.safe_identifier]
        self.parent_ctes = [
            x for x in self.parent_ctes if x.safe_identifier != parent.safe_identifier
        ]
        if force_group:
            self.group_to_grain = True
        self.inlined_ctes[ds_being_inlined.safe_identifier] = InlinedCTE(
            original_alias=parent.name,
            new_alias=ds_being_inlined.safe_identifier,
            new_base=ds_being_inlined.safe_location,
        )
        return True

    def __add__(self, other: "CTE" | "UnionCTE"):
        """Merge another CTE produced for the same logical node into this one.

        Mutates and returns ``self``. Raises ValueError when ``other`` is a
        UnionCTE or when grain/condition differ. Hidden concepts survive
        only if hidden on both sides; all other collections are unioned.
        """
        if isinstance(other, UnionCTE):
            raise ValueError("cannot merge CTE and union CTE")
        logger.info('Merging two copies of CTE "%s"', self.name)
        if not self.grain == other.grain:
            error = (
                "Attempting to merge two ctes of different grains"
                f" {self.name} {other.name} grains {self.grain} {other.grain}| {self.group_to_grain} {other.group_to_grain}| {self.output_lcl} {other.output_lcl}"
            )
            raise ValueError(error)
        if not self.condition == other.condition:
            error = (
                "Attempting to merge two ctes with different conditions"
                f" {self.name} {other.name} conditions {self.condition} {other.condition}"
            )
            raise ValueError(error)
        mutually_hidden = set()
        for concept in self.hidden_concepts:
            if concept in other.hidden_concepts:
                mutually_hidden.add(concept)
        self.partial_concepts = unique(
            self.partial_concepts + other.partial_concepts, "address"
        )
        self.parent_ctes = merge_ctes(self.parent_ctes + other.parent_ctes)

        self.source_map = {**self.source_map, **other.source_map}

        self.output_columns = unique(
            self.output_columns + other.output_columns, "address"
        )
        self.joins = unique(self.joins + other.joins, "unique_id")
        # NOTE(review): partial_concepts was already merged above; this repeat
        # is redundant but harmless.
        self.partial_concepts = unique(
            self.partial_concepts + other.partial_concepts, "address"
        )
        self.join_derived_concepts = unique(
            self.join_derived_concepts + other.join_derived_concepts, "address"
        )

        self.source.source_map = {**self.source.source_map, **other.source.source_map}
        self.source.output_concepts = unique(
            self.source.output_concepts + other.source.output_concepts, "address"
        )
        self.nullable_concepts = unique(
            self.nullable_concepts + other.nullable_concepts, "address"
        )
        self.hidden_concepts = mutually_hidden
        self.existence_source_map = {
            **self.existence_source_map,
            **other.existence_source_map,
        }
        self.inlined_ctes = {
            **self.inlined_ctes,
            **other.inlined_ctes,
        }

        return self

    @property
    def relevant_base_ctes(self):
        # currently all parents are considered relevant
        return self.parent_ctes

    @property
    def is_root_datasource(self) -> bool:
        """True when this CTE reads a single concrete, non-constant datasource."""
        return (
            len(self.source.datasources) == 1
            and isinstance(self.source.datasources[0], BuildDatasource)
            and not self.source.datasources[0].name == CONSTANT_DATASET
        )

    @property
    def source_address(self) -> Union["Address", str]:
        """Resolve what this CTE selects FROM: an override, the root
        datasource's address, a parent CTE's name, or the source's name."""
        if self.base_name_override:
            return self.base_name_override
        if self.is_root_datasource:
            ds = self.source.datasources[0]
            if isinstance(ds, BuildDatasource) and isinstance(ds.address, Address):
                return ds.address
            return ds.safe_location
        elif len(self.source.datasources) == 1 and len(self.parent_ctes) == 1:
            return self.parent_ctes[0].name
        elif self.relevant_base_ctes:
            return self.relevant_base_ctes[0].name
        return self.source.name

    @property
    def base_name(self) -> str:
        """String form of source_address (unwraps Address to its location)."""
        addr = self.source_address
        if isinstance(addr, Address):
            return addr.location
        return addr

    @property
    def quote_address(self) -> bool:
        """Whether the FROM target should be quoted as an identifier.

        Query-style addresses (``Address.is_query``) must not be quoted;
        a CTE with no datasources has nothing to quote.
        """
        if self.is_root_datasource:
            root = self.source.datasources[0]
            if isinstance(root, BuildDatasource) and isinstance(root.address, Address):
                return not root.address.is_query
            return True
        elif not self.source.datasources:
            return False
        base = self.source.datasources[0]
        if isinstance(base, BuildDatasource):
            if isinstance(base.address, Address):
                return not base.address.is_query
            return True
        return True

    @property
    def base_alias(self) -> str:
        """Alias used for the FROM target, mirroring source_address priority."""
        if self.base_alias_override:
            return self.base_alias_override
        if self.is_root_datasource:
            return self.source.datasources[0].identifier
        elif self.relevant_base_ctes:
            return self.relevant_base_ctes[0].name
        elif self.parent_ctes:
            return self.parent_ctes[0].name
        return self.name

    def get_concept(self, address: str) -> BuildConcept | None:
        """Find a concept by address in parents, then source inputs/outputs,
        then this CTE's own output columns. Returns None if absent."""
        for cte in self.parent_ctes:
            # NOTE(review): membership of a str address in a list of concepts —
            # presumably BuildConcept.__eq__ accepts address strings; confirm.
            if address in cte.output_columns:
                match = [x for x in cte.output_columns if x.address == address].pop()
                if match:
                    return match

        for array in [self.source.input_concepts, self.source.output_concepts]:
            match_list = [x for x in array if x.address == address]
            if match_list:
                return match_list.pop()
        match_list = [x for x in self.output_columns if x.address == address]
        if match_list:
            return match_list.pop()
        return None

    def get_alias(self, concept: BuildConcept, source: str | None = None) -> str:
        """Return the rendering alias for ``concept``, optionally pinned to a
        specific source name. Falls back to an INVALID_ALIAS marker string
        rather than raising, so errors surface in generated SQL."""
        for cte in self.parent_ctes:
            if concept.address in cte.output_columns:
                if source and source != cte.name:
                    continue
                return concept.safe_address

        try:
            # NOTE(review): rebinding the `source` parameter here shadows the
            # original argument; intentional but easy to misread.
            source = self.source.get_alias(concept, source=source)

            if not source:
                raise ValueError("No source found")
            return source
        except ValueError as e:
            return f"INVALID_ALIAS: {str(e)}"

    @property
    def group_concepts(self) -> List[BuildConcept]:
        """Columns that must appear in GROUP BY when group_to_grain is set.

        Excludes concepts that need no grouping: constants, metrics,
        aggregate-derived constants, and basic derivations whose inputs are
        themselves all exempt. Empty list when no grouping is required.
        """
        def check_is_not_in_group(c: BuildConcept):
            # a concept with a concrete source mapping must be grouped on
            if len(self.source_map.get(c.address, [])) > 0:
                return False
            if c.derivation == Derivation.ROWSET:
                assert isinstance(c.lineage, BuildRowsetItem)
                return check_is_not_in_group(c.lineage.content)
            if c.derivation == Derivation.CONSTANT:
                return True
            if (
                c.purpose == Purpose.CONSTANT
                and isinstance(c.lineage, BuildFunction)
                and c.lineage.operator in FunctionClass.AGGREGATE_FUNCTIONS.value
            ):
                return True

            if c.derivation == Derivation.BASIC and c.lineage:
                # basic derivations are exempt only if every argument is exempt
                if all(
                    [
                        check_is_not_in_group(x)
                        for x in c.lineage.rendered_concept_arguments
                    ]
                ):
                    return True
                if (
                    isinstance(c.lineage, BuildFunction)
                    and c.lineage.operator == FunctionType.GROUP
                ):
                    return check_is_not_in_group(c.lineage.concept_arguments[0])
                return False
            if c.purpose == Purpose.METRIC:
                return True

            return False

        return (
            unique(
                [c for c in self.output_columns if not check_is_not_in_group(c)],
                "address",
            )
            if self.group_to_grain
            else []
        )

    @property
    def render_from_clause(self) -> bool:
        """Whether a FROM clause is needed at all (False for pure-constant
        CTEs, empty source maps, and the synthetic constant dataset)."""
        if (
            all([c.derivation == Derivation.CONSTANT for c in self.output_columns])
            and not self.parent_ctes
            and not self.group_to_grain
        ):
            return False
        # if we don't need to source any concepts from anywhere
        # render without from
        # most likely to happen from inlining constants
        if not any([v for v in self.source_map.values()]):
            return False
        if (
            len(self.source.datasources) == 1
            and self.source.datasources[0].name == CONSTANT_DATASET
        ):
            return False
        return True

    @property
    def sourced_concepts(self) -> List[BuildConcept]:
        """Output columns that have an entry in the source map."""
        return [c for c in self.output_columns if c.address in self.source_map]
459
+
460
class ConceptPair(BaseModel):
    """A left/right concept pairing used to express one join condition.

    ``existing_datasource`` is the source already providing the left side;
    ``modifiers`` carries join-relevant flags such as PARTIAL or NULLABLE.
    """

    left: BuildConcept
    right: BuildConcept
    existing_datasource: Union[BuildDatasource, "QueryDatasource"]
    modifiers: List[Modifier] = Field(default_factory=list)

    @property
    def is_partial(self):
        """True when the pairing carries the PARTIAL modifier."""
        return any(modifier == Modifier.PARTIAL for modifier in self.modifiers)

    @property
    def is_nullable(self):
        """True when the pairing carries the NULLABLE modifier."""
        return any(modifier == Modifier.NULLABLE for modifier in self.modifiers)
474
+
475
class CTEConceptPair(ConceptPair):
    """A ConceptPair whose left side resolves against a specific CTE."""

    # the CTE supplying the pairing's left-hand concept
    cte: CTE | UnionCTE
478
+
479
class InstantiatedUnnestJoin(BaseModel):
    """A concrete unnest join bound to a single object to flatten."""

    # the expression being unnested: a concept, parameterized reference, or function
    object_to_unnest: BuildConcept | BuildParamaterizedConceptReference | BuildFunction
    alias: str = "unnest"
483
+
484
class UnnestJoin(BaseModel):
    """An unnest join derived from a function over one or more concepts."""

    concepts: list[BuildConcept]
    parent: BuildFunction
    alias: str = "unnest"
    rendering_required: bool = True

    @property
    def safe_identifier(self) -> str:
        """Alias plus the concatenated addresses of every unnested concept."""
        address_suffix = "".join(str(concept.address) for concept in self.concepts)
        return self.alias + address_suffix

    def __hash__(self):
        # identity follows the derived identifier, not object identity,
        # so equivalent unnest joins collide in sets and dicts
        return hash(self.safe_identifier)
497
+
498
def raise_helpful_join_validation_error(
    concepts: List[BuildConcept],
    left_datasource: BuildDatasource | QueryDatasource | None,
    right_datasource: BuildDatasource | QueryDatasource | None,
):
    """Raise an InvalidSyntaxException explaining why a join could not be built.

    Always raises. When either side is missing, a short message is produced;
    otherwise the message enumerates each side's output keys and the join
    concepts that were requested, to make the failure debuggable.

    Args:
        concepts: the join concepts that failed to match.
        left_datasource: left side of the attempted join, if known.
        right_datasource: right side of the attempted join, if known.

    Raises:
        InvalidSyntaxException: unconditionally.
    """
    if not left_datasource or not right_datasource:
        raise InvalidSyntaxException(
            "No mutual keys found, and not two valid datasources"
        )
    # Both datasources are guaranteed non-None past the guard above; the
    # original always-true `assert` statements here were redundant (and
    # stripped under -O anyway), so they have been removed.
    left_keys = [c.address for c in left_datasource.output_concepts]
    right_keys = [c.address for c in right_datasource.output_concepts]
    match_concepts = [c.address for c in concepts]
    raise InvalidSyntaxException(
        "No mutual join keys found between"
        f" {left_datasource.identifier} and"
        f" {right_datasource.identifier}, left_keys {left_keys},"
        f" right_keys {right_keys},"
        f" provided join concepts {match_concepts}"
    )
520
+
521
+
522
class BaseJoin(BaseModel):
    """A join between two datasources, expressed either as a list of shared
    concepts or as explicit left/right concept pairs."""

    right_datasource: Union[BuildDatasource, "QueryDatasource"]
    join_type: JoinType
    concepts: Optional[List[BuildConcept]] = None
    left_datasource: Optional[Union[BuildDatasource, "QueryDatasource"]] = None
    concept_pairs: list[ConceptPair] | None = None
    modifiers: List[Modifier] = Field(default_factory=list)

    @model_validator(mode="after")
    def validate_join(self) -> "BaseJoin":
        """Reject self-joins, then reduce ``concepts`` to keys valid on both
        sides; raises if a requested concept is missing from either side."""
        if (
            self.left_datasource
            and self.left_datasource.identifier == self.right_datasource.identifier
        ):
            raise SyntaxError(
                f"Cannot join a dataself to itself, joining {self.left_datasource} and"
                f" {self.right_datasource}"
            )
        # Early returns maintained as in original code
        if self.concept_pairs or self.concepts == []:
            return self

        # reduce concept list to just the mutual keys
        final_concepts = []
        for concept in self.concepts or []:
            # NOTE(review): `include` is never set False anywhere below, so the
            # later `if include` guard always passes — looks like vestigial
            # logic from an earlier filter-based version; confirm intent.
            include = True
            for ds in [self.left_datasource, self.right_datasource]:
                synonyms = []
                if not ds:
                    continue
                for c in ds.output_concepts:
                    synonyms += list(c.pseudonyms)
                if (
                    concept.address not in ds.output_concepts
                    and concept.address not in synonyms
                ):
                    raise InvalidSyntaxException(
                        f"Invalid join, missing {concept} on {ds.name}, have"
                        f" {[c.address for c in ds.output_concepts]}"
                    )
            if include:
                final_concepts.append(concept)

        if not final_concepts and self.concepts:
            raise_helpful_join_validation_error(
                self.concepts,
                self.left_datasource,
                self.right_datasource,
            )

        self.concepts = final_concepts
        return self

    @property
    def unique_id(self) -> str:
        # the rendered string fully identifies the join for dedup purposes
        return str(self)

    @property
    def input_concepts(self) -> List[BuildConcept]:
        """All concepts this join reads, from pairs or the shared list."""
        base = []
        if self.concept_pairs:
            for pair in self.concept_pairs:
                base += [pair.left, pair.right]
        elif self.concepts:
            base += self.concepts
        return base

    def __str__(self):
        if self.concept_pairs:
            return (
                f"{self.join_type.value} {self.right_datasource.name} on"
                f" {','.join([str(k.existing_datasource.name) + '.'+ str(k.left)+'='+str(k.right) for k in self.concept_pairs])}"
            )
        return (
            f"{self.join_type.value} {self.right_datasource.name} on"
            f" {','.join([str(k) for k in self.concepts])}"
        )
599
+
600
+
601
class QueryDatasource(BaseModel):
    """A derived datasource produced by query resolution: the inputs,
    outputs, underlying sources, joins, and filters needed to materialize
    a set of concepts at a target grain."""

    input_concepts: List[BuildConcept]
    output_concepts: List[BuildConcept]
    datasources: List[Union[BuildDatasource, "QueryDatasource"]]
    # concept address -> set of sources that concept can be fetched from
    source_map: Dict[str, Set[Union[BuildDatasource, "QueryDatasource", "UnnestJoin"]]]

    grain: BuildGrain
    joins: List[BaseJoin | UnnestJoin]
    limit: Optional[int] = None
    condition: Optional[
        Union[BuildConditional, BuildComparison, BuildParenthetical]
    ] = Field(default=None)
    source_type: SourceType = SourceType.SELECT
    partial_concepts: List[BuildConcept] = Field(default_factory=list)
    hidden_concepts: set[str] = Field(default_factory=set)
    nullable_concepts: List[BuildConcept] = Field(default_factory=list)
    join_derived_concepts: List[BuildConcept] = Field(default_factory=list)
    # tri-state: None means "let resolution decide" whether to force grouping
    force_group: bool | None = None
    existence_source_map: Dict[str, Set[Union[BuildDatasource, "QueryDatasource"]]] = (
        Field(default_factory=dict)
    )
    ordering: BuildOrderBy | None = None
623
+
624
    def __repr__(self):
        """Compact identifier@<grain> form for debugging output."""
        return f"{self.identifier}@<{self.grain}>"
626
+
627
    @classmethod
    def from_datasource(cls, datasource: BuildDatasource) -> "QueryDatasource":
        """Wrap a single concrete datasource as a passthrough QueryDatasource.

        Inputs and outputs are the datasource's own output concepts, and
        every concept is mapped back to the datasource itself; no joins.
        """
        output_concepts = datasource.output_concepts
        return cls(
            input_concepts=output_concepts,
            output_concepts=output_concepts,
            datasources=[datasource],
            source_map={c.address: {datasource} for c in output_concepts},
            grain=datasource.grain,
            joins=[],
            partial_concepts=datasource.partial_concepts,
            hidden_concepts={c.address for c in datasource.hidden_concepts},
            nullable_concepts=datasource.nullable_concepts,
        )
641
+
642
+ @property
643
+ def safe_identifier(self):
644
+ return self.identifier.replace(".", "_")
645
+
646
+ @property
647
+ def full_concepts(self) -> List[BuildConcept]:
648
+ return [
649
+ c
650
+ for c in self.output_concepts
651
+ if c.address not in [z.address for z in self.partial_concepts]
652
+ ]
653
+
654
+ @field_validator("joins")
655
+ @classmethod
656
+ def validate_joins(cls, v):
657
+ unique_pairs = set()
658
+ for join in v:
659
+ if not isinstance(join, BaseJoin):
660
+ continue
661
+ pairing = str(join)
662
+ if pairing in unique_pairs:
663
+ raise SyntaxError(f"Duplicate join {str(join)}")
664
+ unique_pairs.add(pairing)
665
+ return v
666
+
667
+ @field_validator("input_concepts")
668
+ @classmethod
669
+ def validate_inputs(cls, v):
670
+ return unique(v, "address")
671
+
672
+ @field_validator("output_concepts")
673
+ @classmethod
674
+ def validate_outputs(cls, v):
675
+ return unique(v, "address")
676
+
677
+ @field_validator("source_map")
678
+ @classmethod
679
+ def validate_source_map(cls, v: dict, info: ValidationInfo):
680
+ values = info.data
681
+ hidden_concepts = values.get("hidden_concepts", set())
682
+ for key in ("input_concepts", "output_concepts"):
683
+ if not values.get(key):
684
+ continue
685
+ concept: BuildConcept
686
+ for concept in values[key]:
687
+ if concept.address in hidden_concepts:
688
+ continue
689
+ if (
690
+ concept.address not in v
691
+ and not any(x in v for x in concept.pseudonyms)
692
+ and CONFIG.validate_missing
693
+ ):
694
+ raise SyntaxError(
695
+ f"Missing source map entry for {concept.address} on {key} with pseudonyms {concept.pseudonyms}, have map: {v}"
696
+ )
697
+ return v
698
+
699
+ def __str__(self):
700
+ return self.__repr__()
701
+
702
+ def __hash__(self):
703
+ return (self.identifier).__hash__()
704
+
705
+ @property
706
+ def concepts(self):
707
+ return self.output_concepts
708
+
709
+ @property
710
+ def name(self):
711
+ return self.identifier
712
+
713
+ @property
714
+ def group_required(self) -> bool:
715
+ if self.force_group is True:
716
+ return True
717
+ if self.force_group is False:
718
+ return False
719
+ if self.source_type:
720
+ if self.source_type in [
721
+ SourceType.FILTER,
722
+ ]:
723
+ return False
724
+ elif self.source_type in [
725
+ SourceType.GROUP,
726
+ ]:
727
+ return True
728
+ return False
729
+
730
+ def __add__(self, other) -> "QueryDatasource":
731
+ # these are syntax errors to avoid being caught by current
732
+ if not isinstance(other, QueryDatasource):
733
+ raise SyntaxError("Can only merge two query datasources")
734
+ if not other.grain == self.grain:
735
+ raise SyntaxError(
736
+ "Can only merge two query datasources with identical grain"
737
+ )
738
+ if not self.group_required == other.group_required:
739
+ raise SyntaxError(
740
+ "can only merge two datasources if the group required flag is the same"
741
+ )
742
+ if not self.join_derived_concepts == other.join_derived_concepts:
743
+ raise SyntaxError(
744
+ "can only merge two datasources if the join derived concepts are the same"
745
+ )
746
+ if not self.force_group == other.force_group:
747
+ raise SyntaxError(
748
+ "can only merge two datasources if the force_group flag is the same"
749
+ )
750
+ logger.debug(
751
+ f"[Query Datasource] merging {self.name} with"
752
+ f" {[c.address for c in self.output_concepts]} concepts and"
753
+ f" {other.name} with {[c.address for c in other.output_concepts]} concepts"
754
+ )
755
+
756
+ merged_datasources: dict[str, Union[BuildDatasource, "QueryDatasource"]] = {}
757
+
758
+ for ds in [*self.datasources, *other.datasources]:
759
+ if ds.safe_identifier in merged_datasources:
760
+ merged_datasources[ds.safe_identifier] = (
761
+ merged_datasources[ds.safe_identifier] + ds
762
+ )
763
+ else:
764
+ merged_datasources[ds.safe_identifier] = ds
765
+
766
+ final_source_map: defaultdict[
767
+ str, Set[Union[BuildDatasource, QueryDatasource, UnnestJoin]]
768
+ ] = defaultdict(set)
769
+
770
+ # add our sources
771
+ for key in self.source_map:
772
+ final_source_map[key] = self.source_map[key].union(
773
+ other.source_map.get(key, set())
774
+ )
775
+ # add their sources
776
+ for key in other.source_map:
777
+ if key not in final_source_map:
778
+ final_source_map[key] = other.source_map[key]
779
+
780
+ # if a ds was merged (to combine columns), we need to update the source map
781
+ # to use the merged item
782
+ for k, v in final_source_map.items():
783
+ final_source_map[k] = set(
784
+ merged_datasources.get(x.safe_identifier, x) for x in list(v)
785
+ )
786
+ self_hidden: set[str] = self.hidden_concepts or set()
787
+ other_hidden: set[str] = other.hidden_concepts or set()
788
+ # hidden is the minimum overlapping set
789
+ hidden = self_hidden.intersection(other_hidden)
790
+ qds = QueryDatasource(
791
+ input_concepts=unique(
792
+ self.input_concepts + other.input_concepts, "address"
793
+ ),
794
+ output_concepts=unique(
795
+ self.output_concepts + other.output_concepts, "address"
796
+ ),
797
+ source_map=final_source_map,
798
+ datasources=list(merged_datasources.values()),
799
+ grain=self.grain,
800
+ joins=unique(self.joins + other.joins, "unique_id"),
801
+ condition=(
802
+ self.condition + other.condition
803
+ if self.condition and other.condition
804
+ else self.condition or other.condition
805
+ ),
806
+ source_type=self.source_type,
807
+ partial_concepts=unique(
808
+ self.partial_concepts + other.partial_concepts, "address"
809
+ ),
810
+ join_derived_concepts=self.join_derived_concepts,
811
+ force_group=self.force_group,
812
+ hidden_concepts=hidden,
813
+ ordering=self.ordering,
814
+ )
815
+ logger.debug(
816
+ f"[Query Datasource] merged with {[c.address for c in qds.output_concepts]} concepts"
817
+ )
818
+ logger.debug(qds.source_map)
819
+ return qds
820
+
821
+ @property
822
+ def identifier(self) -> str:
823
+ filters = abs(hash(str(self.condition))) if self.condition else ""
824
+ grain = "_".join([str(c).replace(".", "_") for c in self.grain.components])
825
+ group = ""
826
+ if self.group_required:
827
+ keys = [
828
+ x.address for x in self.output_concepts if x.purpose != Purpose.METRIC
829
+ ]
830
+ group = "_grouped_by_" + "_".join(keys)
831
+ return (
832
+ "_join_".join([d.identifier for d in self.datasources])
833
+ + group
834
+ + (f"_at_{grain}" if grain else "_at_abstract")
835
+ + (f"_filtered_by_{filters}" if filters else "")
836
+ )
837
+
838
+ def get_alias(
839
+ self,
840
+ concept: BuildConcept,
841
+ use_raw_name: bool = False,
842
+ force_alias: bool = False,
843
+ source: str | None = None,
844
+ ):
845
+ for x in self.datasources:
846
+ # query datasources should be referenced by their alias, always
847
+ force_alias = isinstance(x, QueryDatasource)
848
+ #
849
+ use_raw_name = isinstance(x, BuildDatasource) and not force_alias
850
+ if source and x.safe_identifier != source:
851
+ continue
852
+ try:
853
+ return x.get_alias(
854
+ concept.with_grain(self.grain),
855
+ use_raw_name,
856
+ force_alias=force_alias,
857
+ )
858
+ except ValueError:
859
+ continue
860
+ existing = [c.with_grain(self.grain) for c in self.output_concepts]
861
+ if concept in existing:
862
+ return concept.name
863
+
864
+ existing_str = [str(c) for c in existing]
865
+ datasources = [ds.identifier for ds in self.datasources]
866
+ raise ValueError(
867
+ f"{LOGGER_PREFIX} Concept {str(concept)} not found on {self.identifier};"
868
+ f" have {existing_str} from {datasources}."
869
+ )
870
+
871
+ @property
872
+ def safe_location(self):
873
+ return self.identifier
874
+
875
+
876
class AliasedExpression(BaseModel):
    """Pairs an expression with the alias it should be emitted under."""

    expr: BuildExpr
    alias: str
879
+
880
+
881
class RecursiveCTE(CTE):
    """A CTE rendered as a recursive union: a seed ("top") member plus a
    self-referencing ("bottom") member joined back to the loop input."""

    def generate_loop_functions(
        self,
        recursive_derived: BuildConcept,
        left_recurse_concept: BuildConcept,
        right_recurse_concept: BuildConcept,
    ) -> tuple[BuildConcept, BuildConcept, BuildConcept]:
        """Build the synthetic concepts used to terminate the recursion.

        Returns (bottom_derivation, join_gate, bottom_join_gate):
        - join_gate / bottom_join_gate: booleans that are True when
          right_recurse_concept IS NULL (i.e. the walk has ended).
          NOTE(review): both gates have identical lineage, differing only in
          name ("_two" suffix) — confirm the duplication is intentional.
        - bottom_derivation: carries the derived value forward, falling back
          to the current value once the right side is NULL.

        NOTE(review): left_recurse_concept is accepted but unused here.
        """

        join_gate = BuildConcept(
            name=RECURSIVE_GATING_CONCEPT,
            canonical_name=RECURSIVE_GATING_CONCEPT,
            namespace=DEFAULT_NAMESPACE,
            grain=recursive_derived.grain,
            build_is_aggregate=False,
            datatype=DataType.BOOL,
            purpose=Purpose.KEY,
            derivation=Derivation.BASIC,
            # CASE WHEN right IS NULL THEN True ELSE False END
            lineage=BuildFunction(
                operator=FunctionType.CASE,
                arguments=[
                    BuildCaseWhen(
                        comparison=BuildComparison(
                            left=right_recurse_concept,
                            right=MagicConstants.NULL,
                            operator=ComparisonOperator.IS,
                        ),
                        expr=True,
                    ),
                    BuildCaseElse(expr=False),
                ],
                output_data_type=DataType.BOOL,
                output_purpose=Purpose.KEY,
            ),
        )
        bottom_join_gate = BuildConcept(
            name=f"{RECURSIVE_GATING_CONCEPT}_two",
            canonical_name=f"{RECURSIVE_GATING_CONCEPT}_two",
            namespace=DEFAULT_NAMESPACE,
            grain=recursive_derived.grain,
            build_is_aggregate=False,
            datatype=DataType.BOOL,
            purpose=Purpose.KEY,
            derivation=Derivation.BASIC,
            # Same gating CASE as join_gate, under a distinct name for the
            # recursive member.
            lineage=BuildFunction(
                operator=FunctionType.CASE,
                arguments=[
                    BuildCaseWhen(
                        comparison=BuildComparison(
                            left=right_recurse_concept,
                            right=MagicConstants.NULL,
                            operator=ComparisonOperator.IS,
                        ),
                        expr=True,
                    ),
                    BuildCaseElse(expr=False),
                ],
                output_data_type=DataType.BOOL,
                output_purpose=Purpose.KEY,
            ),
        )
        bottom_derivation = BuildConcept(
            name=recursive_derived.name + "_bottom",
            canonical_name=recursive_derived.canonical_name + "_bottom",
            namespace=recursive_derived.namespace,
            grain=recursive_derived.grain,
            build_is_aggregate=False,
            datatype=recursive_derived.datatype,
            purpose=recursive_derived.purpose,
            derivation=Derivation.RECURSIVE,
            # CASE WHEN right IS NULL THEN <current derived> ELSE <right> END:
            # keeps the last non-null value once the recursion bottoms out.
            lineage=BuildFunction(
                operator=FunctionType.CASE,
                arguments=[
                    BuildCaseWhen(
                        comparison=BuildComparison(
                            left=right_recurse_concept,
                            right=MagicConstants.NULL,
                            operator=ComparisonOperator.IS,
                        ),
                        expr=recursive_derived,
                    ),
                    BuildCaseElse(expr=right_recurse_concept),
                ],
                output_data_type=recursive_derived.datatype,
                output_purpose=recursive_derived.purpose,
            ),
        )
        return bottom_derivation, join_gate, bottom_join_gate

    @property
    def internal_ctes(self) -> List[CTE]:
        """Expand this recursive CTE into its two members.

        Returns [top, bottom]: top is the seed query (this CTE's own source
        plus a join gate); bottom re-joins top against the original loop
        input to take one more recursion step, gated on join_gate.
        """
        # Drop the internal gating concept from the externally visible columns.
        filtered_output = [
            x for x in self.output_columns if x.name != RECURSIVE_GATING_CONCEPT
        ]
        # Exactly one output is expected to be RECURSIVE-derived; IndexError
        # here means the caller built an invalid recursive CTE.
        recursive_derived = [
            x for x in self.output_columns if x.derivation == Derivation.RECURSIVE
        ][0]
        if not isinstance(recursive_derived.lineage, BuildFunction):
            raise SyntaxError(
                "Recursive CTEs must have a function lineage, found"
                f" {recursive_derived.lineage}"
            )
        # The recursion walks from left concept to right concept.
        left_recurse_concept = recursive_derived.lineage.concept_arguments[0]
        right_recurse_concept = recursive_derived.lineage.concept_arguments[1]
        parent_ctes: List[CTE | UnionCTE]
        if self.parent_ctes:
            base = self.parent_ctes[0]
            loop_input_cte = base
            parent_ctes = [base]
            parent_identifier = base.identifier
        else:
            raise SyntaxError("Recursive CTEs must have a parent CTE currently")
        bottom_derivation, join_gate, bottom_join_gate = self.generate_loop_functions(
            recursive_derived, left_recurse_concept, right_recurse_concept
        )
        base_output = [*filtered_output, join_gate]
        # The bottom member swaps the recursive concept for its "_bottom"
        # derivation and appends its own gate.
        bottom_output = []
        for x in filtered_output:
            if x.address == recursive_derived.address:
                bottom_output.append(bottom_derivation)
            else:
                bottom_output.append(x)

        bottom_output = [*bottom_output, bottom_join_gate]
        top = CTE(
            name=self.name,
            source=self.source,
            output_columns=base_output,
            source_map=self.source_map,
            grain=self.grain,
            existence_source_map=self.existence_source_map,
            parent_ctes=self.parent_ctes,
            joins=self.joins,
            condition=self.condition,
            partial_concepts=self.partial_concepts,
            hidden_concepts=self.hidden_concepts,
            nullable_concepts=self.nullable_concepts,
            join_derived_concepts=self.join_derived_concepts,
            group_to_grain=self.group_to_grain,
            order_by=self.order_by,
            limit=self.limit,
        )
        top_cte_array: list[CTE | UnionCTE] = [top]
        # The bottom member reads the loop concepts from top, and the next
        # step's values from the original parent.
        bottom_source_map = {
            left_recurse_concept.address: [top.identifier],
            right_recurse_concept.address: [parent_identifier],
            # recursive_derived.address: self.source_map[recursive_derived.address],
            join_gate.address: [top.identifier],
            recursive_derived.address: [top.identifier],
        }
        bottom = CTE(
            name=self.name,
            source=self.source,
            output_columns=bottom_output,
            source_map=bottom_source_map,
            grain=self.grain,
            existence_source_map=self.existence_source_map,
            parent_ctes=top_cte_array + parent_ctes,
            joins=[
                # Join top's derived value to the loop input's left concept,
                # continuing only while the gate says the walk has not ended.
                Join(
                    right_cte=loop_input_cte,
                    jointype=JoinType.INNER,
                    joinkey_pairs=[
                        CTEConceptPair(
                            left=recursive_derived,
                            right=left_recurse_concept,
                            existing_datasource=loop_input_cte.source,
                            modifiers=[],
                            cte=top,
                        )
                    ],
                    condition=BuildComparison(
                        left=join_gate, right=True, operator=ComparisonOperator.IS_NOT
                    ),
                )
            ],
            partial_concepts=self.partial_concepts,
            hidden_concepts=self.hidden_concepts,
            nullable_concepts=self.nullable_concepts,
            join_derived_concepts=self.join_derived_concepts,
            group_to_grain=self.group_to_grain,
            order_by=self.order_by,
            limit=self.limit,
        )
        return [top, bottom]
1066
+
1067
+
1068
class UnionCTE(BaseModel):
    """A CTE formed by set-combining (default UNION ALL) its internal CTEs."""

    name: str
    source: QueryDatasource
    parent_ctes: list[CTE | UnionCTE]
    # The member queries that are combined by `operator`.
    internal_ctes: list[CTE | UnionCTE]
    output_columns: List[BuildConcept]
    grain: BuildGrain
    operator: str = "UNION ALL"
    order_by: Optional[BuildOrderBy] = None
    limit: Optional[int] = None
    hidden_concepts: set[str] = Field(default_factory=set)
    partial_concepts: list[BuildConcept] = Field(default_factory=list)
    existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
    inlined_ctes: Dict[str, InlinedCTE] = Field(default_factory=dict)

    @computed_field  # type: ignore
    @property
    def output_lcl(self) -> LooseBuildConceptList:
        # Loose (address-based) view of the output columns, serialized with the model.
        return LooseBuildConceptList(concepts=self.output_columns)

    def get_alias(self, concept: BuildConcept, source: str | None = None) -> str:
        """Return the concept's safe address if any parent CTE outputs it;
        "INVALID_ALIAS" otherwise.

        NOTE(review): `concept.address in cte.output_columns` tests a string
        against a list of BuildConcepts — this relies on BuildConcept.__eq__
        accepting strings; confirm.
        """
        for cte in self.parent_ctes:
            if concept.address in cte.output_columns:
                if source and source != cte.name:
                    continue
                return concept.safe_address
        return "INVALID_ALIAS"

    def get_concept(self, address: str) -> BuildConcept | None:
        """Look up a concept by address in internal CTEs first, then own outputs."""
        for cte in self.internal_ctes:
            if address in cte.output_columns:
                match = [x for x in cte.output_columns if x.address == address].pop()
                return match

        match_list = [x for x in self.output_columns if x.address == address]
        if match_list:
            return match_list.pop()
        return None

    @property
    def source_map(self):
        # Unions have no per-column external sources; every output resolves
        # inside the internal CTEs, so each address maps to an empty list.
        return {x.address: [] for x in self.output_columns}

    @property
    def condition(self):
        # Union CTEs never carry a top-level condition.
        return None

    @condition.setter
    def condition(self, value):
        # Setting a condition on a union is unsupported by design.
        raise NotImplementedError

    @property
    def identifier(self) -> str:
        return self.name

    @property
    def safe_identifier(self):
        return self.name

    @property
    def group_to_grain(self) -> bool:
        return False

    @property
    def group_concepts(self) -> List[BuildConcept]:
        # unions should always be on unique sets
        return []

    def __add__(self, other):
        """Merging identically named union CTEs is a no-op; anything else is an error."""
        if not isinstance(other, UnionCTE) or not other.name == self.name:
            raise SyntaxError("Cannot merge union CTEs")
        return self
1140
+
1141
+
1142
class Join(BaseModel):
    """A rendered join from an (implicit or explicit) left CTE to right_cte.

    CTEs registered via inline_cte are referenced by their backing
    datasource instead of their CTE alias when rendered.
    """

    right_cte: CTE | UnionCTE
    jointype: JoinType
    left_cte: CTE | UnionCTE | None = None
    # Equi-join pairs; may be None when the join is condition-only.
    joinkey_pairs: List[CTEConceptPair] | None = None
    # Names of CTEs that have been inlined to their source datasource.
    inlined_ctes: set[str] = Field(default_factory=set)
    # Quote character for rendered identifiers, if the dialect needs one.
    quote: str | None = None
    condition: BuildConditional | BuildComparison | BuildParenthetical | None = None
    modifiers: List[Modifier] = Field(default_factory=list)

    def inline_cte(self, cte: CTE):
        """Mark *cte* as inlined: future name lookups resolve to its datasource."""
        self.inlined_ctes.add(cte.name)

    def get_name(self, cte: CTE | UnionCTE) -> str:
        """Return the identifier to reference *cte* by, honoring inlining."""
        if cte.identifier in self.inlined_ctes:
            return cte.source.datasources[0].safe_identifier
        return cte.safe_identifier

    @property
    def right_name(self) -> str:
        # Consistency: delegate to get_name instead of duplicating its logic.
        return self.get_name(self.right_cte)

    @property
    def right_ref(self) -> str:
        """Full FROM/JOIN reference for the right side, quoted if configured
        and expanded to `location as identifier` for inlined CTEs."""
        if self.quote:
            if self.right_cte.identifier in self.inlined_ctes:
                return f"{safe_quote(self.right_cte.source.datasources[0].safe_location, self.quote)} as {self.quote}{self.right_cte.source.datasources[0].safe_identifier}{self.quote}"
            return f"{self.quote}{self.right_cte.safe_identifier}{self.quote}"
        if self.right_cte.identifier in self.inlined_ctes:
            return f"{self.right_cte.source.datasources[0].safe_location} as {self.right_cte.source.datasources[0].safe_identifier}"
        return self.right_cte.safe_identifier

    @property
    def unique_id(self) -> str:
        return str(self)

    def __str__(self):
        if self.joinkey_pairs:
            return (
                f"{self.jointype.value} join"
                f" {self.right_name} on"
                f" {','.join([k.cte.name + '.'+str(k.left.address)+'='+str(k.right.address) for k in self.joinkey_pairs])}"
            )
        # Bug fix: in the branches below joinkey_pairs is falsy (None or []);
        # iterating None directly raised TypeError. Guard with `or []` so a
        # key-less join still stringifies.
        pairs = ",".join([str(k) for k in self.joinkey_pairs or []])
        if self.left_cte:
            return (
                f"{self.jointype.value} JOIN {self.left_cte.name} and"
                f" {self.right_name} on {pairs}"
            )
        return f"{self.jointype.value} JOIN {self.right_name} on {pairs}"
1193
+
1194
+
1195
def merge_ctes(ctes: List[CTE | UnionCTE]) -> List[CTE | UnionCTE]:
    """Collapse CTEs that share a name into single entries.

    Same-named CTEs are combined via their ``+`` merge operator; first-seen
    insertion order of names is preserved in the returned list.
    """
    merged: Dict[str, CTE | UnionCTE] = {}
    for candidate in ctes:
        existing = merged.get(candidate.name)
        merged[candidate.name] = (
            candidate if existing is None else existing + candidate
        )
    return list(merged.values())
1206
+
1207
+
1208
class CompiledCTE(BaseModel):
    """A fully rendered CTE: its alias and the compiled statement text."""

    name: str
    statement: str
1211
+
1212
+
1213
+ UnionCTE.model_rebuild()