pytrilogy 0.3.148__cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206) hide show
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-312-aarch64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.148.dist-info/METADATA +555 -0
  5. pytrilogy-0.3.148.dist-info/RECORD +206 -0
  6. pytrilogy-0.3.148.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.148.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.148.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +27 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +100 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +148 -0
  26. trilogy/constants.py +119 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +454 -0
  31. trilogy/core/env_processor.py +239 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1240 -0
  36. trilogy/core/graph_models.py +142 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2662 -0
  40. trilogy/core/models/build.py +2603 -0
  41. trilogy/core/models/build_environment.py +165 -0
  42. trilogy/core/models/core.py +506 -0
  43. trilogy/core/models/datasource.py +434 -0
  44. trilogy/core/models/environment.py +756 -0
  45. trilogy/core/models/execute.py +1213 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +548 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +270 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +207 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +695 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +786 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +522 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +604 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +256 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1431 -0
  112. trilogy/dialect/bigquery.py +314 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +159 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +376 -0
  117. trilogy/dialect/enums.py +149 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +117 -0
  121. trilogy/dialect/presto.py +110 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +129 -0
  124. trilogy/dialect/sql_server.py +137 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/__init__.py +17 -0
  127. trilogy/execution/config.py +119 -0
  128. trilogy/execution/state/__init__.py +0 -0
  129. trilogy/execution/state/file_state_store.py +0 -0
  130. trilogy/execution/state/sqllite_state_store.py +0 -0
  131. trilogy/execution/state/state_store.py +301 -0
  132. trilogy/executor.py +656 -0
  133. trilogy/hooks/__init__.py +4 -0
  134. trilogy/hooks/base_hook.py +40 -0
  135. trilogy/hooks/graph_hook.py +135 -0
  136. trilogy/hooks/query_debugger.py +166 -0
  137. trilogy/metadata/__init__.py +0 -0
  138. trilogy/parser.py +10 -0
  139. trilogy/parsing/README.md +21 -0
  140. trilogy/parsing/__init__.py +0 -0
  141. trilogy/parsing/common.py +1069 -0
  142. trilogy/parsing/config.py +5 -0
  143. trilogy/parsing/exceptions.py +8 -0
  144. trilogy/parsing/helpers.py +1 -0
  145. trilogy/parsing/parse_engine.py +2863 -0
  146. trilogy/parsing/render.py +773 -0
  147. trilogy/parsing/trilogy.lark +544 -0
  148. trilogy/py.typed +0 -0
  149. trilogy/render.py +45 -0
  150. trilogy/scripts/README.md +9 -0
  151. trilogy/scripts/__init__.py +0 -0
  152. trilogy/scripts/agent.py +41 -0
  153. trilogy/scripts/agent_info.py +306 -0
  154. trilogy/scripts/common.py +430 -0
  155. trilogy/scripts/dependency/Cargo.lock +617 -0
  156. trilogy/scripts/dependency/Cargo.toml +39 -0
  157. trilogy/scripts/dependency/README.md +131 -0
  158. trilogy/scripts/dependency/build.sh +25 -0
  159. trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
  160. trilogy/scripts/dependency/src/lib.rs +16 -0
  161. trilogy/scripts/dependency/src/main.rs +770 -0
  162. trilogy/scripts/dependency/src/parser.rs +435 -0
  163. trilogy/scripts/dependency/src/preql.pest +208 -0
  164. trilogy/scripts/dependency/src/python_bindings.rs +311 -0
  165. trilogy/scripts/dependency/src/resolver.rs +716 -0
  166. trilogy/scripts/dependency/tests/base.preql +3 -0
  167. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  168. trilogy/scripts/dependency/tests/customer.preql +6 -0
  169. trilogy/scripts/dependency/tests/main.preql +9 -0
  170. trilogy/scripts/dependency/tests/orders.preql +7 -0
  171. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  172. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  173. trilogy/scripts/dependency.py +323 -0
  174. trilogy/scripts/display.py +555 -0
  175. trilogy/scripts/environment.py +59 -0
  176. trilogy/scripts/fmt.py +32 -0
  177. trilogy/scripts/ingest.py +472 -0
  178. trilogy/scripts/ingest_helpers/__init__.py +1 -0
  179. trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
  180. trilogy/scripts/ingest_helpers/formatting.py +93 -0
  181. trilogy/scripts/ingest_helpers/typing.py +161 -0
  182. trilogy/scripts/init.py +105 -0
  183. trilogy/scripts/parallel_execution.py +748 -0
  184. trilogy/scripts/plan.py +189 -0
  185. trilogy/scripts/refresh.py +106 -0
  186. trilogy/scripts/run.py +79 -0
  187. trilogy/scripts/serve.py +202 -0
  188. trilogy/scripts/serve_helpers/__init__.py +41 -0
  189. trilogy/scripts/serve_helpers/file_discovery.py +142 -0
  190. trilogy/scripts/serve_helpers/index_generation.py +206 -0
  191. trilogy/scripts/serve_helpers/models.py +38 -0
  192. trilogy/scripts/single_execution.py +131 -0
  193. trilogy/scripts/testing.py +129 -0
  194. trilogy/scripts/trilogy.py +75 -0
  195. trilogy/std/__init__.py +0 -0
  196. trilogy/std/color.preql +3 -0
  197. trilogy/std/date.preql +13 -0
  198. trilogy/std/display.preql +18 -0
  199. trilogy/std/geography.preql +22 -0
  200. trilogy/std/metric.preql +15 -0
  201. trilogy/std/money.preql +67 -0
  202. trilogy/std/net.preql +14 -0
  203. trilogy/std/ranking.preql +7 -0
  204. trilogy/std/report.preql +5 -0
  205. trilogy/std/semantic.preql +6 -0
  206. trilogy/utility.py +34 -0
@@ -0,0 +1,2603 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC
4
+ from collections import defaultdict
5
+ from dataclasses import dataclass, field
6
+ from datetime import date, datetime
7
+ from functools import cached_property, reduce, singledispatchmethod
8
+ from typing import (
9
+ TYPE_CHECKING,
10
+ Any,
11
+ Callable,
12
+ Iterable,
13
+ List,
14
+ Optional,
15
+ Self,
16
+ Sequence,
17
+ Set,
18
+ Tuple,
19
+ Union,
20
+ )
21
+
22
+ from trilogy.constants import DEFAULT_NAMESPACE, VIRTUAL_CONCEPT_PREFIX, MagicConstants
23
+ from trilogy.core.constants import ALL_ROWS_CONCEPT
24
+ from trilogy.core.enums import (
25
+ BooleanOperator,
26
+ ComparisonOperator,
27
+ DatasourceState,
28
+ DatePart,
29
+ Derivation,
30
+ FunctionClass,
31
+ FunctionType,
32
+ Granularity,
33
+ Modifier,
34
+ Ordering,
35
+ Purpose,
36
+ WindowType,
37
+ )
38
+ from trilogy.core.models.author import (
39
+ AggregateWrapper,
40
+ AlignClause,
41
+ AlignItem,
42
+ ArgBinding,
43
+ CaseElse,
44
+ CaseWhen,
45
+ Comparison,
46
+ Concept,
47
+ ConceptRef,
48
+ Conditional,
49
+ DeriveClause,
50
+ DeriveItem,
51
+ FilterItem,
52
+ FuncArgs,
53
+ Function,
54
+ FunctionCallWrapper,
55
+ Grain,
56
+ HavingClause,
57
+ Metadata,
58
+ MultiSelectLineage,
59
+ OrderBy,
60
+ OrderItem,
61
+ Parenthetical,
62
+ RowsetItem,
63
+ RowsetLineage,
64
+ SelectLineage,
65
+ SubselectComparison,
66
+ UndefinedConcept,
67
+ WhereClause,
68
+ WindowItem,
69
+ )
70
+ from trilogy.core.models.core import (
71
+ Addressable,
72
+ ArrayType,
73
+ DataType,
74
+ DataTyped,
75
+ ListWrapper,
76
+ MapType,
77
+ MapWrapper,
78
+ NumericType,
79
+ StructType,
80
+ TraitDataType,
81
+ TupleWrapper,
82
+ arg_to_datatype,
83
+ )
84
+ from trilogy.core.models.datasource import (
85
+ Address,
86
+ ColumnAssignment,
87
+ Datasource,
88
+ DatasourceMetadata,
89
+ RawColumnExpr,
90
+ )
91
+ from trilogy.core.models.environment import Environment
92
+ from trilogy.utility import string_to_hash
93
+
94
+ # TODO: refactor to avoid these
95
+ if TYPE_CHECKING:
96
+ from trilogy.core.models.build_environment import BuildEnvironment
97
+ from trilogy.core.models.execute import CTE, UnionCTE
98
+
99
+
100
+ LOGGER_PREFIX = "[MODELS_BUILD]"
101
+
102
+
103
def _gen_agg_name(parent: "BuildAggregateWrapper") -> str:
    """Return the virtual concept name for an aggregate wrapper.

    The name embeds the aggregate's operator value and a hash of its string
    form. When ``parent.is_abstract`` is set, the hash is taken over
    ``parent.with_abstract_by()`` instead of ``parent`` itself.
    """
    hashed_source = parent.with_abstract_by() if parent.is_abstract else parent
    digest = string_to_hash(str(hashed_source))
    return f"{VIRTUAL_CONCEPT_PREFIX}_agg_{parent.function.operator.value}_{digest}"
107
+
108
+
109
def _gen_window_name(parent: "BuildWindowItem") -> str:
    """Return the virtual concept name for a window item (type value + hash)."""
    digest = string_to_hash(str(parent))
    return f"{VIRTUAL_CONCEPT_PREFIX}_window_{parent.type.value}_{digest}"
111
+
112
+
113
def _gen_filter_name(parent: "BuildFilterItem") -> str:
    """Return the virtual concept name for a filter item (hash of its string form)."""
    digest = string_to_hash(str(parent))
    return f"{VIRTUAL_CONCEPT_PREFIX}_filter_{digest}"
115
+
116
+
117
def _gen_function_name(parent: "BuildFunction") -> str:
    """Return the virtual concept name for a function lineage.

    ``FunctionType.GROUP`` functions get a dedicated ``_group_to_`` name;
    every other function embeds its operator value.
    """
    digest = string_to_hash(str(parent))
    if parent.operator == FunctionType.GROUP:
        return f"{VIRTUAL_CONCEPT_PREFIX}_group_to_{digest}"
    return f"{VIRTUAL_CONCEPT_PREFIX}_func_{parent.operator.value}_{digest}"
121
+
122
+
123
def _gen_paren_name(parent: "BuildParenthetical") -> str:
    """Return the virtual concept name for a parenthetical expression."""
    digest = string_to_hash(str(parent))
    return f"{VIRTUAL_CONCEPT_PREFIX}_paren_{digest}"
125
+
126
+
127
def _gen_comp_name(parent: "BuildComparison") -> str:
    """Return the virtual concept name for a comparison expression."""
    digest = string_to_hash(str(parent))
    return f"{VIRTUAL_CONCEPT_PREFIX}_comp_{digest}"
129
+
130
+
131
def _gen_msl_name(parent: "BuildMultiSelectLineage") -> str:
    """Return the virtual concept name for a multi-select lineage."""
    digest = string_to_hash(str(parent))
    return f"{VIRTUAL_CONCEPT_PREFIX}_msl_{digest}"
133
+
134
+
135
def _gen_default_name(parent: Any) -> str:
    """Return the fallback virtual concept name: prefix plus hash of ``str(parent)``."""
    digest = string_to_hash(str(parent))
    return f"{VIRTUAL_CONCEPT_PREFIX}_{digest}"
137
+
138
+
139
+ # Initialized after classes are defined
140
+ _CONCEPT_NAME_GENERATORS: dict[type, Callable[[Any], str]] = {}
141
+
142
+
143
def generate_concept_name(parent: Any) -> str:
    """Generate a virtual concept name for ``parent``.

    Looks up a generator by the exact ``type(parent)`` in
    ``_CONCEPT_NAME_GENERATORS`` (subclasses are not matched by the lookup)
    and falls back to ``_gen_default_name`` when none is registered.
    """
    name_builder = _CONCEPT_NAME_GENERATORS.get(type(parent))
    return name_builder(parent) if name_builder else _gen_default_name(parent)
148
+
149
+
150
class BuildConceptArgs(ABC):
    """Base interface for build objects that expose the concepts they reference.

    Subclasses must provide ``concept_arguments``; the other accessors
    default to it (or to an empty list for ``existence_arguments``).
    """

    @property
    def concept_arguments(self) -> Sequence["BuildConcept"]:
        """Concepts referenced by this object; must be overridden."""
        raise NotImplementedError()

    @property
    def rendered_concept_arguments(self) -> Sequence["BuildConcept"]:
        """Defaults to ``concept_arguments``."""
        referenced = self.concept_arguments
        return referenced

    @property
    def existence_arguments(self) -> Sequence[tuple["BuildConcept", ...]]:
        """Defaults to an empty list."""
        return list()

    @property
    def row_arguments(self) -> Sequence["BuildConcept"]:
        """Defaults to ``concept_arguments``."""
        referenced = self.concept_arguments
        return referenced
166
+
167
+
168
def concept_is_relevant(
    concept: BuildConcept,
    others: list[BuildConcept],
) -> bool:
    """Decide whether ``concept`` should be kept when building a grain.

    Rules, applied in order:
    * aggregates are dropped unless their lineage is a
      ``BuildAggregateWrapper`` with a non-empty ``by``;
    * properties/metrics whose keys are all present in ``others`` are dropped;
    * keys whose own keys are all in ``others`` (and differ from the
      concept's address) are dropped;
    * metrics whose grain components are all in ``others`` are dropped;
    * UNNEST derivations are always kept;
    * BASIC derivations are kept if any of their arguments is relevant;
    * otherwise the concept is kept unless it is single-row.
    """
    if concept.is_aggregate:
        lineage = concept.lineage
        if not (isinstance(lineage, BuildAggregateWrapper) and lineage.by):
            return False

    purpose = concept.purpose
    if purpose in (Purpose.PROPERTY, Purpose.METRIC) and concept.keys:
        if all(key in others for key in concept.keys):
            return False

    if (
        purpose == Purpose.KEY
        and concept.keys
        and all(key in others and key != concept.address for key in concept.keys)
    ):
        return False

    if purpose == Purpose.METRIC:
        if all(component in others for component in concept.grain.components):
            return False

    derivation = concept.derivation
    if derivation == Derivation.UNNEST:
        return True
    if derivation == Derivation.BASIC:
        # A basic derivation is only as relevant as its inputs.
        return any(
            concept_is_relevant(argument, others)
            for argument in concept.concept_arguments
        )
    return concept.granularity != Granularity.SINGLE_ROW
197
+
198
+
199
def concepts_to_build_grain_concepts(
    concepts: Iterable[BuildConcept | str], environment: "BuildEnvironment" | None
) -> set[str]:
    """Resolve ``concepts`` and return the addresses of the relevant ones.

    String entries are looked up in ``environment.concepts``; a
    ``ValueError`` is raised if a string is given without an environment.
    Each resolved concept is kept only if ``concept_is_relevant`` accepts it
    against the full resolved list.
    """
    resolved: list[BuildConcept] = []
    for item in concepts:
        if isinstance(item, BuildConcept):
            resolved.append(item)
            continue
        if not environment:
            raise ValueError(
                f"Unable to resolve input {item} without environment provided to concepts_to_grain call"
            )
        resolved.append(environment.concepts[item])

    return {
        candidate.address
        for candidate in resolved
        if concept_is_relevant(candidate, resolved)
    }
221
+
222
+
223
@dataclass
class LooseBuildConceptList:
    """Collection of concepts compared by their ``address`` values.

    Equality and the set-like helpers operate on the set of addresses, so
    ordering and duplicates in ``concepts`` are ignored. The set-like
    helpers return ``False`` when given anything other than another
    ``LooseBuildConceptList``.
    """

    concepts: Sequence[BuildConcept]

    @cached_property
    def addresses(self) -> set[str]:
        """Set of ``address`` values of the wrapped concepts (computed once)."""
        return {member.address for member in self.concepts}

    @cached_property
    def sorted_addresses(self) -> List[str]:
        """Addresses in sorted order (computed once)."""
        return sorted(self.addresses)

    def __str__(self) -> str:
        return "lcl" + str(self.sorted_addresses)

    def __iter__(self):
        return iter(self.concepts)

    def __eq__(self, other):
        return (
            isinstance(other, LooseBuildConceptList)
            and self.addresses == other.addresses
        )

    def issubset(self, other):
        if isinstance(other, LooseBuildConceptList):
            return self.addresses <= other.addresses
        return False

    def __contains__(self, other):
        # Accept either a raw address string or a concept object.
        if isinstance(other, str):
            lookup = other
        elif isinstance(other, BuildConcept):
            lookup = other.address
        else:
            return False
        return lookup in self.addresses

    def difference(self, other):
        if isinstance(other, LooseBuildConceptList):
            return self.addresses - other.addresses
        return False

    def isdisjoint(self, other):
        if isinstance(other, LooseBuildConceptList):
            return self.addresses.isdisjoint(other.addresses)
        return False
267
+
268
+
269
@dataclass
class CanonicalBuildConceptList:
    """Collection of concepts compared by their ``canonical_address`` values.

    Equality and the set-like helpers operate on the set of canonical
    addresses, so ordering and duplicates in ``concepts`` are ignored. The
    set-like helpers return ``False`` when given anything other than
    another ``CanonicalBuildConceptList``.
    """

    concepts: Sequence[BuildConcept]

    @cached_property
    def addresses(self) -> set[str]:
        """Set of ``canonical_address`` values of the wrapped concepts."""
        return {member.canonical_address for member in self.concepts}

    @cached_property
    def sorted_addresses(self) -> List[str]:
        """Canonical addresses in sorted order (computed once)."""
        return sorted(self.addresses)

    def __str__(self) -> str:
        return "lcl" + str(self.sorted_addresses)

    def __iter__(self):
        return iter(self.concepts)

    def __eq__(self, other):
        return (
            isinstance(other, CanonicalBuildConceptList)
            and self.addresses == other.addresses
        )

    def issubset(self, other):
        if isinstance(other, CanonicalBuildConceptList):
            return self.addresses <= other.addresses
        return False

    def __contains__(self, other):
        # Accept either a raw address string or a concept object.
        if isinstance(other, str):
            lookup = other
        elif isinstance(other, BuildConcept):
            lookup = other.canonical_address
        else:
            return False
        return lookup in self.addresses

    def difference(self, other):
        if isinstance(other, CanonicalBuildConceptList):
            return self.addresses - other.addresses
        return False

    def isdisjoint(self, other):
        if isinstance(other, CanonicalBuildConceptList):
            return self.addresses.isdisjoint(other.addresses)
        return False
313
+
314
+
315
class ConstantInlineable(ABC):
    """Interface for expressions that support ``inline_constant``."""

    def inline_constant(self, concept: BuildConcept):
        """Inline ``concept`` into this expression; must be overridden."""
        raise NotImplementedError()
319
+
320
+
321
def get_concept_row_arguments(expr) -> List["BuildConcept"]:
    """Return the row-level concepts referenced by ``expr`` as a new list.

    A ``BuildConcept`` yields itself, a ``BuildConceptArgs`` yields its
    ``row_arguments``, and anything else yields an empty list.
    """
    if isinstance(expr, BuildConcept):
        return [expr]
    if isinstance(expr, BuildConceptArgs):
        # Copy so callers never mutate the source sequence.
        return list(expr.row_arguments)
    return []
329
+
330
+
331
def get_concept_arguments(expr) -> List["BuildConcept"]:
    """Return the concepts referenced by ``expr`` as a new list.

    A ``BuildConcept`` yields itself, a ``BuildConceptArgs`` yields its
    ``concept_arguments``, and anything else yields an empty list.
    """
    if isinstance(expr, BuildConcept):
        return [expr]
    if isinstance(expr, BuildConceptArgs):
        # Copy so callers never mutate a (possibly cached) source sequence.
        return list(expr.concept_arguments)
    return []
342
+
343
+
344
def get_rendered_concept_arguments(expr) -> List["BuildConcept"]:
    """Return the rendered concepts referenced by ``expr`` as a new list.

    A ``BuildConcept`` yields itself, a ``BuildConceptArgs`` yields its
    ``rendered_concept_arguments``, and anything else yields an empty list.
    """
    if isinstance(expr, BuildConcept):
        return [expr]
    if isinstance(expr, BuildConceptArgs):
        # Copy so callers never mutate the source sequence.
        return list(expr.rendered_concept_arguments)
    return []
355
+
356
+
357
@dataclass
class BuildParamaterizedConceptReference(DataTyped):
    """Reference to a concept rendered as a ``:name`` style parameter."""

    concept: BuildConcept

    def __str__(self):
        # Dots in the address are replaced with underscores.
        flattened = self.concept.address.replace(".", "_")
        return ":" + flattened

    @property
    def safe_address(self) -> str:
        """The wrapped concept's ``safe_address``."""
        wrapped = self.concept
        return wrapped.safe_address

    @property
    def output_datatype(self) -> DataType:
        """The wrapped concept's ``output_datatype``."""
        wrapped = self.concept
        return wrapped.output_datatype
371
+
372
+
373
@dataclass
class BuildGrain:
    """Set of concept addresses defining a grain, plus an optional where clause.

    ``_str`` and ``_str_no_condition`` memoize the two string renderings;
    they are filled lazily on first use.
    """

    components: set[str] = field(default_factory=set)
    where_clause: Optional[BuildWhereClause] = None
    _str: str | None = None
    _str_no_condition: str | None = None

    def without_condition(self):
        """Return this grain without its where clause (``self`` if it has none)."""
        if not self.where_clause:
            return self
        return BuildGrain(components=self.components)

    @classmethod
    def from_concepts(
        cls,
        concepts: Iterable[BuildConcept | str],
        environment: BuildEnvironment | None = None,
        where_clause: BuildWhereClause | None = None,
    ) -> "BuildGrain":
        """Build a grain from concepts (or addresses resolved via ``environment``).

        Components are computed by ``concepts_to_build_grain_concepts``.
        """
        return BuildGrain(
            components=concepts_to_build_grain_concepts(
                concepts, environment=environment
            ),
            where_clause=where_clause,
        )

    def __add__(self, other: "BuildGrain") -> "BuildGrain":
        """Union the components of both grains and merge their where clauses.

        If only one side has a where clause it is used as-is; if both have
        different clauses they are combined with ``AND``.
        """
        if not other:
            return self
        where = self.where_clause
        if other.where_clause:
            if not self.where_clause:
                where = other.where_clause
            elif not other.where_clause == self.where_clause:
                where = BuildWhereClause(
                    conditional=BuildConditional(
                        left=self.where_clause.conditional,
                        right=other.where_clause.conditional,
                        operator=BooleanOperator.AND,
                    )
                )
        return BuildGrain(
            components=self.components.union(other.components), where_clause=where
        )

    def __sub__(self, other: "BuildGrain") -> "BuildGrain":
        """Remove ``other``'s components, keeping this grain's where clause."""
        return BuildGrain(
            components=self.components.difference(other.components),
            where_clause=self.where_clause,
        )

    @property
    def abstract(self):
        """True when there are no components or all end with ``ALL_ROWS_CONCEPT``."""
        return not self.components or all(
            c.endswith(ALL_ROWS_CONCEPT) for c in self.components
        )

    def __eq__(self, other):
        """Compare by components; two abstract grains are always equal.

        Objects without a ``components`` attribute (``None``, ints, strings,
        ...) yield ``NotImplemented`` so that ``==`` evaluates to ``False``
        instead of raising ``AttributeError``.
        """
        if not hasattr(other, "components"):
            return NotImplemented
        if self.components == other.components:
            return True
        if self.abstract is True and getattr(other, "abstract", False) is True:
            return True
        return False

    def issubset(self, other: "BuildGrain"):
        return self.components.issubset(other.components)

    def union(self, other: "BuildGrain"):
        """Union of components; only this grain's where clause is kept."""
        addresses = self.components.union(other.components)
        return BuildGrain(components=addresses, where_clause=self.where_clause)

    def isdisjoint(self, other: "BuildGrain"):
        return self.components.isdisjoint(other.components)

    def intersection(self, other: "BuildGrain") -> "BuildGrain":
        """Intersection of components; the result has no where clause."""
        intersection = self.components.intersection(other.components)
        return BuildGrain(components=intersection)

    def _calculate_string(self):
        """Render the grain, appending ``|<where clause>`` when one is set."""
        base = self._calculate_string_no_condition()
        if self.where_clause:
            base += f"|{str(self.where_clause)}"
        return base

    def _calculate_string_no_condition(self):
        """Render the grain's components only (sorted, comma-joined)."""
        if self.abstract:
            return "Grain<Abstract>"
        return "Grain<" + ",".join(sorted(self.components)) + ">"

    @property
    def str_no_condition(self):
        """Memoized string form that ignores the where clause."""
        if self._str_no_condition:
            return self._str_no_condition
        self._str_no_condition = self._calculate_string_no_condition()
        return self._str_no_condition

    def __str__(self):
        if self._str:
            return self._str
        self._str = self._calculate_string()
        return self._str

    def __radd__(self, other) -> "BuildGrain":
        # ``sum()`` starts from 0, so ``0 + grain`` must return the grain itself.
        if other == 0:
            return self
        else:
            return self.__add__(other)
489
+
490
+
491
+ DEFAULT_GRAIN = BuildGrain(components=set())
492
+
493
+
494
@dataclass
class BuildParenthetical(DataTyped, ConstantInlineable, BuildConceptArgs):
    """A parenthesized expression wrapping a single ``content`` expression."""

    content: "BuildExpr"

    def __add__(self, other) -> Union["BuildParenthetical", "BuildConditional"]:
        """AND this expression with ``other``; adding ``None`` is a no-op.

        Raises:
            ValueError: if ``other`` is not a comparison, conditional or
                parenthetical.
        """
        if other is None:
            return self
        if not isinstance(
            other, (BuildComparison, BuildConditional, BuildParenthetical)
        ):
            raise ValueError(f"Cannot add {self.__class__} and {type(other)}")
        return BuildConditional(left=self, right=other, operator=BooleanOperator.AND)

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return "(" + str(self.content) + ")"

    def inline_constant(self, BuildConcept: BuildConcept):
        """Return a new parenthetical with the constant inlined into the content.

        Content that is not ``ConstantInlineable`` is carried over unchanged.
        """
        inner = self.content
        if isinstance(inner, ConstantInlineable):
            inner = inner.inline_constant(BuildConcept)
        return BuildParenthetical(content=inner)

    @cached_property
    def concept_arguments(self) -> List[BuildConcept]:
        """Concepts referenced by the wrapped content (computed once)."""
        return get_concept_arguments(self.content)

    @property
    def rendered_concept_arguments(self) -> Sequence[BuildConcept]:
        """Rendered concepts referenced by the wrapped content."""
        return get_rendered_concept_arguments(self.content)

    @property
    def row_arguments(self) -> Sequence[BuildConcept]:
        """The content's ``row_arguments`` when available, else ``concept_arguments``."""
        inner = self.content
        if not isinstance(inner, BuildConceptArgs):
            return self.concept_arguments
        return inner.row_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["BuildConcept", ...]]:
        """The content's ``existence_arguments`` when available, else empty."""
        inner = self.content
        if not isinstance(inner, BuildConceptArgs):
            return []
        return inner.existence_arguments

    @property
    def output_datatype(self):
        """Datatype of the wrapped content, via ``arg_to_datatype``."""
        return arg_to_datatype(self.content)
545
+
546
+
547
@dataclass
class BuildConditional(DataTyped, BuildConceptArgs, ConstantInlineable):
    """Boolean combination (``left <operator> right``) of two expressions."""

    left: Union[
        int,
        str,
        float,
        list,
        bool,
        MagicConstants,
        BuildConcept,
        BuildComparison,
        BuildConditional,
        BuildParenthetical,
        BuildSubselectComparison,
        BuildFunction,
        BuildFilterItem,
    ]
    right: Union[
        int,
        str,
        float,
        list,
        bool,
        MagicConstants,
        BuildConcept,
        BuildComparison,
        BuildConditional,
        BuildParenthetical,
        BuildSubselectComparison,
        BuildFunction,
        BuildFilterItem,
    ]
    operator: BooleanOperator

    def __add__(self, other) -> "BuildConditional":
        """AND this conditional with ``other``.

        ``None`` and operands whose string form matches this one are
        no-ops that return ``self``.

        Raises:
            ValueError: if ``other`` is not a comparison, conditional or
                parenthetical.
        """
        if other is None or str(other) == str(self):
            return self
        if isinstance(other, (BuildComparison, BuildConditional, BuildParenthetical)):
            return BuildConditional(
                left=self, right=other, operator=BooleanOperator.AND
            )
        raise ValueError(f"Cannot add {self.__class__} and {type(other)}")

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return " ".join((str(self.left), self.operator.value, str(self.right)))

    def __eq__(self, other):
        if not isinstance(other, BuildConditional):
            return False
        return (
            self.left == other.left
            and self.right == other.right
            and self.operator == other.operator
        )

    @staticmethod
    def _inline_operand(operand, constant, replacement):
        """Inline ``constant`` into one operand, or return it unchanged."""
        if isinstance(operand, ConstantInlineable):
            return operand.inline_constant(constant)
        if isinstance(operand, BuildConcept) and operand.address == constant.address:
            return replacement
        return operand

    def inline_constant(self, constant: BuildConcept) -> "BuildConditional":
        """Return a copy with references to ``constant`` replaced by its value.

        The value is the first argument of the constant's function lineage.
        """
        assert isinstance(constant.lineage, BuildFunction)
        replacement = constant.lineage.arguments[0]
        inlined_left = self._inline_operand(self.left, constant, replacement)
        inlined_right = self._inline_operand(self.right, constant, replacement)

        # Right-hand side only: any operand comparing equal to the constant
        # is replaced as well, overriding the result computed above.
        if self.right == constant:
            inlined_right = replacement

        return BuildConditional(
            left=inlined_left,
            right=inlined_right,
            operator=self.operator,
        )

    @cached_property
    def concept_arguments(self) -> List[BuildConcept]:
        """Return BuildConcepts directly referenced in where clause"""
        return [
            *get_concept_arguments(self.left),
            *get_concept_arguments(self.right),
        ]

    @property
    def row_arguments(self) -> List[BuildConcept]:
        """Row-level concepts of the left operand followed by the right."""
        return [
            *get_concept_row_arguments(self.left),
            *get_concept_row_arguments(self.right),
        ]

    @property
    def existence_arguments(self) -> list[tuple[BuildConcept, ...]]:
        """Existence arguments collected from both operands, left first."""
        collected: list[tuple[BuildConcept, ...]] = []
        for operand in (self.left, self.right):
            if isinstance(operand, BuildConceptArgs):
                collected.extend(operand.existence_arguments)
        return collected

    def decompose(self):
        """Flatten nested ``AND`` conditionals into a list of their operands.

        A non-``AND`` conditional is returned as a single-element list.
        """
        if self.operator != BooleanOperator.AND:
            return [self]
        parts = []
        for operand in (self.left, self.right):
            if isinstance(operand, BuildConditional):
                parts.extend(operand.decompose())
            else:
                parts.append(operand)
        return parts

    @property
    def output_datatype(self):
        """A conditional always evaluates to ``DataType.BOOL``."""
        return DataType.BOOL
678
+
679
+
680
@dataclass
class BuildWhereClause(BuildConceptArgs):
    """Where clause wrapping a single conditional expression.

    All argument accessors delegate to ``conditional``.
    """

    conditional: Union[
        BuildSubselectComparison,
        BuildComparison,
        BuildConditional,
        BuildParenthetical,
    ]

    def __eq__(self, other):
        # Comparable against both build-time and author-time where clauses.
        if isinstance(other, (BuildWhereClause, WhereClause)):
            return self.conditional == other.conditional
        return False

    def __repr__(self):
        return str(self.conditional)

    def __str__(self):
        return self.__repr__()

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """Concepts referenced by the wrapped conditional."""
        wrapped = self.conditional
        return wrapped.concept_arguments

    @property
    def row_arguments(self) -> Sequence[BuildConcept]:
        """Row-level concepts referenced by the wrapped conditional."""
        wrapped = self.conditional
        return wrapped.row_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["BuildConcept", ...]]:
        """Existence arguments of the wrapped conditional."""
        wrapped = self.conditional
        return wrapped.existence_arguments
711
+
712
+
713
class BuildHavingClause(BuildWhereClause):
    """Having clause; adds nothing to ``BuildWhereClause`` beyond its type."""
715
+
716
+
717
@dataclass
class BuildComparison(DataTyped, BuildConceptArgs, ConstantInlineable):
    """Build-time binary comparison of the form ``left <operator> right``."""

    left: Union[
        int,
        str,
        float,
        bool,
        datetime,
        date,
        BuildFunction,
        BuildConcept,
        BuildConditional,
        DataType,
        BuildComparison,
        BuildParenthetical,
        MagicConstants,
        BuildWindowItem,
        BuildAggregateWrapper,
        ListWrapper,
        TupleWrapper,
    ]
    right: Union[
        int,
        str,
        float,
        bool,
        date,
        datetime,
        BuildConcept,
        BuildFunction,
        BuildConditional,
        DataType,
        BuildComparison,
        BuildParenthetical,
        MagicConstants,
        BuildWindowItem,
        BuildAggregateWrapper,
        TupleWrapper,
        ListWrapper,
    ]
    operator: ComparisonOperator

    def __add__(self, other):
        """AND-combine this comparison with another condition.

        ``None`` and an operand equal to ``self`` both return ``self``
        unchanged; any operand that is not a comparison, conditional or
        parenthetical raises ``ValueError``.
        """
        if other is None:
            return self
        combinable = (BuildComparison, BuildConditional, BuildParenthetical)
        if not isinstance(other, combinable):
            raise ValueError(f"Cannot add {type(other)} to {__class__}")
        if other == self:
            return self
        return BuildConditional(left=self, right=other, operator=BooleanOperator.AND)

    def __repr__(self):
        def render(side):
            # Concepts are shown by address; everything else by str().
            return side.address if isinstance(side, BuildConcept) else str(side)

        left, right = render(self.left), render(self.right)
        return f"{left} {self.operator.value} {right}"

    def __str__(self):
        return self.__repr__()

    def __eq__(self, other):
        """Equal only to another ``BuildComparison`` with equal sides and operator."""
        return isinstance(other, BuildComparison) and (
            self.left == other.left
            and self.right == other.right
            and self.operator == other.operator
        )

    def inline_constant(self, constant: BuildConcept):
        """Return a copy with references to ``constant`` replaced by its value.

        The value is the first argument of ``constant.lineage``, which must
        be a ``BuildFunction``. Sides that are themselves
        ``ConstantInlineable`` are rewritten recursively.
        """
        assert isinstance(constant.lineage, BuildFunction)
        replacement = constant.lineage.arguments[0]

        def swap(side):
            if isinstance(side, ConstantInlineable):
                return side.inline_constant(constant)
            if isinstance(side, BuildConcept) and side.address == constant.address:
                return replacement
            return side

        new_left = swap(self.left)
        new_right = swap(self.right)
        return self.__class__(
            left=new_left,
            right=new_right,
            operator=self.operator,
        )

    @cached_property
    def concept_arguments(self) -> List[BuildConcept]:
        """Concept arguments of the left side followed by those of the right side."""
        return [
            *get_concept_arguments(self.left),
            *get_concept_arguments(self.right),
        ]

    @property
    def row_arguments(self) -> List[BuildConcept]:
        """Row arguments of the left side followed by those of the right side."""
        return [
            *get_concept_row_arguments(self.left),
            *get_concept_row_arguments(self.right),
        ]

    @property
    def existence_arguments(self) -> List[Tuple[BuildConcept, ...]]:
        """Existence arguments contributed by whichever sides are ``BuildConceptArgs``."""
        found: List[Tuple[BuildConcept, ...]] = []
        for side in (self.left, self.right):
            if isinstance(side, BuildConceptArgs):
                found += side.existence_arguments
        return found

    @property
    def output_datatype(self):
        """A comparison always evaluates to ``DataType.BOOL``."""
        return DataType.BOOL
850
+
851
+
852
@dataclass
class BuildSubselectComparison(BuildComparison):
    """Comparison whose right side is treated as an existence (subselect) argument.

    Row arguments come from the left side only; the concept arguments of
    the right side are reported as a single existence tuple.
    """

    left: Union[
        int,
        str,
        float,
        bool,
        datetime,
        date,
        BuildFunction,
        BuildConcept,
        "BuildConditional",
        DataType,
        "BuildComparison",
        "BuildParenthetical",
        MagicConstants,
        BuildWindowItem,
        BuildAggregateWrapper,
        ListWrapper,
        TupleWrapper,
    ]
    right: Union[
        int,
        str,
        float,
        bool,
        date,
        datetime,
        BuildConcept,
        BuildFunction,
        "BuildConditional",
        DataType,
        "BuildComparison",
        "BuildParenthetical",
        MagicConstants,
        BuildWindowItem,
        BuildAggregateWrapper,
        TupleWrapper,
        ListWrapper,
    ]
    operator: ComparisonOperator

    def __eq__(self, other):
        """Equal only to another ``BuildSubselectComparison`` with equal sides and operator."""
        return isinstance(other, BuildSubselectComparison) and (
            self.left == other.left
            and self.right == other.right
            and self.operator == other.operator
        )

    @property
    def row_arguments(self) -> List[BuildConcept]:
        """Row arguments of the left side only."""
        left_side = self.left
        return get_concept_row_arguments(left_side)

    @property
    def existence_arguments(self) -> list[tuple["BuildConcept", ...]]:
        """One tuple holding every concept argument of the right side."""
        right_concepts = tuple(get_concept_arguments(self.right))
        return [right_concepts]
912
+
913
+
914
@dataclass
class BuildConcept(Addressable, BuildConceptArgs, DataTyped):
    """Build-time concept: a named, typed value with optional lineage and grain."""

    name: str
    canonical_name: str
    datatype: DataType | ArrayType | StructType | MapType | NumericType | TraitDataType
    purpose: Purpose
    build_is_aggregate: bool
    derivation: Derivation = Derivation.ROOT
    granularity: Granularity = Granularity.MULTI_ROW
    namespace: Optional[str] = field(default=DEFAULT_NAMESPACE)
    metadata: Metadata = field(
        default_factory=lambda: Metadata(description=None, line_number=None),
    )
    lineage: Optional[
        Union[
            BuildFunction,
            BuildWindowItem,
            BuildFilterItem,
            BuildAggregateWrapper,
            BuildRowsetItem,
            BuildMultiSelectLineage,
        ]
    ] = None
    keys: Optional[set[str]] = None
    grain: BuildGrain = field(default=None)  # type: ignore
    modifiers: List[Modifier] = field(default_factory=list)  # type: ignore
    pseudonyms: set[str] = field(default_factory=set)

    def _rebuild_with(self, **overrides) -> Self:
        """Construct a new instance of this class from the current field values.

        Every dataclass field is passed explicitly; ``overrides`` replaces
        the named ones.
        """
        values = {
            "name": self.name,
            "canonical_name": self.canonical_name,
            "datatype": self.datatype,
            "purpose": self.purpose,
            "metadata": self.metadata,
            "lineage": self.lineage,
            "grain": self.grain,
            "namespace": self.namespace,
            "keys": self.keys,
            "modifiers": self.modifiers,
            "pseudonyms": self.pseudonyms,
            "derivation": self.derivation,
            "granularity": self.granularity,
            "build_is_aggregate": self.build_is_aggregate,
        }
        values.update(overrides)
        return self.__class__(**values)

    @property
    def is_aggregate(self) -> bool:
        """Expose the stored ``build_is_aggregate`` flag."""
        return self.build_is_aggregate

    @cached_property
    def hash(self) -> int:
        """Hash of a string built from name, datatype, purpose, lineage, namespace, grain and keys."""
        # NOTE(review): __eq__ below ignores lineage and keys, so two concepts
        # that compare equal can still hash differently.
        return hash(
            f"{self.name}+{self.datatype}+ {self.purpose} + {str(self.lineage)} + {self.namespace} + {str(self.grain)} + {str(self.keys)}"
        )

    def __hash__(self):
        return self.hash

    def __repr__(self):
        rendered = f"{self.address}@{self.grain}"
        return rendered

    @property
    def output_datatype(self):
        """The concept's own ``datatype``."""
        return self.datatype

    def __eq__(self, other: object):
        """Compare against an address string or another ``BuildConcept``.

        The type check is exact (``type(...) is``), so subclasses of ``str``
        or ``BuildConcept`` never compare equal.
        """
        kind = type(other)
        if kind is str:
            return self.address == other
        if kind is not BuildConcept:
            return False
        return (
            self.name == other.name  # type: ignore
            and self.datatype == other.datatype  # type: ignore
            and self.purpose == other.purpose  # type: ignore
            and self.namespace == other.namespace  # type: ignore
            and self.grain == other.grain  # type: ignore
        )

    def __str__(self):
        grain_text = str(self.grain) if self.grain else "Grain<>"
        return f"{self.namespace}.{self.name}@{grain_text}"

    @cached_property
    def address(self) -> str:
        """``namespace.name``."""
        return f"{self.namespace}.{self.name}"

    @cached_property
    def canonical_address(self) -> str:
        """``namespace.canonical_name``."""
        return f"{self.namespace}.{self.canonical_name}"

    @cached_property
    def canonical_address_grain(self) -> str:
        """``namespace.canonical_name@grain``."""
        return f"{self.namespace}.{self.canonical_name}@{str(self.grain)}"

    @property
    def safe_address(self) -> str:
        """Address with dots replaced by underscores.

        The namespace prefix is omitted when the namespace is empty or is
        the default namespace.
        """
        flat_name = self.name.replace(".", "_")
        if self.namespace and self.namespace != DEFAULT_NAMESPACE:
            return f"{self.namespace.replace('.', '_')}_{flat_name}"
        return flat_name

    def with_materialized_source(self) -> Self:
        """Return a copy with no lineage and ``Derivation.ROOT``."""
        return self._rebuild_with(lineage=None, derivation=Derivation.ROOT)

    def with_grain(self, grain: Optional["BuildGrain"] = None) -> Self:
        """Return this concept at ``grain`` (``DEFAULT_GRAIN`` when falsy).

        Returns ``self`` unchanged when the grain already matches.
        """
        target = grain or DEFAULT_GRAIN
        if target == self.grain:
            return self
        return self._rebuild_with(grain=target)

    @property
    def _with_default_grain(self) -> Self:
        """Return this concept at the default grain implied by its purpose."""
        purpose = self.purpose
        if purpose == Purpose.KEY:
            # we need to make this abstract
            grain = BuildGrain(components={self.address})
        elif purpose == Purpose.PROPERTY:
            parts = [*self.keys] if self.keys else []
            if self.lineage:
                for argument in self.lineage.concept_arguments:
                    parts.extend(src.address for src in argument.sources)
            # TODO: set synonyms
            grain = BuildGrain(components=set(parts))
        elif purpose == Purpose.METRIC:
            grain = DEFAULT_GRAIN
        elif purpose == Purpose.CONSTANT and self.derivation != Derivation.CONSTANT:
            grain = BuildGrain(components={self.address})
        else:
            grain = self.grain
        if grain == self.grain:
            return self
        return self._rebuild_with(grain=grain)

    def with_default_grain(self) -> "BuildConcept":
        """Method form of the ``_with_default_grain`` property."""
        return self._with_default_grain

    @property
    def sources(self) -> List["BuildConcept"]:
        """Concepts this concept derives from, gathered recursively.

        Returns an empty list when there is no lineage or the lineage is a
        ``BuildMultiSelectLineage``. Raises ``SyntaxError`` when a direct
        argument has this concept's own address.
        """
        lineage = self.lineage
        if not lineage:
            return []
        collected: List[BuildConcept] = []
        if isinstance(lineage, BuildMultiSelectLineage):
            return collected
        for argument in lineage.concept_arguments:
            if not isinstance(argument, BuildConcept):
                continue
            if argument.address == self.address:
                raise SyntaxError(f"BuildConcept {self.address} references itself")
            collected.append(argument)
            collected += argument.sources
        return collected

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """Concept arguments of the lineage, or an empty list without lineage."""
        if self.lineage:
            return self.lineage.concept_arguments
        return []
1127
+
1128
+
1129
@dataclass
class BuildOrderItem(DataTyped, BuildConceptArgs):
    """One ORDER BY entry: an expression plus its sort direction."""

    expr: BuildExpr
    order: Ordering

    @cached_property
    def concept_arguments(self) -> List[BuildConcept]:
        """The expression itself when it is a concept, otherwise its concept arguments."""
        target = self.expr
        # BuildConcept is checked first: it is also a BuildConceptArgs.
        if isinstance(target, BuildConcept):
            return [target]
        if isinstance(target, BuildConceptArgs):
            return [*target.concept_arguments]
        return []

    @property
    def row_arguments(self) -> Sequence[BuildConcept]:
        """Row arguments of the expression, falling back to ``concept_arguments``."""
        target = self.expr
        if not isinstance(target, BuildConceptArgs):
            return self.concept_arguments
        return target.row_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["BuildConcept", ...]]:
        """Existence arguments of the expression, or an empty list."""
        target = self.expr
        if not isinstance(target, BuildConceptArgs):
            return []
        return target.existence_arguments

    @property
    def output_datatype(self):
        """Datatype of the ordered expression, resolved via ``arg_to_datatype``."""
        return arg_to_datatype(self.expr)
1159
+
1160
+
1161
@dataclass
class BuildWindowItem(DataTyped, BuildConceptArgs):
    """Build-time window expression over a content concept."""

    type: WindowType
    content: BuildConcept
    order_by: List[BuildOrderItem]
    over: List["BuildConcept"] = field(default_factory=list)
    index: Optional[int] = None

    def __repr__(self) -> str:
        return f"{self.type}({self.content} {self.index}, {self.over}, {self.order_by})"

    def __str__(self):
        return self.__repr__()

    @cached_property
    def concept_arguments(self) -> List[BuildConcept]:
        """Content first, then order-by concept arguments, then the ``over`` concepts."""
        collected = [self.content]
        for ordering in self.order_by:
            collected.extend(ordering.concept_arguments)
        collected.extend(self.over)
        return collected

    @property
    def output_datatype(self):
        """INTEGER for RANK / ROW_NUMBER windows, otherwise the content's datatype."""
        integer_windows = (WindowType.RANK, WindowType.ROW_NUMBER)
        if self.type in integer_windows:
            return DataType.INTEGER
        return self.content.output_datatype

    @property
    def output_purpose(self):
        """Always ``Purpose.PROPERTY``."""
        return Purpose.PROPERTY
1193
+
1194
+
1195
@dataclass
class BuildCaseWhen(DataTyped, BuildConceptArgs):
    """One ``WHEN <comparison> THEN <expr>`` branch of a CASE expression."""

    comparison: BuildConditional | BuildSubselectComparison | BuildComparison
    expr: "BuildExpr"

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        condition, outcome = str(self.comparison), str(self.expr)
        return f"WHEN {condition} THEN {outcome}"

    @cached_property
    def concept_arguments(self):
        """Concept arguments of the comparison followed by those of the result expression."""
        from_condition = get_concept_arguments(self.comparison)
        from_result = get_concept_arguments(self.expr)
        return from_condition + from_result

    @property
    def concept_row_arguments(self):
        """Row arguments of the comparison followed by those of the result expression."""
        from_condition = get_concept_row_arguments(self.comparison)
        from_result = get_concept_row_arguments(self.expr)
        return from_condition + from_result

    @property
    def output_datatype(self):
        """Reported as ``DataType.BOOL``."""
        return DataType.BOOL
1219
+
1220
+
1221
@dataclass
class BuildCaseElse(DataTyped, BuildConceptArgs):
    """The ``ELSE <expr>`` branch of a CASE expression."""

    expr: "BuildExpr"

    @cached_property
    def concept_arguments(self):
        """Concept arguments referenced by the fallback expression."""
        return get_concept_arguments(self.expr)

    # Exposed as a property, matching every other ``output_datatype`` in this
    # module; as a plain method, attribute access returned a bound method
    # instead of a datatype.
    @property
    def output_datatype(self):
        """Datatype of the fallback expression, resolved via ``arg_to_datatype``."""
        return arg_to_datatype(self.expr)
1231
+
1232
+
1233
@dataclass
class BuildFunction(DataTyped, BuildConceptArgs):
    """Build-time function call: an operator applied to a sequence of arguments."""

    operator: FunctionType
    arguments: Sequence[
        Union[
            int,
            float,
            str,
            date,
            datetime,
            MapWrapper[Any, Any],
            TraitDataType,
            DataType,
            ArrayType,
            MapType,
            NumericType,
            DatePart,
            BuildConcept,
            BuildAggregateWrapper,
            BuildFunction,
            BuildWindowItem,
            BuildParenthetical,
            BuildCaseWhen,
            BuildCaseElse,
            list,
            ListWrapper[Any],
        ]
    ]
    output_data_type: (
        DataType | ArrayType | StructType | MapType | NumericType | TraitDataType
    )
    output_purpose: Purpose = field(default=Purpose.KEY)
    arg_count: int = field(default=1)
    valid_inputs: Optional[
        Union[
            Set[DataType],
            List[Set[DataType]],
        ]
    ] = None

    def __repr__(self):
        return f'{self.operator.value}({",".join([str(a) for a in self.arguments])})'

    def __str__(self):
        return self.__repr__()

    @property
    def datatype(self):
        """Alias for ``output_data_type``."""
        return self.output_data_type

    @property
    def output_datatype(self):
        """The declared output datatype of the function."""
        return self.output_data_type

    @cached_property
    def concept_arguments(self) -> List[BuildConcept]:
        """Concept arguments gathered from every function argument, in order."""
        base = []
        for arg in self.arguments:
            base += get_concept_arguments(arg)
        return base

    @property
    def rendered_concept_arguments(self) -> List[BuildConcept]:
        """Rendered concept arguments; GROUP functions consider only their first argument."""
        if self.operator == FunctionType.GROUP:
            return get_rendered_concept_arguments(self.arguments[0])
        rendered = []
        for arg in self.arguments:
            rendered += get_rendered_concept_arguments(arg)
        return rendered

    @property
    def output_grain(self):
        """Grain of the function output.

        Aggregate functions get a grain with no components; any other
        function gets the union of its concept arguments' grain components.
        """
        # aggregates have an abstract grain
        if self.operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
            # NOTE(review): a list is passed here while other call sites in
            # this module pass a set -- confirm BuildGrain accepts both.
            return BuildGrain(components=[])
        # scalars have implicit grain of all arguments
        components: set = set()
        for concept in self.concept_arguments:
            # set has no ``+=``; the previous ``args += ...`` raised TypeError.
            components.update(concept.grain.components)
        return BuildGrain(components=components)
1314
+
1315
+
1316
@dataclass
class BuildAggregateWrapper(BuildConceptArgs, DataTyped):
    """An aggregate function together with the concepts it is grouped ``by``."""

    function: BuildFunction
    by: List[BuildConcept] = field(default_factory=list)

    def __str__(self):
        if self.by:
            grain_str = [str(c) for c in self.by]
        else:
            grain_str = "abstract"
        return f"{str(self.function)}<{grain_str}>"

    @property
    def is_abstract(self):
        """True when ``by`` is empty or every ``by`` concept is named ``ALL_ROWS_CONCEPT``."""
        return not self.by or all(x.name == ALL_ROWS_CONCEPT for x in self.by)

    def with_abstract_by(self) -> "BuildAggregateWrapper":
        """Return a wrapper around the same function with an empty ``by`` list."""
        return BuildAggregateWrapper(function=self.function, by=[])

    @property
    def datatype(self):
        """Datatype of the wrapped function."""
        wrapped = self.function
        return wrapped.datatype

    @cached_property
    def concept_arguments(self) -> List[BuildConcept]:
        """Concept arguments of the function followed by the ``by`` concepts."""
        from_function = self.function.concept_arguments
        return from_function + self.by

    @property
    def output_datatype(self):
        """Output datatype of the wrapped function."""
        wrapped = self.function
        return wrapped.output_datatype

    @property
    def output_purpose(self):
        """Output purpose of the wrapped function."""
        wrapped = self.function
        return wrapped.output_purpose
1351
+
1352
+
1353
@dataclass
class BuildFilterItem(BuildConceptArgs):
    """An expression restricted by a where clause."""

    content: "BuildExpr"
    where: BuildWhereClause

    def __str__(self):
        content_text, where_text = str(self.content), str(self.where)
        return f"<Filter: {content_text} where {where_text}>"

    @property
    def output_datatype(self):
        """Datatype of the filtered content, resolved via ``arg_to_datatype``."""
        return arg_to_datatype(self.content)

    @property
    def output_purpose(self):
        """Purpose of the filtered content."""
        filtered = self.content
        return filtered.purpose

    @property
    def content_concept_arguments(self):
        """The content itself when it is a concept, else its concept arguments, else ``[]``."""
        filtered = self.content
        # BuildConcept is checked first: it is also a BuildConceptArgs.
        if isinstance(filtered, BuildConcept):
            return [filtered]
        if isinstance(filtered, BuildConceptArgs):
            return filtered.concept_arguments
        return []

    @cached_property
    def concept_arguments(self):
        """Where-clause concept arguments followed by those of the content."""
        from_where = self.where.concept_arguments
        return from_where + get_concept_arguments(self.content)

    @property
    def rendered_concept_arguments(self):
        """Rendered concept arguments of the content followed by those of the where clause."""
        from_content = get_rendered_concept_arguments(self.content)
        return from_content + self.where.rendered_concept_arguments
1387
+
1388
+
1389
@dataclass
class BuildRowsetLineage(BuildConceptArgs):
    """Plain record describing a named rowset; defines fields only."""

    # Name of the rowset (used when rendering BuildRowsetItem).
    name: str
    # presumably the addresses of concepts the rowset derives -- verify against callers
    derived_concepts: List[str]
    # The select (or multi-select) lineage the rowset was declared with.
    select: SelectLineage | MultiSelectLineage
1394
+
1395
+
1396
@dataclass
class BuildRowsetItem(DataTyped, BuildConceptArgs):
    """A concept exposed through a named rowset."""

    content: BuildConcept
    rowset: BuildRowsetLineage

    def __repr__(self):
        rowset_name = self.rowset.name
        return f"<Rowset<{rowset_name}>: {str(self.content)}>"

    def __str__(self):
        return self.__repr__()

    @property
    def output_datatype(self):
        """Datatype of the wrapped concept."""
        wrapped = self.content
        return wrapped.datatype

    @property
    def output_purpose(self):
        """Purpose of the wrapped concept."""
        wrapped = self.content
        return wrapped.purpose

    @property
    def concept_arguments(self):
        """Single-element list holding the wrapped concept."""
        wrapped = self.content
        return [wrapped]
1418
+
1419
+
1420
@dataclass
class BuildOrderBy:
    """Ordered collection of ORDER BY items."""

    items: List[BuildOrderItem]

    @property
    def concept_arguments(self):
        """The ``expr`` of each order item, in order."""
        expressions = []
        for entry in self.items:
            expressions.append(entry.expr)
        return expressions
1427
+
1428
+
1429
@dataclass
class BuildAlignClause:
    """Container for the align items of a multi-select."""

    # Align entries, in declaration order.
    items: List[BuildAlignItem]
1432
+
1433
+
1434
@dataclass
class BuildDeriveClause:
    """Container for the derive items of a multi-select."""

    # Derive entries, in declaration order.
    items: List[BuildDeriveItem]
1437
+
1438
+
1439
@dataclass
class BuildDeriveItem:
    """A named expression derived inside a multi-select."""

    expr: BuildExpr
    name: str
    namespace: str = DEFAULT_NAMESPACE

    @property
    def address(self) -> str:
        """``namespace.name`` for this derived item."""
        return ".".join((f"{self.namespace}", f"{self.name}"))
1448
+
1449
+
1450
@dataclass
class BuildSelectLineage:
    """Build-time description of a single select statement."""

    selection: List[BuildConcept]
    hidden_components: set[str]
    local_concepts: dict[str, BuildConcept]
    order_by: Optional[BuildOrderBy] = None
    limit: Optional[int] = None
    meta: Metadata = field(default_factory=lambda: Metadata())
    grain: BuildGrain = field(default_factory=BuildGrain)
    where_clause: BuildWhereClause | None = None
    having_clause: BuildHavingClause | None = None

    @property
    def output_components(self) -> List[BuildConcept]:
        """The selected concepts (same list object as ``selection``)."""
        selected = self.selection
        return selected
1465
+
1466
+
1467
@dataclass
class BuildMultiSelectLineage(BuildConceptArgs):
    """Build-time description of several selects merged through an align clause."""

    selects: List[SelectLineage]
    grain: BuildGrain
    align: BuildAlignClause
    namespace: str
    local_concepts: dict[str, BuildConcept]
    build_concept_arguments: list[BuildConcept]
    build_output_components: list[BuildConcept]
    hidden_components: set[str]
    order_by: Optional[BuildOrderBy] = None
    limit: Optional[int] = None
    where_clause: Union["BuildWhereClause", None] = None
    having_clause: Union["BuildHavingClause", None] = None
    derive: BuildDeriveClause | None = None

    @property
    def derived_concepts(self) -> set[str]:
        """Addresses of every aligned concept plus every derived item."""
        addresses = {entry.aligned_concept for entry in self.align.items}
        if self.derive:
            addresses.update(derived.address for derived in self.derive.items)
        return addresses

    @property
    def output_components(self) -> list[BuildConcept]:
        """The stored ``build_output_components``."""
        return self.build_output_components

    @property
    def calculated_derivations(self) -> set[str]:
        """Addresses of the derive items; empty when there is no derive clause."""
        if not self.derive:
            return set()
        return {derived.address for derived in self.derive.items}

    @property
    def concept_arguments(self) -> list[BuildConcept]:
        """The stored ``build_concept_arguments``."""
        return self.build_concept_arguments

    # these are needed to help disambiguate between parents
    def get_merge_concept(self, check: BuildConcept) -> str | None:
        """Return ``namespace.alias`` of the first align item containing ``check``, else ``None``."""
        return next(
            (
                f"{entry.namespace}.{entry.alias}"
                for entry in self.align.items
                if check in entry.concepts_lcl
            ),
            None,
        )

    def find_source(self, concept: BuildConcept, cte: CTE | UnionCTE) -> BuildConcept:
        """Find the aligned source concept for ``concept`` that ``cte`` outputs.

        Scans align items whose alias equals ``concept.name`` and returns the
        first member whose address is in ``cte.output_lcl``. Raises
        ``SyntaxError`` when none matches.
        """
        candidates = (
            member
            for entry in self.align.items
            if concept.name == entry.alias
            for member in entry.concepts
        )
        for member in candidates:
            if member.address in cte.output_lcl:
                return member

        raise SyntaxError(
            f"Could not find upstream map for multiselect {str(concept)} on cte ({cte})"
        )
1527
+
1528
+
1529
@dataclass
class BuildAlignItem:
    """One align entry: an alias shared by a group of concepts."""

    alias: str
    concepts: List[BuildConcept]
    namespace: str = DEFAULT_NAMESPACE

    @cached_property
    def concepts_lcl(self) -> LooseBuildConceptList:
        """The member concepts wrapped in a ``LooseBuildConceptList`` (computed once)."""
        members = self.concepts
        return LooseBuildConceptList(concepts=members)

    @property
    def aligned_concept(self) -> str:
        """``namespace.alias`` of the aligned output concept."""
        return ".".join((f"{self.namespace}", f"{self.alias}"))
1542
+
1543
+
1544
@dataclass
class BuildColumnAssignment:
    """Binding of a datasource column (alias or expression) to a concept."""

    alias: str | RawColumnExpr | BuildFunction | BuildAggregateWrapper
    concept: BuildConcept
    modifiers: set[Modifier] = field(default_factory=set)

    @property
    def is_complete(self) -> bool:
        """True unless the column carries ``Modifier.PARTIAL``."""
        flags = self.modifiers
        return Modifier.PARTIAL not in flags

    @property
    def is_nullable(self) -> bool:
        """True when the column carries ``Modifier.NULLABLE``."""
        flags = self.modifiers
        return Modifier.NULLABLE in flags
1557
+
1558
+
1559
@dataclass
class BuildDatasource:
    """Build-time datasource: a named location whose columns map to concepts."""

    name: str
    columns: List[BuildColumnAssignment]
    address: Union[Address, str]
    grain: BuildGrain = field(default_factory=lambda: DEFAULT_GRAIN)
    namespace: Optional[str] = field(default=DEFAULT_NAMESPACE)
    metadata: DatasourceMetadata = field(
        default_factory=lambda: DatasourceMetadata(freshness_concept=None)
    )
    where: Optional[BuildWhereClause] = None
    non_partial_for: Optional[BuildWhereClause] = None

    def __hash__(self):
        return self.identifier.__hash__()

    def __add__(self, other):
        """Adding is only valid between equal datasources and returns ``self``."""
        if other == self:
            return self
        raise ValueError(
            "Attempted to add two datasources that are not identical, this is not a valid operation"
        )

    def _concepts_with(self, modifier) -> List[BuildConcept]:
        """Concepts of the columns whose modifiers include ``modifier``."""
        return [col.concept for col in self.columns if modifier in col.modifiers]

    @property
    def condition(self):
        """Always ``None``."""
        return None

    @property
    def can_be_inlined(self) -> bool:
        """False only for ``Address`` locations flagged as a query or a file."""
        location = self.address
        return not (
            isinstance(location, Address) and (location.is_query or location.is_file)
        )

    @property
    def identifier(self) -> str:
        """``namespace.name``, or just ``name`` for an empty or default namespace."""
        if self.namespace and self.namespace != DEFAULT_NAMESPACE:
            return f"{self.namespace}.{self.name}"
        return self.name

    @property
    def safe_identifier(self) -> str:
        """Identifier with dots replaced by underscores."""
        return self.identifier.replace(".", "_")

    @property
    def concepts(self) -> List[BuildConcept]:
        """Concept of every column, in column order."""
        return [col.concept for col in self.columns]

    @property
    def group_required(self):
        """Always ``False``."""
        return False

    @property
    def output_concepts(self) -> List[BuildConcept]:
        """Same as ``concepts``."""
        return self.concepts

    @property
    def full_concepts(self) -> List[BuildConcept]:
        """Concepts of the columns not marked ``Modifier.PARTIAL``."""
        return [
            col.concept for col in self.columns if Modifier.PARTIAL not in col.modifiers
        ]

    @property
    def nullable_concepts(self) -> List[BuildConcept]:
        """Concepts of the columns marked ``Modifier.NULLABLE``."""
        return self._concepts_with(Modifier.NULLABLE)

    @property
    def hidden_concepts(self) -> List[BuildConcept]:
        """Concepts of the columns marked ``Modifier.HIDDEN``."""
        return self._concepts_with(Modifier.HIDDEN)

    @property
    def partial_concepts(self) -> List[BuildConcept]:
        """Concepts of the columns marked ``Modifier.PARTIAL``."""
        return self._concepts_with(Modifier.PARTIAL)

    def get_alias(
        self,
        concept: BuildConcept,
        use_raw_name: bool = True,
        force_alias: bool = False,
    ) -> Optional[str | RawColumnExpr] | BuildFunction | BuildAggregateWrapper:
        """Resolve the column alias for ``concept``.

        A column matches when its concept shares the canonical address, is
        equal to ``concept``, or is equal once re-grained to
        ``concept.grain``. Returns the column's raw alias when
        ``use_raw_name`` is true, otherwise ``concept.safe_address``.
        ``force_alias`` is accepted but not read here. Raises ``ValueError``
        when no column matches.
        """
        # 2022-01-22: this matching logic is flagged as needing refinement.
        for column in self.columns:
            candidate = column.concept
            matched = (
                candidate.canonical_address == concept.canonical_address
                or candidate == concept
                or candidate.with_grain(concept.grain) == concept
            )
            if not matched:
                continue
            return column.alias if use_raw_name else concept.safe_address
        existing = [str(c.concept) for c in self.columns]
        raise ValueError(
            f"{LOGGER_PREFIX} Concept {concept} not found on {self.identifier}; have"
            f" {existing}."
        )

    @property
    def safe_location(self) -> str:
        """``address.location`` for an ``Address``, otherwise the address itself."""
        location = self.address
        return location.location if isinstance(location, Address) else location

    @property
    def output_lcl(self) -> LooseBuildConceptList:
        """Output concepts wrapped in a ``LooseBuildConceptList``."""
        outputs = self.output_concepts
        return LooseBuildConceptList(concepts=outputs)

    @property
    def is_union(self) -> bool:
        """Always ``False``."""
        return False
1672
+
1673
+
1674
@dataclass
class BuildUnionDatasource:
    """A datasource formed by unioning several child datasources."""

    children: List[BuildDatasource]

    # NOTE(review): this is a plain method, while BuildDatasource.is_union in
    # this module is a property -- ``obj.is_union`` without a call is a
    # (truthy) bound method here. Confirm which form callers expect.
    def is_union(self) -> bool:
        """Always ``True``."""
        return True

    @property
    def columns(self) -> List[BuildColumnAssignment]:
        """Columns of the first child."""
        first_child = self.children[0]
        return first_child.columns

    @property
    def grain(self) -> BuildGrain:
        """Union of every child's grain, starting from an empty ``BuildGrain``."""
        merged = BuildGrain()
        for child in self.children:
            merged = merged.union(child.grain)
        return merged

    @property
    def non_partial_for(self) -> Optional[BuildWhereClause]:
        """Always ``None``."""
        return None

    @property
    def partial_concepts(self) -> List[BuildConcept]:
        """Always an empty list."""
        return []
1696
+
1697
+
1698
# Runtime union of every value accepted as a build-time expression
# (built node types plus plain Python literals).
BuildExpr = (
    BuildWindowItem
    | BuildFilterItem
    | BuildConcept
    | BuildComparison
    | BuildConditional
    | BuildParenthetical
    | BuildFunction
    | BuildAggregateWrapper
    | bool
    | MagicConstants
    | int
    | str
    | float
    | list
)
1714
+
1715
+
1716
def get_canonical_pseudonyms(environment: Environment) -> dict[str, set[str]]:
    """Map each concept address to every name known to refer to it.

    For every ``environment.concepts`` entry, the lookup key and the
    concept's pseudonyms are recorded under the concept's address. For every
    ``environment.alias_origin_lookup`` entry, the origin's address and
    pseudonyms are recorded under the address of the concept registered for
    that key.
    """
    roots: dict[str, set[str]] = defaultdict(set)
    for key, concept in environment.concepts.items():
        names = roots[concept.address]
        names.add(key)
        names.update(concept.pseudonyms)
    for key, origin in environment.alias_origin_lookup.items():
        names = roots[environment.concepts[key].address]
        names.add(origin.address)
        names.update(origin.pseudonyms)
    return roots
1728
+
1729
+
1730
def requires_concept_nesting(
    expr,
) -> AggregateWrapper | WindowItem | FilterItem | Function | None:
    """Return ``expr`` when it must be nested as its own concept, else ``None``.

    Aggregate wrappers, window items and filter items qualify, as do
    functions whose operator is GROUP or PARENTHETICAL.
    """
    always_nested = (AggregateWrapper, WindowItem, FilterItem)
    # group by requires nesting
    nesting_operators = (FunctionType.GROUP, FunctionType.PARENTHETICAL)
    needs_nesting = isinstance(expr, always_nested) or (
        isinstance(expr, Function) and expr.operator in nesting_operators
    )
    return expr if needs_nesting else None
1742
+
1743
+
1744
+ class Factory:
1745
+
1746
def __init__(
    self,
    environment: Environment,
    local_concepts: dict[str, BuildConcept] | None = None,
    grain: Grain | None = None,
    pseudonym_map: dict[str, set[str]] | None = None,
    build_cache: dict[str, BuildConcept] | None = None,
):
    """Set up a factory bound to ``environment``.

    Args:
        environment: Environment used to resolve concepts.
        local_concepts: Existing name -> built-concept mapping to reuse; a
            fresh dict is created when ``None``.
        grain: Grain to build under; a new ``Grain()`` when falsy.
        pseudonym_map: Precomputed pseudonym map; derived from
            ``environment`` via ``get_canonical_pseudonyms`` when falsy.
        build_cache: Existing build cache to reuse; a fresh dict is created
            when ``None``.
    """
    self.grain = grain or Grain()
    self.environment = environment
    self.local_concepts: dict[str, BuildConcept] = (
        {} if local_concepts is None else local_concepts
    )
    self.local_non_build_concepts: dict[str, Concept] = {}
    self.pseudonym_map = pseudonym_map or get_canonical_pseudonyms(environment)
    # ``build_cache or {}`` replaced a caller-supplied *empty* dict with a
    # private one, so a shared cache was never filled; test for None instead,
    # as is done for ``local_concepts`` above.
    self.build_cache: dict[str, BuildConcept] = (
        {} if build_cache is None else build_cache
    )
    self.build_grain = self.build(self.grain) if self.grain else None
1763
+
1764
def instantiate_concept(
    self,
    arg: (
        AggregateWrapper
        | FunctionCallWrapper
        | WindowItem
        | FilterItem
        | Function
        | ListWrapper
        | MapWrapper
        | int
        | float
        | str
        | date
    ),
) -> tuple[Concept, BuildConcept]:
    """Return the (authored, built) concept pair for an arbitrary expression.

    The pair is looked up by the generated concept name and reused when both
    local maps already hold it; otherwise the concept is created, built, and
    recorded in both maps.
    """
    from trilogy.parsing.common import arbitrary_to_concept, generate_concept_name

    key = generate_concept_name(arg)
    already_known = (
        key in self.local_concepts and key in self.local_non_build_concepts
    )
    if already_known:
        # if we already have this concept, return it
        return self.local_non_build_concepts[key], self.local_concepts[key]
    authored = arbitrary_to_concept(
        arg,
        environment=self.environment,
    )
    built = self._build_concept(authored)
    self.local_concepts[key] = built
    self.local_non_build_concepts[key] = authored
    return authored, built
1794
+
1795
@singledispatchmethod
def build(self, base):
    """Fallback for types with no registered builder; always raises ``NotImplementedError``."""
    message = "Cannot build {}".format(type(base))
    raise NotImplementedError(message)
1798
+
1799
@build.register
def _(
    self,
    base: (
        int
        | str
        | float
        | list
        | date
        | TupleWrapper
        | ListWrapper
        | MagicConstants
        | MapWrapper
        | DataType
        | DatePart
        | NumericType
    ),
) -> (
    int
    | str
    | float
    | list
    | date
    | TupleWrapper
    | ListWrapper
    | MagicConstants
    | MapWrapper
    | DataType
    | DatePart
    | NumericType
):
    """Dispatch target for the literal and type values listed in the annotation.

    The ``base`` annotation is what ``build.register`` reads to select the
    handled types, so it is part of the behavior.
    """
    return self._build_primitive(base)
1831
+
1832
def _build_primitive(self, base):
    """Return ``base`` unchanged."""
    return base
1834
+
1835
@build.register
def _(self, base: None) -> None:
    """Dispatch target for ``None``; forwards to ``_build_none``."""
    return self._build_none(base)
1838
+
1839
def _build_none(self, base):
    """Return ``base`` unchanged."""
    return base
1841
+
1842
@build.register
def _(self, base: Function) -> BuildFunction | BuildAggregateWrapper:
    """Dispatch target for authored ``Function`` objects; forwards to ``_build_function``."""
    return self._build_function(base)
1845
+
1846
+ def _build_function(self, base: Function) -> BuildFunction | BuildAggregateWrapper:
1847
+ raw_args: list[Concept | FuncArgs] = []
1848
+ for arg in base.arguments:
1849
+ # to do proper discovery, we need to inject virtual intermediate concepts
1850
+ # we don't use requires_concept_nesting here by design
1851
+ if isinstance(arg, (AggregateWrapper, FilterItem, WindowItem)):
1852
+ narg, _ = self.instantiate_concept(arg)
1853
+ raw_args.append(narg)
1854
+ else:
1855
+ raw_args.append(arg)
1856
+ if base.operator == FunctionType.GROUP:
1857
+ group_base = raw_args[0]
1858
+ final_args: List[Concept | ConceptRef] = []
1859
+ if isinstance(group_base, ConceptRef):
1860
+ if group_base.address in self.environment.concepts and not isinstance(
1861
+ self.environment.concepts[group_base.address], UndefinedConcept
1862
+ ):
1863
+ group_base = self.environment.concepts[group_base.address]
1864
+ if (
1865
+ isinstance(group_base, Concept)
1866
+ and isinstance(group_base.lineage, AggregateWrapper)
1867
+ and not group_base.lineage.by
1868
+ ):
1869
+ arguments = raw_args[1:]
1870
+ for x in arguments:
1871
+ if isinstance(x, (ConceptRef, Concept)):
1872
+ final_args.append(x)
1873
+ else:
1874
+ # constants, etc, can be ignored for group
1875
+ continue
1876
+ _, rval = self.instantiate_concept(
1877
+ AggregateWrapper(
1878
+ function=group_base.lineage.function,
1879
+ by=final_args,
1880
+ )
1881
+ )
1882
+
1883
+ return BuildFunction(
1884
+ operator=base.operator,
1885
+ arguments=[
1886
+ rval,
1887
+ *[self.handle_constant(self.build(c)) for c in raw_args[1:]],
1888
+ ],
1889
+ output_data_type=base.output_datatype,
1890
+ output_purpose=base.output_purpose,
1891
+ valid_inputs=base.valid_inputs,
1892
+ arg_count=base.arg_count,
1893
+ )
1894
+ new = BuildFunction(
1895
+ operator=base.operator,
1896
+ arguments=[self.handle_constant(self.build(c)) for c in raw_args],
1897
+ output_data_type=base.output_datatype,
1898
+ output_purpose=base.output_purpose,
1899
+ valid_inputs=base.valid_inputs,
1900
+ arg_count=base.arg_count,
1901
+ )
1902
+ return new
1903
+
1904
@build.register
def _(self, base: ConceptRef) -> BuildConcept:
    """``build`` handler for ``ConceptRef``; delegates to ``_build_concept_ref``."""
    return self._build_concept_ref(base)

def _build_concept_ref(self, base: ConceptRef) -> BuildConcept:
    """Resolve a reference to a built concept.

    A ``BuildConcept`` already held in ``local_concepts`` wins; otherwise
    the concept is looked up in the environment and built.
    """
    address = base.address
    if address in self.local_concepts:
        candidate = self.local_concepts[address]
        if isinstance(candidate, BuildConcept):
            return candidate
    known = self.environment.concepts
    if address in known:
        return self._build_concept(known[address])
    # this will error by design - TODO - more helpful message?
    return self._build_concept(known[address])
1918
+
1919
@build.register
def _(self, base: CaseWhen) -> BuildCaseWhen:
    """``build`` handler for ``CaseWhen``; delegates to ``_build_case_when``."""
    return self._build_case_when(base)

def _build_case_when(self, base: CaseWhen) -> BuildCaseWhen:
    """Build a WHEN branch, instantiating its result expression as a concept when nesting is required."""
    nested = requires_concept_nesting(base.expr)
    expr: Concept | FuncArgs = (
        self.instantiate_concept(nested)[0] if nested else base.expr
    )
    built_comparison = self.build(base.comparison)
    return BuildCaseWhen(comparison=built_comparison, expr=self.build(expr))
1932
+
1933
@build.register
def _(self, base: CaseElse) -> BuildCaseElse:
    """``build`` handler for ``CaseElse``; delegates to ``_build_case_else``."""
    return self._build_case_else(base)

def _build_case_else(self, base: CaseElse) -> BuildCaseElse:
    """Build an ELSE branch, instantiating its expression as a concept when nesting is required."""
    nested = requires_concept_nesting(base.expr)
    expr: Concept | FuncArgs = (
        self.instantiate_concept(nested)[0] if nested else base.expr
    )
    return BuildCaseElse(expr=self.build(expr))
1943
+
1944
@build.register
def _(self, base: Concept) -> BuildConcept:
    """``build`` handler for ``Concept``; delegates to ``_build_concept``."""
    return self._build_concept(base)

def _build_concept(self, base: Concept) -> BuildConcept:
    """Build ``base``, re-raising any ``RecursionError`` with the concept's details.

    Raises:
        RecursionError: chained from the original error, naming the
            address, grain and lineage of ``base``.
    """
    try:
        built = self.__build_concept(base)
    except RecursionError as e:
        raise RecursionError(
            f"Recursion error building concept {base.address} with grain {base.grain} and lineage {base.lineage}. This is likely due to a circular reference."
        ) from e
    return built
1955
+
1956
def __build_concept(self, base: Concept) -> BuildConcept:
    """Build a ``BuildConcept`` from ``base`` at this factory's grain.

    Results are remembered twice: in ``local_concepts`` keyed by address,
    and in ``build_cache`` keyed by namespace, address, canonical name and
    final grain.
    """
    # TODO: if we are using parameters, wrap it in a new model and use that in rendering
    # this doesn't work for persisted addresses.
    # we need to early exit if we have it in local concepts, because in that case,
    # it is built with appropriate grain only in that dictionary
    address = base.address
    if address in self.local_concepts:
        return self.local_concepts[address]

    new_lineage, final_grain, _ = base.get_select_grain_and_keys(
        self.grain, self.environment
    )
    build_lineage = self.build(new_lineage) if new_lineage else None
    canonical_name = (
        generate_concept_name(build_lineage) if build_lineage else base.name
    )
    cache_address = (
        f"{base.namespace}.{base.address}.{canonical_name}.{str(final_grain)}"
    )
    if cache_address in self.build_cache:
        return self.build_cache[cache_address]

    new_grain = self._build_grain(final_grain)
    derivation = Concept.calculate_derivation(build_lineage, base.purpose)
    granularity = Concept.calculate_granularity(
        derivation, final_grain, build_lineage
    )

    # exact type checks: only these two lineage kinds can mark an aggregate
    is_aggregate = False
    if build_lineage:
        lineage_type = type(build_lineage)
        if lineage_type is BuildFunction:
            is_aggregate = (
                build_lineage.operator in FunctionClass.AGGREGATE_FUNCTIONS.value
            )
        elif lineage_type is BuildAggregateWrapper:
            is_aggregate = (
                build_lineage.function.operator
                in FunctionClass.AGGREGATE_FUNCTIONS.value
            )

    # if this is a pseudonym, we need to look up the base address
    if address in self.environment.alias_origin_lookup:
        # map only to the canonical concept, not to other merged concepts
        base_pseudonyms = {self.environment.concepts[address].address}
    else:
        base_pseudonyms = {
            x for x in self.pseudonym_map.get(address, set()) if x != address
        }

    rval = BuildConcept(
        name=base.name,
        canonical_name=canonical_name,
        datatype=base.datatype,
        purpose=base.purpose,
        metadata=base.metadata,
        lineage=build_lineage,
        grain=new_grain,
        namespace=base.namespace,
        keys=base.keys,
        modifiers=base.modifiers,
        pseudonyms=base_pseudonyms,
        ## instantiated values
        derivation=derivation,
        granularity=granularity,
        build_is_aggregate=is_aggregate,
    )

    # re-check: an entry for this address may exist by now; if so it wins
    if address in self.local_concepts:
        return self.local_concepts[address]
    self.local_concepts[address] = rval
    # this is a global cache that can be reused across Factory instances
    self.build_cache[cache_address] = rval
    return rval
2036
+
2037
@build.register
def _(self, base: AggregateWrapper) -> BuildAggregateWrapper:
    """``build`` handler for ``AggregateWrapper``; delegates to ``_build_aggregate_wrapper``."""
    return self._build_aggregate_wrapper(base)

def _build_aggregate_wrapper(self, base: AggregateWrapper) -> BuildAggregateWrapper:
    """Build an aggregate; when ``base.by`` is empty, group by this factory's grain components.

    The resulting ``by`` list is sorted by address.
    """
    if base.by:
        by = [self.build(x) for x in base.by]
    else:
        by = [
            self._build_concept(self.environment.concepts[c])
            for c in self.grain.components
        ]

    parent: BuildFunction = self._build_function(base.function)  # type: ignore
    by.sort(key=lambda x: x.address)
    return BuildAggregateWrapper(function=parent, by=by)
2054
+
2055
@build.register
def _(self, base: ColumnAssignment) -> BuildColumnAssignment:
    """``build`` handler for ``ColumnAssignment``; delegates to ``_build_column_assignment``."""
    return self._build_column_assignment(base)

def _build_column_assignment(self, base: ColumnAssignment) -> BuildColumnAssignment:
    """Build a column binding.

    The concept is taken from ``alias_origin_lookup`` when its address is
    listed there, otherwise from the environment's concepts, and is
    re-grained to ``build_grain``. A ``Function`` alias is built as well.
    """
    address = base.concept.address
    alias_origins = self.environment.alias_origin_lookup
    if address in alias_origins:
        source = alias_origins[address]
    else:
        source = self.environment.concepts[address]
    fetched = self._build_concept(source).with_grain(self.build_grain)

    alias = base.alias
    if isinstance(alias, Function):
        alias = self._build_function(alias)

    return BuildColumnAssignment(
        alias=alias,
        concept=fetched,
        modifiers=set(base.modifiers),
    )
2080
+
2081
@build.register
def _(self, base: OrderBy) -> BuildOrderBy:
    """``build`` handler for ``OrderBy``; delegates to ``_build_order_by``."""
    return self._build_order_by(base)

def _build_order_by(self, base: OrderBy) -> BuildOrderBy:
    """Build every item of an ORDER BY, keeping their order."""
    built_items = [self._build_order_item(item) for item in base.items]
    return BuildOrderBy(items=built_items)
2087
+
2088
@build.register
def _(self, base: FunctionCallWrapper) -> BuildExpr:
    """``build`` handler for ``FunctionCallWrapper``; delegates to ``_build_function_call_wrapper``."""
    return self._build_function_call_wrapper(base)

def _build_function_call_wrapper(self, base: FunctionCallWrapper) -> BuildExpr:
    """Build the wrapped content; the wrapper itself is not kept in the result."""
    # function calls are kept around purely for the parse tree
    # so discard at the build point
    wrapped = base.content
    return self.build(wrapped)
2096
+
2097
@build.register
def _(self, base: OrderItem) -> BuildOrderItem:
    """``build`` handler for ``OrderItem``; delegates to ``_build_order_item``."""
    return self._build_order_item(base)

def _build_order_item(self, base: OrderItem) -> BuildOrderItem:
    """Build one ordering term, instantiating its expression as a concept when nesting is required."""
    nested = requires_concept_nesting(base.expr)
    bexpr: Any = self.instantiate_concept(nested)[0] if nested else base.expr
    return BuildOrderItem(
        expr=self.build(bexpr),
        order=base.order,
    )
2112
+
2113
@build.register
def _(self, base: WhereClause) -> BuildWhereClause:
    """``build`` handler for ``WhereClause``; delegates to ``_build_where_clause``."""
    return self._build_where_clause(base)

def _build_where_clause(self, base: WhereClause) -> BuildWhereClause:
    """Build the clause's conditional and wrap it in a ``BuildWhereClause``."""
    built_conditional = self.build(base.conditional)
    return BuildWhereClause(conditional=built_conditional)
2119
+
2120
@build.register
def _(self, base: HavingClause) -> BuildHavingClause:
    """``build`` handler for ``HavingClause``; delegates to ``_build_having_clause``."""
    return self._build_having_clause(base)

def _build_having_clause(self, base: HavingClause) -> BuildHavingClause:
    """Build the clause's conditional and wrap it in a ``BuildHavingClause``."""
    built_conditional = self.build(base.conditional)
    return BuildHavingClause(conditional=built_conditional)
2126
+
2127
@build.register
def _(self, base: WindowItem) -> BuildWindowItem:
    """``build`` handler for ``WindowItem``; delegates to ``_build_window_item``."""
    return self._build_window_item(base)

def _build_window_item(self, base: WindowItem) -> BuildWindowItem:
    """Build a window expression.

    Note: an order-by item whose expression is a by-less aggregate has
    its ``by`` set in place to the window content (when that content is a
    concept or concept reference), so the input ``base`` is mutated.
    """
    content: Concept | FuncArgs = base.content
    nested = requires_concept_nesting(base.content)
    if nested:
        content, _ = self.instantiate_concept(nested)

    ordering = list(base.order_by)
    content_is_concept = isinstance(content, (ConceptRef, Concept))
    for item in ordering:
        needs_by = isinstance(item.expr, AggregateWrapper) and not item.expr.by
        if needs_by and content_is_concept:
            item.expr.by = [content]

    return BuildWindowItem(
        type=base.type,
        content=self.build(content),
        order_by=[self.build(item) for item in ordering],
        over=[self._build_concept_ref(ref) for ref in base.over],
        index=base.index,
    )
2152
+
2153
@build.register
def _(self, base: Conditional) -> BuildConditional:
    """``build`` handler for ``Conditional``; delegates to ``_build_conditional``."""
    return self._build_conditional(base)

def _build_conditional(self, base: Conditional) -> BuildConditional:
    """Build both sides (left first), passing each through ``handle_constant``."""
    built_left = self.handle_constant(self.build(base.left))
    built_right = self.handle_constant(self.build(base.right))
    return BuildConditional(
        left=built_left,
        right=built_right,
        operator=base.operator,
    )
2163
+
2164
@build.register
def _(self, base: SubselectComparison) -> BuildSubselectComparison:
    """``build`` handler for ``SubselectComparison``; delegates to ``_build_subselect_comparison``."""
    return self._build_subselect_comparison(base)

def _build_subselect_comparison(
    self, base: SubselectComparison
) -> BuildSubselectComparison:
    """Build a subselect comparison; a computed right side is first instantiated as a concept."""
    right: Any = base.right
    # this has specialized logic - include all Functions
    if isinstance(right, (AggregateWrapper, WindowItem, FilterItem, Function)):
        right, _ = self.instantiate_concept(right)
    built_left = self.handle_constant(self.build(base.left))
    built_right = self.handle_constant(self.build(right))
    return BuildSubselectComparison(
        left=built_left,
        right=built_right,
        operator=base.operator,
    )
2181
+
2182
@build.register
def _(self, base: Comparison) -> BuildComparison:
    """``build`` handler for ``Comparison``; delegates to ``_build_comparison``."""
    return self._build_comparison(base)

def _build_comparison(self, base: Comparison) -> BuildComparison:
    """Build a comparison; either side is instantiated as a concept first when nesting is required."""
    operands = []
    for side in (base.left, base.right):
        nested = requires_concept_nesting(side)
        operands.append(self.instantiate_concept(nested)[0] if nested else side)
    left, right = operands
    built_left = self.handle_constant(self.build(left))
    built_right = self.handle_constant(self.build(right))
    return BuildComparison(
        left=built_left,
        right=built_right,
        operator=base.operator,
    )
2202
+
2203
@build.register
def _(self, base: AlignItem) -> BuildAlignItem:
    """``build`` handler for ``AlignItem``; delegates to ``_build_align_item``."""
    return self._build_align_item(base)

def _build_align_item(self, base: AlignItem) -> BuildAlignItem:
    """Build an align item by resolving each of its concept references."""
    resolved = [self._build_concept_ref(ref) for ref in base.concepts]
    return BuildAlignItem(
        alias=base.alias,
        concepts=resolved,
        namespace=base.namespace,
    )
2213
+
2214
@build.register
def _(self, base: AlignClause) -> BuildAlignClause:
    """``build`` handler for ``AlignClause``; delegates to ``_build_align_clause``."""
    return self._build_align_clause(base)

def _build_align_clause(self, base: AlignClause) -> BuildAlignClause:
    """Build every align item of the clause, keeping their order."""
    built_items = [self._build_align_item(item) for item in base.items]
    return BuildAlignClause(items=built_items)
2220
+
2221
@build.register
def _(self, base: DeriveItem) -> BuildDeriveItem:
    """``build`` handler for ``DeriveItem``; delegates to ``_build_derive_item``."""
    return self._build_derive_item(base)

def _build_derive_item(self, base: DeriveItem) -> BuildDeriveItem:
    """Build a derive item, instantiating its expression as a concept when nesting is required."""
    nested = requires_concept_nesting(base.expr)
    expr: Concept | FuncArgs = (
        self.instantiate_concept(nested)[0] if nested else base.expr
    )
    return BuildDeriveItem(
        expr=self.build(expr),
        name=base.name,
        namespace=base.namespace,
    )
2235
+
2236
@build.register
def _(self, base: DeriveClause) -> BuildDeriveClause:
    """``build`` handler for ``DeriveClause``; delegates to ``_build_derive_clause``."""
    return self._build_derive_clause(base)

def _build_derive_clause(self, base: DeriveClause) -> BuildDeriveClause:
    """Build every item of the clause through the ``build`` dispatcher."""
    built_items = [self.build(item) for item in base.items]
    return BuildDeriveClause(items=built_items)
2242
+
2243
@build.register
def _(self, base: RowsetItem) -> BuildRowsetItem:
    """``build`` handler for ``RowsetItem``; delegates to ``_build_rowset_item``."""
    return self._build_rowset_item(base)

def _build_rowset_item(self, base: RowsetItem) -> BuildRowsetItem:
    """Build a rowset item with a fresh factory at the rowset select's grain.

    The fresh factory starts with empty ``local_concepts`` and is not
    passed this factory's ``build_cache``.
    """
    scoped = Factory(
        environment=self.environment,
        local_concepts={},
        grain=base.rowset.select.grain,
        pseudonym_map=self.pseudonym_map,
    )
    built_content = scoped._build_concept_ref(base.content)
    built_rowset = scoped._build_rowset_lineage(base.rowset)
    return BuildRowsetItem(content=built_content, rowset=built_rowset)
2258
+
2259
@build.register
def _(self, base: RowsetLineage) -> BuildRowsetLineage:
    """``build`` handler for ``RowsetLineage``; delegates to ``_build_rowset_lineage``."""
    return self._build_rowset_lineage(base)

def _build_rowset_lineage(self, base: RowsetLineage) -> BuildRowsetLineage:
    """Build a rowset lineage: derived concepts become addresses, the select is passed through as-is."""
    derived_addresses = [concept.address for concept in base.derived_concepts]
    return BuildRowsetLineage(
        name=base.name,
        derived_concepts=derived_addresses,
        select=base.select,
    )
2270
+
2271
@build.register
def _(self, base: Grain) -> BuildGrain:
    """``build`` handler for ``Grain``; delegates to ``_build_grain``."""
    return self._build_grain(base)

def _build_grain(self, base: Grain) -> BuildGrain:
    """Build a grain; a where clause, if present, is built by a separate factory sharing this build cache."""
    where = None
    if base.where_clause:
        where_factory = Factory(
            environment=self.environment,
            pseudonym_map=self.pseudonym_map,
            build_cache=self.build_cache,
        )
        where = where_factory._build_where_clause(base.where_clause)
    return BuildGrain(components=base.components, where_clause=where)
2286
+
2287
@build.register
def _(self, base: TupleWrapper) -> TupleWrapper:
    """``build`` handler for ``TupleWrapper``; delegates to ``_build_tuple_wrapper``."""
    return self._build_tuple_wrapper(base)

def _build_tuple_wrapper(self, base: TupleWrapper) -> TupleWrapper:
    """Return a new ``TupleWrapper`` of the same type whose elements have each been built."""
    built_values = [self.build(element) for element in base.val]
    return TupleWrapper(val=built_values, type=base.type)
2293
+
2294
@build.register
def _(self, base: ListWrapper) -> ListWrapper:
    """``build`` handler for ``ListWrapper``; delegates to ``_build_list_wrapper``."""
    return self._build_list_wrapper(base)

def _build_list_wrapper(self, base: ListWrapper) -> ListWrapper:
    """Return a new ``ListWrapper`` of the same type whose elements have each been built."""
    built_values = [self.build(element) for element in base]
    return ListWrapper(built_values, type=base.type)
2300
+
2301
@build.register
def _(self, base: FilterItem) -> BuildFilterItem:
    """``build`` handler for ``FilterItem``; delegates to ``_build_filter_item``."""
    return self._build_filter_item(base)

def _build_filter_item(self, base: FilterItem) -> BuildFilterItem:
    """Build a filter; computed content is instantiated as a concept, anything else is built directly."""
    if isinstance(
        base.content, (Function, AggregateWrapper, WindowItem, FilterItem)
    ):
        _, built_content = self.instantiate_concept(base.content)
    else:
        built_content = self.build(base.content)
    return BuildFilterItem(content=built_content, where=self.build(base.where))
2314
+
2315
@build.register
def _(self, base: Parenthetical) -> BuildParenthetical:
    """``build`` handler for ``Parenthetical``; delegates to ``_build_parenthetical``."""
    return self._build_parenthetical(base)

def _build_parenthetical(self, base: Parenthetical) -> BuildParenthetical:
    """Build a parenthesised expression, instantiating its content as a concept when nesting is required."""
    nested = requires_concept_nesting(base.content)
    content = self.instantiate_concept(nested)[0] if nested else base.content
    return BuildParenthetical(content=self.build(content))
2326
+
2327
@build.register
def _(self, base: SelectLineage) -> BuildSelectLineage:
    """``build`` handler for ``SelectLineage``; delegates to ``_build_select_lineage``."""
    return self._build_select_lineage(base)

def _build_select_lineage(self, base: SelectLineage) -> BuildSelectLineage:
    """Build a select.

    Local concepts, selection, ordering and having are built by a factory
    at the select's grain; the where clause is built by a second factory
    with a default ``Grain()``. Concepts the where factory produced are
    merged into the select's local concepts without replacing existing
    entries.
    """
    from trilogy.core.models.build import (
        BuildSelectLineage,
        Factory,
    )

    materialized: dict[str, BuildConcept] = {}
    factory = Factory(
        grain=base.grain,
        environment=self.environment,
        local_concepts=materialized,
        pseudonym_map=self.pseudonym_map,
        build_cache=self.build_cache,
    )
    for name, concept in base.local_concepts.items():
        materialized[name] = factory.build(concept)

    where_factory = Factory(
        grain=Grain(),
        environment=self.environment,
        local_concepts={},
        pseudonym_map=self.pseudonym_map,
        build_cache=self.build_cache,
    )
    where_clause = None
    if base.where_clause:
        where_clause = where_factory.build(base.where_clause)
    # if the where clause derives new concepts
    # we need to ensure these are accessible from the general factory
    # post resolution
    for derived_name, derived in where_factory.local_concepts.items():
        # but do not override any local cached grains
        materialized.setdefault(derived_name, derived)

    final: List[BuildConcept] = []
    for original in base.selection:
        # we don't know the grain of an aggregate at assignment time
        # so rebuild at this point in the tree
        # TODO: simplify
        address = original.address
        if address not in materialized:
            # Sometimes cached values here don't have the latest info
            # but we can't just use environment, as it might not have the right grain.
            materialized[address] = factory.build(original)
        final.append(materialized[address])

    # built in this order: order by, grain, having
    built_order_by = factory.build(base.order_by) if base.order_by else None
    built_grain = self.build(base.grain)
    built_having = factory.build(base.having_clause) if base.having_clause else None
    return BuildSelectLineage(
        selection=final,
        hidden_components=base.hidden_components,
        order_by=built_order_by,
        limit=base.limit,
        meta=base.meta,
        local_concepts=materialized,
        grain=built_grain,
        having_clause=built_having,
        # this uses a different grain factory
        where_clause=where_clause,
    )
2394
+
2395
@build.register
def _(self, base: MultiSelectLineage) -> BuildMultiSelectLineage:
    """``build`` handler for ``MultiSelectLineage``; delegates to ``_build_multi_select_lineage``."""
    return self._build_multi_select_lineage(base)

def _build_multi_select_lineage(
    self, base: MultiSelectLineage
) -> BuildMultiSelectLineage:
    """Build a multi-select.

    Derived concepts are created first with ``lineage=None`` and pointed
    at the finished lineage once it exists. The local concepts of the
    later parent selects are folded into the first parent's mapping,
    which is mutated in place.
    """
    local_build_cache: dict[str, BuildConcept] = {}

    parents: list[BuildSelectLineage] = [self.build(x) for x in base.selects]
    base_local = parents[0].local_concepts
    for later in parents[1:]:
        base_local.update(later.local_concepts)

    # this requires custom handling to avoid circular dependencies
    final_grain = self.build(base.grain)
    derived_base = []
    for address in base.derived_concepts:
        source = self.environment.concepts[address]
        placeholder = BuildConcept(
            name=source.name,
            canonical_name=source.name,
            datatype=source.datatype,
            purpose=source.purpose,
            build_is_aggregate=False,
            derivation=Derivation.MULTISELECT,
            lineage=None,
            grain=final_grain,
            namespace=source.namespace,
        )
        local_build_cache[address] = placeholder
        derived_base.append(placeholder)

    all_input: list[BuildConcept] = [
        component for parent in parents for component in parent.output_components
    ]
    all_output: list[BuildConcept] = derived_base + all_input
    final: list[BuildConcept] = [
        x for x in all_output if x.address not in base.hidden_components
    ]

    factory = Factory(
        grain=base.grain,
        environment=self.environment,
        local_concepts=local_build_cache,
        pseudonym_map=self.pseudonym_map,
    )
    where_factory = Factory(
        environment=self.environment, pseudonym_map=self.pseudonym_map
    )

    # built in this order: align, derive, order by, where, having
    built_align = factory.build(base.align)
    built_derive = factory.build(base.derive) if base.derive else None
    built_order_by = factory.build(base.order_by) if base.order_by else None
    built_where = where_factory.build(base.where_clause) if base.where_clause else None
    built_having = factory.build(base.having_clause) if base.having_clause else None

    lineage = BuildMultiSelectLineage(
        # we don't build selects here; they'll be built automatically in query discovery
        selects=base.selects,
        grain=final_grain,
        align=built_align,
        derive=built_derive,
        namespace=base.namespace,
        hidden_components=base.hidden_components,
        order_by=built_order_by,
        limit=base.limit,
        where_clause=built_where,
        having_clause=built_having,
        local_concepts=base_local,
        build_output_components=final,
        build_concept_arguments=all_input,
    )
    # close the loop: the placeholders now point at the finished lineage
    for address in base.derived_concepts:
        local_build_cache[address].lineage = lineage
    return lineage
2471
+
2472
@build.register
def _(self, base: Environment):
    """``build`` handler for ``Environment``; delegates to ``_build_environment``."""
    return self._build_environment(base)

def _build_environment(self, base: Environment):
    """Build a ``BuildEnvironment`` from ``base``.

    Copies over built concepts, built datasources whose status is
    PUBLISHED, built alias origins, and any concept this factory created
    along the way that is not already registered.
    """
    from trilogy.core.models.build_environment import BuildEnvironment

    built_env = BuildEnvironment(
        namespace=base.namespace,
        cte_name_map=base.cte_name_map,
    )

    for address, concept in base.concepts.items():
        built_concept = self._build_concept(concept)
        built_env.concepts[address] = built_concept
        built_env.canonical_concepts[built_concept.canonical_address] = built_concept

    for ds_name, datasource in base.datasources.items():
        if datasource.status != DatasourceState.PUBLISHED:
            continue
        built_env.datasources[ds_name] = self._build_datasource(datasource)

    for alias, origin in base.alias_origin_lookup.items():
        built_origin = self._build_concept(origin)
        built_env.alias_origin_lookup[alias] = built_origin
        built_env.canonical_concepts[built_origin.canonical_address] = built_origin

    # add in anything that was built as a side-effect
    for address, built_concept in self.local_concepts.items():
        if address in built_env.concepts:
            continue
        built_env.concepts[address] = built_concept
        built_env.canonical_concepts[built_concept.canonical_address] = built_concept

    built_env.gen_concept_list_caches()
    return built_env
2506
+
2507
@build.register
def _(self, base: TraitDataType):
    """``build`` handler for ``TraitDataType``; delegates to ``_build_trait_data_type``."""
    return self._build_trait_data_type(base)

def _build_trait_data_type(self, base: TraitDataType):
    """Return ``base`` unchanged."""
    return base
2513
+
2514
@build.register
def _(self, base: ArrayType):
    """``build`` handler for ``ArrayType``; delegates to ``_build_array_type``."""
    return self._build_array_type(base)

def _build_array_type(self, base: ArrayType):
    """Return ``base`` unchanged."""
    return base
2520
+
2521
@build.register
def _(self, base: StructType):
    """``build`` handler for ``StructType``; delegates to ``_build_struct_type``."""
    return self._build_struct_type(base)

def _build_struct_type(self, base: StructType):
    """Return ``base`` unchanged."""
    return base
2527
+
2528
@build.register
def _(self, base: MapType):
    """``build`` handler for ``MapType``; delegates to ``_build_map_type``."""
    return self._build_map_type(base)

def _build_map_type(self, base: MapType):
    """Return ``base`` unchanged."""
    return base
2534
+
2535
@build.register
def _(self, base: ArgBinding):
    """``build`` handler for ``ArgBinding``; delegates to ``_build_arg_binding``."""
    return self._build_arg_binding(base)

def _build_arg_binding(self, base: ArgBinding):
    """Return ``base`` unchanged."""
    return base
2541
+
2542
@build.register
def _(self, base: Ordering):
    """``build`` handler for ``Ordering``; delegates to ``_build_ordering``."""
    return self._build_ordering(base)

def _build_ordering(self, base: Ordering):
    """Return ``base`` unchanged."""
    return base
2548
+
2549
@build.register
def _(self, base: Datasource):
    """``build`` handler for ``Datasource``; delegates to ``_build_datasource``."""
    return self._build_datasource(base)

def _build_datasource(self, base: Datasource):
    """Build a datasource with a dedicated factory at the datasource's grain.

    The dedicated factory starts with empty local concepts and shares
    this factory's ``build_cache`` only when
    ``CONFIG.generation.datasource_build_cache`` is truthy.
    """
    from trilogy.constants import CONFIG

    local_cache: dict[str, BuildConcept] = {}
    shared_cache = (
        self.build_cache if CONFIG.generation.datasource_build_cache else None
    )
    factory = Factory(
        grain=base.grain,
        environment=self.environment,
        local_concepts=local_cache,
        pseudonym_map=self.pseudonym_map,
        build_cache=shared_cache,
    )

    # built in this order: columns, grain, where, non_partial_for
    built_columns = [factory._build_column_assignment(c) for c in base.columns]
    built_grain = factory._build_grain(base.grain)
    built_where = factory.build(base.where) if base.where else None
    built_non_partial = (
        factory.build(base.non_partial_for) if base.non_partial_for else None
    )
    return BuildDatasource(
        name=base.name,
        columns=built_columns,
        address=base.address,
        grain=built_grain,
        namespace=base.namespace,
        metadata=base.metadata,
        where=built_where,
        non_partial_for=built_non_partial,
    )
2579
+
2580
def handle_constant(self, base):
    """Wrap constant-backed concepts in a parameterized reference.

    A ``BuildConcept`` whose lineage is a CONSTANT ``BuildFunction`` is
    wrapped in ``BuildParamaterizedConceptReference``. A ``ConceptRef``
    is built first and then re-checked. Anything else is returned
    unchanged.
    """
    is_constant_concept = (
        isinstance(base, BuildConcept)
        and isinstance(base.lineage, BuildFunction)
        and base.lineage.operator == FunctionType.CONSTANT
    )
    if is_constant_concept:
        return BuildParamaterizedConceptReference(concept=base)
    if isinstance(base, ConceptRef):
        return self.handle_constant(self.build(base))
    return base
2590
+
2591
+
2592
# Register the name generators for the build-side lineage types. This runs at
# the end of the module, once the classes used as keys have been defined.
_CONCEPT_NAME_GENERATORS.update(
    {
        BuildAggregateWrapper: _gen_agg_name,
        BuildWindowItem: _gen_window_name,
        BuildFilterItem: _gen_filter_name,
        BuildFunction: _gen_function_name,
        BuildParenthetical: _gen_paren_name,
        BuildComparison: _gen_comp_name,
        BuildMultiSelectLineage: _gen_msl_name,
    }
)