pytrilogy 0.0.2.57__py3-none-any.whl → 0.0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/METADATA +9 -2
  2. pytrilogy-0.0.3.0.dist-info/RECORD +99 -0
  3. {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +2 -2
  5. trilogy/core/enums.py +1 -7
  6. trilogy/core/env_processor.py +17 -5
  7. trilogy/core/environment_helpers.py +11 -25
  8. trilogy/core/exceptions.py +4 -0
  9. trilogy/core/functions.py +695 -261
  10. trilogy/core/graph_models.py +10 -10
  11. trilogy/core/internal.py +11 -2
  12. trilogy/core/models/__init__.py +0 -0
  13. trilogy/core/models/author.py +2110 -0
  14. trilogy/core/models/build.py +1845 -0
  15. trilogy/core/models/build_environment.py +151 -0
  16. trilogy/core/models/core.py +370 -0
  17. trilogy/core/models/datasource.py +297 -0
  18. trilogy/core/models/environment.py +696 -0
  19. trilogy/core/models/execute.py +931 -0
  20. trilogy/core/optimization.py +17 -22
  21. trilogy/core/optimizations/base_optimization.py +1 -1
  22. trilogy/core/optimizations/inline_constant.py +6 -6
  23. trilogy/core/optimizations/inline_datasource.py +17 -11
  24. trilogy/core/optimizations/predicate_pushdown.py +17 -16
  25. trilogy/core/processing/concept_strategies_v3.py +181 -146
  26. trilogy/core/processing/graph_utils.py +1 -1
  27. trilogy/core/processing/node_generators/basic_node.py +19 -18
  28. trilogy/core/processing/node_generators/common.py +51 -45
  29. trilogy/core/processing/node_generators/filter_node.py +26 -13
  30. trilogy/core/processing/node_generators/group_node.py +26 -21
  31. trilogy/core/processing/node_generators/group_to_node.py +13 -10
  32. trilogy/core/processing/node_generators/multiselect_node.py +60 -43
  33. trilogy/core/processing/node_generators/node_merge_node.py +76 -38
  34. trilogy/core/processing/node_generators/rowset_node.py +59 -36
  35. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
  36. trilogy/core/processing/node_generators/select_merge_node.py +161 -64
  37. trilogy/core/processing/node_generators/select_node.py +13 -13
  38. trilogy/core/processing/node_generators/union_node.py +12 -11
  39. trilogy/core/processing/node_generators/unnest_node.py +9 -7
  40. trilogy/core/processing/node_generators/window_node.py +19 -16
  41. trilogy/core/processing/nodes/__init__.py +21 -18
  42. trilogy/core/processing/nodes/base_node.py +92 -77
  43. trilogy/core/processing/nodes/filter_node.py +19 -13
  44. trilogy/core/processing/nodes/group_node.py +55 -40
  45. trilogy/core/processing/nodes/merge_node.py +47 -38
  46. trilogy/core/processing/nodes/select_node_v2.py +54 -40
  47. trilogy/core/processing/nodes/union_node.py +5 -7
  48. trilogy/core/processing/nodes/unnest_node.py +7 -11
  49. trilogy/core/processing/nodes/window_node.py +9 -4
  50. trilogy/core/processing/utility.py +108 -80
  51. trilogy/core/query_processor.py +67 -49
  52. trilogy/core/statements/__init__.py +0 -0
  53. trilogy/core/statements/author.py +413 -0
  54. trilogy/core/statements/build.py +0 -0
  55. trilogy/core/statements/common.py +30 -0
  56. trilogy/core/statements/execute.py +42 -0
  57. trilogy/dialect/base.py +152 -111
  58. trilogy/dialect/common.py +9 -10
  59. trilogy/dialect/duckdb.py +1 -1
  60. trilogy/dialect/enums.py +4 -2
  61. trilogy/dialect/presto.py +1 -1
  62. trilogy/dialect/sql_server.py +1 -1
  63. trilogy/executor.py +44 -32
  64. trilogy/hooks/base_hook.py +6 -4
  65. trilogy/hooks/query_debugger.py +110 -93
  66. trilogy/parser.py +1 -1
  67. trilogy/parsing/common.py +303 -64
  68. trilogy/parsing/parse_engine.py +263 -617
  69. trilogy/parsing/render.py +50 -26
  70. trilogy/scripts/trilogy.py +2 -1
  71. pytrilogy-0.0.2.57.dist-info/RECORD +0 -87
  72. trilogy/core/models.py +0 -4960
  73. {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/LICENSE.md +0 -0
  74. {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/entry_points.txt +0 -0
  75. {pytrilogy-0.0.2.57.dist-info → pytrilogy-0.0.3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1845 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC
4
+ from datetime import date, datetime
5
+ from functools import cached_property, singledispatchmethod
6
+ from typing import (
7
+ TYPE_CHECKING,
8
+ Any,
9
+ Iterable,
10
+ List,
11
+ Optional,
12
+ Self,
13
+ Sequence,
14
+ Set,
15
+ Tuple,
16
+ Union,
17
+ )
18
+
19
+ from pydantic import (
20
+ BaseModel,
21
+ ConfigDict,
22
+ Field,
23
+ ValidationInfo,
24
+ computed_field,
25
+ field_validator,
26
+ )
27
+
28
+ from trilogy.constants import DEFAULT_NAMESPACE, MagicConstants
29
+ from trilogy.core.constants import ALL_ROWS_CONCEPT
30
+ from trilogy.core.enums import (
31
+ BooleanOperator,
32
+ ComparisonOperator,
33
+ DatePart,
34
+ Derivation,
35
+ FunctionClass,
36
+ FunctionType,
37
+ Granularity,
38
+ Modifier,
39
+ Ordering,
40
+ Purpose,
41
+ WindowType,
42
+ )
43
+ from trilogy.core.models.author import (
44
+ AggregateWrapper,
45
+ AlignClause,
46
+ AlignItem,
47
+ CaseElse,
48
+ CaseWhen,
49
+ Comparison,
50
+ Concept,
51
+ ConceptRef,
52
+ Conditional,
53
+ FilterItem,
54
+ Function,
55
+ Grain,
56
+ HavingClause,
57
+ Metadata,
58
+ MultiSelectLineage,
59
+ OrderBy,
60
+ OrderItem,
61
+ Parenthetical,
62
+ RowsetItem,
63
+ RowsetLineage,
64
+ SelectLineage,
65
+ SubselectComparison,
66
+ WhereClause,
67
+ WindowItem,
68
+ )
69
+ from trilogy.core.models.core import (
70
+ Addressable,
71
+ DataType,
72
+ DataTyped,
73
+ ListType,
74
+ ListWrapper,
75
+ MapType,
76
+ MapWrapper,
77
+ NumericType,
78
+ StructType,
79
+ TupleWrapper,
80
+ arg_to_datatype,
81
+ )
82
+ from trilogy.core.models.datasource import (
83
+ Address,
84
+ ColumnAssignment,
85
+ Datasource,
86
+ DatasourceMetadata,
87
+ RawColumnExpr,
88
+ )
89
+ from trilogy.core.models.environment import Environment
90
+ from trilogy.utility import unique
91
+
92
+ # TODO: refactor to avoid these
93
+ if TYPE_CHECKING:
94
+ from trilogy.core.models.build_environment import BuildEnvironment
95
+ from trilogy.core.models.execute import CTE, UnionCTE
96
+
97
+ LOGGER_PREFIX = "[MODELS_BUILD]"
98
+
99
+
100
class BuildConceptArgs(ABC):
    """Interface for build objects that can report the concepts they reference.

    Subclasses are expected to override ``concept_arguments``; the two other
    properties come with defaults.
    """

    @property
    def concept_arguments(self) -> Sequence["BuildConcept"]:
        """Concepts referenced by this object. The base version always raises."""
        raise NotImplementedError

    @property
    def row_arguments(self) -> Sequence["BuildConcept"]:
        """Defaults to whatever ``concept_arguments`` returns."""
        return self.concept_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["BuildConcept", ...]]:
        """Defaults to an empty list."""
        return []
112
+
113
+
114
def concept_is_relevant(
    concept: BuildConcept,
    others: list[BuildConcept],
) -> bool:
    """Return whether ``concept`` should be kept when compared against ``others``.

    The checks run in order and the first one that matches decides:

    * an aggregate whose lineage is not a ``BuildAggregateWrapper`` with a
      truthy ``by`` is rejected;
    * a property or metric is rejected when any of its keys is in ``others``;
    * a metric is rejected when every grain component is in ``others``
      (this includes a metric with no grain components at all);
    * a concept with BASIC derivation is kept only if at least one of its
      concept arguments is itself relevant;
    * a single-row concept is rejected; anything else is kept.
    """
    if concept.is_aggregate:
        lineage = concept.lineage
        if not (isinstance(lineage, BuildAggregateWrapper) and lineage.by):
            return False

    purpose = concept.purpose
    if purpose in (Purpose.PROPERTY, Purpose.METRIC) and concept.keys:
        # keys are address strings; membership relies on the elements of
        # ``others`` comparing equal to an address
        if any(key in others for key in concept.keys):
            return False

    if purpose == Purpose.METRIC and all(
        component in others for component in concept.grain.components
    ):
        return False

    if concept.derivation == Derivation.BASIC:
        return any(
            concept_is_relevant(argument, others)
            for argument in concept.concept_arguments
        )

    return concept.granularity != Granularity.SINGLE_ROW
137
+
138
+
139
def concepts_to_build_grain_concepts(
    concepts: Iterable[BuildConcept | str], environment: "BuildEnvironment" | None
) -> list[BuildConcept]:
    """Resolve, filter, de-duplicate and sort the concepts that form a grain.

    Args:
        concepts: ``BuildConcept`` objects, or keys to look up in
            ``environment.concepts``.
        environment: used to resolve non-concept inputs; may be ``None`` when
            every input is already a ``BuildConcept``.

    Returns:
        The relevant concepts (per ``concept_is_relevant``), de-duplicated via
        ``unique(..., "address")`` and sorted by ``name``.

    Raises:
        ValueError: if an input needs resolving and no environment was given.
    """
    resolved: List[BuildConcept] = []
    for candidate in concepts:
        if isinstance(candidate, BuildConcept):
            resolved.append(candidate)
            continue
        if not environment:
            raise ValueError(
                f"Unable to resolve input {candidate} without environment provided to concepts_to_grain call"
            )
        resolved.append(environment.concepts[candidate])

    # relevance is judged against the full resolved list, not the survivors
    relevant: List[BuildConcept] = [
        item for item in resolved if concept_is_relevant(item, resolved)
    ]
    deduplicated = unique(relevant, "address")
    return sorted(deduplicated, key=lambda item: item.name)
161
+
162
+
163
class LooseBuildConceptList(BaseModel):
    # A sequence of build concepts that is compared purely by the set of
    # concept addresses it contains (order and duplicates are ignored).
    concepts: Sequence[BuildConcept]

    @cached_property
    def addresses(self) -> set[str]:
        """The set of addresses of all contained concepts."""
        return {s.address for s in self.concepts}

    @classmethod
    def validate(cls, v):
        """Build an instance from a sequence of concepts.

        An existing instance is returned unchanged. The model only accepts
        keyword arguments, so the sequence has to be passed as ``concepts=``;
        a positional call raises ``TypeError``.
        """
        if isinstance(v, cls):
            return v
        return cls(concepts=v)

    @cached_property
    def sorted_addresses(self) -> List[str]:
        """The addresses in sorted order."""
        return sorted(self.addresses)

    def __str__(self) -> str:
        return f"lcl{str(self.sorted_addresses)}"

    def __iter__(self):
        return iter(self.concepts)

    def __eq__(self, other):
        """Equal when ``other`` is the same kind of list with the same addresses."""
        if not isinstance(other, LooseBuildConceptList):
            return False
        return self.addresses == other.addresses

    def issubset(self, other):
        """Address-set subset test; ``False`` for any other type."""
        if not isinstance(other, LooseBuildConceptList):
            return False
        return self.addresses.issubset(other.addresses)

    def __contains__(self, other):
        """Accepts an address string or a ``BuildConcept``; anything else is absent."""
        if isinstance(other, str):
            return other in self.addresses
        if not isinstance(other, BuildConcept):
            return False
        return other.address in self.addresses

    def difference(self, other):
        """Addresses in this list but not in ``other``.

        Returns ``False`` (not an empty set) when ``other`` is a different
        type; kept as-is so existing callers see the same values.
        """
        if not isinstance(other, LooseBuildConceptList):
            return False
        return self.addresses.difference(other.addresses)

    def isdisjoint(self, other):
        """Address-set disjointness test; ``False`` for any other type."""
        if not isinstance(other, LooseBuildConceptList):
            return False
        return self.addresses.isdisjoint(other.addresses)
210
+
211
+
212
class ConstantInlineable(ABC):
    """Marker base for expressions that can have a constant concept inlined."""

    def inline_constant(self, concept: "BuildConcept"):
        """Subclasses override this; the base version always raises."""
        raise NotImplementedError
216
+
217
+
218
def get_concept_row_arguments(expr) -> List["BuildConcept"]:
    """Collect the row-level concepts referenced by ``expr``.

    A ``BuildConcept`` yields itself, any other ``BuildConceptArgs`` yields its
    ``row_arguments``, and every other value yields an empty list. The result
    is always a fresh list.
    """
    if isinstance(expr, BuildConcept):
        return [expr]
    if isinstance(expr, BuildConceptArgs):
        return list(expr.row_arguments)
    return []
226
+
227
+
228
def get_concept_arguments(expr) -> List["BuildConcept"]:
    """Collect the concepts referenced by ``expr``.

    A ``BuildConcept`` yields itself, any other ``BuildConceptArgs`` yields its
    ``concept_arguments``, and every other value yields an empty list. The
    result is always a fresh list.
    """
    if isinstance(expr, BuildConcept):
        return [expr]
    if isinstance(expr, BuildConceptArgs):
        return list(expr.concept_arguments)
    return []
239
+
240
+
241
class BuildGrain(BaseModel):
    # A grain is a set of concept addresses plus an optional where clause.
    # Equality and the set-style helpers below compare ``components`` only.
    components: set[str] = Field(default_factory=set)
    where_clause: Optional[BuildWhereClause] = None

    def without_condition(self):
        """Return a grain with the same components and no where clause."""
        return BuildGrain(components=self.components)

    @classmethod
    def from_concepts(
        cls,
        concepts: Iterable[BuildConcept | str],
        environment: BuildEnvironment | None = None,
        where_clause: BuildWhereClause | None = None,
    ) -> "BuildGrain":
        """Build a grain from the addresses of the grain-relevant ``concepts``."""
        grain_concepts = concepts_to_build_grain_concepts(
            concepts, environment=environment
        )
        return BuildGrain(
            components={c.address for c in grain_concepts},
            where_clause=where_clause,
        )

    @field_validator("components", mode="before")
    def component_validator(cls, v, info: ValidationInfo):
        """Normalise ``components`` input to a set of address strings.

        A list is converted to a set, with ``BuildConcept`` entries replaced by
        their address. Any other input must already be a set of strings.
        """
        if isinstance(v, list):
            output = {vc.address if isinstance(vc, BuildConcept) else vc for vc in v}
        else:
            output = v
        if not isinstance(output, set):
            raise ValueError(f"Invalid grain component {output}, is not set")
        if any(not isinstance(x, str) for x in output):
            raise ValueError(f"Invalid component {output}")
        return output

    def __add__(self, other: "BuildGrain") -> "BuildGrain":
        """Union the components; differing where clauses are AND-ed together."""
        if not other:
            return self
        merged_where = self.where_clause
        incoming = other.where_clause
        if incoming:
            if not self.where_clause:
                merged_where = incoming
            elif not incoming == self.where_clause:
                merged_where = BuildWhereClause(
                    conditional=BuildConditional(
                        left=self.where_clause.conditional,
                        right=incoming.conditional,
                        operator=BooleanOperator.AND,
                    )
                )
        return BuildGrain(
            components=self.components.union(other.components),
            where_clause=merged_where,
        )

    def __sub__(self, other: "BuildGrain") -> "BuildGrain":
        """Remove ``other``'s components, keeping this grain's where clause."""
        remaining = self.components.difference(other.components)
        return BuildGrain(components=remaining, where_clause=self.where_clause)

    @property
    def abstract(self):
        """True when there are no components or all end with ``ALL_ROWS_CONCEPT``."""
        if not self.components:
            return True
        return all(c.endswith(ALL_ROWS_CONCEPT) for c in self.components)

    def __eq__(self, other: object):
        """Compare components with another grain, or with a list of concepts."""
        if isinstance(other, list):
            if not all(isinstance(c, BuildConcept) for c in other):
                return False
            return self.components == {c.address for c in other}
        if not isinstance(other, BuildGrain):
            return False
        return self.components == other.components

    def issubset(self, other: "BuildGrain"):
        return self.components.issubset(other.components)

    def union(self, other: "BuildGrain"):
        """Union of components; only this grain's where clause is carried over."""
        combined = self.components.union(other.components)
        return BuildGrain(components=combined, where_clause=self.where_clause)

    def isdisjoint(self, other: "BuildGrain"):
        return self.components.isdisjoint(other.components)

    def intersection(self, other: "BuildGrain") -> "BuildGrain":
        """Shared components; the result carries no where clause."""
        shared = self.components.intersection(other.components)
        return BuildGrain(components=shared)

    def __str__(self):
        if self.abstract:
            text = "Grain<Abstract>"
        else:
            text = "Grain<" + ",".join(sorted(self.components)) + ">"
        if self.where_clause:
            text += f"|{str(self.where_clause)}"
        return text

    def __radd__(self, other) -> "BuildGrain":
        # a left operand equal to 0 returns this grain unchanged
        if other == 0:
            return self
        return self.__add__(other)
356
+
357
+
358
class BuildParenthetical(DataTyped, ConstantInlineable, BuildConceptArgs, BaseModel):
    # Wraps a single expression; rendering puts it in parentheses and all
    # argument/datatype lookups are delegated to the wrapped content.
    content: "BuildExpr"

    def __add__(self, other) -> Union["BuildParenthetical", "BuildConditional"]:
        """AND this expression with another condition; ``None`` is a no-op."""
        if other is None:
            return self
        if not isinstance(
            other, (BuildComparison, BuildConditional, BuildParenthetical)
        ):
            raise ValueError(f"Cannot add {self.__class__} and {type(other)}")
        return BuildConditional(left=self, right=other, operator=BooleanOperator.AND)

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return f"({str(self.content)})"

    def inline_constant(self, BuildConcept: BuildConcept):
        """Return a new parenthetical with the constant inlined into the content.

        Content that is not ``ConstantInlineable`` is reused unchanged.
        """
        inner = self.content
        if isinstance(inner, ConstantInlineable):
            inner = inner.inline_constant(BuildConcept)
        return BuildParenthetical(content=inner)

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """Concepts referenced by the wrapped content (empty for plain values)."""
        return get_concept_arguments(self.content)

    @property
    def row_arguments(self) -> Sequence[BuildConcept]:
        inner = self.content
        if not isinstance(inner, BuildConceptArgs):
            return self.concept_arguments
        return inner.row_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["BuildConcept", ...]]:
        inner = self.content
        if not isinstance(inner, BuildConceptArgs):
            return []
        return inner.existence_arguments

    @property
    def output_datatype(self):
        """Datatype of the wrapped content, via ``arg_to_datatype``."""
        return arg_to_datatype(self.content)
410
+
411
+
412
class BuildConditional(BuildConceptArgs, ConstantInlineable, BaseModel):
    # Two operands joined by a boolean operator.
    left: Union[
        int,
        str,
        float,
        list,
        bool,
        MagicConstants,
        BuildConcept,
        BuildComparison,
        BuildConditional,
        BuildParenthetical,
        BuildSubselectComparison,
        BuildFunction,
        BuildFilterItem,
    ]
    right: Union[
        int,
        str,
        float,
        list,
        bool,
        MagicConstants,
        BuildConcept,
        BuildComparison,
        BuildConditional,
        BuildParenthetical,
        BuildSubselectComparison,
        BuildFunction,
        BuildFilterItem,
    ]
    operator: BooleanOperator

    def __add__(self, other) -> "BuildConditional":
        """AND with another condition; ``None`` or an identically-rendered one is a no-op."""
        if other is None or str(other) == str(self):
            return self
        if isinstance(other, (BuildComparison, BuildConditional, BuildParenthetical)):
            return BuildConditional(
                left=self, right=other, operator=BooleanOperator.AND
            )
        raise ValueError(f"Cannot add {self.__class__} and {type(other)}")

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return f"{str(self.left)} {self.operator.value} {str(self.right)}"

    def __eq__(self, other):
        if not isinstance(other, BuildConditional):
            return False
        return (
            self.left == other.left
            and self.right == other.right
            and self.operator == other.operator
        )

    def inline_constant(self, constant: BuildConcept) -> "BuildConditional":
        """Return a copy with references to ``constant`` replaced by its value.

        The value is the first argument of the constant's function lineage.
        """
        assert isinstance(constant.lineage, BuildFunction)
        new_val = constant.lineage.arguments[0]

        def _substitute(side):
            # nested inlineable expressions handle their own substitution
            if isinstance(side, ConstantInlineable):
                return side.inline_constant(constant)
            if isinstance(side, BuildConcept) and side.address == constant.address:
                return new_val
            return side

        new_left = _substitute(self.left)
        new_right = _substitute(self.right)

        # extra equality check applied to the right operand only
        if self.right == constant:
            new_right = new_val

        return BuildConditional(
            left=new_left,
            right=new_right,
            operator=self.operator,
        )

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """Concepts referenced by the left operand followed by the right."""
        return get_concept_arguments(self.left) + get_concept_arguments(self.right)

    @property
    def row_arguments(self) -> List[BuildConcept]:
        """Row-level concepts of the left operand followed by the right."""
        return get_concept_row_arguments(self.left) + get_concept_row_arguments(
            self.right
        )

    @property
    def existence_arguments(self) -> list[tuple[BuildConcept, ...]]:
        """Existence arguments gathered from both operands, left first."""
        collected: list[tuple[BuildConcept, ...]] = []
        for side in (self.left, self.right):
            if isinstance(side, BuildConceptArgs):
                collected.extend(side.existence_arguments)
        return collected

    def decompose(self):
        """Flatten a tree of AND-ed conditionals into a list of its parts.

        A conditional with any other operator is returned as a single chunk.
        """
        if self.operator != BooleanOperator.AND:
            return [self]
        chunks = []
        for side in (self.left, self.right):
            if isinstance(side, BuildConditional):
                chunks.extend(side.decompose())
            else:
                chunks.append(side)
        return chunks
538
+
539
+
540
class BuildWhereClause(BuildConceptArgs, BaseModel):
    # Thin wrapper around a single condition; every argument lookup is
    # forwarded to that condition.
    conditional: Union[
        BuildSubselectComparison,
        BuildComparison,
        BuildConditional,
        BuildParenthetical,
    ]

    def __eq__(self, other):
        """Equal to another build or author where clause with an equal conditional."""
        if isinstance(other, (BuildWhereClause, WhereClause)):
            return self.conditional == other.conditional
        return False

    def __repr__(self):
        return str(self.conditional)

    def __str__(self):
        return self.__repr__()

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        condition = self.conditional
        return condition.concept_arguments

    @property
    def row_arguments(self) -> Sequence[BuildConcept]:
        condition = self.conditional
        return condition.row_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["BuildConcept", ...]]:
        condition = self.conditional
        return condition.existence_arguments
570
+
571
+
572
class BuildHavingClause(BuildWhereClause):
    # Same fields and behaviour as BuildWhereClause; a distinct type only.
    pass
574
+
575
+
576
class BuildComparison(BuildConceptArgs, ConstantInlineable, BaseModel):
    # Two operands joined by a comparison operator.
    left: Union[
        int,
        str,
        float,
        list,
        bool,
        datetime,
        date,
        BuildFunction,
        BuildConcept,
        BuildConditional,
        DataType,
        BuildComparison,
        BuildParenthetical,
        MagicConstants,
        BuildWindowItem,
        BuildAggregateWrapper,
    ]
    right: Union[
        int,
        str,
        float,
        list,
        bool,
        date,
        datetime,
        BuildConcept,
        BuildFunction,
        BuildConditional,
        DataType,
        BuildComparison,
        BuildParenthetical,
        MagicConstants,
        BuildWindowItem,
        BuildAggregateWrapper,
        TupleWrapper,
    ]
    operator: ComparisonOperator

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

    def __add__(self, other):
        """AND with another condition; ``None`` or an equal comparison is a no-op."""
        if other is None:
            return self
        addable = (BuildComparison, BuildConditional, BuildParenthetical)
        if not isinstance(other, addable):
            raise ValueError(f"Cannot add {type(other)} to {__class__}")
        if other == self:
            return self
        return BuildConditional(left=self, right=other, operator=BooleanOperator.AND)

    def __repr__(self):
        def _render(side):
            # concepts render as their bare address
            if isinstance(side, BuildConcept):
                return side.address
            return str(side)

        return f"{_render(self.left)} {self.operator.value} {_render(self.right)}"

    def __str__(self):
        return self.__repr__()

    def __eq__(self, other):
        if not isinstance(other, BuildComparison):
            return False
        return (
            self.left == other.left
            and self.right == other.right
            and self.operator == other.operator
        )

    def inline_constant(self, constant: BuildConcept):
        """Return a copy with references to ``constant`` replaced by its value.

        The value is the first argument of the constant's function lineage.
        The result is built with ``self.__class__``, so subclasses get an
        instance of their own type back.
        """
        assert isinstance(constant.lineage, BuildFunction)
        new_val = constant.lineage.arguments[0]

        def _substitute(side):
            # nested inlineable expressions handle their own substitution
            if isinstance(side, ConstantInlineable):
                return side.inline_constant(constant)
            if isinstance(side, BuildConcept) and side.address == constant.address:
                return new_val
            return side

        new_left = _substitute(self.left)
        new_right = _substitute(self.right)

        return self.__class__(
            left=new_left,
            right=new_right,
            operator=self.operator,
        )

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """Concepts referenced by the left operand followed by the right."""
        return get_concept_arguments(self.left) + get_concept_arguments(self.right)

    @property
    def row_arguments(self) -> List[BuildConcept]:
        """Row-level concepts of the left operand followed by the right."""
        return get_concept_row_arguments(self.left) + get_concept_row_arguments(
            self.right
        )

    @property
    def existence_arguments(self) -> List[Tuple[BuildConcept, ...]]:
        """Existence arguments gathered from both operands, left first."""
        collected: List[Tuple[BuildConcept, ...]] = []
        for side in (self.left, self.right):
            if isinstance(side, BuildConceptArgs):
                collected.extend(side.existence_arguments)
        return collected
706
+
707
+
708
class BuildSubselectComparison(BuildComparison):
    # A comparison whose right operand is reported as an existence argument
    # instead of a row argument.
    left: Union[
        int,
        str,
        float,
        list,
        bool,
        datetime,
        date,
        BuildFunction,
        BuildConcept,
        "BuildConditional",
        DataType,
        "BuildComparison",
        "BuildParenthetical",
        MagicConstants,
        BuildWindowItem,
        BuildAggregateWrapper,
    ]
    right: Union[
        int,
        str,
        float,
        list,
        bool,
        date,
        datetime,
        BuildConcept,
        BuildFunction,
        "BuildConditional",
        DataType,
        "BuildComparison",
        "BuildParenthetical",
        MagicConstants,
        BuildWindowItem,
        BuildAggregateWrapper,
        TupleWrapper,
    ]
    operator: ComparisonOperator

    def __eq__(self, other):
        """Only equal to another subselect comparison with equal parts."""
        if isinstance(other, BuildSubselectComparison):
            return (
                self.left == other.left
                and self.right == other.right
                and self.operator == other.operator
            )
        return False

    @property
    def row_arguments(self) -> List[BuildConcept]:
        """Only the left operand contributes row-level concepts."""
        left_side = self.left
        return get_concept_row_arguments(left_side)

    @property
    def existence_arguments(self) -> list[tuple["BuildConcept", ...]]:
        """The right operand's concepts, as a single tuple in a one-item list."""
        right_concepts = get_concept_arguments(self.right)
        return [tuple(right_concepts)]
766
+
767
+
768
class BuildConcept(Addressable, BuildConceptArgs, DataTyped, BaseModel):
    # Build-time concept. ``derivation``, ``granularity`` and
    # ``build_is_aggregate`` are stored as plain fields on the instance.
    model_config = ConfigDict(extra="forbid")
    name: str
    datatype: DataType | ListType | StructType | MapType | NumericType
    purpose: Purpose
    build_is_aggregate: bool
    derivation: Derivation = Derivation.ROOT
    granularity: Granularity = Granularity.MULTI_ROW
    namespace: Optional[str] = Field(default=DEFAULT_NAMESPACE, validate_default=True)
    metadata: Metadata = Field(
        default_factory=lambda: Metadata(description=None, line_number=None),
        validate_default=True,
    )
    lineage: Optional[
        Union[
            BuildFunction,
            BuildWindowItem,
            BuildFilterItem,
            BuildAggregateWrapper,
            BuildRowsetItem,
            BuildMultiSelectLineage,
        ]
    ] = None

    keys: Optional[set[str]] = None
    grain: BuildGrain = Field(default=None, validate_default=True)  # type: ignore
    modifiers: List[Modifier] = Field(default_factory=list)  # type: ignore
    pseudonyms: set[str] = Field(default_factory=set)

    def with_select_context(self, *args, **kwargs):
        """Accepts and ignores any arguments; returns this concept unchanged."""
        return self

    @property
    def is_aggregate(self) -> bool:
        return self.build_is_aggregate

    def duplicate(self) -> BuildConcept:
        """Return a deep copy of this concept."""
        return self.model_copy(deep=True)

    def __hash__(self):
        # hash of a string built from the identifying fields
        fingerprint = f"{self.name}+{self.datatype}+ {self.purpose} + {str(self.lineage)} + {self.namespace} + {str(self.grain)} + {str(self.keys)}"
        return hash(fingerprint)

    def __repr__(self):
        return f"{self.address}@{self.grain}"

    @property
    def output_datatype(self):
        return self.datatype

    def __eq__(self, other: object):
        """Equal to a string matching the address, or a concept with matching
        name, datatype, purpose, namespace and grain."""
        if isinstance(other, str) and self.address == other:
            return True
        if not isinstance(other, (BuildConcept, Concept)):
            return False
        return (
            self.name == other.name
            and self.datatype == other.datatype
            and self.purpose == other.purpose
            and self.namespace == other.namespace
            and self.grain == other.grain
        )

    def __str__(self):
        grain_text = str(self.grain) if self.grain else "Grain<>"
        return f"{self.namespace}.{self.name}@{grain_text}"

    @cached_property
    def address(self) -> str:
        """``namespace.name``; computed once and cached."""
        return f"{self.namespace}.{self.name}"

    @property
    def output(self) -> "BuildConcept":
        return self

    @property
    def safe_address(self) -> str:
        """The address with dots replaced by underscores.

        The namespace prefix is left out when it is empty or the default.
        """
        flat_name = self.name.replace(".", "_")
        if self.namespace and self.namespace != DEFAULT_NAMESPACE:
            return f"{self.namespace.replace('.','_')}_{flat_name}"
        return flat_name

    def with_grain(self, grain: Optional["BuildGrain" | BuildConcept] = None) -> Self:
        """Return a copy of this concept at the given grain.

        A ``BuildConcept`` argument becomes a grain of just that concept's
        address; a missing or falsy grain becomes an empty grain. The copy is
        made with ``model_construct``.
        """
        if isinstance(grain, BuildConcept):
            grain = BuildGrain(components={grain.address})
        target_grain = grain if grain else BuildGrain(components=set())
        return self.__class__.model_construct(
            name=self.name,
            datatype=self.datatype,
            purpose=self.purpose,
            metadata=self.metadata,
            lineage=self.lineage,
            grain=target_grain,
            namespace=self.namespace,
            keys=self.keys,
            modifiers=self.modifiers,
            pseudonyms=self.pseudonyms,
            ## bound
            derivation=self.derivation,
            granularity=self.granularity,
            build_is_aggregate=self.build_is_aggregate,
        )

    @property
    def _with_default_grain(self) -> Self:
        """Return a copy of this concept at the default grain for its purpose.

        * KEY: the concept's own address
        * PROPERTY: its keys plus the addresses of every source of each
          lineage argument
        * METRIC: an empty grain
        * CONSTANT not derived as a constant: the concept's own address
        * anything else: the current grain
        """
        purpose = self.purpose
        if purpose == Purpose.KEY:
            grain = BuildGrain(components={self.address})
        elif purpose == Purpose.PROPERTY:
            components = list(self.keys) if self.keys else []
            if self.lineage:
                for item in self.lineage.concept_arguments:
                    components.extend(x.address for x in item.sources)
            # TODO: set synonyms
            grain = BuildGrain(components=set(components))
        elif purpose == Purpose.METRIC:
            grain = BuildGrain()
        elif purpose == Purpose.CONSTANT and self.derivation != Derivation.CONSTANT:
            grain = BuildGrain(components={self.address})
        else:
            grain = self.grain  # type: ignore
        return self.__class__.model_construct(
            name=self.name,
            datatype=self.datatype,
            purpose=self.purpose,
            metadata=self.metadata,
            lineage=self.lineage,
            grain=grain,
            keys=self.keys,
            namespace=self.namespace,
            modifiers=self.modifiers,
            pseudonyms=self.pseudonyms,
            ## bound
            derivation=self.derivation,
            granularity=self.granularity,
            build_is_aggregate=self.build_is_aggregate,
        )

    def with_default_grain(self) -> "BuildConcept":
        return self._with_default_grain

    @property
    def sources(self) -> List["BuildConcept"]:
        """All concepts this one is derived from, found by walking lineage.

        Each direct lineage argument is followed by its own sources. A
        multiselect lineage, or no lineage at all, gives an empty list.

        Raises:
            SyntaxError: if a direct lineage argument has this concept's address.
        """
        lineage = self.lineage
        if not lineage:
            return []
        collected: List[BuildConcept] = []
        if isinstance(lineage, BuildMultiSelectLineage):
            return collected
        for item in lineage.concept_arguments:
            if not isinstance(item, BuildConcept):
                continue
            if item.address == self.address:
                raise SyntaxError(f"BuildConcept {self.address} references itself")
            collected.append(item)
            collected += item.sources
        return collected

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """The lineage's concept arguments, or an empty list without lineage."""
        if not self.lineage:
            return []
        return self.lineage.concept_arguments
960
+
961
+
962
class BuildOrderItem(DataTyped, BuildConceptArgs, BaseModel):
    # One ordering entry: an expression and its ordering. Argument and
    # datatype lookups are delegated to the expression.
    expr: BuildExpr
    order: Ordering

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """Concepts referenced by the expression (empty for plain values)."""
        return get_concept_arguments(self.expr)

    @property
    def row_arguments(self) -> Sequence[BuildConcept]:
        target = self.expr
        if not isinstance(target, BuildConceptArgs):
            return self.concept_arguments
        return target.row_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["BuildConcept", ...]]:
        target = self.expr
        if not isinstance(target, BuildConceptArgs):
            return []
        return target.existence_arguments

    @property
    def output_datatype(self):
        """Datatype of the expression, via ``arg_to_datatype``."""
        return arg_to_datatype(self.expr)
991
+
992
+
993
class BuildWindowItem(DataTyped, BuildConceptArgs, BaseModel):
    """A window expression: type, target concept, ordering and partitioning."""

    type: WindowType
    content: BuildConcept
    order_by: List[BuildOrderItem]
    over: List["BuildConcept"] = Field(default_factory=list)
    index: Optional[int] = None

    def __repr__(self) -> str:
        return f"{self.type}({self.content} {self.index}, {self.over}, {self.order_by})"

    def __str__(self):
        return repr(self)

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """Content first, then order-by arguments, then the ``over`` concepts."""
        referenced = [self.content]
        for ordering in self.order_by:
            referenced.extend(ordering.concept_arguments)
        referenced.extend(self.over)
        return referenced

    @property
    def output(self) -> BuildConcept:
        """The concept the window is computed over."""
        return self.content

    @property
    def output_datatype(self):
        """INTEGER for RANK / ROW_NUMBER windows, else the content's datatype."""
        yields_integer = self.type in (WindowType.RANK, WindowType.ROW_NUMBER)
        return DataType.INTEGER if yields_integer else self.content.output_datatype

    @property
    def output_purpose(self):
        """Window results always have the PROPERTY purpose."""
        return Purpose.PROPERTY
1028
+
1029
+
1030
class BuildCaseWhen(BuildConceptArgs, BaseModel):
    """One WHEN branch of a CASE expression: a condition and its result."""

    comparison: BuildConditional | BuildSubselectComparison | BuildComparison
    expr: "BuildExpr"

    def __str__(self):
        return repr(self)

    def __repr__(self):
        return f"WHEN {str(self.comparison)} THEN {str(self.expr)}"

    @property
    def concept_arguments(self):
        """Concept arguments of the condition followed by those of the result."""
        from_condition = get_concept_arguments(self.comparison)
        from_result = get_concept_arguments(self.expr)
        return from_condition + from_result

    @property
    def concept_row_arguments(self):
        """Row arguments of the condition followed by those of the result."""
        from_condition = get_concept_row_arguments(self.comparison)
        from_result = get_concept_row_arguments(self.expr)
        return from_condition + from_result
1049
+
1050
+
1051
class BuildCaseElse(BuildConceptArgs, BaseModel):
    """The ELSE branch of a CASE expression."""

    expr: "BuildExpr"
    # Fixed marker field so an ELSE branch is easy to tell apart from a
    # BuildCaseWhen.
    discriminant: ComparisonOperator = ComparisonOperator.ELSE

    @property
    def concept_arguments(self):
        """Concept arguments referenced by the ELSE expression."""
        referenced = get_concept_arguments(self.expr)
        return referenced
1059
+
1060
+
1061
class BuildFunction(DataTyped, BuildConceptArgs, BaseModel):
    """A function call: operator, arguments and declared output type/purpose."""

    operator: FunctionType
    arg_count: int = Field(default=1)
    output_datatype: DataType | ListType | StructType | MapType | NumericType
    output_purpose: Purpose
    valid_inputs: Optional[
        Union[
            Set[DataType],
            List[Set[DataType]],
        ]
    ] = None
    arguments: Sequence[
        Union[
            int,
            float,
            str,
            date,
            datetime,
            MapWrapper[Any, Any],
            DataType,
            ListType,
            MapType,
            NumericType,
            DatePart,
            BuildConcept,
            BuildAggregateWrapper,
            BuildFunction,
            BuildWindowItem,
            BuildParenthetical,
            BuildCaseWhen,
            BuildCaseElse,
            list,
            ListWrapper[Any],
        ]
    ]

    def __repr__(self):
        rendered_args = ",".join([str(a) for a in self.arguments])
        return f"{self.operator.value}({rendered_args})"

    def __str__(self):
        return repr(self)

    @property
    def datatype(self):
        """Alias for ``output_datatype``."""
        return self.output_datatype

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """Concept arguments gathered from every argument, in argument order."""
        collected = []
        for argument in self.arguments:
            collected += get_concept_arguments(argument)
        return collected

    @property
    def output_grain(self):
        """Grain of the function result.

        Aggregate operators yield an empty (abstract) grain; any other
        operator accumulates the grains of all its concept arguments.
        """
        combined = BuildGrain(components=[])
        if self.operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
            return combined
        # Scalars have an implicit grain of all their arguments.
        for argument in self.concept_arguments:
            combined += argument.grain
        return combined
1125
+
1126
+
1127
class BuildAggregateWrapper(BuildConceptArgs, DataTyped, BaseModel):
    """A function together with the concepts it is grouped ``by``."""

    function: BuildFunction
    by: List[BuildConcept] = Field(default_factory=list)

    def __str__(self):
        if self.by:
            grain_str = [str(c) for c in self.by]
        else:
            grain_str = "abstract"
        return f"{str(self.function)}<{grain_str}>"

    @property
    def datatype(self):
        """Datatype of the wrapped function."""
        return self.function.datatype

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """The function's concept arguments followed by the ``by`` concepts."""
        from_function = self.function.concept_arguments
        return from_function + self.by

    @property
    def output_datatype(self):
        """Output datatype of the wrapped function."""
        return self.function.output_datatype

    @property
    def output_purpose(self):
        """Output purpose of the wrapped function."""
        return self.function.output_purpose
1150
+
1151
+
1152
class BuildFilterItem(BuildConceptArgs, BaseModel):
    """A concept restricted by a where clause."""

    content: BuildConcept
    where: BuildWhereClause

    def __str__(self):
        return f"<Filter: {str(self.content)} where {str(self.where)}>"

    @property
    def output_datatype(self):
        """Datatype of the filtered concept."""
        filtered = self.content
        return filtered.datatype

    @property
    def output_purpose(self):
        """Purpose of the filtered concept."""
        filtered = self.content
        return filtered.purpose

    @property
    def concept_arguments(self):
        """The filtered concept followed by the where clause's arguments."""
        from_where = self.where.concept_arguments
        return [self.content] + from_where
1170
+
1171
+
1172
class BuildRowsetLineage(BuildConceptArgs, BaseModel):
    """Lineage record describing the rowset a concept was taken from."""

    # Name of the rowset; used when rendering a BuildRowsetItem.
    name: str
    # Concepts derived from the rowset.
    derived_concepts: List[BuildConcept]
    # The select (or multi-select) lineage that defines the rowset.
    select: SelectLineage | MultiSelectLineage
1176
+
1177
+
1178
class BuildRowsetItem(DataTyped, BuildConceptArgs, BaseModel):
    """A concept exposed through a named rowset."""

    content: BuildConcept
    rowset: BuildRowsetLineage

    def __repr__(self):
        return f"<Rowset<{self.rowset.name}>: {str(self.content)}>"

    def __str__(self):
        return repr(self)

    @property
    def output(self) -> BuildConcept:
        """The wrapped concept."""
        return self.content

    @property
    def output_datatype(self):
        """Datatype of the wrapped concept."""
        wrapped = self.content
        return wrapped.datatype

    @property
    def output_purpose(self):
        """Purpose of the wrapped concept."""
        wrapped = self.content
        return wrapped.purpose

    @property
    def concept_arguments(self):
        """A one-element list holding the wrapped concept."""
        wrapped = self.content
        return [wrapped]
1203
+
1204
+
1205
class BuildOrderBy(BaseModel):
    """An ordered list of ``BuildOrderItem`` entries."""

    items: List[BuildOrderItem]

    @property
    def concept_arguments(self):
        """Return the ``expr`` of every order item, in order.

        NOTE(review): this yields the raw expressions, not only concepts —
        confirm callers expect that.
        """
        expressions = []
        for entry in self.items:
            expressions.append(entry.expr)
        return expressions
1211
+
1212
+
1213
class BuildAlignClause(BaseModel):
    """Container for the align items of a multi-select."""

    # One entry per aligned output; see BuildAlignItem.
    items: List[BuildAlignItem]
1215
+
1216
+
1217
class BuildSelectLineage(BaseModel):
    """Built form of a select statement's lineage."""

    # Concepts produced by the select, in selection order.
    selection: List[BuildConcept]
    # Addresses flagged as hidden.
    hidden_components: set[str]
    # Concepts materialized for this select, keyed by address.
    local_concepts: dict[str, BuildConcept]
    order_by: Optional[BuildOrderBy] = None
    limit: Optional[int] = None
    meta: Metadata = Field(default_factory=lambda: Metadata())
    grain: BuildGrain = Field(default_factory=BuildGrain)
    # Either the author-side clause or its built counterpart is accepted.
    where_clause: WhereClause | BuildWhereClause | None = Field(default=None)
    having_clause: HavingClause | BuildHavingClause | None = Field(default=None)

    @property
    def output_components(self) -> List[BuildConcept]:
        """The select's output concepts; identical to ``selection``."""
        return self.selection
1231
+
1232
+
1233
class BuildMultiSelectLineage(BuildConceptArgs, BaseModel):
    """Built lineage of a multi-select: several selects merged via an align clause."""

    selects: List[SelectLineage]
    grain: BuildGrain
    align: BuildAlignClause
    namespace: str
    order_by: Optional[BuildOrderBy] = None
    limit: Optional[int] = None
    where_clause: Union["BuildWhereClause", None] = Field(default=None)
    having_clause: Union["BuildHavingClause", None] = Field(default=None)
    local_concepts: dict[str, BuildConcept]
    build_concept_arguments: list[BuildConcept]
    build_output_components: list[BuildConcept]
    hidden_components: set[str]

    @property
    def derived_concepts(self) -> set[str]:
        """Addresses (``namespace.alias``) of every aligned concept."""
        return {entry.aligned_concept for entry in self.align.items}

    @property
    def output_components(self) -> list[BuildConcept]:
        """The stored ``build_output_components``."""
        return self.build_output_components

    @property
    def derived_concept(self) -> set[str]:
        """Same value as ``derived_concepts``."""
        return {entry.aligned_concept for entry in self.align.items}

    @property
    def concept_arguments(self) -> list[BuildConcept]:
        """The stored ``build_concept_arguments``."""
        return self.build_concept_arguments

    # these are needed to help disambiguate between parents
    def get_merge_concept(self, check: BuildConcept) -> str | None:
        """Return ``namespace.alias`` of the first align item containing ``check``.

        Returns ``None`` when no align item contains it.
        """
        for entry in self.align.items:
            if check not in entry.concepts_lcl:
                continue
            return f"{entry.namespace}.{entry.alias}"
        return None

    def find_source(self, concept: BuildConcept, cte: CTE | UnionCTE) -> BuildConcept:
        """Find which aligned input concept ``cte`` provides for ``concept``.

        Align items whose alias equals ``concept.name`` are searched in order;
        the first of their concepts whose address is in ``cte.output_lcl`` is
        returned.

        Raises:
            SyntaxError: if no such concept exists.
        """
        for entry in self.align.items:
            if concept.name != entry.alias:
                continue
            for candidate in entry.concepts:
                if candidate.address in cte.output_lcl:
                    return candidate
        raise SyntaxError(
            f"Could not find upstream map for multiselect {str(concept)} on cte ({cte})"
        )
1285
+
1286
+
1287
class BuildAlignItem(BaseModel):
    """A group of concepts merged under a single alias."""

    alias: str
    concepts: List[BuildConcept]
    namespace: str = Field(default=DEFAULT_NAMESPACE, validate_default=True)

    @computed_field  # type: ignore
    @cached_property
    def concepts_lcl(self) -> LooseBuildConceptList:
        """The member concepts wrapped in a ``LooseBuildConceptList`` (cached)."""
        wrapped = LooseBuildConceptList(concepts=self.concepts)
        return wrapped

    @property
    def aligned_concept(self) -> str:
        """Address of the merged concept, as ``namespace.alias``."""
        return f"{self.namespace}.{self.alias}"
1300
+
1301
+
1302
class BuildColumnAssignment(BaseModel):
    """Binds a datasource column (alias) to a concept, with modifiers."""

    alias: str | RawColumnExpr | BuildFunction
    concept: BuildConcept
    modifiers: List[Modifier] = Field(default_factory=list)

    @property
    def is_complete(self) -> bool:
        """True unless the column carries the PARTIAL modifier."""
        is_partial = Modifier.PARTIAL in self.modifiers
        return not is_partial

    @property
    def is_nullable(self) -> bool:
        """True when the column carries the NULLABLE modifier."""
        flags = self.modifiers
        return Modifier.NULLABLE in flags
1314
+
1315
+
1316
class BuildDatasource(BaseModel):
    """Built form of a datasource: named columns bound to concepts at an address."""

    name: str
    columns: List[BuildColumnAssignment]
    address: Union[Address, str]
    grain: BuildGrain = Field(
        default_factory=lambda: BuildGrain(components=set()), validate_default=True
    )
    namespace: Optional[str] = Field(default=DEFAULT_NAMESPACE, validate_default=True)
    metadata: DatasourceMetadata = Field(
        default_factory=lambda: DatasourceMetadata(freshness_concept=None)
    )
    where: Optional[BuildWhereClause] = None
    non_partial_for: Optional[BuildWhereClause] = None

    def __hash__(self):
        # Hash on the namespaced identifier only.
        return hash(self.identifier)

    def __add__(self, other):
        """Return ``self`` when ``other`` equals it; adding anything else is an error."""
        if other == self:
            return self
        raise ValueError(
            "Attempted to add two datasources that are not identical, this is not a valid operation"
        )

    @property
    def condition(self):
        """Always ``None`` for a datasource."""
        return None

    @property
    def can_be_inlined(self) -> bool:
        """False only when the address is an ``Address`` flagged ``is_query``."""
        location = self.address
        return not (isinstance(location, Address) and location.is_query)

    @property
    def identifier(self) -> str:
        """``namespace.name``, or just ``name`` for an empty/default namespace."""
        if self.namespace and self.namespace != DEFAULT_NAMESPACE:
            return f"{self.namespace}.{self.name}"
        return self.name

    @property
    def safe_identifier(self) -> str:
        """The identifier with every ``.`` replaced by ``_``."""
        return self.identifier.replace(".", "_")

    @property
    def concepts(self) -> List[BuildConcept]:
        """Concepts of all columns, in column order."""
        return [column.concept for column in self.columns]

    @property
    def group_required(self):
        """Always ``False`` for a datasource."""
        return False

    @property
    def output_concepts(self) -> List[BuildConcept]:
        """Same as ``concepts``."""
        return self.concepts

    @property
    def full_concepts(self) -> List[BuildConcept]:
        """Concepts of the columns that are not marked PARTIAL."""
        return [
            column.concept
            for column in self.columns
            if Modifier.PARTIAL not in column.modifiers
        ]

    @property
    def nullable_concepts(self) -> List[BuildConcept]:
        """Concepts of the columns marked NULLABLE."""
        return [
            column.concept
            for column in self.columns
            if Modifier.NULLABLE in column.modifiers
        ]

    @property
    def hidden_concepts(self) -> List[BuildConcept]:
        """Concepts of the columns marked HIDDEN."""
        return [
            column.concept
            for column in self.columns
            if Modifier.HIDDEN in column.modifiers
        ]

    @property
    def partial_concepts(self) -> List[BuildConcept]:
        """Concepts of the columns marked PARTIAL."""
        return [
            column.concept
            for column in self.columns
            if Modifier.PARTIAL in column.modifiers
        ]

    def get_alias(
        self,
        concept: BuildConcept,
        use_raw_name: bool = True,
        force_alias: bool = False,
    ) -> Optional[str | RawColumnExpr] | BuildFunction:
        """Return how ``concept`` is addressed on this datasource.

        The first column whose concept equals ``concept`` (directly, or after
        being re-grained to ``concept.grain``) is used. With ``use_raw_name``
        the column's alias is returned, otherwise ``concept.safe_address``.
        ``force_alias`` is accepted but not read by this method.

        Raises:
            ValueError: if no column matches.
        """
        # 2022-01-22: this matching logic still needs to be refined.
        for column in self.columns:
            bound = column.concept
            if not (bound == concept or bound.with_grain(concept.grain) == concept):
                continue
            return column.alias if use_raw_name else concept.safe_address
        existing = [str(column.concept) for column in self.columns]
        raise ValueError(
            f"{LOGGER_PREFIX} Concept {concept} not found on {self.identifier}; have"
            f" {existing}."
        )

    @property
    def safe_location(self) -> str:
        """``address.location`` for an ``Address``, else the address string itself."""
        location = self.address
        return location.location if isinstance(location, Address) else location

    @property
    def output_lcl(self) -> LooseBuildConceptList:
        """The output concepts wrapped in a ``LooseBuildConceptList``."""
        wrapped = LooseBuildConceptList(concepts=self.output_concepts)
        return wrapped
1418
+
1419
+
1420
# Union of the built node types and plain Python values accepted wherever a
# field is annotated as ``BuildExpr`` (for example ``BuildOrderItem.expr``).
BuildExpr = (
    BuildWindowItem
    | BuildFilterItem
    | BuildConcept
    | BuildComparison
    | BuildConditional
    | BuildParenthetical
    | BuildFunction
    | BuildAggregateWrapper
    | bool
    | MagicConstants
    | int
    | str
    | float
    | list
)

# NOTE(review): presumably required so pydantic can resolve BuildConcept
# annotations that refer to names defined later in this module — confirm.
BuildConcept.model_rebuild()
1438
+
1439
+
1440
+ class Factory:
1441
+
1442
def __init__(
    self,
    environment: Environment,
    local_concepts: dict[str, BuildConcept] | None = None,
    grain: Grain | None = None,
):
    """Create a factory that converts author-side objects into build objects.

    Args:
        environment: Environment used to look up concepts by address.
        local_concepts: Cache of already-built concepts keyed by address.
            The dict is stored by reference, so entries the caller adds
            later are visible to this factory.
        grain: Grain to build at; a default ``Grain()`` is used when omitted
            or falsy.
    """
    self.grain = grain or Grain()
    self.environment = environment
    # Test against None explicitly: ``local_concepts or {}`` would replace a
    # caller-supplied *empty* dict with a fresh one and silently break the
    # shared cache that callers fill in after construction.
    self.local_concepts: dict[str, BuildConcept] = (
        {} if local_concepts is None else local_concepts
    )
1451
+
1452
@singledispatchmethod
def build(self, base):
    """Convert ``base`` into its build-time counterpart.

    This is the dispatch fallback: it is reached only for types with no
    registered handler.

    Raises:
        NotImplementedError: always, naming the unsupported type.
    """
    unsupported = type(base)
    raise NotImplementedError(f"Cannot build {unsupported}")
1455
+
1456
@build.register
def _(
    self,
    base: (
        int
        | str
        | float
        | list
        | date
        | TupleWrapper
        | ListWrapper
        | MagicConstants
        | MapWrapper
        | DataType
        | DatePart
        | NumericType
    ),
) -> (
    int
    | str
    | float
    | list
    | date
    | TupleWrapper
    | ListWrapper
    | MagicConstants
    | MapWrapper
    | DataType
    | DatePart
    | NumericType
):
    """Pass through literals and type markers, which need no conversion.

    The handler is registered once; the original applied ``@build.register``
    twice, which only repeated the same registration.
    """
    return base
1489
+
1490
@build.register
def _(self, base: None) -> None:
    """Building ``None`` yields ``None``."""
    unchanged = base
    return unchanged
1493
+
1494
@build.register
def _(self, base: Function) -> BuildFunction:
    """Build a ``Function``: arguments are built, the other fields are copied."""
    built_arguments = [self.build(argument) for argument in base.arguments]
    return BuildFunction(
        operator=base.operator,
        arguments=built_arguments,
        output_datatype=base.output_datatype,
        output_purpose=base.output_purpose,
        valid_inputs=base.valid_inputs,
        arg_count=base.arg_count,
    )
1506
+
1507
@build.register
def _(self, base: ConceptRef) -> BuildConcept:
    """Resolve a concept reference to a built concept.

    A ``BuildConcept`` already cached in ``local_concepts`` under the
    reference's address wins; otherwise the concept is looked up in the
    environment and built.
    """
    cached = self.local_concepts.get(base.address)
    if isinstance(cached, BuildConcept):
        return cached
    definition = self.environment.concepts[base.address]
    return self.build(definition)
1515
+
1516
@build.register
def _(self, base: CaseWhen) -> BuildCaseWhen:
    """Build a WHEN branch: condition first, then the result expression."""
    condition = self.build(base.comparison)
    result = self.build(base.expr)
    return BuildCaseWhen(comparison=condition, expr=result)
1522
+
1523
@build.register
def _(self, base: CaseElse) -> BuildCaseElse:
    """Build an ELSE branch from its built expression."""
    fallback = self.build(base.expr)
    return BuildCaseElse(expr=fallback)
1526
+
1527
@build.register
def _(self, base: Concept) -> BuildConcept:
    """Build a ``Concept`` at this factory's grain.

    A concept already present in ``local_concepts`` is returned as is.
    Otherwise lineage and grain are resolved against the factory grain via
    ``get_select_grain_and_keys``, the lineage is built, and derivation,
    granularity and aggregate status are computed from the built lineage.
    """
    cached = self.local_concepts.get(base.address) if base.address in self.local_concepts else None
    if base.address in self.local_concepts:
        return cached

    resolved_lineage, resolved_grain, _ = base.get_select_grain_and_keys(
        self.grain, self.environment
    )
    built_lineage = self.build(resolved_lineage)
    derivation = Concept.calculate_derivation(built_lineage, base.purpose)
    granularity = Concept.calculate_granularity(
        derivation, resolved_grain, built_lineage
    )
    is_aggregate = Concept.calculate_is_aggregate(built_lineage)
    built_grain = self.build(resolved_grain)
    return BuildConcept(
        name=base.name,
        datatype=base.datatype,
        purpose=base.purpose,
        metadata=base.metadata,
        lineage=built_lineage,
        grain=built_grain,
        namespace=base.namespace,
        keys=base.keys,
        modifiers=base.modifiers,
        pseudonyms=base.pseudonyms,
        ## instantiated values
        derivation=derivation,
        granularity=granularity,
        build_is_aggregate=is_aggregate,
    )
1557
+
1558
@build.register
def _(self, base: AggregateWrapper) -> BuildAggregateWrapper:
    """Build an aggregate wrapper.

    An explicit ``by`` list is built item by item; when it is empty the
    aggregate is grouped by the components of the factory's grain instead.
    """
    if base.by:
        group_by = [self.build(member) for member in base.by]
    else:
        group_by = [
            self.build(self.environment.concepts[address])
            for address in self.grain.components
        ]
    wrapped_function = self.build(base.function)
    return BuildAggregateWrapper(function=wrapped_function, by=group_by)
1569
+
1570
@build.register
def _(self, base: ColumnAssignment) -> BuildColumnAssignment:
    """Build a column assignment.

    A ``Function`` alias is built, any other alias is kept as is. The concept
    is looked up in the environment by address and re-grained to the factory
    grain before being built.
    """
    if isinstance(base.alias, Function):
        alias = self.build(base.alias)
    else:
        alias = base.alias
    regrained = self.environment.concepts[base.concept.address].with_grain(
        self.grain
    )
    return BuildColumnAssignment(
        alias=alias,
        concept=self.build(regrained),
        modifiers=base.modifiers,
    )
1584
+
1585
@build.register
def _(self, base: OrderBy) -> BuildOrderBy:
    """Build an ORDER BY clause by building each of its items."""
    built_items = [self.build(entry) for entry in base.items]
    return BuildOrderBy(items=built_items)
1588
+
1589
@build.register
def _(self, base: OrderItem) -> BuildOrderItem:
    """Build an order item: the expression is built, the ordering is copied."""
    built_expr = self.build(base.expr)
    return BuildOrderItem(expr=built_expr, order=base.order)
1595
+
1596
@build.register
def _(self, base: WhereClause) -> BuildWhereClause:
    """Build a WHERE clause from its built conditional."""
    condition = self.build(base.conditional)
    return BuildWhereClause(conditional=condition)
1599
+
1600
@build.register
def _(self, base: HavingClause) -> BuildHavingClause:
    """Build a HAVING clause from its built conditional."""
    condition = self.build(base.conditional)
    return BuildHavingClause(conditional=condition)
1603
+
1604
@build.register
def _(self, base: WindowItem) -> BuildWindowItem:
    """Build a window item; content, order-by and over are built in that order."""
    content = self.build(base.content)
    ordering = [self.build(entry) for entry in base.order_by]
    partition = [self.build(entry) for entry in base.over]
    return BuildWindowItem(
        type=base.type,
        content=content,
        order_by=ordering,
        over=partition,
        index=base.index,
    )
1613
+
1614
@build.register
def _(self, base: Conditional) -> BuildConditional:
    """Build a conditional: both sides are built, the operator is copied."""
    lhs = self.build(base.left)
    rhs = self.build(base.right)
    return BuildConditional(left=lhs, right=rhs, operator=base.operator)
1621
+
1622
@build.register
def _(self, base: SubselectComparison) -> BuildSubselectComparison:
    """Build a subselect comparison: both sides are built, the operator is copied."""
    lhs = self.build(base.left)
    rhs = self.build(base.right)
    return BuildSubselectComparison(left=lhs, right=rhs, operator=base.operator)
1629
+
1630
@build.register
def _(self, base: Comparison) -> BuildComparison:
    """Build a comparison: both sides are built, the operator is copied."""
    lhs = self.build(base.left)
    rhs = self.build(base.right)
    return BuildComparison(left=lhs, right=rhs, operator=base.operator)
1637
+
1638
@build.register
def _(self, base: AlignItem) -> BuildAlignItem:
    """Build an align item: member concepts are built, alias and namespace copied."""
    members = [self.build(member) for member in base.concepts]
    return BuildAlignItem(
        alias=base.alias,
        concepts=members,
        namespace=base.namespace,
    )
1645
+
1646
@build.register
def _(self, base: AlignClause) -> BuildAlignClause:
    """Build an align clause by building each of its items."""
    built_items = [self.build(entry) for entry in base.items]
    return BuildAlignClause(items=built_items)
1649
+
1650
@build.register
def _(self, base: RowsetItem):
    """Build a rowset item.

    Content and rowset are built by a separate factory that uses the grain of
    the rowset's select and is given an empty local-concept dict.
    """
    rowset_factory = Factory(
        environment=self.environment,
        local_concepts={},
        grain=base.rowset.select.grain,
    )
    # Content is built before the rowset, as in the constructor call order.
    content = rowset_factory.build(base.content)
    rowset = rowset_factory.build(base.rowset)
    return BuildRowsetItem(content=content, rowset=rowset)
1661
+
1662
@build.register
def _(self, base: RowsetLineage) -> BuildRowsetLineage:
    """Build a rowset lineage.

    The name and select are copied unchanged; ``derived_concepts`` is left
    empty.
    """
    return BuildRowsetLineage(
        name=base.name,
        derived_concepts=[],
        select=base.select,
    )
1670
+
1671
@build.register
def _(self, base: Grain) -> BuildGrain:
    """Build a grain.

    Components are copied; a where clause, when present, is built by a fresh
    factory created with only the environment.
    """
    where = None
    if base.where_clause:
        clause_factory = Factory(environment=self.environment)
        where = clause_factory.build(base.where_clause)
    return BuildGrain(components=base.components, where_clause=where)
1679
+
1680
@build.register
def _(self, base: FilterItem) -> BuildFilterItem:
    """Build a filter item: content first, then its where clause."""
    content = self.build(base.content)
    where = self.build(base.where)
    return BuildFilterItem(content=content, where=where)
1685
+
1686
@build.register
def _(self, base: Parenthetical) -> BuildParenthetical:
    """Build a parenthetical from its built content."""
    inner = self.build(base.content)
    return BuildParenthetical(content=inner)
1689
+
1690
@build.register
def _(self, base: SelectLineage) -> BuildSelectLineage:
    """Build a select lineage at the select's own grain.

    Local concepts and selected concepts are built by a factory created with
    the select's grain and handed ``materialized`` as its local-concept dict.
    The where clause is built by a separate factory with a default grain and
    an empty local-concept dict.

    ``BuildSelectLineage`` and ``Factory`` are module-level names in this
    file, so the former function-local self-import was redundant and has been
    removed, together with a no-op alias of the loop variable.
    """
    materialized: dict[str, BuildConcept] = {}
    factory = Factory(
        grain=base.grain, environment=self.environment, local_concepts=materialized
    )
    for k, v in base.local_concepts.items():
        materialized[k] = factory.build(v)
    where_factory = Factory(
        grain=Grain(), environment=self.environment, local_concepts={}
    )

    final: List[BuildConcept] = []
    for original in base.selection:
        # we don't know the grain of an aggregate at assignment time
        # so rebuild at this point in the tree
        # TODO: simplify
        if original.address in materialized:
            built = materialized[original.address]
        else:
            # Sometimes cached values here don't have the latest info
            # but we can't just use environment, as it might not have the right grain.
            built = factory.build(original)
            materialized[original.address] = built
        final.append(built)
    return BuildSelectLineage(
        selection=final,
        hidden_components=base.hidden_components,
        order_by=(factory.build(base.order_by) if base.order_by else None),
        limit=base.limit,
        meta=base.meta,
        local_concepts=materialized,
        grain=self.build(base.grain),
        having_clause=(
            factory.build(base.having_clause) if base.having_clause else None
        ),
        # this uses a different grain factory
        where_clause=(
            where_factory.build(base.where_clause) if base.where_clause else None
        ),
    )
1738
+
1739
@build.register
def _(self, base: MultiSelectLineage) -> BuildMultiSelectLineage:
    """Build a multi-select lineage.

    Every member select is built, and their local concepts are merged into
    the first select's dict (later selects overwrite earlier keys). Each
    derived concept is created first without lineage, and gets the finished
    lineage assigned afterwards, to avoid a circular dependency.
    """
    derived_cache: dict[str, BuildConcept] = {}

    built_selects: list[BuildSelectLineage] = [self.build(s) for s in base.selects]
    merged_local = built_selects[0].local_concepts
    for later_select in built_selects[1:]:
        merged_local.update(later_select.local_concepts)

    # this requires custom handling to avoid circular dependencies
    final_grain = self.build(base.grain)
    derived: list[BuildConcept] = []
    for address in base.derived_concepts:
        source_concept = self.environment.concepts[address]
        placeholder = BuildConcept(
            name=source_concept.name,
            datatype=source_concept.datatype,
            purpose=source_concept.purpose,
            build_is_aggregate=False,
            derivation=Derivation.MULTISELECT,
            lineage=None,
            grain=final_grain,
            namespace=source_concept.namespace,
        )
        derived_cache[address] = placeholder
        derived.append(placeholder)

    inputs: list[BuildConcept] = []
    for built_select in built_selects:
        inputs += built_select.output_components
    visible: list[BuildConcept] = [
        candidate
        for candidate in derived + inputs
        if candidate.address not in base.hidden_components
    ]

    factory = Factory(
        grain=base.grain,
        environment=self.environment,
        local_concepts=derived_cache,
    )
    where_factory = Factory(environment=self.environment)
    lineage = BuildMultiSelectLineage(
        # we don't build selects here; they'll be built automatically in query discovery
        selects=base.selects,
        grain=final_grain,
        align=factory.build(base.align),
        namespace=base.namespace,
        hidden_components=base.hidden_components,
        order_by=factory.build(base.order_by) if base.order_by else None,
        limit=base.limit,
        where_clause=(
            where_factory.build(base.where_clause) if base.where_clause else None
        ),
        having_clause=(
            factory.build(base.having_clause) if base.having_clause else None
        ),
        local_concepts=merged_local,
        build_output_components=visible,
        build_concept_arguments=inputs,
    )
    # Back-fill the lineage on the placeholder concepts created above.
    for address in base.derived_concepts:
        derived_cache[address].lineage = lineage
    return lineage
1806
+
1807
@build.register
def _(self, base: Environment):
    """Build a ``BuildEnvironment`` from an ``Environment``.

    Concepts, datasources and alias-origin entries are built in that order
    and stored under their original keys; the concept-list caches are
    generated last.
    """
    from trilogy.core.models.build_environment import BuildEnvironment

    built_env = BuildEnvironment(
        namespace=base.namespace,
        cte_name_map=base.cte_name_map,
    )

    for address, concept in base.concepts.items():
        built_env.concepts[address] = self.build(concept)
    for key, datasource in base.datasources.items():
        built_env.datasources[key] = self.build(datasource)
    for alias, origin in base.alias_origin_lookup.items():
        built_env.alias_origin_lookup[alias] = self.build(origin)
    built_env.gen_concept_list_caches()
    return built_env
1827
+
1828
@build.register
def _(self, base: Datasource):
    """Build a datasource using a factory bound to the datasource's own grain.

    Columns, grain and the optional ``where`` / ``non_partial_for`` clauses
    are built by that factory; the remaining fields are copied.
    """
    datasource_cache: dict[str, BuildConcept] = {}
    factory = Factory(
        grain=base.grain,
        environment=self.environment,
        local_concepts=datasource_cache,
    )
    # Built up front in the same order the constructor arguments were evaluated.
    columns = [factory.build(column) for column in base.columns]
    grain = factory.build(base.grain)
    where = factory.build(base.where) if base.where else None
    non_partial_for = (
        factory.build(base.non_partial_for) if base.non_partial_for else None
    )
    return BuildDatasource(
        name=base.name,
        columns=columns,
        address=base.address,
        grain=grain,
        namespace=base.namespace,
        metadata=base.metadata,
        where=where,
        non_partial_for=non_partial_for,
    )