pytrilogy 0.0.2.58__py3-none-any.whl → 0.0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (76)
  1. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/METADATA +9 -2
  2. pytrilogy-0.0.3.1.dist-info/RECORD +99 -0
  3. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +2 -2
  5. trilogy/core/enums.py +1 -7
  6. trilogy/core/env_processor.py +17 -5
  7. trilogy/core/environment_helpers.py +11 -25
  8. trilogy/core/exceptions.py +4 -0
  9. trilogy/core/functions.py +695 -261
  10. trilogy/core/graph_models.py +10 -10
  11. trilogy/core/internal.py +11 -2
  12. trilogy/core/models/__init__.py +0 -0
  13. trilogy/core/models/author.py +2110 -0
  14. trilogy/core/models/build.py +1859 -0
  15. trilogy/core/models/build_environment.py +151 -0
  16. trilogy/core/models/core.py +370 -0
  17. trilogy/core/models/datasource.py +297 -0
  18. trilogy/core/models/environment.py +701 -0
  19. trilogy/core/models/execute.py +931 -0
  20. trilogy/core/optimization.py +14 -16
  21. trilogy/core/optimizations/base_optimization.py +1 -1
  22. trilogy/core/optimizations/inline_constant.py +6 -6
  23. trilogy/core/optimizations/inline_datasource.py +17 -11
  24. trilogy/core/optimizations/predicate_pushdown.py +17 -16
  25. trilogy/core/processing/concept_strategies_v3.py +178 -145
  26. trilogy/core/processing/graph_utils.py +1 -1
  27. trilogy/core/processing/node_generators/basic_node.py +19 -18
  28. trilogy/core/processing/node_generators/common.py +50 -44
  29. trilogy/core/processing/node_generators/filter_node.py +26 -13
  30. trilogy/core/processing/node_generators/group_node.py +26 -21
  31. trilogy/core/processing/node_generators/group_to_node.py +11 -8
  32. trilogy/core/processing/node_generators/multiselect_node.py +60 -43
  33. trilogy/core/processing/node_generators/node_merge_node.py +76 -38
  34. trilogy/core/processing/node_generators/rowset_node.py +55 -36
  35. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
  36. trilogy/core/processing/node_generators/select_merge_node.py +161 -64
  37. trilogy/core/processing/node_generators/select_node.py +13 -13
  38. trilogy/core/processing/node_generators/union_node.py +12 -11
  39. trilogy/core/processing/node_generators/unnest_node.py +9 -7
  40. trilogy/core/processing/node_generators/window_node.py +18 -16
  41. trilogy/core/processing/nodes/__init__.py +21 -18
  42. trilogy/core/processing/nodes/base_node.py +82 -66
  43. trilogy/core/processing/nodes/filter_node.py +19 -13
  44. trilogy/core/processing/nodes/group_node.py +50 -35
  45. trilogy/core/processing/nodes/merge_node.py +45 -36
  46. trilogy/core/processing/nodes/select_node_v2.py +53 -39
  47. trilogy/core/processing/nodes/union_node.py +5 -7
  48. trilogy/core/processing/nodes/unnest_node.py +7 -11
  49. trilogy/core/processing/nodes/window_node.py +9 -4
  50. trilogy/core/processing/utility.py +103 -75
  51. trilogy/core/query_processor.py +70 -47
  52. trilogy/core/statements/__init__.py +0 -0
  53. trilogy/core/statements/author.py +413 -0
  54. trilogy/core/statements/build.py +0 -0
  55. trilogy/core/statements/common.py +30 -0
  56. trilogy/core/statements/execute.py +42 -0
  57. trilogy/dialect/base.py +148 -106
  58. trilogy/dialect/common.py +9 -10
  59. trilogy/dialect/duckdb.py +1 -1
  60. trilogy/dialect/enums.py +4 -2
  61. trilogy/dialect/presto.py +1 -1
  62. trilogy/dialect/sql_server.py +1 -1
  63. trilogy/executor.py +44 -32
  64. trilogy/hooks/__init__.py +4 -0
  65. trilogy/hooks/base_hook.py +6 -4
  66. trilogy/hooks/query_debugger.py +113 -97
  67. trilogy/parser.py +1 -1
  68. trilogy/parsing/common.py +307 -64
  69. trilogy/parsing/parse_engine.py +277 -618
  70. trilogy/parsing/render.py +50 -26
  71. trilogy/scripts/trilogy.py +2 -1
  72. pytrilogy-0.0.2.58.dist-info/RECORD +0 -87
  73. trilogy/core/models.py +0 -4960
  74. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/LICENSE.md +0 -0
  75. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/entry_points.txt +0 -0
  76. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1859 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC
4
+ from datetime import date, datetime
5
+ from functools import cached_property, singledispatchmethod
6
+ from typing import (
7
+ TYPE_CHECKING,
8
+ Any,
9
+ Iterable,
10
+ List,
11
+ Optional,
12
+ Self,
13
+ Sequence,
14
+ Set,
15
+ Tuple,
16
+ Union,
17
+ )
18
+
19
+ from pydantic import (
20
+ BaseModel,
21
+ ConfigDict,
22
+ Field,
23
+ ValidationInfo,
24
+ computed_field,
25
+ field_validator,
26
+ )
27
+
28
+ from trilogy.constants import DEFAULT_NAMESPACE, MagicConstants
29
+ from trilogy.core.constants import ALL_ROWS_CONCEPT
30
+ from trilogy.core.enums import (
31
+ BooleanOperator,
32
+ ComparisonOperator,
33
+ DatePart,
34
+ Derivation,
35
+ FunctionClass,
36
+ FunctionType,
37
+ Granularity,
38
+ Modifier,
39
+ Ordering,
40
+ Purpose,
41
+ WindowType,
42
+ )
43
+ from trilogy.core.models.author import (
44
+ AggregateWrapper,
45
+ AlignClause,
46
+ AlignItem,
47
+ CaseElse,
48
+ CaseWhen,
49
+ Comparison,
50
+ Concept,
51
+ ConceptRef,
52
+ Conditional,
53
+ FilterItem,
54
+ Function,
55
+ Grain,
56
+ HavingClause,
57
+ Metadata,
58
+ MultiSelectLineage,
59
+ OrderBy,
60
+ OrderItem,
61
+ Parenthetical,
62
+ RowsetItem,
63
+ RowsetLineage,
64
+ SelectLineage,
65
+ SubselectComparison,
66
+ WhereClause,
67
+ WindowItem,
68
+ )
69
+ from trilogy.core.models.core import (
70
+ Addressable,
71
+ DataType,
72
+ DataTyped,
73
+ ListType,
74
+ ListWrapper,
75
+ MapType,
76
+ MapWrapper,
77
+ NumericType,
78
+ StructType,
79
+ TupleWrapper,
80
+ arg_to_datatype,
81
+ )
82
+ from trilogy.core.models.datasource import (
83
+ Address,
84
+ ColumnAssignment,
85
+ Datasource,
86
+ DatasourceMetadata,
87
+ RawColumnExpr,
88
+ )
89
+ from trilogy.core.models.environment import Environment
90
+ from trilogy.utility import unique
91
+
92
+ # TODO: refactor to avoid these
93
+ if TYPE_CHECKING:
94
+ from trilogy.core.models.build_environment import BuildEnvironment
95
+ from trilogy.core.models.execute import CTE, UnionCTE
96
+
97
+ LOGGER_PREFIX = "[MODELS_BUILD]"
98
+
99
+
100
class BuildConceptArgs(ABC):
    """Interface for build-time objects that expose the concepts they reference.

    Subclasses are expected to override ``concept_arguments``; the other two
    accessors have defaults.
    """

    @property
    def concept_arguments(self) -> Sequence["BuildConcept"]:
        """Concepts referenced by this object. The base implementation raises."""
        raise NotImplementedError

    @property
    def row_arguments(self) -> Sequence["BuildConcept"]:
        """Row arguments; by default the same as ``concept_arguments``."""
        return self.concept_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["BuildConcept", ...]]:
        """Tuples of existence arguments; empty by default."""
        return list()
112
+
113
+
114
def concept_is_relevant(
    concept: BuildConcept,
    others: list[BuildConcept],
) -> bool:
    """Decide whether ``concept`` should be kept when compared against ``others``.

    The checks run in order; the first one that applies decides the result:

    1. An aggregate whose lineage is not a ``BuildAggregateWrapper`` with a
       truthy ``by`` is not relevant.
    2. A property or metric that has keys is not relevant when any key is
       found in ``others``.
    3. A metric is not relevant when every grain component is in ``others``.
    4. A ``Derivation.BASIC`` concept is relevant exactly when at least one of
       its concept arguments is relevant.
    5. A single-row concept is not relevant; anything else is.
    """
    lineage = concept.lineage
    if concept.is_aggregate and (
        not isinstance(lineage, BuildAggregateWrapper) or not lineage.by
    ):
        return False

    purpose = concept.purpose
    if purpose in (Purpose.PROPERTY, Purpose.METRIC) and concept.keys:
        if any(key in others for key in concept.keys):
            return False

    if purpose in (Purpose.METRIC,):
        if all(component in others for component in concept.grain.components):
            return False

    if concept.derivation in (Derivation.BASIC,):
        return any(
            concept_is_relevant(argument, others)
            for argument in concept.concept_arguments
        )

    return not concept.granularity == Granularity.SINGLE_ROW
137
+
138
+
139
def concepts_to_build_grain_concepts(
    concepts: Iterable[BuildConcept | str], environment: "BuildEnvironment" | None
) -> list[BuildConcept]:
    """Resolve ``concepts`` and return the relevant ones, de-duplicated and sorted.

    Non-``BuildConcept`` inputs are looked up in ``environment.concepts``.
    The resolved concepts are filtered with ``concept_is_relevant`` (each one
    judged against the full resolved list), de-duplicated on ``address`` and
    sorted by ``name``.

    Raises:
        ValueError: if an input is not a ``BuildConcept`` and no (truthy)
            environment was provided to resolve it.
    """
    resolved = []
    for candidate in concepts:
        if isinstance(candidate, BuildConcept):
            resolved.append(candidate)
            continue
        if not environment:
            raise ValueError(
                f"Unable to resolve input {candidate} without environment provided to concepts_to_grain call"
            )
        resolved.append(environment.concepts[candidate])

    relevant: List[BuildConcept] = [
        item for item in resolved if concept_is_relevant(item, resolved)
    ]
    relevant = unique(relevant, "address")
    return sorted(relevant, key=lambda item: item.name)
161
+
162
+
163
class LooseBuildConceptList(BaseModel):
    """A sequence of build concepts that is compared by its set of addresses.

    Equality, containment and the set-style helpers all operate on
    ``addresses`` (the ``address`` of every concept), so order and duplicates
    in ``concepts`` do not matter for comparisons.
    """

    concepts: Sequence[BuildConcept]

    @cached_property
    def addresses(self) -> set[str]:
        """The set of ``address`` values of the contained concepts (cached)."""
        return {s.address for s in self.concepts}

    @classmethod
    def validate(cls, v):
        """Coerce ``v`` into an instance of this class.

        An existing instance is returned unchanged; any other value is used
        as the ``concepts`` field. The field must be passed by keyword:
        pydantic models do not accept positional constructor arguments, so
        the previous ``cls(v)`` call always raised ``TypeError``.
        """
        if isinstance(v, cls):
            return v
        return cls(concepts=v)

    @cached_property
    def sorted_addresses(self) -> List[str]:
        """``addresses`` as a sorted list (cached)."""
        return sorted(self.addresses)

    def __str__(self) -> str:
        return f"lcl{str(self.sorted_addresses)}"

    def __iter__(self):
        return iter(self.concepts)

    def __eq__(self, other):
        """Equal when ``other`` is the same kind of list with the same addresses."""
        if not isinstance(other, LooseBuildConceptList):
            return False
        return self.addresses == other.addresses

    def issubset(self, other):
        """Address-set subset test; ``False`` for any non-list ``other``."""
        if not isinstance(other, LooseBuildConceptList):
            return False
        return self.addresses.issubset(other.addresses)

    def __contains__(self, other):
        """Membership by address: accepts an address string or a ``BuildConcept``."""
        if isinstance(other, str):
            return other in self.addresses
        if not isinstance(other, BuildConcept):
            return False
        return other.address in self.addresses

    def difference(self, other):
        """Addresses in this list but not in ``other``.

        Returns ``False`` (not an empty set) when ``other`` is not a
        ``LooseBuildConceptList``; kept as-is for existing callers.
        """
        if not isinstance(other, LooseBuildConceptList):
            return False
        return self.addresses.difference(other.addresses)

    def isdisjoint(self, other):
        """Address-set disjointness test; ``False`` for any non-list ``other``."""
        if not isinstance(other, LooseBuildConceptList):
            return False
        return self.addresses.isdisjoint(other.addresses)
210
+
211
+
212
class ConstantInlineable(ABC):
    """Interface for objects that provide an ``inline_constant`` operation."""

    def inline_constant(self, concept: BuildConcept):
        """Subclasses override this; the base implementation always raises."""
        raise NotImplementedError()
216
+
217
+
218
def get_concept_row_arguments(expr) -> List["BuildConcept"]:
    """Collect the row-level concepts of ``expr`` into a new list.

    A ``BuildConcept`` yields itself; any other ``BuildConceptArgs`` yields its
    ``row_arguments``; everything else yields an empty list.
    """
    # BuildConcept must be tested first: it is itself a BuildConceptArgs.
    if isinstance(expr, BuildConcept):
        return [expr]
    if isinstance(expr, BuildConceptArgs):
        return list(expr.row_arguments)
    return []
226
+
227
+
228
def get_concept_arguments(expr) -> List["BuildConcept"]:
    """Collect the concepts referenced by ``expr`` into a new list.

    A ``BuildConcept`` yields itself; any other ``BuildConceptArgs`` yields its
    ``concept_arguments``; everything else yields an empty list.
    """
    # BuildConcept must be tested first: it is itself a BuildConceptArgs.
    if isinstance(expr, BuildConcept):
        return [expr]
    if isinstance(expr, BuildConceptArgs):
        return list(expr.concept_arguments)
    return []
239
+
240
+
241
class BuildGrain(BaseModel):
    """A grain: a set of concept addresses plus an optional where clause.

    Equality and the set-style helpers compare ``components`` only; the
    ``where_clause`` is carried along by some operations and dropped by others
    (see the individual methods).
    """

    components: set[str] = Field(default_factory=set)
    where_clause: Optional[BuildWhereClause] = None

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def without_condition(self):
        """Return a grain with the same components and no where clause."""
        return BuildGrain(components=self.components)

    @classmethod
    def from_concepts(
        cls,
        concepts: Iterable[BuildConcept | str],
        environment: BuildEnvironment | None = None,
        where_clause: BuildWhereClause | None = None,
    ) -> "BuildGrain":
        """Build a grain from the relevant concepts among ``concepts``.

        Relevance filtering and resolution are delegated to
        ``concepts_to_build_grain_concepts``.
        """
        grain_concepts = concepts_to_build_grain_concepts(
            concepts, environment=environment
        )
        addresses = {concept.address for concept in grain_concepts}
        return BuildGrain(components=addresses, where_clause=where_clause)

    @field_validator("components", mode="before")
    def component_validator(cls, v, info: ValidationInfo):
        """Normalize ``components`` to a set of strings before validation.

        A list is converted to a set, replacing ``BuildConcept`` items with
        their address. Any other input is checked as given.

        Raises:
            ValueError: if the result is not a set, or contains a non-string.
        """
        if isinstance(v, list):
            candidate = {
                item.address if isinstance(item, BuildConcept) else item for item in v
            }
        else:
            candidate = v
        if not isinstance(candidate, set):
            raise ValueError(f"Invalid grain component {candidate}, is not set")
        if any(not isinstance(x, str) for x in candidate):
            raise ValueError(f"Invalid component {candidate}")
        return candidate

    def __add__(self, other: "BuildGrain") -> "BuildGrain":
        """Union the components and merge the where clauses.

        If only one side has a where clause it is used; if both have one and
        they differ, they are combined with a boolean AND.
        """
        if not other:
            return self
        mine = self.where_clause
        theirs = other.where_clause
        if not theirs:
            merged = mine
        elif not mine:
            merged = theirs
        elif theirs == mine:
            merged = mine
        else:
            merged = BuildWhereClause(
                conditional=BuildConditional(
                    left=mine.conditional,
                    right=theirs.conditional,
                    operator=BooleanOperator.AND,
                )
            )
        return BuildGrain(
            components=self.components.union(other.components), where_clause=merged
        )

    def __sub__(self, other: "BuildGrain") -> "BuildGrain":
        """Remove ``other``'s components, keeping this grain's where clause."""
        remaining = self.components.difference(other.components)
        return BuildGrain(components=remaining, where_clause=self.where_clause)

    @property
    def abstract(self):
        """True when there are no components, or all end with ALL_ROWS_CONCEPT."""
        return not self.components or all(
            component.endswith(ALL_ROWS_CONCEPT) for component in self.components
        )

    def __eq__(self, other: object):
        """Compare components with another grain or a list of ``BuildConcept``.

        The where clause is not part of the comparison.
        """
        if isinstance(other, list):
            if not all(isinstance(item, BuildConcept) for item in other):
                return False
            return self.components == {item.address for item in other}
        if isinstance(other, BuildGrain):
            return self.components == other.components
        return False

    def issubset(self, other: "BuildGrain"):
        return self.components.issubset(other.components)

    def union(self, other: "BuildGrain"):
        """Union the components; only this grain's where clause is kept."""
        combined = self.components.union(other.components)
        return BuildGrain(components=combined, where_clause=self.where_clause)

    def isdisjoint(self, other: "BuildGrain"):
        return self.components.isdisjoint(other.components)

    def intersection(self, other: "BuildGrain") -> "BuildGrain":
        """Intersect the components; the result has no where clause."""
        shared = self.components.intersection(other.components)
        return BuildGrain(components=shared)

    def __str__(self):
        if self.abstract:
            rendered = "Grain<Abstract>"
        else:
            rendered = "Grain<" + ",".join(sorted(self.components)) + ">"
        if self.where_clause:
            rendered += f"|{str(self.where_clause)}"
        return rendered

    def __radd__(self, other) -> "BuildGrain":
        """Reflected add; a left operand equal to 0 returns this grain unchanged."""
        if other == 0:
            return self
        return self.__add__(other)
359
+
360
+
361
class BuildParenthetical(DataTyped, ConstantInlineable, BuildConceptArgs, BaseModel):
    """A single expression wrapped in parentheses."""

    content: "BuildExpr"

    def __add__(self, other) -> Union["BuildParenthetical", "BuildConditional"]:
        """AND this parenthetical with another condition; ``None`` is a no-op.

        Raises:
            ValueError: if ``other`` is not a comparison, conditional or
                parenthetical.
        """
        if other is None:
            return self
        if not isinstance(
            other, (BuildComparison, BuildConditional, BuildParenthetical)
        ):
            raise ValueError(f"Cannot add {self.__class__} and {type(other)}")
        return BuildConditional(left=self, right=other, operator=BooleanOperator.AND)

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return f"({self.content!s})"

    def inline_constant(self, BuildConcept: BuildConcept):
        """Return a new parenthetical with the constant inlined into the content.

        Content that is not ``ConstantInlineable`` is reused unchanged.
        """
        inner = self.content
        if isinstance(inner, ConstantInlineable):
            inner = inner.inline_constant(BuildConcept)
        return BuildParenthetical(content=inner)

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """The content itself if it is a concept, otherwise its concept arguments."""
        inner = self.content
        if isinstance(inner, BuildConcept):
            return [inner]
        if isinstance(inner, BuildConceptArgs):
            return list(inner.concept_arguments)
        return []

    @property
    def row_arguments(self) -> Sequence[BuildConcept]:
        """Delegate to the content when possible, else ``concept_arguments``."""
        inner = self.content
        if not isinstance(inner, BuildConceptArgs):
            return self.concept_arguments
        return inner.row_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["BuildConcept", ...]]:
        """Delegate to the content when possible, else an empty list."""
        inner = self.content
        if not isinstance(inner, BuildConceptArgs):
            return []
        return inner.existence_arguments

    @property
    def output_datatype(self):
        return arg_to_datatype(self.content)
413
+
414
+
415
class BuildConditional(BuildConceptArgs, ConstantInlineable, BaseModel):
    """Two operands joined by a boolean operator."""

    left: Union[
        int,
        str,
        float,
        list,
        bool,
        MagicConstants,
        BuildConcept,
        BuildComparison,
        BuildConditional,
        BuildParenthetical,
        BuildSubselectComparison,
        BuildFunction,
        BuildFilterItem,
    ]
    right: Union[
        int,
        str,
        float,
        list,
        bool,
        MagicConstants,
        BuildConcept,
        BuildComparison,
        BuildConditional,
        BuildParenthetical,
        BuildSubselectComparison,
        BuildFunction,
        BuildFilterItem,
    ]
    operator: BooleanOperator

    def __add__(self, other) -> "BuildConditional":
        """AND this conditional with ``other``.

        ``None`` and operands that render to the same string as this
        conditional are treated as no-ops.

        Raises:
            ValueError: if ``other`` is not a comparison, conditional or
                parenthetical.
        """
        if other is None or str(other) == str(self):
            return self
        if not isinstance(
            other, (BuildComparison, BuildConditional, BuildParenthetical)
        ):
            raise ValueError(f"Cannot add {self.__class__} and {type(other)}")
        return BuildConditional(left=self, right=other, operator=BooleanOperator.AND)

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return f"{self.left!s} {self.operator.value} {self.right!s}"

    def __eq__(self, other):
        if not isinstance(other, BuildConditional):
            return False
        if not self.left == other.left:
            return False
        return self.right == other.right and self.operator == other.operator

    def inline_constant(self, constant: BuildConcept) -> "BuildConditional":
        """Return a copy with references to ``constant`` replaced by its value.

        The value is the first argument of the constant's function lineage.
        """
        assert isinstance(constant.lineage, BuildFunction)
        new_val = constant.lineage.arguments[0]

        def substitute(operand):
            # Recurse into inlineable operands; swap direct references by address.
            if isinstance(operand, ConstantInlineable):
                return operand.inline_constant(constant)
            if isinstance(operand, BuildConcept) and operand.address == constant.address:
                return new_val
            return operand

        new_left = substitute(self.left)
        new_right = substitute(self.right)

        # The right operand is additionally replaced on plain equality.
        if self.right == constant:
            new_right = new_val

        return BuildConditional(
            left=new_left,
            right=new_right,
            operator=self.operator,
        )

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """Concepts referenced by the left operand followed by the right."""
        return get_concept_arguments(self.left) + get_concept_arguments(self.right)

    @property
    def row_arguments(self) -> List[BuildConcept]:
        """Row arguments of the left operand followed by the right."""
        return get_concept_row_arguments(self.left) + get_concept_row_arguments(
            self.right
        )

    @property
    def existence_arguments(self) -> list[tuple[BuildConcept, ...]]:
        """Existence arguments of both operands, left first."""
        collected: list[tuple[BuildConcept, ...]] = []
        for operand in (self.left, self.right):
            if isinstance(operand, BuildConceptArgs):
                collected += operand.existence_arguments
        return collected

    def decompose(self):
        """Flatten nested AND conditionals into a list of their operands.

        A conditional with any other operator is returned as a single chunk.
        """
        if self.operator != BooleanOperator.AND:
            return [self]
        pieces = []
        for operand in (self.left, self.right):
            if isinstance(operand, BuildConditional):
                pieces.extend(operand.decompose())
            else:
                pieces.append(operand)
        return pieces
541
+
542
+
543
class BuildWhereClause(BuildConceptArgs, BaseModel):
    """Wraps one conditional and forwards the argument accessors to it."""

    conditional: Union[
        BuildSubselectComparison,
        BuildComparison,
        BuildConditional,
        BuildParenthetical,
    ]

    def __eq__(self, other):
        """Equal to another where clause (build or author) with an equal conditional."""
        return (
            isinstance(other, (BuildWhereClause, WhereClause))
            and self.conditional == other.conditional
        )

    def __repr__(self):
        return f"{self.conditional!s}"

    def __str__(self):
        return self.__repr__()

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        conditional = self.conditional
        return conditional.concept_arguments

    @property
    def row_arguments(self) -> Sequence[BuildConcept]:
        conditional = self.conditional
        return conditional.row_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["BuildConcept", ...]]:
        conditional = self.conditional
        return conditional.existence_arguments
573
+
574
+
575
class BuildHavingClause(BuildWhereClause):
    """Having-clause counterpart of ``BuildWhereClause``; adds no fields or methods."""

    pass
577
+
578
+
579
class BuildComparison(BuildConceptArgs, ConstantInlineable, BaseModel):
    """Two operands joined by a comparison operator."""

    left: Union[
        int,
        str,
        float,
        list,
        bool,
        datetime,
        date,
        BuildFunction,
        BuildConcept,
        BuildConditional,
        DataType,
        BuildComparison,
        BuildParenthetical,
        MagicConstants,
        BuildWindowItem,
        BuildAggregateWrapper,
    ]
    right: Union[
        int,
        str,
        float,
        list,
        bool,
        date,
        datetime,
        BuildConcept,
        BuildFunction,
        BuildConditional,
        DataType,
        BuildComparison,
        BuildParenthetical,
        MagicConstants,
        BuildWindowItem,
        BuildAggregateWrapper,
        TupleWrapper,
    ]
    operator: ComparisonOperator

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

    def __add__(self, other):
        """AND this comparison with ``other``.

        ``None`` and an operand equal to this comparison are no-ops.

        Raises:
            ValueError: if ``other`` is not a comparison, conditional or
                parenthetical.
        """
        if other is None:
            return self
        if isinstance(other, (BuildComparison, BuildConditional, BuildParenthetical)):
            if other == self:
                return self
            return BuildConditional(
                left=self, right=other, operator=BooleanOperator.AND
            )
        raise ValueError(f"Cannot add {type(other)} to {__class__}")

    def __repr__(self):
        # Concepts are rendered by address only; everything else via str().
        left = (
            self.left.address
            if isinstance(self.left, BuildConcept)
            else str(self.left)
        )
        right = (
            self.right.address
            if isinstance(self.right, BuildConcept)
            else str(self.right)
        )
        return f"{left} {self.operator.value} {right}"

    def __str__(self):
        return self.__repr__()

    def __eq__(self, other):
        if not isinstance(other, BuildComparison):
            return False
        if not self.left == other.left:
            return False
        return self.right == other.right and self.operator == other.operator

    def inline_constant(self, constant: BuildConcept):
        """Return a copy with references to ``constant`` replaced by its value.

        The value is the first argument of the constant's function lineage.
        The result is built with this instance's own class.
        """
        assert isinstance(constant.lineage, BuildFunction)
        new_val = constant.lineage.arguments[0]

        def substitute(operand):
            # Recurse into inlineable operands; swap direct references by address.
            if isinstance(operand, ConstantInlineable):
                return operand.inline_constant(constant)
            if isinstance(operand, BuildConcept) and operand.address == constant.address:
                return new_val
            return operand

        new_left = substitute(self.left)
        new_right = substitute(self.right)

        return self.__class__(
            left=new_left,
            right=new_right,
            operator=self.operator,
        )

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """Concepts referenced by the left operand followed by the right."""
        return get_concept_arguments(self.left) + get_concept_arguments(self.right)

    @property
    def row_arguments(self) -> List[BuildConcept]:
        """Row arguments of the left operand followed by the right."""
        return get_concept_row_arguments(self.left) + get_concept_row_arguments(
            self.right
        )

    @property
    def existence_arguments(self) -> List[Tuple[BuildConcept, ...]]:
        """Existence arguments of both operands, left first."""
        collected: List[Tuple[BuildConcept, ...]] = []
        for operand in (self.left, self.right):
            if isinstance(operand, BuildConceptArgs):
                collected += operand.existence_arguments
        return collected
709
+
710
+
711
class BuildSubselectComparison(BuildComparison):
    """A comparison whose right operand supplies existence arguments.

    Row arguments come from the left operand only; the right operand's
    concepts are reported as a single existence-argument tuple.
    """

    left: Union[
        int,
        str,
        float,
        list,
        bool,
        datetime,
        date,
        BuildFunction,
        BuildConcept,
        "BuildConditional",
        DataType,
        "BuildComparison",
        "BuildParenthetical",
        MagicConstants,
        BuildWindowItem,
        BuildAggregateWrapper,
    ]
    right: Union[
        int,
        str,
        float,
        list,
        bool,
        date,
        datetime,
        BuildConcept,
        BuildFunction,
        "BuildConditional",
        DataType,
        "BuildComparison",
        "BuildParenthetical",
        MagicConstants,
        BuildWindowItem,
        BuildAggregateWrapper,
        TupleWrapper,
    ]
    operator: ComparisonOperator

    def __eq__(self, other):
        """Equal only to another subselect comparison with matching parts."""
        if not isinstance(other, BuildSubselectComparison):
            return False
        return (
            self.left == other.left
            and self.right == other.right
            and self.operator == other.operator
        )

    @property
    def row_arguments(self) -> List[BuildConcept]:
        left_operand = self.left
        return get_concept_row_arguments(left_operand)

    @property
    def existence_arguments(self) -> list[tuple["BuildConcept", ...]]:
        right_concepts = get_concept_arguments(self.right)
        return [tuple(right_concepts)]
769
+
770
+
771
class BuildConcept(Addressable, BuildConceptArgs, DataTyped, BaseModel):
    """Build-time representation of a concept.

    Extra fields are rejected (``extra="forbid"``). The concept's ``address``
    is ``"{namespace}.{name}"``.

    NOTE(review): ``__hash__`` includes lineage and keys while ``__eq__``
    ignores them, so two instances that compare equal can hash differently.
    Confirm this is intended before using instances as dict/set keys.
    """

    model_config = ConfigDict(extra="forbid")
    name: str
    datatype: DataType | ListType | StructType | MapType | NumericType
    purpose: Purpose
    # Backing value for the ``is_aggregate`` property.
    build_is_aggregate: bool
    derivation: Derivation = Derivation.ROOT
    granularity: Granularity = Granularity.MULTI_ROW
    namespace: Optional[str] = Field(default=DEFAULT_NAMESPACE, validate_default=True)
    metadata: Metadata = Field(
        default_factory=lambda: Metadata(description=None, line_number=None),
        validate_default=True,
    )
    # Expression this concept is derived from, if any.
    lineage: Optional[
        Union[
            BuildFunction,
            BuildWindowItem,
            BuildFilterItem,
            BuildAggregateWrapper,
            BuildRowsetItem,
            BuildMultiSelectLineage,
        ]
    ] = None

    keys: Optional[set[str]] = None
    grain: BuildGrain = Field(default=None, validate_default=True)  # type: ignore
    modifiers: List[Modifier] = Field(default_factory=list)  # type: ignore
    pseudonyms: set[str] = Field(default_factory=set)

    def with_select_context(self, *args, **kwargs):
        """Return this concept unchanged; all arguments are ignored."""
        return self

    @property
    def is_aggregate(self) -> bool:
        """Whether this concept is an aggregate (the stored ``build_is_aggregate``)."""
        return self.build_is_aggregate

    def duplicate(self) -> BuildConcept:
        """Return a deep copy of this concept."""
        return self.model_copy(deep=True)

    def __hash__(self):
        # Hash of a string built from name, datatype, purpose, lineage,
        # namespace, grain and keys.
        return hash(
            f"{self.name}+{self.datatype}+ {self.purpose} + {str(self.lineage)} + {self.namespace} + {str(self.grain)} + {str(self.keys)}"
        )

    def __repr__(self):
        base = f"{self.address}@{self.grain}"
        return base

    @property
    def output_datatype(self):
        """The concept's own ``datatype``."""
        return self.datatype

    def __eq__(self, other: object):
        """Compare against an address string or another concept.

        A string equal to ``address`` compares equal; any other string falls
        through to the type check below and compares unequal. Concepts are
        compared on name, datatype, purpose, namespace and grain.
        """
        if isinstance(other, str):
            if self.address == other:
                return True
        if not isinstance(other, (BuildConcept, Concept)):
            return False
        return (
            self.name == other.name
            and self.datatype == other.datatype
            and self.purpose == other.purpose
            and self.namespace == other.namespace
            and self.grain == other.grain
            # and self.keys == other.keys
        )

    def __str__(self):
        grain = str(self.grain) if self.grain else "Grain<>"
        return f"{self.namespace}.{self.name}@{grain}"

    @cached_property
    def address(self) -> str:
        """``"{namespace}.{name}"``, cached on first access."""
        return f"{self.namespace}.{self.name}"

    @property
    def output(self) -> "BuildConcept":
        """The concept itself."""
        return self

    @property
    def safe_address(self) -> str:
        """The address with ``.`` replaced by ``_``.

        The namespace prefix is omitted for the default namespace and when
        the namespace is empty.
        """
        if self.namespace == DEFAULT_NAMESPACE:
            return self.name.replace(".", "_")
        elif self.namespace:
            return f"{self.namespace.replace('.','_')}_{self.name.replace('.','_')}"
        return self.name.replace(".", "_")

    def with_grain(self, grain: Optional["BuildGrain" | BuildConcept] = None) -> Self:
        """Return a copy of this concept at the given grain.

        A ``BuildConcept`` argument is converted to a grain containing only
        its address; a falsy argument produces an empty grain. The copy is
        created with ``model_construct`` from this instance's field values.
        """
        if isinstance(grain, BuildConcept):
            grain = BuildGrain(
                components=set(
                    [
                        grain.address,
                    ]
                )
            )
        return self.__class__.model_construct(
            name=self.name,
            datatype=self.datatype,
            purpose=self.purpose,
            metadata=self.metadata,
            lineage=self.lineage,
            grain=grain if grain else BuildGrain(components=set()),
            namespace=self.namespace,
            keys=self.keys,
            modifiers=self.modifiers,
            pseudonyms=self.pseudonyms,
            ## bound
            derivation=self.derivation,
            granularity=self.granularity,
            build_is_aggregate=self.build_is_aggregate,
        )

    @property
    def _with_default_grain(self) -> Self:
        """Return a copy of this concept with a grain chosen from its purpose.

        * KEY: the concept's own address.
        * PROPERTY: its keys plus the addresses of the sources of each
          lineage concept argument.
        * METRIC: an empty grain.
        * CONSTANT: its own address unless the derivation is CONSTANT, in
          which case the current grain is kept.
        * anything else: the current grain.
        """
        if self.purpose == Purpose.KEY:
            # we need to make this abstract
            grain = BuildGrain(components={self.address})
        elif self.purpose == Purpose.PROPERTY:
            components = []
            if self.keys:
                components = [*self.keys]
            if self.lineage:
                for item in self.lineage.concept_arguments:
                    components += [x.address for x in item.sources]
            # TODO: set synonyms
            grain = BuildGrain(
                components=set([x for x in components]),
            )  # synonym_set=generate_BuildConcept_synonyms(components))
        elif self.purpose == Purpose.METRIC:
            grain = BuildGrain()
        elif self.purpose == Purpose.CONSTANT:
            if self.derivation != Derivation.CONSTANT:
                grain = BuildGrain(components={self.address})
            else:
                grain = self.grain
        else:
            grain = self.grain  # type: ignore
        return self.__class__.model_construct(
            name=self.name,
            datatype=self.datatype,
            purpose=self.purpose,
            metadata=self.metadata,
            lineage=self.lineage,
            grain=grain,
            keys=self.keys,
            namespace=self.namespace,
            modifiers=self.modifiers,
            pseudonyms=self.pseudonyms,
            ## bound
            derivation=self.derivation,
            granularity=self.granularity,
            build_is_aggregate=self.build_is_aggregate,
        )

    def with_default_grain(self) -> "BuildConcept":
        """Method form of the ``_with_default_grain`` property."""
        return self._with_default_grain

    @property
    def sources(self) -> List["BuildConcept"]:
        """All concepts this concept is derived from, collected recursively.

        Returns an empty list when there is no lineage or when the lineage is
        a ``BuildMultiSelectLineage``.

        Raises:
            SyntaxError: if a direct lineage argument has this concept's address.
        """
        if self.lineage:
            output: List[BuildConcept] = []

            def get_sources(
                expr: Union[
                    BuildFunction,
                    BuildWindowItem,
                    BuildFilterItem,
                    BuildAggregateWrapper,
                    BuildRowsetItem,
                    BuildMultiSelectLineage,
                ],
                output: List[BuildConcept],
            ):
                # Multi-select lineage contributes no sources.
                if isinstance(expr, BuildMultiSelectLineage):
                    return
                for item in expr.concept_arguments:
                    if isinstance(item, BuildConcept):
                        if item.address == self.address:
                            raise SyntaxError(
                                f"BuildConcept {self.address} references itself"
                            )
                        output.append(item)
                        # Recurse through the argument's own sources.
                        output += item.sources

            get_sources(self.lineage, output)
            return output
        return []

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """The lineage's concept arguments, or an empty list without lineage."""
        return self.lineage.concept_arguments if self.lineage else []
963
+
964
+
965
class BuildOrderItem(DataTyped, BuildConceptArgs, BaseModel):
    """An expression paired with an ordering direction."""

    expr: BuildExpr
    order: Ordering

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """The expression itself if it is a concept, otherwise its concept arguments."""
        target = self.expr
        if isinstance(target, BuildConcept):
            return [target]
        if isinstance(target, BuildConceptArgs):
            return list(target.concept_arguments)
        return []

    @property
    def row_arguments(self) -> Sequence[BuildConcept]:
        """Delegate to the expression when possible, else ``concept_arguments``."""
        target = self.expr
        if not isinstance(target, BuildConceptArgs):
            return self.concept_arguments
        return target.row_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["BuildConcept", ...]]:
        """Delegate to the expression when possible, else an empty list."""
        target = self.expr
        if not isinstance(target, BuildConceptArgs):
            return []
        return target.existence_arguments

    @property
    def output_datatype(self):
        return arg_to_datatype(self.expr)
994
+
995
+
996
class BuildWindowItem(DataTyped, BuildConceptArgs, BaseModel):
    """Build-time window function applied to a single concept."""

    type: WindowType
    content: BuildConcept
    order_by: List[BuildOrderItem]
    over: List["BuildConcept"] = Field(default_factory=list)
    index: Optional[int] = None

    def __repr__(self) -> str:
        return f"{self.type}({self.content} {self.index}, {self.over}, {self.order_by})"

    def __str__(self):
        # The string form is deliberately the same as the repr.
        return self.__repr__()

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """The windowed concept, then ordering concepts, then ``over`` concepts."""
        collected = [self.content]
        for ordering in self.order_by:
            collected.extend(ordering.concept_arguments)
        collected.extend(self.over)
        return collected

    @property
    def output(self) -> BuildConcept:
        """The concept the window is computed over."""
        return self.content

    @property
    def output_datatype(self):
        """INTEGER for RANK / ROW_NUMBER, otherwise the content's datatype."""
        yields_position = self.type in (WindowType.RANK, WindowType.ROW_NUMBER)
        return DataType.INTEGER if yields_position else self.content.output_datatype

    @property
    def output_purpose(self):
        """Window results always have PROPERTY purpose."""
        return Purpose.PROPERTY
1031
+
1032
+
1033
class BuildCaseWhen(BuildConceptArgs, BaseModel):
    """A ``WHEN <comparison> THEN <expr>`` branch of a CASE expression."""

    comparison: BuildConditional | BuildSubselectComparison | BuildComparison
    expr: "BuildExpr"

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return f"WHEN {self.comparison!s} THEN {self.expr!s}"

    @property
    def concept_arguments(self):
        """Concepts used by the comparison, followed by those in the result."""
        from_condition = get_concept_arguments(self.comparison)
        from_result = get_concept_arguments(self.expr)
        return from_condition + from_result

    @property
    def concept_row_arguments(self):
        """Row-level concepts of the comparison, followed by those of the result."""
        from_condition = get_concept_row_arguments(self.comparison)
        from_result = get_concept_row_arguments(self.expr)
        return from_condition + from_result
1052
+
1053
+
1054
class BuildCaseElse(BuildConceptArgs, BaseModel):
    """The ``ELSE <expr>`` branch of a CASE expression."""

    expr: "BuildExpr"
    # Fixed marker field so this model is easy to tell apart from BuildCaseWhen.
    discriminant: ComparisonOperator = ComparisonOperator.ELSE

    @property
    def concept_arguments(self):
        """Concepts referenced by the fallback expression."""
        fallback = self.expr
        return get_concept_arguments(fallback)
1062
+
1063
+
1064
class BuildFunction(DataTyped, BuildConceptArgs, BaseModel):
    """Build-time function call: an operator applied to a list of arguments."""

    operator: FunctionType
    arg_count: int = Field(default=1)
    output_datatype: DataType | ListType | StructType | MapType | NumericType
    output_purpose: Purpose
    valid_inputs: Optional[
        Union[
            Set[DataType],
            List[Set[DataType]],
        ]
    ] = None
    arguments: Sequence[
        Union[
            int,
            float,
            str,
            date,
            datetime,
            MapWrapper[Any, Any],
            DataType,
            ListType,
            MapType,
            NumericType,
            DatePart,
            BuildConcept,
            BuildAggregateWrapper,
            BuildFunction,
            BuildWindowItem,
            BuildParenthetical,
            BuildCaseWhen,
            BuildCaseElse,
            list,
            ListWrapper[Any],
        ]
    ]

    def __repr__(self):
        rendered_args = ",".join(str(argument) for argument in self.arguments)
        return f"{self.operator.value}({rendered_args})"

    def __str__(self):
        return self.__repr__()

    @property
    def datatype(self):
        """Alias for ``output_datatype``."""
        return self.output_datatype

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """Concepts referenced by any argument, in argument order."""
        collected = []
        for argument in self.arguments:
            collected.extend(get_concept_arguments(argument))
        return collected

    @property
    def output_grain(self):
        """Grain of the function result.

        Aggregates have an abstract (empty) grain; scalars take the combined
        grain of every concept argument.
        """
        grain = BuildGrain(components=[])
        if self.operator not in FunctionClass.AGGREGATE_FUNCTIONS.value:
            for argument in self.concept_arguments:
                grain += argument.grain
        return grain
1128
+
1129
+
1130
class BuildAggregateWrapper(BuildConceptArgs, DataTyped, BaseModel):
    """An aggregate function together with the concepts it is grouped by."""

    function: BuildFunction
    by: List[BuildConcept] = Field(default_factory=list)

    def __str__(self):
        if self.by:
            grain_str = [str(c) for c in self.by]
        else:
            grain_str = "abstract"
        return f"{self.function!s}<{grain_str}>"

    @property
    def datatype(self):
        """Datatype of the wrapped function."""
        return self.function.datatype

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """The function's concept arguments followed by the grouping concepts."""
        function_args = self.function.concept_arguments
        return function_args + self.by

    @property
    def output_datatype(self):
        """Output datatype of the wrapped function."""
        return self.function.output_datatype

    @property
    def output_purpose(self):
        """Output purpose of the wrapped function."""
        return self.function.output_purpose
1153
+
1154
+
1155
class BuildFilterItem(BuildConceptArgs, BaseModel):
    """A concept restricted by a where clause."""

    content: BuildConcept
    where: BuildWhereClause

    def __str__(self):
        return f"<Filter: {self.content!s} where {self.where!s}>"

    @property
    def output_datatype(self):
        """Datatype of the filtered concept."""
        return self.content.datatype

    @property
    def output_purpose(self):
        """Purpose of the filtered concept."""
        return self.content.purpose

    @property
    def concept_arguments(self):
        """The filtered concept followed by the where clause's concepts."""
        condition_concepts = self.where.concept_arguments
        return [self.content] + condition_concepts
1173
+
1174
+
1175
class BuildRowsetLineage(BuildConceptArgs, BaseModel):
    """Lineage record for a named rowset."""

    # Name of the rowset.
    name: str
    # Addresses (strings) of the concepts derived from the rowset.
    derived_concepts: List[str]
    # The select the rowset wraps, kept in its author-side form.
    select: SelectLineage | MultiSelectLineage
1179
+
1180
+
1181
class BuildRowsetItem(DataTyped, BuildConceptArgs, BaseModel):
    """A concept exposed through a rowset."""

    content: BuildConcept
    rowset: BuildRowsetLineage

    def __repr__(self):
        return f"<Rowset<{self.rowset.name}>: {self.content!s}>"

    def __str__(self):
        return self.__repr__()

    @property
    def output(self) -> BuildConcept:
        """The wrapped concept."""
        return self.content

    @property
    def output_datatype(self):
        """Datatype of the wrapped concept."""
        wrapped = self.content
        return wrapped.datatype

    @property
    def output_purpose(self):
        """Purpose of the wrapped concept."""
        wrapped = self.content
        return wrapped.purpose

    @property
    def concept_arguments(self):
        """Single-element list holding the wrapped concept."""
        return [self.content]
1206
+
1207
+
1208
class BuildOrderBy(BaseModel):
    """Ordered collection of build-time ORDER BY entries."""

    items: List[BuildOrderItem]

    @property
    def concept_arguments(self) -> List[BuildConcept]:
        """Concepts referenced by every ordering item, in item order.

        ``BuildOrderItem.expr`` is a ``BuildExpr`` and may be a function,
        comparison or literal rather than a bare concept. Each item is asked
        for its own ``concept_arguments`` so that only ``BuildConcept``
        objects are returned, matching every other ``concept_arguments``
        property in this module. When every expression is a bare concept the
        result is the same list of concepts as returning the raw expressions.
        """
        collected: List[BuildConcept] = []
        for item in self.items:
            collected.extend(item.concept_arguments)
        return collected
1214
+
1215
+
1216
class BuildAlignClause(BaseModel):
    """Container for the align items of a multi-select."""

    # One entry per aligned output alias.
    items: List[BuildAlignItem]
1218
+
1219
+
1220
class BuildSelectLineage(BaseModel):
    """Build-time form of a single select statement's lineage."""

    # Concepts selected, in output order.
    selection: List[BuildConcept]
    # Addresses of components that are hidden.
    hidden_components: set[str]
    # Concepts built for this select, keyed by address.
    local_concepts: dict[str, BuildConcept]
    order_by: Optional[BuildOrderBy] = None
    limit: Optional[int] = None
    meta: Metadata = Field(default_factory=lambda: Metadata())
    grain: BuildGrain = Field(default_factory=BuildGrain)
    where_clause: BuildWhereClause | None = Field(default=None)
    having_clause: BuildHavingClause | None = Field(default=None)

    @property
    def output_components(self) -> List[BuildConcept]:
        """The select's outputs, which are exactly ``selection``."""
        selected = self.selection
        return selected
1234
+
1235
+
1236
class BuildMultiSelectLineage(BuildConceptArgs, BaseModel):
    """Build-time lineage for a multi-select (several selects aligned together)."""

    selects: List[SelectLineage]
    grain: BuildGrain
    align: BuildAlignClause
    namespace: str
    order_by: Optional[BuildOrderBy] = None
    limit: Optional[int] = None
    where_clause: Union["BuildWhereClause", None] = Field(default=None)
    having_clause: Union["BuildHavingClause", None] = Field(default=None)
    local_concepts: dict[str, BuildConcept]
    # Precomputed values served by the matching properties below.
    build_concept_arguments: list[BuildConcept]
    build_output_components: list[BuildConcept]
    hidden_components: set[str]

    @property
    def derived_concepts(self) -> set[str]:
        """Addresses of the aligned concepts, one per align item."""
        return {entry.aligned_concept for entry in self.align.items}

    @property
    def output_components(self) -> list[BuildConcept]:
        """The precomputed output components."""
        return self.build_output_components

    @property
    def derived_concept(self) -> set[str]:
        """Same content as ``derived_concepts`` under a singular name."""
        return {entry.aligned_concept for entry in self.align.items}

    @property
    def concept_arguments(self) -> list[BuildConcept]:
        """The precomputed concept arguments."""
        return self.build_concept_arguments

    # these are needed to help disambiguate between parents
    def get_merge_concept(self, check: BuildConcept) -> str | None:
        """Return ``namespace.alias`` of the first align item containing *check*.

        Returns ``None`` when no align item contains it.
        """
        matches = (
            f"{entry.namespace}.{entry.alias}"
            for entry in self.align.items
            if check in entry.concepts_lcl
        )
        return next(matches, None)

    def find_source(self, concept: BuildConcept, cte: CTE | UnionCTE) -> BuildConcept:
        """Find which aligned input concept *cte* provides for *concept*.

        Align items are matched on ``concept.name == alias``; the first of
        their concepts whose address is in ``cte.output_lcl`` is returned.

        Raises:
            SyntaxError: if no such concept exists.
        """
        for entry in self.align.items:
            if concept.name != entry.alias:
                continue
            for candidate in entry.concepts:
                if candidate.address in cte.output_lcl:
                    return candidate
        raise SyntaxError(
            f"Could not find upstream map for multiselect {str(concept)} on cte ({cte})"
        )
1288
+
1289
+
1290
class BuildAlignItem(BaseModel):
    """A set of input concepts aligned under a single output alias."""

    alias: str
    concepts: List[BuildConcept]
    namespace: str = Field(default=DEFAULT_NAMESPACE, validate_default=True)

    @computed_field  # type: ignore
    @cached_property
    def concepts_lcl(self) -> LooseBuildConceptList:
        """``concepts`` wrapped in a ``LooseBuildConceptList`` (cached)."""
        wrapped = LooseBuildConceptList(concepts=self.concepts)
        return wrapped

    @property
    def aligned_concept(self) -> str:
        """Address of the aligned output, formatted as ``namespace.alias``."""
        namespace, alias = self.namespace, self.alias
        return f"{namespace}.{alias}"
1303
+
1304
+
1305
class BuildColumnAssignment(BaseModel):
    """Binding between a datasource column (alias) and a concept."""

    alias: str | RawColumnExpr | BuildFunction
    concept: BuildConcept
    modifiers: List[Modifier] = Field(default_factory=list)

    @property
    def is_complete(self) -> bool:
        """True unless the column carries the PARTIAL modifier."""
        flagged_partial = Modifier.PARTIAL in self.modifiers
        return not flagged_partial

    @property
    def is_nullable(self) -> bool:
        """True when the column carries the NULLABLE modifier."""
        flagged_nullable = Modifier.NULLABLE in self.modifiers
        return flagged_nullable
1317
+
1318
+
1319
class BuildDatasource(BaseModel):
    """Build-time datasource: a named location plus its column bindings."""

    name: str
    columns: List[BuildColumnAssignment]
    address: Union[Address, str]
    grain: BuildGrain = Field(
        default_factory=lambda: BuildGrain(components=set()), validate_default=True
    )
    namespace: Optional[str] = Field(default=DEFAULT_NAMESPACE, validate_default=True)
    metadata: DatasourceMetadata = Field(
        default_factory=lambda: DatasourceMetadata(freshness_concept=None)
    )
    where: Optional[BuildWhereClause] = None
    non_partial_for: Optional[BuildWhereClause] = None

    def __hash__(self):
        # Hash on the namespaced identifier only.
        return hash(self.identifier)

    def __add__(self, other):
        """Adding is only defined for equal datasources and returns ``self``.

        Raises:
            ValueError: if *other* does not compare equal to this datasource.
        """
        if other == self:
            return self
        raise ValueError(
            "Attempted to add two datasources that are not identical, this is not a valid operation"
        )

    @property
    def condition(self):
        """Always ``None``."""
        return None

    @property
    def can_be_inlined(self) -> bool:
        """False only when the address is an ``Address`` flagged ``is_query``."""
        location = self.address
        return not (isinstance(location, Address) and location.is_query)

    @property
    def identifier(self) -> str:
        """``name``, prefixed with the namespace unless it is empty or the default."""
        namespace = self.namespace
        if namespace and namespace != DEFAULT_NAMESPACE:
            return f"{namespace}.{self.name}"
        return self.name

    @property
    def safe_identifier(self) -> str:
        """``identifier`` with dots replaced by underscores."""
        return self.identifier.replace(".", "_")

    @property
    def concepts(self) -> List[BuildConcept]:
        """The concept bound to each column, in column order."""
        return [column.concept for column in self.columns]

    @property
    def group_required(self):
        """Always ``False``."""
        return False

    @property
    def output_concepts(self) -> List[BuildConcept]:
        """Same as ``concepts``."""
        return self.concepts

    @property
    def full_concepts(self) -> List[BuildConcept]:
        """Concepts whose column is not flagged PARTIAL."""
        return [
            column.concept
            for column in self.columns
            if Modifier.PARTIAL not in column.modifiers
        ]

    @property
    def nullable_concepts(self) -> List[BuildConcept]:
        """Concepts whose column is flagged NULLABLE."""
        return [
            column.concept
            for column in self.columns
            if Modifier.NULLABLE in column.modifiers
        ]

    @property
    def hidden_concepts(self) -> List[BuildConcept]:
        """Concepts whose column is flagged HIDDEN."""
        return [
            column.concept
            for column in self.columns
            if Modifier.HIDDEN in column.modifiers
        ]

    @property
    def partial_concepts(self) -> List[BuildConcept]:
        """Concepts whose column is flagged PARTIAL."""
        return [
            column.concept
            for column in self.columns
            if Modifier.PARTIAL in column.modifiers
        ]

    def get_alias(
        self,
        concept: BuildConcept,
        use_raw_name: bool = True,
        force_alias: bool = False,
    ) -> Optional[str | RawColumnExpr] | BuildFunction:
        """Resolve the column alias that provides *concept*.

        A column matches when its concept equals *concept*, either directly
        or after being re-grained to ``concept.grain``. The first match wins.

        Args:
            concept: the concept to look up.
            use_raw_name: when true return the column's own alias, otherwise
                return ``concept.safe_address``.
            force_alias: accepted but not read by this method.

        Raises:
            ValueError: if no column provides the concept.
        """
        # 2022-01-22
        # this logic needs to be refined.
        for column in self.columns:
            bound = column.concept
            if bound == concept or bound.with_grain(concept.grain) == concept:
                return column.alias if use_raw_name else concept.safe_address
        existing = [str(column.concept) for column in self.columns]
        raise ValueError(
            f"{LOGGER_PREFIX} Concept {concept} not found on {self.identifier}; have {existing}."
        )

    @property
    def safe_location(self) -> str:
        """``address.location`` for an ``Address``, otherwise the address itself."""
        location = self.address
        return location.location if isinstance(location, Address) else location

    @property
    def output_lcl(self) -> LooseBuildConceptList:
        """``output_concepts`` wrapped in a ``LooseBuildConceptList``."""
        return LooseBuildConceptList(concepts=self.output_concepts)
1421
+
1422
+
1423
# Union of the value kinds accepted wherever a field is annotated
# ``BuildExpr`` (for example ``BuildOrderItem.expr`` and ``BuildCaseWhen.expr``).
# NOTE(review): member order may influence pydantic union validation, so it is
# left exactly as written — confirm before reordering.
BuildExpr = (
    BuildWindowItem
    | BuildFilterItem
    | BuildConcept
    | BuildComparison
    | BuildConditional
    | BuildParenthetical
    | BuildFunction
    | BuildAggregateWrapper
    | bool
    | MagicConstants
    | int
    | str
    | float
    | list
)

# Rebuild the BuildConcept model now that the classes above are defined;
# presumably this resolves its forward-referenced field types.
BuildConcept.model_rebuild()
1441
+
1442
+
1443
class Factory:
    """Converts author-side model objects into their ``Build*`` counterparts.

    ``build`` is a ``singledispatchmethod``: each ``@build.register`` handler
    below is selected by the type annotation of its ``base`` parameter. The
    handlers are all named ``_`` on purpose; only the registration matters.

    Attributes:
        grain: grain used when resolving concepts and aggregates.
        environment: environment that concept addresses are looked up in.
        local_concepts: already-built concepts keyed by address; consulted
            before the environment.
    """

    def __init__(
        self,
        environment: Environment,
        local_concepts: dict[str, BuildConcept] | None = None,
        grain: Grain | None = None,
    ):
        """Create a factory.

        Args:
            environment: source of author-side concepts.
            local_concepts: cache of built concepts. The dict is stored by
                reference (not copied), so callers may keep filling it after
                construction; ``None`` creates a fresh empty dict.
            grain: build grain; a falsy value is replaced by ``Grain()``.
        """
        self.grain = grain or Grain()
        self.environment = environment
        self.local_concepts: dict[str, BuildConcept] = (
            {} if local_concepts is None else local_concepts
        )

    @singledispatchmethod
    def build(self, base):
        """Fallback for types with no registered handler.

        Raises:
            NotImplementedError: always, naming the unsupported type.
        """
        raise NotImplementedError(f"Cannot build {type(base)}")

    # Literals and type markers need no conversion and pass through unchanged.
    @build.register
    def _(
        self,
        base: (
            int
            | str
            | float
            | list
            | date
            | TupleWrapper
            | ListWrapper
            | MagicConstants
            | MapWrapper
            | DataType
            | DatePart
            | NumericType
        ),
    ) -> (
        int
        | str
        | float
        | list
        | date
        | TupleWrapper
        | ListWrapper
        | MagicConstants
        | MapWrapper
        | DataType
        | DatePart
        | NumericType
    ):
        return base

    @build.register
    def _(self, base: None) -> None:
        # None passes through so optional values can be built unconditionally.
        return base

    @build.register
    def _(self, base: Function) -> BuildFunction:
        """Build a function, recursively building each argument."""
        new = BuildFunction.model_construct(
            operator=base.operator,
            arguments=[self.build(c) for c in base.arguments],
            output_datatype=base.output_datatype,
            output_purpose=base.output_purpose,
            valid_inputs=base.valid_inputs,
            arg_count=base.arg_count,
        )
        return new

    @build.register
    def _(self, base: ConceptRef) -> BuildConcept:
        """Resolve a reference from the local cache, else via the environment."""
        if base.address in self.local_concepts:
            full = self.local_concepts[base.address]
            # Only trust the cache when it really holds a built concept.
            if isinstance(full, BuildConcept):
                return full
        raw = self.environment.concepts[base.address]
        return self.build(raw)

    @build.register
    def _(self, base: CaseWhen) -> BuildCaseWhen:
        return BuildCaseWhen.model_construct(
            comparison=self.build(base.comparison),
            expr=(self.build(base.expr)),
        )

    @build.register
    def _(self, base: CaseElse) -> BuildCaseElse:
        return BuildCaseElse.model_construct(expr=self.build(base.expr))

    @build.register
    def _(self, base: Concept) -> BuildConcept:
        """Build a concept at this factory's grain.

        A cached build in ``local_concepts`` is returned as-is. Otherwise the
        lineage and grain come from ``base.get_select_grain_and_keys`` and
        derivation, granularity and the aggregate flag are computed from the
        built lineage.
        """
        if base.address in self.local_concepts:
            return self.local_concepts[base.address]
        new_lineage, final_grain, _ = base.get_select_grain_and_keys(
            self.grain, self.environment
        )
        build_lineage = self.build(new_lineage)
        derivation = Concept.calculate_derivation(build_lineage, base.purpose)
        granularity = Concept.calculate_granularity(
            derivation, final_grain, build_lineage
        )
        is_aggregate = Concept.calculate_is_aggregate(build_lineage)
        rval = BuildConcept.model_construct(
            name=base.name,
            datatype=base.datatype,
            purpose=base.purpose,
            metadata=base.metadata,
            lineage=build_lineage,
            grain=self.build(final_grain),
            namespace=base.namespace,
            keys=base.keys,
            modifiers=base.modifiers,
            pseudonyms=base.pseudonyms,
            ## instantiated values
            derivation=derivation,
            granularity=granularity,
            build_is_aggregate=is_aggregate,
        )
        return rval

    @build.register
    def _(self, base: AggregateWrapper) -> BuildAggregateWrapper:
        """Build an aggregate; without an explicit ``by`` it groups by the factory grain."""
        if not base.by:
            by = [
                self.build(self.environment.concepts[c]) for c in self.grain.components
            ]
        else:
            by = [self.build(x) for x in base.by]
        parent = self.build(base.function)
        return BuildAggregateWrapper.model_construct(function=parent, by=by)

    @build.register
    def _(self, base: ColumnAssignment) -> BuildColumnAssignment:
        """Build a column binding; the concept is re-grained to the factory grain."""
        return BuildColumnAssignment.model_construct(
            # Only function aliases need building; other aliases are kept as-is.
            alias=(
                self.build(base.alias)
                if isinstance(base.alias, Function)
                else base.alias
            ),
            concept=self.build(
                self.environment.concepts[base.concept.address].with_grain(self.grain)
            ),
            modifiers=base.modifiers,
        )

    @build.register
    def _(self, base: OrderBy) -> BuildOrderBy:
        return BuildOrderBy.model_construct(items=[self.build(x) for x in base.items])

    @build.register
    def _(self, base: OrderItem) -> BuildOrderItem:
        return BuildOrderItem.model_construct(
            expr=(self.build(base.expr)),
            order=base.order,
        )

    @build.register
    def _(self, base: WhereClause) -> BuildWhereClause:
        return BuildWhereClause.model_construct(
            conditional=self.build(base.conditional)
        )

    @build.register
    def _(self, base: HavingClause) -> BuildHavingClause:
        return BuildHavingClause.model_construct(
            conditional=self.build(base.conditional)
        )

    @build.register
    def _(self, base: WindowItem) -> BuildWindowItem:
        return BuildWindowItem.model_construct(
            type=base.type,
            content=self.build(base.content),
            order_by=[self.build(x) for x in base.order_by],
            over=[self.build(x) for x in base.over],
            index=base.index,
        )

    @build.register
    def _(self, base: Conditional) -> BuildConditional:
        return BuildConditional.model_construct(
            left=(self.build(base.left)),
            right=(self.build(base.right)),
            operator=base.operator,
        )

    @build.register
    def _(self, base: SubselectComparison) -> BuildSubselectComparison:
        return BuildSubselectComparison.model_construct(
            left=(self.build(base.left)),
            right=(self.build(base.right)),
            operator=base.operator,
        )

    @build.register
    def _(self, base: Comparison) -> BuildComparison:
        return BuildComparison.model_construct(
            left=(self.build(base.left)),
            right=(self.build(base.right)),
            operator=base.operator,
        )

    @build.register
    def _(self, base: AlignItem) -> BuildAlignItem:
        return BuildAlignItem.model_construct(
            alias=base.alias,
            concepts=[self.build(x) for x in base.concepts],
            namespace=base.namespace,
        )

    @build.register
    def _(self, base: AlignClause) -> BuildAlignClause:
        return BuildAlignClause.model_construct(
            items=[self.build(x) for x in base.items]
        )

    @build.register
    def _(self, base: RowsetItem):
        """Build a rowset item with a fresh factory at the rowset select's grain."""
        factory = Factory(
            environment=self.environment,
            local_concepts={},
            grain=base.rowset.select.grain,
        )
        return BuildRowsetItem(
            content=factory.build(base.content), rowset=factory.build(base.rowset)
        )

    @build.register
    def _(self, base: RowsetLineage) -> BuildRowsetLineage:
        """Build rowset lineage; derived concepts are stored by address and
        the select is carried over without being built."""
        out = BuildRowsetLineage(
            name=base.name,
            derived_concepts=[x.address for x in base.derived_concepts],
            select=base.select,
        )
        return out

    @build.register
    def _(self, base: Grain) -> BuildGrain:
        """Build a grain; its where clause is built by a default-grain factory."""
        if base.where_clause:
            factory = Factory(environment=self.environment)
            where = factory.build(base.where_clause)
        else:
            where = None
        return BuildGrain.model_construct(
            components=base.components, where_clause=where
        )

    @build.register
    def _(self, base: FilterItem) -> BuildFilterItem:
        return BuildFilterItem.model_construct(
            content=self.build(base.content), where=self.build(base.where)
        )

    @build.register
    def _(self, base: Parenthetical) -> BuildParenthetical:
        return BuildParenthetical.model_construct(content=(self.build(base.content)))

    @build.register
    def _(self, base: SelectLineage) -> BuildSelectLineage:
        """Build a select's lineage at the select's own grain.

        Local concepts are built first into a cache that the inner factory
        shares, so later lookups of the same address reuse them. The where
        clause is built by a separate factory using an empty ``Grain()``.
        """
        materialized: dict[str, BuildConcept] = {}
        # `materialized` is shared by reference with the factory's cache.
        factory = Factory(
            grain=base.grain, environment=self.environment, local_concepts=materialized
        )
        for k, v in base.local_concepts.items():
            materialized[k] = factory.build(v)
        where_factory = Factory(
            grain=Grain(), environment=self.environment, local_concepts={}
        )

        final: List[BuildConcept] = []
        for original in base.selection:
            # we don't know the grain of an aggregate at assignment time
            # so rebuild at this point in the tree
            # TODO: simplify
            if original.address in materialized:
                built = materialized[original.address]
            else:
                # Sometimes cached values here don't have the latest info
                # but we can't just use environment, as it might not have the right grain.
                built = factory.build(original)
                materialized[original.address] = built
            final.append(built)
        return BuildSelectLineage(
            selection=final,
            hidden_components=base.hidden_components,
            order_by=(factory.build(base.order_by) if base.order_by else None),
            limit=base.limit,
            meta=base.meta,
            local_concepts=materialized,
            grain=self.build(base.grain),
            having_clause=(
                factory.build(base.having_clause) if base.having_clause else None
            ),
            # this uses a different grain factory
            where_clause=(
                where_factory.build(base.where_clause) if base.where_clause else None
            ),
        )

    @build.register
    def _(self, base: MultiSelectLineage) -> BuildMultiSelectLineage:
        """Build a multi-select's lineage.

        The derived (aligned) concepts are constructed by hand with
        ``lineage=None`` and given the finished lineage only at the end,
        because the lineage object itself has to reference them.
        """
        local_build_cache: dict[str, BuildConcept] = {}

        parents: list[BuildSelectLineage] = [self.build(x) for x in base.selects]
        # Merge every parent's local concepts into the first parent's dict
        # (that dict is modified in place).
        base_local = parents[0].local_concepts

        for select in parents[1:]:
            for k, v in select.local_concepts.items():
                base_local[k] = v

        # this requires custom handling to avoid circular dependencies
        final_grain = self.build(base.grain)
        derived_base = []
        for k in base.derived_concepts:
            base_concept = self.environment.concepts[k]
            x = BuildConcept.model_construct(
                name=base_concept.name,
                datatype=base_concept.datatype,
                purpose=base_concept.purpose,
                build_is_aggregate=False,
                derivation=Derivation.MULTISELECT,
                lineage=None,
                grain=final_grain,
                namespace=base_concept.namespace,
            )
            local_build_cache[k] = x
            derived_base.append(x)
        all_input: list[BuildConcept] = []
        for parent in parents:
            all_input += parent.output_components
        all_output: list[BuildConcept] = derived_base + all_input
        final: list[BuildConcept] = [
            x for x in all_output if x.address not in base.hidden_components
        ]
        factory = Factory(
            grain=base.grain,
            environment=self.environment,
            local_concepts=local_build_cache,
        )
        where_factory = Factory(environment=self.environment)
        lineage = BuildMultiSelectLineage.model_construct(
            # we don't build selects here; they'll be built automatically in query discovery
            selects=base.selects,
            grain=final_grain,
            align=factory.build(base.align),
            namespace=base.namespace,
            hidden_components=base.hidden_components,
            order_by=factory.build(base.order_by) if base.order_by else None,
            limit=base.limit,
            where_clause=(
                where_factory.build(base.where_clause) if base.where_clause else None
            ),
            having_clause=(
                factory.build(base.having_clause) if base.having_clause else None
            ),
            local_concepts=base_local,
            build_output_components=final,
            build_concept_arguments=all_input,
        )
        # Close the loop: the derived concepts now point at the finished lineage.
        for k in base.derived_concepts:
            local_build_cache[k].lineage = lineage
        return lineage

    @build.register
    def _(self, base: Environment):
        """Build a ``BuildEnvironment`` from every concept, datasource and
        alias-origin entry of *base*, then generate its concept list caches."""
        # Imported at call time rather than at module import.
        from trilogy.core.models.build_environment import BuildEnvironment

        new = BuildEnvironment(
            namespace=base.namespace,
            cte_name_map=base.cte_name_map,
        )

        for k, v in base.concepts.items():
            new.concepts[k] = self.build(v)
        for (
            k,
            d,
        ) in base.datasources.items():
            new.datasources[k] = self.build(d)
        for k, a in base.alias_origin_lookup.items():
            new.alias_origin_lookup[k] = self.build(a)
        new.gen_concept_list_caches()
        return new

    @build.register
    def _(self, base: Datasource):
        """Build a datasource with a dedicated factory at the datasource's grain."""
        local_cache: dict[str, BuildConcept] = {}
        factory = Factory(
            grain=base.grain, environment=self.environment, local_concepts=local_cache
        )
        return BuildDatasource.model_construct(
            name=base.name,
            columns=[factory.build(c) for c in base.columns],
            address=base.address,
            grain=factory.build(base.grain),
            namespace=base.namespace,
            metadata=base.metadata,
            where=(factory.build(base.where) if base.where else None),
            non_partial_for=(
                factory.build(base.non_partial_for) if base.non_partial_for else None
            ),
        )