pytrilogy 0.0.2.58__py3-none-any.whl → 0.0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (76) hide show
  1. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/METADATA +9 -2
  2. pytrilogy-0.0.3.1.dist-info/RECORD +99 -0
  3. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +2 -2
  5. trilogy/core/enums.py +1 -7
  6. trilogy/core/env_processor.py +17 -5
  7. trilogy/core/environment_helpers.py +11 -25
  8. trilogy/core/exceptions.py +4 -0
  9. trilogy/core/functions.py +695 -261
  10. trilogy/core/graph_models.py +10 -10
  11. trilogy/core/internal.py +11 -2
  12. trilogy/core/models/__init__.py +0 -0
  13. trilogy/core/models/author.py +2110 -0
  14. trilogy/core/models/build.py +1859 -0
  15. trilogy/core/models/build_environment.py +151 -0
  16. trilogy/core/models/core.py +370 -0
  17. trilogy/core/models/datasource.py +297 -0
  18. trilogy/core/models/environment.py +701 -0
  19. trilogy/core/models/execute.py +931 -0
  20. trilogy/core/optimization.py +14 -16
  21. trilogy/core/optimizations/base_optimization.py +1 -1
  22. trilogy/core/optimizations/inline_constant.py +6 -6
  23. trilogy/core/optimizations/inline_datasource.py +17 -11
  24. trilogy/core/optimizations/predicate_pushdown.py +17 -16
  25. trilogy/core/processing/concept_strategies_v3.py +178 -145
  26. trilogy/core/processing/graph_utils.py +1 -1
  27. trilogy/core/processing/node_generators/basic_node.py +19 -18
  28. trilogy/core/processing/node_generators/common.py +50 -44
  29. trilogy/core/processing/node_generators/filter_node.py +26 -13
  30. trilogy/core/processing/node_generators/group_node.py +26 -21
  31. trilogy/core/processing/node_generators/group_to_node.py +11 -8
  32. trilogy/core/processing/node_generators/multiselect_node.py +60 -43
  33. trilogy/core/processing/node_generators/node_merge_node.py +76 -38
  34. trilogy/core/processing/node_generators/rowset_node.py +55 -36
  35. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
  36. trilogy/core/processing/node_generators/select_merge_node.py +161 -64
  37. trilogy/core/processing/node_generators/select_node.py +13 -13
  38. trilogy/core/processing/node_generators/union_node.py +12 -11
  39. trilogy/core/processing/node_generators/unnest_node.py +9 -7
  40. trilogy/core/processing/node_generators/window_node.py +18 -16
  41. trilogy/core/processing/nodes/__init__.py +21 -18
  42. trilogy/core/processing/nodes/base_node.py +82 -66
  43. trilogy/core/processing/nodes/filter_node.py +19 -13
  44. trilogy/core/processing/nodes/group_node.py +50 -35
  45. trilogy/core/processing/nodes/merge_node.py +45 -36
  46. trilogy/core/processing/nodes/select_node_v2.py +53 -39
  47. trilogy/core/processing/nodes/union_node.py +5 -7
  48. trilogy/core/processing/nodes/unnest_node.py +7 -11
  49. trilogy/core/processing/nodes/window_node.py +9 -4
  50. trilogy/core/processing/utility.py +103 -75
  51. trilogy/core/query_processor.py +70 -47
  52. trilogy/core/statements/__init__.py +0 -0
  53. trilogy/core/statements/author.py +413 -0
  54. trilogy/core/statements/build.py +0 -0
  55. trilogy/core/statements/common.py +30 -0
  56. trilogy/core/statements/execute.py +42 -0
  57. trilogy/dialect/base.py +148 -106
  58. trilogy/dialect/common.py +9 -10
  59. trilogy/dialect/duckdb.py +1 -1
  60. trilogy/dialect/enums.py +4 -2
  61. trilogy/dialect/presto.py +1 -1
  62. trilogy/dialect/sql_server.py +1 -1
  63. trilogy/executor.py +44 -32
  64. trilogy/hooks/__init__.py +4 -0
  65. trilogy/hooks/base_hook.py +6 -4
  66. trilogy/hooks/query_debugger.py +113 -97
  67. trilogy/parser.py +1 -1
  68. trilogy/parsing/common.py +307 -64
  69. trilogy/parsing/parse_engine.py +277 -618
  70. trilogy/parsing/render.py +50 -26
  71. trilogy/scripts/trilogy.py +2 -1
  72. pytrilogy-0.0.2.58.dist-info/RECORD +0 -87
  73. trilogy/core/models.py +0 -4960
  74. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/LICENSE.md +0 -0
  75. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/entry_points.txt +0 -0
  76. {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,2110 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ from abc import ABC
5
+ from datetime import date, datetime
6
+ from functools import cached_property
7
+ from typing import (
8
+ TYPE_CHECKING,
9
+ Any,
10
+ Iterable,
11
+ List,
12
+ Optional,
13
+ Self,
14
+ Sequence,
15
+ Set,
16
+ Tuple,
17
+ Type,
18
+ Union,
19
+ )
20
+
21
+ from pydantic import (
22
+ BaseModel,
23
+ ConfigDict,
24
+ Field,
25
+ ValidationInfo,
26
+ computed_field,
27
+ field_validator,
28
+ )
29
+
30
+ from trilogy.constants import DEFAULT_NAMESPACE, MagicConstants
31
+ from trilogy.core.constants import ALL_ROWS_CONCEPT
32
+ from trilogy.core.enums import (
33
+ BooleanOperator,
34
+ ComparisonOperator,
35
+ ConceptSource,
36
+ DatePart,
37
+ Derivation,
38
+ FunctionClass,
39
+ FunctionType,
40
+ Granularity,
41
+ InfiniteFunctionArgs,
42
+ Modifier,
43
+ Ordering,
44
+ Purpose,
45
+ WindowOrder,
46
+ WindowType,
47
+ )
48
+ from trilogy.core.models.core import (
49
+ Addressable,
50
+ DataType,
51
+ DataTyped,
52
+ ListType,
53
+ ListWrapper,
54
+ MapType,
55
+ MapWrapper,
56
+ NumericType,
57
+ StructType,
58
+ TupleWrapper,
59
+ arg_to_datatype,
60
+ is_compatible_datatype,
61
+ )
62
+ from trilogy.utility import unique
63
+
64
+ # TODO: refactor to avoid these
65
+ if TYPE_CHECKING:
66
+ from trilogy.core.models.environment import Environment
67
+
68
+
69
class Namespaced(ABC):
    """Mixin for objects that can be re-expressed under a namespace."""

    def with_namespace(self, namespace: str):
        """Return this object expressed under ``namespace``.

        The base implementation is a placeholder and always raises
        ``NotImplementedError``; subclasses supply the real behavior.
        """
        raise NotImplementedError
72
+
73
+
74
class Mergeable(ABC):
    """Mixin for objects that can substitute one concept for another."""

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Return this object with ``source`` replaced by ``target``.

        The base implementation is a placeholder and always raises
        ``NotImplementedError``; subclasses supply the real behavior.
        """
        raise NotImplementedError
77
+
78
+
79
class ConceptArgs(ABC):
    """Mixin exposing the concept references an expression depends on."""

    @property
    def concept_arguments(self) -> Sequence["ConceptRef"]:
        """Concept references used by this object; must be overridden."""
        raise NotImplementedError

    @property
    def existence_arguments(self) -> Sequence[tuple["ConceptRef", ...]]:
        """Groups of existence-only references; empty unless overridden."""
        return list()

    @property
    def row_arguments(self) -> Sequence["ConceptRef"]:
        """Row-level references; falls back to ``concept_arguments``."""
        arguments = self.concept_arguments
        return arguments
91
+
92
+
93
class HasUUID(ABC):
    """Mixin deriving a deterministic identifier from ``str(self)``."""

    @property
    def uuid(self) -> str:
        """Return the hex MD5 digest of this object's string form.

        MD5 is used purely as a fingerprint here, so it is flagged as
        ``usedforsecurity=False``; this keeps the call working on
        FIPS-restricted Python builds without changing the digest.
        """
        payload = str(self).encode()
        return hashlib.md5(payload, usedforsecurity=False).hexdigest()
97
+
98
+
99
class ConceptRef(Addressable, Namespaced, DataTyped, Mergeable, BaseModel):
    """Reference to a concept by address, with a datatype and optional metadata."""

    address: str
    datatype: DataType | ListType | StructType | MapType | NumericType = (
        DataType.UNKNOWN
    )
    metadata: Optional["Metadata"] = None

    @property
    def line_no(self) -> int | None:
        """Line number recorded on the metadata, or None without metadata."""
        return self.metadata.line_number if self.metadata else None

    def __repr__(self):
        return f"ref:{self.address}"

    def __str__(self):
        return self.__repr__()

    def __eq__(self, other):
        # Equality is address-based against strings, concepts and other refs.
        if isinstance(other, str):
            return self.address == other
        if isinstance(other, (Concept, ConceptRef)):
            return self.address == other.address
        return False

    @property
    def namespace(self):
        """Everything before the last dot of the address."""
        head = self.address.rsplit(".", 1)
        return head[0]

    @property
    def name(self):
        """Everything after the last dot of the address."""
        pieces = self.address.rsplit(".", 1)
        return pieces[1]

    @property
    def output_datatype(self):
        return self.datatype

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> ConceptRef:
        """Point at ``target`` when this reference addresses ``source``."""
        if self.address != source.address:
            return self
        return ConceptRef.model_construct(
            address=target.address, datatype=target.datatype, metadata=self.metadata
        )

    def with_namespace(self, namespace: str):
        """Return a reference whose address is re-homed under ``namespace``."""
        relocated = address_with_namespace(self.address, namespace)
        return ConceptRef.model_construct(
            address=relocated,
            datatype=self.datatype,
            metadata=self.metadata,
        )
154
+
155
+
156
class UndefinedConcept(ConceptRef):
    """Concept reference that acts as its own reference with unknown purpose."""

    @property
    def reference(self):
        """This object itself."""
        return self

    @property
    def purpose(self):
        """Always ``Purpose.UNKNOWN``."""
        return Purpose.UNKNOWN
166
+
167
+
168
def address_with_namespace(address: str, namespace: str) -> str:
    """Return ``address`` re-homed under ``namespace``.

    An address whose leading segment already equals ``namespace`` is returned
    unchanged. A leading ``DEFAULT_NAMESPACE`` segment is swapped for
    ``namespace``. Any other address is simply prefixed with ``namespace``.
    """
    leading, *remainder = address.split(".", 1)
    if leading == namespace:
        return address
    if leading == DEFAULT_NAMESPACE:
        # remainder is empty when the address has no dot; indexing it then
        # raises IndexError.
        return f"{namespace}.{remainder[0]}"
    return f"{namespace}.{address}"
175
+
176
+
177
class Parenthetical(
    DataTyped,
    ConceptArgs,
    Mergeable,
    Namespaced,
    BaseModel,
):
    """An expression wrapped in parentheses."""

    content: "Expr"

    @field_validator("content", mode="before")
    @classmethod
    def content_validator(cls, v, info: ValidationInfo):
        """Swap a full Concept for its reference before validation."""
        return v.reference if isinstance(v, Concept) else v

    def __add__(self, other) -> Union["Parenthetical", "Conditional"]:
        """AND this expression with ``other``; adding None is a no-op."""
        if other is None:
            return self
        if not isinstance(other, (Comparison, Conditional, Parenthetical)):
            raise ValueError(f"Cannot add {self.__class__} and {type(other)}")
        return Conditional(left=self, right=other, operator=BooleanOperator.AND)

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return f"({self.content!s})"

    def with_namespace(self, namespace: str):
        """Rebuild with the content re-namespaced when it supports that."""
        inner = self.content
        if isinstance(inner, Namespaced):
            inner = inner.with_namespace(namespace)
        return Parenthetical.model_construct(content=inner)

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Rebuild with the merge applied to the content when it supports that."""
        inner = self.content
        if isinstance(inner, Mergeable):
            inner = inner.with_merge(source, target, modifiers)
        return Parenthetical.model_construct(content=inner)

    @property
    def concept_arguments(self) -> Sequence[ConceptRef]:
        """References used by the content: itself if a ref, else its arguments."""
        inner = self.content
        if isinstance(inner, ConceptRef):
            return [inner]
        if isinstance(inner, ConceptArgs):
            return list(inner.concept_arguments)
        return []

    @property
    def row_arguments(self) -> Sequence[ConceptRef]:
        inner = self.content
        if not isinstance(inner, ConceptArgs):
            return self.concept_arguments
        return inner.row_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["ConceptRef", ...]]:
        inner = self.content
        if not isinstance(inner, ConceptArgs):
            return []
        return inner.existence_arguments

    @property
    def output_datatype(self):
        """Datatype of the wrapped content."""
        return arg_to_datatype(self.content)
249
+
250
+
251
class Conditional(Mergeable, ConceptArgs, Namespaced, BaseModel):
    """Two expressions joined by a boolean operator."""

    left: Expr
    right: Expr
    operator: BooleanOperator

    def __add__(self, other) -> "Conditional":
        """AND with ``other``; None or a same-rendering operand is a no-op."""
        if other is None or str(other) == str(self):
            return self
        if not isinstance(other, (Comparison, Conditional, Parenthetical)):
            raise ValueError(f"Cannot add {self.__class__} and {type(other)}")
        return Conditional.model_construct(
            left=self, right=other, operator=BooleanOperator.AND
        )

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return f"{self.left!s} {self.operator.value} {self.right!s}"

    def __eq__(self, other):
        if isinstance(other, Conditional):
            return (
                self.left == other.left
                and self.right == other.right
                and self.operator == other.operator
            )
        return False

    def with_namespace(self, namespace: str) -> "Conditional":
        """Rebuild with each side re-namespaced where it supports that."""
        new_left = self.left
        if isinstance(new_left, Namespaced):
            new_left = new_left.with_namespace(namespace)
        new_right = self.right
        if isinstance(new_right, Namespaced):
            new_right = new_right.with_namespace(namespace)
        return Conditional.model_construct(
            left=new_left, right=new_right, operator=self.operator
        )

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "Conditional":
        """Rebuild with the merge applied to each side where it supports that."""
        new_left = self.left
        if isinstance(new_left, Mergeable):
            new_left = new_left.with_merge(source, target, modifiers)
        new_right = self.right
        if isinstance(new_right, Mergeable):
            new_right = new_right.with_merge(source, target, modifiers)
        return Conditional.model_construct(
            left=new_left, right=new_right, operator=self.operator
        )

    @property
    def concept_arguments(self) -> Sequence[ConceptRef]:
        """Return concepts directly referenced in where clause"""
        return [
            *get_concept_arguments(self.left),
            *get_concept_arguments(self.right),
        ]

    @property
    def row_arguments(self) -> Sequence[ConceptRef]:
        return [
            *get_concept_row_arguments(self.left),
            *get_concept_row_arguments(self.right),
        ]

    @property
    def existence_arguments(self) -> Sequence[tuple[ConceptRef, ...]]:
        collected: list[tuple[ConceptRef, ...]] = []
        for side in (self.left, self.right):
            if isinstance(side, ConceptArgs):
                collected += side.existence_arguments
        return collected

    def decompose(self):
        """Flatten nested AND conditionals into a list of their operands.

        A non-AND conditional is returned as a single-element list.
        """
        if self.operator != BooleanOperator.AND:
            return [self]
        parts = []
        for side in (self.left, self.right):
            if isinstance(side, Conditional):
                parts.extend(side.decompose())
            else:
                parts.append(side)
        return parts
349
+
350
+
351
class WhereClause(Mergeable, ConceptArgs, Namespaced, BaseModel):
    """Wrapper around a single conditional expression; delegates to it."""

    conditional: Union[SubselectComparison, Comparison, Conditional, "Parenthetical"]

    def __repr__(self):
        return str(self.conditional)

    def __str__(self):
        return self.__repr__()

    @property
    def concept_arguments(self) -> Sequence[ConceptRef]:
        condition = self.conditional
        return condition.concept_arguments

    @property
    def row_arguments(self) -> Sequence[ConceptRef]:
        condition = self.conditional
        return condition.row_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["ConceptRef", ...]]:
        condition = self.conditional
        return condition.existence_arguments

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> Self:
        """Rebuild (same class) with the merge applied to the conditional."""
        merged = self.conditional.with_merge(source, target, modifiers)
        return type(self).model_construct(conditional=merged)

    def with_namespace(self, namespace: str) -> Self:
        """Rebuild (same class) with the conditional re-namespaced."""
        relocated = self.conditional.with_namespace(namespace)
        return type(self).model_construct(conditional=relocated)
383
+
384
+
385
class HavingClause(WhereClause):
    """WhereClause subclass that adds no behavior of its own."""
387
+
388
+
389
class Grain(Namespaced, BaseModel):
    """A set of concept addresses plus an optional filtering where clause."""

    # Addresses of the concepts making up the grain.
    components: set[str] = Field(default_factory=set)
    # Optional condition attached to the grain.
    where_clause: Optional["WhereClause"] = None

    def without_condition(self):
        """Return a grain with the same components and no where clause."""
        return Grain(components=self.components)

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Return a grain with ``source``'s address replaced by ``target``'s.

        NOTE(review): the where clause is not carried over to the result,
        unlike ``with_namespace``; confirm whether that is intentional.
        """
        new_components = {
            target.address if c == source.address else c for c in self.components
        }
        return Grain.model_construct(components=new_components)

    @classmethod
    def from_concepts(
        cls,
        concepts: Iterable[Concept | ConceptRef | str],
        environment: Environment | None = None,
        where_clause: WhereClause | None = None,
    ) -> Grain:
        """Build a grain from concepts via ``concepts_to_grain_concepts``."""
        # Imported locally, as in the original.
        from trilogy.parsing.common import concepts_to_grain_concepts

        return Grain.model_construct(
            components={
                c.address
                for c in concepts_to_grain_concepts(concepts, environment=environment)
            },
            where_clause=where_clause,
        )

    def with_namespace(self, namespace: str) -> "Grain":
        """Return a grain with components and where clause re-namespaced."""
        return Grain.model_construct(
            components={address_with_namespace(c, namespace) for c in self.components},
            where_clause=(
                self.where_clause.with_namespace(namespace)
                if self.where_clause
                else None
            ),
        )

    @field_validator("components", mode="before")
    @classmethod
    def component_validator(cls, v, info: ValidationInfo):
        """Normalize components to a set of address strings.

        Lists, tuples, sets and frozensets are accepted; Addressable items
        are replaced by their ``_address``. Any other input must already be
        a set.

        Raises:
            ValueError: if the result is not a set, or holds a non-string.
        """
        if isinstance(v, (list, tuple, set, frozenset)):
            output = {vc._address if isinstance(vc, Addressable) else vc for vc in v}
        else:
            output = v
        if not isinstance(output, set):
            raise ValueError(f"Invalid grain component {output}, is not set")
        if not all(isinstance(x, str) for x in output):
            raise ValueError(f"Invalid component {output}")
        return output

    def __add__(self, other: "Grain") -> "Grain":
        """Union the components; differing where clauses are ANDed together."""
        if not other:
            return self
        where = self.where_clause
        if other.where_clause:
            if not self.where_clause:
                where = other.where_clause
            elif other.where_clause != self.where_clause:
                where = WhereClause.model_construct(
                    conditional=Conditional(
                        left=self.where_clause.conditional,
                        right=other.where_clause.conditional,
                        operator=BooleanOperator.AND,
                    )
                )
        return Grain(
            components=self.components.union(other.components), where_clause=where
        )

    def __sub__(self, other: "Grain") -> "Grain":
        """Remove ``other``'s components, keeping this grain's where clause."""
        return Grain.model_construct(
            components=self.components.difference(other.components),
            where_clause=self.where_clause,
        )

    @property
    def abstract(self):
        """True when there are no components or all end with ALL_ROWS_CONCEPT."""
        return not self.components or all(
            c.endswith(ALL_ROWS_CONCEPT) for c in self.components
        )

    def __eq__(self, other: object):
        # Equality compares components only; the where clause is not consulted.
        if isinstance(other, list):
            if not all(isinstance(c, Concept) for c in other):
                return False
            return self.components == {c.address for c in other}
        if not isinstance(other, Grain):
            return False
        return self.components == other.components

    def issubset(self, other: "Grain"):
        return self.components.issubset(other.components)

    def union(self, other: "Grain"):
        """Union the components, keeping only this grain's where clause."""
        addresses = self.components.union(other.components)
        return Grain(components=addresses, where_clause=self.where_clause)

    def isdisjoint(self, other: "Grain"):
        return self.components.isdisjoint(other.components)

    def intersection(self, other: "Grain") -> "Grain":
        """Intersect the components; the result has no where clause."""
        intersection = self.components.intersection(other.components)
        return Grain(components=intersection)

    def __str__(self):
        if self.abstract:
            base = "Grain<Abstract>"
        else:
            # Sorted so the rendering does not depend on set iteration order.
            base = "Grain<" + ",".join(sorted(self.components)) + ">"
        if self.where_clause:
            base += f"|{str(self.where_clause)}"
        return base

    def __radd__(self, other) -> "Grain":
        # A left operand of 0 returns self, which lets sum() start from 0.
        if other == 0:
            return self
        return self.__add__(other)
522
+
523
+
524
class Comparison(ConceptArgs, Mergeable, Namespaced, BaseModel):
    """Two operands joined by a comparison operator, validated on creation."""

    left: Union[
        int,
        str,
        float,
        list,
        bool,
        datetime,
        date,
        Function,
        ConceptRef,
        "Conditional",
        DataType,
        "Comparison",
        "Parenthetical",
        MagicConstants,
        WindowItem,
        AggregateWrapper,
    ]
    right: Union[
        int,
        str,
        float,
        list,
        bool,
        date,
        datetime,
        ConceptRef,
        Function,
        Conditional,
        DataType,
        Comparison,
        Parenthetical,
        MagicConstants,
        WindowItem,
        AggregateWrapper,
        TupleWrapper,
    ]
    operator: ComparisonOperator

    @field_validator("left", mode="before")
    @classmethod
    def left_validator(cls, v, info: ValidationInfo):
        """Swap a full Concept for its reference before validation."""
        return v.reference if isinstance(v, Concept) else v

    @field_validator("right", mode="before")
    @classmethod
    def right_validator(cls, v, info: ValidationInfo):
        """Swap a full Concept for its reference before validation."""
        return v.reference if isinstance(v, Concept) else v

    def __init__(self, *args, **kwargs) -> None:
        """Validate the fields, then check operand types against the operator.

        Raises:
            SyntaxError: for IS / IS NOT with a right side that is neither
                null nor boolean; for IN / NOT IN with an unsupported right
                side or incompatible element types; for any other operator
                with incompatible operand datatypes.
        """
        super().__init__(*args, **kwargs)
        op = self.operator
        if op in (ComparisonOperator.IS, ComparisonOperator.IS_NOT):
            if self.right != MagicConstants.NULL and DataType.BOOL != arg_to_datatype(
                self.right
            ):
                raise SyntaxError(
                    f"Cannot use {self.operator.value} with non-null or boolean value {self.right}"
                )
            return
        if op in (ComparisonOperator.IN, ComparisonOperator.NOT_IN):
            right_type = arg_to_datatype(self.right)
            if not isinstance(
                self.right, (ConceptRef, Concept, ListType, TupleWrapper)
            ):
                raise SyntaxError(
                    f"Cannot use {self.operator.value} with non-list, non-tuple, non-concept object {right_type} in {str(self)}"
                )
            if isinstance(right_type, ListType) and not is_compatible_datatype(
                arg_to_datatype(self.left), right_type.value_data_type
            ):
                raise SyntaxError(
                    f"Cannot compare {arg_to_datatype(self.left)} and {right_type} with operator {self.operator} in {str(self)}"
                )
            if isinstance(self.right, Concept) and not is_compatible_datatype(
                arg_to_datatype(self.left), arg_to_datatype(self.right)
            ):
                raise SyntaxError(
                    f"Cannot compare {arg_to_datatype(self.left)} and {arg_to_datatype(self.right)} with operator {self.operator} in {str(self)}"
                )
            return
        if not is_compatible_datatype(
            arg_to_datatype(self.left), arg_to_datatype(self.right)
        ):
            raise SyntaxError(
                f"Cannot compare {arg_to_datatype(self.left)} and {arg_to_datatype(self.right)} of different types with operator {self.operator} in {str(self)}"
            )

    def __add__(self, other):
        """AND with ``other``; None or an equal operand is a no-op."""
        if other is None:
            return self
        if not isinstance(other, (Comparison, Conditional, Parenthetical)):
            raise ValueError("Cannot add Comparison to non-Comparison")
        if other == self:
            return self
        return Conditional(left=self, right=other, operator=BooleanOperator.AND)

    def __repr__(self):
        def render(side):
            # Full concepts render as their address, everything else via str().
            return side.address if isinstance(side, Concept) else str(side)

        left = render(self.left)
        right = render(self.right)
        return f"{left} {self.operator.value} {right}"

    def __str__(self):
        return self.__repr__()

    def __eq__(self, other):
        if isinstance(other, Comparison):
            return (
                self.left == other.left
                and self.right == other.right
                and self.operator == other.operator
            )
        return False

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Rebuild (same class) with the merge applied to each mergeable side."""
        new_left = self.left
        if isinstance(new_left, Mergeable):
            new_left = new_left.with_merge(source, target, modifiers)
        new_right = self.right
        if isinstance(new_right, Mergeable):
            new_right = new_right.with_merge(source, target, modifiers)
        return type(self).model_construct(
            left=new_left, right=new_right, operator=self.operator
        )

    def with_namespace(self, namespace: str):
        """Rebuild (same class) with each namespaced side re-namespaced."""
        new_left = self.left
        if isinstance(new_left, Namespaced):
            new_left = new_left.with_namespace(namespace)
        new_right = self.right
        if isinstance(new_right, Namespaced):
            new_right = new_right.with_namespace(namespace)
        return type(self).model_construct(
            left=new_left, right=new_right, operator=self.operator
        )

    @property
    def concept_arguments(self) -> List[ConceptRef]:
        """Return concepts directly referenced in where clause"""
        return [
            *get_concept_arguments(self.left),
            *get_concept_arguments(self.right),
        ]

    @property
    def row_arguments(self) -> List[ConceptRef]:
        return [
            *get_concept_row_arguments(self.left),
            *get_concept_row_arguments(self.right),
        ]

    @property
    def existence_arguments(self) -> List[Tuple[ConceptRef, ...]]:
        """Existence arguments gathered from both sides, left first."""
        collected: List[Tuple[ConceptRef, ...]] = []
        for side in (self.left, self.right):
            if isinstance(side, ConceptArgs):
                collected += side.existence_arguments
        return collected
702
+
703
+
704
class SubselectComparison(Comparison):
    """Comparison whose right side supplies existence arguments, not row arguments."""

    def __eq__(self, other):
        # Only another SubselectComparison can compare equal.
        if isinstance(other, SubselectComparison):
            return (
                self.left == other.left
                and self.right == other.right
                and self.operator == other.operator
            )
        return False

    @property
    def row_arguments(self) -> List[ConceptRef]:
        """Row arguments come from the left side only."""
        left_side = self.left
        return get_concept_row_arguments(left_side)

    @property
    def existence_arguments(self) -> list[tuple["ConceptRef", ...]]:
        """A single tuple holding the right side's concept arguments."""
        right_refs = tuple(get_concept_arguments(self.right))
        return [right_refs]
723
+
724
+
725
+ class Concept(Addressable, DataTyped, ConceptArgs, Mergeable, Namespaced, BaseModel):
726
+ model_config = ConfigDict(
727
+ extra="forbid",
728
+ )
729
+ name: str
730
+ datatype: DataType | ListType | StructType | MapType | NumericType
731
+ purpose: Purpose
732
+ derivation: Derivation = Derivation.ROOT
733
+ granularity: Granularity = Granularity.MULTI_ROW
734
+ metadata: Metadata = Field(
735
+ default_factory=lambda: Metadata(description=None, line_number=None),
736
+ validate_default=True,
737
+ )
738
+ lineage: Optional[
739
+ Union[
740
+ Function,
741
+ WindowItem,
742
+ FilterItem,
743
+ AggregateWrapper,
744
+ RowsetItem,
745
+ MultiSelectLineage,
746
+ ]
747
+ ] = None
748
+ namespace: str = Field(default=DEFAULT_NAMESPACE, validate_default=True)
749
+ keys: Optional[set[str]] = None
750
+ grain: "Grain" = Field(default=None, validate_default=True) # type: ignore
751
+ modifiers: List[Modifier] = Field(default_factory=list) # type: ignore
752
+ pseudonyms: set[str] = Field(default_factory=set)
753
+
754
+ def duplicate(self) -> Concept:
755
+ return self.model_copy(deep=True)
756
+
757
+ def __hash__(self):
758
+ return hash(
759
+ f"{self.name}+{self.datatype}+ {self.purpose} + {str(self.lineage)} + {self.namespace} + {str(self.grain)} + {str(self.keys)}"
760
+ )
761
+
762
+ def __repr__(self):
763
+ base = f"{self.address}@{self.grain}"
764
+ return base
765
+
766
+ @property
767
+ def reference(self) -> ConceptRef:
768
+ return ConceptRef.model_construct(
769
+ address=self.address,
770
+ datatype=self.output_datatype,
771
+ metadata=self.metadata,
772
+ )
773
+
774
+ @property
775
+ def output_datatype(self):
776
+ return self.datatype
777
+
778
+ @classmethod
779
+ def calculate_is_aggregate(cls, lineage):
780
+ if lineage and isinstance(lineage, Function):
781
+ if lineage.operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
782
+ return True
783
+ if (
784
+ lineage
785
+ and isinstance(lineage, AggregateWrapper)
786
+ and lineage.function.operator in FunctionClass.AGGREGATE_FUNCTIONS.value
787
+ ):
788
+ return True
789
+ return False
790
+
791
+ @property
792
+ def is_aggregate(self):
793
+ return self.calculate_is_aggregate(self.lineage)
794
+
795
+ def with_merge(self, source: Self, target: Self, modifiers: List[Modifier]) -> Self:
796
+ if self.address == source.address:
797
+ new = target.with_grain(self.grain.with_merge(source, target, modifiers))
798
+ new.pseudonyms.add(self.address)
799
+ return new
800
+ if not self.grain.components and not self.lineage and not self.keys:
801
+ return self
802
+ return self.__class__.model_construct(
803
+ name=self.name,
804
+ datatype=self.datatype,
805
+ purpose=self.purpose,
806
+ metadata=self.metadata,
807
+ derivation=self.derivation,
808
+ granularity=self.granularity,
809
+ lineage=(
810
+ self.lineage.with_merge(source, target, modifiers)
811
+ if self.lineage
812
+ else None
813
+ ),
814
+ grain=self.grain.with_merge(source, target, modifiers),
815
+ namespace=self.namespace,
816
+ keys=(
817
+ set(x if x != source.address else target.address for x in self.keys)
818
+ if self.keys
819
+ else None
820
+ ),
821
+ modifiers=self.modifiers,
822
+ pseudonyms=self.pseudonyms,
823
+ )
824
+
825
+ @field_validator("namespace", mode="plain")
826
+ @classmethod
827
+ def namespace_validation(cls, v):
828
+ return v or DEFAULT_NAMESPACE
829
+
830
+ @field_validator("metadata", mode="before")
831
+ @classmethod
832
+ def metadata_validation(cls, v):
833
+ v = v or Metadata()
834
+ return v
835
+
836
+ @field_validator("purpose", mode="after")
837
+ @classmethod
838
+ def purpose_validation(cls, v):
839
+ if v == Purpose.AUTO:
840
+ raise ValueError("Cannot set purpose to AUTO")
841
+ return v
842
+
843
+ @field_validator("grain", mode="before")
844
+ @classmethod
845
+ def parse_grain(cls, v, info: ValidationInfo) -> Grain:
846
+
847
+ # this is silly - rethink how we do grains
848
+ values = info.data
849
+
850
+ if not v and values.get("purpose", None) == Purpose.KEY:
851
+ v = Grain(
852
+ components={
853
+ f'{values.get("namespace", DEFAULT_NAMESPACE)}.{values["name"]}'
854
+ }
855
+ )
856
+ elif not v and values.get("purpose", None) == Purpose.PROPERTY:
857
+ v = Grain(components=values.get("keys", set()) or set())
858
+ elif (
859
+ "lineage" in values
860
+ and isinstance(values["lineage"], AggregateWrapper)
861
+ and values["lineage"].by
862
+ ):
863
+ v = Grain(components={c.address for c in values["lineage"].by})
864
+ elif not v:
865
+ v = Grain(components=set())
866
+ elif isinstance(v, Grain):
867
+ pass
868
+ elif isinstance(v, Concept):
869
+ v = Grain(components={v.address})
870
+ elif isinstance(v, dict):
871
+ v = Grain.model_validate(v)
872
+ else:
873
+ raise SyntaxError(f"Invalid grain {v} for concept {values['name']}")
874
+ return v
875
+
876
+ def __eq__(self, other: object):
877
+ if isinstance(other, str):
878
+ if self.address == other:
879
+ return True
880
+ if isinstance(other, ConceptRef):
881
+ return self.address == other.address
882
+ if not isinstance(other, Concept):
883
+ return False
884
+ return (
885
+ self.name == other.name
886
+ and self.datatype == other.datatype
887
+ and self.purpose == other.purpose
888
+ and self.namespace == other.namespace
889
+ and self.grain == other.grain
890
+ and self.derivation == other.derivation
891
+ and self.granularity == other.granularity
892
+ # and self.keys == other.keys
893
+ )
894
+
895
+ def __str__(self):
896
+ grain = str(self.grain) if self.grain else "Grain<>"
897
+ return f"{self.namespace}.{self.name}@{grain}"
898
+
899
+ @property
900
+ def address(self) -> str:
901
+ return f"{self.namespace}.{self.name}"
902
+
903
+ @property
904
+ def output(self) -> "Concept":
905
+ return self
906
+
907
+ @property
908
+ def safe_address(self) -> str:
909
+ if self.namespace == DEFAULT_NAMESPACE:
910
+ return self.name.replace(".", "_")
911
+ elif self.namespace:
912
+ return f"{self.namespace.replace('.','_')}_{self.name.replace('.','_')}"
913
+ return self.name.replace(".", "_")
914
+
915
def with_namespace(self, namespace: str) -> Self:
    """Return a copy of this concept placed under ``namespace``.

    When the concept already lives in ``namespace`` it is returned as is.
    Otherwise lineage, grain, keys and pseudonyms are each re-namespaced,
    and ``namespace`` is prepended to the current namespace unless that
    is the default namespace, in which case it replaces it. The copy is
    built with ``model_construct``.
    """
    if namespace == self.namespace:
        return self

    moved_lineage = self.lineage.with_namespace(namespace) if self.lineage else None
    if self.grain:
        moved_grain = self.grain.with_namespace(namespace)
    else:
        moved_grain = Grain(components=set())
    if self.namespace != DEFAULT_NAMESPACE:
        moved_namespace = namespace + "." + self.namespace
    else:
        moved_namespace = namespace
    moved_keys = None
    if self.keys:
        moved_keys = {address_with_namespace(key, namespace) for key in self.keys}
    moved_pseudonyms = {
        address_with_namespace(alias, namespace) for alias in self.pseudonyms
    }

    return self.__class__.model_construct(
        name=self.name,
        datatype=self.datatype,
        purpose=self.purpose,
        granularity=self.granularity,
        derivation=self.derivation,
        metadata=self.metadata,
        lineage=moved_lineage,
        grain=moved_grain,
        namespace=moved_namespace,
        keys=moved_keys,
        modifiers=self.modifiers,
        pseudonyms=moved_pseudonyms,
    )
944
+
945
def get_select_grain_and_keys(
    self, grain: Grain, environment: Environment
) -> Tuple[
    Function
    | WindowItem
    | FilterItem
    | AggregateWrapper
    | RowsetItem
    | MultiSelectLineage
    | None,
    Grain,
    set[str] | None,
]:
    """Work out the lineage, grain and keys this concept takes in a select.

    Returns a ``(lineage, grain, keys)`` tuple. The starting point is the
    concept's own lineage and keys, with its own grain when that has
    components and the select ``grain`` otherwise. Three cases override it:

    * an aggregate whose lineage is a bare ``Function`` is wrapped in an
      ``AggregateWrapper`` grouped by the select grain components;
    * an ``AggregateWrapper`` with no ``by`` is rebuilt with the select
      grain components as ``by``;
    * a ``Derivation.BASIC`` concept takes the union of the keys of its
      defined lineage arguments, resolved through ``Grain.from_concepts``.
    """
    lineage = self.lineage
    resolved_grain = self.grain if self.grain.components else grain
    resolved_keys = self.keys

    if self.is_aggregate and isinstance(lineage, Function) and grain.components:
        by_refs = [
            environment.concepts[address].reference for address in grain.components
        ]
        lineage = AggregateWrapper(function=lineage, by=by_refs)
        resolved_grain = grain
        resolved_keys = set(grain.components)
    elif isinstance(lineage, AggregateWrapper) and not lineage.by and grain:
        by_refs = [
            environment.concepts[address].reference for address in grain.components
        ]
        lineage = AggregateWrapper(function=lineage.function, by=by_refs)
        resolved_grain = grain
        resolved_keys = {ref.address for ref in lineage.by}
    elif self.derivation == Derivation.BASIC:
        inherited_keys: set[str] = set()
        assert lineage
        for argument_ref in lineage.concept_arguments:
            parent = environment.concepts[argument_ref.address]
            if isinstance(parent, UndefinedConcept):
                # undefined parents contribute no keys
                continue
            parent_keys = parent.get_select_grain_and_keys(grain, environment)[2]
            if parent_keys:
                inherited_keys.update(parent_keys)
        # deduplicate
        resolved_grain = Grain.from_concepts(inherited_keys, environment)
        resolved_keys = resolved_grain.components
    return lineage, resolved_grain, resolved_keys
994
+
995
def set_select_grain(self, grain: Grain, environment: Environment) -> Self:
    """Assign a mutable concept the appropriate grain/keys for a select.

    The lineage, grain and keys come from ``get_select_grain_and_keys``;
    every other field is copied from this concept. The copy is built with
    ``model_construct``.
    """
    resolved = self.get_select_grain_and_keys(grain, environment)
    select_lineage, select_grain, select_keys = resolved
    return self.__class__.model_construct(
        name=self.name,
        datatype=self.datatype,
        purpose=self.purpose,
        granularity=self.granularity,
        derivation=self.derivation,
        metadata=self.metadata,
        lineage=select_lineage,
        grain=select_grain,
        namespace=self.namespace,
        keys=select_keys,
        modifiers=self.modifiers,
        pseudonyms=self.pseudonyms,
    )
1014
+
1015
def with_grain(self, grain: Optional["Grain"] = None) -> Self:
    """Return a copy of this concept carrying ``grain``.

    A falsy ``grain`` (including the default ``None``) becomes an empty
    ``Grain``. The copy is built with ``model_construct``.
    """
    new_grain = grain if grain else Grain(components=set())
    return self.__class__.model_construct(
        name=self.name,
        datatype=self.datatype,
        purpose=self.purpose,
        metadata=self.metadata,
        granularity=self.granularity,
        derivation=self.derivation,
        lineage=self.lineage,
        grain=new_grain,
        namespace=self.namespace,
        keys=self.keys,
        modifiers=self.modifiers,
        pseudonyms=self.pseudonyms,
    )
1031
+
1032
@property
def sources(self) -> List["ConceptRef"]:
    """Concept references named directly by this concept's lineage.

    Returns an empty list when there is no lineage. Only ``ConceptRef``
    arguments are collected.

    Raises:
        SyntaxError: if the lineage references this concept's own address.
    """
    if not self.lineage:
        return []

    collected: List[ConceptRef] = []
    for candidate in self.lineage.concept_arguments:
        if not isinstance(candidate, (ConceptRef,)):
            continue
        if candidate.address == self.address:
            raise SyntaxError(f"Concept {self.address} references itself")
        collected.append(candidate)
    return collected
1062
+
1063
@property
def concept_arguments(self) -> List[ConceptRef]:
    """The lineage's concept arguments, or an empty list without a lineage."""
    if not self.lineage:
        return []
    return self.lineage.concept_arguments
1066
+
1067
@classmethod
def calculate_derivation(cls, lineage, purpose):
    """Classify how a concept is derived from its lineage and purpose.

    Args:
        lineage: the lineage object of the concept, or a falsy value when
            it has none. Both the authoring types and their ``Build*``
            counterparts are recognised.
        purpose: the concept's ``Purpose``; only consulted when the
            lineage does not decide the result.

    Returns:
        The matching ``Derivation`` member. Wrapper lineages map straight
        to their derivation. A function lineage is classified by operator
        (aggregate, unnest, union, single-row), then as ``CONSTANT`` when
        it has no concept arguments or all of them are constant, else
        ``BASIC``. Otherwise the result is ``CONSTANT`` for a constant
        purpose and ``ROOT`` for everything else.
    """
    # imported here rather than at module level, as in the original
    from trilogy.core.models.build import (
        BuildAggregateWrapper,
        BuildFilterItem,
        BuildFunction,
        BuildMultiSelectLineage,
        BuildRowsetItem,
        BuildWindowItem,
    )

    if lineage:
        # order matters: it mirrors the original if/elif chain
        wrapper_derivations = (
            ((BuildWindowItem, WindowItem), Derivation.WINDOW),
            ((BuildFilterItem, FilterItem), Derivation.FILTER),
            ((BuildAggregateWrapper, AggregateWrapper), Derivation.AGGREGATE),
            ((BuildRowsetItem, RowsetItem), Derivation.ROWSET),
            ((BuildMultiSelectLineage, MultiSelectLineage), Derivation.MULTISELECT),
        )
        for lineage_types, derivation in wrapper_derivations:
            if isinstance(lineage, lineage_types):
                return derivation

        if isinstance(lineage, (BuildFunction, Function)):
            operator = lineage.operator
            if operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
                return Derivation.AGGREGATE
            if operator == FunctionType.UNNEST:
                return Derivation.UNNEST
            if operator == FunctionType.UNION:
                return Derivation.UNION
            if operator in FunctionClass.SINGLE_ROW.value:
                return Derivation.CONSTANT
            arguments = lineage.concept_arguments
            if not arguments:
                return Derivation.CONSTANT
            if all([x.derivation == Derivation.CONSTANT for x in arguments]):
                return Derivation.CONSTANT
            return Derivation.BASIC

    if purpose == Purpose.CONSTANT:
        return Derivation.CONSTANT
    return Derivation.ROOT
1126
+
1127
+ # @property
1128
+ # def derivation(self) -> Derivation:
1129
+ # return self.calculate_derivation(self.lineage, self.purpose)
1130
+
1131
@classmethod
def calculate_granularity(cls, derivation: Derivation, grain: Grain, lineage):
    """Decide whether a concept yields a single row or many.

    Constants are single-row. An aggregate is single-row only when every
    grain component ends with ``ALL_ROWS_CONCEPT`` (vacuously true for an
    empty grain) and multi-row otherwise. For other derivations, UNNEST
    and UNION functions are multi-row, a lineage whose concept arguments
    are all single-row is single-row, and everything else is multi-row.
    """
    from trilogy.core.models.build import BuildFunction

    if derivation == Derivation.CONSTANT:
        return Granularity.SINGLE_ROW
    if derivation == Derivation.AGGREGATE:
        # the lineage is never inspected for aggregates
        spans_all_rows = all(
            [component.endswith(ALL_ROWS_CONCEPT) for component in grain.components]
        )
        return Granularity.SINGLE_ROW if spans_all_rows else Granularity.MULTI_ROW
    if (
        lineage
        and isinstance(lineage, (Function, BuildFunction))
        and lineage.operator in (FunctionType.UNNEST, FunctionType.UNION)
    ):
        return Granularity.MULTI_ROW
    if lineage and all(
        [
            argument.granularity == Granularity.SINGLE_ROW
            for argument in lineage.concept_arguments
        ]
    ):
        return Granularity.SINGLE_ROW
    return Granularity.MULTI_ROW
1151
+
1152
+ # @property
1153
+ # def granularity(self) -> Granularity:
1154
+ # return self.calculate_granularity(self.derivation, self.grain, self.lineage)
1155
+
1156
def with_filter(
    self,
    condition: Conditional | Comparison | Parenthetical,
    environment: Environment | None = None,
) -> "Concept":
    """Return a concept that is this one filtered by ``condition``.

    If this concept is already a filter on exactly ``condition`` it is
    returned unchanged. Otherwise a new concept named
    ``<name>_filter_<hash>`` is built whose lineage is a ``FilterItem``
    over this concept's reference. Keys are carried over only for
    properties. When ``environment`` is given, the new concept is added
    to it.
    """
    from trilogy.utility import string_to_hash

    already_filtered = (
        self.lineage
        and isinstance(self.lineage, FilterItem)
        and self.lineage.where.conditional == condition
    )
    if already_filtered:
        return self

    digest = string_to_hash(self.name + str(condition))
    filter_lineage = FilterItem(
        content=self.reference, where=WhereClause(conditional=condition)
    )
    carried_keys = self.keys if self.purpose == Purpose.PROPERTY else None
    carried_grain = self.grain if self.grain else Grain(components=set())
    filtered = Concept.model_construct(
        name=f"{self.name}_filter_{digest}",
        datatype=self.datatype,
        purpose=self.purpose,
        derivation=self.calculate_derivation(filter_lineage, self.purpose),
        granularity=self.granularity,
        metadata=self.metadata,
        lineage=filter_lineage,
        keys=carried_keys,
        grain=carried_grain,
        namespace=self.namespace,
        modifiers=self.modifiers,
        pseudonyms=self.pseudonyms,
    )
    if environment:
        environment.add_concept(filtered)
    return filtered
1187
+
1188
+
1189
class UndefinedConceptFull(Concept, Mergeable, Namespaced):
    """A ``Concept`` subclass whose datatype and purpose default to unknown."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    name: str
    line_no: int | None = None
    datatype: DataType | ListType | StructType | MapType | NumericType = (
        DataType.UNKNOWN
    )
    purpose: Purpose = Purpose.UNKNOWN

    @property
    def reference(self) -> UndefinedConcept:
        """An ``UndefinedConcept`` pointing at this concept's address."""
        return UndefinedConcept(address=self.address)
1201
+
1202
+
1203
class OrderItem(Mergeable, ConceptArgs, Namespaced, BaseModel):
    """One ordering term: an expression and the direction to sort it in."""

    # this needs to be a full concept as it may not exist in environment
    expr: Expr
    order: Ordering

    @field_validator("expr", mode="before")
    def enforce_reference(cls, v):
        """Swap a full ``Concept`` for its reference before validation."""
        return v.reference if isinstance(v, Concept) else v

    def with_namespace(self, namespace: str) -> "OrderItem":
        """Copy with the expression re-namespaced when it supports that."""
        expression = self.expr
        if isinstance(expression, Namespaced):
            expression = expression.with_namespace(namespace)
        return OrderItem.model_construct(expr=expression, order=self.order)

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "OrderItem":
        """Copy with the merge applied to the expression when it supports that."""
        expression = self.expr
        if isinstance(expression, Mergeable):
            expression = expression.with_merge(source, target, modifiers)
        return OrderItem(expr=expression, order=self.order)

    @property
    def output(self):
        """The ``output`` of the wrapped expression."""
        return self.expr.output

    @property
    def concept_arguments(self) -> Sequence[ConceptRef]:
        """Concept references used by the expression, as a new list."""
        expression = self.expr
        if isinstance(expression, ConceptRef):
            return [expression]
        if isinstance(expression, ConceptArgs):
            return list(expression.concept_arguments)
        return []

    @property
    def row_arguments(self) -> Sequence[ConceptRef]:
        """Row arguments of the expression, else this item's concept arguments."""
        if not isinstance(self.expr, ConceptArgs):
            return self.concept_arguments
        return self.expr.row_arguments

    @property
    def existence_arguments(self) -> Sequence[tuple["ConceptRef", ...]]:
        """Existence arguments of the expression, or an empty list."""
        if not isinstance(self.expr, ConceptArgs):
            return []
        return self.expr.existence_arguments

    @property
    def output_datatype(self):
        """Datatype of the expression, as resolved by ``arg_to_datatype``."""
        return arg_to_datatype(self.expr)
1265
+
1266
+
1267
class WindowItem(DataTyped, ConceptArgs, Mergeable, Namespaced, BaseModel):
    """A window function applied to a concept, with ordering and partitioning."""

    type: WindowType
    content: ConceptRef
    order_by: List["OrderItem"]
    over: List["ConceptRef"] = Field(default_factory=list)
    index: Optional[int] = None

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return f"{self.type}({self.content} {self.index}, {self.over}, {self.order_by})"

    @field_validator("content", mode="before")
    def enforce_concept_ref(cls, v):
        """Turn a full ``Concept`` into a ``ConceptRef`` before validation."""
        if not isinstance(v, Concept):
            return v
        return ConceptRef(address=v.address, datatype=v.datatype)

    @field_validator("over", mode="before")
    def enforce_concept_ref_over(cls, v):
        """Turn every full ``Concept`` in ``over`` into a ``ConceptRef``."""
        return [
            (
                ConceptRef(address=item.address, datatype=item.datatype)
                if isinstance(item, Concept)
                else item
            )
            for item in v
        ]

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "WindowItem":
        """Copy with the merge applied to content, partitioning and ordering."""
        return WindowItem.model_construct(
            type=self.type,
            content=self.content.with_merge(source, target, modifiers),
            over=[ref.with_merge(source, target, modifiers) for ref in self.over],
            order_by=[
                item.with_merge(source, target, modifiers) for item in self.order_by
            ],
            index=self.index,
        )

    def with_namespace(self, namespace: str) -> "WindowItem":
        """Copy with content, partitioning and ordering re-namespaced."""
        return WindowItem.model_construct(
            type=self.type,
            content=self.content.with_namespace(namespace),
            over=[ref.with_namespace(namespace) for ref in self.over],
            order_by=[item.with_namespace(namespace) for item in self.order_by],
            index=self.index,
        )

    @property
    def concept_arguments(self) -> List[ConceptRef]:
        """Same list as ``arguments``."""
        return self.arguments

    @property
    def arguments(self) -> List[ConceptRef]:
        """The content, then each ordering item's output, then the ``over`` refs."""
        return [
            self.content,
            *[order.output for order in self.order_by],
            *self.over,
        ]

    @property
    def output(self) -> ConceptRef:
        """The concept the window is computed over."""
        return self.content

    @property
    def output_datatype(self):
        """Integer for RANK and ROW_NUMBER, otherwise the content's output datatype."""
        if self.type not in (WindowType.RANK, WindowType.ROW_NUMBER):
            return self.content.output_datatype
        return DataType.INTEGER

    @property
    def output_purpose(self):
        """Window results are always properties."""
        return Purpose.PROPERTY
1343
+
1344
+
1345
def get_basic_type(
    type: DataType | ListType | StructType | MapType | NumericType,
) -> DataType:
    """Map a parameterised type object onto its plain ``DataType`` member.

    List, struct, map and numeric type objects map to ``LIST``, ``STRUCT``,
    ``MAP`` and ``NUMERIC``. Any other value is returned unchanged.
    """
    wrapper_to_basic = (
        (ListType, DataType.LIST),
        (StructType, DataType.STRUCT),
        (MapType, DataType.MAP),
        (NumericType, DataType.NUMERIC),
    )
    for wrapper, basic in wrapper_to_basic:
        if isinstance(type, wrapper):
            return basic
    return type
1357
+
1358
+
1359
class CaseWhen(Namespaced, ConceptArgs, Mergeable, BaseModel):
    """A ``WHEN <comparison> THEN <expr>`` branch of a case expression."""

    comparison: Conditional | SubselectComparison | Comparison
    expr: "Expr"

    @field_validator("expr", mode="before")
    def enforce_reference(cls, v):
        """Swap a full ``Concept`` for its reference before validation."""
        return v.reference if isinstance(v, Concept) else v

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return f"WHEN {str(self.comparison)} THEN {str(self.expr)}"

    @property
    def concept_arguments(self):
        """Concept references of the comparison followed by those of the result."""
        from_comparison = get_concept_arguments(self.comparison)
        from_result = get_concept_arguments(self.expr)
        return from_comparison + from_result

    @property
    def concept_row_arguments(self):
        """Row-level concept references of the comparison and of the result."""
        from_comparison = get_concept_row_arguments(self.comparison)
        from_result = get_concept_row_arguments(self.expr)
        return from_comparison + from_result

    def with_namespace(self, namespace: str) -> CaseWhen:
        """Copy with the comparison and, where supported, the result re-namespaced."""
        moved_comparison = self.comparison.with_namespace(namespace)
        moved_expr = self.expr
        if isinstance(moved_expr, Namespaced):
            moved_expr = moved_expr.with_namespace(namespace)
        return CaseWhen.model_construct(comparison=moved_comparison, expr=moved_expr)

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> CaseWhen:
        """Copy with the merge applied to the comparison and, where supported, the result."""
        merged_comparison = self.comparison.with_merge(source, target, modifiers)
        merged_expr = self.expr
        if isinstance(merged_expr, Mergeable):
            merged_expr = merged_expr.with_merge(source, target, modifiers)
        return CaseWhen.model_construct(comparison=merged_comparison, expr=merged_expr)
1409
+
1410
+
1411
class CaseElse(Namespaced, ConceptArgs, Mergeable, BaseModel):
    """The fallback branch of a case expression."""

    expr: "Expr"
    # this ensures that it's easily differentiable from CaseWhen
    discriminant: ComparisonOperator = ComparisonOperator.ELSE

    @field_validator("expr", mode="before")
    def enforce_expr(cls, v):
        """Turn a full ``Concept`` into a ``ConceptRef`` before validation."""
        if not isinstance(v, Concept):
            return v
        return ConceptRef(address=v.address, datatype=v.datatype)

    @property
    def concept_arguments(self):
        """Concept references used by the fallback expression."""
        return get_concept_arguments(self.expr)

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> CaseElse:
        """Copy with the merge applied to the expression when it supports that."""
        merged_expr = self.expr
        if isinstance(merged_expr, Mergeable):
            merged_expr = merged_expr.with_merge(source, target, modifiers)
        return CaseElse.model_construct(
            discriminant=self.discriminant, expr=merged_expr
        )

    def with_namespace(self, namespace: str) -> CaseElse:
        """Copy with the expression re-namespaced when it supports that."""
        moved_expr = self.expr
        if isinstance(moved_expr, Namespaced):
            moved_expr = moved_expr.with_namespace(namespace)
        return CaseElse.model_construct(
            discriminant=self.discriminant, expr=moved_expr
        )
1450
+
1451
+
1452
def get_concept_row_arguments(expr) -> List["ConceptRef"]:
    """Collect the row-level concept references of ``expr`` into a new list.

    A ``ConceptRef`` yields itself, a ``ConceptArgs`` yields its
    ``row_arguments``, and anything else yields an empty list.
    """
    if isinstance(expr, ConceptRef):
        return [expr]
    if isinstance(expr, ConceptArgs):
        return list(expr.row_arguments)
    return []
1460
+
1461
+
1462
def get_concept_arguments(expr) -> List["ConceptRef"]:
    """Collect the concept references of ``expr`` into a new list.

    A ``ConceptRef`` yields itself, a ``ConceptArgs`` yields its
    ``concept_arguments``, and anything else yields an empty list.
    """
    if isinstance(expr, ConceptRef):
        return [expr]
    if isinstance(expr, ConceptArgs):
        return list(expr.concept_arguments)
    return []
1473
+
1474
+
1475
class Function(DataTyped, ConceptArgs, Mergeable, Namespaced, BaseModel):
    """A call to an operator together with its arguments.

    ``arguments`` is declared last on purpose: its validator reads
    ``arg_count``, ``operator`` and ``valid_inputs`` from the fields that
    were validated before it.
    """

    operator: FunctionType
    arg_count: int = Field(default=1)
    output_datatype: DataType | ListType | StructType | MapType | NumericType
    output_purpose: Purpose
    valid_inputs: Optional[
        Union[
            Set[DataType],
            List[Set[DataType]],
        ]
    ] = None
    arguments: Sequence[
        Union[
            ConceptRef,
            AggregateWrapper,
            Function,
            Parenthetical,
            CaseWhen,
            CaseElse,
            WindowItem,
            int,
            float,
            str,
            date,
            datetime,
            MapWrapper[Any, Any],
            DataType,
            ListType,
            MapType,
            NumericType,
            DatePart,
            list,
            ListWrapper[Any],
        ]
    ]

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # NOTE(review): rejects any function whose rendering contains the
        # text "datatype" - presumably a guard against an unrendered model
        # leaking into the arguments; confirm intent.
        if "datatype" in str(self):
            raise SyntaxError(str(self))

    def __repr__(self):
        return f'{self.operator.value}({",".join([str(a) for a in self.arguments])})'

    def __str__(self):
        return self.__repr__()

    @property
    def datatype(self):
        """Alias for ``output_datatype``."""
        return self.output_datatype

    @field_validator("arguments", mode="before")
    @classmethod
    def parse_arguments(cls, v, info: ValidationInfo):
        """Normalise the arguments and check their count and input types.

        Full concepts (other than ``BuildConcept``) are replaced by their
        references. When ``valid_inputs`` was validated, the argument
        count is checked against ``arg_count`` and each concept reference,
        nested function and literal constant is checked against the types
        allowed at its position.

        Raises:
            ParseError: too many arguments for a function that does not
                take an unlimited number.
            TypeError: an argument whose type is known and is not allowed
                at its position.
        """
        from trilogy.core.models.build import BuildConcept
        from trilogy.parsing.exceptions import ParseError

        values = info.data
        arg_count = len(v)
        v = [
            (
                x.reference
                if isinstance(x, Concept) and not isinstance(x, BuildConcept)
                else x
            )
            for x in v
        ]
        target_arg_count = values["arg_count"]
        operator_name = values["operator"].name
        # surface right error
        if "valid_inputs" not in values:
            return v
        valid_inputs = values["valid_inputs"]
        if arg_count > target_arg_count and target_arg_count != InfiniteFunctionArgs:
            raise ParseError(
                f"Incorrect argument count to {operator_name} function, expects"
                f" {target_arg_count}, got {arg_count}"
            )
        # if all arguments can be any of the set type
        # turn this into an array for validation
        if isinstance(valid_inputs, set):
            valid_inputs = [valid_inputs for _ in v]
        elif not valid_inputs:
            return v

        # python type of a literal constant -> datatype it represents;
        # built once rather than on every loop iteration
        constant_types: List[Tuple[Type, DataType]] = [
            (str, DataType.STRING),
            (int, DataType.INTEGER),
            (float, DataType.FLOAT),
            (bool, DataType.BOOL),
            (DatePart, DataType.DATE_PART),
        ]
        for idx, arg in enumerate(v):
            # valid_inputs[idx] is only indexed once an argument kind
            # matched, exactly as before
            if (
                isinstance(arg, ConceptRef)
                and get_basic_type(arg.datatype.data_type) not in valid_inputs[idx]
            ):
                if arg.datatype != DataType.UNKNOWN:
                    raise TypeError(
                        f"Invalid input datatype {arg.datatype.data_type} passed into position {idx}"
                        f" for {operator_name} from concept {arg.name}, valid is {valid_inputs[idx]}"
                    )
            if (
                isinstance(arg, Function)
                and get_basic_type(arg.output_datatype) not in valid_inputs[idx]
            ):
                if arg.output_datatype != DataType.UNKNOWN:
                    raise TypeError(
                        f"Invalid input datatype {arg.output_datatype} passed into"
                        f" {operator_name} from function {arg.operator.name}, need {valid_inputs[idx]}"
                    )
            # check constants
            # A value can match more than one row: bool is a subclass of
            # int, so True matches both INTEGER and BOOL. Accept it when
            # any matching datatype is allowed; taking only the first match
            # rejected booleans wherever BOOL was valid and INTEGER was not.
            matched = [
                dtype for ptype, dtype in constant_types if isinstance(arg, ptype)
            ]
            if matched and not any(
                get_basic_type(dtype) in valid_inputs[idx] for dtype in matched
            ):
                # report the most specific matching datatype
                raise TypeError(
                    f"Invalid {matched[-1]} constant passed into {operator_name} {arg}, expecting one of {valid_inputs[idx]}"
                )
        return v

    def with_namespace(self, namespace: str) -> "Function":
        """Copy with every namespace-aware argument re-namespaced."""
        return Function.model_construct(
            operator=self.operator,
            arguments=[
                c.with_namespace(namespace) if isinstance(c, Namespaced) else c
                for c in self.arguments
            ],
            output_datatype=self.output_datatype,
            output_purpose=self.output_purpose,
            valid_inputs=self.valid_inputs,
            arg_count=self.arg_count,
        )

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "Function":
        """Copy with the merge applied to every mergeable argument."""
        return Function.model_construct(
            operator=self.operator,
            arguments=[
                (
                    c.with_merge(source, target, modifiers)
                    if isinstance(c, Mergeable)
                    else c
                )
                for c in self.arguments
            ],
            output_datatype=self.output_datatype,
            output_purpose=self.output_purpose,
            valid_inputs=self.valid_inputs,
            arg_count=self.arg_count,
        )

    @property
    def concept_arguments(self) -> List[ConceptRef]:
        """Concept references gathered from every argument, in order."""
        base = []
        for arg in self.arguments:
            base += get_concept_arguments(arg)
        return base

    @property
    def output_grain(self):
        """Empty grain for aggregate operators, else the combined argument grains."""
        # aggregates have an abstract grain
        base_grain = Grain(components=[])
        if self.operator in FunctionClass.AGGREGATE_FUNCTIONS.value:
            return base_grain
        # scalars have implicit grain of all arguments
        for argument in self.concept_arguments:
            base_grain += argument.grain
        return base_grain
1659
+
1660
+
1661
class AggregateWrapper(Mergeable, ConceptArgs, Namespaced, BaseModel):
    """An aggregate function paired with the concepts it is grouped by."""

    function: Function
    by: List[ConceptRef] = Field(default_factory=list)

    @field_validator("by", mode="before")
    @classmethod
    def enforce_concept_ref(cls, v):
        """Swap every full ``Concept`` in ``by`` for its reference."""
        return [item.reference if isinstance(item, Concept) else item for item in v]

    def __str__(self):
        if self.by:
            grain_str = [str(c) for c in self.by]
        else:
            grain_str = "abstract"
        return f"{str(self.function)}<{grain_str}>"

    @property
    def datatype(self):
        """Datatype of the wrapped function."""
        return self.function.datatype

    @property
    def concept_arguments(self) -> List[ConceptRef]:
        """The function's concept references followed by the ``by`` references."""
        return self.function.concept_arguments + self.by

    @property
    def output_datatype(self):
        """Output datatype of the wrapped function."""
        return self.function.output_datatype

    @property
    def output_purpose(self):
        """Output purpose of the wrapped function."""
        return self.function.output_purpose

    @property
    def arguments(self):
        """Arguments of the wrapped function."""
        return self.function.arguments

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Copy with the merge applied to the function and to each ``by`` reference."""
        merged_function = self.function.with_merge(source, target, modifiers=modifiers)
        merged_by = []
        if self.by:
            merged_by = [c.with_merge(source, target, modifiers) for c in self.by]
        return AggregateWrapper.model_construct(function=merged_function, by=merged_by)

    def with_namespace(self, namespace: str) -> "AggregateWrapper":
        """Copy with the function and each ``by`` reference re-namespaced."""
        moved_function = self.function.with_namespace(namespace)
        moved_by = []
        if self.by:
            moved_by = [c.with_namespace(namespace) for c in self.by]
        return AggregateWrapper.model_construct(function=moved_function, by=moved_by)
1715
+
1716
+
1717
class FilterItem(Namespaced, ConceptArgs, BaseModel):
    """A concept restricted by a where clause."""

    content: ConceptRef
    where: "WhereClause"

    @field_validator("content", mode="before")
    def enforce_concept_ref(cls, v):
        """Turn a full ``Concept`` into a ``ConceptRef`` before validation."""
        if not isinstance(v, Concept):
            return v
        return ConceptRef(address=v.address, datatype=v.datatype)

    def __str__(self):
        return f"<Filter: {str(self.content)} where {str(self.where)}>"

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "FilterItem":
        """Copy with the merge applied to both the content and the where clause."""
        merged_content = self.content.with_merge(source, target, modifiers)
        merged_where = self.where.with_merge(source, target, modifiers)
        return FilterItem.model_construct(content=merged_content, where=merged_where)

    def with_namespace(self, namespace: str) -> "FilterItem":
        """Copy with both the content and the where clause re-namespaced."""
        moved_content = self.content.with_namespace(namespace)
        moved_where = self.where.with_namespace(namespace)
        return FilterItem.model_construct(content=moved_content, where=moved_where)

    @property
    def output_datatype(self):
        """Datatype of the filtered concept."""
        return self.content.datatype

    @property
    def concept_arguments(self):
        """The filtered concept followed by the where clause's concept references."""
        return [self.content] + self.where.concept_arguments
1751
+
1752
+
1753
class RowsetLineage(Namespaced, Mergeable, BaseModel):
    """A named rowset: the select behind it and the concepts derived from it."""

    name: str
    derived_concepts: List[ConceptRef]
    select: SelectLineage | MultiSelectLineage

    def with_namespace(self, namespace: str):
        """Copy with the derived concepts and the select re-namespaced."""
        moved_concepts = [
            ref.with_namespace(namespace) for ref in self.derived_concepts
        ]
        moved_select = self.select.with_namespace(namespace)
        return RowsetLineage.model_construct(
            name=self.name,
            derived_concepts=moved_concepts,
            select=moved_select,
        )

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Copy with the merge applied to the derived concepts and the select."""
        merged_concepts = [
            ref.with_merge(source, target, modifiers) for ref in self.derived_concepts
        ]
        merged_select = self.select.with_merge(source, target, modifiers)
        return RowsetLineage.model_construct(
            name=self.name,
            derived_concepts=merged_concepts,
            select=merged_select,
        )
1775
+
1776
+
1777
class RowsetItem(Mergeable, ConceptArgs, Namespaced, BaseModel):
    """A single concept exposed through a rowset."""

    content: ConceptRef
    rowset: RowsetLineage

    def __repr__(self):
        return f"<Rowset<{self.rowset.name}>: {str(self.content)}>"

    def __str__(self):
        return self.__repr__()

    def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
        """Copy with the merge applied to the content only.

        The rowset is carried over unchanged.
        """
        merged_content = self.content.with_merge(source, target, modifiers)
        return RowsetItem.model_construct(content=merged_content, rowset=self.rowset)

    def with_namespace(self, namespace: str) -> "RowsetItem":
        """Copy with both the content and the rowset re-namespaced."""
        moved_content = self.content.with_namespace(namespace)
        moved_rowset = self.rowset.with_namespace(namespace)
        return RowsetItem.model_construct(content=moved_content, rowset=moved_rowset)

    @property
    def arguments(self) -> List[ConceptRef]:
        """A one-element list holding the content reference."""
        return [self.content]

    @property
    def output(self) -> ConceptRef:
        """The content reference."""
        return self.content

    @property
    def output_datatype(self):
        """Datatype of the content reference."""
        return self.content.datatype

    @property
    def concept_arguments(self):
        """A one-element list holding the content reference."""
        return [self.content]
1815
+
1816
+
1817
class OrderBy(Mergeable, Namespaced, BaseModel):
    """An ordered list of ``OrderItem`` terms."""

    items: List[OrderItem]

    def with_namespace(self, namespace: str) -> "OrderBy":
        """Copy with every item re-namespaced."""
        return OrderBy.model_construct(
            items=[x.with_namespace(namespace) for x in self.items]
        )

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> "OrderBy":
        """Copy with the merge applied to every item."""
        return OrderBy.model_construct(
            items=[x.with_merge(source, target, modifiers) for x in self.items]
        )

    @property
    def concept_arguments(self) -> List[ConceptRef]:
        """Concept references used by all items, in item order.

        Delegates to ``OrderItem.concept_arguments`` so that an item whose
        expression is not a bare ``ConceptRef`` contributes the references
        inside it instead of the raw expression object. For items that
        order by a plain reference the result is unchanged.
        """
        refs: List[ConceptRef] = []
        for item in self.items:
            refs.extend(item.concept_arguments)
        return refs
1835
+
1836
+
1837
class AlignClause(Namespaced, BaseModel):
    """The list of align items of a multi-select."""

    items: List[AlignItem]

    def with_namespace(self, namespace: str) -> "AlignClause":
        """Copy with every align item re-namespaced."""
        moved_items = [item.with_namespace(namespace) for item in self.items]
        return AlignClause.model_construct(items=moved_items)
1844
+
1845
+
1846
class SelectLineage(Mergeable, Namespaced, BaseModel):
    """The recorded shape of a select: outputs, local concepts and clauses."""

    selection: List[ConceptRef]
    hidden_components: set[str]
    local_concepts: dict[str, Concept]
    order_by: Optional[OrderBy] = None
    limit: Optional[int] = None
    meta: Metadata = Field(default_factory=lambda: Metadata())
    grain: Grain = Field(default_factory=Grain)
    where_clause: Union["WhereClause", None] = Field(default=None)
    having_clause: Union["HavingClause", None] = Field(default=None)

    @property
    def output_components(self) -> List[ConceptRef]:
        """The selected concept references."""
        return self.selection

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> SelectLineage:
        """Copy with the merge applied to every mergeable part.

        ``hidden_components`` and ``limit`` are carried over unchanged, and
        ``meta`` is not passed to the new instance.
        """
        merged_selection = [
            ref.with_merge(source, target, modifiers) for ref in self.selection
        ]
        merged_locals = {
            address: concept.with_merge(source, target, modifiers)
            for address, concept in self.local_concepts.items()
        }
        merged_order = None
        if self.order_by:
            merged_order = self.order_by.with_merge(source, target, modifiers)
        merged_grain = self.grain.with_merge(source, target, modifiers)
        merged_where = None
        if self.where_clause:
            merged_where = self.where_clause.with_merge(source, target, modifiers)
        merged_having = None
        if self.having_clause:
            merged_having = self.having_clause.with_merge(source, target, modifiers)
        return SelectLineage.model_construct(
            selection=merged_selection,
            hidden_components=self.hidden_components,
            local_concepts=merged_locals,
            order_by=merged_order,
            limit=self.limit,
            grain=merged_grain,
            where_clause=merged_where,
            having_clause=merged_having,
        )
1889
+
1890
+
1891
class MultiSelectLineage(Mergeable, ConceptArgs, Namespaced, BaseModel):
    """Several selects combined through an align clause."""

    selects: List[SelectLineage]
    align: AlignClause
    namespace: str
    order_by: Optional[OrderBy] = None
    limit: Optional[int] = None
    where_clause: Union["WhereClause", None] = Field(default=None)
    having_clause: Union["HavingClause", None] = Field(default=None)
    hidden_components: set[str]

    @property
    def grain(self):
        """The grains of all selects added together, starting from an empty grain."""
        combined = Grain()
        for select in self.selects:
            combined += select.grain
        return combined

    @property
    def output_components(self) -> list[ConceptRef]:
        """Aligned concepts plus every select's outputs, minus hidden addresses.

        Aligned concepts are emitted as references with an unknown datatype.
        """
        candidates = [
            ConceptRef.model_construct(address=address, datatype=DataType.UNKNOWN)
            for address in self.derived_concepts
        ]
        for select in self.selects:
            candidates += select.output_components
        return [
            ref for ref in candidates if ref.address not in self.hidden_components
        ]

    def with_merge(
        self, source: Concept, target: Concept, modifiers: List[Modifier]
    ) -> MultiSelectLineage:
        """Copy with the merge applied to the selects and the optional clauses.

        ``align``, ``namespace``, ``hidden_components`` and ``limit`` are
        carried over unchanged.
        """
        merged_selects = [
            select.with_merge(source, target, modifiers) for select in self.selects
        ]
        merged_order = None
        if self.order_by:
            merged_order = self.order_by.with_merge(source, target, modifiers)
        merged_where = None
        if self.where_clause:
            merged_where = self.where_clause.with_merge(source, target, modifiers)
        merged_having = None
        if self.having_clause:
            merged_having = self.having_clause.with_merge(source, target, modifiers)
        return MultiSelectLineage.model_construct(
            selects=merged_selects,
            align=self.align,
            namespace=self.namespace,
            hidden_components=self.hidden_components,
            order_by=merged_order,
            limit=self.limit,
            where_clause=merged_where,
            having_clause=merged_having,
        )

    def with_namespace(self, namespace: str) -> "MultiSelectLineage":
        """Copy with the selects, align clause and optional clauses re-namespaced.

        The copy's ``namespace`` is set to the given value, while
        ``hidden_components`` and ``limit`` are carried over unchanged.
        """
        moved_selects = [select.with_namespace(namespace) for select in self.selects]
        moved_align = self.align.with_namespace(namespace)
        moved_order = self.order_by.with_namespace(namespace) if self.order_by else None
        moved_where = None
        if self.where_clause:
            moved_where = self.where_clause.with_namespace(namespace)
        moved_having = None
        if self.having_clause:
            moved_having = self.having_clause.with_namespace(namespace)
        return MultiSelectLineage.model_construct(
            selects=moved_selects,
            align=moved_align,
            namespace=namespace,
            hidden_components=self.hidden_components,
            order_by=moved_order,
            limit=self.limit,
            where_clause=moved_where,
            having_clause=moved_having,
        )

    @property
    def derived_concepts(self) -> set[str]:
        """The ``aligned_concept`` of every align item."""
        return {item.aligned_concept for item in self.align.items}

    @property
    def concept_arguments(self):
        """Output components of every select, de-duplicated via ``unique`` on ``address``."""
        gathered = []
        for select in self.selects:
            gathered += select.output_components
        return unique(gathered, "address")
1978
+
1979
+
1980
class LooseConceptList(BaseModel):
    """A collection of concepts that is compared by address only.

    Equality, subset, membership, difference and disjointness checks all
    operate on the set of ``address`` values of the contained items, so the
    order of ``concepts`` and any duplicates in it do not affect them.
    """

    concepts: Sequence[Concept | ConceptRef]

    @cached_property
    def addresses(self) -> set[str]:
        """Set of the addresses of every item in ``concepts`` (cached)."""
        return {s.address for s in self.concepts}

    @classmethod
    def validate(cls, v):
        """Build an instance whose ``concepts`` field is ``v``."""
        # pydantic models only accept keyword arguments; passing ``v``
        # positionally raises TypeError, so name the field explicitly.
        return cls(concepts=v)

    @cached_property
    def sorted_addresses(self) -> List[str]:
        """Addresses in sorted order (cached)."""
        return sorted(self.addresses)

    def __str__(self) -> str:
        return f"lcl{str(self.sorted_addresses)}"

    def __iter__(self):
        """Iterate over the stored concepts themselves, not their addresses."""
        return iter(self.concepts)

    def __eq__(self, other):
        """Two lists are equal when their address sets are equal."""
        if not isinstance(other, LooseConceptList):
            return False
        return self.addresses == other.addresses

    def issubset(self, other):
        """True when every address here is also in ``other``.

        Returns False when ``other`` is not a ``LooseConceptList``.
        """
        if not isinstance(other, LooseConceptList):
            return False
        return self.addresses.issubset(other.addresses)

    def __contains__(self, other):
        """Membership by address.

        Accepts a raw address string, a ``Concept`` or a ``ConceptRef``;
        any other type is reported as not contained.
        """
        if isinstance(other, str):
            return other in self.addresses
        # ``concepts`` may hold ConceptRef items, so a ConceptRef must be
        # matchable by address just like a Concept.
        if isinstance(other, (Concept, ConceptRef)):
            return other.address in self.addresses
        return False

    def difference(self, other):
        """Addresses present here but not in ``other``.

        Returns False (not an empty set) when ``other`` is not a
        ``LooseConceptList``; kept as-is for existing callers.
        """
        if not isinstance(other, LooseConceptList):
            return False
        return self.addresses.difference(other.addresses)

    def isdisjoint(self, other):
        """True when no address is shared with ``other``.

        Returns False when ``other`` is not a ``LooseConceptList``.
        """
        if not isinstance(other, LooseConceptList):
            return False
        return self.addresses.isdisjoint(other.addresses)
2027
+
2028
+
2029
class AlignItem(Namespaced, BaseModel):
    """One alignment entry: an alias plus the concept references grouped
    under it, within a namespace."""

    alias: str
    concepts: List[ConceptRef]
    namespace: str = Field(default=DEFAULT_NAMESPACE, validate_default=True)

    @field_validator("concepts", mode="before")
    @classmethod
    def enforce_concept_ref(cls, v):
        """Swap any full ``Concept`` in the input for its ``reference``;
        other items are passed through untouched."""
        return [
            entry.reference if isinstance(entry, Concept) else entry for entry in v
        ]

    @computed_field  # type: ignore
    @cached_property
    def concepts_lcl(self) -> LooseConceptList:
        """``concepts`` wrapped in a ``LooseConceptList`` (cached)."""
        return LooseConceptList(concepts=self.concepts)

    @property
    def aligned_concept(self) -> str:
        """Address string formed as ``<namespace>.<alias>``."""
        return f"{self.namespace}.{self.alias}"

    def with_namespace(self, namespace: str) -> "AlignItem":
        """Return a copy under ``namespace`` with each concept reference
        namespaced as well; built with ``model_construct``."""
        namespaced_refs = [ref.with_namespace(namespace) for ref in self.concepts]
        return AlignItem.model_construct(
            alias=self.alias,
            concepts=namespaced_refs,
            namespace=namespace,
        )
2060
+
2061
+
2062
class Metadata(BaseModel):
    """Metadata container object.

    All fields are optional or defaulted, so ``Metadata()`` is valid.

    TODO: support arbitrary tags"""

    # Free-text description; None when not provided.
    description: Optional[str] = None
    # NOTE(review): presumably the line in the parsed source where the
    # described object was declared - confirm against the parser.
    line_number: Optional[int] = None
    # Origin of the concept; defaults to ConceptSource.MANUAL.
    concept_source: ConceptSource = ConceptSource.MANUAL
2069
+
2070
+
2071
class Window(BaseModel):
    """A window described by a count and a ``WindowOrder``."""

    count: int
    window_order: WindowOrder

    def __str__(self):
        """Render as ``Window<order>``; ``count`` is not part of the text."""
        order = self.window_order
        return f"Window<{order}>"
2077
+
2078
+
2079
class WindowItemOver(BaseModel):
    """Wrapper model around a list of concept references.

    NOTE(review): presumably the "over" (partition) part of a window
    expression - confirm against the parser.
    """

    contents: List[ConceptRef]
2081
+
2082
+
2083
class WindowItemOrder(BaseModel):
    """Wrapper model around a list of ``OrderItem`` entries.

    NOTE(review): presumably the ordering part of a window expression -
    confirm against the parser.
    """

    # "OrderItem" is a forward reference, written as a string annotation.
    contents: List["OrderItem"]
2085
+
2086
+
2087
class Comment(BaseModel):
    """Model holding a single piece of comment text."""

    text: str
2089
+
2090
+
2091
# Union alias over plain Python values (bool/int/str/float/list/date/datetime),
# MagicConstants, the tuple/list wrappers, and the model node types listed
# below.
# NOTE(review): the union is evaluated at import time and member order may
# matter to consumers that resolve unions left-to-right (e.g. bool before
# int) - confirm before reordering.
Expr = (
    MagicConstants
    | bool
    | int
    | str
    | float
    | list
    | date
    | datetime
    | TupleWrapper
    | ListWrapper
    | WindowItem
    | FilterItem
    | ConceptRef
    | Comparison
    | Conditional
    | Parenthetical
    | Function
    | AggregateWrapper
)