pytrilogy 0.0.1.118__py3-none-any.whl → 0.0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (45)
  1. {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.2.dist-info}/METADATA +1 -1
  2. pytrilogy-0.0.2.2.dist-info/RECORD +82 -0
  3. {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.2.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +1 -1
  5. trilogy/constants.py +6 -0
  6. trilogy/core/enums.py +7 -2
  7. trilogy/core/env_processor.py +43 -19
  8. trilogy/core/functions.py +1 -0
  9. trilogy/core/models.py +674 -146
  10. trilogy/core/optimization.py +31 -28
  11. trilogy/core/optimizations/inline_constant.py +4 -1
  12. trilogy/core/optimizations/inline_datasource.py +25 -4
  13. trilogy/core/optimizations/predicate_pushdown.py +94 -54
  14. trilogy/core/processing/concept_strategies_v3.py +69 -39
  15. trilogy/core/processing/graph_utils.py +3 -3
  16. trilogy/core/processing/node_generators/__init__.py +0 -2
  17. trilogy/core/processing/node_generators/basic_node.py +30 -17
  18. trilogy/core/processing/node_generators/filter_node.py +3 -1
  19. trilogy/core/processing/node_generators/node_merge_node.py +345 -96
  20. trilogy/core/processing/node_generators/rowset_node.py +18 -16
  21. trilogy/core/processing/node_generators/select_node.py +45 -85
  22. trilogy/core/processing/nodes/__init__.py +2 -0
  23. trilogy/core/processing/nodes/base_node.py +22 -5
  24. trilogy/core/processing/nodes/filter_node.py +3 -0
  25. trilogy/core/processing/nodes/group_node.py +20 -2
  26. trilogy/core/processing/nodes/merge_node.py +32 -18
  27. trilogy/core/processing/nodes/select_node_v2.py +17 -3
  28. trilogy/core/processing/utility.py +100 -8
  29. trilogy/core/query_processor.py +77 -24
  30. trilogy/dialect/base.py +11 -46
  31. trilogy/dialect/bigquery.py +1 -1
  32. trilogy/dialect/common.py +11 -0
  33. trilogy/dialect/duckdb.py +1 -1
  34. trilogy/dialect/presto.py +1 -0
  35. trilogy/hooks/graph_hook.py +50 -5
  36. trilogy/hooks/query_debugger.py +1 -0
  37. trilogy/parsing/common.py +8 -5
  38. trilogy/parsing/parse_engine.py +52 -27
  39. trilogy/parsing/render.py +20 -9
  40. trilogy/parsing/trilogy.lark +13 -8
  41. pytrilogy-0.0.1.118.dist-info/RECORD +0 -83
  42. trilogy/core/processing/node_generators/concept_merge_node.py +0 -214
  43. {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.2.dist-info}/LICENSE.md +0 -0
  44. {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.2.dist-info}/entry_points.txt +0 -0
  45. {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.2.dist-info}/top_level.txt +0 -0
trilogy/core/models.py CHANGED
@@ -63,6 +63,7 @@ from trilogy.core.enums import (
63
63
  DatePart,
64
64
  ShowCategory,
65
65
  Granularity,
66
+ SelectFiltering,
66
67
  )
67
68
  from trilogy.core.exceptions import UndefinedConceptException, InvalidSyntaxException
68
69
  from trilogy.utility import unique
@@ -129,6 +130,12 @@ class Namespaced(ABC):
129
130
  raise NotImplementedError
130
131
 
131
132
 
133
+ class Mergeable(ABC):
134
+
135
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
136
+ raise NotImplementedError
137
+
138
+
132
139
  class ConceptArgs(ABC):
133
140
 
134
141
  @property
@@ -144,8 +151,11 @@ class ConceptArgs(ABC):
144
151
  return self.concept_arguments
145
152
 
146
153
 
147
- class SelectGrain(ABC):
148
- def with_select_grain(self, grain: Grain):
154
+ class SelectContext(ABC):
155
+
156
+ def with_select_context(
157
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
158
+ ):
149
159
  raise NotImplementedError
150
160
 
151
161
 
@@ -154,6 +164,41 @@ class ConstantInlineable(ABC):
154
164
  raise NotImplementedError
155
165
 
156
166
 
167
+ class SelectTypeMixin(BaseModel):
168
+ where_clause: Union["WhereClause", None] = Field(default=None)
169
+
170
+ @property
171
+ def output_components(self) -> List[Concept]:
172
+ raise NotImplementedError
173
+
174
+ @property
175
+ def implicit_where_clause_selections(self) -> List[Concept]:
176
+ if not self.where_clause:
177
+ return []
178
+ filter = set(
179
+ [
180
+ str(x.address)
181
+ for x in self.where_clause.row_arguments
182
+ if not x.derivation == PurposeLineage.CONSTANT
183
+ ]
184
+ )
185
+ query_output = set([str(z.address) for z in self.output_components])
186
+ delta = filter.difference(query_output)
187
+ if delta:
188
+ return [
189
+ x for x in self.where_clause.row_arguments if str(x.address) in delta
190
+ ]
191
+ return []
192
+
193
+ @property
194
+ def where_clause_category(self) -> SelectFiltering:
195
+ if not self.where_clause:
196
+ return SelectFiltering.NONE
197
+ elif self.implicit_where_clause_selections:
198
+ return SelectFiltering.IMPLICIT
199
+ return SelectFiltering.EXPLICIT
200
+
201
+
157
202
  class DataType(Enum):
158
203
  # PRIMITIVES
159
204
  STRING = "string"
@@ -290,11 +335,24 @@ def empty_grain() -> Grain:
290
335
  return Grain(components=[])
291
336
 
292
337
 
293
- class Concept(Namespaced, SelectGrain, BaseModel):
338
+ class MultiLineage(BaseModel):
339
+ lineages: list[
340
+ Union[
341
+ Function,
342
+ WindowItem,
343
+ FilterItem,
344
+ AggregateWrapper,
345
+ RowsetItem,
346
+ MultiSelectStatement,
347
+ ]
348
+ ]
349
+
350
+
351
+ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
294
352
  name: str
295
353
  datatype: DataType | ListType | StructType | MapType | NumericType
296
354
  purpose: Purpose
297
- metadata: Optional[Metadata] = Field(
355
+ metadata: Metadata = Field(
298
356
  default_factory=lambda: Metadata(description=None, line_number=None),
299
357
  validate_default=True,
300
358
  )
@@ -305,17 +363,44 @@ class Concept(Namespaced, SelectGrain, BaseModel):
305
363
  FilterItem,
306
364
  AggregateWrapper,
307
365
  RowsetItem,
308
- MultiSelectStatement | MergeStatement,
366
+ MultiSelectStatement,
309
367
  ]
310
368
  ] = None
311
369
  namespace: Optional[str] = Field(default=DEFAULT_NAMESPACE, validate_default=True)
312
370
  keys: Optional[Tuple["Concept", ...]] = None
313
371
  grain: "Grain" = Field(default=None, validate_default=True)
314
372
  modifiers: Optional[List[Modifier]] = Field(default_factory=list)
373
+ pseudonyms: Dict[str, Concept] = Field(default_factory=dict)
315
374
 
316
375
  def __hash__(self):
317
376
  return hash(str(self))
318
377
 
378
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
379
+ if self.address == source.address:
380
+ new = target.with_grain(self.grain.with_merge(source, target, modifiers))
381
+ new.pseudonyms[self.address] = self
382
+ return new
383
+ return self.__class__(
384
+ name=self.name,
385
+ datatype=self.datatype,
386
+ purpose=self.purpose,
387
+ metadata=self.metadata,
388
+ lineage=(
389
+ self.lineage.with_merge(source, target, modifiers)
390
+ if self.lineage
391
+ else None
392
+ ),
393
+ grain=self.grain.with_merge(source, target, modifiers),
394
+ namespace=self.namespace,
395
+ keys=(
396
+ tuple(x.with_merge(source, target, modifiers) for x in self.keys)
397
+ if self.keys
398
+ else None
399
+ ),
400
+ modifiers=self.modifiers,
401
+ pseudonyms=self.pseudonyms,
402
+ )
403
+
319
404
  @field_validator("keys", mode="before")
320
405
  @classmethod
321
406
  def keys_validator(cls, v, info: ValidationInfo):
@@ -332,7 +417,7 @@ class Concept(Namespaced, SelectGrain, BaseModel):
332
417
  def namespace_validation(cls, v):
333
418
  return v or DEFAULT_NAMESPACE
334
419
 
335
- @field_validator("metadata")
420
+ @field_validator("metadata", mode="before")
336
421
  @classmethod
337
422
  def metadata_validation(cls, v):
338
423
  v = v or Metadata()
@@ -440,15 +525,22 @@ class Concept(Namespaced, SelectGrain, BaseModel):
440
525
  else None
441
526
  ),
442
527
  modifiers=self.modifiers,
528
+ pseudonyms={
529
+ k: v.with_namespace(namespace) for k, v in self.pseudonyms.items()
530
+ },
443
531
  )
444
532
 
445
- def with_select_grain(self, grain: Optional["Grain"] = None) -> "Concept":
533
+ def with_select_context(
534
+ self,
535
+ grain: Optional["Grain"] = None,
536
+ conditional: Conditional | Comparison | Parenthetical | None = None,
537
+ ) -> "Concept":
446
538
  if not all([isinstance(x, Concept) for x in self.keys or []]):
447
539
  raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")
448
540
  new_grain = grain or self.grain
449
541
  new_lineage = self.lineage
450
- if isinstance(self.lineage, SelectGrain):
451
- new_lineage = self.lineage.with_select_grain(new_grain)
542
+ if isinstance(self.lineage, SelectContext):
543
+ new_lineage = self.lineage.with_select_context(new_grain, conditional)
452
544
  return self.__class__(
453
545
  name=self.name,
454
546
  datatype=self.datatype,
@@ -459,6 +551,7 @@ class Concept(Namespaced, SelectGrain, BaseModel):
459
551
  namespace=self.namespace,
460
552
  keys=self.keys,
461
553
  modifiers=self.modifiers,
554
+ pseudonyms=self.pseudonyms,
462
555
  )
463
556
 
464
557
  def with_grain(self, grain: Optional["Grain"] = None) -> "Concept":
@@ -474,6 +567,7 @@ class Concept(Namespaced, SelectGrain, BaseModel):
474
567
  namespace=self.namespace,
475
568
  keys=self.keys,
476
569
  modifiers=self.modifiers,
570
+ pseudonyms=self.pseudonyms,
477
571
  )
478
572
 
479
573
  @cached_property
@@ -512,6 +606,7 @@ class Concept(Namespaced, SelectGrain, BaseModel):
512
606
  keys=self.keys,
513
607
  namespace=self.namespace,
514
608
  modifiers=self.modifiers,
609
+ pseudonyms=self.pseudonyms,
515
610
  )
516
611
 
517
612
  def with_default_grain(self) -> "Concept":
@@ -529,7 +624,7 @@ class Concept(Namespaced, SelectGrain, BaseModel):
529
624
  FilterItem,
530
625
  AggregateWrapper,
531
626
  RowsetItem,
532
- MultiSelectStatement | MergeStatement,
627
+ MultiSelectStatement,
533
628
  ],
534
629
  output: List[Concept],
535
630
  ):
@@ -568,8 +663,6 @@ class Concept(Namespaced, SelectGrain, BaseModel):
568
663
  return PurposeLineage.ROWSET
569
664
  elif self.lineage and isinstance(self.lineage, MultiSelectStatement):
570
665
  return PurposeLineage.MULTISELECT
571
- elif self.lineage and isinstance(self.lineage, MergeStatement):
572
- return PurposeLineage.MERGE
573
666
  elif (
574
667
  self.lineage
575
668
  and isinstance(self.lineage, Function)
@@ -592,6 +685,13 @@ class Concept(Namespaced, SelectGrain, BaseModel):
592
685
  elif self.lineage and isinstance(self.lineage, Function):
593
686
  if not self.lineage.concept_arguments:
594
687
  return PurposeLineage.CONSTANT
688
+ elif all(
689
+ [
690
+ x.derivation == PurposeLineage.CONSTANT
691
+ for x in self.lineage.concept_arguments
692
+ ]
693
+ ):
694
+ return PurposeLineage.CONSTANT
595
695
  return PurposeLineage.BASIC
596
696
  elif self.purpose == Purpose.CONSTANT:
597
697
  return PurposeLineage.CONSTANT
@@ -626,8 +726,28 @@ class Concept(Namespaced, SelectGrain, BaseModel):
626
726
  return Granularity.SINGLE_ROW
627
727
  return Granularity.MULTI_ROW
628
728
 
729
+ def with_filter(
730
+ self, condition: "Conditional | Comparison | Parenthetical"
731
+ ) -> "Concept":
732
+ from trilogy.utility import string_to_hash
629
733
 
630
- class Grain(BaseModel):
734
+ name = string_to_hash(self.name + str(condition))
735
+ new = Concept(
736
+ name=f"{self.name}_{name}",
737
+ datatype=self.datatype,
738
+ purpose=self.purpose,
739
+ metadata=self.metadata,
740
+ lineage=FilterItem(content=self, where=WhereClause(conditional=condition)),
741
+ keys=None,
742
+ grain=(self.grain if self.purpose == Purpose.PROPERTY else Grain()),
743
+ namespace=self.namespace,
744
+ modifiers=self.modifiers,
745
+ pseudonyms=self.pseudonyms,
746
+ )
747
+ return new
748
+
749
+
750
+ class Grain(Mergeable, BaseModel):
631
751
  nested: bool = False
632
752
  components: List[Concept] = Field(default_factory=list, validate_default=True)
633
753
 
@@ -645,12 +765,6 @@ class Grain(BaseModel):
645
765
  if sub.purpose in (Purpose.PROPERTY, Purpose.METRIC) and sub.keys:
646
766
  if all([c in v2 for c in sub.keys]):
647
767
  continue
648
- elif sub.derivation == PurposeLineage.MERGE and isinstance(
649
- sub.lineage, MergeStatement
650
- ):
651
- parents = sub.lineage.concepts
652
- if any([p in v2 for p in parents]):
653
- continue
654
768
  final.append(sub)
655
769
  v2 = sorted(final, key=lambda x: x.name)
656
770
  return v2
@@ -672,6 +786,16 @@ class Grain(BaseModel):
672
786
  nested=self.nested,
673
787
  )
674
788
 
789
+ def with_merge(
790
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
791
+ ) -> "Grain":
792
+ return Grain(
793
+ components=[
794
+ x.with_merge(source, target, modifiers) for x in self.components
795
+ ],
796
+ nested=self.nested,
797
+ )
798
+
675
799
  @property
676
800
  def abstract(self):
677
801
  return not self.components or all(
@@ -759,6 +883,15 @@ class ColumnAssignment(BaseModel):
759
883
  modifiers=self.modifiers,
760
884
  )
761
885
 
886
+ def with_merge(
887
+ self, concept: Concept, modifiers: List[Modifier]
888
+ ) -> "ColumnAssignment":
889
+ return ColumnAssignment(
890
+ alias=self.alias,
891
+ concept=concept,
892
+ modifiers=modifiers,
893
+ )
894
+
762
895
 
763
896
  class Statement(BaseModel):
764
897
  pass
@@ -809,7 +942,7 @@ class LooseConceptList(BaseModel):
809
942
  return self.addresses.isdisjoint(other.addresses)
810
943
 
811
944
 
812
- class Function(Namespaced, SelectGrain, BaseModel):
945
+ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
813
946
  operator: FunctionType
814
947
  arg_count: int = Field(default=1)
815
948
  output_datatype: DataType | ListType | StructType | MapType | NumericType
@@ -849,15 +982,42 @@ class Function(Namespaced, SelectGrain, BaseModel):
849
982
  def datatype(self):
850
983
  return self.output_datatype
851
984
 
852
- def with_select_grain(self, grain: Grain) -> Function:
985
+ def with_select_context(
986
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
987
+ ) -> Function:
988
+ if self.operator in FunctionClass.AGGREGATE_FUNCTIONS.value and conditional:
989
+ base = [
990
+ (
991
+ c.with_select_context(grain, conditional)
992
+ if isinstance(
993
+ c,
994
+ SelectContext,
995
+ )
996
+ else c
997
+ )
998
+ for c in self.arguments
999
+ ]
1000
+ final = [
1001
+ c.with_filter(conditional) if isinstance(c, Concept) else c
1002
+ for c in base
1003
+ ]
1004
+ return Function(
1005
+ operator=self.operator,
1006
+ arguments=final,
1007
+ output_datatype=self.output_datatype,
1008
+ output_purpose=self.output_purpose,
1009
+ valid_inputs=self.valid_inputs,
1010
+ arg_count=self.arg_count,
1011
+ )
1012
+
853
1013
  return Function(
854
1014
  operator=self.operator,
855
1015
  arguments=[
856
1016
  (
857
- c.with_select_grain(grain)
1017
+ c.with_select_context(grain, conditional)
858
1018
  if isinstance(
859
1019
  c,
860
- SelectGrain,
1020
+ SelectContext,
861
1021
  )
862
1022
  else c
863
1023
  )
@@ -951,6 +1111,28 @@ class Function(Namespaced, SelectGrain, BaseModel):
951
1111
  arg_count=self.arg_count,
952
1112
  )
953
1113
 
1114
+ def with_merge(
1115
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1116
+ ) -> "Function":
1117
+ return Function(
1118
+ operator=self.operator,
1119
+ arguments=[
1120
+ (
1121
+ c.with_merge(source, target, modifiers)
1122
+ if isinstance(
1123
+ c,
1124
+ Mergeable,
1125
+ )
1126
+ else c
1127
+ )
1128
+ for c in self.arguments
1129
+ ],
1130
+ output_datatype=self.output_datatype,
1131
+ output_purpose=self.output_purpose,
1132
+ valid_inputs=self.valid_inputs,
1133
+ arg_count=self.arg_count,
1134
+ )
1135
+
954
1136
  @property
955
1137
  def concept_arguments(self) -> List[Concept]:
956
1138
  base = []
@@ -991,6 +1173,13 @@ class ConceptTransform(Namespaced, BaseModel):
991
1173
  def input(self) -> List[Concept]:
992
1174
  return [v for v in self.function.arguments if isinstance(v, Concept)]
993
1175
 
1176
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
1177
+ return ConceptTransform(
1178
+ function=self.function.with_merge(source, target, modifiers),
1179
+ output=self.output.with_merge(source, target, modifiers),
1180
+ modifiers=self.modifiers + modifiers,
1181
+ )
1182
+
994
1183
  def with_namespace(self, namespace: str) -> "ConceptTransform":
995
1184
  return ConceptTransform(
996
1185
  function=self.function.with_namespace(namespace),
@@ -1015,13 +1204,23 @@ class WindowItemOrder(BaseModel):
1015
1204
  contents: List["OrderItem"]
1016
1205
 
1017
1206
 
1018
- class WindowItem(Namespaced, SelectGrain, BaseModel):
1207
+ class WindowItem(Mergeable, Namespaced, SelectContext, BaseModel):
1019
1208
  type: WindowType
1020
1209
  content: Concept
1021
1210
  order_by: List["OrderItem"]
1022
1211
  over: List["Concept"] = Field(default_factory=list)
1023
1212
  index: Optional[int] = None
1024
1213
 
1214
+ def with_merge(
1215
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1216
+ ) -> "WindowItem":
1217
+ return WindowItem(
1218
+ type=self.type,
1219
+ content=self.content.with_merge(source, target, modifiers),
1220
+ over=[x.with_merge(source, target, modifiers) for x in self.over],
1221
+ order_by=[x.with_merge(source, target, modifiers) for x in self.order_by],
1222
+ )
1223
+
1025
1224
  def with_namespace(self, namespace: str) -> "WindowItem":
1026
1225
  return WindowItem(
1027
1226
  type=self.type,
@@ -1030,12 +1229,14 @@ class WindowItem(Namespaced, SelectGrain, BaseModel):
1030
1229
  order_by=[x.with_namespace(namespace) for x in self.order_by],
1031
1230
  )
1032
1231
 
1033
- def with_select_grain(self, grain: Grain) -> "WindowItem":
1232
+ def with_select_context(
1233
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
1234
+ ) -> "WindowItem":
1034
1235
  return WindowItem(
1035
1236
  type=self.type,
1036
- content=self.content.with_select_grain(grain),
1037
- over=[x.with_select_grain(grain) for x in self.over],
1038
- order_by=[x.with_select_grain(grain) for x in self.order_by],
1237
+ content=self.content.with_select_context(grain, conditional),
1238
+ over=[x.with_select_context(grain, conditional) for x in self.over],
1239
+ order_by=[x.with_select_context(grain, conditional) for x in self.order_by],
1039
1240
  )
1040
1241
 
1041
1242
  @property
@@ -1082,23 +1283,33 @@ class WindowItem(Namespaced, SelectGrain, BaseModel):
1082
1283
  return Purpose.PROPERTY
1083
1284
 
1084
1285
 
1085
- class FilterItem(Namespaced, SelectGrain, BaseModel):
1286
+ class FilterItem(Namespaced, SelectContext, BaseModel):
1086
1287
  content: Concept
1087
1288
  where: "WhereClause"
1088
1289
 
1089
1290
  def __str__(self):
1090
1291
  return f"<Filter: {str(self.content)} where {str(self.where)}>"
1091
1292
 
1293
+ def with_merge(
1294
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1295
+ ) -> "FilterItem":
1296
+ return FilterItem(
1297
+ content=source.with_merge(source, target, modifiers),
1298
+ where=self.where.with_merge(source, target, modifiers),
1299
+ )
1300
+
1092
1301
  def with_namespace(self, namespace: str) -> "FilterItem":
1093
1302
  return FilterItem(
1094
1303
  content=self.content.with_namespace(namespace),
1095
1304
  where=self.where.with_namespace(namespace),
1096
1305
  )
1097
1306
 
1098
- def with_select_grain(self, grain: Grain) -> FilterItem:
1307
+ def with_select_context(
1308
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
1309
+ ) -> FilterItem:
1099
1310
  return FilterItem(
1100
- content=self.content.with_select_grain(grain),
1101
- where=self.where.with_select_grain(grain),
1311
+ content=self.content.with_select_context(grain, conditional),
1312
+ where=self.where.with_select_context(grain, conditional),
1102
1313
  )
1103
1314
 
1104
1315
  @property
@@ -1139,7 +1350,7 @@ class FilterItem(Namespaced, SelectGrain, BaseModel):
1139
1350
  return [self.content] + self.where.concept_arguments
1140
1351
 
1141
1352
 
1142
- class SelectItem(Namespaced, BaseModel):
1353
+ class SelectItem(Mergeable, Namespaced, BaseModel):
1143
1354
  content: Union[Concept, ConceptTransform]
1144
1355
  modifiers: List[Modifier] = Field(default_factory=list)
1145
1356
 
@@ -1155,6 +1366,14 @@ class SelectItem(Namespaced, BaseModel):
1155
1366
  def input(self) -> List[Concept]:
1156
1367
  return self.content.input
1157
1368
 
1369
+ def with_merge(
1370
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1371
+ ) -> "SelectItem":
1372
+ return SelectItem(
1373
+ content=self.content.with_merge(source, target, modifiers),
1374
+ modifiers=modifiers,
1375
+ )
1376
+
1158
1377
  def with_namespace(self, namespace: str) -> "SelectItem":
1159
1378
  return SelectItem(
1160
1379
  content=self.content.with_namespace(namespace),
@@ -1162,16 +1381,25 @@ class SelectItem(Namespaced, BaseModel):
1162
1381
  )
1163
1382
 
1164
1383
 
1165
- class OrderItem(SelectGrain, Namespaced, BaseModel):
1384
+ class OrderItem(Mergeable, SelectContext, Namespaced, BaseModel):
1166
1385
  expr: Concept
1167
1386
  order: Ordering
1168
1387
 
1169
1388
  def with_namespace(self, namespace: str) -> "OrderItem":
1170
1389
  return OrderItem(expr=self.expr.with_namespace(namespace), order=self.order)
1171
1390
 
1172
- def with_select_grain(self, grain: Grain) -> "OrderItem":
1391
+ def with_select_context(
1392
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
1393
+ ) -> "OrderItem":
1173
1394
  return OrderItem(expr=self.expr.with_grain(grain), order=self.order)
1174
1395
 
1396
+ def with_merge(
1397
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1398
+ ) -> "OrderItem":
1399
+ return OrderItem(
1400
+ expr=source.with_merge(source, target, modifiers), order=self.order
1401
+ )
1402
+
1175
1403
  @property
1176
1404
  def input(self):
1177
1405
  return self.expr.input
@@ -1181,21 +1409,27 @@ class OrderItem(SelectGrain, Namespaced, BaseModel):
1181
1409
  return self.expr.output
1182
1410
 
1183
1411
 
1184
- class OrderBy(Namespaced, BaseModel):
1412
+ class OrderBy(Mergeable, Namespaced, BaseModel):
1185
1413
  items: List[OrderItem]
1186
1414
 
1187
1415
  def with_namespace(self, namespace: str) -> "OrderBy":
1188
1416
  return OrderBy(items=[x.with_namespace(namespace) for x in self.items])
1189
1417
 
1418
+ def with_merge(
1419
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1420
+ ) -> "OrderBy":
1421
+ return OrderBy(
1422
+ items=[x.with_merge(source, target, modifiers) for x in self.items]
1423
+ )
1424
+
1190
1425
 
1191
1426
  class RawSQLStatement(BaseModel):
1192
1427
  text: str
1193
1428
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
1194
1429
 
1195
1430
 
1196
- class SelectStatement(Namespaced, BaseModel):
1431
+ class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
1197
1432
  selection: List[SelectItem]
1198
- where_clause: Optional["WhereClause"] = None
1199
1433
  order_by: Optional[OrderBy] = None
1200
1434
  limit: Optional[int] = None
1201
1435
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
@@ -1225,6 +1459,19 @@ class SelectStatement(Namespaced, BaseModel):
1225
1459
  new.append(item)
1226
1460
  return new
1227
1461
 
1462
+ def with_merge(
1463
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1464
+ ) -> "SelectStatement":
1465
+ return SelectStatement(
1466
+ selection=[x.with_merge(source, target, modifiers) for x in self.selection],
1467
+ order_by=(
1468
+ self.order_by.with_merge(source, target, modifiers)
1469
+ if self.order_by
1470
+ else None
1471
+ ),
1472
+ limit=self.limit,
1473
+ )
1474
+
1228
1475
  @property
1229
1476
  def input_components(self) -> List[Concept]:
1230
1477
  output = set()
@@ -1297,14 +1544,14 @@ class SelectStatement(Namespaced, BaseModel):
1297
1544
  for item in self.output_components:
1298
1545
  if item.purpose == Purpose.KEY:
1299
1546
  output.append(item)
1300
- if self.where_clause:
1301
- for item in self.where_clause.concept_arguments:
1302
- if item.purpose == Purpose.KEY:
1303
- output.append(item)
1304
- # elif item.purpose == Purpose.PROPERTY and item.grain:
1305
- # output += item.grain.components
1306
- # TODO: handle other grain cases
1307
- # new if block by design
1547
+ # if self.where_clause:
1548
+ # for item in self.where_clause.concept_arguments:
1549
+ # if item.purpose == Purpose.KEY:
1550
+ # output.append(item)
1551
+ # elif item.purpose == Purpose.PROPERTY and item.grain:
1552
+ # output += item.grain.components
1553
+ # TODO: handle other grain cases
1554
+ # new if block by design
1308
1555
  # add back any purpose that is not at the grain
1309
1556
  # if a query already has the key of the property in the grain
1310
1557
  # we want to group to that grain and ignore the property, which is a derivation
@@ -1393,11 +1640,10 @@ class AlignClause(Namespaced, BaseModel):
1393
1640
  return AlignClause(items=[x.with_namespace(namespace) for x in self.items])
1394
1641
 
1395
1642
 
1396
- class MultiSelectStatement(Namespaced, BaseModel):
1643
+ class MultiSelectStatement(SelectTypeMixin, Mergeable, Namespaced, BaseModel):
1397
1644
  selects: List[SelectStatement]
1398
1645
  align: AlignClause
1399
1646
  namespace: str
1400
- where_clause: Optional["WhereClause"] = None
1401
1647
  order_by: Optional[OrderBy] = None
1402
1648
  limit: Optional[int] = None
1403
1649
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
@@ -1423,6 +1669,28 @@ class MultiSelectStatement(Namespaced, BaseModel):
1423
1669
  output += self.where_clause.concept_arguments
1424
1670
  return unique(output, "address")
1425
1671
 
1672
+ def with_merge(
1673
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1674
+ ) -> "MultiSelectStatement":
1675
+ new = MultiSelectStatement(
1676
+ selects=[s.with_merge(source, target, modifiers) for s in self.selects],
1677
+ align=self.align,
1678
+ namespace=self.namespace,
1679
+ order_by=(
1680
+ self.order_by.with_merge(source, target, modifiers)
1681
+ if self.order_by
1682
+ else None
1683
+ ),
1684
+ limit=self.limit,
1685
+ meta=self.meta,
1686
+ where_clause=(
1687
+ self.where_clause.with_merge(source, target, modifiers)
1688
+ if self.where_clause
1689
+ else None
1690
+ ),
1691
+ )
1692
+ return new
1693
+
1426
1694
  def get_merge_concept(self, check: Concept):
1427
1695
  for item in self.align.items:
1428
1696
  if check in item.concepts_lcl:
@@ -1434,6 +1702,14 @@ class MultiSelectStatement(Namespaced, BaseModel):
1434
1702
  selects=[c.with_namespace(namespace) for c in self.selects],
1435
1703
  align=self.align.with_namespace(namespace),
1436
1704
  namespace=namespace,
1705
+ order_by=self.order_by.with_namespace(namespace) if self.order_by else None,
1706
+ limit=self.limit,
1707
+ meta=self.meta,
1708
+ where_clause=(
1709
+ self.where_clause.with_namespace(namespace)
1710
+ if self.where_clause
1711
+ else None
1712
+ ),
1437
1713
  )
1438
1714
 
1439
1715
  @property
@@ -1518,49 +1794,21 @@ def safe_grain(v) -> Grain:
1518
1794
  class DatasourceMetadata(BaseModel):
1519
1795
  freshness_concept: Concept | None
1520
1796
  partition_fields: List[Concept] = Field(default_factory=list)
1797
+ line_no: int | None = None
1521
1798
 
1522
1799
 
1523
- class MergeStatement(Namespaced, BaseModel):
1524
- concepts: List[Concept]
1525
- datatype: DataType | ListType | StructType | MapType | NumericType
1526
-
1527
- @cached_property
1528
- def concepts_lcl(self):
1529
- return LooseConceptList(concepts=self.concepts)
1530
-
1531
- @property
1532
- def merge_concept(self) -> Concept:
1533
- bridge_name = "_".join([c.safe_address for c in self.concepts])
1534
- return Concept(
1535
- name=f"__merge_{bridge_name}",
1536
- datatype=self.datatype,
1537
- purpose=Purpose.PROPERTY,
1538
- lineage=self,
1539
- keys=tuple(self.concepts),
1540
- )
1541
-
1542
- @property
1543
- def arguments(self) -> List[Concept]:
1544
- return self.concepts
1545
-
1546
- @property
1547
- def concept_arguments(self) -> List[Concept]:
1548
- return self.concepts
1549
-
1550
- def find_source(self, concept: Concept, cte: CTE) -> Concept:
1551
- for x in self.concepts:
1552
- for z in cte.output_columns:
1553
- if z.address == x.address:
1554
- return z
1555
- raise SyntaxError(
1556
- f"Could not find upstream map for multiselect {str(concept)} on cte ({cte.name})"
1557
- )
1800
+ class MergeStatementV2(Namespaced, BaseModel):
1801
+ source: Concept
1802
+ target: Concept
1803
+ modifiers: List[Modifier] = Field(default_factory=list)
1558
1804
 
1559
- def with_namespace(self, namespace: str) -> "MergeStatement":
1560
- return MergeStatement(
1561
- concepts=[c.with_namespace(namespace) for c in self.concepts],
1562
- datatype=self.datatype,
1805
+ def with_namespace(self, namespace: str) -> "MergeStatementV2":
1806
+ new = MergeStatementV2(
1807
+ source=self.source.with_namespace(namespace),
1808
+ target=self.target.with_namespace(namespace),
1809
+ modifiers=self.modifiers,
1563
1810
  )
1811
+ return new
1564
1812
 
1565
1813
 
1566
1814
  class Datasource(Namespaced, BaseModel):
@@ -1574,6 +1822,32 @@ class Datasource(Namespaced, BaseModel):
1574
1822
  metadata: DatasourceMetadata = Field(
1575
1823
  default_factory=lambda: DatasourceMetadata(freshness_concept=None)
1576
1824
  )
1825
+ where: Optional[WhereClause] = None
1826
+
1827
+ def merge_concept(
1828
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1829
+ ):
1830
+ original = [c for c in self.columns if c.concept.address == source.address]
1831
+ # map to the alias with the modifier, and the original
1832
+ self.columns = [
1833
+ (
1834
+ c.with_merge(target, modifiers)
1835
+ if c.concept.address == source.address
1836
+ else c
1837
+ )
1838
+ for c in self.columns
1839
+ ] + original
1840
+ self.grain = self.grain.with_merge(source, target, modifiers)
1841
+ self.where = (
1842
+ self.where.with_merge(source, target, modifiers) if self.where else None
1843
+ )
1844
+ del self.output_lcl
1845
+
1846
+ @property
1847
+ def env_label(self) -> str:
1848
+ if not self.namespace or self.namespace == DEFAULT_NAMESPACE:
1849
+ return self.identifier
1850
+ return f"{self.namespace}.{self.identifier}"
1577
1851
 
1578
1852
  @property
1579
1853
  def condition(self):
@@ -1659,6 +1933,7 @@ class Datasource(Namespaced, BaseModel):
1659
1933
  grain=self.grain.with_namespace(namespace),
1660
1934
  address=self.address,
1661
1935
  columns=[c.with_namespace(namespace) for c in self.columns],
1936
+ where=self.where.with_namespace(namespace) if self.where else None,
1662
1937
  )
1663
1938
 
1664
1939
  @cached_property
@@ -1738,6 +2013,7 @@ class BaseJoin(BaseModel):
1738
2013
  concepts: List[Concept]
1739
2014
  join_type: JoinType
1740
2015
  filter_to_mutual: bool = False
2016
+ concept_pairs: list[tuple[Concept, Concept]] | None = None
1741
2017
 
1742
2018
  def __init__(self, **data: Any):
1743
2019
  super().__init__(**data)
@@ -1747,10 +2023,21 @@ class BaseJoin(BaseModel):
1747
2023
  f" {self.right_datasource}"
1748
2024
  )
1749
2025
  final_concepts = []
2026
+
2027
+ # if we have a list of concept pairs
2028
+ if self.concept_pairs:
2029
+ return
2030
+
1750
2031
  for concept in self.concepts:
1751
2032
  include = True
1752
2033
  for ds in [self.left_datasource, self.right_datasource]:
1753
- if concept.address not in [c.address for c in ds.output_concepts]:
2034
+ synonyms = []
2035
+ for c in ds.output_concepts:
2036
+ synonyms += list(c.pseudonyms.keys())
2037
+ if (
2038
+ concept.address not in [c.address for c in ds.output_concepts]
2039
+ and concept.address not in synonyms
2040
+ ):
1754
2041
  if self.filter_to_mutual:
1755
2042
  include = False
1756
2043
  else:
@@ -1988,7 +2275,7 @@ class QueryDatasource(BaseModel):
1988
2275
  )
1989
2276
  # partial = "_".join([str(c.address).replace(".", "_") for c in self.partial_concepts])
1990
2277
  return (
1991
- "_join_".join([d.name for d in self.datasources])
2278
+ "_join_".join([d.full_name for d in self.datasources])
1992
2279
  + (f"_at_{grain}" if grain else "_at_abstract")
1993
2280
  + (f"_filtered_by_{filters}" if filters else "")
1994
2281
  # + (f"_partial_{partial}" if partial else "")
@@ -2144,7 +2431,13 @@ class CTE(BaseModel):
2144
2431
  ds_being_inlined.name if x == parent.name else x for x in v
2145
2432
  ]
2146
2433
  elif v == parent.name:
2147
- self.source_map[k] = ds_being_inlined.name
2434
+ self.source_map[k] = [ds_being_inlined.name]
2435
+
2436
+ # zip in any required values for lookups
2437
+ for k in ds_being_inlined.output_lcl.addresses:
2438
+ if k in self.source_map and self.source_map[k]:
2439
+ continue
2440
+ self.source_map[k] = [ds_being_inlined.name]
2148
2441
  self.parent_ctes = [x for x in self.parent_ctes if x.name != parent.name]
2149
2442
  if force_group:
2150
2443
  self.group_to_grain = True
@@ -2245,6 +2538,45 @@ class CTE(BaseModel):
2245
2538
  except ValueError as e:
2246
2539
  return f"INVALID_ALIAS: {str(e)}"
2247
2540
 
2541
+ @property
2542
+ def group_concepts(self) -> List[Concept]:
2543
+ return (
2544
+ unique(
2545
+ self.grain.components
2546
+ + [
2547
+ c
2548
+ for c in self.output_columns
2549
+ if c.purpose in (Purpose.PROPERTY, Purpose.KEY)
2550
+ and c.address not in [x.address for x in self.grain.components]
2551
+ ]
2552
+ + [
2553
+ c
2554
+ for c in self.output_columns
2555
+ if c.purpose == Purpose.METRIC
2556
+ and (
2557
+ any(
2558
+ [
2559
+ c.with_grain(cte.grain) in cte.output_columns
2560
+ for cte in self.parent_ctes
2561
+ ]
2562
+ )
2563
+ # if we have this metric from a source
2564
+ # it isn't derived here and must be grouped on
2565
+ or len(self.source_map[c.address]) > 0
2566
+ )
2567
+ ]
2568
+ + [
2569
+ c
2570
+ for c in self.output_columns
2571
+ if c.purpose == Purpose.CONSTANT
2572
+ and self.source_map[c.address] != []
2573
+ ],
2574
+ "address",
2575
+ )
2576
+ if self.group_to_grain
2577
+ else []
2578
+ )
2579
+
2248
2580
  @property
2249
2581
  def render_from_clause(self) -> bool:
2250
2582
  if (
@@ -2300,6 +2632,7 @@ class Join(BaseModel):
2300
2632
  right_cte: CTE | Datasource
2301
2633
  jointype: JoinType
2302
2634
  joinkeys: List[JoinKey]
2635
+ joinkey_pairs: List[tuple[Concept, Concept]] | None = None
2303
2636
 
2304
2637
  @property
2305
2638
  def left_name(self) -> str:
@@ -2336,7 +2669,7 @@ class Join(BaseModel):
2336
2669
  )
2337
2670
 
2338
2671
 
2339
- class UndefinedConcept(Concept):
2672
+ class UndefinedConcept(Concept, Mergeable, Namespaced):
2340
2673
  model_config = ConfigDict(arbitrary_types_allowed=True)
2341
2674
  name: str
2342
2675
  environment: "EnvironmentConceptDict"
@@ -2344,6 +2677,34 @@ class UndefinedConcept(Concept):
2344
2677
  datatype: DataType = DataType.UNKNOWN
2345
2678
  purpose: Purpose = Purpose.KEY
2346
2679
 
2680
+ def with_merge(
2681
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
2682
+ ) -> "UndefinedConcept" | Concept:
2683
+ if self.address == source.address:
2684
+ new = target.with_grain(self.grain.with_merge(source, target, modifiers))
2685
+ new.pseudonyms[self.address] = self
2686
+ return new
2687
+ return self.__class__(
2688
+ name=self.name,
2689
+ datatype=self.datatype,
2690
+ purpose=self.purpose,
2691
+ metadata=self.metadata,
2692
+ lineage=(
2693
+ self.lineage.with_merge(source, target, modifiers)
2694
+ if self.lineage
2695
+ else None
2696
+ ),
2697
+ grain=self.grain.with_merge(source, target, modifiers),
2698
+ namespace=self.namespace,
2699
+ keys=(
2700
+ tuple(x.with_merge(source, target, modifiers) for x in self.keys)
2701
+ if self.keys
2702
+ else None
2703
+ ),
2704
+ environment=self.environment,
2705
+ line_no=self.line_no,
2706
+ )
2707
+
2347
2708
  def with_namespace(self, namespace: str) -> "UndefinedConcept":
2348
2709
  return self.__class__(
2349
2710
  name=self.name,
@@ -2362,14 +2723,18 @@ class UndefinedConcept(Concept):
2362
2723
  line_no=self.line_no,
2363
2724
  )
2364
2725
 
2365
- def with_select_grain(self, grain: Optional["Grain"] = None) -> "UndefinedConcept":
2726
+ def with_select_context(
2727
+ self,
2728
+ grain: Optional["Grain"] = None,
2729
+ conditional: Conditional | Comparison | Parenthetical | None = None,
2730
+ ) -> "UndefinedConcept":
2366
2731
  if not all([isinstance(x, Concept) for x in self.keys or []]):
2367
2732
  raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")
2368
2733
  new_grain = grain or Grain(components=[])
2369
2734
  if self.lineage:
2370
2735
  new_lineage = self.lineage
2371
- if isinstance(self.lineage, SelectGrain):
2372
- new_lineage = self.lineage.with_select_grain(new_grain)
2736
+ if isinstance(self.lineage, SelectContext):
2737
+ new_lineage = self.lineage.with_select_context(new_grain, conditional)
2373
2738
  else:
2374
2739
  new_lineage = None
2375
2740
  return self.__class__(
@@ -2384,7 +2749,7 @@ class UndefinedConcept(Concept):
2384
2749
  environment=self.environment,
2385
2750
  )
2386
2751
 
2387
- def with_grain(self, grain: Optional["Grain"] = None) -> "Concept":
2752
+ def with_grain(self, grain: Optional["Grain"] = None) -> "UndefinedConcept":
2388
2753
  return self.__class__(
2389
2754
  name=self.name,
2390
2755
  datatype=self.datatype,
@@ -2398,7 +2763,7 @@ class UndefinedConcept(Concept):
2398
2763
  line_no=self.line_no,
2399
2764
  )
2400
2765
 
2401
- def with_default_grain(self) -> "Concept":
2766
+ def with_default_grain(self) -> "UndefinedConcept":
2402
2767
  if self.purpose == Purpose.KEY:
2403
2768
  # we need to make this abstract
2404
2769
  grain = Grain(components=[self.with_grain(Grain())], nested=True)
@@ -2432,6 +2797,24 @@ class UndefinedConcept(Concept):
2432
2797
  )
2433
2798
 
2434
2799
 
2800
+ class EnvironmentDatasourceDict(dict):
2801
+ def __init__(self, *args, **kwargs) -> None:
2802
+ super().__init__(self, *args, **kwargs)
2803
+
2804
+ def __getitem__(self, key: str) -> Datasource:
2805
+ try:
2806
+ return super(EnvironmentDatasourceDict, self).__getitem__(key)
2807
+ except KeyError:
2808
+ if DEFAULT_NAMESPACE + "." + key in self:
2809
+ return self.__getitem__(DEFAULT_NAMESPACE + "." + key)
2810
+ if "." in key and key.split(".")[0] == DEFAULT_NAMESPACE:
2811
+ return self.__getitem__(key.split(".")[1])
2812
+ raise
2813
+
2814
+ def values(self) -> ValuesView[Datasource]: # type: ignore
2815
+ return super().values()
2816
+
2817
+
2435
2818
  class EnvironmentConceptDict(dict):
2436
2819
  def __init__(self, *args, **kwargs) -> None:
2437
2820
  super().__init__(self, *args, **kwargs)
@@ -2460,6 +2843,8 @@ class EnvironmentConceptDict(dict):
2460
2843
  if DEFAULT_NAMESPACE + "." + key in self:
2461
2844
  return self.__getitem__(DEFAULT_NAMESPACE + "." + key, line_no)
2462
2845
  if not self.fail_on_missing:
2846
+ if key in self.undefined:
2847
+ return self.undefined[key]
2463
2848
  undefined = UndefinedConcept(
2464
2849
  name=key,
2465
2850
  line_no=line_no,
@@ -2483,7 +2868,7 @@ class EnvironmentConceptDict(dict):
2483
2868
  matches = difflib.get_close_matches(concept_name, self.keys())
2484
2869
  return matches
2485
2870
 
2486
- def items(self) -> ItemsView[str, Concept | UndefinedConcept]: # type: ignore
2871
+ def items(self) -> ItemsView[str, Concept]: # type: ignore
2487
2872
  return super().items()
2488
2873
 
2489
2874
 
@@ -2509,13 +2894,25 @@ def validate_concepts(v) -> EnvironmentConceptDict:
2509
2894
  raise ValueError
2510
2895
 
2511
2896
 
2897
+ def validate_datasources(v) -> EnvironmentDatasourceDict:
2898
+ if isinstance(v, EnvironmentDatasourceDict):
2899
+ return v
2900
+ elif isinstance(v, dict):
2901
+ return EnvironmentDatasourceDict(
2902
+ **{x: Datasource.model_validate(y) for x, y in v.items()}
2903
+ )
2904
+ raise ValueError
2905
+
2906
+
2512
2907
  class Environment(BaseModel):
2513
2908
  model_config = ConfigDict(arbitrary_types_allowed=True, strict=False)
2514
2909
 
2515
2910
  concepts: Annotated[EnvironmentConceptDict, PlainValidator(validate_concepts)] = (
2516
2911
  Field(default_factory=EnvironmentConceptDict)
2517
2912
  )
2518
- datasources: Dict[str, Datasource] = Field(default_factory=dict)
2913
+ datasources: Annotated[
2914
+ EnvironmentDatasourceDict, PlainValidator(validate_datasources)
2915
+ ] = Field(default_factory=EnvironmentDatasourceDict)
2519
2916
  functions: Dict[str, Function] = Field(default_factory=dict)
2520
2917
  data_types: Dict[str, DataType] = Field(default_factory=dict)
2521
2918
  imports: Dict[str, ImportStatement] = Field(default_factory=dict)
@@ -2526,7 +2923,7 @@ class Environment(BaseModel):
2526
2923
  cte_name_map: Dict[str, str] = Field(default_factory=dict)
2527
2924
 
2528
2925
  materialized_concepts: List[Concept] = Field(default_factory=list)
2529
- merged_concepts: Dict[str, Concept] = Field(default_factory=dict)
2926
+ alias_origin_lookup: Dict[str, Concept] = Field(default_factory=dict)
2530
2927
  _parse_count: int = 0
2531
2928
 
2532
2929
  @classmethod
@@ -2563,6 +2960,12 @@ class Environment(BaseModel):
2563
2960
  self.materialized_concepts = [
2564
2961
  c for c in self.concepts.values() if c.address in concrete_addresses
2565
2962
  ]
2963
+ # include aliased concepts
2964
+ self.materialized_concepts += [
2965
+ c
2966
+ for c in self.alias_origin_lookup.values()
2967
+ if c.address in concrete_addresses
2968
+ ]
2566
2969
  new = [
2567
2970
  x.address
2568
2971
  for x in self.materialized_concepts
@@ -2570,12 +2973,6 @@ class Environment(BaseModel):
2570
2973
  ]
2571
2974
  if new:
2572
2975
  logger.info(f"Environment added new materialized concepts {new}")
2573
- for concept in self.concepts.values():
2574
- if concept.derivation == PurposeLineage.MERGE:
2575
- ms = concept.lineage
2576
- assert isinstance(ms, MergeStatement)
2577
- for parent in ms.concepts:
2578
- self.merged_concepts[parent.address] = concept
2579
2976
 
2580
2977
  def validate_concept(self, lookup: str, meta: Meta | None = None):
2581
2978
  existing: Concept = self.concepts.get(lookup) # type: ignore
@@ -2718,13 +3115,8 @@ class Environment(BaseModel):
2718
3115
  datasource: Datasource,
2719
3116
  meta: Meta | None = None,
2720
3117
  ):
2721
- if not datasource.namespace or datasource.namespace == DEFAULT_NAMESPACE:
2722
- self.datasources[datasource.name] = datasource
2723
- self.gen_concept_list_caches()
2724
- return datasource
2725
- self.datasources[datasource.namespace + "." + datasource.identifier] = (
2726
- datasource
2727
- )
3118
+
3119
+ self.datasources[datasource.env_label] = datasource
2728
3120
  self.gen_concept_list_caches()
2729
3121
  return datasource
2730
3122
 
@@ -2739,6 +3131,22 @@ class Environment(BaseModel):
2739
3131
  return True
2740
3132
  return False
2741
3133
 
3134
+ def merge_concept(
3135
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
3136
+ ):
3137
+ replacements = {}
3138
+ self.alias_origin_lookup[source.address] = source
3139
+ for k, v in self.concepts.items():
3140
+ if v.address == target.address:
3141
+ v.pseudonyms[source.address] = source
3142
+ if v.address == source.address:
3143
+ replacements[k] = target
3144
+ self.concepts.update(replacements)
3145
+
3146
+ for k, ds in self.datasources.items():
3147
+ if source.address in ds.output_lcl:
3148
+ ds.merge_concept(source, target, modifiers=modifiers)
3149
+
2742
3150
 
2743
3151
  class LazyEnvironment(Environment):
2744
3152
  """Variant of environment to defer parsing of a path
@@ -2771,7 +3179,9 @@ class LazyEnvironment(Environment):
2771
3179
  return super().__getattribute__(name)
2772
3180
 
2773
3181
 
2774
- class Comparison(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, BaseModel):
3182
+ class Comparison(
3183
+ ConceptArgs, Mergeable, Namespaced, ConstantInlineable, SelectContext, BaseModel
3184
+ ):
2775
3185
  left: Union[
2776
3186
  int,
2777
3187
  str,
@@ -2821,6 +3231,8 @@ class Comparison(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, BaseM
2821
3231
  )
2822
3232
 
2823
3233
  def __add__(self, other):
3234
+ if other is None:
3235
+ return self
2824
3236
  if not isinstance(other, (Comparison, Conditional, Parenthetical)):
2825
3237
  raise ValueError("Cannot add Comparison to non-Comparison")
2826
3238
  if other == self:
@@ -2833,6 +3245,15 @@ class Comparison(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, BaseM
2833
3245
  def __str__(self):
2834
3246
  return self.__repr__()
2835
3247
 
3248
+ def __eq__(self, other):
3249
+ if not isinstance(other, Comparison):
3250
+ return False
3251
+ return (
3252
+ self.left == other.left
3253
+ and self.right == other.right
3254
+ and self.operator == other.operator
3255
+ )
3256
+
2836
3257
  def inline_constant(self, constant: Concept) -> "Comparison":
2837
3258
  assert isinstance(constant.lineage, Function)
2838
3259
  new_val = constant.lineage.arguments[0]
@@ -2859,6 +3280,21 @@ class Comparison(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, BaseM
2859
3280
  operator=self.operator,
2860
3281
  )
2861
3282
 
3283
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
3284
+ return self.__class__(
3285
+ left=(
3286
+ self.left.with_merge(source, target, modifiers)
3287
+ if isinstance(self.left, Mergeable)
3288
+ else self.left
3289
+ ),
3290
+ right=(
3291
+ self.right.with_merge(source, target, modifiers)
3292
+ if isinstance(self.right, Mergeable)
3293
+ else self.right
3294
+ ),
3295
+ operator=self.operator,
3296
+ )
3297
+
2862
3298
  def with_namespace(self, namespace: str):
2863
3299
  return self.__class__(
2864
3300
  left=(
@@ -2874,11 +3310,13 @@ class Comparison(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, BaseM
2874
3310
  operator=self.operator,
2875
3311
  )
2876
3312
 
2877
- def with_select_grain(self, grain: Grain):
3313
+ def with_select_context(
3314
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3315
+ ):
2878
3316
  return self.__class__(
2879
3317
  left=(
2880
- self.left.with_select_grain(grain)
2881
- if isinstance(self.left, SelectGrain)
3318
+ self.left.with_select_context(grain, conditional)
3319
+ if isinstance(self.left, SelectContext)
2882
3320
  else self.left
2883
3321
  ),
2884
3322
  # the right side does NOT need to inherit select grain
@@ -2946,6 +3384,17 @@ class Comparison(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, BaseM
2946
3384
 
2947
3385
  class SubselectComparison(Comparison):
2948
3386
 
3387
+ def __eq__(self, other):
3388
+ if not isinstance(other, SubselectComparison):
3389
+ return False
3390
+
3391
+ comp = (
3392
+ self.left == other.left
3393
+ and self.right == other.right
3394
+ and self.operator == other.operator
3395
+ )
3396
+ return comp
3397
+
2949
3398
  @property
2950
3399
  def row_arguments(self) -> List[Concept]:
2951
3400
  return get_concept_arguments(self.left)
@@ -2954,12 +3403,14 @@ class SubselectComparison(Comparison):
2954
3403
  def existence_arguments(self) -> list[tuple["Concept", ...]]:
2955
3404
  return [tuple(get_concept_arguments(self.right))]
2956
3405
 
2957
- def with_select_grain(self, grain: Grain):
3406
+ def with_select_context(
3407
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3408
+ ):
2958
3409
  # there's no need to pass the select grain through to a subselect comparison
2959
3410
  return self.__class__(
2960
3411
  left=(
2961
- self.left.with_select_grain(grain)
2962
- if isinstance(self.left, SelectGrain)
3412
+ self.left.with_select_context(grain, conditional)
3413
+ if isinstance(self.left, SelectContext)
2963
3414
  else self.left
2964
3415
  ),
2965
3416
  right=self.right,
@@ -2967,7 +3418,7 @@ class SubselectComparison(Comparison):
2967
3418
  )
2968
3419
 
2969
3420
 
2970
- class CaseWhen(Namespaced, SelectGrain, BaseModel):
3421
+ class CaseWhen(Namespaced, SelectContext, BaseModel):
2971
3422
  comparison: Conditional | SubselectComparison | Comparison
2972
3423
  expr: "Expr"
2973
3424
 
@@ -2988,18 +3439,20 @@ class CaseWhen(Namespaced, SelectGrain, BaseModel):
2988
3439
  ),
2989
3440
  )
2990
3441
 
2991
- def with_select_grain(self, grain: Grain) -> CaseWhen:
3442
+ def with_select_context(
3443
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3444
+ ) -> CaseWhen:
2992
3445
  return CaseWhen(
2993
- comparison=self.comparison.with_select_grain(grain),
3446
+ comparison=self.comparison.with_select_context(grain, conditional),
2994
3447
  expr=(
2995
- (self.expr.with_select_grain(grain))
2996
- if isinstance(self.expr, SelectGrain)
3448
+ (self.expr.with_select_context(grain, conditional))
3449
+ if isinstance(self.expr, SelectContext)
2997
3450
  else self.expr
2998
3451
  ),
2999
3452
  )
3000
3453
 
3001
3454
 
3002
- class CaseElse(Namespaced, SelectGrain, BaseModel):
3455
+ class CaseElse(Namespaced, SelectContext, BaseModel):
3003
3456
  expr: "Expr"
3004
3457
  # this ensures that it's easily differentiable from CaseWhen
3005
3458
  discriminant: ComparisonOperator = ComparisonOperator.ELSE
@@ -3008,14 +3461,16 @@ class CaseElse(Namespaced, SelectGrain, BaseModel):
3008
3461
  def concept_arguments(self):
3009
3462
  return get_concept_arguments(self.expr)
3010
3463
 
3011
- def with_select_grain(self, grain: Grain) -> CaseElse:
3464
+ def with_select_context(
3465
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3466
+ ) -> CaseElse:
3012
3467
  return CaseElse(
3013
3468
  discriminant=self.discriminant,
3014
3469
  expr=(
3015
- self.expr.with_select_grain(grain)
3470
+ self.expr.with_select_context(grain, conditional)
3016
3471
  if isinstance(
3017
3472
  self.expr,
3018
- SelectGrain,
3473
+ SelectContext,
3019
3474
  )
3020
3475
  else self.expr
3021
3476
  ),
@@ -3035,7 +3490,9 @@ class CaseElse(Namespaced, SelectGrain, BaseModel):
3035
3490
  )
3036
3491
 
3037
3492
 
3038
- class Conditional(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, BaseModel):
3493
+ class Conditional(
3494
+ Mergeable, ConceptArgs, Namespaced, ConstantInlineable, SelectContext, BaseModel
3495
+ ):
3039
3496
  left: Union[
3040
3497
  int,
3041
3498
  str,
@@ -3081,6 +3538,16 @@ class Conditional(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, Base
3081
3538
  def __repr__(self):
3082
3539
  return f"{str(self.left)} {self.operator.value} {str(self.right)}"
3083
3540
 
3541
+ def __eq__(self, other):
3542
+
3543
+ if not isinstance(other, Conditional):
3544
+ return False
3545
+ return (
3546
+ self.left == other.left
3547
+ and self.right == other.right
3548
+ and self.operator == other.operator
3549
+ )
3550
+
3084
3551
  def inline_constant(self, constant: Concept) -> "Conditional":
3085
3552
  assert isinstance(constant.lineage, Function)
3086
3553
  new_val = constant.lineage.arguments[0]
@@ -3107,7 +3574,7 @@ class Conditional(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, Base
3107
3574
  operator=self.operator,
3108
3575
  )
3109
3576
 
3110
- def with_namespace(self, namespace: str):
3577
+ def with_namespace(self, namespace: str) -> "Conditional":
3111
3578
  return Conditional(
3112
3579
  left=(
3113
3580
  self.left.with_namespace(namespace)
@@ -3122,16 +3589,35 @@ class Conditional(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, Base
3122
3589
  operator=self.operator,
3123
3590
  )
3124
3591
 
3125
- def with_select_grain(self, grain: Grain):
3592
+ def with_merge(
3593
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
3594
+ ) -> "Conditional":
3595
+ return Conditional(
3596
+ left=(
3597
+ self.left.with_merge(source, target, modifiers)
3598
+ if isinstance(self.left, Mergeable)
3599
+ else self.left
3600
+ ),
3601
+ right=(
3602
+ self.right.with_merge(source, target, modifiers)
3603
+ if isinstance(self.right, Mergeable)
3604
+ else self.right
3605
+ ),
3606
+ operator=self.operator,
3607
+ )
3608
+
3609
+ def with_select_context(
3610
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3611
+ ):
3126
3612
  return Conditional(
3127
3613
  left=(
3128
- self.left.with_select_grain(grain)
3129
- if isinstance(self.left, SelectGrain)
3614
+ self.left.with_select_context(grain, conditional)
3615
+ if isinstance(self.left, SelectContext)
3130
3616
  else self.left
3131
3617
  ),
3132
3618
  right=(
3133
- self.right.with_select_grain(grain)
3134
- if isinstance(self.right, SelectGrain)
3619
+ self.right.with_select_context(grain, conditional)
3620
+ if isinstance(self.right, SelectContext)
3135
3621
  else self.right
3136
3622
  ),
3137
3623
  operator=self.operator,
@@ -3194,7 +3680,7 @@ class Conditional(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, Base
3194
3680
  return chunks
3195
3681
 
3196
3682
 
3197
- class AggregateWrapper(Namespaced, SelectGrain, BaseModel):
3683
+ class AggregateWrapper(Mergeable, Namespaced, SelectContext, BaseModel):
3198
3684
  function: Function
3199
3685
  by: List[Concept] = Field(default_factory=list)
3200
3686
 
@@ -3222,21 +3708,34 @@ class AggregateWrapper(Namespaced, SelectGrain, BaseModel):
3222
3708
  def arguments(self):
3223
3709
  return self.function.arguments
3224
3710
 
3711
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
3712
+ return AggregateWrapper(
3713
+ function=self.function.with_merge(source, target, modifiers=modifiers),
3714
+ by=(
3715
+ [c.with_merge(source, target, modifiers) for c in self.by]
3716
+ if self.by
3717
+ else []
3718
+ ),
3719
+ )
3720
+
3225
3721
  def with_namespace(self, namespace: str) -> "AggregateWrapper":
3226
3722
  return AggregateWrapper(
3227
3723
  function=self.function.with_namespace(namespace),
3228
3724
  by=[c.with_namespace(namespace) for c in self.by] if self.by else [],
3229
3725
  )
3230
3726
 
3231
- def with_select_grain(self, grain: Grain) -> AggregateWrapper:
3727
+ def with_select_context(
3728
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3729
+ ) -> AggregateWrapper:
3232
3730
  if not self.by:
3233
3731
  by = grain.components_copy
3234
3732
  else:
3235
3733
  by = self.by
3236
- return AggregateWrapper(function=self.function.with_select_grain(grain), by=by)
3734
+ parent = self.function.with_select_context(grain, conditional)
3735
+ return AggregateWrapper(function=parent, by=by)
3237
3736
 
3238
3737
 
3239
- class WhereClause(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3738
+ class WhereClause(Mergeable, ConceptArgs, Namespaced, SelectContext, BaseModel):
3240
3739
  conditional: Union[SubselectComparison, Comparison, Conditional, "Parenthetical"]
3241
3740
 
3242
3741
  @property
@@ -3255,11 +3754,20 @@ class WhereClause(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3255
3754
  def existence_arguments(self) -> list[tuple["Concept", ...]]:
3256
3755
  return self.conditional.existence_arguments
3257
3756
 
3757
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
3758
+ return WhereClause(
3759
+ conditional=self.conditional.with_merge(source, target, modifiers)
3760
+ )
3761
+
3258
3762
  def with_namespace(self, namespace: str) -> WhereClause:
3259
3763
  return WhereClause(conditional=self.conditional.with_namespace(namespace))
3260
3764
 
3261
- def with_select_grain(self, grain: Grain) -> WhereClause:
3262
- return WhereClause(conditional=self.conditional.with_select_grain(grain))
3765
+ def with_select_context(
3766
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3767
+ ) -> WhereClause:
3768
+ return WhereClause(
3769
+ conditional=self.conditional.with_select_context(grain, conditional)
3770
+ )
3263
3771
 
3264
3772
  @property
3265
3773
  def grain(self) -> Grain:
@@ -3387,7 +3895,7 @@ class RowsetDerivationStatement(Namespaced, BaseModel):
3387
3895
  )
3388
3896
 
3389
3897
 
3390
- class RowsetItem(Namespaced, BaseModel):
3898
+ class RowsetItem(Mergeable, Namespaced, BaseModel):
3391
3899
  content: Concept
3392
3900
  rowset: RowsetDerivationStatement
3393
3901
  where: Optional["WhereClause"] = None
@@ -3397,6 +3905,15 @@ class RowsetItem(Namespaced, BaseModel):
3397
3905
  f"<Rowset<{self.rowset.name}>: {str(self.content)} where {str(self.where)}>"
3398
3906
  )
3399
3907
 
3908
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
3909
+ return RowsetItem(
3910
+ content=self.content.with_merge(source, target, modifiers),
3911
+ rowset=self.rowset,
3912
+ where=(
3913
+ self.where.with_merge(source, target, modifiers) if self.where else None
3914
+ ),
3915
+ )
3916
+
3400
3917
  def with_namespace(self, namespace: str) -> "RowsetItem":
3401
3918
  return RowsetItem(
3402
3919
  content=self.content.with_namespace(namespace),
@@ -3447,7 +3964,7 @@ class RowsetItem(Namespaced, BaseModel):
3447
3964
 
3448
3965
 
3449
3966
  class Parenthetical(
3450
- ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, BaseModel
3967
+ ConceptArgs, Mergeable, Namespaced, ConstantInlineable, SelectContext, BaseModel
3451
3968
  ):
3452
3969
  content: "Expr"
3453
3970
 
@@ -3473,11 +3990,22 @@ class Parenthetical(
3473
3990
  )
3474
3991
  )
3475
3992
 
3476
- def with_select_grain(self, grain: Grain):
3993
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
3994
+ return Parenthetical(
3995
+ content=(
3996
+ self.content.with_merge(source, target, modifiers)
3997
+ if isinstance(self.content, Mergeable)
3998
+ else self.content
3999
+ )
4000
+ )
4001
+
4002
+ def with_select_context(
4003
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
4004
+ ):
3477
4005
  return Parenthetical(
3478
4006
  content=(
3479
- self.content.with_select_grain(grain)
3480
- if isinstance(self.content, SelectGrain)
4007
+ self.content.with_select_context(grain, conditional)
4008
+ if isinstance(self.content, SelectContext)
3481
4009
  else self.content
3482
4010
  )
3483
4011
  )