pytrilogy 0.0.1.118__py3-none-any.whl → 0.0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (45)
  1. {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.1.dist-info}/METADATA +1 -1
  2. pytrilogy-0.0.2.1.dist-info/RECORD +82 -0
  3. {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.1.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +1 -1
  5. trilogy/constants.py +6 -0
  6. trilogy/core/enums.py +7 -2
  7. trilogy/core/env_processor.py +43 -19
  8. trilogy/core/functions.py +1 -0
  9. trilogy/core/models.py +666 -146
  10. trilogy/core/optimization.py +31 -28
  11. trilogy/core/optimizations/inline_constant.py +4 -1
  12. trilogy/core/optimizations/inline_datasource.py +25 -4
  13. trilogy/core/optimizations/predicate_pushdown.py +94 -54
  14. trilogy/core/processing/concept_strategies_v3.py +69 -39
  15. trilogy/core/processing/graph_utils.py +3 -3
  16. trilogy/core/processing/node_generators/__init__.py +0 -2
  17. trilogy/core/processing/node_generators/basic_node.py +30 -17
  18. trilogy/core/processing/node_generators/filter_node.py +3 -1
  19. trilogy/core/processing/node_generators/node_merge_node.py +345 -96
  20. trilogy/core/processing/node_generators/rowset_node.py +18 -16
  21. trilogy/core/processing/node_generators/select_node.py +44 -83
  22. trilogy/core/processing/nodes/__init__.py +2 -0
  23. trilogy/core/processing/nodes/base_node.py +22 -5
  24. trilogy/core/processing/nodes/filter_node.py +3 -0
  25. trilogy/core/processing/nodes/group_node.py +20 -2
  26. trilogy/core/processing/nodes/merge_node.py +32 -18
  27. trilogy/core/processing/nodes/select_node_v2.py +17 -3
  28. trilogy/core/processing/utility.py +100 -8
  29. trilogy/core/query_processor.py +77 -24
  30. trilogy/dialect/base.py +11 -46
  31. trilogy/dialect/bigquery.py +1 -1
  32. trilogy/dialect/common.py +11 -0
  33. trilogy/dialect/duckdb.py +1 -1
  34. trilogy/dialect/presto.py +1 -0
  35. trilogy/hooks/graph_hook.py +50 -5
  36. trilogy/hooks/query_debugger.py +1 -0
  37. trilogy/parsing/common.py +8 -5
  38. trilogy/parsing/parse_engine.py +48 -27
  39. trilogy/parsing/render.py +13 -6
  40. trilogy/parsing/trilogy.lark +12 -7
  41. pytrilogy-0.0.1.118.dist-info/RECORD +0 -83
  42. trilogy/core/processing/node_generators/concept_merge_node.py +0 -214
  43. {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.1.dist-info}/LICENSE.md +0 -0
  44. {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.1.dist-info}/entry_points.txt +0 -0
  45. {pytrilogy-0.0.1.118.dist-info → pytrilogy-0.0.2.1.dist-info}/top_level.txt +0 -0
trilogy/core/models.py CHANGED
@@ -63,6 +63,7 @@ from trilogy.core.enums import (
63
63
  DatePart,
64
64
  ShowCategory,
65
65
  Granularity,
66
+ SelectFiltering,
66
67
  )
67
68
  from trilogy.core.exceptions import UndefinedConceptException, InvalidSyntaxException
68
69
  from trilogy.utility import unique
@@ -129,6 +130,12 @@ class Namespaced(ABC):
129
130
  raise NotImplementedError
130
131
 
131
132
 
133
+ class Mergeable(ABC):
134
+
135
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
136
+ raise NotImplementedError
137
+
138
+
132
139
  class ConceptArgs(ABC):
133
140
 
134
141
  @property
@@ -144,8 +151,11 @@ class ConceptArgs(ABC):
144
151
  return self.concept_arguments
145
152
 
146
153
 
147
- class SelectGrain(ABC):
148
- def with_select_grain(self, grain: Grain):
154
+ class SelectContext(ABC):
155
+
156
+ def with_select_context(
157
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
158
+ ):
149
159
  raise NotImplementedError
150
160
 
151
161
 
@@ -154,6 +164,41 @@ class ConstantInlineable(ABC):
154
164
  raise NotImplementedError
155
165
 
156
166
 
167
+ class SelectTypeMixin(BaseModel):
168
+ where_clause: Union["WhereClause", None] = Field(default=None)
169
+
170
+ @property
171
+ def output_components(self) -> List[Concept]:
172
+ raise NotImplementedError
173
+
174
+ @property
175
+ def implicit_where_clause_selections(self) -> List[Concept]:
176
+ if not self.where_clause:
177
+ return []
178
+ filter = set(
179
+ [
180
+ str(x.address)
181
+ for x in self.where_clause.row_arguments
182
+ if not x.derivation == PurposeLineage.CONSTANT
183
+ ]
184
+ )
185
+ query_output = set([str(z.address) for z in self.output_components])
186
+ delta = filter.difference(query_output)
187
+ if delta:
188
+ return [
189
+ x for x in self.where_clause.row_arguments if str(x.address) in delta
190
+ ]
191
+ return []
192
+
193
+ @property
194
+ def where_clause_category(self) -> SelectFiltering:
195
+ if not self.where_clause:
196
+ return SelectFiltering.NONE
197
+ elif self.implicit_where_clause_selections:
198
+ return SelectFiltering.IMPLICIT
199
+ return SelectFiltering.EXPLICIT
200
+
201
+
157
202
  class DataType(Enum):
158
203
  # PRIMITIVES
159
204
  STRING = "string"
@@ -290,11 +335,24 @@ def empty_grain() -> Grain:
290
335
  return Grain(components=[])
291
336
 
292
337
 
293
- class Concept(Namespaced, SelectGrain, BaseModel):
338
+ class MultiLineage(BaseModel):
339
+ lineages: list[
340
+ Union[
341
+ Function,
342
+ WindowItem,
343
+ FilterItem,
344
+ AggregateWrapper,
345
+ RowsetItem,
346
+ MultiSelectStatement,
347
+ ]
348
+ ]
349
+
350
+
351
+ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
294
352
  name: str
295
353
  datatype: DataType | ListType | StructType | MapType | NumericType
296
354
  purpose: Purpose
297
- metadata: Optional[Metadata] = Field(
355
+ metadata: Metadata = Field(
298
356
  default_factory=lambda: Metadata(description=None, line_number=None),
299
357
  validate_default=True,
300
358
  )
@@ -305,17 +363,44 @@ class Concept(Namespaced, SelectGrain, BaseModel):
305
363
  FilterItem,
306
364
  AggregateWrapper,
307
365
  RowsetItem,
308
- MultiSelectStatement | MergeStatement,
366
+ MultiSelectStatement,
309
367
  ]
310
368
  ] = None
311
369
  namespace: Optional[str] = Field(default=DEFAULT_NAMESPACE, validate_default=True)
312
370
  keys: Optional[Tuple["Concept", ...]] = None
313
371
  grain: "Grain" = Field(default=None, validate_default=True)
314
372
  modifiers: Optional[List[Modifier]] = Field(default_factory=list)
373
+ pseudonyms: Dict[str, Concept] = Field(default_factory=dict)
315
374
 
316
375
  def __hash__(self):
317
376
  return hash(str(self))
318
377
 
378
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
379
+ if self.address == source.address:
380
+ new = target.with_grain(self.grain.with_merge(source, target, modifiers))
381
+ new.pseudonyms[self.address] = self
382
+ return new
383
+ return self.__class__(
384
+ name=self.name,
385
+ datatype=self.datatype,
386
+ purpose=self.purpose,
387
+ metadata=self.metadata,
388
+ lineage=(
389
+ self.lineage.with_merge(source, target, modifiers)
390
+ if self.lineage
391
+ else None
392
+ ),
393
+ grain=self.grain.with_merge(source, target, modifiers),
394
+ namespace=self.namespace,
395
+ keys=(
396
+ tuple(x.with_merge(source, target, modifiers) for x in self.keys)
397
+ if self.keys
398
+ else None
399
+ ),
400
+ modifiers=self.modifiers,
401
+ pseudonyms=self.pseudonyms,
402
+ )
403
+
319
404
  @field_validator("keys", mode="before")
320
405
  @classmethod
321
406
  def keys_validator(cls, v, info: ValidationInfo):
@@ -332,7 +417,7 @@ class Concept(Namespaced, SelectGrain, BaseModel):
332
417
  def namespace_validation(cls, v):
333
418
  return v or DEFAULT_NAMESPACE
334
419
 
335
- @field_validator("metadata")
420
+ @field_validator("metadata", mode="before")
336
421
  @classmethod
337
422
  def metadata_validation(cls, v):
338
423
  v = v or Metadata()
@@ -440,15 +525,22 @@ class Concept(Namespaced, SelectGrain, BaseModel):
440
525
  else None
441
526
  ),
442
527
  modifiers=self.modifiers,
528
+ pseudonyms={
529
+ k: v.with_namespace(namespace) for k, v in self.pseudonyms.items()
530
+ },
443
531
  )
444
532
 
445
- def with_select_grain(self, grain: Optional["Grain"] = None) -> "Concept":
533
+ def with_select_context(
534
+ self,
535
+ grain: Optional["Grain"] = None,
536
+ conditional: Conditional | Comparison | Parenthetical | None = None,
537
+ ) -> "Concept":
446
538
  if not all([isinstance(x, Concept) for x in self.keys or []]):
447
539
  raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")
448
540
  new_grain = grain or self.grain
449
541
  new_lineage = self.lineage
450
- if isinstance(self.lineage, SelectGrain):
451
- new_lineage = self.lineage.with_select_grain(new_grain)
542
+ if isinstance(self.lineage, SelectContext):
543
+ new_lineage = self.lineage.with_select_context(new_grain, conditional)
452
544
  return self.__class__(
453
545
  name=self.name,
454
546
  datatype=self.datatype,
@@ -459,6 +551,7 @@ class Concept(Namespaced, SelectGrain, BaseModel):
459
551
  namespace=self.namespace,
460
552
  keys=self.keys,
461
553
  modifiers=self.modifiers,
554
+ pseudonyms=self.pseudonyms,
462
555
  )
463
556
 
464
557
  def with_grain(self, grain: Optional["Grain"] = None) -> "Concept":
@@ -474,6 +567,7 @@ class Concept(Namespaced, SelectGrain, BaseModel):
474
567
  namespace=self.namespace,
475
568
  keys=self.keys,
476
569
  modifiers=self.modifiers,
570
+ pseudonyms=self.pseudonyms,
477
571
  )
478
572
 
479
573
  @cached_property
@@ -512,6 +606,7 @@ class Concept(Namespaced, SelectGrain, BaseModel):
512
606
  keys=self.keys,
513
607
  namespace=self.namespace,
514
608
  modifiers=self.modifiers,
609
+ pseudonyms=self.pseudonyms,
515
610
  )
516
611
 
517
612
  def with_default_grain(self) -> "Concept":
@@ -529,7 +624,7 @@ class Concept(Namespaced, SelectGrain, BaseModel):
529
624
  FilterItem,
530
625
  AggregateWrapper,
531
626
  RowsetItem,
532
- MultiSelectStatement | MergeStatement,
627
+ MultiSelectStatement,
533
628
  ],
534
629
  output: List[Concept],
535
630
  ):
@@ -568,8 +663,6 @@ class Concept(Namespaced, SelectGrain, BaseModel):
568
663
  return PurposeLineage.ROWSET
569
664
  elif self.lineage and isinstance(self.lineage, MultiSelectStatement):
570
665
  return PurposeLineage.MULTISELECT
571
- elif self.lineage and isinstance(self.lineage, MergeStatement):
572
- return PurposeLineage.MERGE
573
666
  elif (
574
667
  self.lineage
575
668
  and isinstance(self.lineage, Function)
@@ -592,6 +685,13 @@ class Concept(Namespaced, SelectGrain, BaseModel):
592
685
  elif self.lineage and isinstance(self.lineage, Function):
593
686
  if not self.lineage.concept_arguments:
594
687
  return PurposeLineage.CONSTANT
688
+ elif all(
689
+ [
690
+ x.derivation == PurposeLineage.CONSTANT
691
+ for x in self.lineage.concept_arguments
692
+ ]
693
+ ):
694
+ return PurposeLineage.CONSTANT
595
695
  return PurposeLineage.BASIC
596
696
  elif self.purpose == Purpose.CONSTANT:
597
697
  return PurposeLineage.CONSTANT
@@ -626,8 +726,28 @@ class Concept(Namespaced, SelectGrain, BaseModel):
626
726
  return Granularity.SINGLE_ROW
627
727
  return Granularity.MULTI_ROW
628
728
 
729
+ def with_filter(
730
+ self, condition: "Conditional | Comparison | Parenthetical"
731
+ ) -> "Concept":
732
+ from trilogy.utility import string_to_hash
629
733
 
630
- class Grain(BaseModel):
734
+ name = string_to_hash(self.name + str(condition))
735
+ new = Concept(
736
+ name=f"{self.name}_{name}",
737
+ datatype=self.datatype,
738
+ purpose=self.purpose,
739
+ metadata=self.metadata,
740
+ lineage=FilterItem(content=self, where=WhereClause(conditional=condition)),
741
+ keys=None,
742
+ grain=(self.grain if self.purpose == Purpose.PROPERTY else Grain()),
743
+ namespace=self.namespace,
744
+ modifiers=self.modifiers,
745
+ pseudonyms=self.pseudonyms,
746
+ )
747
+ return new
748
+
749
+
750
+ class Grain(Mergeable, BaseModel):
631
751
  nested: bool = False
632
752
  components: List[Concept] = Field(default_factory=list, validate_default=True)
633
753
 
@@ -645,12 +765,6 @@ class Grain(BaseModel):
645
765
  if sub.purpose in (Purpose.PROPERTY, Purpose.METRIC) and sub.keys:
646
766
  if all([c in v2 for c in sub.keys]):
647
767
  continue
648
- elif sub.derivation == PurposeLineage.MERGE and isinstance(
649
- sub.lineage, MergeStatement
650
- ):
651
- parents = sub.lineage.concepts
652
- if any([p in v2 for p in parents]):
653
- continue
654
768
  final.append(sub)
655
769
  v2 = sorted(final, key=lambda x: x.name)
656
770
  return v2
@@ -672,6 +786,16 @@ class Grain(BaseModel):
672
786
  nested=self.nested,
673
787
  )
674
788
 
789
+ def with_merge(
790
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
791
+ ) -> "Grain":
792
+ return Grain(
793
+ components=[
794
+ x.with_merge(source, target, modifiers) for x in self.components
795
+ ],
796
+ nested=self.nested,
797
+ )
798
+
675
799
  @property
676
800
  def abstract(self):
677
801
  return not self.components or all(
@@ -759,6 +883,15 @@ class ColumnAssignment(BaseModel):
759
883
  modifiers=self.modifiers,
760
884
  )
761
885
 
886
+ def with_merge(
887
+ self, concept: Concept, modifiers: List[Modifier]
888
+ ) -> "ColumnAssignment":
889
+ return ColumnAssignment(
890
+ alias=self.alias,
891
+ concept=concept,
892
+ modifiers=modifiers,
893
+ )
894
+
762
895
 
763
896
  class Statement(BaseModel):
764
897
  pass
@@ -809,7 +942,7 @@ class LooseConceptList(BaseModel):
809
942
  return self.addresses.isdisjoint(other.addresses)
810
943
 
811
944
 
812
- class Function(Namespaced, SelectGrain, BaseModel):
945
+ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
813
946
  operator: FunctionType
814
947
  arg_count: int = Field(default=1)
815
948
  output_datatype: DataType | ListType | StructType | MapType | NumericType
@@ -849,15 +982,42 @@ class Function(Namespaced, SelectGrain, BaseModel):
849
982
  def datatype(self):
850
983
  return self.output_datatype
851
984
 
852
- def with_select_grain(self, grain: Grain) -> Function:
985
+ def with_select_context(
986
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
987
+ ) -> Function:
988
+ if self.operator in FunctionClass.AGGREGATE_FUNCTIONS.value and conditional:
989
+ base = [
990
+ (
991
+ c.with_select_context(grain, conditional)
992
+ if isinstance(
993
+ c,
994
+ SelectContext,
995
+ )
996
+ else c
997
+ )
998
+ for c in self.arguments
999
+ ]
1000
+ final = [
1001
+ c.with_filter(conditional) if isinstance(c, Concept) else c
1002
+ for c in base
1003
+ ]
1004
+ return Function(
1005
+ operator=self.operator,
1006
+ arguments=final,
1007
+ output_datatype=self.output_datatype,
1008
+ output_purpose=self.output_purpose,
1009
+ valid_inputs=self.valid_inputs,
1010
+ arg_count=self.arg_count,
1011
+ )
1012
+
853
1013
  return Function(
854
1014
  operator=self.operator,
855
1015
  arguments=[
856
1016
  (
857
- c.with_select_grain(grain)
1017
+ c.with_select_context(grain, conditional)
858
1018
  if isinstance(
859
1019
  c,
860
- SelectGrain,
1020
+ SelectContext,
861
1021
  )
862
1022
  else c
863
1023
  )
@@ -951,6 +1111,28 @@ class Function(Namespaced, SelectGrain, BaseModel):
951
1111
  arg_count=self.arg_count,
952
1112
  )
953
1113
 
1114
+ def with_merge(
1115
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1116
+ ) -> "Function":
1117
+ return Function(
1118
+ operator=self.operator,
1119
+ arguments=[
1120
+ (
1121
+ c.with_merge(source, target, modifiers)
1122
+ if isinstance(
1123
+ c,
1124
+ Mergeable,
1125
+ )
1126
+ else c
1127
+ )
1128
+ for c in self.arguments
1129
+ ],
1130
+ output_datatype=self.output_datatype,
1131
+ output_purpose=self.output_purpose,
1132
+ valid_inputs=self.valid_inputs,
1133
+ arg_count=self.arg_count,
1134
+ )
1135
+
954
1136
  @property
955
1137
  def concept_arguments(self) -> List[Concept]:
956
1138
  base = []
@@ -991,6 +1173,13 @@ class ConceptTransform(Namespaced, BaseModel):
991
1173
  def input(self) -> List[Concept]:
992
1174
  return [v for v in self.function.arguments if isinstance(v, Concept)]
993
1175
 
1176
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
1177
+ return ConceptTransform(
1178
+ function=self.function.with_merge(source, target, modifiers),
1179
+ output=self.output.with_merge(source, target, modifiers),
1180
+ modifiers=self.modifiers + modifiers,
1181
+ )
1182
+
994
1183
  def with_namespace(self, namespace: str) -> "ConceptTransform":
995
1184
  return ConceptTransform(
996
1185
  function=self.function.with_namespace(namespace),
@@ -1015,13 +1204,23 @@ class WindowItemOrder(BaseModel):
1015
1204
  contents: List["OrderItem"]
1016
1205
 
1017
1206
 
1018
- class WindowItem(Namespaced, SelectGrain, BaseModel):
1207
+ class WindowItem(Mergeable, Namespaced, SelectContext, BaseModel):
1019
1208
  type: WindowType
1020
1209
  content: Concept
1021
1210
  order_by: List["OrderItem"]
1022
1211
  over: List["Concept"] = Field(default_factory=list)
1023
1212
  index: Optional[int] = None
1024
1213
 
1214
+ def with_merge(
1215
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1216
+ ) -> "WindowItem":
1217
+ return WindowItem(
1218
+ type=self.type,
1219
+ content=self.content.with_merge(source, target, modifiers),
1220
+ over=[x.with_merge(source, target, modifiers) for x in self.over],
1221
+ order_by=[x.with_merge(source, target, modifiers) for x in self.order_by],
1222
+ )
1223
+
1025
1224
  def with_namespace(self, namespace: str) -> "WindowItem":
1026
1225
  return WindowItem(
1027
1226
  type=self.type,
@@ -1030,12 +1229,14 @@ class WindowItem(Namespaced, SelectGrain, BaseModel):
1030
1229
  order_by=[x.with_namespace(namespace) for x in self.order_by],
1031
1230
  )
1032
1231
 
1033
- def with_select_grain(self, grain: Grain) -> "WindowItem":
1232
+ def with_select_context(
1233
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
1234
+ ) -> "WindowItem":
1034
1235
  return WindowItem(
1035
1236
  type=self.type,
1036
- content=self.content.with_select_grain(grain),
1037
- over=[x.with_select_grain(grain) for x in self.over],
1038
- order_by=[x.with_select_grain(grain) for x in self.order_by],
1237
+ content=self.content.with_select_context(grain, conditional),
1238
+ over=[x.with_select_context(grain, conditional) for x in self.over],
1239
+ order_by=[x.with_select_context(grain, conditional) for x in self.order_by],
1039
1240
  )
1040
1241
 
1041
1242
  @property
@@ -1082,23 +1283,33 @@ class WindowItem(Namespaced, SelectGrain, BaseModel):
1082
1283
  return Purpose.PROPERTY
1083
1284
 
1084
1285
 
1085
- class FilterItem(Namespaced, SelectGrain, BaseModel):
1286
+ class FilterItem(Namespaced, SelectContext, BaseModel):
1086
1287
  content: Concept
1087
1288
  where: "WhereClause"
1088
1289
 
1089
1290
  def __str__(self):
1090
1291
  return f"<Filter: {str(self.content)} where {str(self.where)}>"
1091
1292
 
1293
+ def with_merge(
1294
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1295
+ ) -> "FilterItem":
1296
+ return FilterItem(
1297
+ content=source.with_merge(source, target, modifiers),
1298
+ where=self.where.with_merge(source, target, modifiers),
1299
+ )
1300
+
1092
1301
  def with_namespace(self, namespace: str) -> "FilterItem":
1093
1302
  return FilterItem(
1094
1303
  content=self.content.with_namespace(namespace),
1095
1304
  where=self.where.with_namespace(namespace),
1096
1305
  )
1097
1306
 
1098
- def with_select_grain(self, grain: Grain) -> FilterItem:
1307
+ def with_select_context(
1308
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
1309
+ ) -> FilterItem:
1099
1310
  return FilterItem(
1100
- content=self.content.with_select_grain(grain),
1101
- where=self.where.with_select_grain(grain),
1311
+ content=self.content.with_select_context(grain, conditional),
1312
+ where=self.where.with_select_context(grain, conditional),
1102
1313
  )
1103
1314
 
1104
1315
  @property
@@ -1139,7 +1350,7 @@ class FilterItem(Namespaced, SelectGrain, BaseModel):
1139
1350
  return [self.content] + self.where.concept_arguments
1140
1351
 
1141
1352
 
1142
- class SelectItem(Namespaced, BaseModel):
1353
+ class SelectItem(Mergeable, Namespaced, BaseModel):
1143
1354
  content: Union[Concept, ConceptTransform]
1144
1355
  modifiers: List[Modifier] = Field(default_factory=list)
1145
1356
 
@@ -1155,6 +1366,14 @@ class SelectItem(Namespaced, BaseModel):
1155
1366
  def input(self) -> List[Concept]:
1156
1367
  return self.content.input
1157
1368
 
1369
+ def with_merge(
1370
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1371
+ ) -> "SelectItem":
1372
+ return SelectItem(
1373
+ content=self.content.with_merge(source, target, modifiers),
1374
+ modifiers=modifiers,
1375
+ )
1376
+
1158
1377
  def with_namespace(self, namespace: str) -> "SelectItem":
1159
1378
  return SelectItem(
1160
1379
  content=self.content.with_namespace(namespace),
@@ -1162,16 +1381,25 @@ class SelectItem(Namespaced, BaseModel):
1162
1381
  )
1163
1382
 
1164
1383
 
1165
- class OrderItem(SelectGrain, Namespaced, BaseModel):
1384
+ class OrderItem(Mergeable, SelectContext, Namespaced, BaseModel):
1166
1385
  expr: Concept
1167
1386
  order: Ordering
1168
1387
 
1169
1388
  def with_namespace(self, namespace: str) -> "OrderItem":
1170
1389
  return OrderItem(expr=self.expr.with_namespace(namespace), order=self.order)
1171
1390
 
1172
- def with_select_grain(self, grain: Grain) -> "OrderItem":
1391
+ def with_select_context(
1392
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
1393
+ ) -> "OrderItem":
1173
1394
  return OrderItem(expr=self.expr.with_grain(grain), order=self.order)
1174
1395
 
1396
+ def with_merge(
1397
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1398
+ ) -> "OrderItem":
1399
+ return OrderItem(
1400
+ expr=source.with_merge(source, target, modifiers), order=self.order
1401
+ )
1402
+
1175
1403
  @property
1176
1404
  def input(self):
1177
1405
  return self.expr.input
@@ -1181,21 +1409,27 @@ class OrderItem(SelectGrain, Namespaced, BaseModel):
1181
1409
  return self.expr.output
1182
1410
 
1183
1411
 
1184
- class OrderBy(Namespaced, BaseModel):
1412
+ class OrderBy(Mergeable, Namespaced, BaseModel):
1185
1413
  items: List[OrderItem]
1186
1414
 
1187
1415
  def with_namespace(self, namespace: str) -> "OrderBy":
1188
1416
  return OrderBy(items=[x.with_namespace(namespace) for x in self.items])
1189
1417
 
1418
+ def with_merge(
1419
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1420
+ ) -> "OrderBy":
1421
+ return OrderBy(
1422
+ items=[x.with_merge(source, target, modifiers) for x in self.items]
1423
+ )
1424
+
1190
1425
 
1191
1426
  class RawSQLStatement(BaseModel):
1192
1427
  text: str
1193
1428
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
1194
1429
 
1195
1430
 
1196
- class SelectStatement(Namespaced, BaseModel):
1431
+ class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
1197
1432
  selection: List[SelectItem]
1198
- where_clause: Optional["WhereClause"] = None
1199
1433
  order_by: Optional[OrderBy] = None
1200
1434
  limit: Optional[int] = None
1201
1435
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
@@ -1225,6 +1459,19 @@ class SelectStatement(Namespaced, BaseModel):
1225
1459
  new.append(item)
1226
1460
  return new
1227
1461
 
1462
+ def with_merge(
1463
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1464
+ ) -> "SelectStatement":
1465
+ return SelectStatement(
1466
+ selection=[x.with_merge(source, target, modifiers) for x in self.selection],
1467
+ order_by=(
1468
+ self.order_by.with_merge(source, target, modifiers)
1469
+ if self.order_by
1470
+ else None
1471
+ ),
1472
+ limit=self.limit,
1473
+ )
1474
+
1228
1475
  @property
1229
1476
  def input_components(self) -> List[Concept]:
1230
1477
  output = set()
@@ -1297,14 +1544,14 @@ class SelectStatement(Namespaced, BaseModel):
1297
1544
  for item in self.output_components:
1298
1545
  if item.purpose == Purpose.KEY:
1299
1546
  output.append(item)
1300
- if self.where_clause:
1301
- for item in self.where_clause.concept_arguments:
1302
- if item.purpose == Purpose.KEY:
1303
- output.append(item)
1304
- # elif item.purpose == Purpose.PROPERTY and item.grain:
1305
- # output += item.grain.components
1306
- # TODO: handle other grain cases
1307
- # new if block by design
1547
+ # if self.where_clause:
1548
+ # for item in self.where_clause.concept_arguments:
1549
+ # if item.purpose == Purpose.KEY:
1550
+ # output.append(item)
1551
+ # elif item.purpose == Purpose.PROPERTY and item.grain:
1552
+ # output += item.grain.components
1553
+ # TODO: handle other grain cases
1554
+ # new if block by design
1308
1555
  # add back any purpose that is not at the grain
1309
1556
  # if a query already has the key of the property in the grain
1310
1557
  # we want to group to that grain and ignore the property, which is a derivation
@@ -1393,11 +1640,10 @@ class AlignClause(Namespaced, BaseModel):
1393
1640
  return AlignClause(items=[x.with_namespace(namespace) for x in self.items])
1394
1641
 
1395
1642
 
1396
- class MultiSelectStatement(Namespaced, BaseModel):
1643
+ class MultiSelectStatement(SelectTypeMixin, Mergeable, Namespaced, BaseModel):
1397
1644
  selects: List[SelectStatement]
1398
1645
  align: AlignClause
1399
1646
  namespace: str
1400
- where_clause: Optional["WhereClause"] = None
1401
1647
  order_by: Optional[OrderBy] = None
1402
1648
  limit: Optional[int] = None
1403
1649
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
@@ -1423,6 +1669,28 @@ class MultiSelectStatement(Namespaced, BaseModel):
1423
1669
  output += self.where_clause.concept_arguments
1424
1670
  return unique(output, "address")
1425
1671
 
1672
+ def with_merge(
1673
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1674
+ ) -> "MultiSelectStatement":
1675
+ new = MultiSelectStatement(
1676
+ selects=[s.with_merge(source, target, modifiers) for s in self.selects],
1677
+ align=self.align,
1678
+ namespace=self.namespace,
1679
+ order_by=(
1680
+ self.order_by.with_merge(source, target, modifiers)
1681
+ if self.order_by
1682
+ else None
1683
+ ),
1684
+ limit=self.limit,
1685
+ meta=self.meta,
1686
+ where_clause=(
1687
+ self.where_clause.with_merge(source, target, modifiers)
1688
+ if self.where_clause
1689
+ else None
1690
+ ),
1691
+ )
1692
+ return new
1693
+
1426
1694
  def get_merge_concept(self, check: Concept):
1427
1695
  for item in self.align.items:
1428
1696
  if check in item.concepts_lcl:
@@ -1434,6 +1702,14 @@ class MultiSelectStatement(Namespaced, BaseModel):
1434
1702
  selects=[c.with_namespace(namespace) for c in self.selects],
1435
1703
  align=self.align.with_namespace(namespace),
1436
1704
  namespace=namespace,
1705
+ order_by=self.order_by.with_namespace(namespace) if self.order_by else None,
1706
+ limit=self.limit,
1707
+ meta=self.meta,
1708
+ where_clause=(
1709
+ self.where_clause.with_namespace(namespace)
1710
+ if self.where_clause
1711
+ else None
1712
+ ),
1437
1713
  )
1438
1714
 
1439
1715
  @property
@@ -1518,49 +1794,21 @@ def safe_grain(v) -> Grain:
1518
1794
  class DatasourceMetadata(BaseModel):
1519
1795
  freshness_concept: Concept | None
1520
1796
  partition_fields: List[Concept] = Field(default_factory=list)
1797
+ line_no: int | None = None
1521
1798
 
1522
1799
 
1523
- class MergeStatement(Namespaced, BaseModel):
1524
- concepts: List[Concept]
1525
- datatype: DataType | ListType | StructType | MapType | NumericType
1526
-
1527
- @cached_property
1528
- def concepts_lcl(self):
1529
- return LooseConceptList(concepts=self.concepts)
1530
-
1531
- @property
1532
- def merge_concept(self) -> Concept:
1533
- bridge_name = "_".join([c.safe_address for c in self.concepts])
1534
- return Concept(
1535
- name=f"__merge_{bridge_name}",
1536
- datatype=self.datatype,
1537
- purpose=Purpose.PROPERTY,
1538
- lineage=self,
1539
- keys=tuple(self.concepts),
1540
- )
1541
-
1542
- @property
1543
- def arguments(self) -> List[Concept]:
1544
- return self.concepts
1545
-
1546
- @property
1547
- def concept_arguments(self) -> List[Concept]:
1548
- return self.concepts
1549
-
1550
- def find_source(self, concept: Concept, cte: CTE) -> Concept:
1551
- for x in self.concepts:
1552
- for z in cte.output_columns:
1553
- if z.address == x.address:
1554
- return z
1555
- raise SyntaxError(
1556
- f"Could not find upstream map for multiselect {str(concept)} on cte ({cte.name})"
1557
- )
1800
+ class MergeStatementV2(Namespaced, BaseModel):
1801
+ source: Concept
1802
+ target: Concept
1803
+ modifiers: List[Modifier] = Field(default_factory=list)
1558
1804
 
1559
- def with_namespace(self, namespace: str) -> "MergeStatement":
1560
- return MergeStatement(
1561
- concepts=[c.with_namespace(namespace) for c in self.concepts],
1562
- datatype=self.datatype,
1805
+ def with_namespace(self, namespace: str) -> "MergeStatementV2":
1806
+ new = MergeStatementV2(
1807
+ source=self.source.with_namespace(namespace),
1808
+ target=self.target.with_namespace(namespace),
1809
+ modifiers=self.modifiers,
1563
1810
  )
1811
+ return new
1564
1812
 
1565
1813
 
1566
1814
  class Datasource(Namespaced, BaseModel):
@@ -1575,6 +1823,28 @@ class Datasource(Namespaced, BaseModel):
1575
1823
  default_factory=lambda: DatasourceMetadata(freshness_concept=None)
1576
1824
  )
1577
1825
 
1826
+ def merge_concept(
1827
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1828
+ ):
1829
+ original = [c for c in self.columns if c.concept.address == source.address]
1830
+ # map to the alias with the modifier, and the original
1831
+ self.columns = [
1832
+ (
1833
+ c.with_merge(target, modifiers)
1834
+ if c.concept.address == source.address
1835
+ else c
1836
+ )
1837
+ for c in self.columns
1838
+ ] + original
1839
+ self.grain = self.grain.with_merge(source, target, modifiers)
1840
+ del self.output_lcl
1841
+
1842
+ @property
1843
+ def env_label(self) -> str:
1844
+ if not self.namespace or self.namespace == DEFAULT_NAMESPACE:
1845
+ return self.identifier
1846
+ return f"{self.namespace}.{self.identifier}"
1847
+
1578
1848
  @property
1579
1849
  def condition(self):
1580
1850
  return None
@@ -1738,6 +2008,7 @@ class BaseJoin(BaseModel):
1738
2008
  concepts: List[Concept]
1739
2009
  join_type: JoinType
1740
2010
  filter_to_mutual: bool = False
2011
+ concept_pairs: list[tuple[Concept, Concept]] | None = None
1741
2012
 
1742
2013
  def __init__(self, **data: Any):
1743
2014
  super().__init__(**data)
@@ -1747,10 +2018,21 @@ class BaseJoin(BaseModel):
1747
2018
  f" {self.right_datasource}"
1748
2019
  )
1749
2020
  final_concepts = []
2021
+
2022
+ # if we have a list of concept pairs
2023
+ if self.concept_pairs:
2024
+ return
2025
+
1750
2026
  for concept in self.concepts:
1751
2027
  include = True
1752
2028
  for ds in [self.left_datasource, self.right_datasource]:
1753
- if concept.address not in [c.address for c in ds.output_concepts]:
2029
+ synonyms = []
2030
+ for c in ds.output_concepts:
2031
+ synonyms += list(c.pseudonyms.keys())
2032
+ if (
2033
+ concept.address not in [c.address for c in ds.output_concepts]
2034
+ and concept.address not in synonyms
2035
+ ):
1754
2036
  if self.filter_to_mutual:
1755
2037
  include = False
1756
2038
  else:
@@ -1988,7 +2270,7 @@ class QueryDatasource(BaseModel):
1988
2270
  )
1989
2271
  # partial = "_".join([str(c.address).replace(".", "_") for c in self.partial_concepts])
1990
2272
  return (
1991
- "_join_".join([d.name for d in self.datasources])
2273
+ "_join_".join([d.full_name for d in self.datasources])
1992
2274
  + (f"_at_{grain}" if grain else "_at_abstract")
1993
2275
  + (f"_filtered_by_{filters}" if filters else "")
1994
2276
  # + (f"_partial_{partial}" if partial else "")
@@ -2144,7 +2426,13 @@ class CTE(BaseModel):
2144
2426
  ds_being_inlined.name if x == parent.name else x for x in v
2145
2427
  ]
2146
2428
  elif v == parent.name:
2147
- self.source_map[k] = ds_being_inlined.name
2429
+ self.source_map[k] = [ds_being_inlined.name]
2430
+
2431
+ # zip in any required values for lookups
2432
+ for k in ds_being_inlined.output_lcl.addresses:
2433
+ if k in self.source_map and self.source_map[k]:
2434
+ continue
2435
+ self.source_map[k] = [ds_being_inlined.name]
2148
2436
  self.parent_ctes = [x for x in self.parent_ctes if x.name != parent.name]
2149
2437
  if force_group:
2150
2438
  self.group_to_grain = True
@@ -2245,6 +2533,45 @@ class CTE(BaseModel):
2245
2533
  except ValueError as e:
2246
2534
  return f"INVALID_ALIAS: {str(e)}"
2247
2535
 
2536
+ @property
2537
+ def group_concepts(self) -> List[Concept]:
2538
+ return (
2539
+ unique(
2540
+ self.grain.components
2541
+ + [
2542
+ c
2543
+ for c in self.output_columns
2544
+ if c.purpose in (Purpose.PROPERTY, Purpose.KEY)
2545
+ and c.address not in [x.address for x in self.grain.components]
2546
+ ]
2547
+ + [
2548
+ c
2549
+ for c in self.output_columns
2550
+ if c.purpose == Purpose.METRIC
2551
+ and (
2552
+ any(
2553
+ [
2554
+ c.with_grain(cte.grain) in cte.output_columns
2555
+ for cte in self.parent_ctes
2556
+ ]
2557
+ )
2558
+ # if we have this metric from a source
2559
+ # it isn't derived here and must be grouped on
2560
+ or len(self.source_map[c.address]) > 0
2561
+ )
2562
+ ]
2563
+ + [
2564
+ c
2565
+ for c in self.output_columns
2566
+ if c.purpose == Purpose.CONSTANT
2567
+ and self.source_map[c.address] != []
2568
+ ],
2569
+ "address",
2570
+ )
2571
+ if self.group_to_grain
2572
+ else []
2573
+ )
2574
+
2248
2575
  @property
2249
2576
  def render_from_clause(self) -> bool:
2250
2577
  if (
@@ -2300,6 +2627,7 @@ class Join(BaseModel):
2300
2627
  right_cte: CTE | Datasource
2301
2628
  jointype: JoinType
2302
2629
  joinkeys: List[JoinKey]
2630
+ joinkey_pairs: List[tuple[Concept, Concept]] | None = None
2303
2631
 
2304
2632
  @property
2305
2633
  def left_name(self) -> str:
@@ -2336,7 +2664,7 @@ class Join(BaseModel):
2336
2664
  )
2337
2665
 
2338
2666
 
2339
- class UndefinedConcept(Concept):
2667
+ class UndefinedConcept(Concept, Mergeable, Namespaced):
2340
2668
  model_config = ConfigDict(arbitrary_types_allowed=True)
2341
2669
  name: str
2342
2670
  environment: "EnvironmentConceptDict"
@@ -2344,6 +2672,34 @@ class UndefinedConcept(Concept):
2344
2672
  datatype: DataType = DataType.UNKNOWN
2345
2673
  purpose: Purpose = Purpose.KEY
2346
2674
 
2675
+ def with_merge(
2676
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
2677
+ ) -> "UndefinedConcept" | Concept:
2678
+ if self.address == source.address:
2679
+ new = target.with_grain(self.grain.with_merge(source, target, modifiers))
2680
+ new.pseudonyms[self.address] = self
2681
+ return new
2682
+ return self.__class__(
2683
+ name=self.name,
2684
+ datatype=self.datatype,
2685
+ purpose=self.purpose,
2686
+ metadata=self.metadata,
2687
+ lineage=(
2688
+ self.lineage.with_merge(source, target, modifiers)
2689
+ if self.lineage
2690
+ else None
2691
+ ),
2692
+ grain=self.grain.with_merge(source, target, modifiers),
2693
+ namespace=self.namespace,
2694
+ keys=(
2695
+ tuple(x.with_merge(source, target, modifiers) for x in self.keys)
2696
+ if self.keys
2697
+ else None
2698
+ ),
2699
+ environment=self.environment,
2700
+ line_no=self.line_no,
2701
+ )
2702
+
2347
2703
  def with_namespace(self, namespace: str) -> "UndefinedConcept":
2348
2704
  return self.__class__(
2349
2705
  name=self.name,
@@ -2362,14 +2718,18 @@ class UndefinedConcept(Concept):
2362
2718
  line_no=self.line_no,
2363
2719
  )
2364
2720
 
2365
- def with_select_grain(self, grain: Optional["Grain"] = None) -> "UndefinedConcept":
2721
+ def with_select_context(
2722
+ self,
2723
+ grain: Optional["Grain"] = None,
2724
+ conditional: Conditional | Comparison | Parenthetical | None = None,
2725
+ ) -> "UndefinedConcept":
2366
2726
  if not all([isinstance(x, Concept) for x in self.keys or []]):
2367
2727
  raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")
2368
2728
  new_grain = grain or Grain(components=[])
2369
2729
  if self.lineage:
2370
2730
  new_lineage = self.lineage
2371
- if isinstance(self.lineage, SelectGrain):
2372
- new_lineage = self.lineage.with_select_grain(new_grain)
2731
+ if isinstance(self.lineage, SelectContext):
2732
+ new_lineage = self.lineage.with_select_context(new_grain, conditional)
2373
2733
  else:
2374
2734
  new_lineage = None
2375
2735
  return self.__class__(
@@ -2384,7 +2744,7 @@ class UndefinedConcept(Concept):
2384
2744
  environment=self.environment,
2385
2745
  )
2386
2746
 
2387
- def with_grain(self, grain: Optional["Grain"] = None) -> "Concept":
2747
+ def with_grain(self, grain: Optional["Grain"] = None) -> "UndefinedConcept":
2388
2748
  return self.__class__(
2389
2749
  name=self.name,
2390
2750
  datatype=self.datatype,
@@ -2398,7 +2758,7 @@ class UndefinedConcept(Concept):
2398
2758
  line_no=self.line_no,
2399
2759
  )
2400
2760
 
2401
- def with_default_grain(self) -> "Concept":
2761
+ def with_default_grain(self) -> "UndefinedConcept":
2402
2762
  if self.purpose == Purpose.KEY:
2403
2763
  # we need to make this abstract
2404
2764
  grain = Grain(components=[self.with_grain(Grain())], nested=True)
@@ -2432,6 +2792,21 @@ class UndefinedConcept(Concept):
2432
2792
  )
2433
2793
 
2434
2794
 
2795
+ class EnvironmentDatasourceDict(dict):
2796
+ def __init__(self, *args, **kwargs) -> None:
2797
+ super().__init__(self, *args, **kwargs)
2798
+
2799
+ def __getitem__(self, key: str) -> Datasource:
2800
+ try:
2801
+ return super(EnvironmentDatasourceDict, self).__getitem__(key)
2802
+ except KeyError:
2803
+ if DEFAULT_NAMESPACE + "." + key in self:
2804
+ return self.__getitem__(DEFAULT_NAMESPACE + "." + key)
2805
+ if "." in key and key.split(".")[0] == DEFAULT_NAMESPACE:
2806
+ return self.__getitem__(key.split(".")[1])
2807
+ raise
2808
+
2809
+
2435
2810
  class EnvironmentConceptDict(dict):
2436
2811
  def __init__(self, *args, **kwargs) -> None:
2437
2812
  super().__init__(self, *args, **kwargs)
@@ -2460,6 +2835,8 @@ class EnvironmentConceptDict(dict):
2460
2835
  if DEFAULT_NAMESPACE + "." + key in self:
2461
2836
  return self.__getitem__(DEFAULT_NAMESPACE + "." + key, line_no)
2462
2837
  if not self.fail_on_missing:
2838
+ if key in self.undefined:
2839
+ return self.undefined[key]
2463
2840
  undefined = UndefinedConcept(
2464
2841
  name=key,
2465
2842
  line_no=line_no,
@@ -2483,7 +2860,7 @@ class EnvironmentConceptDict(dict):
2483
2860
  matches = difflib.get_close_matches(concept_name, self.keys())
2484
2861
  return matches
2485
2862
 
2486
- def items(self) -> ItemsView[str, Concept | UndefinedConcept]: # type: ignore
2863
+ def items(self) -> ItemsView[str, Concept]: # type: ignore
2487
2864
  return super().items()
2488
2865
 
2489
2866
 
@@ -2509,13 +2886,25 @@ def validate_concepts(v) -> EnvironmentConceptDict:
2509
2886
  raise ValueError
2510
2887
 
2511
2888
 
2889
+ def validate_datasources(v) -> EnvironmentDatasourceDict:
2890
+ if isinstance(v, EnvironmentDatasourceDict):
2891
+ return v
2892
+ elif isinstance(v, dict):
2893
+ return EnvironmentDatasourceDict(
2894
+ **{x: Datasource.model_validate(y) for x, y in v.items()}
2895
+ )
2896
+ raise ValueError
2897
+
2898
+
2512
2899
  class Environment(BaseModel):
2513
2900
  model_config = ConfigDict(arbitrary_types_allowed=True, strict=False)
2514
2901
 
2515
2902
  concepts: Annotated[EnvironmentConceptDict, PlainValidator(validate_concepts)] = (
2516
2903
  Field(default_factory=EnvironmentConceptDict)
2517
2904
  )
2518
- datasources: Dict[str, Datasource] = Field(default_factory=dict)
2905
+ datasources: Annotated[
2906
+ EnvironmentDatasourceDict, PlainValidator(validate_datasources)
2907
+ ] = Field(default_factory=EnvironmentDatasourceDict)
2519
2908
  functions: Dict[str, Function] = Field(default_factory=dict)
2520
2909
  data_types: Dict[str, DataType] = Field(default_factory=dict)
2521
2910
  imports: Dict[str, ImportStatement] = Field(default_factory=dict)
@@ -2526,7 +2915,7 @@ class Environment(BaseModel):
2526
2915
  cte_name_map: Dict[str, str] = Field(default_factory=dict)
2527
2916
 
2528
2917
  materialized_concepts: List[Concept] = Field(default_factory=list)
2529
- merged_concepts: Dict[str, Concept] = Field(default_factory=dict)
2918
+ alias_origin_lookup: Dict[str, Concept] = Field(default_factory=dict)
2530
2919
  _parse_count: int = 0
2531
2920
 
2532
2921
  @classmethod
@@ -2563,6 +2952,12 @@ class Environment(BaseModel):
2563
2952
  self.materialized_concepts = [
2564
2953
  c for c in self.concepts.values() if c.address in concrete_addresses
2565
2954
  ]
2955
+ # include aliased concepts
2956
+ self.materialized_concepts += [
2957
+ c
2958
+ for c in self.alias_origin_lookup.values()
2959
+ if c.address in concrete_addresses
2960
+ ]
2566
2961
  new = [
2567
2962
  x.address
2568
2963
  for x in self.materialized_concepts
@@ -2570,12 +2965,6 @@ class Environment(BaseModel):
2570
2965
  ]
2571
2966
  if new:
2572
2967
  logger.info(f"Environment added new materialized concepts {new}")
2573
- for concept in self.concepts.values():
2574
- if concept.derivation == PurposeLineage.MERGE:
2575
- ms = concept.lineage
2576
- assert isinstance(ms, MergeStatement)
2577
- for parent in ms.concepts:
2578
- self.merged_concepts[parent.address] = concept
2579
2968
 
2580
2969
  def validate_concept(self, lookup: str, meta: Meta | None = None):
2581
2970
  existing: Concept = self.concepts.get(lookup) # type: ignore
@@ -2718,13 +3107,8 @@ class Environment(BaseModel):
2718
3107
  datasource: Datasource,
2719
3108
  meta: Meta | None = None,
2720
3109
  ):
2721
- if not datasource.namespace or datasource.namespace == DEFAULT_NAMESPACE:
2722
- self.datasources[datasource.name] = datasource
2723
- self.gen_concept_list_caches()
2724
- return datasource
2725
- self.datasources[datasource.namespace + "." + datasource.identifier] = (
2726
- datasource
2727
- )
3110
+
3111
+ self.datasources[datasource.env_label] = datasource
2728
3112
  self.gen_concept_list_caches()
2729
3113
  return datasource
2730
3114
 
@@ -2739,6 +3123,22 @@ class Environment(BaseModel):
2739
3123
  return True
2740
3124
  return False
2741
3125
 
3126
+ def merge_concept(
3127
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
3128
+ ):
3129
+ replacements = {}
3130
+ self.alias_origin_lookup[source.address] = source
3131
+ for k, v in self.concepts.items():
3132
+ if v.address == target.address:
3133
+ v.pseudonyms[source.address] = source
3134
+ if v.address == source.address:
3135
+ replacements[k] = target
3136
+ self.concepts.update(replacements)
3137
+
3138
+ for k, ds in self.datasources.items():
3139
+ if source.address in ds.output_lcl:
3140
+ ds.merge_concept(source, target, modifiers=modifiers)
3141
+
2742
3142
 
2743
3143
  class LazyEnvironment(Environment):
2744
3144
  """Variant of environment to defer parsing of a path
@@ -2771,7 +3171,9 @@ class LazyEnvironment(Environment):
2771
3171
  return super().__getattribute__(name)
2772
3172
 
2773
3173
 
2774
- class Comparison(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, BaseModel):
3174
+ class Comparison(
3175
+ ConceptArgs, Mergeable, Namespaced, ConstantInlineable, SelectContext, BaseModel
3176
+ ):
2775
3177
  left: Union[
2776
3178
  int,
2777
3179
  str,
@@ -2821,6 +3223,8 @@ class Comparison(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, BaseM
2821
3223
  )
2822
3224
 
2823
3225
  def __add__(self, other):
3226
+ if other is None:
3227
+ return self
2824
3228
  if not isinstance(other, (Comparison, Conditional, Parenthetical)):
2825
3229
  raise ValueError("Cannot add Comparison to non-Comparison")
2826
3230
  if other == self:
@@ -2833,6 +3237,15 @@ class Comparison(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, BaseM
2833
3237
  def __str__(self):
2834
3238
  return self.__repr__()
2835
3239
 
3240
+ def __eq__(self, other):
3241
+ if not isinstance(other, Comparison):
3242
+ return False
3243
+ return (
3244
+ self.left == other.left
3245
+ and self.right == other.right
3246
+ and self.operator == other.operator
3247
+ )
3248
+
2836
3249
  def inline_constant(self, constant: Concept) -> "Comparison":
2837
3250
  assert isinstance(constant.lineage, Function)
2838
3251
  new_val = constant.lineage.arguments[0]
@@ -2859,6 +3272,21 @@ class Comparison(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, BaseM
2859
3272
  operator=self.operator,
2860
3273
  )
2861
3274
 
3275
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
3276
+ return self.__class__(
3277
+ left=(
3278
+ self.left.with_merge(source, target, modifiers)
3279
+ if isinstance(self.left, Mergeable)
3280
+ else self.left
3281
+ ),
3282
+ right=(
3283
+ self.right.with_merge(source, target, modifiers)
3284
+ if isinstance(self.right, Mergeable)
3285
+ else self.right
3286
+ ),
3287
+ operator=self.operator,
3288
+ )
3289
+
2862
3290
  def with_namespace(self, namespace: str):
2863
3291
  return self.__class__(
2864
3292
  left=(
@@ -2874,11 +3302,13 @@ class Comparison(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, BaseM
2874
3302
  operator=self.operator,
2875
3303
  )
2876
3304
 
2877
- def with_select_grain(self, grain: Grain):
3305
+ def with_select_context(
3306
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3307
+ ):
2878
3308
  return self.__class__(
2879
3309
  left=(
2880
- self.left.with_select_grain(grain)
2881
- if isinstance(self.left, SelectGrain)
3310
+ self.left.with_select_context(grain, conditional)
3311
+ if isinstance(self.left, SelectContext)
2882
3312
  else self.left
2883
3313
  ),
2884
3314
  # the right side does NOT need to inherit select grain
@@ -2946,6 +3376,17 @@ class Comparison(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, BaseM
2946
3376
 
2947
3377
  class SubselectComparison(Comparison):
2948
3378
 
3379
+ def __eq__(self, other):
3380
+ if not isinstance(other, SubselectComparison):
3381
+ return False
3382
+
3383
+ comp = (
3384
+ self.left == other.left
3385
+ and self.right == other.right
3386
+ and self.operator == other.operator
3387
+ )
3388
+ return comp
3389
+
2949
3390
  @property
2950
3391
  def row_arguments(self) -> List[Concept]:
2951
3392
  return get_concept_arguments(self.left)
@@ -2954,12 +3395,14 @@ class SubselectComparison(Comparison):
2954
3395
  def existence_arguments(self) -> list[tuple["Concept", ...]]:
2955
3396
  return [tuple(get_concept_arguments(self.right))]
2956
3397
 
2957
- def with_select_grain(self, grain: Grain):
3398
+ def with_select_context(
3399
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3400
+ ):
2958
3401
  # there's no need to pass the select grain through to a subselect comparison
2959
3402
  return self.__class__(
2960
3403
  left=(
2961
- self.left.with_select_grain(grain)
2962
- if isinstance(self.left, SelectGrain)
3404
+ self.left.with_select_context(grain, conditional)
3405
+ if isinstance(self.left, SelectContext)
2963
3406
  else self.left
2964
3407
  ),
2965
3408
  right=self.right,
@@ -2967,7 +3410,7 @@ class SubselectComparison(Comparison):
2967
3410
  )
2968
3411
 
2969
3412
 
2970
- class CaseWhen(Namespaced, SelectGrain, BaseModel):
3413
+ class CaseWhen(Namespaced, SelectContext, BaseModel):
2971
3414
  comparison: Conditional | SubselectComparison | Comparison
2972
3415
  expr: "Expr"
2973
3416
 
@@ -2988,18 +3431,20 @@ class CaseWhen(Namespaced, SelectGrain, BaseModel):
2988
3431
  ),
2989
3432
  )
2990
3433
 
2991
- def with_select_grain(self, grain: Grain) -> CaseWhen:
3434
+ def with_select_context(
3435
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3436
+ ) -> CaseWhen:
2992
3437
  return CaseWhen(
2993
- comparison=self.comparison.with_select_grain(grain),
3438
+ comparison=self.comparison.with_select_context(grain, conditional),
2994
3439
  expr=(
2995
- (self.expr.with_select_grain(grain))
2996
- if isinstance(self.expr, SelectGrain)
3440
+ (self.expr.with_select_context(grain, conditional))
3441
+ if isinstance(self.expr, SelectContext)
2997
3442
  else self.expr
2998
3443
  ),
2999
3444
  )
3000
3445
 
3001
3446
 
3002
- class CaseElse(Namespaced, SelectGrain, BaseModel):
3447
+ class CaseElse(Namespaced, SelectContext, BaseModel):
3003
3448
  expr: "Expr"
3004
3449
  # this ensures that it's easily differentiable from CaseWhen
3005
3450
  discriminant: ComparisonOperator = ComparisonOperator.ELSE
@@ -3008,14 +3453,16 @@ class CaseElse(Namespaced, SelectGrain, BaseModel):
3008
3453
  def concept_arguments(self):
3009
3454
  return get_concept_arguments(self.expr)
3010
3455
 
3011
- def with_select_grain(self, grain: Grain) -> CaseElse:
3456
+ def with_select_context(
3457
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3458
+ ) -> CaseElse:
3012
3459
  return CaseElse(
3013
3460
  discriminant=self.discriminant,
3014
3461
  expr=(
3015
- self.expr.with_select_grain(grain)
3462
+ self.expr.with_select_context(grain, conditional)
3016
3463
  if isinstance(
3017
3464
  self.expr,
3018
- SelectGrain,
3465
+ SelectContext,
3019
3466
  )
3020
3467
  else self.expr
3021
3468
  ),
@@ -3035,7 +3482,9 @@ class CaseElse(Namespaced, SelectGrain, BaseModel):
3035
3482
  )
3036
3483
 
3037
3484
 
3038
- class Conditional(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, BaseModel):
3485
+ class Conditional(
3486
+ Mergeable, ConceptArgs, Namespaced, ConstantInlineable, SelectContext, BaseModel
3487
+ ):
3039
3488
  left: Union[
3040
3489
  int,
3041
3490
  str,
@@ -3081,6 +3530,16 @@ class Conditional(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, Base
3081
3530
  def __repr__(self):
3082
3531
  return f"{str(self.left)} {self.operator.value} {str(self.right)}"
3083
3532
 
3533
+ def __eq__(self, other):
3534
+
3535
+ if not isinstance(other, Conditional):
3536
+ return False
3537
+ return (
3538
+ self.left == other.left
3539
+ and self.right == other.right
3540
+ and self.operator == other.operator
3541
+ )
3542
+
3084
3543
  def inline_constant(self, constant: Concept) -> "Conditional":
3085
3544
  assert isinstance(constant.lineage, Function)
3086
3545
  new_val = constant.lineage.arguments[0]
@@ -3107,7 +3566,7 @@ class Conditional(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, Base
3107
3566
  operator=self.operator,
3108
3567
  )
3109
3568
 
3110
- def with_namespace(self, namespace: str):
3569
+ def with_namespace(self, namespace: str) -> "Conditional":
3111
3570
  return Conditional(
3112
3571
  left=(
3113
3572
  self.left.with_namespace(namespace)
@@ -3122,16 +3581,35 @@ class Conditional(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, Base
3122
3581
  operator=self.operator,
3123
3582
  )
3124
3583
 
3125
- def with_select_grain(self, grain: Grain):
3584
+ def with_merge(
3585
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
3586
+ ) -> "Conditional":
3587
+ return Conditional(
3588
+ left=(
3589
+ self.left.with_merge(source, target, modifiers)
3590
+ if isinstance(self.left, Mergeable)
3591
+ else self.left
3592
+ ),
3593
+ right=(
3594
+ self.right.with_merge(source, target, modifiers)
3595
+ if isinstance(self.right, Mergeable)
3596
+ else self.right
3597
+ ),
3598
+ operator=self.operator,
3599
+ )
3600
+
3601
+ def with_select_context(
3602
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3603
+ ):
3126
3604
  return Conditional(
3127
3605
  left=(
3128
- self.left.with_select_grain(grain)
3129
- if isinstance(self.left, SelectGrain)
3606
+ self.left.with_select_context(grain, conditional)
3607
+ if isinstance(self.left, SelectContext)
3130
3608
  else self.left
3131
3609
  ),
3132
3610
  right=(
3133
- self.right.with_select_grain(grain)
3134
- if isinstance(self.right, SelectGrain)
3611
+ self.right.with_select_context(grain, conditional)
3612
+ if isinstance(self.right, SelectContext)
3135
3613
  else self.right
3136
3614
  ),
3137
3615
  operator=self.operator,
@@ -3194,7 +3672,7 @@ class Conditional(ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, Base
3194
3672
  return chunks
3195
3673
 
3196
3674
 
3197
- class AggregateWrapper(Namespaced, SelectGrain, BaseModel):
3675
+ class AggregateWrapper(Mergeable, Namespaced, SelectContext, BaseModel):
3198
3676
  function: Function
3199
3677
  by: List[Concept] = Field(default_factory=list)
3200
3678
 
@@ -3222,21 +3700,34 @@ class AggregateWrapper(Namespaced, SelectGrain, BaseModel):
3222
3700
  def arguments(self):
3223
3701
  return self.function.arguments
3224
3702
 
3703
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
3704
+ return AggregateWrapper(
3705
+ function=self.function.with_merge(source, target, modifiers=modifiers),
3706
+ by=(
3707
+ [c.with_merge(source, target, modifiers) for c in self.by]
3708
+ if self.by
3709
+ else []
3710
+ ),
3711
+ )
3712
+
3225
3713
  def with_namespace(self, namespace: str) -> "AggregateWrapper":
3226
3714
  return AggregateWrapper(
3227
3715
  function=self.function.with_namespace(namespace),
3228
3716
  by=[c.with_namespace(namespace) for c in self.by] if self.by else [],
3229
3717
  )
3230
3718
 
3231
- def with_select_grain(self, grain: Grain) -> AggregateWrapper:
3719
+ def with_select_context(
3720
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3721
+ ) -> AggregateWrapper:
3232
3722
  if not self.by:
3233
3723
  by = grain.components_copy
3234
3724
  else:
3235
3725
  by = self.by
3236
- return AggregateWrapper(function=self.function.with_select_grain(grain), by=by)
3726
+ parent = self.function.with_select_context(grain, conditional)
3727
+ return AggregateWrapper(function=parent, by=by)
3237
3728
 
3238
3729
 
3239
- class WhereClause(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3730
+ class WhereClause(Mergeable, ConceptArgs, Namespaced, SelectContext, BaseModel):
3240
3731
  conditional: Union[SubselectComparison, Comparison, Conditional, "Parenthetical"]
3241
3732
 
3242
3733
  @property
@@ -3255,11 +3746,20 @@ class WhereClause(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3255
3746
  def existence_arguments(self) -> list[tuple["Concept", ...]]:
3256
3747
  return self.conditional.existence_arguments
3257
3748
 
3749
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
3750
+ return WhereClause(
3751
+ conditional=self.conditional.with_merge(source, target, modifiers)
3752
+ )
3753
+
3258
3754
  def with_namespace(self, namespace: str) -> WhereClause:
3259
3755
  return WhereClause(conditional=self.conditional.with_namespace(namespace))
3260
3756
 
3261
- def with_select_grain(self, grain: Grain) -> WhereClause:
3262
- return WhereClause(conditional=self.conditional.with_select_grain(grain))
3757
+ def with_select_context(
3758
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3759
+ ) -> WhereClause:
3760
+ return WhereClause(
3761
+ conditional=self.conditional.with_select_context(grain, conditional)
3762
+ )
3263
3763
 
3264
3764
  @property
3265
3765
  def grain(self) -> Grain:
@@ -3387,7 +3887,7 @@ class RowsetDerivationStatement(Namespaced, BaseModel):
3387
3887
  )
3388
3888
 
3389
3889
 
3390
- class RowsetItem(Namespaced, BaseModel):
3890
+ class RowsetItem(Mergeable, Namespaced, BaseModel):
3391
3891
  content: Concept
3392
3892
  rowset: RowsetDerivationStatement
3393
3893
  where: Optional["WhereClause"] = None
@@ -3397,6 +3897,15 @@ class RowsetItem(Namespaced, BaseModel):
3397
3897
  f"<Rowset<{self.rowset.name}>: {str(self.content)} where {str(self.where)}>"
3398
3898
  )
3399
3899
 
3900
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
3901
+ return RowsetItem(
3902
+ content=self.content.with_merge(source, target, modifiers),
3903
+ rowset=self.rowset,
3904
+ where=(
3905
+ self.where.with_merge(source, target, modifiers) if self.where else None
3906
+ ),
3907
+ )
3908
+
3400
3909
  def with_namespace(self, namespace: str) -> "RowsetItem":
3401
3910
  return RowsetItem(
3402
3911
  content=self.content.with_namespace(namespace),
@@ -3447,7 +3956,7 @@ class RowsetItem(Namespaced, BaseModel):
3447
3956
 
3448
3957
 
3449
3958
  class Parenthetical(
3450
- ConceptArgs, Namespaced, ConstantInlineable, SelectGrain, BaseModel
3959
+ ConceptArgs, Mergeable, Namespaced, ConstantInlineable, SelectContext, BaseModel
3451
3960
  ):
3452
3961
  content: "Expr"
3453
3962
 
@@ -3473,11 +3982,22 @@ class Parenthetical(
3473
3982
  )
3474
3983
  )
3475
3984
 
3476
- def with_select_grain(self, grain: Grain):
3985
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
3986
+ return Parenthetical(
3987
+ content=(
3988
+ self.content.with_merge(source, target, modifiers)
3989
+ if isinstance(self.content, Mergeable)
3990
+ else self.content
3991
+ )
3992
+ )
3993
+
3994
+ def with_select_context(
3995
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3996
+ ):
3477
3997
  return Parenthetical(
3478
3998
  content=(
3479
- self.content.with_select_grain(grain)
3480
- if isinstance(self.content, SelectGrain)
3999
+ self.content.with_select_context(grain, conditional)
4000
+ if isinstance(self.content, SelectContext)
3481
4001
  else self.content
3482
4002
  )
3483
4003
  )