pytrilogy-0.0.1.117-py3-none-any.whl → pytrilogy-0.0.2.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (46)
  1. {pytrilogy-0.0.1.117.dist-info → pytrilogy-0.0.2.1.dist-info}/METADATA +1 -1
  2. pytrilogy-0.0.2.1.dist-info/RECORD +82 -0
  3. {pytrilogy-0.0.1.117.dist-info → pytrilogy-0.0.2.1.dist-info}/WHEEL +1 -1
  4. trilogy/__init__.py +1 -1
  5. trilogy/constants.py +6 -0
  6. trilogy/core/enums.py +7 -2
  7. trilogy/core/env_processor.py +43 -19
  8. trilogy/core/functions.py +11 -0
  9. trilogy/core/models.py +737 -146
  10. trilogy/core/optimization.py +31 -28
  11. trilogy/core/optimizations/inline_constant.py +4 -1
  12. trilogy/core/optimizations/inline_datasource.py +25 -4
  13. trilogy/core/optimizations/predicate_pushdown.py +94 -54
  14. trilogy/core/processing/concept_strategies_v3.py +69 -39
  15. trilogy/core/processing/graph_utils.py +3 -3
  16. trilogy/core/processing/node_generators/__init__.py +0 -2
  17. trilogy/core/processing/node_generators/basic_node.py +30 -17
  18. trilogy/core/processing/node_generators/filter_node.py +3 -1
  19. trilogy/core/processing/node_generators/node_merge_node.py +345 -96
  20. trilogy/core/processing/node_generators/rowset_node.py +18 -16
  21. trilogy/core/processing/node_generators/select_node.py +44 -83
  22. trilogy/core/processing/nodes/__init__.py +2 -0
  23. trilogy/core/processing/nodes/base_node.py +22 -5
  24. trilogy/core/processing/nodes/filter_node.py +3 -0
  25. trilogy/core/processing/nodes/group_node.py +20 -2
  26. trilogy/core/processing/nodes/merge_node.py +32 -18
  27. trilogy/core/processing/nodes/select_node_v2.py +17 -3
  28. trilogy/core/processing/utility.py +100 -8
  29. trilogy/core/query_processor.py +77 -24
  30. trilogy/dialect/base.py +11 -46
  31. trilogy/dialect/bigquery.py +1 -1
  32. trilogy/dialect/common.py +11 -0
  33. trilogy/dialect/duckdb.py +1 -1
  34. trilogy/dialect/presto.py +1 -0
  35. trilogy/executor.py +29 -0
  36. trilogy/hooks/graph_hook.py +50 -5
  37. trilogy/hooks/query_debugger.py +1 -0
  38. trilogy/parsing/common.py +8 -5
  39. trilogy/parsing/parse_engine.py +48 -27
  40. trilogy/parsing/render.py +13 -6
  41. trilogy/parsing/trilogy.lark +12 -7
  42. pytrilogy-0.0.1.117.dist-info/RECORD +0 -83
  43. trilogy/core/processing/node_generators/concept_merge_node.py +0 -214
  44. {pytrilogy-0.0.1.117.dist-info → pytrilogy-0.0.2.1.dist-info}/LICENSE.md +0 -0
  45. {pytrilogy-0.0.1.117.dist-info → pytrilogy-0.0.2.1.dist-info}/entry_points.txt +0 -0
  46. {pytrilogy-0.0.1.117.dist-info → pytrilogy-0.0.2.1.dist-info}/top_level.txt +0 -0
trilogy/core/models.py CHANGED
@@ -63,6 +63,7 @@ from trilogy.core.enums import (
63
63
  DatePart,
64
64
  ShowCategory,
65
65
  Granularity,
66
+ SelectFiltering,
66
67
  )
67
68
  from trilogy.core.exceptions import UndefinedConceptException, InvalidSyntaxException
68
69
  from trilogy.utility import unique
@@ -129,6 +130,12 @@ class Namespaced(ABC):
129
130
  raise NotImplementedError
130
131
 
131
132
 
133
+ class Mergeable(ABC):
134
+
135
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
136
+ raise NotImplementedError
137
+
138
+
132
139
  class ConceptArgs(ABC):
133
140
 
134
141
  @property
@@ -144,11 +151,54 @@ class ConceptArgs(ABC):
144
151
  return self.concept_arguments
145
152
 
146
153
 
147
- class SelectGrain(ABC):
148
- def with_select_grain(self, grain: Grain):
154
+ class SelectContext(ABC):
155
+
156
+ def with_select_context(
157
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
158
+ ):
149
159
  raise NotImplementedError
150
160
 
151
161
 
162
+ class ConstantInlineable(ABC):
163
+ def inline_concept(self, concept: Concept):
164
+ raise NotImplementedError
165
+
166
+
167
+ class SelectTypeMixin(BaseModel):
168
+ where_clause: Union["WhereClause", None] = Field(default=None)
169
+
170
+ @property
171
+ def output_components(self) -> List[Concept]:
172
+ raise NotImplementedError
173
+
174
+ @property
175
+ def implicit_where_clause_selections(self) -> List[Concept]:
176
+ if not self.where_clause:
177
+ return []
178
+ filter = set(
179
+ [
180
+ str(x.address)
181
+ for x in self.where_clause.row_arguments
182
+ if not x.derivation == PurposeLineage.CONSTANT
183
+ ]
184
+ )
185
+ query_output = set([str(z.address) for z in self.output_components])
186
+ delta = filter.difference(query_output)
187
+ if delta:
188
+ return [
189
+ x for x in self.where_clause.row_arguments if str(x.address) in delta
190
+ ]
191
+ return []
192
+
193
+ @property
194
+ def where_clause_category(self) -> SelectFiltering:
195
+ if not self.where_clause:
196
+ return SelectFiltering.NONE
197
+ elif self.implicit_where_clause_selections:
198
+ return SelectFiltering.IMPLICIT
199
+ return SelectFiltering.EXPLICIT
200
+
201
+
152
202
  class DataType(Enum):
153
203
  # PRIMITIVES
154
204
  STRING = "string"
@@ -285,11 +335,24 @@ def empty_grain() -> Grain:
285
335
  return Grain(components=[])
286
336
 
287
337
 
288
- class Concept(Namespaced, SelectGrain, BaseModel):
338
+ class MultiLineage(BaseModel):
339
+ lineages: list[
340
+ Union[
341
+ Function,
342
+ WindowItem,
343
+ FilterItem,
344
+ AggregateWrapper,
345
+ RowsetItem,
346
+ MultiSelectStatement,
347
+ ]
348
+ ]
349
+
350
+
351
+ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
289
352
  name: str
290
353
  datatype: DataType | ListType | StructType | MapType | NumericType
291
354
  purpose: Purpose
292
- metadata: Optional[Metadata] = Field(
355
+ metadata: Metadata = Field(
293
356
  default_factory=lambda: Metadata(description=None, line_number=None),
294
357
  validate_default=True,
295
358
  )
@@ -300,17 +363,44 @@ class Concept(Namespaced, SelectGrain, BaseModel):
300
363
  FilterItem,
301
364
  AggregateWrapper,
302
365
  RowsetItem,
303
- MultiSelectStatement | MergeStatement,
366
+ MultiSelectStatement,
304
367
  ]
305
368
  ] = None
306
369
  namespace: Optional[str] = Field(default=DEFAULT_NAMESPACE, validate_default=True)
307
370
  keys: Optional[Tuple["Concept", ...]] = None
308
371
  grain: "Grain" = Field(default=None, validate_default=True)
309
372
  modifiers: Optional[List[Modifier]] = Field(default_factory=list)
373
+ pseudonyms: Dict[str, Concept] = Field(default_factory=dict)
310
374
 
311
375
  def __hash__(self):
312
376
  return hash(str(self))
313
377
 
378
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
379
+ if self.address == source.address:
380
+ new = target.with_grain(self.grain.with_merge(source, target, modifiers))
381
+ new.pseudonyms[self.address] = self
382
+ return new
383
+ return self.__class__(
384
+ name=self.name,
385
+ datatype=self.datatype,
386
+ purpose=self.purpose,
387
+ metadata=self.metadata,
388
+ lineage=(
389
+ self.lineage.with_merge(source, target, modifiers)
390
+ if self.lineage
391
+ else None
392
+ ),
393
+ grain=self.grain.with_merge(source, target, modifiers),
394
+ namespace=self.namespace,
395
+ keys=(
396
+ tuple(x.with_merge(source, target, modifiers) for x in self.keys)
397
+ if self.keys
398
+ else None
399
+ ),
400
+ modifiers=self.modifiers,
401
+ pseudonyms=self.pseudonyms,
402
+ )
403
+
314
404
  @field_validator("keys", mode="before")
315
405
  @classmethod
316
406
  def keys_validator(cls, v, info: ValidationInfo):
@@ -327,7 +417,7 @@ class Concept(Namespaced, SelectGrain, BaseModel):
327
417
  def namespace_validation(cls, v):
328
418
  return v or DEFAULT_NAMESPACE
329
419
 
330
- @field_validator("metadata")
420
+ @field_validator("metadata", mode="before")
331
421
  @classmethod
332
422
  def metadata_validation(cls, v):
333
423
  v = v or Metadata()
@@ -435,15 +525,22 @@ class Concept(Namespaced, SelectGrain, BaseModel):
435
525
  else None
436
526
  ),
437
527
  modifiers=self.modifiers,
528
+ pseudonyms={
529
+ k: v.with_namespace(namespace) for k, v in self.pseudonyms.items()
530
+ },
438
531
  )
439
532
 
440
- def with_select_grain(self, grain: Optional["Grain"] = None) -> "Concept":
533
+ def with_select_context(
534
+ self,
535
+ grain: Optional["Grain"] = None,
536
+ conditional: Conditional | Comparison | Parenthetical | None = None,
537
+ ) -> "Concept":
441
538
  if not all([isinstance(x, Concept) for x in self.keys or []]):
442
539
  raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")
443
540
  new_grain = grain or self.grain
444
541
  new_lineage = self.lineage
445
- if isinstance(self.lineage, SelectGrain):
446
- new_lineage = self.lineage.with_select_grain(new_grain)
542
+ if isinstance(self.lineage, SelectContext):
543
+ new_lineage = self.lineage.with_select_context(new_grain, conditional)
447
544
  return self.__class__(
448
545
  name=self.name,
449
546
  datatype=self.datatype,
@@ -454,6 +551,7 @@ class Concept(Namespaced, SelectGrain, BaseModel):
454
551
  namespace=self.namespace,
455
552
  keys=self.keys,
456
553
  modifiers=self.modifiers,
554
+ pseudonyms=self.pseudonyms,
457
555
  )
458
556
 
459
557
  def with_grain(self, grain: Optional["Grain"] = None) -> "Concept":
@@ -469,6 +567,7 @@ class Concept(Namespaced, SelectGrain, BaseModel):
469
567
  namespace=self.namespace,
470
568
  keys=self.keys,
471
569
  modifiers=self.modifiers,
570
+ pseudonyms=self.pseudonyms,
472
571
  )
473
572
 
474
573
  @cached_property
@@ -507,6 +606,7 @@ class Concept(Namespaced, SelectGrain, BaseModel):
507
606
  keys=self.keys,
508
607
  namespace=self.namespace,
509
608
  modifiers=self.modifiers,
609
+ pseudonyms=self.pseudonyms,
510
610
  )
511
611
 
512
612
  def with_default_grain(self) -> "Concept":
@@ -524,7 +624,7 @@ class Concept(Namespaced, SelectGrain, BaseModel):
524
624
  FilterItem,
525
625
  AggregateWrapper,
526
626
  RowsetItem,
527
- MultiSelectStatement | MergeStatement,
627
+ MultiSelectStatement,
528
628
  ],
529
629
  output: List[Concept],
530
630
  ):
@@ -563,8 +663,6 @@ class Concept(Namespaced, SelectGrain, BaseModel):
563
663
  return PurposeLineage.ROWSET
564
664
  elif self.lineage and isinstance(self.lineage, MultiSelectStatement):
565
665
  return PurposeLineage.MULTISELECT
566
- elif self.lineage and isinstance(self.lineage, MergeStatement):
567
- return PurposeLineage.MERGE
568
666
  elif (
569
667
  self.lineage
570
668
  and isinstance(self.lineage, Function)
@@ -587,6 +685,13 @@ class Concept(Namespaced, SelectGrain, BaseModel):
587
685
  elif self.lineage and isinstance(self.lineage, Function):
588
686
  if not self.lineage.concept_arguments:
589
687
  return PurposeLineage.CONSTANT
688
+ elif all(
689
+ [
690
+ x.derivation == PurposeLineage.CONSTANT
691
+ for x in self.lineage.concept_arguments
692
+ ]
693
+ ):
694
+ return PurposeLineage.CONSTANT
590
695
  return PurposeLineage.BASIC
591
696
  elif self.purpose == Purpose.CONSTANT:
592
697
  return PurposeLineage.CONSTANT
@@ -621,8 +726,28 @@ class Concept(Namespaced, SelectGrain, BaseModel):
621
726
  return Granularity.SINGLE_ROW
622
727
  return Granularity.MULTI_ROW
623
728
 
729
+ def with_filter(
730
+ self, condition: "Conditional | Comparison | Parenthetical"
731
+ ) -> "Concept":
732
+ from trilogy.utility import string_to_hash
733
+
734
+ name = string_to_hash(self.name + str(condition))
735
+ new = Concept(
736
+ name=f"{self.name}_{name}",
737
+ datatype=self.datatype,
738
+ purpose=self.purpose,
739
+ metadata=self.metadata,
740
+ lineage=FilterItem(content=self, where=WhereClause(conditional=condition)),
741
+ keys=None,
742
+ grain=(self.grain if self.purpose == Purpose.PROPERTY else Grain()),
743
+ namespace=self.namespace,
744
+ modifiers=self.modifiers,
745
+ pseudonyms=self.pseudonyms,
746
+ )
747
+ return new
748
+
624
749
 
625
- class Grain(BaseModel):
750
+ class Grain(Mergeable, BaseModel):
626
751
  nested: bool = False
627
752
  components: List[Concept] = Field(default_factory=list, validate_default=True)
628
753
 
@@ -640,12 +765,6 @@ class Grain(BaseModel):
640
765
  if sub.purpose in (Purpose.PROPERTY, Purpose.METRIC) and sub.keys:
641
766
  if all([c in v2 for c in sub.keys]):
642
767
  continue
643
- elif sub.derivation == PurposeLineage.MERGE and isinstance(
644
- sub.lineage, MergeStatement
645
- ):
646
- parents = sub.lineage.concepts
647
- if any([p in v2 for p in parents]):
648
- continue
649
768
  final.append(sub)
650
769
  v2 = sorted(final, key=lambda x: x.name)
651
770
  return v2
@@ -667,6 +786,16 @@ class Grain(BaseModel):
667
786
  nested=self.nested,
668
787
  )
669
788
 
789
+ def with_merge(
790
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
791
+ ) -> "Grain":
792
+ return Grain(
793
+ components=[
794
+ x.with_merge(source, target, modifiers) for x in self.components
795
+ ],
796
+ nested=self.nested,
797
+ )
798
+
670
799
  @property
671
800
  def abstract(self):
672
801
  return not self.components or all(
@@ -754,6 +883,15 @@ class ColumnAssignment(BaseModel):
754
883
  modifiers=self.modifiers,
755
884
  )
756
885
 
886
+ def with_merge(
887
+ self, concept: Concept, modifiers: List[Modifier]
888
+ ) -> "ColumnAssignment":
889
+ return ColumnAssignment(
890
+ alias=self.alias,
891
+ concept=concept,
892
+ modifiers=modifiers,
893
+ )
894
+
757
895
 
758
896
  class Statement(BaseModel):
759
897
  pass
@@ -804,7 +942,7 @@ class LooseConceptList(BaseModel):
804
942
  return self.addresses.isdisjoint(other.addresses)
805
943
 
806
944
 
807
- class Function(Namespaced, SelectGrain, BaseModel):
945
+ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
808
946
  operator: FunctionType
809
947
  arg_count: int = Field(default=1)
810
948
  output_datatype: DataType | ListType | StructType | MapType | NumericType
@@ -844,15 +982,42 @@ class Function(Namespaced, SelectGrain, BaseModel):
844
982
  def datatype(self):
845
983
  return self.output_datatype
846
984
 
847
- def with_select_grain(self, grain: Grain) -> Function:
985
+ def with_select_context(
986
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
987
+ ) -> Function:
988
+ if self.operator in FunctionClass.AGGREGATE_FUNCTIONS.value and conditional:
989
+ base = [
990
+ (
991
+ c.with_select_context(grain, conditional)
992
+ if isinstance(
993
+ c,
994
+ SelectContext,
995
+ )
996
+ else c
997
+ )
998
+ for c in self.arguments
999
+ ]
1000
+ final = [
1001
+ c.with_filter(conditional) if isinstance(c, Concept) else c
1002
+ for c in base
1003
+ ]
1004
+ return Function(
1005
+ operator=self.operator,
1006
+ arguments=final,
1007
+ output_datatype=self.output_datatype,
1008
+ output_purpose=self.output_purpose,
1009
+ valid_inputs=self.valid_inputs,
1010
+ arg_count=self.arg_count,
1011
+ )
1012
+
848
1013
  return Function(
849
1014
  operator=self.operator,
850
1015
  arguments=[
851
1016
  (
852
- c.with_select_grain(grain)
1017
+ c.with_select_context(grain, conditional)
853
1018
  if isinstance(
854
1019
  c,
855
- SelectGrain,
1020
+ SelectContext,
856
1021
  )
857
1022
  else c
858
1023
  )
@@ -946,6 +1111,28 @@ class Function(Namespaced, SelectGrain, BaseModel):
946
1111
  arg_count=self.arg_count,
947
1112
  )
948
1113
 
1114
+ def with_merge(
1115
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1116
+ ) -> "Function":
1117
+ return Function(
1118
+ operator=self.operator,
1119
+ arguments=[
1120
+ (
1121
+ c.with_merge(source, target, modifiers)
1122
+ if isinstance(
1123
+ c,
1124
+ Mergeable,
1125
+ )
1126
+ else c
1127
+ )
1128
+ for c in self.arguments
1129
+ ],
1130
+ output_datatype=self.output_datatype,
1131
+ output_purpose=self.output_purpose,
1132
+ valid_inputs=self.valid_inputs,
1133
+ arg_count=self.arg_count,
1134
+ )
1135
+
949
1136
  @property
950
1137
  def concept_arguments(self) -> List[Concept]:
951
1138
  base = []
@@ -986,6 +1173,13 @@ class ConceptTransform(Namespaced, BaseModel):
986
1173
  def input(self) -> List[Concept]:
987
1174
  return [v for v in self.function.arguments if isinstance(v, Concept)]
988
1175
 
1176
+ def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
1177
+ return ConceptTransform(
1178
+ function=self.function.with_merge(source, target, modifiers),
1179
+ output=self.output.with_merge(source, target, modifiers),
1180
+ modifiers=self.modifiers + modifiers,
1181
+ )
1182
+
989
1183
  def with_namespace(self, namespace: str) -> "ConceptTransform":
990
1184
  return ConceptTransform(
991
1185
  function=self.function.with_namespace(namespace),
@@ -1010,13 +1204,23 @@ class WindowItemOrder(BaseModel):
1010
1204
  contents: List["OrderItem"]
1011
1205
 
1012
1206
 
1013
- class WindowItem(Namespaced, SelectGrain, BaseModel):
1207
+ class WindowItem(Mergeable, Namespaced, SelectContext, BaseModel):
1014
1208
  type: WindowType
1015
1209
  content: Concept
1016
1210
  order_by: List["OrderItem"]
1017
1211
  over: List["Concept"] = Field(default_factory=list)
1018
1212
  index: Optional[int] = None
1019
1213
 
1214
+ def with_merge(
1215
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1216
+ ) -> "WindowItem":
1217
+ return WindowItem(
1218
+ type=self.type,
1219
+ content=self.content.with_merge(source, target, modifiers),
1220
+ over=[x.with_merge(source, target, modifiers) for x in self.over],
1221
+ order_by=[x.with_merge(source, target, modifiers) for x in self.order_by],
1222
+ )
1223
+
1020
1224
  def with_namespace(self, namespace: str) -> "WindowItem":
1021
1225
  return WindowItem(
1022
1226
  type=self.type,
@@ -1025,12 +1229,14 @@ class WindowItem(Namespaced, SelectGrain, BaseModel):
1025
1229
  order_by=[x.with_namespace(namespace) for x in self.order_by],
1026
1230
  )
1027
1231
 
1028
- def with_select_grain(self, grain: Grain) -> "WindowItem":
1232
+ def with_select_context(
1233
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
1234
+ ) -> "WindowItem":
1029
1235
  return WindowItem(
1030
1236
  type=self.type,
1031
- content=self.content.with_select_grain(grain),
1032
- over=[x.with_select_grain(grain) for x in self.over],
1033
- order_by=[x.with_select_grain(grain) for x in self.order_by],
1237
+ content=self.content.with_select_context(grain, conditional),
1238
+ over=[x.with_select_context(grain, conditional) for x in self.over],
1239
+ order_by=[x.with_select_context(grain, conditional) for x in self.order_by],
1034
1240
  )
1035
1241
 
1036
1242
  @property
@@ -1077,23 +1283,33 @@ class WindowItem(Namespaced, SelectGrain, BaseModel):
1077
1283
  return Purpose.PROPERTY
1078
1284
 
1079
1285
 
1080
- class FilterItem(Namespaced, SelectGrain, BaseModel):
1286
+ class FilterItem(Namespaced, SelectContext, BaseModel):
1081
1287
  content: Concept
1082
1288
  where: "WhereClause"
1083
1289
 
1084
1290
  def __str__(self):
1085
1291
  return f"<Filter: {str(self.content)} where {str(self.where)}>"
1086
1292
 
1293
+ def with_merge(
1294
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1295
+ ) -> "FilterItem":
1296
+ return FilterItem(
1297
+ content=source.with_merge(source, target, modifiers),
1298
+ where=self.where.with_merge(source, target, modifiers),
1299
+ )
1300
+
1087
1301
  def with_namespace(self, namespace: str) -> "FilterItem":
1088
1302
  return FilterItem(
1089
1303
  content=self.content.with_namespace(namespace),
1090
1304
  where=self.where.with_namespace(namespace),
1091
1305
  )
1092
1306
 
1093
- def with_select_grain(self, grain: Grain) -> FilterItem:
1307
+ def with_select_context(
1308
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
1309
+ ) -> FilterItem:
1094
1310
  return FilterItem(
1095
- content=self.content.with_select_grain(grain),
1096
- where=self.where.with_select_grain(grain),
1311
+ content=self.content.with_select_context(grain, conditional),
1312
+ where=self.where.with_select_context(grain, conditional),
1097
1313
  )
1098
1314
 
1099
1315
  @property
@@ -1134,7 +1350,7 @@ class FilterItem(Namespaced, SelectGrain, BaseModel):
1134
1350
  return [self.content] + self.where.concept_arguments
1135
1351
 
1136
1352
 
1137
- class SelectItem(Namespaced, BaseModel):
1353
+ class SelectItem(Mergeable, Namespaced, BaseModel):
1138
1354
  content: Union[Concept, ConceptTransform]
1139
1355
  modifiers: List[Modifier] = Field(default_factory=list)
1140
1356
 
@@ -1150,6 +1366,14 @@ class SelectItem(Namespaced, BaseModel):
1150
1366
  def input(self) -> List[Concept]:
1151
1367
  return self.content.input
1152
1368
 
1369
+ def with_merge(
1370
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1371
+ ) -> "SelectItem":
1372
+ return SelectItem(
1373
+ content=self.content.with_merge(source, target, modifiers),
1374
+ modifiers=modifiers,
1375
+ )
1376
+
1153
1377
  def with_namespace(self, namespace: str) -> "SelectItem":
1154
1378
  return SelectItem(
1155
1379
  content=self.content.with_namespace(namespace),
@@ -1157,16 +1381,25 @@ class SelectItem(Namespaced, BaseModel):
1157
1381
  )
1158
1382
 
1159
1383
 
1160
- class OrderItem(SelectGrain, Namespaced, BaseModel):
1384
+ class OrderItem(Mergeable, SelectContext, Namespaced, BaseModel):
1161
1385
  expr: Concept
1162
1386
  order: Ordering
1163
1387
 
1164
1388
  def with_namespace(self, namespace: str) -> "OrderItem":
1165
1389
  return OrderItem(expr=self.expr.with_namespace(namespace), order=self.order)
1166
1390
 
1167
- def with_select_grain(self, grain: Grain) -> "OrderItem":
1391
+ def with_select_context(
1392
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
1393
+ ) -> "OrderItem":
1168
1394
  return OrderItem(expr=self.expr.with_grain(grain), order=self.order)
1169
1395
 
1396
+ def with_merge(
1397
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1398
+ ) -> "OrderItem":
1399
+ return OrderItem(
1400
+ expr=source.with_merge(source, target, modifiers), order=self.order
1401
+ )
1402
+
1170
1403
  @property
1171
1404
  def input(self):
1172
1405
  return self.expr.input
@@ -1176,21 +1409,27 @@ class OrderItem(SelectGrain, Namespaced, BaseModel):
1176
1409
  return self.expr.output
1177
1410
 
1178
1411
 
1179
- class OrderBy(Namespaced, BaseModel):
1412
+ class OrderBy(Mergeable, Namespaced, BaseModel):
1180
1413
  items: List[OrderItem]
1181
1414
 
1182
1415
  def with_namespace(self, namespace: str) -> "OrderBy":
1183
1416
  return OrderBy(items=[x.with_namespace(namespace) for x in self.items])
1184
1417
 
1418
+ def with_merge(
1419
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1420
+ ) -> "OrderBy":
1421
+ return OrderBy(
1422
+ items=[x.with_merge(source, target, modifiers) for x in self.items]
1423
+ )
1424
+
1185
1425
 
1186
1426
  class RawSQLStatement(BaseModel):
1187
1427
  text: str
1188
1428
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
1189
1429
 
1190
1430
 
1191
- class SelectStatement(Namespaced, BaseModel):
1431
+ class SelectStatement(Mergeable, Namespaced, SelectTypeMixin, BaseModel):
1192
1432
  selection: List[SelectItem]
1193
- where_clause: Optional["WhereClause"] = None
1194
1433
  order_by: Optional[OrderBy] = None
1195
1434
  limit: Optional[int] = None
1196
1435
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
@@ -1220,6 +1459,19 @@ class SelectStatement(Namespaced, BaseModel):
1220
1459
  new.append(item)
1221
1460
  return new
1222
1461
 
1462
+ def with_merge(
1463
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1464
+ ) -> "SelectStatement":
1465
+ return SelectStatement(
1466
+ selection=[x.with_merge(source, target, modifiers) for x in self.selection],
1467
+ order_by=(
1468
+ self.order_by.with_merge(source, target, modifiers)
1469
+ if self.order_by
1470
+ else None
1471
+ ),
1472
+ limit=self.limit,
1473
+ )
1474
+
1223
1475
  @property
1224
1476
  def input_components(self) -> List[Concept]:
1225
1477
  output = set()
@@ -1292,14 +1544,14 @@ class SelectStatement(Namespaced, BaseModel):
1292
1544
  for item in self.output_components:
1293
1545
  if item.purpose == Purpose.KEY:
1294
1546
  output.append(item)
1295
- if self.where_clause:
1296
- for item in self.where_clause.concept_arguments:
1297
- if item.purpose == Purpose.KEY:
1298
- output.append(item)
1299
- # elif item.purpose == Purpose.PROPERTY and item.grain:
1300
- # output += item.grain.components
1301
- # TODO: handle other grain cases
1302
- # new if block by design
1547
+ # if self.where_clause:
1548
+ # for item in self.where_clause.concept_arguments:
1549
+ # if item.purpose == Purpose.KEY:
1550
+ # output.append(item)
1551
+ # elif item.purpose == Purpose.PROPERTY and item.grain:
1552
+ # output += item.grain.components
1553
+ # TODO: handle other grain cases
1554
+ # new if block by design
1303
1555
  # add back any purpose that is not at the grain
1304
1556
  # if a query already has the key of the property in the grain
1305
1557
  # we want to group to that grain and ignore the property, which is a derivation
@@ -1388,11 +1640,10 @@ class AlignClause(Namespaced, BaseModel):
1388
1640
  return AlignClause(items=[x.with_namespace(namespace) for x in self.items])
1389
1641
 
1390
1642
 
1391
- class MultiSelectStatement(Namespaced, BaseModel):
1643
+ class MultiSelectStatement(SelectTypeMixin, Mergeable, Namespaced, BaseModel):
1392
1644
  selects: List[SelectStatement]
1393
1645
  align: AlignClause
1394
1646
  namespace: str
1395
- where_clause: Optional["WhereClause"] = None
1396
1647
  order_by: Optional[OrderBy] = None
1397
1648
  limit: Optional[int] = None
1398
1649
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
@@ -1418,6 +1669,28 @@ class MultiSelectStatement(Namespaced, BaseModel):
1418
1669
  output += self.where_clause.concept_arguments
1419
1670
  return unique(output, "address")
1420
1671
 
1672
+ def with_merge(
1673
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1674
+ ) -> "MultiSelectStatement":
1675
+ new = MultiSelectStatement(
1676
+ selects=[s.with_merge(source, target, modifiers) for s in self.selects],
1677
+ align=self.align,
1678
+ namespace=self.namespace,
1679
+ order_by=(
1680
+ self.order_by.with_merge(source, target, modifiers)
1681
+ if self.order_by
1682
+ else None
1683
+ ),
1684
+ limit=self.limit,
1685
+ meta=self.meta,
1686
+ where_clause=(
1687
+ self.where_clause.with_merge(source, target, modifiers)
1688
+ if self.where_clause
1689
+ else None
1690
+ ),
1691
+ )
1692
+ return new
1693
+
1421
1694
  def get_merge_concept(self, check: Concept):
1422
1695
  for item in self.align.items:
1423
1696
  if check in item.concepts_lcl:
@@ -1429,6 +1702,14 @@ class MultiSelectStatement(Namespaced, BaseModel):
1429
1702
  selects=[c.with_namespace(namespace) for c in self.selects],
1430
1703
  align=self.align.with_namespace(namespace),
1431
1704
  namespace=namespace,
1705
+ order_by=self.order_by.with_namespace(namespace) if self.order_by else None,
1706
+ limit=self.limit,
1707
+ meta=self.meta,
1708
+ where_clause=(
1709
+ self.where_clause.with_namespace(namespace)
1710
+ if self.where_clause
1711
+ else None
1712
+ ),
1432
1713
  )
1433
1714
 
1434
1715
  @property
@@ -1513,49 +1794,21 @@ def safe_grain(v) -> Grain:
1513
1794
  class DatasourceMetadata(BaseModel):
1514
1795
  freshness_concept: Concept | None
1515
1796
  partition_fields: List[Concept] = Field(default_factory=list)
1797
+ line_no: int | None = None
1516
1798
 
1517
1799
 
1518
- class MergeStatement(Namespaced, BaseModel):
1519
- concepts: List[Concept]
1520
- datatype: DataType | ListType | StructType | MapType | NumericType
1521
-
1522
- @cached_property
1523
- def concepts_lcl(self):
1524
- return LooseConceptList(concepts=self.concepts)
1525
-
1526
- @property
1527
- def merge_concept(self) -> Concept:
1528
- bridge_name = "_".join([c.safe_address for c in self.concepts])
1529
- return Concept(
1530
- name=f"__merge_{bridge_name}",
1531
- datatype=self.datatype,
1532
- purpose=Purpose.PROPERTY,
1533
- lineage=self,
1534
- keys=tuple(self.concepts),
1535
- )
1536
-
1537
- @property
1538
- def arguments(self) -> List[Concept]:
1539
- return self.concepts
1540
-
1541
- @property
1542
- def concept_arguments(self) -> List[Concept]:
1543
- return self.concepts
1544
-
1545
- def find_source(self, concept: Concept, cte: CTE) -> Concept:
1546
- for x in self.concepts:
1547
- for z in cte.output_columns:
1548
- if z.address == x.address:
1549
- return z
1550
- raise SyntaxError(
1551
- f"Could not find upstream map for multiselect {str(concept)} on cte ({cte.name})"
1552
- )
1800
+ class MergeStatementV2(Namespaced, BaseModel):
1801
+ source: Concept
1802
+ target: Concept
1803
+ modifiers: List[Modifier] = Field(default_factory=list)
1553
1804
 
1554
- def with_namespace(self, namespace: str) -> "MergeStatement":
1555
- return MergeStatement(
1556
- concepts=[c.with_namespace(namespace) for c in self.concepts],
1557
- datatype=self.datatype,
1805
+ def with_namespace(self, namespace: str) -> "MergeStatementV2":
1806
+ new = MergeStatementV2(
1807
+ source=self.source.with_namespace(namespace),
1808
+ target=self.target.with_namespace(namespace),
1809
+ modifiers=self.modifiers,
1558
1810
  )
1811
+ return new
1559
1812
 
1560
1813
 
1561
1814
  class Datasource(Namespaced, BaseModel):
@@ -1570,6 +1823,28 @@ class Datasource(Namespaced, BaseModel):
1570
1823
  default_factory=lambda: DatasourceMetadata(freshness_concept=None)
1571
1824
  )
1572
1825
 
1826
+ def merge_concept(
1827
+ self, source: Concept, target: Concept, modifiers: List[Modifier]
1828
+ ):
1829
+ original = [c for c in self.columns if c.concept.address == source.address]
1830
+ # map to the alias with the modifier, and the original
1831
+ self.columns = [
1832
+ (
1833
+ c.with_merge(target, modifiers)
1834
+ if c.concept.address == source.address
1835
+ else c
1836
+ )
1837
+ for c in self.columns
1838
+ ] + original
1839
+ self.grain = self.grain.with_merge(source, target, modifiers)
1840
+ del self.output_lcl
1841
+
1842
+ @property
1843
+ def env_label(self) -> str:
1844
+ if not self.namespace or self.namespace == DEFAULT_NAMESPACE:
1845
+ return self.identifier
1846
+ return f"{self.namespace}.{self.identifier}"
1847
+
1573
1848
  @property
1574
1849
  def condition(self):
1575
1850
  return None
@@ -1733,6 +2008,7 @@ class BaseJoin(BaseModel):
1733
2008
  concepts: List[Concept]
1734
2009
  join_type: JoinType
1735
2010
  filter_to_mutual: bool = False
2011
+ concept_pairs: list[tuple[Concept, Concept]] | None = None
1736
2012
 
1737
2013
  def __init__(self, **data: Any):
1738
2014
  super().__init__(**data)
@@ -1742,10 +2018,21 @@ class BaseJoin(BaseModel):
1742
2018
  f" {self.right_datasource}"
1743
2019
  )
1744
2020
  final_concepts = []
2021
+
2022
+ # if we have a list of concept pairs
2023
+ if self.concept_pairs:
2024
+ return
2025
+
1745
2026
  for concept in self.concepts:
1746
2027
  include = True
1747
2028
  for ds in [self.left_datasource, self.right_datasource]:
1748
- if concept.address not in [c.address for c in ds.output_concepts]:
2029
+ synonyms = []
2030
+ for c in ds.output_concepts:
2031
+ synonyms += list(c.pseudonyms.keys())
2032
+ if (
2033
+ concept.address not in [c.address for c in ds.output_concepts]
2034
+ and concept.address not in synonyms
2035
+ ):
1749
2036
  if self.filter_to_mutual:
1750
2037
  include = False
1751
2038
  else:
@@ -1983,7 +2270,7 @@ class QueryDatasource(BaseModel):
1983
2270
  )
1984
2271
  # partial = "_".join([str(c.address).replace(".", "_") for c in self.partial_concepts])
1985
2272
  return (
1986
- "_join_".join([d.name for d in self.datasources])
2273
+ "_join_".join([d.full_name for d in self.datasources])
1987
2274
  + (f"_at_{grain}" if grain else "_at_abstract")
1988
2275
  + (f"_filtered_by_{filters}" if filters else "")
1989
2276
  # + (f"_partial_{partial}" if partial else "")
@@ -2075,6 +2362,9 @@ class CTE(BaseModel):
2075
2362
  if concept.address in self.source_map:
2076
2363
  removed = removed.union(self.source_map[concept.address])
2077
2364
  del self.source_map[concept.address]
2365
+
2366
+ if self.condition:
2367
+ self.condition = self.condition.inline_constant(concept)
2078
2368
  # if we've entirely removed the need to join to someplace to get the concept
2079
2369
  # drop the join as well.
2080
2370
  for removed_cte in removed:
@@ -2136,7 +2426,13 @@ class CTE(BaseModel):
2136
2426
  ds_being_inlined.name if x == parent.name else x for x in v
2137
2427
  ]
2138
2428
  elif v == parent.name:
2139
- self.source_map[k] = ds_being_inlined.name
2429
+ self.source_map[k] = [ds_being_inlined.name]
2430
+
2431
+ # zip in any required values for lookups
2432
+ for k in ds_being_inlined.output_lcl.addresses:
2433
+ if k in self.source_map and self.source_map[k]:
2434
+ continue
2435
+ self.source_map[k] = [ds_being_inlined.name]
2140
2436
  self.parent_ctes = [x for x in self.parent_ctes if x.name != parent.name]
2141
2437
  if force_group:
2142
2438
  self.group_to_grain = True
@@ -2237,6 +2533,45 @@ class CTE(BaseModel):
2237
2533
  except ValueError as e:
2238
2534
  return f"INVALID_ALIAS: {str(e)}"
2239
2535
 
2536
@property
def group_concepts(self) -> List[Concept]:
    """Return the concepts this CTE groups by, de-duplicated by address.

    Returns an empty list unless ``group_to_grain`` is set. Otherwise the
    result combines, in order:

    * the grain components;
    * output columns with PROPERTY or KEY purpose that are not already
      grain components;
    * METRIC output columns that either appear (re-grained to a parent's
      grain) in a parent CTE's output columns or have at least one entry
      in ``source_map``;
    * CONSTANT output columns whose ``source_map`` entry is not empty.
    """
    if not self.group_to_grain:
        return []

    grain_addresses = [x.address for x in self.grain.components]

    keys_and_properties = [
        c
        for c in self.output_columns
        if c.purpose in (Purpose.PROPERTY, Purpose.KEY)
        and c.address not in grain_addresses
    ]

    sourced_metrics = [
        c
        for c in self.output_columns
        if c.purpose == Purpose.METRIC
        and (
            any(
                [
                    c.with_grain(cte.grain) in cte.output_columns
                    for cte in self.parent_ctes
                ]
            )
            # if we have this metric from a source
            # it isn't derived here and must be grouped on
            or len(self.source_map[c.address]) > 0
        )
    ]

    sourced_constants = [
        c
        for c in self.output_columns
        if c.purpose == Purpose.CONSTANT and self.source_map[c.address] != []
    ]

    return unique(
        self.grain.components
        + keys_and_properties
        + sourced_metrics
        + sourced_constants,
        "address",
    )
2574
+
2240
2575
  @property
2241
2576
  def render_from_clause(self) -> bool:
2242
2577
  if (
@@ -2292,6 +2627,7 @@ class Join(BaseModel):
2292
2627
  right_cte: CTE | Datasource
2293
2628
  jointype: JoinType
2294
2629
  joinkeys: List[JoinKey]
2630
+ joinkey_pairs: List[tuple[Concept, Concept]] | None = None
2295
2631
 
2296
2632
  @property
2297
2633
  def left_name(self) -> str:
@@ -2328,7 +2664,7 @@ class Join(BaseModel):
2328
2664
  )
2329
2665
 
2330
2666
 
2331
- class UndefinedConcept(Concept):
2667
+ class UndefinedConcept(Concept, Mergeable, Namespaced):
2332
2668
  model_config = ConfigDict(arbitrary_types_allowed=True)
2333
2669
  name: str
2334
2670
  environment: "EnvironmentConceptDict"
@@ -2336,6 +2672,34 @@ class UndefinedConcept(Concept):
2336
2672
  datatype: DataType = DataType.UNKNOWN
2337
2673
  purpose: Purpose = Purpose.KEY
2338
2674
 
2675
def with_merge(
    self, source: Concept, target: Concept, modifiers: List[Modifier]
) -> "UndefinedConcept" | Concept:
    """Return this concept with ``source`` merged into ``target``.

    When this concept *is* the source (same address), the result is the
    target re-grained to this concept's merged grain, with this concept
    recorded in the target's ``pseudonyms``. Otherwise a new instance of
    the same class is built with the merge applied to lineage, grain and
    keys.
    """
    if self.address == source.address:
        replacement = target.with_grain(
            self.grain.with_merge(source, target, modifiers)
        )
        replacement.pseudonyms[self.address] = self
        return replacement

    merged_lineage = None
    if self.lineage:
        merged_lineage = self.lineage.with_merge(source, target, modifiers)

    merged_grain = self.grain.with_merge(source, target, modifiers)

    merged_keys = None
    if self.keys:
        merged_keys = tuple(
            key.with_merge(source, target, modifiers) for key in self.keys
        )

    return self.__class__(
        name=self.name,
        datatype=self.datatype,
        purpose=self.purpose,
        metadata=self.metadata,
        lineage=merged_lineage,
        grain=merged_grain,
        namespace=self.namespace,
        keys=merged_keys,
        environment=self.environment,
        line_no=self.line_no,
    )
2702
+
2339
2703
  def with_namespace(self, namespace: str) -> "UndefinedConcept":
2340
2704
  return self.__class__(
2341
2705
  name=self.name,
@@ -2354,14 +2718,18 @@ class UndefinedConcept(Concept):
2354
2718
  line_no=self.line_no,
2355
2719
  )
2356
2720
 
2357
- def with_select_grain(self, grain: Optional["Grain"] = None) -> "UndefinedConcept":
2721
+ def with_select_context(
2722
+ self,
2723
+ grain: Optional["Grain"] = None,
2724
+ conditional: Conditional | Comparison | Parenthetical | None = None,
2725
+ ) -> "UndefinedConcept":
2358
2726
  if not all([isinstance(x, Concept) for x in self.keys or []]):
2359
2727
  raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")
2360
2728
  new_grain = grain or Grain(components=[])
2361
2729
  if self.lineage:
2362
2730
  new_lineage = self.lineage
2363
- if isinstance(self.lineage, SelectGrain):
2364
- new_lineage = self.lineage.with_select_grain(new_grain)
2731
+ if isinstance(self.lineage, SelectContext):
2732
+ new_lineage = self.lineage.with_select_context(new_grain, conditional)
2365
2733
  else:
2366
2734
  new_lineage = None
2367
2735
  return self.__class__(
@@ -2376,7 +2744,7 @@ class UndefinedConcept(Concept):
2376
2744
  environment=self.environment,
2377
2745
  )
2378
2746
 
2379
- def with_grain(self, grain: Optional["Grain"] = None) -> "Concept":
2747
+ def with_grain(self, grain: Optional["Grain"] = None) -> "UndefinedConcept":
2380
2748
  return self.__class__(
2381
2749
  name=self.name,
2382
2750
  datatype=self.datatype,
@@ -2390,7 +2758,7 @@ class UndefinedConcept(Concept):
2390
2758
  line_no=self.line_no,
2391
2759
  )
2392
2760
 
2393
- def with_default_grain(self) -> "Concept":
2761
+ def with_default_grain(self) -> "UndefinedConcept":
2394
2762
  if self.purpose == Purpose.KEY:
2395
2763
  # we need to make this abstract
2396
2764
  grain = Grain(components=[self.with_grain(Grain())], nested=True)
@@ -2424,6 +2792,21 @@ class UndefinedConcept(Concept):
2424
2792
  )
2425
2793
 
2426
2794
 
2795
class EnvironmentDatasourceDict(dict):
    """Dictionary of datasources that tolerates the default-namespace prefix.

    A lookup that misses on the exact key is retried with
    ``DEFAULT_NAMESPACE`` prepended and, if the key itself starts with
    ``DEFAULT_NAMESPACE``, with that prefix stripped.
    """

    def __init__(self, *args, **kwargs) -> None:
        # Forward the arguments untouched. Passing ``self`` as an extra
        # positional argument made any positional initializer fail with
        # "dict expected at most 1 argument".
        super().__init__(*args, **kwargs)

    def __getitem__(self, key: str) -> Datasource:
        """Return the datasource stored under ``key`` or a namespace variant.

        Raises:
            KeyError: if neither ``key`` nor its default-namespace variants
                are present.
        """
        try:
            return super().__getitem__(key)
        except KeyError:
            qualified = DEFAULT_NAMESPACE + "." + key
            if qualified in self:
                return self.__getitem__(qualified)
            # Strip only the leading namespace segment so that dotted
            # remainders ("<default>.a.b" -> "a.b") are kept intact.
            prefix, separator, remainder = key.partition(".")
            if separator and prefix == DEFAULT_NAMESPACE:
                return self.__getitem__(remainder)
            raise
2808
+
2809
+
2427
2810
  class EnvironmentConceptDict(dict):
2428
2811
  def __init__(self, *args, **kwargs) -> None:
2429
2812
  super().__init__(self, *args, **kwargs)
@@ -2452,6 +2835,8 @@ class EnvironmentConceptDict(dict):
2452
2835
  if DEFAULT_NAMESPACE + "." + key in self:
2453
2836
  return self.__getitem__(DEFAULT_NAMESPACE + "." + key, line_no)
2454
2837
  if not self.fail_on_missing:
2838
+ if key in self.undefined:
2839
+ return self.undefined[key]
2455
2840
  undefined = UndefinedConcept(
2456
2841
  name=key,
2457
2842
  line_no=line_no,
@@ -2475,7 +2860,7 @@ class EnvironmentConceptDict(dict):
2475
2860
  matches = difflib.get_close_matches(concept_name, self.keys())
2476
2861
  return matches
2477
2862
 
2478
- def items(self) -> ItemsView[str, Concept | UndefinedConcept]: # type: ignore
2863
+ def items(self) -> ItemsView[str, Concept]: # type: ignore
2479
2864
  return super().items()
2480
2865
 
2481
2866
 
@@ -2501,13 +2886,25 @@ def validate_concepts(v) -> EnvironmentConceptDict:
2501
2886
  raise ValueError
2502
2887
 
2503
2888
 
2889
def validate_datasources(v) -> EnvironmentDatasourceDict:
    """Coerce ``v`` into an ``EnvironmentDatasourceDict``.

    An existing ``EnvironmentDatasourceDict`` is returned unchanged. Any
    other ``dict`` has each value passed through
    ``Datasource.model_validate`` before being wrapped.

    Raises:
        ValueError: if ``v`` is not a dict.
    """
    # Checked first: EnvironmentDatasourceDict is itself a dict subclass.
    if isinstance(v, EnvironmentDatasourceDict):
        return v
    if isinstance(v, dict):
        validated = {
            label: Datasource.model_validate(raw) for label, raw in v.items()
        }
        return EnvironmentDatasourceDict(**validated)
    raise ValueError
2897
+
2898
+
2504
2899
  class Environment(BaseModel):
2505
2900
  model_config = ConfigDict(arbitrary_types_allowed=True, strict=False)
2506
2901
 
2507
2902
  concepts: Annotated[EnvironmentConceptDict, PlainValidator(validate_concepts)] = (
2508
2903
  Field(default_factory=EnvironmentConceptDict)
2509
2904
  )
2510
- datasources: Dict[str, Datasource] = Field(default_factory=dict)
2905
+ datasources: Annotated[
2906
+ EnvironmentDatasourceDict, PlainValidator(validate_datasources)
2907
+ ] = Field(default_factory=EnvironmentDatasourceDict)
2511
2908
  functions: Dict[str, Function] = Field(default_factory=dict)
2512
2909
  data_types: Dict[str, DataType] = Field(default_factory=dict)
2513
2910
  imports: Dict[str, ImportStatement] = Field(default_factory=dict)
@@ -2518,7 +2915,7 @@ class Environment(BaseModel):
2518
2915
  cte_name_map: Dict[str, str] = Field(default_factory=dict)
2519
2916
 
2520
2917
  materialized_concepts: List[Concept] = Field(default_factory=list)
2521
- merged_concepts: Dict[str, Concept] = Field(default_factory=dict)
2918
+ alias_origin_lookup: Dict[str, Concept] = Field(default_factory=dict)
2522
2919
  _parse_count: int = 0
2523
2920
 
2524
2921
  @classmethod
@@ -2555,6 +2952,12 @@ class Environment(BaseModel):
2555
2952
  self.materialized_concepts = [
2556
2953
  c for c in self.concepts.values() if c.address in concrete_addresses
2557
2954
  ]
2955
+ # include aliased concepts
2956
+ self.materialized_concepts += [
2957
+ c
2958
+ for c in self.alias_origin_lookup.values()
2959
+ if c.address in concrete_addresses
2960
+ ]
2558
2961
  new = [
2559
2962
  x.address
2560
2963
  for x in self.materialized_concepts
@@ -2562,12 +2965,6 @@ class Environment(BaseModel):
2562
2965
  ]
2563
2966
  if new:
2564
2967
  logger.info(f"Environment added new materialized concepts {new}")
2565
- for concept in self.concepts.values():
2566
- if concept.derivation == PurposeLineage.MERGE:
2567
- ms = concept.lineage
2568
- assert isinstance(ms, MergeStatement)
2569
- for parent in ms.concepts:
2570
- self.merged_concepts[parent.address] = concept
2571
2968
 
2572
2969
  def validate_concept(self, lookup: str, meta: Meta | None = None):
2573
2970
  existing: Concept = self.concepts.get(lookup) # type: ignore
@@ -2710,13 +3107,8 @@ class Environment(BaseModel):
2710
3107
  datasource: Datasource,
2711
3108
  meta: Meta | None = None,
2712
3109
  ):
2713
- if not datasource.namespace or datasource.namespace == DEFAULT_NAMESPACE:
2714
- self.datasources[datasource.name] = datasource
2715
- self.gen_concept_list_caches()
2716
- return datasource
2717
- self.datasources[datasource.namespace + "." + datasource.identifier] = (
2718
- datasource
2719
- )
3110
+
3111
+ self.datasources[datasource.env_label] = datasource
2720
3112
  self.gen_concept_list_caches()
2721
3113
  return datasource
2722
3114
 
@@ -2731,6 +3123,22 @@ class Environment(BaseModel):
2731
3123
  return True
2732
3124
  return False
2733
3125
 
3126
def merge_concept(
    self, source: Concept, target: Concept, modifiers: List[Modifier]
):
    """Merge ``source`` into ``target`` across this environment.

    Records ``source`` in ``alias_origin_lookup``, registers it as a
    pseudonym on every concept sharing ``target``'s address, repoints
    every concept entry with ``source``'s address at ``target``, and asks
    each datasource that outputs ``source`` to apply the merge.
    """
    self.alias_origin_lookup[source.address] = source

    # Collect replacements first so the dict is not mutated mid-iteration.
    repointed = {}
    for label, concept in self.concepts.items():
        if concept.address == target.address:
            concept.pseudonyms[source.address] = source
        if concept.address == source.address:
            repointed[label] = target
    self.concepts.update(repointed)

    for datasource in self.datasources.values():
        if source.address in datasource.output_lcl:
            datasource.merge_concept(source, target, modifiers=modifiers)
3141
+
2734
3142
 
2735
3143
  class LazyEnvironment(Environment):
2736
3144
  """Variant of environment to defer parsing of a path
@@ -2763,7 +3171,9 @@ class LazyEnvironment(Environment):
2763
3171
  return super().__getattribute__(name)
2764
3172
 
2765
3173
 
2766
- class Comparison(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3174
+ class Comparison(
3175
+ ConceptArgs, Mergeable, Namespaced, ConstantInlineable, SelectContext, BaseModel
3176
+ ):
2767
3177
  left: Union[
2768
3178
  int,
2769
3179
  str,
@@ -2813,6 +3223,8 @@ class Comparison(ConceptArgs, Namespaced, SelectGrain, BaseModel):
2813
3223
  )
2814
3224
 
2815
3225
  def __add__(self, other):
3226
+ if other is None:
3227
+ return self
2816
3228
  if not isinstance(other, (Comparison, Conditional, Parenthetical)):
2817
3229
  raise ValueError("Cannot add Comparison to non-Comparison")
2818
3230
  if other == self:
@@ -2825,6 +3237,56 @@ class Comparison(ConceptArgs, Namespaced, SelectGrain, BaseModel):
2825
3237
  def __str__(self):
2826
3238
  return self.__repr__()
2827
3239
 
3240
def __eq__(self, other):
    """Compare by left operand, right operand and operator.

    Anything that is not a ``Comparison`` is unequal.
    """
    if isinstance(other, Comparison):
        return (
            self.left == other.left
            and self.right == other.right
            and self.operator == other.operator
        )
    return False
3248
+
3249
def inline_constant(self, constant: Concept) -> "Comparison":
    """Return a new ``Comparison`` with ``constant`` replaced by its value.

    The value is the first argument of the constant's ``Function`` lineage.
    Operands that are ``ConstantInlineable`` are asked to inline the
    constant themselves; an operand equal to ``constant`` is replaced by
    the value directly.
    """
    assert isinstance(constant.lineage, Function)
    literal = constant.lineage.arguments[0]

    left_operand = self.left
    if isinstance(left_operand, ConstantInlineable):
        left_operand = left_operand.inline_constant(constant)
    elif left_operand == constant:
        left_operand = literal

    right_operand = self.right
    if isinstance(right_operand, ConstantInlineable):
        right_operand = right_operand.inline_constant(constant)
    # A right operand equal to the constant always ends up as the literal,
    # even when it was also inlineable.
    if self.right == constant:
        right_operand = literal

    return Comparison(
        left=left_operand,
        right=right_operand,
        operator=self.operator,
    )
3274
+
3275
def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
    """Return a copy with the merge applied to each ``Mergeable`` operand.

    Operands that are not ``Mergeable`` are carried over unchanged; the
    operator is preserved.
    """
    merged_left = self.left
    if isinstance(merged_left, Mergeable):
        merged_left = merged_left.with_merge(source, target, modifiers)

    merged_right = self.right
    if isinstance(merged_right, Mergeable):
        merged_right = merged_right.with_merge(source, target, modifiers)

    return self.__class__(
        left=merged_left,
        right=merged_right,
        operator=self.operator,
    )
3289
+
2828
3290
  def with_namespace(self, namespace: str):
2829
3291
  return self.__class__(
2830
3292
  left=(
@@ -2840,11 +3302,13 @@ class Comparison(ConceptArgs, Namespaced, SelectGrain, BaseModel):
2840
3302
  operator=self.operator,
2841
3303
  )
2842
3304
 
2843
- def with_select_grain(self, grain: Grain):
3305
+ def with_select_context(
3306
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3307
+ ):
2844
3308
  return self.__class__(
2845
3309
  left=(
2846
- self.left.with_select_grain(grain)
2847
- if isinstance(self.left, SelectGrain)
3310
+ self.left.with_select_context(grain, conditional)
3311
+ if isinstance(self.left, SelectContext)
2848
3312
  else self.left
2849
3313
  ),
2850
3314
  # the right side does NOT need to inherit select grain
@@ -2912,6 +3376,17 @@ class Comparison(ConceptArgs, Namespaced, SelectGrain, BaseModel):
2912
3376
 
2913
3377
  class SubselectComparison(Comparison):
2914
3378
 
3379
def __eq__(self, other):
    """Compare by left operand, right operand and operator.

    Only another ``SubselectComparison`` can be equal.
    """
    if isinstance(other, SubselectComparison):
        return (
            self.left == other.left
            and self.right == other.right
            and self.operator == other.operator
        )
    return False
3389
+
2915
3390
  @property
2916
3391
  def row_arguments(self) -> List[Concept]:
2917
3392
  return get_concept_arguments(self.left)
@@ -2920,12 +3395,14 @@ class SubselectComparison(Comparison):
2920
3395
  def existence_arguments(self) -> list[tuple["Concept", ...]]:
2921
3396
  return [tuple(get_concept_arguments(self.right))]
2922
3397
 
2923
- def with_select_grain(self, grain: Grain):
3398
+ def with_select_context(
3399
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3400
+ ):
2924
3401
  # there's no need to pass the select grain through to a subselect comparison
2925
3402
  return self.__class__(
2926
3403
  left=(
2927
- self.left.with_select_grain(grain)
2928
- if isinstance(self.left, SelectGrain)
3404
+ self.left.with_select_context(grain, conditional)
3405
+ if isinstance(self.left, SelectContext)
2929
3406
  else self.left
2930
3407
  ),
2931
3408
  right=self.right,
@@ -2933,7 +3410,7 @@ class SubselectComparison(Comparison):
2933
3410
  )
2934
3411
 
2935
3412
 
2936
- class CaseWhen(Namespaced, SelectGrain, BaseModel):
3413
+ class CaseWhen(Namespaced, SelectContext, BaseModel):
2937
3414
  comparison: Conditional | SubselectComparison | Comparison
2938
3415
  expr: "Expr"
2939
3416
 
@@ -2954,18 +3431,20 @@ class CaseWhen(Namespaced, SelectGrain, BaseModel):
2954
3431
  ),
2955
3432
  )
2956
3433
 
2957
- def with_select_grain(self, grain: Grain) -> CaseWhen:
3434
+ def with_select_context(
3435
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3436
+ ) -> CaseWhen:
2958
3437
  return CaseWhen(
2959
- comparison=self.comparison.with_select_grain(grain),
3438
+ comparison=self.comparison.with_select_context(grain, conditional),
2960
3439
  expr=(
2961
- (self.expr.with_select_grain(grain))
2962
- if isinstance(self.expr, SelectGrain)
3440
+ (self.expr.with_select_context(grain, conditional))
3441
+ if isinstance(self.expr, SelectContext)
2963
3442
  else self.expr
2964
3443
  ),
2965
3444
  )
2966
3445
 
2967
3446
 
2968
- class CaseElse(Namespaced, SelectGrain, BaseModel):
3447
+ class CaseElse(Namespaced, SelectContext, BaseModel):
2969
3448
  expr: "Expr"
2970
3449
  # this ensures that it's easily differentiable from CaseWhen
2971
3450
  discriminant: ComparisonOperator = ComparisonOperator.ELSE
@@ -2974,14 +3453,16 @@ class CaseElse(Namespaced, SelectGrain, BaseModel):
2974
3453
  def concept_arguments(self):
2975
3454
  return get_concept_arguments(self.expr)
2976
3455
 
2977
- def with_select_grain(self, grain: Grain) -> CaseElse:
3456
+ def with_select_context(
3457
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3458
+ ) -> CaseElse:
2978
3459
  return CaseElse(
2979
3460
  discriminant=self.discriminant,
2980
3461
  expr=(
2981
- self.expr.with_select_grain(grain)
3462
+ self.expr.with_select_context(grain, conditional)
2982
3463
  if isinstance(
2983
3464
  self.expr,
2984
- SelectGrain,
3465
+ SelectContext,
2985
3466
  )
2986
3467
  else self.expr
2987
3468
  ),
@@ -3001,7 +3482,9 @@ class CaseElse(Namespaced, SelectGrain, BaseModel):
3001
3482
  )
3002
3483
 
3003
3484
 
3004
- class Conditional(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3485
+ class Conditional(
3486
+ Mergeable, ConceptArgs, Namespaced, ConstantInlineable, SelectContext, BaseModel
3487
+ ):
3005
3488
  left: Union[
3006
3489
  int,
3007
3490
  str,
@@ -3047,7 +3530,43 @@ class Conditional(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3047
3530
  def __repr__(self):
3048
3531
  return f"{str(self.left)} {self.operator.value} {str(self.right)}"
3049
3532
 
3050
- def with_namespace(self, namespace: str):
3533
def __eq__(self, other):
    """Compare by left operand, right operand and operator.

    Anything that is not a ``Conditional`` is unequal.
    """
    if isinstance(other, Conditional):
        return (
            self.left == other.left
            and self.right == other.right
            and self.operator == other.operator
        )
    return False
3542
+
3543
def inline_constant(self, constant: Concept) -> "Conditional":
    """Return a new ``Conditional`` with ``constant`` replaced by its value.

    The value is the first argument of the constant's ``Function`` lineage.
    Operands that are ``ConstantInlineable`` are asked to inline the
    constant themselves; an operand equal to ``constant`` is replaced by
    the value directly.
    """
    assert isinstance(constant.lineage, Function)
    literal = constant.lineage.arguments[0]

    left_operand = self.left
    if isinstance(left_operand, ConstantInlineable):
        left_operand = left_operand.inline_constant(constant)
    elif left_operand == constant:
        left_operand = literal

    right_operand = self.right
    if isinstance(right_operand, ConstantInlineable):
        right_operand = right_operand.inline_constant(constant)
    # A right operand equal to the constant always ends up as the literal,
    # even when it was also inlineable.
    if self.right == constant:
        right_operand = literal

    return Conditional(
        left=left_operand,
        right=right_operand,
        operator=self.operator,
    )
3568
+
3569
+ def with_namespace(self, namespace: str) -> "Conditional":
3051
3570
  return Conditional(
3052
3571
  left=(
3053
3572
  self.left.with_namespace(namespace)
@@ -3062,16 +3581,35 @@ class Conditional(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3062
3581
  operator=self.operator,
3063
3582
  )
3064
3583
 
3065
- def with_select_grain(self, grain: Grain):
3584
def with_merge(
    self, source: Concept, target: Concept, modifiers: List[Modifier]
) -> "Conditional":
    """Return a new ``Conditional`` with the merge applied to each operand.

    Only operands that are ``Mergeable`` are rewritten; the others and the
    operator are carried over unchanged.
    """
    merged_left = self.left
    if isinstance(merged_left, Mergeable):
        merged_left = merged_left.with_merge(source, target, modifiers)

    merged_right = self.right
    if isinstance(merged_right, Mergeable):
        merged_right = merged_right.with_merge(source, target, modifiers)

    return Conditional(
        left=merged_left,
        right=merged_right,
        operator=self.operator,
    )
3600
+
3601
+ def with_select_context(
3602
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3603
+ ):
3066
3604
  return Conditional(
3067
3605
  left=(
3068
- self.left.with_select_grain(grain)
3069
- if isinstance(self.left, SelectGrain)
3606
+ self.left.with_select_context(grain, conditional)
3607
+ if isinstance(self.left, SelectContext)
3070
3608
  else self.left
3071
3609
  ),
3072
3610
  right=(
3073
- self.right.with_select_grain(grain)
3074
- if isinstance(self.right, SelectGrain)
3611
+ self.right.with_select_context(grain, conditional)
3612
+ if isinstance(self.right, SelectContext)
3075
3613
  else self.right
3076
3614
  ),
3077
3615
  operator=self.operator,
@@ -3134,7 +3672,7 @@ class Conditional(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3134
3672
  return chunks
3135
3673
 
3136
3674
 
3137
- class AggregateWrapper(Namespaced, SelectGrain, BaseModel):
3675
+ class AggregateWrapper(Mergeable, Namespaced, SelectContext, BaseModel):
3138
3676
  function: Function
3139
3677
  by: List[Concept] = Field(default_factory=list)
3140
3678
 
@@ -3162,21 +3700,34 @@ class AggregateWrapper(Namespaced, SelectGrain, BaseModel):
3162
3700
  def arguments(self):
3163
3701
  return self.function.arguments
3164
3702
 
3703
def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
    """Return a new ``AggregateWrapper`` with the merge applied.

    The merge is applied to the wrapped function and to every ``by``
    concept; a falsy ``by`` becomes an empty list.
    """
    merged_function = self.function.with_merge(source, target, modifiers=modifiers)
    if self.by:
        merged_by = [
            concept.with_merge(source, target, modifiers) for concept in self.by
        ]
    else:
        merged_by = []
    return AggregateWrapper(function=merged_function, by=merged_by)
3712
+
3165
3713
def with_namespace(self, namespace: str) -> "AggregateWrapper":
    """Return a new ``AggregateWrapper`` with ``namespace`` applied.

    The namespace is applied to the wrapped function and to every ``by``
    concept; a falsy ``by`` becomes an empty list.
    """
    namespaced_function = self.function.with_namespace(namespace)
    if self.by:
        namespaced_by = [concept.with_namespace(namespace) for concept in self.by]
    else:
        namespaced_by = []
    return AggregateWrapper(function=namespaced_function, by=namespaced_by)
3170
3718
 
3171
- def with_select_grain(self, grain: Grain) -> AggregateWrapper:
3719
+ def with_select_context(
3720
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3721
+ ) -> AggregateWrapper:
3172
3722
  if not self.by:
3173
3723
  by = grain.components_copy
3174
3724
  else:
3175
3725
  by = self.by
3176
- return AggregateWrapper(function=self.function.with_select_grain(grain), by=by)
3726
+ parent = self.function.with_select_context(grain, conditional)
3727
+ return AggregateWrapper(function=parent, by=by)
3177
3728
 
3178
3729
 
3179
- class WhereClause(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3730
+ class WhereClause(Mergeable, ConceptArgs, Namespaced, SelectContext, BaseModel):
3180
3731
  conditional: Union[SubselectComparison, Comparison, Conditional, "Parenthetical"]
3181
3732
 
3182
3733
  @property
@@ -3195,11 +3746,20 @@ class WhereClause(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3195
3746
  def existence_arguments(self) -> list[tuple["Concept", ...]]:
3196
3747
  return self.conditional.existence_arguments
3197
3748
 
3749
def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
    """Return a new ``WhereClause`` whose conditional has the merge applied."""
    merged_conditional = self.conditional.with_merge(source, target, modifiers)
    return WhereClause(conditional=merged_conditional)
3753
+
3198
3754
def with_namespace(self, namespace: str) -> WhereClause:
    """Return a new ``WhereClause`` whose conditional has ``namespace`` applied."""
    namespaced_conditional = self.conditional.with_namespace(namespace)
    return WhereClause(conditional=namespaced_conditional)
3200
3756
 
3201
- def with_select_grain(self, grain: Grain) -> WhereClause:
3202
- return WhereClause(conditional=self.conditional.with_select_grain(grain))
3757
+ def with_select_context(
3758
+ self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
3759
+ ) -> WhereClause:
3760
+ return WhereClause(
3761
+ conditional=self.conditional.with_select_context(grain, conditional)
3762
+ )
3203
3763
 
3204
3764
  @property
3205
3765
  def grain(self) -> Grain:
@@ -3327,7 +3887,7 @@ class RowsetDerivationStatement(Namespaced, BaseModel):
3327
3887
  )
3328
3888
 
3329
3889
 
3330
- class RowsetItem(Namespaced, BaseModel):
3890
+ class RowsetItem(Mergeable, Namespaced, BaseModel):
3331
3891
  content: Concept
3332
3892
  rowset: RowsetDerivationStatement
3333
3893
  where: Optional["WhereClause"] = None
@@ -3337,6 +3897,15 @@ class RowsetItem(Namespaced, BaseModel):
3337
3897
  f"<Rowset<{self.rowset.name}>: {str(self.content)} where {str(self.where)}>"
3338
3898
  )
3339
3899
 
3900
def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
    """Return a new ``RowsetItem`` with the merge applied.

    The merge is applied to ``content`` and, when present, to ``where``;
    the ``rowset`` reference is reused unchanged.
    """
    merged_content = self.content.with_merge(source, target, modifiers)
    merged_where = None
    if self.where:
        merged_where = self.where.with_merge(source, target, modifiers)
    return RowsetItem(
        content=merged_content,
        rowset=self.rowset,
        where=merged_where,
    )
3908
+
3340
3909
  def with_namespace(self, namespace: str) -> "RowsetItem":
3341
3910
  return RowsetItem(
3342
3911
  content=self.content.with_namespace(namespace),
@@ -3386,7 +3955,9 @@ class RowsetItem(Namespaced, BaseModel):
3386
3955
  return [self.content]
3387
3956
 
3388
3957
 
3389
- class Parenthetical(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3958
+ class Parenthetical(
3959
+ ConceptArgs, Mergeable, Namespaced, ConstantInlineable, SelectContext, BaseModel
3960
+ ):
3390
3961
  content: "Expr"
3391
3962
 
3392
3963
  def __str__(self):
@@ -3411,11 +3982,31 @@ class Parenthetical(ConceptArgs, Namespaced, SelectGrain, BaseModel):
3411
3982
  )
3412
3983
  )
3413
3984
 
3414
- def with_select_grain(self, grain: Grain):
3985
def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
    """Return a new ``Parenthetical`` with the merge applied to its content.

    Content that is not ``Mergeable`` is wrapped again unchanged.
    """
    inner = self.content
    if isinstance(inner, Mergeable):
        inner = inner.with_merge(source, target, modifiers)
    return Parenthetical(content=inner)
3993
+
3994
def with_select_context(
    self, grain: Grain, conditional: Conditional | Comparison | Parenthetical | None
):
    """Return a new ``Parenthetical`` with the select context applied.

    The grain and conditional are forwarded to the content when it is a
    ``SelectContext``; other content is wrapped again unchanged.
    """
    inner = self.content
    if isinstance(inner, SelectContext):
        inner = inner.with_select_context(grain, conditional)
    return Parenthetical(content=inner)
4004
+
4005
+ def inline_constant(self, concept: Concept):
3415
4006
  return Parenthetical(
3416
4007
  content=(
3417
- self.content.with_select_grain(grain)
3418
- if isinstance(self.content, SelectGrain)
4008
+ self.content.inline_constant(concept)
4009
+ if isinstance(self.content, ConstantInlineable)
3419
4010
  else self.content
3420
4011
  )
3421
4012
  )