pytrilogy 0.0.2.47__py3-none-any.whl → 0.0.2.48__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (69)
  1. {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.48.dist-info}/METADATA +1 -1
  2. pytrilogy-0.0.2.48.dist-info/RECORD +85 -0
  3. trilogy/__init__.py +2 -2
  4. trilogy/constants.py +4 -2
  5. trilogy/core/enums.py +7 -1
  6. trilogy/core/env_processor.py +1 -2
  7. trilogy/core/environment_helpers.py +5 -5
  8. trilogy/core/functions.py +11 -10
  9. trilogy/core/internal.py +2 -3
  10. trilogy/core/models.py +444 -392
  11. trilogy/core/optimization.py +37 -21
  12. trilogy/core/optimizations/__init__.py +1 -1
  13. trilogy/core/optimizations/base_optimization.py +6 -6
  14. trilogy/core/optimizations/inline_constant.py +7 -4
  15. trilogy/core/optimizations/inline_datasource.py +14 -5
  16. trilogy/core/optimizations/predicate_pushdown.py +20 -10
  17. trilogy/core/processing/concept_strategies_v3.py +40 -24
  18. trilogy/core/processing/graph_utils.py +2 -3
  19. trilogy/core/processing/node_generators/__init__.py +7 -5
  20. trilogy/core/processing/node_generators/basic_node.py +4 -4
  21. trilogy/core/processing/node_generators/common.py +10 -11
  22. trilogy/core/processing/node_generators/filter_node.py +7 -9
  23. trilogy/core/processing/node_generators/group_node.py +10 -11
  24. trilogy/core/processing/node_generators/group_to_node.py +5 -5
  25. trilogy/core/processing/node_generators/multiselect_node.py +10 -12
  26. trilogy/core/processing/node_generators/node_merge_node.py +7 -9
  27. trilogy/core/processing/node_generators/rowset_node.py +9 -8
  28. trilogy/core/processing/node_generators/select_merge_node.py +11 -10
  29. trilogy/core/processing/node_generators/select_node.py +5 -5
  30. trilogy/core/processing/node_generators/union_node.py +75 -0
  31. trilogy/core/processing/node_generators/unnest_node.py +2 -3
  32. trilogy/core/processing/node_generators/window_node.py +3 -4
  33. trilogy/core/processing/nodes/__init__.py +9 -5
  34. trilogy/core/processing/nodes/base_node.py +17 -13
  35. trilogy/core/processing/nodes/filter_node.py +3 -4
  36. trilogy/core/processing/nodes/group_node.py +8 -10
  37. trilogy/core/processing/nodes/merge_node.py +11 -11
  38. trilogy/core/processing/nodes/select_node_v2.py +8 -9
  39. trilogy/core/processing/nodes/union_node.py +50 -0
  40. trilogy/core/processing/nodes/unnest_node.py +2 -3
  41. trilogy/core/processing/nodes/window_node.py +2 -3
  42. trilogy/core/processing/utility.py +37 -40
  43. trilogy/core/query_processor.py +68 -44
  44. trilogy/dialect/base.py +95 -53
  45. trilogy/dialect/bigquery.py +2 -3
  46. trilogy/dialect/common.py +5 -4
  47. trilogy/dialect/config.py +0 -2
  48. trilogy/dialect/duckdb.py +2 -2
  49. trilogy/dialect/enums.py +5 -5
  50. trilogy/dialect/postgres.py +2 -2
  51. trilogy/dialect/presto.py +3 -4
  52. trilogy/dialect/snowflake.py +2 -2
  53. trilogy/dialect/sql_server.py +3 -4
  54. trilogy/engine.py +2 -1
  55. trilogy/executor.py +43 -30
  56. trilogy/hooks/base_hook.py +5 -4
  57. trilogy/hooks/graph_hook.py +2 -1
  58. trilogy/hooks/query_debugger.py +18 -8
  59. trilogy/parsing/common.py +15 -20
  60. trilogy/parsing/parse_engine.py +124 -88
  61. trilogy/parsing/render.py +32 -35
  62. trilogy/parsing/trilogy.lark +8 -1
  63. trilogy/scripts/trilogy.py +6 -4
  64. trilogy/utility.py +1 -1
  65. pytrilogy-0.0.2.47.dist-info/RECORD +0 -83
  66. {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.48.dist-info}/LICENSE.md +0 -0
  67. {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.48.dist-info}/WHEEL +0 -0
  68. {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.48.dist-info}/entry_points.txt +0 -0
  69. {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.48.dist-info}/top_level.txt +0 -0
trilogy/core/models.py CHANGED
@@ -1,82 +1,86 @@
1
1
  from __future__ import annotations
2
+
2
3
  import difflib
4
+ import hashlib
3
5
  import os
6
+ from abc import ABC
7
+ from collections import UserDict, UserList, defaultdict
4
8
  from enum import Enum
9
+ from functools import cached_property
10
+ from pathlib import Path
5
11
  from typing import (
12
+ Annotated,
13
+ Any,
14
+ Callable,
6
15
  Dict,
7
- TypeVar,
16
+ Generic,
17
+ ItemsView,
8
18
  List,
19
+ Never,
9
20
  Optional,
10
- Union,
11
- Set,
12
- Any,
21
+ Self,
13
22
  Sequence,
14
- ValuesView,
15
- Callable,
16
- Annotated,
17
- get_args,
18
- Generic,
23
+ Set,
19
24
  Tuple,
20
25
  Type,
21
- ItemsView,
26
+ TypeVar,
27
+ Union,
28
+ ValuesView,
29
+ get_args,
22
30
  )
23
- from pydantic_core import core_schema
24
- from pydantic.functional_validators import PlainValidator
31
+
32
+ from lark.tree import Meta
25
33
  from pydantic import (
26
34
  BaseModel,
27
- Field,
28
35
  ConfigDict,
29
- field_validator,
36
+ Field,
30
37
  ValidationInfo,
31
38
  ValidatorFunctionWrapHandler,
32
39
  computed_field,
40
+ field_validator,
33
41
  )
34
- from lark.tree import Meta
35
- from pathlib import Path
42
+ from pydantic.functional_validators import PlainValidator
43
+ from pydantic_core import core_schema
44
+
36
45
  from trilogy.constants import (
37
- logger,
46
+ CONFIG,
38
47
  DEFAULT_NAMESPACE,
39
48
  ENV_CACHE_NAME,
40
49
  MagicConstants,
41
- CONFIG,
50
+ logger,
42
51
  )
43
52
  from trilogy.core.constants import (
44
53
  ALL_ROWS_CONCEPT,
45
- INTERNAL_NAMESPACE,
46
54
  CONSTANT_DATASET,
55
+ INTERNAL_NAMESPACE,
47
56
  PERSISTED_CONCEPT_PREFIX,
48
57
  )
49
58
  from trilogy.core.enums import (
50
- InfiniteFunctionArgs,
51
- Purpose,
52
- JoinType,
53
- Ordering,
54
- Modifier,
55
- FunctionType,
56
- FunctionClass,
57
59
  BooleanOperator,
58
60
  ComparisonOperator,
59
- WindowOrder,
60
- PurposeLineage,
61
- SourceType,
62
- WindowType,
63
61
  ConceptSource,
64
62
  DatePart,
65
- ShowCategory,
63
+ FunctionClass,
64
+ FunctionType,
66
65
  Granularity,
67
- SelectFiltering,
66
+ InfiniteFunctionArgs,
68
67
  IOType,
68
+ JoinType,
69
+ Modifier,
70
+ Ordering,
71
+ Purpose,
72
+ PurposeLineage,
73
+ SelectFiltering,
74
+ ShowCategory,
75
+ SourceType,
76
+ WindowOrder,
77
+ WindowType,
69
78
  )
70
79
  from trilogy.core.exceptions import (
71
- UndefinedConceptException,
72
80
  InvalidSyntaxException,
81
+ UndefinedConceptException,
73
82
  )
74
83
  from trilogy.utility import unique
75
- from collections import UserList, UserDict
76
- from functools import cached_property
77
- from abc import ABC
78
- from collections import defaultdict
79
- import hashlib
80
84
 
81
85
  LOGGER_PREFIX = "[MODELS]"
82
86
 
@@ -152,19 +156,19 @@ NAMESPACED_TYPES = Union[
152
156
 
153
157
 
154
158
  class Namespaced(ABC):
155
-
156
159
  def with_namespace(self, namespace: str):
157
160
  raise NotImplementedError
158
161
 
159
162
 
160
163
  class Mergeable(ABC):
161
-
162
164
  def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
163
165
  raise NotImplementedError
164
166
 
167
+ def hydrate_missing(self, concepts: EnvironmentConceptDict):
168
+ return self
165
169
 
166
- class ConceptArgs(ABC):
167
170
 
171
+ class ConceptArgs(ABC):
168
172
  @property
169
173
  def concept_arguments(self) -> List["Concept"]:
170
174
  raise NotImplementedError
@@ -179,13 +183,12 @@ class ConceptArgs(ABC):
179
183
 
180
184
 
181
185
  class SelectContext(ABC):
182
-
183
186
  def with_select_context(
184
187
  self,
188
+ local_concepts: dict[str, Concept],
185
189
  grain: Grain,
186
- conditional: Conditional | Comparison | Parenthetical | None,
187
- environment: Environment | None = None,
188
- ):
190
+ environment: Environment,
191
+ ) -> Any:
189
192
  raise NotImplementedError
190
193
 
191
194
 
@@ -195,7 +198,6 @@ class ConstantInlineable(ABC):
195
198
 
196
199
 
197
200
  class HasUUID(ABC):
198
-
199
201
  @property
200
202
  def uuid(self) -> str:
201
203
  return hashlib.md5(str(self).encode()).hexdigest()
@@ -456,8 +458,17 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
456
458
  pseudonyms: set[str] = Field(default_factory=set)
457
459
  _address_cache: str | None = None
458
460
 
461
+ def duplicate(self) -> Concept:
462
+ return self.model_copy(deep=True)
463
+
459
464
  def __hash__(self):
460
- return hash(str(self))
465
+ return hash(
466
+ f"{self.name}+{self.datatype}+ {self.purpose} + {str(self.lineage)} + {self.namespace} + {str(self.grain)} + {str(self.keys)}"
467
+ )
468
+
469
+ def __repr__(self):
470
+ base = f"{self.namespace}.{self.address}@{self.grain}"
471
+ return base
461
472
 
462
473
  @property
463
474
  def is_aggregate(self):
@@ -476,7 +487,7 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
476
487
  return True
477
488
  return False
478
489
 
479
- def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
490
+ def with_merge(self, source: Self, target: Self, modifiers: List[Modifier]) -> Self:
480
491
  if self.address == source.address:
481
492
  new = target.with_grain(self.grain.with_merge(source, target, modifiers))
482
493
  new.pseudonyms.add(self.address)
@@ -556,11 +567,13 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
556
567
  v = Grain(components=values["lineage"].by)
557
568
  elif not v:
558
569
  v = Grain(components=[])
570
+ elif isinstance(v, Grain):
571
+ return v
559
572
  elif isinstance(v, Concept):
560
573
  v = Grain(components=[v])
561
574
  elif isinstance(v, dict):
562
575
  v = Grain.model_validate(v)
563
- if not v:
576
+ else:
564
577
  raise SyntaxError(f"Invalid grain {v} for concept {values['name']}")
565
578
  return v
566
579
 
@@ -610,7 +623,7 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
610
623
  def grain_components(self) -> List["Concept"]:
611
624
  return self.grain.components_copy if self.grain else []
612
625
 
613
- def with_namespace(self, namespace: str) -> "Concept":
626
+ def with_namespace(self, namespace: str) -> Self:
614
627
  if namespace == self.namespace:
615
628
  return self
616
629
  return self.__class__(
@@ -641,33 +654,44 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
641
654
  )
642
655
 
643
656
  def with_select_context(
644
- self,
645
- grain: Optional["Grain"] = None,
646
- conditional: Conditional | Comparison | Parenthetical | None = None,
647
- environment: Environment | None = None,
648
- ) -> "Concept":
649
- if not all([isinstance(x, Concept) for x in self.keys or []]):
650
- raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")
651
- new_grain = grain or self.grain
657
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
658
+ ) -> Concept:
652
659
  new_lineage = self.lineage
653
660
  if isinstance(self.lineage, SelectContext):
654
661
  new_lineage = self.lineage.with_select_context(
655
- new_grain, conditional, environment=environment
662
+ local_concepts=local_concepts, grain=grain, environment=environment
656
663
  )
664
+ final_grain = self.grain
665
+
666
+ if self.is_aggregate and isinstance(new_lineage, Function):
667
+ new_lineage = AggregateWrapper(function=new_lineage, by=grain.components)
668
+ final_grain = grain
669
+
657
670
  return self.__class__(
658
671
  name=self.name,
659
672
  datatype=self.datatype,
660
673
  purpose=self.purpose,
661
674
  metadata=self.metadata,
662
675
  lineage=new_lineage,
663
- grain=new_grain,
676
+ grain=final_grain,
664
677
  namespace=self.namespace,
665
- keys=self.keys,
678
+ keys=(
679
+ tuple(
680
+ [
681
+ x.with_select_context(local_concepts, grain, environment)
682
+ for x in self.keys
683
+ ]
684
+ )
685
+ if self.keys
686
+ else None
687
+ ),
666
688
  modifiers=self.modifiers,
667
- pseudonyms=self.pseudonyms,
689
+ # a select needs to always defer to the environment for pseudonyms
690
+ # TODO: evaluate if this should be cached
691
+ pseudonyms=(environment.concepts.get(self.address) or self).pseudonyms,
668
692
  )
669
693
 
670
- def with_grain(self, grain: Optional["Grain"] = None) -> "Concept":
694
+ def with_grain(self, grain: Optional["Grain"] = None) -> Self:
671
695
  if not all([isinstance(x, Concept) for x in self.keys or []]):
672
696
  raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")
673
697
  return self.__class__(
@@ -683,8 +707,8 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
683
707
  pseudonyms=self.pseudonyms,
684
708
  )
685
709
 
686
- @cached_property
687
- def _with_default_grain(self) -> "Concept":
710
+ @property
711
+ def _with_default_grain(self) -> Self:
688
712
  if self.purpose == Purpose.KEY:
689
713
  # we need to make this abstract
690
714
  grain = Grain(components=[self.with_grain(Grain())], nested=True)
@@ -788,6 +812,12 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
788
812
  and self.lineage.operator == FunctionType.UNNEST
789
813
  ):
790
814
  return PurposeLineage.UNNEST
815
+ elif (
816
+ self.lineage
817
+ and isinstance(self.lineage, Function)
818
+ and self.lineage.operator == FunctionType.UNION
819
+ ):
820
+ return PurposeLineage.UNION
791
821
  elif (
792
822
  self.lineage
793
823
  and isinstance(self.lineage, Function)
@@ -827,7 +857,7 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
827
857
  elif (
828
858
  self.lineage
829
859
  and isinstance(self.lineage, Function)
830
- and self.lineage.operator == FunctionType.UNNEST
860
+ and self.lineage.operator in (FunctionType.UNNEST, FunctionType.UNION)
831
861
  ):
832
862
  return Granularity.MULTI_ROW
833
863
  elif self.lineage and all(
@@ -836,6 +866,7 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
836
866
  for x in self.lineage.concept_arguments
837
867
  ]
838
868
  ):
869
+
839
870
  return Granularity.SINGLE_ROW
840
871
  return Granularity.MULTI_ROW
841
872
 
@@ -867,7 +898,15 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
867
898
  return new
868
899
 
869
900
 
870
- class Grain(Mergeable, BaseModel):
901
+ class ConceptRef(BaseModel):
902
+ address: str
903
+ line_no: int
904
+
905
+ def hydrate(self, environment: Environment) -> Concept:
906
+ return environment.concepts.__getitem__(self.address, self.line_no)
907
+
908
+
909
+ class Grain(Mergeable, BaseModel, SelectContext):
871
910
  nested: bool = False
872
911
  components: List[Concept] = Field(default_factory=list, validate_default=True)
873
912
  where_clause: Optional[WhereClause] = Field(default=None)
@@ -890,6 +929,20 @@ class Grain(Mergeable, BaseModel):
890
929
  v2 = sorted(final, key=lambda x: x.name)
891
930
  return v2
892
931
 
932
+ def with_select_context(
933
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
934
+ ):
935
+ if self.nested:
936
+ return self
937
+ return Grain(
938
+ components=[
939
+ x.with_select_context(local_concepts, grain, environment)
940
+ for x in self.components
941
+ ],
942
+ where_clause=self.where_clause,
943
+ nested=self.nested,
944
+ )
945
+
893
946
  def with_filter(
894
947
  self,
895
948
  condition: "Conditional | Comparison | Parenthetical",
@@ -949,7 +1002,7 @@ class Grain(Mergeable, BaseModel):
949
1002
  base.append(c)
950
1003
  return set(base)
951
1004
 
952
- @cached_property
1005
+ @property
953
1006
  def set(self) -> set[str]:
954
1007
  base = []
955
1008
  for x in self.components_copy:
@@ -1016,6 +1069,89 @@ class Grain(Mergeable, BaseModel):
1016
1069
  return self.__add__(other)
1017
1070
 
1018
1071
 
1072
+ class EnvironmentConceptDict(dict):
1073
+ def __init__(self, *args, **kwargs) -> None:
1074
+ super().__init__(self, *args, **kwargs)
1075
+ self.undefined: dict[str, UndefinedConcept] = {}
1076
+ self.fail_on_missing: bool = True
1077
+ self.populate_default_concepts()
1078
+
1079
+ def duplicate(self) -> "EnvironmentConceptDict":
1080
+ new = EnvironmentConceptDict()
1081
+ new.update({k: v.duplicate() for k, v in self.items()})
1082
+ new.undefined = self.undefined
1083
+ new.fail_on_missing = self.fail_on_missing
1084
+ return new
1085
+
1086
+ def populate_default_concepts(self):
1087
+ from trilogy.core.internal import DEFAULT_CONCEPTS
1088
+
1089
+ for concept in DEFAULT_CONCEPTS.values():
1090
+ self[concept.address] = concept
1091
+
1092
+ def values(self) -> ValuesView[Concept]: # type: ignore
1093
+ return super().values()
1094
+
1095
+ def get(self, key: str, default: Concept | None = None) -> Concept | None: # type: ignore
1096
+ try:
1097
+ return self.__getitem__(key)
1098
+ except UndefinedConceptException:
1099
+ return default
1100
+
1101
+ def raise_undefined(
1102
+ self, key: str, line_no: int | None = None, file: Path | str | None = None
1103
+ ) -> Never:
1104
+ matches = self._find_similar_concepts(key)
1105
+ message = f"Undefined concept: {key}."
1106
+ if matches:
1107
+ message += f" Suggestions: {matches}"
1108
+
1109
+ if line_no:
1110
+ if file:
1111
+ raise UndefinedConceptException(
1112
+ f"{file}: {line_no}: " + message, matches
1113
+ )
1114
+ raise UndefinedConceptException(f"line: {line_no}: " + message, matches)
1115
+ raise UndefinedConceptException(message, matches)
1116
+
1117
+ def __getitem__(
1118
+ self, key: str, line_no: int | None = None, file: Path | None = None
1119
+ ) -> Concept | UndefinedConcept:
1120
+ try:
1121
+ return super(EnvironmentConceptDict, self).__getitem__(key)
1122
+ except KeyError:
1123
+ if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
1124
+ return self.__getitem__(key.split(".", 1)[1], line_no)
1125
+ if DEFAULT_NAMESPACE + "." + key in self:
1126
+ return self.__getitem__(DEFAULT_NAMESPACE + "." + key, line_no)
1127
+ if not self.fail_on_missing:
1128
+ if key in self.undefined:
1129
+ return self.undefined[key]
1130
+ undefined = UndefinedConcept(
1131
+ name=key,
1132
+ line_no=line_no,
1133
+ datatype=DataType.UNKNOWN,
1134
+ purpose=Purpose.UNKNOWN,
1135
+ )
1136
+ self.undefined[key] = undefined
1137
+ return undefined
1138
+ self.raise_undefined(key, line_no, file)
1139
+
1140
+ def _find_similar_concepts(self, concept_name: str):
1141
+ def strip_local(input: str):
1142
+ if input.startswith(f"{DEFAULT_NAMESPACE}."):
1143
+ return input[len(DEFAULT_NAMESPACE) + 1 :]
1144
+ return input
1145
+
1146
+ matches = difflib.get_close_matches(
1147
+ strip_local(concept_name), [strip_local(x) for x in self.keys()]
1148
+ )
1149
+ return matches
1150
+
1151
+ def items(self) -> ItemsView[str, Concept]: # type: ignore
1152
+ return super().items()
1153
+
1154
+
1019
1155
  class RawColumnExpr(BaseModel):
1020
1156
  text: str
1021
1157
 
@@ -1135,6 +1271,7 @@ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
1135
1271
  "CaseElse",
1136
1272
  list,
1137
1273
  ListWrapper[Any],
1274
+ WindowItem,
1138
1275
  ]
1139
1276
  ]
1140
1277
 
@@ -1149,41 +1286,13 @@ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
1149
1286
  return self.output_datatype
1150
1287
 
1151
1288
  def with_select_context(
1152
- self,
1153
- grain: Grain,
1154
- conditional: Conditional | Comparison | Parenthetical | None,
1155
- environment: Environment | None = None,
1289
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
1156
1290
  ) -> Function:
1157
- if self.operator in FunctionClass.AGGREGATE_FUNCTIONS.value and conditional:
1158
- base = [
1159
- (
1160
- c.with_select_context(grain, conditional, environment)
1161
- if isinstance(
1162
- c,
1163
- SelectContext,
1164
- )
1165
- else c
1166
- )
1167
- for c in self.arguments
1168
- ]
1169
- final = [
1170
- c.with_filter(conditional, environment) if isinstance(c, Concept) else c
1171
- for c in base
1172
- ]
1173
- return Function(
1174
- operator=self.operator,
1175
- arguments=final,
1176
- output_datatype=self.output_datatype,
1177
- output_purpose=self.output_purpose,
1178
- valid_inputs=self.valid_inputs,
1179
- arg_count=self.arg_count,
1180
- )
1181
-
1182
- return Function(
1291
+ base = Function(
1183
1292
  operator=self.operator,
1184
1293
  arguments=[
1185
1294
  (
1186
- c.with_select_context(grain, conditional, environment)
1295
+ c.with_select_context(local_concepts, grain, environment)
1187
1296
  if isinstance(
1188
1297
  c,
1189
1298
  SelectContext,
@@ -1197,6 +1306,7 @@ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
1197
1306
  valid_inputs=self.valid_inputs,
1198
1307
  arg_count=self.arg_count,
1199
1308
  )
1309
+ return base
1200
1310
 
1201
1311
  @field_validator("arguments")
1202
1312
  @classmethod
@@ -1404,20 +1514,19 @@ class WindowItem(Mergeable, Namespaced, SelectContext, BaseModel):
1404
1514
  )
1405
1515
 
1406
1516
  def with_select_context(
1407
- self,
1408
- grain: Grain,
1409
- conditional: Conditional | Comparison | Parenthetical | None,
1410
- environment: Environment | None = None,
1517
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
1411
1518
  ) -> "WindowItem":
1412
1519
  return WindowItem(
1413
1520
  type=self.type,
1414
- content=self.content.with_select_context(grain, conditional, environment),
1521
+ content=self.content.with_select_context(
1522
+ local_concepts, grain, environment
1523
+ ),
1415
1524
  over=[
1416
- x.with_select_context(grain, conditional, environment)
1525
+ x.with_select_context(local_concepts, grain, environment)
1417
1526
  for x in self.over
1418
1527
  ],
1419
1528
  order_by=[
1420
- x.with_select_context(grain, conditional, environment)
1529
+ x.with_select_context(local_concepts, grain, environment)
1421
1530
  for x in self.order_by
1422
1531
  ],
1423
1532
  index=self.index,
@@ -1489,14 +1598,13 @@ class FilterItem(Namespaced, SelectContext, BaseModel):
1489
1598
  )
1490
1599
 
1491
1600
  def with_select_context(
1492
- self,
1493
- grain: Grain,
1494
- conditional: Conditional | Comparison | Parenthetical | None,
1495
- environment: Environment | None = None,
1601
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
1496
1602
  ) -> FilterItem:
1497
1603
  return FilterItem(
1498
- content=self.content.with_select_context(grain, conditional, environment),
1499
- where=self.where.with_select_context(grain, conditional, environment),
1604
+ content=self.content.with_select_context(
1605
+ local_concepts, grain, environment
1606
+ ),
1607
+ where=self.where.with_select_context(local_concepts, grain, environment),
1500
1608
  )
1501
1609
 
1502
1610
  @property
@@ -1576,14 +1684,11 @@ class OrderItem(Mergeable, SelectContext, Namespaced, BaseModel):
1576
1684
  return OrderItem(expr=self.expr.with_namespace(namespace), order=self.order)
1577
1685
 
1578
1686
  def with_select_context(
1579
- self,
1580
- grain: Grain,
1581
- conditional: Conditional | Comparison | Parenthetical | None,
1582
- environment: Environment | None = None,
1687
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
1583
1688
  ) -> "OrderItem":
1584
1689
  return OrderItem(
1585
1690
  expr=self.expr.with_select_context(
1586
- grain, conditional=conditional, environment=environment
1691
+ local_concepts, grain, environment=environment
1587
1692
  ),
1588
1693
  order=self.order,
1589
1694
  )
@@ -1604,7 +1709,7 @@ class OrderItem(Mergeable, SelectContext, Namespaced, BaseModel):
1604
1709
  return self.expr.output
1605
1710
 
1606
1711
 
1607
- class OrderBy(Mergeable, Namespaced, BaseModel):
1712
+ class OrderBy(SelectContext, Mergeable, Namespaced, BaseModel):
1608
1713
  items: List[OrderItem]
1609
1714
 
1610
1715
  def with_namespace(self, namespace: str) -> "OrderBy":
@@ -1617,6 +1722,14 @@ class OrderBy(Mergeable, Namespaced, BaseModel):
1617
1722
  items=[x.with_merge(source, target, modifiers) for x in self.items]
1618
1723
  )
1619
1724
 
1725
+ def with_select_context(self, local_concepts, grain, environment):
1726
+ return OrderBy(
1727
+ items=[
1728
+ x.with_select_context(local_concepts, grain, environment)
1729
+ for x in self.items
1730
+ ]
1731
+ )
1732
+
1620
1733
  @property
1621
1734
  def concept_arguments(self):
1622
1735
  return [x.expr for x in self.items]
@@ -1631,20 +1744,21 @@ class SelectStatement(HasUUID, Mergeable, Namespaced, SelectTypeMixin, BaseModel
1631
1744
  selection: List[SelectItem]
1632
1745
  order_by: Optional[OrderBy] = None
1633
1746
  limit: Optional[int] = None
1634
- meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
1635
-
1636
- def refresh_bindings(self, environment: Environment):
1637
- for item in self.selection:
1638
- if isinstance(item.content, Concept):
1639
- item.content = environment.concepts[item.content.address].with_grain(
1640
- self.grain
1641
- )
1747
+ meta: Metadata = Field(default_factory=lambda: Metadata())
1748
+ local_concepts: Annotated[
1749
+ EnvironmentConceptDict, PlainValidator(validate_concepts)
1750
+ ] = Field(default_factory=EnvironmentConceptDict)
1642
1751
 
1643
- def validate_syntax(self):
1752
+ def validate_syntax(self, environment: Environment):
1753
+ if self.where_clause:
1754
+ for x in self.where_clause.concept_arguments:
1755
+ if isinstance(x, UndefinedConcept):
1756
+ environment.concepts.raise_undefined(
1757
+ x.address, x.metadata.line_number
1758
+ )
1644
1759
  all_in_output = [x.address for x in self.output_components]
1645
1760
  if self.where_clause:
1646
1761
  for concept in self.where_clause.concept_arguments:
1647
-
1648
1762
  if (
1649
1763
  concept.lineage
1650
1764
  and isinstance(concept.lineage, Function)
@@ -1667,6 +1781,7 @@ class SelectStatement(HasUUID, Mergeable, Namespaced, SelectTypeMixin, BaseModel
1667
1781
  f"Cannot reference an aggregate derived in the select ({concept.address}) in the same statement where clause; move to the HAVING clause instead; Line: {self.meta.line_number}"
1668
1782
  )
1669
1783
  if self.having_clause:
1784
+ self.having_clause.hydrate_missing(self.local_concepts)
1670
1785
  for concept in self.having_clause.concept_arguments:
1671
1786
  if concept.address not in [x.address for x in self.output_components]:
1672
1787
  raise SyntaxError(
@@ -1884,9 +1999,6 @@ class CopyStatement(BaseModel):
1884
1999
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
1885
2000
  select: SelectStatement
1886
2001
 
1887
- def refresh_bindings(self, environment: Environment):
1888
- self.select.refresh_bindings(environment)
1889
-
1890
2002
 
1891
2003
  class AlignItem(Namespaced, BaseModel):
1892
2004
  alias: str
@@ -1940,24 +2052,21 @@ class MultiSelectStatement(HasUUID, SelectTypeMixin, Mergeable, Namespaced, Base
1940
2052
  order_by: Optional[OrderBy] = None
1941
2053
  limit: Optional[int] = None
1942
2054
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
1943
-
1944
- def refresh_bindings(self, environment: Environment):
1945
- for select in self.selects:
1946
- select.refresh_bindings(environment)
2055
+ local_concepts: Annotated[
2056
+ EnvironmentConceptDict, PlainValidator(validate_concepts)
2057
+ ] = Field(default_factory=EnvironmentConceptDict)
1947
2058
 
1948
2059
  def __repr__(self):
1949
2060
  return "MultiSelect<" + " MERGE ".join([str(s) for s in self.selects]) + ">"
1950
2061
 
1951
- @computed_field # type: ignore
1952
- @cached_property
2062
+ @property
1953
2063
  def arguments(self) -> List[Concept]:
1954
2064
  output = []
1955
2065
  for select in self.selects:
1956
2066
  output += select.input_components
1957
2067
  return unique(output, "address")
1958
2068
 
1959
- @computed_field # type: ignore
1960
- @cached_property
2069
+ @property
1961
2070
  def concept_arguments(self) -> List[Concept]:
1962
2071
  output = []
1963
2072
  for select in self.selects:
@@ -2007,6 +2116,9 @@ class MultiSelectStatement(HasUUID, SelectTypeMixin, Mergeable, Namespaced, Base
2007
2116
  if self.where_clause
2008
2117
  else None
2009
2118
  ),
2119
+ local_concepts=EnvironmentConceptDict(
2120
+ {k: v.with_namespace(namespace) for k, v in self.local_concepts.items()}
2121
+ ),
2010
2122
  )
2011
2123
 
2012
2124
  @property
@@ -2024,7 +2136,7 @@ class MultiSelectStatement(HasUUID, SelectTypeMixin, Mergeable, Namespaced, Base
2024
2136
  output.append(item.gen_concept(self))
2025
2137
  return output
2026
2138
 
2027
- def find_source(self, concept: Concept, cte: CTE) -> Concept:
2139
+ def find_source(self, concept: Concept, cte: CTE | UnionCTE) -> Concept:
2028
2140
  for x in self.align.items:
2029
2141
  if concept.name == x.alias:
2030
2142
  for c in x.concepts:
@@ -2125,6 +2237,9 @@ class Datasource(HasUUID, Namespaced, BaseModel):
2125
2237
  where: Optional[WhereClause] = None
2126
2238
  non_partial_for: Optional[WhereClause] = None
2127
2239
 
2240
+ def duplicate(self) -> Datasource:
2241
+ return self.model_copy(deep=True)
2242
+
2128
2243
  def merge_concept(
2129
2244
  self, source: Concept, target: Concept, modifiers: List[Modifier]
2130
2245
  ):
@@ -2701,7 +2816,7 @@ class CTE(BaseModel):
2701
2816
  base: bool = False
2702
2817
  group_to_grain: bool = False
2703
2818
  existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
2704
- parent_ctes: List["CTE"] = Field(default_factory=list)
2819
+ parent_ctes: List[Union["CTE", "UnionCTE"]] = Field(default_factory=list)
2705
2820
  joins: List[Union["Join", "InstantiatedUnnestJoin"]] = Field(default_factory=list)
2706
2821
  condition: Optional[Union["Conditional", "Comparison", "Parenthetical"]] = None
2707
2822
  partial_concepts: List[Concept] = Field(default_factory=list)
@@ -2745,6 +2860,7 @@ class CTE(BaseModel):
2745
2860
 
2746
2861
  if self.condition:
2747
2862
  self.condition = self.condition.inline_constant(concept)
2863
+
2748
2864
  # if we've entirely removed the need to join to someplace to get the concept
2749
2865
  # drop the join as well.
2750
2866
  for removed_cte in removed:
@@ -2876,7 +2992,9 @@ class CTE(BaseModel):
2876
2992
  self.group_to_grain = True
2877
2993
  return True
2878
2994
 
2879
- def __add__(self, other: "CTE"):
2995
+ def __add__(self, other: "CTE" | UnionCTE):
2996
+ if isinstance(other, UnionCTE):
2997
+ raise ValueError("cannot merge CTE and union CTE")
2880
2998
  logger.info('Merging two copies of CTE "%s"', self.name)
2881
2999
  if not self.grain == other.grain:
2882
3000
  error = (
@@ -3052,8 +3170,72 @@ class CTE(BaseModel):
3052
3170
  return [c for c in self.output_columns if c.address in self.source_map]
3053
3171
 
3054
3172
 
3055
- def merge_ctes(ctes: List[CTE]) -> List[CTE]:
3056
- final_ctes_dict: Dict[str, CTE] = {}
3173
+ class UnionCTE(BaseModel):
3174
+ name: str
3175
+ source: QueryDatasource
3176
+ parent_ctes: list[CTE | UnionCTE]
3177
+ internal_ctes: list[CTE | UnionCTE]
3178
+ output_columns: List[Concept]
3179
+ grain: Grain
3180
+ operator: str = "UNION ALL"
3181
+ order_by: Optional[OrderBy] = None
3182
+ limit: Optional[int] = None
3183
+ hidden_concepts: list[Concept] = Field(default_factory=list)
3184
+ partial_concepts: list[Concept] = Field(default_factory=list)
3185
+ existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
3186
+
3187
+ @computed_field # type: ignore
3188
+ @property
3189
+ def output_lcl(self) -> LooseConceptList:
3190
+ return LooseConceptList(concepts=self.output_columns)
3191
+
3192
+ def get_alias(self, concept: Concept, source: str | None = None) -> str:
3193
+ for cte in self.parent_ctes:
3194
+ if concept.address in cte.output_columns:
3195
+ if source and source != cte.name:
3196
+ continue
3197
+ return concept.safe_address
3198
+ return "INVALID_ALIAS"
3199
+
3200
+ def get_concept(self, address: str) -> Concept | None:
3201
+ for cte in self.internal_ctes:
3202
+ if address in cte.output_columns:
3203
+ match = [x for x in cte.output_columns if x.address == address].pop()
3204
+ return match
3205
+
3206
+ match_list = [x for x in self.output_columns if x.address == address]
3207
+ if match_list:
3208
+ return match_list.pop()
3209
+ return None
3210
+
3211
+ @property
3212
+ def source_map(self):
3213
+ return {x.address: [] for x in self.output_columns}
3214
+
3215
+ @property
3216
+ def condition(self):
3217
+ return None
3218
+
3219
+ @condition.setter
3220
+ def condition(self, value):
3221
+ raise NotImplementedError
3222
+
3223
+ @property
3224
+ def safe_identifier(self):
3225
+ return self.name
3226
+
3227
+ @property
3228
+ def group_to_grain(self) -> bool:
3229
+ return False
3230
+
3231
+ def __add__(self, other):
3232
+ if not isinstance(other, UnionCTE) or not other.name == self.name:
3233
+ raise SyntaxError("Cannot merge union CTEs")
3234
+ return self
3235
+
3236
+
3237
+ def merge_ctes(ctes: List[CTE | UnionCTE]) -> List[CTE | UnionCTE]:
3238
+ final_ctes_dict: Dict[str, CTE | UnionCTE] = {}
3057
3239
  # merge CTEs
3058
3240
  for cte in ctes:
3059
3241
  if cte.name not in final_ctes_dict:
@@ -3078,7 +3260,6 @@ class JoinKey(BaseModel):
3078
3260
 
3079
3261
 
3080
3262
  class Join(BaseModel):
3081
-
3082
3263
  right_cte: CTE
3083
3264
  jointype: JoinType
3084
3265
  left_cte: CTE | None = None
@@ -3127,134 +3308,24 @@ class Join(BaseModel):
3127
3308
  class UndefinedConcept(Concept, Mergeable, Namespaced):
3128
3309
  model_config = ConfigDict(arbitrary_types_allowed=True)
3129
3310
  name: str
3130
- environment: "EnvironmentConceptDict"
3131
3311
  line_no: int | None = None
3132
3312
  datatype: DataType | ListType | StructType | MapType | NumericType = (
3133
3313
  DataType.UNKNOWN
3134
3314
  )
3135
- purpose: Purpose = Purpose.KEY
3136
-
3137
- def with_merge(
3138
- self, source: Concept, target: Concept, modifiers: List[Modifier]
3139
- ) -> "UndefinedConcept" | Concept:
3140
- if self.address == source.address:
3141
- new = target.with_grain(self.grain.with_merge(source, target, modifiers))
3142
- new.pseudonyms.add(self.address)
3143
- return new
3144
- return self.__class__(
3145
- name=self.name,
3146
- datatype=self.datatype,
3147
- purpose=self.purpose,
3148
- metadata=self.metadata,
3149
- lineage=(
3150
- self.lineage.with_merge(source, target, modifiers)
3151
- if self.lineage
3152
- else None
3153
- ),
3154
- grain=self.grain.with_merge(source, target, modifiers),
3155
- namespace=self.namespace,
3156
- keys=(
3157
- tuple(x.with_merge(source, target, modifiers) for x in self.keys)
3158
- if self.keys
3159
- else None
3160
- ),
3161
- environment=self.environment,
3162
- line_no=self.line_no,
3163
- )
3164
-
3165
- def with_namespace(self, namespace: str) -> "UndefinedConcept":
3166
- return self.__class__(
3167
- name=self.name,
3168
- datatype=self.datatype,
3169
- purpose=self.purpose,
3170
- metadata=self.metadata,
3171
- lineage=self.lineage.with_namespace(namespace) if self.lineage else None,
3172
- grain=(
3173
- self.grain.with_namespace(namespace)
3174
- if self.grain
3175
- else Grain(components=[])
3176
- ),
3177
- namespace=namespace,
3178
- keys=self.keys,
3179
- environment=self.environment,
3180
- line_no=self.line_no,
3181
- )
3315
+ purpose: Purpose = Purpose.UNKNOWN
3182
3316
 
3183
3317
  def with_select_context(
3184
3318
  self,
3185
- grain: Optional["Grain"] = None,
3186
- conditional: Conditional | Comparison | Parenthetical | None = None,
3187
- environment: Environment | None = None,
3188
- ) -> "UndefinedConcept":
3189
- if not all([isinstance(x, Concept) for x in self.keys or []]):
3190
- raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")
3191
- new_grain = grain or Grain(components=[])
3192
- if self.lineage:
3193
- new_lineage = self.lineage
3194
- if isinstance(self.lineage, SelectContext):
3195
- new_lineage = self.lineage.with_select_context(
3196
- new_grain, conditional, environment
3197
- )
3198
- else:
3199
- new_lineage = None
3200
- return self.__class__(
3201
- name=self.name,
3202
- datatype=self.datatype,
3203
- purpose=self.purpose,
3204
- metadata=self.metadata,
3205
- lineage=new_lineage,
3206
- grain=new_grain,
3207
- namespace=self.namespace,
3208
- keys=self.keys,
3209
- environment=self.environment,
3210
- )
3211
-
3212
- def with_grain(self, grain: Optional["Grain"] = None) -> "UndefinedConcept":
3213
- return self.__class__(
3214
- name=self.name,
3215
- datatype=self.datatype,
3216
- purpose=self.purpose,
3217
- metadata=self.metadata,
3218
- lineage=self.lineage,
3219
- grain=grain or Grain(components=[]),
3220
- namespace=self.namespace,
3221
- keys=self.keys,
3222
- environment=self.environment,
3223
- line_no=self.line_no,
3224
- )
3319
+ local_concepts: dict[str, Concept],
3320
+ grain: Grain,
3321
+ environment: Environment,
3322
+ ) -> "Concept":
3323
+ if self.address in local_concepts:
3324
+ rval = local_concepts[self.address]
3325
+ rval = rval.with_select_context(local_concepts, grain, environment)
3225
3326
 
3226
- def with_default_grain(self) -> "UndefinedConcept":
3227
- if self.purpose == Purpose.KEY:
3228
- # we need to make this abstract
3229
- grain = Grain(components=[self.with_grain(Grain())], nested=True)
3230
- elif self.purpose == Purpose.PROPERTY:
3231
- components: List[Concept] = []
3232
- if self.keys:
3233
- components = [*self.keys]
3234
- if self.lineage:
3235
- for item in self.lineage.arguments:
3236
- if isinstance(item, Concept):
3237
- if item.keys and not all(c in components for c in item.keys):
3238
- components += item.sources
3239
- else:
3240
- components += item.sources
3241
- grain = Grain(components=components)
3242
- elif self.purpose == Purpose.METRIC:
3243
- grain = Grain()
3244
- else:
3245
- grain = self.grain # type: ignore
3246
- return self.__class__(
3247
- name=self.name,
3248
- datatype=self.datatype,
3249
- purpose=self.purpose,
3250
- metadata=self.metadata,
3251
- lineage=self.lineage,
3252
- grain=grain,
3253
- keys=self.keys,
3254
- namespace=self.namespace,
3255
- environment=self.environment,
3256
- line_no=self.line_no,
3257
- )
3327
+ return rval
3328
+ environment.concepts.raise_undefined(self.address, line_no=self.line_no)
3258
3329
 
3259
3330
 
3260
3331
  class EnvironmentDatasourceDict(dict):
@@ -3277,78 +3348,10 @@ class EnvironmentDatasourceDict(dict):
3277
3348
  def items(self) -> ItemsView[str, Datasource]: # type: ignore
3278
3349
  return super().items()
3279
3350
 
3280
-
3281
- class EnvironmentConceptDict(dict):
3282
- def __init__(self, *args, **kwargs) -> None:
3283
- super().__init__(self, *args, **kwargs)
3284
- self.undefined: dict[str, UndefinedConcept] = {}
3285
- self.fail_on_missing: bool = True
3286
- self.populate_default_concepts()
3287
-
3288
- def populate_default_concepts(self):
3289
- from trilogy.core.internal import DEFAULT_CONCEPTS
3290
-
3291
- for concept in DEFAULT_CONCEPTS.values():
3292
- self[concept.address] = concept
3293
-
3294
- def values(self) -> ValuesView[Concept]: # type: ignore
3295
- return super().values()
3296
-
3297
- def get(self, key: str, default: Concept | None = None) -> Concept | None: # type: ignore
3298
- try:
3299
- return self.__getitem__(key)
3300
- except UndefinedConceptException:
3301
- return default
3302
-
3303
- def __getitem__(
3304
- self, key, line_no: int | None = None, file: Path | None = None
3305
- ) -> Concept | UndefinedConcept:
3306
- try:
3307
- return super(EnvironmentConceptDict, self).__getitem__(key)
3308
-
3309
- except KeyError:
3310
- if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
3311
- return self.__getitem__(key.split(".", 1)[1], line_no)
3312
- if DEFAULT_NAMESPACE + "." + key in self:
3313
- return self.__getitem__(DEFAULT_NAMESPACE + "." + key, line_no)
3314
- if not self.fail_on_missing:
3315
- if key in self.undefined:
3316
- return self.undefined[key]
3317
- undefined = UndefinedConcept(
3318
- name=key,
3319
- line_no=line_no,
3320
- environment=self,
3321
- datatype=DataType.UNKNOWN,
3322
- purpose=Purpose.KEY,
3323
- )
3324
- self.undefined[key] = undefined
3325
- return undefined
3326
- matches = self._find_similar_concepts(key)
3327
- message = f"Undefined concept: {key}."
3328
- if matches:
3329
- message += f" Suggestions: {matches}"
3330
-
3331
- if line_no:
3332
- if file:
3333
- raise UndefinedConceptException(
3334
- f"{file}: {line_no}: " + message, matches
3335
- )
3336
- raise UndefinedConceptException(f"line: {line_no}: " + message, matches)
3337
- raise UndefinedConceptException(message, matches)
3338
-
3339
- def _find_similar_concepts(self, concept_name: str):
3340
- def strip_local(input: str):
3341
- if input.startswith(f"{DEFAULT_NAMESPACE}."):
3342
- return input[len(DEFAULT_NAMESPACE) + 1 :]
3343
- return input
3344
-
3345
- matches = difflib.get_close_matches(
3346
- strip_local(concept_name), [strip_local(x) for x in self.keys()]
3347
- )
3348
- return matches
3349
-
3350
- def items(self) -> ItemsView[str, Concept]: # type: ignore
3351
- return super().items()
3351
+ def duplicate(self) -> "EnvironmentDatasourceDict":
3352
+ new = EnvironmentDatasourceDict()
3353
+ new.update({k: v.duplicate() for k, v in self.items()})
3354
+ return new
3352
3355
 
3353
3356
 
3354
3357
  class ImportStatement(HasUUID, BaseModel):
@@ -3405,10 +3408,31 @@ class Environment(BaseModel):
3405
3408
  materialized_concepts: set[str] = Field(default_factory=set)
3406
3409
  alias_origin_lookup: Dict[str, Concept] = Field(default_factory=dict)
3407
3410
  # TODO: support freezing environments to avoid mutation
3408
- # frozen: bool = False
3411
+ frozen: bool = False
3412
+
3413
+ def freeze(self):
3414
+ self.frozen = True
3415
+
3416
+ def thaw(self):
3417
+ self.frozen = False
3409
3418
 
3410
3419
  def duplicate(self):
3411
- return self.model_copy(deep=True)
3420
+ return Environment.model_construct(
3421
+ datasources=self.datasources.duplicate(),
3422
+ concepts=self.concepts.duplicate(),
3423
+ functions=dict(self.functions),
3424
+ data_types=dict(self.data_types),
3425
+ imports=dict(self.imports),
3426
+ namespace=self.namespace,
3427
+ working_path=self.working_path,
3428
+ environment_config=self.environment_config,
3429
+ version=self.version,
3430
+ cte_name_map=dict(self.cte_name_map),
3431
+ materialized_concepts=set(self.materialized_concepts),
3432
+ alias_origin_lookup={
3433
+ k: v.duplicate() for k, v in self.alias_origin_lookup.items()
3434
+ },
3435
+ )
3412
3436
 
3413
3437
  def __init__(self, **data):
3414
3438
  super().__init__(**data)
@@ -3540,6 +3564,8 @@ class Environment(BaseModel):
3540
3564
  def add_import(
3541
3565
  self, alias: str, source: Environment, imp_stm: ImportStatement | None = None
3542
3566
  ):
3567
+ if self.frozen:
3568
+ raise ValueError("Environment is frozen, cannot add imports")
3543
3569
  exists = False
3544
3570
  existing = self.imports[alias]
3545
3571
  if imp_stm:
@@ -3560,6 +3586,9 @@ class Environment(BaseModel):
3560
3586
  # we can't exit early
3561
3587
  # as there may be new concepts
3562
3588
  for k, concept in source.concepts.items():
3589
+ # skip internal namespace
3590
+ if INTERNAL_NAMESPACE in concept.address:
3591
+ continue
3563
3592
  if same_namespace:
3564
3593
  new = self.add_concept(concept, _ignore_cache=True)
3565
3594
  else:
@@ -3592,9 +3621,11 @@ class Environment(BaseModel):
3592
3621
  def add_file_import(
3593
3622
  self, path: str | Path, alias: str, env: Environment | None = None
3594
3623
  ):
3624
+ if self.frozen:
3625
+ raise ValueError("Environment is frozen, cannot add imports")
3595
3626
  from trilogy.parsing.parse_engine import (
3596
- ParseToObjects,
3597
3627
  PARSER,
3628
+ ParseToObjects,
3598
3629
  gen_cache_lookup,
3599
3630
  )
3600
3631
 
@@ -3626,6 +3657,7 @@ class Environment(BaseModel):
3626
3657
  )
3627
3658
  nparser.set_text(text)
3628
3659
  nparser.transform(PARSER.parse(text))
3660
+ nparser.hydrate_missing()
3629
3661
 
3630
3662
  except Exception as e:
3631
3663
  raise ImportError(
@@ -3676,6 +3708,8 @@ class Environment(BaseModel):
3676
3708
  add_derived: bool = True,
3677
3709
  _ignore_cache: bool = False,
3678
3710
  ):
3711
+ if self.frozen:
3712
+ raise ValueError("Environment is frozen, cannot add concepts")
3679
3713
  if not force:
3680
3714
  existing = self.validate_concept(concept, meta=meta)
3681
3715
  if existing:
@@ -3694,6 +3728,8 @@ class Environment(BaseModel):
3694
3728
  meta: Meta | None = None,
3695
3729
  _ignore_cache: bool = False,
3696
3730
  ):
3731
+ if self.frozen:
3732
+ raise ValueError("Environment is frozen, cannot add datasource")
3697
3733
  self.datasources[datasource.identifier] = datasource
3698
3734
 
3699
3735
  eligible_to_promote_roots = datasource.non_partial_for is None
@@ -3745,6 +3781,8 @@ class Environment(BaseModel):
3745
3781
  address: str,
3746
3782
  meta: Meta | None = None,
3747
3783
  ) -> bool:
3784
+ if self.frozen:
3785
+ raise ValueError("Environment is frozen, cannot delete datsources")
3748
3786
  if address in self.datasources:
3749
3787
  del self.datasources[address]
3750
3788
  self.gen_concept_list_caches()
@@ -3752,17 +3790,22 @@ class Environment(BaseModel):
3752
3790
  return False
3753
3791
 
3754
3792
  def merge_concept(
3755
- self, source: Concept, target: Concept, modifiers: List[Modifier]
3756
- ):
3793
+ self,
3794
+ source: Concept,
3795
+ target: Concept,
3796
+ modifiers: List[Modifier],
3797
+ force: bool = False,
3798
+ ) -> bool:
3799
+ if self.frozen:
3800
+ raise ValueError("Environment is frozen, cannot merge concepts")
3757
3801
  replacements = {}
3758
3802
 
3759
3803
  # exit early if we've run this
3760
- if source.address in self.alias_origin_lookup:
3804
+ if source.address in self.alias_origin_lookup and not force:
3761
3805
  if self.concepts[source.address] == target:
3762
- return
3806
+ return False
3763
3807
  self.alias_origin_lookup[source.address] = source
3764
3808
  for k, v in self.concepts.items():
3765
-
3766
3809
  if v.address == target.address:
3767
3810
  v.pseudonyms.add(source.address)
3768
3811
  if v.address == source.address:
@@ -3776,6 +3819,7 @@ class Environment(BaseModel):
3776
3819
  for k, ds in self.datasources.items():
3777
3820
  if source.address in ds.output_lcl:
3778
3821
  ds.merge_concept(source, target, modifiers=modifiers)
3822
+ return True
3779
3823
 
3780
3824
 
3781
3825
  class LazyEnvironment(Environment):
@@ -3849,6 +3893,17 @@ class Comparison(
3849
3893
  ]
3850
3894
  operator: ComparisonOperator
3851
3895
 
3896
+ def hydrate_missing(self, concepts: EnvironmentConceptDict):
3897
+ if isinstance(self.left, UndefinedConcept) and self.left.address in concepts:
3898
+ self.left = concepts[self.left.address]
3899
+ if isinstance(self.right, UndefinedConcept) and self.right.address in concepts:
3900
+ self.right = concepts[self.right.address]
3901
+ if isinstance(self.left, Mergeable):
3902
+ self.left.hydrate_missing(concepts)
3903
+ if isinstance(self.right, Mergeable):
3904
+ self.right.hydrate_missing(concepts)
3905
+ return self
3906
+
3852
3907
  def __init__(self, *args, **kwargs) -> None:
3853
3908
  super().__init__(*args, **kwargs)
3854
3909
  if self.operator in (ComparisonOperator.IS, ComparisonOperator.IS_NOT):
@@ -3963,20 +4018,17 @@ class Comparison(
3963
4018
  )
3964
4019
 
3965
4020
  def with_select_context(
3966
- self,
3967
- grain: Grain,
3968
- conditional: Conditional | Comparison | Parenthetical | None,
3969
- environment: Environment | None = None,
4021
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
3970
4022
  ):
3971
4023
  return self.__class__(
3972
4024
  left=(
3973
- self.left.with_select_context(grain, conditional, environment)
4025
+ self.left.with_select_context(local_concepts, grain, environment)
3974
4026
  if isinstance(self.left, SelectContext)
3975
4027
  else self.left
3976
4028
  ),
3977
4029
  # the right side does NOT need to inherit select grain
3978
4030
  right=(
3979
- self.right.with_select_context(grain, conditional, environment)
4031
+ self.right.with_select_context(local_concepts, grain, environment)
3980
4032
  if isinstance(self.right, SelectContext)
3981
4033
  else self.right
3982
4034
  ),
@@ -4042,7 +4094,6 @@ class Comparison(
4042
4094
 
4043
4095
 
4044
4096
  class SubselectComparison(Comparison):
4045
-
4046
4097
  def __eq__(self, other):
4047
4098
  if not isinstance(other, SubselectComparison):
4048
4099
  return False
@@ -4064,14 +4115,14 @@ class SubselectComparison(Comparison):
4064
4115
 
4065
4116
  def with_select_context(
4066
4117
  self,
4118
+ local_concepts: dict[str, Concept],
4067
4119
  grain: Grain,
4068
- conditional: Conditional | Comparison | Parenthetical | None,
4069
- environment: Environment | None = None,
4120
+ environment: Environment,
4070
4121
  ):
4071
4122
  # there's no need to pass the select grain through to a subselect comparison on the right
4072
4123
  return self.__class__(
4073
4124
  left=(
4074
- self.left.with_select_context(grain, conditional, environment)
4125
+ self.left.with_select_context(local_concepts, grain, environment)
4075
4126
  if isinstance(self.left, SelectContext)
4076
4127
  else self.left
4077
4128
  ),
@@ -4108,17 +4159,14 @@ class CaseWhen(Namespaced, SelectContext, BaseModel):
4108
4159
  )
4109
4160
 
4110
4161
  def with_select_context(
4111
- self,
4112
- grain: Grain,
4113
- conditional: Conditional | Comparison | Parenthetical | None,
4114
- environment: Environment | None = None,
4162
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
4115
4163
  ) -> CaseWhen:
4116
4164
  return CaseWhen(
4117
4165
  comparison=self.comparison.with_select_context(
4118
- grain, conditional, environment
4166
+ local_concepts, grain, environment
4119
4167
  ),
4120
4168
  expr=(
4121
- (self.expr.with_select_context(grain, conditional, environment))
4169
+ (self.expr.with_select_context(local_concepts, grain, environment))
4122
4170
  if isinstance(self.expr, SelectContext)
4123
4171
  else self.expr
4124
4172
  ),
@@ -4136,14 +4184,14 @@ class CaseElse(Namespaced, SelectContext, BaseModel):
4136
4184
 
4137
4185
  def with_select_context(
4138
4186
  self,
4187
+ local_concepts: dict[str, Concept],
4139
4188
  grain: Grain,
4140
- conditional: Conditional | Comparison | Parenthetical | None,
4141
- environment: Environment | None = None,
4142
- ) -> CaseElse:
4189
+ environment: Environment,
4190
+ ):
4143
4191
  return CaseElse(
4144
4192
  discriminant=self.discriminant,
4145
4193
  expr=(
4146
- self.expr.with_select_context(grain, conditional, environment)
4194
+ self.expr.with_select_context(local_concepts, grain, environment)
4147
4195
  if isinstance(
4148
4196
  self.expr,
4149
4197
  SelectContext,
@@ -4215,7 +4263,6 @@ class Conditional(
4215
4263
  return f"{str(self.left)} {self.operator.value} {str(self.right)}"
4216
4264
 
4217
4265
  def __eq__(self, other):
4218
-
4219
4266
  if not isinstance(other, Conditional):
4220
4267
  return False
4221
4268
  return (
@@ -4283,19 +4330,16 @@ class Conditional(
4283
4330
  )
4284
4331
 
4285
4332
  def with_select_context(
4286
- self,
4287
- grain: Grain,
4288
- conditional: Conditional | Comparison | Parenthetical | None,
4289
- environment: Environment | None = None,
4333
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
4290
4334
  ):
4291
4335
  return Conditional(
4292
4336
  left=(
4293
- self.left.with_select_context(grain, conditional, environment)
4337
+ self.left.with_select_context(local_concepts, grain, environment)
4294
4338
  if isinstance(self.left, SelectContext)
4295
4339
  else self.left
4296
4340
  ),
4297
4341
  right=(
4298
- self.right.with_select_context(grain, conditional, environment)
4342
+ self.right.with_select_context(local_concepts, grain, environment)
4299
4343
  if isinstance(self.right, SelectContext)
4300
4344
  else self.right
4301
4345
  ),
@@ -4404,16 +4448,16 @@ class AggregateWrapper(Mergeable, Namespaced, SelectContext, BaseModel):
4404
4448
  )
4405
4449
 
4406
4450
  def with_select_context(
4407
- self,
4408
- grain: Grain,
4409
- conditional: Conditional | Comparison | Parenthetical | None,
4410
- environment: Environment | None = None,
4451
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
4411
4452
  ) -> AggregateWrapper:
4412
4453
  if not self.by:
4413
4454
  by = grain.components_copy
4414
4455
  else:
4415
- by = self.by
4416
- parent = self.function.with_select_context(grain, conditional, environment)
4456
+ by = [
4457
+ x.with_select_context(local_concepts, grain, environment)
4458
+ for x in self.by
4459
+ ]
4460
+ parent = self.function.with_select_context(local_concepts, grain, environment)
4417
4461
  return AggregateWrapper(function=parent, by=by)
4418
4462
 
4419
4463
 
@@ -4448,14 +4492,11 @@ class WhereClause(Mergeable, ConceptArgs, Namespaced, SelectContext, BaseModel):
4448
4492
  return WhereClause(conditional=self.conditional.with_namespace(namespace))
4449
4493
 
4450
4494
  def with_select_context(
4451
- self,
4452
- grain: Grain,
4453
- conditional: Conditional | Comparison | Parenthetical | None,
4454
- environment: Environment | None = None,
4495
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
4455
4496
  ) -> WhereClause:
4456
- return WhereClause(
4497
+ return self.__class__(
4457
4498
  conditional=self.conditional.with_select_context(
4458
- grain, conditional, environment
4499
+ local_concepts, grain, environment
4459
4500
  )
4460
4501
  )
4461
4502
 
@@ -4485,6 +4526,18 @@ class WhereClause(Mergeable, ConceptArgs, Namespaced, SelectContext, BaseModel):
4485
4526
  class HavingClause(WhereClause):
4486
4527
  pass
4487
4528
 
4529
+ def hydrate_missing(self, concepts: EnvironmentConceptDict):
4530
+ self.conditional.hydrate_missing(concepts)
4531
+
4532
+ def with_select_context(
4533
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
4534
+ ) -> HavingClause:
4535
+ return HavingClause(
4536
+ conditional=self.conditional.with_select_context(
4537
+ local_concepts, grain, environment
4538
+ )
4539
+ )
4540
+
4488
4541
 
4489
4542
  class MaterializedDataset(BaseModel):
4490
4543
  address: Address
@@ -4497,8 +4550,8 @@ class MaterializedDataset(BaseModel):
4497
4550
 
4498
4551
  class ProcessedQuery(BaseModel):
4499
4552
  output_columns: List[Concept]
4500
- ctes: List[CTE]
4501
- base: CTE
4553
+ ctes: List[CTE | UnionCTE]
4554
+ base: CTE | UnionCTE
4502
4555
  joins: List[Join]
4503
4556
  grain: Grain
4504
4557
  hidden_columns: List[Concept] = Field(default_factory=list)
@@ -4506,6 +4559,9 @@ class ProcessedQuery(BaseModel):
4506
4559
  where_clause: Optional[WhereClause] = None
4507
4560
  having_clause: Optional[HavingClause] = None
4508
4561
  order_by: Optional[OrderBy] = None
4562
+ local_concepts: Annotated[
4563
+ EnvironmentConceptDict, PlainValidator(validate_concepts)
4564
+ ] = Field(default_factory=EnvironmentConceptDict)
4509
4565
 
4510
4566
 
4511
4567
  class PersistQueryMixin(BaseModel):
@@ -4606,7 +4662,6 @@ class RowsetDerivationStatement(HasUUID, Namespaced, BaseModel):
4606
4662
  components=[orig[c.address] for c in x.grain.components_copy]
4607
4663
  )
4608
4664
  else:
4609
-
4610
4665
  x.grain = default_grain
4611
4666
  return output
4612
4667
 
@@ -4730,14 +4785,11 @@ class Parenthetical(
4730
4785
  )
4731
4786
 
4732
4787
  def with_select_context(
4733
- self,
4734
- grain: Grain,
4735
- conditional: Conditional | Comparison | Parenthetical | None,
4736
- environment: Environment | None = None,
4788
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
4737
4789
  ):
4738
4790
  return Parenthetical(
4739
4791
  content=(
4740
- self.content.with_select_context(grain, conditional, environment)
4792
+ self.content.with_select_context(local_concepts, grain, environment)
4741
4793
  if isinstance(self.content, SelectContext)
4742
4794
  else self.content
4743
4795
  )