pytrilogy 0.0.2.46__py3-none-any.whl → 0.0.2.48__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (69)
  1. {pytrilogy-0.0.2.46.dist-info → pytrilogy-0.0.2.48.dist-info}/METADATA +1 -1
  2. pytrilogy-0.0.2.48.dist-info/RECORD +85 -0
  3. trilogy/__init__.py +2 -2
  4. trilogy/constants.py +4 -2
  5. trilogy/core/enums.py +7 -1
  6. trilogy/core/env_processor.py +1 -2
  7. trilogy/core/environment_helpers.py +5 -5
  8. trilogy/core/functions.py +11 -10
  9. trilogy/core/internal.py +2 -3
  10. trilogy/core/models.py +448 -394
  11. trilogy/core/optimization.py +37 -21
  12. trilogy/core/optimizations/__init__.py +1 -1
  13. trilogy/core/optimizations/base_optimization.py +6 -6
  14. trilogy/core/optimizations/inline_constant.py +7 -4
  15. trilogy/core/optimizations/inline_datasource.py +14 -5
  16. trilogy/core/optimizations/predicate_pushdown.py +20 -10
  17. trilogy/core/processing/concept_strategies_v3.py +40 -24
  18. trilogy/core/processing/graph_utils.py +2 -3
  19. trilogy/core/processing/node_generators/__init__.py +7 -5
  20. trilogy/core/processing/node_generators/basic_node.py +4 -4
  21. trilogy/core/processing/node_generators/common.py +10 -11
  22. trilogy/core/processing/node_generators/filter_node.py +7 -9
  23. trilogy/core/processing/node_generators/group_node.py +10 -11
  24. trilogy/core/processing/node_generators/group_to_node.py +5 -5
  25. trilogy/core/processing/node_generators/multiselect_node.py +10 -12
  26. trilogy/core/processing/node_generators/node_merge_node.py +7 -9
  27. trilogy/core/processing/node_generators/rowset_node.py +9 -8
  28. trilogy/core/processing/node_generators/select_merge_node.py +11 -10
  29. trilogy/core/processing/node_generators/select_node.py +5 -5
  30. trilogy/core/processing/node_generators/union_node.py +75 -0
  31. trilogy/core/processing/node_generators/unnest_node.py +2 -3
  32. trilogy/core/processing/node_generators/window_node.py +3 -4
  33. trilogy/core/processing/nodes/__init__.py +9 -5
  34. trilogy/core/processing/nodes/base_node.py +17 -13
  35. trilogy/core/processing/nodes/filter_node.py +3 -4
  36. trilogy/core/processing/nodes/group_node.py +8 -10
  37. trilogy/core/processing/nodes/merge_node.py +11 -11
  38. trilogy/core/processing/nodes/select_node_v2.py +8 -9
  39. trilogy/core/processing/nodes/union_node.py +50 -0
  40. trilogy/core/processing/nodes/unnest_node.py +2 -3
  41. trilogy/core/processing/nodes/window_node.py +2 -3
  42. trilogy/core/processing/utility.py +37 -40
  43. trilogy/core/query_processor.py +68 -44
  44. trilogy/dialect/base.py +95 -53
  45. trilogy/dialect/bigquery.py +2 -3
  46. trilogy/dialect/common.py +5 -4
  47. trilogy/dialect/config.py +0 -2
  48. trilogy/dialect/duckdb.py +2 -2
  49. trilogy/dialect/enums.py +5 -5
  50. trilogy/dialect/postgres.py +2 -2
  51. trilogy/dialect/presto.py +3 -4
  52. trilogy/dialect/snowflake.py +2 -2
  53. trilogy/dialect/sql_server.py +3 -4
  54. trilogy/engine.py +2 -1
  55. trilogy/executor.py +43 -30
  56. trilogy/hooks/base_hook.py +5 -4
  57. trilogy/hooks/graph_hook.py +2 -1
  58. trilogy/hooks/query_debugger.py +18 -8
  59. trilogy/parsing/common.py +15 -20
  60. trilogy/parsing/parse_engine.py +124 -88
  61. trilogy/parsing/render.py +32 -35
  62. trilogy/parsing/trilogy.lark +8 -1
  63. trilogy/scripts/trilogy.py +6 -4
  64. trilogy/utility.py +1 -1
  65. pytrilogy-0.0.2.46.dist-info/RECORD +0 -83
  66. {pytrilogy-0.0.2.46.dist-info → pytrilogy-0.0.2.48.dist-info}/LICENSE.md +0 -0
  67. {pytrilogy-0.0.2.46.dist-info → pytrilogy-0.0.2.48.dist-info}/WHEEL +0 -0
  68. {pytrilogy-0.0.2.46.dist-info → pytrilogy-0.0.2.48.dist-info}/entry_points.txt +0 -0
  69. {pytrilogy-0.0.2.46.dist-info → pytrilogy-0.0.2.48.dist-info}/top_level.txt +0 -0
trilogy/core/models.py CHANGED
@@ -1,82 +1,86 @@
1
1
  from __future__ import annotations
2
+
2
3
  import difflib
4
+ import hashlib
3
5
  import os
6
+ from abc import ABC
7
+ from collections import UserDict, UserList, defaultdict
4
8
  from enum import Enum
9
+ from functools import cached_property
10
+ from pathlib import Path
5
11
  from typing import (
12
+ Annotated,
13
+ Any,
14
+ Callable,
6
15
  Dict,
7
- TypeVar,
16
+ Generic,
17
+ ItemsView,
8
18
  List,
19
+ Never,
9
20
  Optional,
10
- Union,
11
- Set,
12
- Any,
21
+ Self,
13
22
  Sequence,
14
- ValuesView,
15
- Callable,
16
- Annotated,
17
- get_args,
18
- Generic,
23
+ Set,
19
24
  Tuple,
20
25
  Type,
21
- ItemsView,
26
+ TypeVar,
27
+ Union,
28
+ ValuesView,
29
+ get_args,
22
30
  )
23
- from pydantic_core import core_schema
24
- from pydantic.functional_validators import PlainValidator
31
+
32
+ from lark.tree import Meta
25
33
  from pydantic import (
26
34
  BaseModel,
27
- Field,
28
35
  ConfigDict,
29
- field_validator,
36
+ Field,
30
37
  ValidationInfo,
31
38
  ValidatorFunctionWrapHandler,
32
39
  computed_field,
40
+ field_validator,
33
41
  )
34
- from lark.tree import Meta
35
- from pathlib import Path
42
+ from pydantic.functional_validators import PlainValidator
43
+ from pydantic_core import core_schema
44
+
36
45
  from trilogy.constants import (
37
- logger,
46
+ CONFIG,
38
47
  DEFAULT_NAMESPACE,
39
48
  ENV_CACHE_NAME,
40
49
  MagicConstants,
41
- CONFIG,
50
+ logger,
42
51
  )
43
52
  from trilogy.core.constants import (
44
53
  ALL_ROWS_CONCEPT,
45
- INTERNAL_NAMESPACE,
46
54
  CONSTANT_DATASET,
55
+ INTERNAL_NAMESPACE,
47
56
  PERSISTED_CONCEPT_PREFIX,
48
57
  )
49
58
  from trilogy.core.enums import (
50
- InfiniteFunctionArgs,
51
- Purpose,
52
- JoinType,
53
- Ordering,
54
- Modifier,
55
- FunctionType,
56
- FunctionClass,
57
59
  BooleanOperator,
58
60
  ComparisonOperator,
59
- WindowOrder,
60
- PurposeLineage,
61
- SourceType,
62
- WindowType,
63
61
  ConceptSource,
64
62
  DatePart,
65
- ShowCategory,
63
+ FunctionClass,
64
+ FunctionType,
66
65
  Granularity,
67
- SelectFiltering,
66
+ InfiniteFunctionArgs,
68
67
  IOType,
68
+ JoinType,
69
+ Modifier,
70
+ Ordering,
71
+ Purpose,
72
+ PurposeLineage,
73
+ SelectFiltering,
74
+ ShowCategory,
75
+ SourceType,
76
+ WindowOrder,
77
+ WindowType,
69
78
  )
70
79
  from trilogy.core.exceptions import (
71
- UndefinedConceptException,
72
80
  InvalidSyntaxException,
81
+ UndefinedConceptException,
73
82
  )
74
83
  from trilogy.utility import unique
75
- from collections import UserList, UserDict
76
- from functools import cached_property
77
- from abc import ABC
78
- from collections import defaultdict
79
- import hashlib
80
84
 
81
85
  LOGGER_PREFIX = "[MODELS]"
82
86
 
@@ -152,19 +156,19 @@ NAMESPACED_TYPES = Union[
152
156
 
153
157
 
154
158
  class Namespaced(ABC):
155
-
156
159
  def with_namespace(self, namespace: str):
157
160
  raise NotImplementedError
158
161
 
159
162
 
160
163
  class Mergeable(ABC):
161
-
162
164
  def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
163
165
  raise NotImplementedError
164
166
 
167
+ def hydrate_missing(self, concepts: EnvironmentConceptDict):
168
+ return self
165
169
 
166
- class ConceptArgs(ABC):
167
170
 
171
+ class ConceptArgs(ABC):
168
172
  @property
169
173
  def concept_arguments(self) -> List["Concept"]:
170
174
  raise NotImplementedError
@@ -179,13 +183,12 @@ class ConceptArgs(ABC):
179
183
 
180
184
 
181
185
  class SelectContext(ABC):
182
-
183
186
  def with_select_context(
184
187
  self,
188
+ local_concepts: dict[str, Concept],
185
189
  grain: Grain,
186
- conditional: Conditional | Comparison | Parenthetical | None,
187
- environment: Environment | None = None,
188
- ):
190
+ environment: Environment,
191
+ ) -> Any:
189
192
  raise NotImplementedError
190
193
 
191
194
 
@@ -195,7 +198,6 @@ class ConstantInlineable(ABC):
195
198
 
196
199
 
197
200
  class HasUUID(ABC):
198
-
199
201
  @property
200
202
  def uuid(self) -> str:
201
203
  return hashlib.md5(str(self).encode()).hexdigest()
@@ -456,8 +458,17 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
456
458
  pseudonyms: set[str] = Field(default_factory=set)
457
459
  _address_cache: str | None = None
458
460
 
461
+ def duplicate(self) -> Concept:
462
+ return self.model_copy(deep=True)
463
+
459
464
  def __hash__(self):
460
- return hash(str(self))
465
+ return hash(
466
+ f"{self.name}+{self.datatype}+ {self.purpose} + {str(self.lineage)} + {self.namespace} + {str(self.grain)} + {str(self.keys)}"
467
+ )
468
+
469
+ def __repr__(self):
470
+ base = f"{self.namespace}.{self.address}@{self.grain}"
471
+ return base
461
472
 
462
473
  @property
463
474
  def is_aggregate(self):
@@ -476,7 +487,7 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
476
487
  return True
477
488
  return False
478
489
 
479
- def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
490
+ def with_merge(self, source: Self, target: Self, modifiers: List[Modifier]) -> Self:
480
491
  if self.address == source.address:
481
492
  new = target.with_grain(self.grain.with_merge(source, target, modifiers))
482
493
  new.pseudonyms.add(self.address)
@@ -556,11 +567,13 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
556
567
  v = Grain(components=values["lineage"].by)
557
568
  elif not v:
558
569
  v = Grain(components=[])
570
+ elif isinstance(v, Grain):
571
+ return v
559
572
  elif isinstance(v, Concept):
560
573
  v = Grain(components=[v])
561
574
  elif isinstance(v, dict):
562
575
  v = Grain.model_validate(v)
563
- if not v:
576
+ else:
564
577
  raise SyntaxError(f"Invalid grain {v} for concept {values['name']}")
565
578
  return v
566
579
 
@@ -610,7 +623,7 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
610
623
  def grain_components(self) -> List["Concept"]:
611
624
  return self.grain.components_copy if self.grain else []
612
625
 
613
- def with_namespace(self, namespace: str) -> "Concept":
626
+ def with_namespace(self, namespace: str) -> Self:
614
627
  if namespace == self.namespace:
615
628
  return self
616
629
  return self.__class__(
@@ -641,33 +654,44 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
641
654
  )
642
655
 
643
656
  def with_select_context(
644
- self,
645
- grain: Optional["Grain"] = None,
646
- conditional: Conditional | Comparison | Parenthetical | None = None,
647
- environment: Environment | None = None,
648
- ) -> "Concept":
649
- if not all([isinstance(x, Concept) for x in self.keys or []]):
650
- raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")
651
- new_grain = grain or self.grain
657
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
658
+ ) -> Concept:
652
659
  new_lineage = self.lineage
653
660
  if isinstance(self.lineage, SelectContext):
654
661
  new_lineage = self.lineage.with_select_context(
655
- new_grain, conditional, environment=environment
662
+ local_concepts=local_concepts, grain=grain, environment=environment
656
663
  )
664
+ final_grain = self.grain
665
+
666
+ if self.is_aggregate and isinstance(new_lineage, Function):
667
+ new_lineage = AggregateWrapper(function=new_lineage, by=grain.components)
668
+ final_grain = grain
669
+
657
670
  return self.__class__(
658
671
  name=self.name,
659
672
  datatype=self.datatype,
660
673
  purpose=self.purpose,
661
674
  metadata=self.metadata,
662
675
  lineage=new_lineage,
663
- grain=new_grain,
676
+ grain=final_grain,
664
677
  namespace=self.namespace,
665
- keys=self.keys,
678
+ keys=(
679
+ tuple(
680
+ [
681
+ x.with_select_context(local_concepts, grain, environment)
682
+ for x in self.keys
683
+ ]
684
+ )
685
+ if self.keys
686
+ else None
687
+ ),
666
688
  modifiers=self.modifiers,
667
- pseudonyms=self.pseudonyms,
689
+ # a select needs to always defer to the environment for pseudonyms
690
+ # TODO: evaluate if this should be cached
691
+ pseudonyms=(environment.concepts.get(self.address) or self).pseudonyms,
668
692
  )
669
693
 
670
- def with_grain(self, grain: Optional["Grain"] = None) -> "Concept":
694
+ def with_grain(self, grain: Optional["Grain"] = None) -> Self:
671
695
  if not all([isinstance(x, Concept) for x in self.keys or []]):
672
696
  raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")
673
697
  return self.__class__(
@@ -683,8 +707,8 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
683
707
  pseudonyms=self.pseudonyms,
684
708
  )
685
709
 
686
- @cached_property
687
- def _with_default_grain(self) -> "Concept":
710
+ @property
711
+ def _with_default_grain(self) -> Self:
688
712
  if self.purpose == Purpose.KEY:
689
713
  # we need to make this abstract
690
714
  grain = Grain(components=[self.with_grain(Grain())], nested=True)
@@ -788,6 +812,12 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
788
812
  and self.lineage.operator == FunctionType.UNNEST
789
813
  ):
790
814
  return PurposeLineage.UNNEST
815
+ elif (
816
+ self.lineage
817
+ and isinstance(self.lineage, Function)
818
+ and self.lineage.operator == FunctionType.UNION
819
+ ):
820
+ return PurposeLineage.UNION
791
821
  elif (
792
822
  self.lineage
793
823
  and isinstance(self.lineage, Function)
@@ -827,7 +857,7 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
827
857
  elif (
828
858
  self.lineage
829
859
  and isinstance(self.lineage, Function)
830
- and self.lineage.operator == FunctionType.UNNEST
860
+ and self.lineage.operator in (FunctionType.UNNEST, FunctionType.UNION)
831
861
  ):
832
862
  return Granularity.MULTI_ROW
833
863
  elif self.lineage and all(
@@ -836,6 +866,7 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
836
866
  for x in self.lineage.concept_arguments
837
867
  ]
838
868
  ):
869
+
839
870
  return Granularity.SINGLE_ROW
840
871
  return Granularity.MULTI_ROW
841
872
 
@@ -867,7 +898,15 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
867
898
  return new
868
899
 
869
900
 
870
- class Grain(Mergeable, BaseModel):
901
+ class ConceptRef(BaseModel):
902
+ address: str
903
+ line_no: int
904
+
905
+ def hydrate(self, environment: Environment) -> Concept:
906
+ return environment.concepts.__getitem__(self.address, self.line_no)
907
+
908
+
909
+ class Grain(Mergeable, BaseModel, SelectContext):
871
910
  nested: bool = False
872
911
  components: List[Concept] = Field(default_factory=list, validate_default=True)
873
912
  where_clause: Optional[WhereClause] = Field(default=None)
@@ -890,6 +929,20 @@ class Grain(Mergeable, BaseModel):
890
929
  v2 = sorted(final, key=lambda x: x.name)
891
930
  return v2
892
931
 
932
+ def with_select_context(
933
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
934
+ ):
935
+ if self.nested:
936
+ return self
937
+ return Grain(
938
+ components=[
939
+ x.with_select_context(local_concepts, grain, environment)
940
+ for x in self.components
941
+ ],
942
+ where_clause=self.where_clause,
943
+ nested=self.nested,
944
+ )
945
+
893
946
  def with_filter(
894
947
  self,
895
948
  condition: "Conditional | Comparison | Parenthetical",
@@ -949,7 +1002,7 @@ class Grain(Mergeable, BaseModel):
949
1002
  base.append(c)
950
1003
  return set(base)
951
1004
 
952
- @cached_property
1005
+ @property
953
1006
  def set(self) -> set[str]:
954
1007
  base = []
955
1008
  for x in self.components_copy:
@@ -1016,6 +1069,89 @@ class Grain(Mergeable, BaseModel):
1016
1069
  return self.__add__(other)
1017
1070
 
1018
1071
 
1072
+ class EnvironmentConceptDict(dict):
1073
+ def __init__(self, *args, **kwargs) -> None:
1074
+ super().__init__(self, *args, **kwargs)
1075
+ self.undefined: dict[str, UndefinedConcept] = {}
1076
+ self.fail_on_missing: bool = True
1077
+ self.populate_default_concepts()
1078
+
1079
+ def duplicate(self) -> "EnvironmentConceptDict":
1080
+ new = EnvironmentConceptDict()
1081
+ new.update({k: v.duplicate() for k, v in self.items()})
1082
+ new.undefined = self.undefined
1083
+ new.fail_on_missing = self.fail_on_missing
1084
+ return new
1085
+
1086
+ def populate_default_concepts(self):
1087
+ from trilogy.core.internal import DEFAULT_CONCEPTS
1088
+
1089
+ for concept in DEFAULT_CONCEPTS.values():
1090
+ self[concept.address] = concept
1091
+
1092
+ def values(self) -> ValuesView[Concept]: # type: ignore
1093
+ return super().values()
1094
+
1095
+ def get(self, key: str, default: Concept | None = None) -> Concept | None: # type: ignore
1096
+ try:
1097
+ return self.__getitem__(key)
1098
+ except UndefinedConceptException:
1099
+ return default
1100
+
1101
+ def raise_undefined(
1102
+ self, key: str, line_no: int | None = None, file: Path | str | None = None
1103
+ ) -> Never:
1104
+ matches = self._find_similar_concepts(key)
1105
+ message = f"Undefined concept: {key}."
1106
+ if matches:
1107
+ message += f" Suggestions: {matches}"
1108
+
1109
+ if line_no:
1110
+ if file:
1111
+ raise UndefinedConceptException(
1112
+ f"{file}: {line_no}: " + message, matches
1113
+ )
1114
+ raise UndefinedConceptException(f"line: {line_no}: " + message, matches)
1115
+ raise UndefinedConceptException(message, matches)
1116
+
1117
+ def __getitem__(
1118
+ self, key: str, line_no: int | None = None, file: Path | None = None
1119
+ ) -> Concept | UndefinedConcept:
1120
+ try:
1121
+ return super(EnvironmentConceptDict, self).__getitem__(key)
1122
+ except KeyError:
1123
+ if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
1124
+ return self.__getitem__(key.split(".", 1)[1], line_no)
1125
+ if DEFAULT_NAMESPACE + "." + key in self:
1126
+ return self.__getitem__(DEFAULT_NAMESPACE + "." + key, line_no)
1127
+ if not self.fail_on_missing:
1128
+ if key in self.undefined:
1129
+ return self.undefined[key]
1130
+ undefined = UndefinedConcept(
1131
+ name=key,
1132
+ line_no=line_no,
1133
+ datatype=DataType.UNKNOWN,
1134
+ purpose=Purpose.UNKNOWN,
1135
+ )
1136
+ self.undefined[key] = undefined
1137
+ return undefined
1138
+ self.raise_undefined(key, line_no, file)
1139
+
1140
+ def _find_similar_concepts(self, concept_name: str):
1141
+ def strip_local(input: str):
1142
+ if input.startswith(f"{DEFAULT_NAMESPACE}."):
1143
+ return input[len(DEFAULT_NAMESPACE) + 1 :]
1144
+ return input
1145
+
1146
+ matches = difflib.get_close_matches(
1147
+ strip_local(concept_name), [strip_local(x) for x in self.keys()]
1148
+ )
1149
+ return matches
1150
+
1151
+ def items(self) -> ItemsView[str, Concept]: # type: ignore
1152
+ return super().items()
1153
+
1154
+
1019
1155
  class RawColumnExpr(BaseModel):
1020
1156
  text: str
1021
1157
 
@@ -1135,6 +1271,7 @@ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
1135
1271
  "CaseElse",
1136
1272
  list,
1137
1273
  ListWrapper[Any],
1274
+ WindowItem,
1138
1275
  ]
1139
1276
  ]
1140
1277
 
@@ -1149,41 +1286,13 @@ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
1149
1286
  return self.output_datatype
1150
1287
 
1151
1288
  def with_select_context(
1152
- self,
1153
- grain: Grain,
1154
- conditional: Conditional | Comparison | Parenthetical | None,
1155
- environment: Environment | None = None,
1289
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
1156
1290
  ) -> Function:
1157
- if self.operator in FunctionClass.AGGREGATE_FUNCTIONS.value and conditional:
1158
- base = [
1159
- (
1160
- c.with_select_context(grain, conditional, environment)
1161
- if isinstance(
1162
- c,
1163
- SelectContext,
1164
- )
1165
- else c
1166
- )
1167
- for c in self.arguments
1168
- ]
1169
- final = [
1170
- c.with_filter(conditional, environment) if isinstance(c, Concept) else c
1171
- for c in base
1172
- ]
1173
- return Function(
1174
- operator=self.operator,
1175
- arguments=final,
1176
- output_datatype=self.output_datatype,
1177
- output_purpose=self.output_purpose,
1178
- valid_inputs=self.valid_inputs,
1179
- arg_count=self.arg_count,
1180
- )
1181
-
1182
- return Function(
1291
+ base = Function(
1183
1292
  operator=self.operator,
1184
1293
  arguments=[
1185
1294
  (
1186
- c.with_select_context(grain, conditional, environment)
1295
+ c.with_select_context(local_concepts, grain, environment)
1187
1296
  if isinstance(
1188
1297
  c,
1189
1298
  SelectContext,
@@ -1197,6 +1306,7 @@ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
1197
1306
  valid_inputs=self.valid_inputs,
1198
1307
  arg_count=self.arg_count,
1199
1308
  )
1309
+ return base
1200
1310
 
1201
1311
  @field_validator("arguments")
1202
1312
  @classmethod
@@ -1404,20 +1514,19 @@ class WindowItem(Mergeable, Namespaced, SelectContext, BaseModel):
1404
1514
  )
1405
1515
 
1406
1516
  def with_select_context(
1407
- self,
1408
- grain: Grain,
1409
- conditional: Conditional | Comparison | Parenthetical | None,
1410
- environment: Environment | None = None,
1517
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
1411
1518
  ) -> "WindowItem":
1412
1519
  return WindowItem(
1413
1520
  type=self.type,
1414
- content=self.content.with_select_context(grain, conditional, environment),
1521
+ content=self.content.with_select_context(
1522
+ local_concepts, grain, environment
1523
+ ),
1415
1524
  over=[
1416
- x.with_select_context(grain, conditional, environment)
1525
+ x.with_select_context(local_concepts, grain, environment)
1417
1526
  for x in self.over
1418
1527
  ],
1419
1528
  order_by=[
1420
- x.with_select_context(grain, conditional, environment)
1529
+ x.with_select_context(local_concepts, grain, environment)
1421
1530
  for x in self.order_by
1422
1531
  ],
1423
1532
  index=self.index,
@@ -1489,14 +1598,13 @@ class FilterItem(Namespaced, SelectContext, BaseModel):
1489
1598
  )
1490
1599
 
1491
1600
  def with_select_context(
1492
- self,
1493
- grain: Grain,
1494
- conditional: Conditional | Comparison | Parenthetical | None,
1495
- environment: Environment | None = None,
1601
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
1496
1602
  ) -> FilterItem:
1497
1603
  return FilterItem(
1498
- content=self.content.with_select_context(grain, conditional, environment),
1499
- where=self.where.with_select_context(grain, conditional, environment),
1604
+ content=self.content.with_select_context(
1605
+ local_concepts, grain, environment
1606
+ ),
1607
+ where=self.where.with_select_context(local_concepts, grain, environment),
1500
1608
  )
1501
1609
 
1502
1610
  @property
@@ -1576,14 +1684,11 @@ class OrderItem(Mergeable, SelectContext, Namespaced, BaseModel):
1576
1684
  return OrderItem(expr=self.expr.with_namespace(namespace), order=self.order)
1577
1685
 
1578
1686
  def with_select_context(
1579
- self,
1580
- grain: Grain,
1581
- conditional: Conditional | Comparison | Parenthetical | None,
1582
- environment: Environment | None = None,
1687
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
1583
1688
  ) -> "OrderItem":
1584
1689
  return OrderItem(
1585
1690
  expr=self.expr.with_select_context(
1586
- grain, conditional=conditional, environment=environment
1691
+ local_concepts, grain, environment=environment
1587
1692
  ),
1588
1693
  order=self.order,
1589
1694
  )
@@ -1604,7 +1709,7 @@ class OrderItem(Mergeable, SelectContext, Namespaced, BaseModel):
1604
1709
  return self.expr.output
1605
1710
 
1606
1711
 
1607
- class OrderBy(Mergeable, Namespaced, BaseModel):
1712
+ class OrderBy(SelectContext, Mergeable, Namespaced, BaseModel):
1608
1713
  items: List[OrderItem]
1609
1714
 
1610
1715
  def with_namespace(self, namespace: str) -> "OrderBy":
@@ -1617,6 +1722,14 @@ class OrderBy(Mergeable, Namespaced, BaseModel):
1617
1722
  items=[x.with_merge(source, target, modifiers) for x in self.items]
1618
1723
  )
1619
1724
 
1725
+ def with_select_context(self, local_concepts, grain, environment):
1726
+ return OrderBy(
1727
+ items=[
1728
+ x.with_select_context(local_concepts, grain, environment)
1729
+ for x in self.items
1730
+ ]
1731
+ )
1732
+
1620
1733
  @property
1621
1734
  def concept_arguments(self):
1622
1735
  return [x.expr for x in self.items]
@@ -1631,20 +1744,21 @@ class SelectStatement(HasUUID, Mergeable, Namespaced, SelectTypeMixin, BaseModel
1631
1744
  selection: List[SelectItem]
1632
1745
  order_by: Optional[OrderBy] = None
1633
1746
  limit: Optional[int] = None
1634
- meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
1747
+ meta: Metadata = Field(default_factory=lambda: Metadata())
1748
+ local_concepts: Annotated[
1749
+ EnvironmentConceptDict, PlainValidator(validate_concepts)
1750
+ ] = Field(default_factory=EnvironmentConceptDict)
1635
1751
 
1636
- def refresh_bindings(self, environment: Environment):
1637
- for item in self.selection:
1638
- if isinstance(item.content, Concept):
1639
- item.content = environment.concepts[item.content.address].with_grain(
1640
- self.grain
1641
- )
1642
-
1643
- def validate_syntax(self):
1752
+ def validate_syntax(self, environment: Environment):
1753
+ if self.where_clause:
1754
+ for x in self.where_clause.concept_arguments:
1755
+ if isinstance(x, UndefinedConcept):
1756
+ environment.concepts.raise_undefined(
1757
+ x.address, x.metadata.line_number
1758
+ )
1644
1759
  all_in_output = [x.address for x in self.output_components]
1645
1760
  if self.where_clause:
1646
1761
  for concept in self.where_clause.concept_arguments:
1647
-
1648
1762
  if (
1649
1763
  concept.lineage
1650
1764
  and isinstance(concept.lineage, Function)
@@ -1667,6 +1781,7 @@ class SelectStatement(HasUUID, Mergeable, Namespaced, SelectTypeMixin, BaseModel
1667
1781
  f"Cannot reference an aggregate derived in the select ({concept.address}) in the same statement where clause; move to the HAVING clause instead; Line: {self.meta.line_number}"
1668
1782
  )
1669
1783
  if self.having_clause:
1784
+ self.having_clause.hydrate_missing(self.local_concepts)
1670
1785
  for concept in self.having_clause.concept_arguments:
1671
1786
  if concept.address not in [x.address for x in self.output_components]:
1672
1787
  raise SyntaxError(
@@ -1884,9 +1999,6 @@ class CopyStatement(BaseModel):
1884
1999
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
1885
2000
  select: SelectStatement
1886
2001
 
1887
- def refresh_bindings(self, environment: Environment):
1888
- self.select.refresh_bindings(environment)
1889
-
1890
2002
 
1891
2003
  class AlignItem(Namespaced, BaseModel):
1892
2004
  alias: str
@@ -1940,24 +2052,21 @@ class MultiSelectStatement(HasUUID, SelectTypeMixin, Mergeable, Namespaced, Base
1940
2052
  order_by: Optional[OrderBy] = None
1941
2053
  limit: Optional[int] = None
1942
2054
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
1943
-
1944
- def refresh_bindings(self, environment: Environment):
1945
- for select in self.selects:
1946
- select.refresh_bindings(environment)
2055
+ local_concepts: Annotated[
2056
+ EnvironmentConceptDict, PlainValidator(validate_concepts)
2057
+ ] = Field(default_factory=EnvironmentConceptDict)
1947
2058
 
1948
2059
  def __repr__(self):
1949
2060
  return "MultiSelect<" + " MERGE ".join([str(s) for s in self.selects]) + ">"
1950
2061
 
1951
- @computed_field # type: ignore
1952
- @cached_property
2062
+ @property
1953
2063
  def arguments(self) -> List[Concept]:
1954
2064
  output = []
1955
2065
  for select in self.selects:
1956
2066
  output += select.input_components
1957
2067
  return unique(output, "address")
1958
2068
 
1959
- @computed_field # type: ignore
1960
- @cached_property
2069
+ @property
1961
2070
  def concept_arguments(self) -> List[Concept]:
1962
2071
  output = []
1963
2072
  for select in self.selects:
@@ -2007,6 +2116,9 @@ class MultiSelectStatement(HasUUID, SelectTypeMixin, Mergeable, Namespaced, Base
2007
2116
  if self.where_clause
2008
2117
  else None
2009
2118
  ),
2119
+ local_concepts=EnvironmentConceptDict(
2120
+ {k: v.with_namespace(namespace) for k, v in self.local_concepts.items()}
2121
+ ),
2010
2122
  )
2011
2123
 
2012
2124
  @property
@@ -2024,7 +2136,7 @@ class MultiSelectStatement(HasUUID, SelectTypeMixin, Mergeable, Namespaced, Base
2024
2136
  output.append(item.gen_concept(self))
2025
2137
  return output
2026
2138
 
2027
- def find_source(self, concept: Concept, cte: CTE) -> Concept:
2139
+ def find_source(self, concept: Concept, cte: CTE | UnionCTE) -> Concept:
2028
2140
  for x in self.align.items:
2029
2141
  if concept.name == x.alias:
2030
2142
  for c in x.concepts:
@@ -2125,6 +2237,9 @@ class Datasource(HasUUID, Namespaced, BaseModel):
2125
2237
  where: Optional[WhereClause] = None
2126
2238
  non_partial_for: Optional[WhereClause] = None
2127
2239
 
2240
+ def duplicate(self) -> Datasource:
2241
+ return self.model_copy(deep=True)
2242
+
2128
2243
  def merge_concept(
2129
2244
  self, source: Concept, target: Concept, modifiers: List[Modifier]
2130
2245
  ):
@@ -2194,7 +2309,7 @@ class Datasource(HasUUID, Namespaced, BaseModel):
2194
2309
  v = Address(location=v)
2195
2310
  return v
2196
2311
 
2197
- @field_validator("grain", mode="plain")
2312
+ @field_validator("grain", mode="before")
2198
2313
  @classmethod
2199
2314
  def grain_enforcement(cls, v: Grain, info: ValidationInfo):
2200
2315
  values = info.data
@@ -2701,7 +2816,7 @@ class CTE(BaseModel):
2701
2816
  base: bool = False
2702
2817
  group_to_grain: bool = False
2703
2818
  existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
2704
- parent_ctes: List["CTE"] = Field(default_factory=list)
2819
+ parent_ctes: List[Union["CTE", "UnionCTE"]] = Field(default_factory=list)
2705
2820
  joins: List[Union["Join", "InstantiatedUnnestJoin"]] = Field(default_factory=list)
2706
2821
  condition: Optional[Union["Conditional", "Comparison", "Parenthetical"]] = None
2707
2822
  partial_concepts: List[Concept] = Field(default_factory=list)
@@ -2745,6 +2860,7 @@ class CTE(BaseModel):
2745
2860
 
2746
2861
  if self.condition:
2747
2862
  self.condition = self.condition.inline_constant(concept)
2863
+
2748
2864
  # if we've entirely removed the need to join to someplace to get the concept
2749
2865
  # drop the join as well.
2750
2866
  for removed_cte in removed:
@@ -2876,7 +2992,9 @@ class CTE(BaseModel):
2876
2992
  self.group_to_grain = True
2877
2993
  return True
2878
2994
 
2879
- def __add__(self, other: "CTE"):
2995
+ def __add__(self, other: "CTE" | UnionCTE):
2996
+ if isinstance(other, UnionCTE):
2997
+ raise ValueError("cannot merge CTE and union CTE")
2880
2998
  logger.info('Merging two copies of CTE "%s"', self.name)
2881
2999
  if not self.grain == other.grain:
2882
3000
  error = (
@@ -3052,8 +3170,72 @@ class CTE(BaseModel):
3052
3170
  return [c for c in self.output_columns if c.address in self.source_map]
3053
3171
 
3054
3172
 
3055
- def merge_ctes(ctes: List[CTE]) -> List[CTE]:
3056
- final_ctes_dict: Dict[str, CTE] = {}
3173
+ class UnionCTE(BaseModel):
3174
+ name: str
3175
+ source: QueryDatasource
3176
+ parent_ctes: list[CTE | UnionCTE]
3177
+ internal_ctes: list[CTE | UnionCTE]
3178
+ output_columns: List[Concept]
3179
+ grain: Grain
3180
+ operator: str = "UNION ALL"
3181
+ order_by: Optional[OrderBy] = None
3182
+ limit: Optional[int] = None
3183
+ hidden_concepts: list[Concept] = Field(default_factory=list)
3184
+ partial_concepts: list[Concept] = Field(default_factory=list)
3185
+ existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
3186
+
3187
+ @computed_field # type: ignore
3188
+ @property
3189
+ def output_lcl(self) -> LooseConceptList:
3190
+ return LooseConceptList(concepts=self.output_columns)
3191
+
3192
+ def get_alias(self, concept: Concept, source: str | None = None) -> str:
3193
+ for cte in self.parent_ctes:
3194
+ if concept.address in cte.output_columns:
3195
+ if source and source != cte.name:
3196
+ continue
3197
+ return concept.safe_address
3198
+ return "INVALID_ALIAS"
3199
+
3200
+ def get_concept(self, address: str) -> Concept | None:
3201
+ for cte in self.internal_ctes:
3202
+ if address in cte.output_columns:
3203
+ match = [x for x in cte.output_columns if x.address == address].pop()
3204
+ return match
3205
+
3206
+ match_list = [x for x in self.output_columns if x.address == address]
3207
+ if match_list:
3208
+ return match_list.pop()
3209
+ return None
3210
+
3211
+ @property
3212
+ def source_map(self):
3213
+ return {x.address: [] for x in self.output_columns}
3214
+
3215
+ @property
3216
+ def condition(self):
3217
+ return None
3218
+
3219
+ @condition.setter
3220
+ def condition(self, value):
3221
+ raise NotImplementedError
3222
+
3223
+ @property
3224
+ def safe_identifier(self):
3225
+ return self.name
3226
+
3227
+ @property
3228
+ def group_to_grain(self) -> bool:
3229
+ return False
3230
+
3231
+ def __add__(self, other):
3232
+ if not isinstance(other, UnionCTE) or not other.name == self.name:
3233
+ raise SyntaxError("Cannot merge union CTEs")
3234
+ return self
3235
+
3236
+
3237
+ def merge_ctes(ctes: List[CTE | UnionCTE]) -> List[CTE | UnionCTE]:
3238
+ final_ctes_dict: Dict[str, CTE | UnionCTE] = {}
3057
3239
  # merge CTEs
3058
3240
  for cte in ctes:
3059
3241
  if cte.name not in final_ctes_dict:
@@ -3078,7 +3260,6 @@ class JoinKey(BaseModel):
3078
3260
 
3079
3261
 
3080
3262
  class Join(BaseModel):
3081
-
3082
3263
  right_cte: CTE
3083
3264
  jointype: JoinType
3084
3265
  left_cte: CTE | None = None
@@ -3127,132 +3308,24 @@ class Join(BaseModel):
3127
3308
  class UndefinedConcept(Concept, Mergeable, Namespaced):
3128
3309
  model_config = ConfigDict(arbitrary_types_allowed=True)
3129
3310
  name: str
3130
- environment: "EnvironmentConceptDict"
3131
3311
  line_no: int | None = None
3132
- datatype: DataType = DataType.UNKNOWN
3133
- purpose: Purpose = Purpose.KEY
3134
-
3135
- def with_merge(
3136
- self, source: Concept, target: Concept, modifiers: List[Modifier]
3137
- ) -> "UndefinedConcept" | Concept:
3138
- if self.address == source.address:
3139
- new = target.with_grain(self.grain.with_merge(source, target, modifiers))
3140
- new.pseudonyms.add(self.address)
3141
- return new
3142
- return self.__class__(
3143
- name=self.name,
3144
- datatype=self.datatype,
3145
- purpose=self.purpose,
3146
- metadata=self.metadata,
3147
- lineage=(
3148
- self.lineage.with_merge(source, target, modifiers)
3149
- if self.lineage
3150
- else None
3151
- ),
3152
- grain=self.grain.with_merge(source, target, modifiers),
3153
- namespace=self.namespace,
3154
- keys=(
3155
- tuple(x.with_merge(source, target, modifiers) for x in self.keys)
3156
- if self.keys
3157
- else None
3158
- ),
3159
- environment=self.environment,
3160
- line_no=self.line_no,
3161
- )
3162
-
3163
- def with_namespace(self, namespace: str) -> "UndefinedConcept":
3164
- return self.__class__(
3165
- name=self.name,
3166
- datatype=self.datatype,
3167
- purpose=self.purpose,
3168
- metadata=self.metadata,
3169
- lineage=self.lineage.with_namespace(namespace) if self.lineage else None,
3170
- grain=(
3171
- self.grain.with_namespace(namespace)
3172
- if self.grain
3173
- else Grain(components=[])
3174
- ),
3175
- namespace=namespace,
3176
- keys=self.keys,
3177
- environment=self.environment,
3178
- line_no=self.line_no,
3179
- )
3312
+ datatype: DataType | ListType | StructType | MapType | NumericType = (
3313
+ DataType.UNKNOWN
3314
+ )
3315
+ purpose: Purpose = Purpose.UNKNOWN
3180
3316
 
3181
3317
  def with_select_context(
3182
3318
  self,
3183
- grain: Optional["Grain"] = None,
3184
- conditional: Conditional | Comparison | Parenthetical | None = None,
3185
- environment: Environment | None = None,
3186
- ) -> "UndefinedConcept":
3187
- if not all([isinstance(x, Concept) for x in self.keys or []]):
3188
- raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")
3189
- new_grain = grain or Grain(components=[])
3190
- if self.lineage:
3191
- new_lineage = self.lineage
3192
- if isinstance(self.lineage, SelectContext):
3193
- new_lineage = self.lineage.with_select_context(
3194
- new_grain, conditional, environment
3195
- )
3196
- else:
3197
- new_lineage = None
3198
- return self.__class__(
3199
- name=self.name,
3200
- datatype=self.datatype,
3201
- purpose=self.purpose,
3202
- metadata=self.metadata,
3203
- lineage=new_lineage,
3204
- grain=new_grain,
3205
- namespace=self.namespace,
3206
- keys=self.keys,
3207
- environment=self.environment,
3208
- )
3319
+ local_concepts: dict[str, Concept],
3320
+ grain: Grain,
3321
+ environment: Environment,
3322
+ ) -> "Concept":
3323
+ if self.address in local_concepts:
3324
+ rval = local_concepts[self.address]
3325
+ rval = rval.with_select_context(local_concepts, grain, environment)
3209
3326
 
3210
- def with_grain(self, grain: Optional["Grain"] = None) -> "UndefinedConcept":
3211
- return self.__class__(
3212
- name=self.name,
3213
- datatype=self.datatype,
3214
- purpose=self.purpose,
3215
- metadata=self.metadata,
3216
- lineage=self.lineage,
3217
- grain=grain or Grain(components=[]),
3218
- namespace=self.namespace,
3219
- keys=self.keys,
3220
- environment=self.environment,
3221
- line_no=self.line_no,
3222
- )
3223
-
3224
- def with_default_grain(self) -> "UndefinedConcept":
3225
- if self.purpose == Purpose.KEY:
3226
- # we need to make this abstract
3227
- grain = Grain(components=[self.with_grain(Grain())], nested=True)
3228
- elif self.purpose == Purpose.PROPERTY:
3229
- components: List[Concept] = []
3230
- if self.keys:
3231
- components = [*self.keys]
3232
- if self.lineage:
3233
- for item in self.lineage.arguments:
3234
- if isinstance(item, Concept):
3235
- if item.keys and not all(c in components for c in item.keys):
3236
- components += item.sources
3237
- else:
3238
- components += item.sources
3239
- grain = Grain(components=components)
3240
- elif self.purpose == Purpose.METRIC:
3241
- grain = Grain()
3242
- else:
3243
- grain = self.grain # type: ignore
3244
- return self.__class__(
3245
- name=self.name,
3246
- datatype=self.datatype,
3247
- purpose=self.purpose,
3248
- metadata=self.metadata,
3249
- lineage=self.lineage,
3250
- grain=grain,
3251
- keys=self.keys,
3252
- namespace=self.namespace,
3253
- environment=self.environment,
3254
- line_no=self.line_no,
3255
- )
3327
+ return rval
3328
+ environment.concepts.raise_undefined(self.address, line_no=self.line_no)
3256
3329
 
3257
3330
 
3258
3331
  class EnvironmentDatasourceDict(dict):
@@ -3275,78 +3348,10 @@ class EnvironmentDatasourceDict(dict):
3275
3348
  def items(self) -> ItemsView[str, Datasource]: # type: ignore
3276
3349
  return super().items()
3277
3350
 
3278
-
3279
- class EnvironmentConceptDict(dict):
3280
- def __init__(self, *args, **kwargs) -> None:
3281
- super().__init__(self, *args, **kwargs)
3282
- self.undefined: dict[str, UndefinedConcept] = {}
3283
- self.fail_on_missing: bool = True
3284
- self.populate_default_concepts()
3285
-
3286
- def populate_default_concepts(self):
3287
- from trilogy.core.internal import DEFAULT_CONCEPTS
3288
-
3289
- for concept in DEFAULT_CONCEPTS.values():
3290
- self[concept.address] = concept
3291
-
3292
- def values(self) -> ValuesView[Concept]: # type: ignore
3293
- return super().values()
3294
-
3295
- def get(self, key: str, default: Concept | None = None) -> Concept | None: # type: ignore
3296
- try:
3297
- return self.__getitem__(key)
3298
- except UndefinedConceptException:
3299
- return default
3300
-
3301
- def __getitem__(
3302
- self, key, line_no: int | None = None, file: Path | None = None
3303
- ) -> Concept | UndefinedConcept:
3304
- try:
3305
- return super(EnvironmentConceptDict, self).__getitem__(key)
3306
-
3307
- except KeyError:
3308
- if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
3309
- return self.__getitem__(key.split(".", 1)[1], line_no)
3310
- if DEFAULT_NAMESPACE + "." + key in self:
3311
- return self.__getitem__(DEFAULT_NAMESPACE + "." + key, line_no)
3312
- if not self.fail_on_missing:
3313
- if key in self.undefined:
3314
- return self.undefined[key]
3315
- undefined = UndefinedConcept(
3316
- name=key,
3317
- line_no=line_no,
3318
- environment=self,
3319
- datatype=DataType.UNKNOWN,
3320
- purpose=Purpose.KEY,
3321
- )
3322
- self.undefined[key] = undefined
3323
- return undefined
3324
- matches = self._find_similar_concepts(key)
3325
- message = f"Undefined concept: {key}."
3326
- if matches:
3327
- message += f" Suggestions: {matches}"
3328
-
3329
- if line_no:
3330
- if file:
3331
- raise UndefinedConceptException(
3332
- f"{file}: {line_no}: " + message, matches
3333
- )
3334
- raise UndefinedConceptException(f"line: {line_no}: " + message, matches)
3335
- raise UndefinedConceptException(message, matches)
3336
-
3337
- def _find_similar_concepts(self, concept_name: str):
3338
- def strip_local(input: str):
3339
- if input.startswith(f"{DEFAULT_NAMESPACE}."):
3340
- return input[len(DEFAULT_NAMESPACE) + 1 :]
3341
- return input
3342
-
3343
- matches = difflib.get_close_matches(
3344
- strip_local(concept_name), [strip_local(x) for x in self.keys()]
3345
- )
3346
- return matches
3347
-
3348
- def items(self) -> ItemsView[str, Concept]: # type: ignore
3349
- return super().items()
3351
+ def duplicate(self) -> "EnvironmentDatasourceDict":
3352
+ new = EnvironmentDatasourceDict()
3353
+ new.update({k: v.duplicate() for k, v in self.items()})
3354
+ return new
3350
3355
 
3351
3356
 
3352
3357
  class ImportStatement(HasUUID, BaseModel):
@@ -3403,10 +3408,31 @@ class Environment(BaseModel):
3403
3408
  materialized_concepts: set[str] = Field(default_factory=set)
3404
3409
  alias_origin_lookup: Dict[str, Concept] = Field(default_factory=dict)
3405
3410
  # TODO: support freezing environments to avoid mutation
3406
- # frozen: bool = False
3411
+ frozen: bool = False
3412
+
3413
+ def freeze(self):
3414
+ self.frozen = True
3415
+
3416
+ def thaw(self):
3417
+ self.frozen = False
3407
3418
 
3408
3419
  def duplicate(self):
3409
- return self.model_copy(deep=True)
3420
+ return Environment.model_construct(
3421
+ datasources=self.datasources.duplicate(),
3422
+ concepts=self.concepts.duplicate(),
3423
+ functions=dict(self.functions),
3424
+ data_types=dict(self.data_types),
3425
+ imports=dict(self.imports),
3426
+ namespace=self.namespace,
3427
+ working_path=self.working_path,
3428
+ environment_config=self.environment_config,
3429
+ version=self.version,
3430
+ cte_name_map=dict(self.cte_name_map),
3431
+ materialized_concepts=set(self.materialized_concepts),
3432
+ alias_origin_lookup={
3433
+ k: v.duplicate() for k, v in self.alias_origin_lookup.items()
3434
+ },
3435
+ )
3410
3436
 
3411
3437
  def __init__(self, **data):
3412
3438
  super().__init__(**data)
@@ -3538,6 +3564,8 @@ class Environment(BaseModel):
3538
3564
  def add_import(
3539
3565
  self, alias: str, source: Environment, imp_stm: ImportStatement | None = None
3540
3566
  ):
3567
+ if self.frozen:
3568
+ raise ValueError("Environment is frozen, cannot add imports")
3541
3569
  exists = False
3542
3570
  existing = self.imports[alias]
3543
3571
  if imp_stm:
@@ -3558,6 +3586,9 @@ class Environment(BaseModel):
3558
3586
  # we can't exit early
3559
3587
  # as there may be new concepts
3560
3588
  for k, concept in source.concepts.items():
3589
+ # skip internal namespace
3590
+ if INTERNAL_NAMESPACE in concept.address:
3591
+ continue
3561
3592
  if same_namespace:
3562
3593
  new = self.add_concept(concept, _ignore_cache=True)
3563
3594
  else:
@@ -3590,9 +3621,11 @@ class Environment(BaseModel):
3590
3621
  def add_file_import(
3591
3622
  self, path: str | Path, alias: str, env: Environment | None = None
3592
3623
  ):
3624
+ if self.frozen:
3625
+ raise ValueError("Environment is frozen, cannot add imports")
3593
3626
  from trilogy.parsing.parse_engine import (
3594
- ParseToObjects,
3595
3627
  PARSER,
3628
+ ParseToObjects,
3596
3629
  gen_cache_lookup,
3597
3630
  )
3598
3631
 
@@ -3624,6 +3657,7 @@ class Environment(BaseModel):
3624
3657
  )
3625
3658
  nparser.set_text(text)
3626
3659
  nparser.transform(PARSER.parse(text))
3660
+ nparser.hydrate_missing()
3627
3661
 
3628
3662
  except Exception as e:
3629
3663
  raise ImportError(
@@ -3674,6 +3708,8 @@ class Environment(BaseModel):
3674
3708
  add_derived: bool = True,
3675
3709
  _ignore_cache: bool = False,
3676
3710
  ):
3711
+ if self.frozen:
3712
+ raise ValueError("Environment is frozen, cannot add concepts")
3677
3713
  if not force:
3678
3714
  existing = self.validate_concept(concept, meta=meta)
3679
3715
  if existing:
@@ -3692,6 +3728,8 @@ class Environment(BaseModel):
3692
3728
  meta: Meta | None = None,
3693
3729
  _ignore_cache: bool = False,
3694
3730
  ):
3731
+ if self.frozen:
3732
+ raise ValueError("Environment is frozen, cannot add datasource")
3695
3733
  self.datasources[datasource.identifier] = datasource
3696
3734
 
3697
3735
  eligible_to_promote_roots = datasource.non_partial_for is None
@@ -3743,6 +3781,8 @@ class Environment(BaseModel):
3743
3781
  address: str,
3744
3782
  meta: Meta | None = None,
3745
3783
  ) -> bool:
3784
+ if self.frozen:
3785
+ raise ValueError("Environment is frozen, cannot delete datsources")
3746
3786
  if address in self.datasources:
3747
3787
  del self.datasources[address]
3748
3788
  self.gen_concept_list_caches()
@@ -3750,17 +3790,22 @@ class Environment(BaseModel):
3750
3790
  return False
3751
3791
 
3752
3792
  def merge_concept(
3753
- self, source: Concept, target: Concept, modifiers: List[Modifier]
3754
- ):
3793
+ self,
3794
+ source: Concept,
3795
+ target: Concept,
3796
+ modifiers: List[Modifier],
3797
+ force: bool = False,
3798
+ ) -> bool:
3799
+ if self.frozen:
3800
+ raise ValueError("Environment is frozen, cannot merge concepts")
3755
3801
  replacements = {}
3756
3802
 
3757
3803
  # exit early if we've run this
3758
- if source.address in self.alias_origin_lookup:
3804
+ if source.address in self.alias_origin_lookup and not force:
3759
3805
  if self.concepts[source.address] == target:
3760
- return
3806
+ return False
3761
3807
  self.alias_origin_lookup[source.address] = source
3762
3808
  for k, v in self.concepts.items():
3763
-
3764
3809
  if v.address == target.address:
3765
3810
  v.pseudonyms.add(source.address)
3766
3811
  if v.address == source.address:
@@ -3774,6 +3819,7 @@ class Environment(BaseModel):
3774
3819
  for k, ds in self.datasources.items():
3775
3820
  if source.address in ds.output_lcl:
3776
3821
  ds.merge_concept(source, target, modifiers=modifiers)
3822
+ return True
3777
3823
 
3778
3824
 
3779
3825
  class LazyEnvironment(Environment):
@@ -3847,6 +3893,17 @@ class Comparison(
3847
3893
  ]
3848
3894
  operator: ComparisonOperator
3849
3895
 
3896
+ def hydrate_missing(self, concepts: EnvironmentConceptDict):
3897
+ if isinstance(self.left, UndefinedConcept) and self.left.address in concepts:
3898
+ self.left = concepts[self.left.address]
3899
+ if isinstance(self.right, UndefinedConcept) and self.right.address in concepts:
3900
+ self.right = concepts[self.right.address]
3901
+ if isinstance(self.left, Mergeable):
3902
+ self.left.hydrate_missing(concepts)
3903
+ if isinstance(self.right, Mergeable):
3904
+ self.right.hydrate_missing(concepts)
3905
+ return self
3906
+
3850
3907
  def __init__(self, *args, **kwargs) -> None:
3851
3908
  super().__init__(*args, **kwargs)
3852
3909
  if self.operator in (ComparisonOperator.IS, ComparisonOperator.IS_NOT):
@@ -3961,20 +4018,17 @@ class Comparison(
3961
4018
  )
3962
4019
 
3963
4020
  def with_select_context(
3964
- self,
3965
- grain: Grain,
3966
- conditional: Conditional | Comparison | Parenthetical | None,
3967
- environment: Environment | None = None,
4021
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
3968
4022
  ):
3969
4023
  return self.__class__(
3970
4024
  left=(
3971
- self.left.with_select_context(grain, conditional, environment)
4025
+ self.left.with_select_context(local_concepts, grain, environment)
3972
4026
  if isinstance(self.left, SelectContext)
3973
4027
  else self.left
3974
4028
  ),
3975
4029
  # the right side does NOT need to inherit select grain
3976
4030
  right=(
3977
- self.right.with_select_context(grain, conditional, environment)
4031
+ self.right.with_select_context(local_concepts, grain, environment)
3978
4032
  if isinstance(self.right, SelectContext)
3979
4033
  else self.right
3980
4034
  ),
@@ -4040,7 +4094,6 @@ class Comparison(
4040
4094
 
4041
4095
 
4042
4096
  class SubselectComparison(Comparison):
4043
-
4044
4097
  def __eq__(self, other):
4045
4098
  if not isinstance(other, SubselectComparison):
4046
4099
  return False
@@ -4062,14 +4115,14 @@ class SubselectComparison(Comparison):
4062
4115
 
4063
4116
  def with_select_context(
4064
4117
  self,
4118
+ local_concepts: dict[str, Concept],
4065
4119
  grain: Grain,
4066
- conditional: Conditional | Comparison | Parenthetical | None,
4067
- environment: Environment | None = None,
4120
+ environment: Environment,
4068
4121
  ):
4069
4122
  # there's no need to pass the select grain through to a subselect comparison on the right
4070
4123
  return self.__class__(
4071
4124
  left=(
4072
- self.left.with_select_context(grain, conditional, environment)
4125
+ self.left.with_select_context(local_concepts, grain, environment)
4073
4126
  if isinstance(self.left, SelectContext)
4074
4127
  else self.left
4075
4128
  ),
@@ -4106,17 +4159,14 @@ class CaseWhen(Namespaced, SelectContext, BaseModel):
4106
4159
  )
4107
4160
 
4108
4161
  def with_select_context(
4109
- self,
4110
- grain: Grain,
4111
- conditional: Conditional | Comparison | Parenthetical | None,
4112
- environment: Environment | None = None,
4162
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
4113
4163
  ) -> CaseWhen:
4114
4164
  return CaseWhen(
4115
4165
  comparison=self.comparison.with_select_context(
4116
- grain, conditional, environment
4166
+ local_concepts, grain, environment
4117
4167
  ),
4118
4168
  expr=(
4119
- (self.expr.with_select_context(grain, conditional, environment))
4169
+ (self.expr.with_select_context(local_concepts, grain, environment))
4120
4170
  if isinstance(self.expr, SelectContext)
4121
4171
  else self.expr
4122
4172
  ),
@@ -4134,14 +4184,14 @@ class CaseElse(Namespaced, SelectContext, BaseModel):
4134
4184
 
4135
4185
  def with_select_context(
4136
4186
  self,
4187
+ local_concepts: dict[str, Concept],
4137
4188
  grain: Grain,
4138
- conditional: Conditional | Comparison | Parenthetical | None,
4139
- environment: Environment | None = None,
4140
- ) -> CaseElse:
4189
+ environment: Environment,
4190
+ ):
4141
4191
  return CaseElse(
4142
4192
  discriminant=self.discriminant,
4143
4193
  expr=(
4144
- self.expr.with_select_context(grain, conditional, environment)
4194
+ self.expr.with_select_context(local_concepts, grain, environment)
4145
4195
  if isinstance(
4146
4196
  self.expr,
4147
4197
  SelectContext,
@@ -4213,7 +4263,6 @@ class Conditional(
4213
4263
  return f"{str(self.left)} {self.operator.value} {str(self.right)}"
4214
4264
 
4215
4265
  def __eq__(self, other):
4216
-
4217
4266
  if not isinstance(other, Conditional):
4218
4267
  return False
4219
4268
  return (
@@ -4281,19 +4330,16 @@ class Conditional(
4281
4330
  )
4282
4331
 
4283
4332
  def with_select_context(
4284
- self,
4285
- grain: Grain,
4286
- conditional: Conditional | Comparison | Parenthetical | None,
4287
- environment: Environment | None = None,
4333
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
4288
4334
  ):
4289
4335
  return Conditional(
4290
4336
  left=(
4291
- self.left.with_select_context(grain, conditional, environment)
4337
+ self.left.with_select_context(local_concepts, grain, environment)
4292
4338
  if isinstance(self.left, SelectContext)
4293
4339
  else self.left
4294
4340
  ),
4295
4341
  right=(
4296
- self.right.with_select_context(grain, conditional, environment)
4342
+ self.right.with_select_context(local_concepts, grain, environment)
4297
4343
  if isinstance(self.right, SelectContext)
4298
4344
  else self.right
4299
4345
  ),
@@ -4402,16 +4448,16 @@ class AggregateWrapper(Mergeable, Namespaced, SelectContext, BaseModel):
4402
4448
  )
4403
4449
 
4404
4450
  def with_select_context(
4405
- self,
4406
- grain: Grain,
4407
- conditional: Conditional | Comparison | Parenthetical | None,
4408
- environment: Environment | None = None,
4451
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
4409
4452
  ) -> AggregateWrapper:
4410
4453
  if not self.by:
4411
4454
  by = grain.components_copy
4412
4455
  else:
4413
- by = self.by
4414
- parent = self.function.with_select_context(grain, conditional, environment)
4456
+ by = [
4457
+ x.with_select_context(local_concepts, grain, environment)
4458
+ for x in self.by
4459
+ ]
4460
+ parent = self.function.with_select_context(local_concepts, grain, environment)
4415
4461
  return AggregateWrapper(function=parent, by=by)
4416
4462
 
4417
4463
 
@@ -4446,14 +4492,11 @@ class WhereClause(Mergeable, ConceptArgs, Namespaced, SelectContext, BaseModel):
4446
4492
  return WhereClause(conditional=self.conditional.with_namespace(namespace))
4447
4493
 
4448
4494
  def with_select_context(
4449
- self,
4450
- grain: Grain,
4451
- conditional: Conditional | Comparison | Parenthetical | None,
4452
- environment: Environment | None = None,
4495
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
4453
4496
  ) -> WhereClause:
4454
- return WhereClause(
4497
+ return self.__class__(
4455
4498
  conditional=self.conditional.with_select_context(
4456
- grain, conditional, environment
4499
+ local_concepts, grain, environment
4457
4500
  )
4458
4501
  )
4459
4502
 
@@ -4483,6 +4526,18 @@ class WhereClause(Mergeable, ConceptArgs, Namespaced, SelectContext, BaseModel):
4483
4526
  class HavingClause(WhereClause):
4484
4527
  pass
4485
4528
 
4529
+ def hydrate_missing(self, concepts: EnvironmentConceptDict):
4530
+ self.conditional.hydrate_missing(concepts)
4531
+
4532
+ def with_select_context(
4533
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
4534
+ ) -> HavingClause:
4535
+ return HavingClause(
4536
+ conditional=self.conditional.with_select_context(
4537
+ local_concepts, grain, environment
4538
+ )
4539
+ )
4540
+
4486
4541
 
4487
4542
  class MaterializedDataset(BaseModel):
4488
4543
  address: Address
@@ -4495,8 +4550,8 @@ class MaterializedDataset(BaseModel):
4495
4550
 
4496
4551
  class ProcessedQuery(BaseModel):
4497
4552
  output_columns: List[Concept]
4498
- ctes: List[CTE]
4499
- base: CTE
4553
+ ctes: List[CTE | UnionCTE]
4554
+ base: CTE | UnionCTE
4500
4555
  joins: List[Join]
4501
4556
  grain: Grain
4502
4557
  hidden_columns: List[Concept] = Field(default_factory=list)
@@ -4504,6 +4559,9 @@ class ProcessedQuery(BaseModel):
4504
4559
  where_clause: Optional[WhereClause] = None
4505
4560
  having_clause: Optional[HavingClause] = None
4506
4561
  order_by: Optional[OrderBy] = None
4562
+ local_concepts: Annotated[
4563
+ EnvironmentConceptDict, PlainValidator(validate_concepts)
4564
+ ] = Field(default_factory=EnvironmentConceptDict)
4507
4565
 
4508
4566
 
4509
4567
  class PersistQueryMixin(BaseModel):
@@ -4604,7 +4662,6 @@ class RowsetDerivationStatement(HasUUID, Namespaced, BaseModel):
4604
4662
  components=[orig[c.address] for c in x.grain.components_copy]
4605
4663
  )
4606
4664
  else:
4607
-
4608
4665
  x.grain = default_grain
4609
4666
  return output
4610
4667
 
@@ -4728,14 +4785,11 @@ class Parenthetical(
4728
4785
  )
4729
4786
 
4730
4787
  def with_select_context(
4731
- self,
4732
- grain: Grain,
4733
- conditional: Conditional | Comparison | Parenthetical | None,
4734
- environment: Environment | None = None,
4788
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
4735
4789
  ):
4736
4790
  return Parenthetical(
4737
4791
  content=(
4738
- self.content.with_select_context(grain, conditional, environment)
4792
+ self.content.with_select_context(local_concepts, grain, environment)
4739
4793
  if isinstance(self.content, SelectContext)
4740
4794
  else self.content
4741
4795
  )