pytrilogy 0.0.2.47__py3-none-any.whl → 0.0.2.49__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (69) hide show
  1. {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.49.dist-info}/METADATA +1 -1
  2. pytrilogy-0.0.2.49.dist-info/RECORD +85 -0
  3. trilogy/__init__.py +2 -2
  4. trilogy/constants.py +4 -2
  5. trilogy/core/enums.py +7 -1
  6. trilogy/core/env_processor.py +1 -2
  7. trilogy/core/environment_helpers.py +5 -5
  8. trilogy/core/functions.py +11 -10
  9. trilogy/core/internal.py +2 -3
  10. trilogy/core/models.py +449 -393
  11. trilogy/core/optimization.py +37 -21
  12. trilogy/core/optimizations/__init__.py +1 -1
  13. trilogy/core/optimizations/base_optimization.py +6 -6
  14. trilogy/core/optimizations/inline_constant.py +7 -4
  15. trilogy/core/optimizations/inline_datasource.py +14 -5
  16. trilogy/core/optimizations/predicate_pushdown.py +20 -10
  17. trilogy/core/processing/concept_strategies_v3.py +43 -24
  18. trilogy/core/processing/graph_utils.py +2 -3
  19. trilogy/core/processing/node_generators/__init__.py +7 -5
  20. trilogy/core/processing/node_generators/basic_node.py +4 -4
  21. trilogy/core/processing/node_generators/common.py +10 -11
  22. trilogy/core/processing/node_generators/filter_node.py +7 -9
  23. trilogy/core/processing/node_generators/group_node.py +10 -11
  24. trilogy/core/processing/node_generators/group_to_node.py +5 -5
  25. trilogy/core/processing/node_generators/multiselect_node.py +10 -12
  26. trilogy/core/processing/node_generators/node_merge_node.py +7 -9
  27. trilogy/core/processing/node_generators/rowset_node.py +36 -15
  28. trilogy/core/processing/node_generators/select_merge_node.py +11 -10
  29. trilogy/core/processing/node_generators/select_node.py +5 -5
  30. trilogy/core/processing/node_generators/union_node.py +75 -0
  31. trilogy/core/processing/node_generators/unnest_node.py +2 -3
  32. trilogy/core/processing/node_generators/window_node.py +3 -4
  33. trilogy/core/processing/nodes/__init__.py +9 -5
  34. trilogy/core/processing/nodes/base_node.py +45 -13
  35. trilogy/core/processing/nodes/filter_node.py +3 -4
  36. trilogy/core/processing/nodes/group_node.py +17 -13
  37. trilogy/core/processing/nodes/merge_node.py +14 -12
  38. trilogy/core/processing/nodes/select_node_v2.py +13 -9
  39. trilogy/core/processing/nodes/union_node.py +50 -0
  40. trilogy/core/processing/nodes/unnest_node.py +2 -3
  41. trilogy/core/processing/nodes/window_node.py +2 -3
  42. trilogy/core/processing/utility.py +38 -41
  43. trilogy/core/query_processor.py +71 -51
  44. trilogy/dialect/base.py +95 -53
  45. trilogy/dialect/bigquery.py +2 -3
  46. trilogy/dialect/common.py +5 -4
  47. trilogy/dialect/config.py +0 -2
  48. trilogy/dialect/duckdb.py +2 -2
  49. trilogy/dialect/enums.py +5 -5
  50. trilogy/dialect/postgres.py +2 -2
  51. trilogy/dialect/presto.py +3 -4
  52. trilogy/dialect/snowflake.py +2 -2
  53. trilogy/dialect/sql_server.py +3 -4
  54. trilogy/engine.py +2 -1
  55. trilogy/executor.py +43 -30
  56. trilogy/hooks/base_hook.py +5 -4
  57. trilogy/hooks/graph_hook.py +2 -1
  58. trilogy/hooks/query_debugger.py +18 -8
  59. trilogy/parsing/common.py +15 -20
  60. trilogy/parsing/parse_engine.py +125 -88
  61. trilogy/parsing/render.py +32 -35
  62. trilogy/parsing/trilogy.lark +8 -1
  63. trilogy/scripts/trilogy.py +6 -4
  64. trilogy/utility.py +1 -1
  65. pytrilogy-0.0.2.47.dist-info/RECORD +0 -83
  66. {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.49.dist-info}/LICENSE.md +0 -0
  67. {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.49.dist-info}/WHEEL +0 -0
  68. {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.49.dist-info}/entry_points.txt +0 -0
  69. {pytrilogy-0.0.2.47.dist-info → pytrilogy-0.0.2.49.dist-info}/top_level.txt +0 -0
trilogy/core/models.py CHANGED
@@ -1,82 +1,86 @@
1
1
  from __future__ import annotations
2
+
2
3
  import difflib
4
+ import hashlib
3
5
  import os
6
+ from abc import ABC
7
+ from collections import UserDict, UserList, defaultdict
4
8
  from enum import Enum
9
+ from functools import cached_property
10
+ from pathlib import Path
5
11
  from typing import (
12
+ Annotated,
13
+ Any,
14
+ Callable,
6
15
  Dict,
7
- TypeVar,
16
+ Generic,
17
+ ItemsView,
8
18
  List,
19
+ Never,
9
20
  Optional,
10
- Union,
11
- Set,
12
- Any,
21
+ Self,
13
22
  Sequence,
14
- ValuesView,
15
- Callable,
16
- Annotated,
17
- get_args,
18
- Generic,
23
+ Set,
19
24
  Tuple,
20
25
  Type,
21
- ItemsView,
26
+ TypeVar,
27
+ Union,
28
+ ValuesView,
29
+ get_args,
22
30
  )
23
- from pydantic_core import core_schema
24
- from pydantic.functional_validators import PlainValidator
31
+
32
+ from lark.tree import Meta
25
33
  from pydantic import (
26
34
  BaseModel,
27
- Field,
28
35
  ConfigDict,
29
- field_validator,
36
+ Field,
30
37
  ValidationInfo,
31
38
  ValidatorFunctionWrapHandler,
32
39
  computed_field,
40
+ field_validator,
33
41
  )
34
- from lark.tree import Meta
35
- from pathlib import Path
42
+ from pydantic.functional_validators import PlainValidator
43
+ from pydantic_core import core_schema
44
+
36
45
  from trilogy.constants import (
37
- logger,
46
+ CONFIG,
38
47
  DEFAULT_NAMESPACE,
39
48
  ENV_CACHE_NAME,
40
49
  MagicConstants,
41
- CONFIG,
50
+ logger,
42
51
  )
43
52
  from trilogy.core.constants import (
44
53
  ALL_ROWS_CONCEPT,
45
- INTERNAL_NAMESPACE,
46
54
  CONSTANT_DATASET,
55
+ INTERNAL_NAMESPACE,
47
56
  PERSISTED_CONCEPT_PREFIX,
48
57
  )
49
58
  from trilogy.core.enums import (
50
- InfiniteFunctionArgs,
51
- Purpose,
52
- JoinType,
53
- Ordering,
54
- Modifier,
55
- FunctionType,
56
- FunctionClass,
57
59
  BooleanOperator,
58
60
  ComparisonOperator,
59
- WindowOrder,
60
- PurposeLineage,
61
- SourceType,
62
- WindowType,
63
61
  ConceptSource,
64
62
  DatePart,
65
- ShowCategory,
63
+ FunctionClass,
64
+ FunctionType,
66
65
  Granularity,
67
- SelectFiltering,
66
+ InfiniteFunctionArgs,
68
67
  IOType,
68
+ JoinType,
69
+ Modifier,
70
+ Ordering,
71
+ Purpose,
72
+ PurposeLineage,
73
+ SelectFiltering,
74
+ ShowCategory,
75
+ SourceType,
76
+ WindowOrder,
77
+ WindowType,
69
78
  )
70
79
  from trilogy.core.exceptions import (
71
- UndefinedConceptException,
72
80
  InvalidSyntaxException,
81
+ UndefinedConceptException,
73
82
  )
74
83
  from trilogy.utility import unique
75
- from collections import UserList, UserDict
76
- from functools import cached_property
77
- from abc import ABC
78
- from collections import defaultdict
79
- import hashlib
80
84
 
81
85
  LOGGER_PREFIX = "[MODELS]"
82
86
 
@@ -152,19 +156,19 @@ NAMESPACED_TYPES = Union[
152
156
 
153
157
 
154
158
  class Namespaced(ABC):
155
-
156
159
  def with_namespace(self, namespace: str):
157
160
  raise NotImplementedError
158
161
 
159
162
 
160
163
  class Mergeable(ABC):
161
-
162
164
  def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
163
165
  raise NotImplementedError
164
166
 
167
+ def hydrate_missing(self, concepts: EnvironmentConceptDict):
168
+ return self
165
169
 
166
- class ConceptArgs(ABC):
167
170
 
171
+ class ConceptArgs(ABC):
168
172
  @property
169
173
  def concept_arguments(self) -> List["Concept"]:
170
174
  raise NotImplementedError
@@ -179,13 +183,12 @@ class ConceptArgs(ABC):
179
183
 
180
184
 
181
185
  class SelectContext(ABC):
182
-
183
186
  def with_select_context(
184
187
  self,
188
+ local_concepts: dict[str, Concept],
185
189
  grain: Grain,
186
- conditional: Conditional | Comparison | Parenthetical | None,
187
- environment: Environment | None = None,
188
- ):
190
+ environment: Environment,
191
+ ) -> Any:
189
192
  raise NotImplementedError
190
193
 
191
194
 
@@ -195,7 +198,6 @@ class ConstantInlineable(ABC):
195
198
 
196
199
 
197
200
  class HasUUID(ABC):
198
-
199
201
  @property
200
202
  def uuid(self) -> str:
201
203
  return hashlib.md5(str(self).encode()).hexdigest()
@@ -456,8 +458,17 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
456
458
  pseudonyms: set[str] = Field(default_factory=set)
457
459
  _address_cache: str | None = None
458
460
 
461
+ def duplicate(self) -> Concept:
462
+ return self.model_copy(deep=True)
463
+
459
464
  def __hash__(self):
460
- return hash(str(self))
465
+ return hash(
466
+ f"{self.name}+{self.datatype}+ {self.purpose} + {str(self.lineage)} + {self.namespace} + {str(self.grain)} + {str(self.keys)}"
467
+ )
468
+
469
+ def __repr__(self):
470
+ base = f"{self.address}@{self.grain}"
471
+ return base
461
472
 
462
473
  @property
463
474
  def is_aggregate(self):
@@ -476,7 +487,7 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
476
487
  return True
477
488
  return False
478
489
 
479
- def with_merge(self, source: Concept, target: Concept, modifiers: List[Modifier]):
490
+ def with_merge(self, source: Self, target: Self, modifiers: List[Modifier]) -> Self:
480
491
  if self.address == source.address:
481
492
  new = target.with_grain(self.grain.with_merge(source, target, modifiers))
482
493
  new.pseudonyms.add(self.address)
@@ -556,11 +567,13 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
556
567
  v = Grain(components=values["lineage"].by)
557
568
  elif not v:
558
569
  v = Grain(components=[])
570
+ elif isinstance(v, Grain):
571
+ return v
559
572
  elif isinstance(v, Concept):
560
573
  v = Grain(components=[v])
561
574
  elif isinstance(v, dict):
562
575
  v = Grain.model_validate(v)
563
- if not v:
576
+ else:
564
577
  raise SyntaxError(f"Invalid grain {v} for concept {values['name']}")
565
578
  return v
566
579
 
@@ -610,7 +623,7 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
610
623
  def grain_components(self) -> List["Concept"]:
611
624
  return self.grain.components_copy if self.grain else []
612
625
 
613
- def with_namespace(self, namespace: str) -> "Concept":
626
+ def with_namespace(self, namespace: str) -> Self:
614
627
  if namespace == self.namespace:
615
628
  return self
616
629
  return self.__class__(
@@ -641,33 +654,48 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
641
654
  )
642
655
 
643
656
  def with_select_context(
644
- self,
645
- grain: Optional["Grain"] = None,
646
- conditional: Conditional | Comparison | Parenthetical | None = None,
647
- environment: Environment | None = None,
648
- ) -> "Concept":
649
- if not all([isinstance(x, Concept) for x in self.keys or []]):
650
- raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")
651
- new_grain = grain or self.grain
657
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
658
+ ) -> Concept:
652
659
  new_lineage = self.lineage
653
660
  if isinstance(self.lineage, SelectContext):
654
661
  new_lineage = self.lineage.with_select_context(
655
- new_grain, conditional, environment=environment
662
+ local_concepts=local_concepts, grain=grain, environment=environment
663
+ )
664
+ final_grain = self.grain
665
+ keys = (
666
+ tuple(
667
+ [
668
+ x.with_select_context(local_concepts, grain, environment)
669
+ for x in self.keys
670
+ ]
656
671
  )
672
+ if self.keys
673
+ else None
674
+ )
675
+ if self.is_aggregate and isinstance(new_lineage, Function):
676
+ new_lineage = AggregateWrapper(function=new_lineage, by=grain.components)
677
+ final_grain = grain
678
+ keys = tuple(grain.components)
679
+ elif (
680
+ self.is_aggregate and not keys and isinstance(new_lineage, AggregateWrapper)
681
+ ):
682
+ keys = tuple(new_lineage.by)
657
683
  return self.__class__(
658
684
  name=self.name,
659
685
  datatype=self.datatype,
660
686
  purpose=self.purpose,
661
687
  metadata=self.metadata,
662
688
  lineage=new_lineage,
663
- grain=new_grain,
689
+ grain=final_grain,
664
690
  namespace=self.namespace,
665
- keys=self.keys,
691
+ keys=keys,
666
692
  modifiers=self.modifiers,
667
- pseudonyms=self.pseudonyms,
693
+ # a select needs to always defer to the environment for pseudonyms
694
+ # TODO: evaluate if this should be cached
695
+ pseudonyms=(environment.concepts.get(self.address) or self).pseudonyms,
668
696
  )
669
697
 
670
- def with_grain(self, grain: Optional["Grain"] = None) -> "Concept":
698
+ def with_grain(self, grain: Optional["Grain"] = None) -> Self:
671
699
  if not all([isinstance(x, Concept) for x in self.keys or []]):
672
700
  raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")
673
701
  return self.__class__(
@@ -683,8 +711,8 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
683
711
  pseudonyms=self.pseudonyms,
684
712
  )
685
713
 
686
- @cached_property
687
- def _with_default_grain(self) -> "Concept":
714
+ @property
715
+ def _with_default_grain(self) -> Self:
688
716
  if self.purpose == Purpose.KEY:
689
717
  # we need to make this abstract
690
718
  grain = Grain(components=[self.with_grain(Grain())], nested=True)
@@ -788,6 +816,12 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
788
816
  and self.lineage.operator == FunctionType.UNNEST
789
817
  ):
790
818
  return PurposeLineage.UNNEST
819
+ elif (
820
+ self.lineage
821
+ and isinstance(self.lineage, Function)
822
+ and self.lineage.operator == FunctionType.UNION
823
+ ):
824
+ return PurposeLineage.UNION
791
825
  elif (
792
826
  self.lineage
793
827
  and isinstance(self.lineage, Function)
@@ -827,7 +861,7 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
827
861
  elif (
828
862
  self.lineage
829
863
  and isinstance(self.lineage, Function)
830
- and self.lineage.operator == FunctionType.UNNEST
864
+ and self.lineage.operator in (FunctionType.UNNEST, FunctionType.UNION)
831
865
  ):
832
866
  return Granularity.MULTI_ROW
833
867
  elif self.lineage and all(
@@ -836,6 +870,7 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
836
870
  for x in self.lineage.concept_arguments
837
871
  ]
838
872
  ):
873
+
839
874
  return Granularity.SINGLE_ROW
840
875
  return Granularity.MULTI_ROW
841
876
 
@@ -867,7 +902,15 @@ class Concept(Mergeable, Namespaced, SelectContext, BaseModel):
867
902
  return new
868
903
 
869
904
 
870
- class Grain(Mergeable, BaseModel):
905
+ class ConceptRef(BaseModel):
906
+ address: str
907
+ line_no: int
908
+
909
+ def hydrate(self, environment: Environment) -> Concept:
910
+ return environment.concepts.__getitem__(self.address, self.line_no)
911
+
912
+
913
+ class Grain(Mergeable, BaseModel, SelectContext):
871
914
  nested: bool = False
872
915
  components: List[Concept] = Field(default_factory=list, validate_default=True)
873
916
  where_clause: Optional[WhereClause] = Field(default=None)
@@ -890,6 +933,20 @@ class Grain(Mergeable, BaseModel):
890
933
  v2 = sorted(final, key=lambda x: x.name)
891
934
  return v2
892
935
 
936
+ def with_select_context(
937
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
938
+ ):
939
+ if self.nested:
940
+ return self
941
+ return Grain(
942
+ components=[
943
+ x.with_select_context(local_concepts, grain, environment)
944
+ for x in self.components
945
+ ],
946
+ where_clause=self.where_clause,
947
+ nested=self.nested,
948
+ )
949
+
893
950
  def with_filter(
894
951
  self,
895
952
  condition: "Conditional | Comparison | Parenthetical",
@@ -949,7 +1006,7 @@ class Grain(Mergeable, BaseModel):
949
1006
  base.append(c)
950
1007
  return set(base)
951
1008
 
952
- @cached_property
1009
+ @property
953
1010
  def set(self) -> set[str]:
954
1011
  base = []
955
1012
  for x in self.components_copy:
@@ -1016,6 +1073,89 @@ class Grain(Mergeable, BaseModel):
1016
1073
  return self.__add__(other)
1017
1074
 
1018
1075
 
1076
+ class EnvironmentConceptDict(dict):
1077
+ def __init__(self, *args, **kwargs) -> None:
1078
+ super().__init__(self, *args, **kwargs)
1079
+ self.undefined: dict[str, UndefinedConcept] = {}
1080
+ self.fail_on_missing: bool = True
1081
+ self.populate_default_concepts()
1082
+
1083
+ def duplicate(self) -> "EnvironmentConceptDict":
1084
+ new = EnvironmentConceptDict()
1085
+ new.update({k: v.duplicate() for k, v in self.items()})
1086
+ new.undefined = self.undefined
1087
+ new.fail_on_missing = self.fail_on_missing
1088
+ return new
1089
+
1090
+ def populate_default_concepts(self):
1091
+ from trilogy.core.internal import DEFAULT_CONCEPTS
1092
+
1093
+ for concept in DEFAULT_CONCEPTS.values():
1094
+ self[concept.address] = concept
1095
+
1096
+ def values(self) -> ValuesView[Concept]: # type: ignore
1097
+ return super().values()
1098
+
1099
+ def get(self, key: str, default: Concept | None = None) -> Concept | None: # type: ignore
1100
+ try:
1101
+ return self.__getitem__(key)
1102
+ except UndefinedConceptException:
1103
+ return default
1104
+
1105
+ def raise_undefined(
1106
+ self, key: str, line_no: int | None = None, file: Path | str | None = None
1107
+ ) -> Never:
1108
+ matches = self._find_similar_concepts(key)
1109
+ message = f"Undefined concept: {key}."
1110
+ if matches:
1111
+ message += f" Suggestions: {matches}"
1112
+
1113
+ if line_no:
1114
+ if file:
1115
+ raise UndefinedConceptException(
1116
+ f"{file}: {line_no}: " + message, matches
1117
+ )
1118
+ raise UndefinedConceptException(f"line: {line_no}: " + message, matches)
1119
+ raise UndefinedConceptException(message, matches)
1120
+
1121
+ def __getitem__(
1122
+ self, key: str, line_no: int | None = None, file: Path | None = None
1123
+ ) -> Concept | UndefinedConcept:
1124
+ try:
1125
+ return super(EnvironmentConceptDict, self).__getitem__(key)
1126
+ except KeyError:
1127
+ if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
1128
+ return self.__getitem__(key.split(".", 1)[1], line_no)
1129
+ if DEFAULT_NAMESPACE + "." + key in self:
1130
+ return self.__getitem__(DEFAULT_NAMESPACE + "." + key, line_no)
1131
+ if not self.fail_on_missing:
1132
+ if key in self.undefined:
1133
+ return self.undefined[key]
1134
+ undefined = UndefinedConcept(
1135
+ name=key,
1136
+ line_no=line_no,
1137
+ datatype=DataType.UNKNOWN,
1138
+ purpose=Purpose.UNKNOWN,
1139
+ )
1140
+ self.undefined[key] = undefined
1141
+ return undefined
1142
+ self.raise_undefined(key, line_no, file)
1143
+
1144
+ def _find_similar_concepts(self, concept_name: str):
1145
+ def strip_local(input: str):
1146
+ if input.startswith(f"{DEFAULT_NAMESPACE}."):
1147
+ return input[len(DEFAULT_NAMESPACE) + 1 :]
1148
+ return input
1149
+
1150
+ matches = difflib.get_close_matches(
1151
+ strip_local(concept_name), [strip_local(x) for x in self.keys()]
1152
+ )
1153
+ return matches
1154
+
1155
+ def items(self) -> ItemsView[str, Concept]: # type: ignore
1156
+ return super().items()
1157
+
1158
+
1019
1159
  class RawColumnExpr(BaseModel):
1020
1160
  text: str
1021
1161
 
@@ -1135,6 +1275,7 @@ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
1135
1275
  "CaseElse",
1136
1276
  list,
1137
1277
  ListWrapper[Any],
1278
+ WindowItem,
1138
1279
  ]
1139
1280
  ]
1140
1281
 
@@ -1149,41 +1290,13 @@ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
1149
1290
  return self.output_datatype
1150
1291
 
1151
1292
  def with_select_context(
1152
- self,
1153
- grain: Grain,
1154
- conditional: Conditional | Comparison | Parenthetical | None,
1155
- environment: Environment | None = None,
1293
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
1156
1294
  ) -> Function:
1157
- if self.operator in FunctionClass.AGGREGATE_FUNCTIONS.value and conditional:
1158
- base = [
1159
- (
1160
- c.with_select_context(grain, conditional, environment)
1161
- if isinstance(
1162
- c,
1163
- SelectContext,
1164
- )
1165
- else c
1166
- )
1167
- for c in self.arguments
1168
- ]
1169
- final = [
1170
- c.with_filter(conditional, environment) if isinstance(c, Concept) else c
1171
- for c in base
1172
- ]
1173
- return Function(
1174
- operator=self.operator,
1175
- arguments=final,
1176
- output_datatype=self.output_datatype,
1177
- output_purpose=self.output_purpose,
1178
- valid_inputs=self.valid_inputs,
1179
- arg_count=self.arg_count,
1180
- )
1181
-
1182
- return Function(
1295
+ base = Function(
1183
1296
  operator=self.operator,
1184
1297
  arguments=[
1185
1298
  (
1186
- c.with_select_context(grain, conditional, environment)
1299
+ c.with_select_context(local_concepts, grain, environment)
1187
1300
  if isinstance(
1188
1301
  c,
1189
1302
  SelectContext,
@@ -1197,6 +1310,7 @@ class Function(Mergeable, Namespaced, SelectContext, BaseModel):
1197
1310
  valid_inputs=self.valid_inputs,
1198
1311
  arg_count=self.arg_count,
1199
1312
  )
1313
+ return base
1200
1314
 
1201
1315
  @field_validator("arguments")
1202
1316
  @classmethod
@@ -1404,20 +1518,19 @@ class WindowItem(Mergeable, Namespaced, SelectContext, BaseModel):
1404
1518
  )
1405
1519
 
1406
1520
  def with_select_context(
1407
- self,
1408
- grain: Grain,
1409
- conditional: Conditional | Comparison | Parenthetical | None,
1410
- environment: Environment | None = None,
1521
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
1411
1522
  ) -> "WindowItem":
1412
1523
  return WindowItem(
1413
1524
  type=self.type,
1414
- content=self.content.with_select_context(grain, conditional, environment),
1525
+ content=self.content.with_select_context(
1526
+ local_concepts, grain, environment
1527
+ ),
1415
1528
  over=[
1416
- x.with_select_context(grain, conditional, environment)
1529
+ x.with_select_context(local_concepts, grain, environment)
1417
1530
  for x in self.over
1418
1531
  ],
1419
1532
  order_by=[
1420
- x.with_select_context(grain, conditional, environment)
1533
+ x.with_select_context(local_concepts, grain, environment)
1421
1534
  for x in self.order_by
1422
1535
  ],
1423
1536
  index=self.index,
@@ -1489,14 +1602,13 @@ class FilterItem(Namespaced, SelectContext, BaseModel):
1489
1602
  )
1490
1603
 
1491
1604
  def with_select_context(
1492
- self,
1493
- grain: Grain,
1494
- conditional: Conditional | Comparison | Parenthetical | None,
1495
- environment: Environment | None = None,
1605
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
1496
1606
  ) -> FilterItem:
1497
1607
  return FilterItem(
1498
- content=self.content.with_select_context(grain, conditional, environment),
1499
- where=self.where.with_select_context(grain, conditional, environment),
1608
+ content=self.content.with_select_context(
1609
+ local_concepts, grain, environment
1610
+ ),
1611
+ where=self.where.with_select_context(local_concepts, grain, environment),
1500
1612
  )
1501
1613
 
1502
1614
  @property
@@ -1576,14 +1688,11 @@ class OrderItem(Mergeable, SelectContext, Namespaced, BaseModel):
1576
1688
  return OrderItem(expr=self.expr.with_namespace(namespace), order=self.order)
1577
1689
 
1578
1690
  def with_select_context(
1579
- self,
1580
- grain: Grain,
1581
- conditional: Conditional | Comparison | Parenthetical | None,
1582
- environment: Environment | None = None,
1691
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
1583
1692
  ) -> "OrderItem":
1584
1693
  return OrderItem(
1585
1694
  expr=self.expr.with_select_context(
1586
- grain, conditional=conditional, environment=environment
1695
+ local_concepts, grain, environment=environment
1587
1696
  ),
1588
1697
  order=self.order,
1589
1698
  )
@@ -1604,7 +1713,7 @@ class OrderItem(Mergeable, SelectContext, Namespaced, BaseModel):
1604
1713
  return self.expr.output
1605
1714
 
1606
1715
 
1607
- class OrderBy(Mergeable, Namespaced, BaseModel):
1716
+ class OrderBy(SelectContext, Mergeable, Namespaced, BaseModel):
1608
1717
  items: List[OrderItem]
1609
1718
 
1610
1719
  def with_namespace(self, namespace: str) -> "OrderBy":
@@ -1617,6 +1726,14 @@ class OrderBy(Mergeable, Namespaced, BaseModel):
1617
1726
  items=[x.with_merge(source, target, modifiers) for x in self.items]
1618
1727
  )
1619
1728
 
1729
+ def with_select_context(self, local_concepts, grain, environment):
1730
+ return OrderBy(
1731
+ items=[
1732
+ x.with_select_context(local_concepts, grain, environment)
1733
+ for x in self.items
1734
+ ]
1735
+ )
1736
+
1620
1737
  @property
1621
1738
  def concept_arguments(self):
1622
1739
  return [x.expr for x in self.items]
@@ -1631,20 +1748,21 @@ class SelectStatement(HasUUID, Mergeable, Namespaced, SelectTypeMixin, BaseModel
1631
1748
  selection: List[SelectItem]
1632
1749
  order_by: Optional[OrderBy] = None
1633
1750
  limit: Optional[int] = None
1634
- meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
1751
+ meta: Metadata = Field(default_factory=lambda: Metadata())
1752
+ local_concepts: Annotated[
1753
+ EnvironmentConceptDict, PlainValidator(validate_concepts)
1754
+ ] = Field(default_factory=EnvironmentConceptDict)
1635
1755
 
1636
- def refresh_bindings(self, environment: Environment):
1637
- for item in self.selection:
1638
- if isinstance(item.content, Concept):
1639
- item.content = environment.concepts[item.content.address].with_grain(
1640
- self.grain
1641
- )
1642
-
1643
- def validate_syntax(self):
1756
+ def validate_syntax(self, environment: Environment):
1757
+ if self.where_clause:
1758
+ for x in self.where_clause.concept_arguments:
1759
+ if isinstance(x, UndefinedConcept):
1760
+ environment.concepts.raise_undefined(
1761
+ x.address, x.metadata.line_number
1762
+ )
1644
1763
  all_in_output = [x.address for x in self.output_components]
1645
1764
  if self.where_clause:
1646
1765
  for concept in self.where_clause.concept_arguments:
1647
-
1648
1766
  if (
1649
1767
  concept.lineage
1650
1768
  and isinstance(concept.lineage, Function)
@@ -1667,6 +1785,7 @@ class SelectStatement(HasUUID, Mergeable, Namespaced, SelectTypeMixin, BaseModel
1667
1785
  f"Cannot reference an aggregate derived in the select ({concept.address}) in the same statement where clause; move to the HAVING clause instead; Line: {self.meta.line_number}"
1668
1786
  )
1669
1787
  if self.having_clause:
1788
+ self.having_clause.hydrate_missing(self.local_concepts)
1670
1789
  for concept in self.having_clause.concept_arguments:
1671
1790
  if concept.address not in [x.address for x in self.output_components]:
1672
1791
  raise SyntaxError(
@@ -1884,9 +2003,6 @@ class CopyStatement(BaseModel):
1884
2003
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
1885
2004
  select: SelectStatement
1886
2005
 
1887
- def refresh_bindings(self, environment: Environment):
1888
- self.select.refresh_bindings(environment)
1889
-
1890
2006
 
1891
2007
  class AlignItem(Namespaced, BaseModel):
1892
2008
  alias: str
@@ -1940,24 +2056,21 @@ class MultiSelectStatement(HasUUID, SelectTypeMixin, Mergeable, Namespaced, Base
1940
2056
  order_by: Optional[OrderBy] = None
1941
2057
  limit: Optional[int] = None
1942
2058
  meta: Optional[Metadata] = Field(default_factory=lambda: Metadata())
1943
-
1944
- def refresh_bindings(self, environment: Environment):
1945
- for select in self.selects:
1946
- select.refresh_bindings(environment)
2059
+ local_concepts: Annotated[
2060
+ EnvironmentConceptDict, PlainValidator(validate_concepts)
2061
+ ] = Field(default_factory=EnvironmentConceptDict)
1947
2062
 
1948
2063
  def __repr__(self):
1949
2064
  return "MultiSelect<" + " MERGE ".join([str(s) for s in self.selects]) + ">"
1950
2065
 
1951
- @computed_field # type: ignore
1952
- @cached_property
2066
+ @property
1953
2067
  def arguments(self) -> List[Concept]:
1954
2068
  output = []
1955
2069
  for select in self.selects:
1956
2070
  output += select.input_components
1957
2071
  return unique(output, "address")
1958
2072
 
1959
- @computed_field # type: ignore
1960
- @cached_property
2073
+ @property
1961
2074
  def concept_arguments(self) -> List[Concept]:
1962
2075
  output = []
1963
2076
  for select in self.selects:
@@ -2007,6 +2120,9 @@ class MultiSelectStatement(HasUUID, SelectTypeMixin, Mergeable, Namespaced, Base
2007
2120
  if self.where_clause
2008
2121
  else None
2009
2122
  ),
2123
+ local_concepts=EnvironmentConceptDict(
2124
+ {k: v.with_namespace(namespace) for k, v in self.local_concepts.items()}
2125
+ ),
2010
2126
  )
2011
2127
 
2012
2128
  @property
@@ -2024,7 +2140,7 @@ class MultiSelectStatement(HasUUID, SelectTypeMixin, Mergeable, Namespaced, Base
2024
2140
  output.append(item.gen_concept(self))
2025
2141
  return output
2026
2142
 
2027
- def find_source(self, concept: Concept, cte: CTE) -> Concept:
2143
+ def find_source(self, concept: Concept, cte: CTE | UnionCTE) -> Concept:
2028
2144
  for x in self.align.items:
2029
2145
  if concept.name == x.alias:
2030
2146
  for c in x.concepts:
@@ -2125,6 +2241,9 @@ class Datasource(HasUUID, Namespaced, BaseModel):
2125
2241
  where: Optional[WhereClause] = None
2126
2242
  non_partial_for: Optional[WhereClause] = None
2127
2243
 
2244
+ def duplicate(self) -> Datasource:
2245
+ return self.model_copy(deep=True)
2246
+
2128
2247
  def merge_concept(
2129
2248
  self, source: Concept, target: Concept, modifiers: List[Modifier]
2130
2249
  ):
@@ -2511,7 +2630,7 @@ class QueryDatasource(BaseModel):
2511
2630
  and CONFIG.validate_missing
2512
2631
  ):
2513
2632
  raise SyntaxError(
2514
- f"Missing source map for {concept.address} on {key}, have {v}"
2633
+ f"On query datasource missing source map for {concept.address} on {key}, have {v}"
2515
2634
  )
2516
2635
  return v
2517
2636
 
@@ -2701,7 +2820,7 @@ class CTE(BaseModel):
2701
2820
  base: bool = False
2702
2821
  group_to_grain: bool = False
2703
2822
  existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
2704
- parent_ctes: List["CTE"] = Field(default_factory=list)
2823
+ parent_ctes: List[Union["CTE", "UnionCTE"]] = Field(default_factory=list)
2705
2824
  joins: List[Union["Join", "InstantiatedUnnestJoin"]] = Field(default_factory=list)
2706
2825
  condition: Optional[Union["Conditional", "Comparison", "Parenthetical"]] = None
2707
2826
  partial_concepts: List[Concept] = Field(default_factory=list)
@@ -2745,6 +2864,7 @@ class CTE(BaseModel):
2745
2864
 
2746
2865
  if self.condition:
2747
2866
  self.condition = self.condition.inline_constant(concept)
2867
+
2748
2868
  # if we've entirely removed the need to join to someplace to get the concept
2749
2869
  # drop the join as well.
2750
2870
  for removed_cte in removed:
@@ -2876,7 +2996,9 @@ class CTE(BaseModel):
2876
2996
  self.group_to_grain = True
2877
2997
  return True
2878
2998
 
2879
- def __add__(self, other: "CTE"):
2999
+ def __add__(self, other: "CTE" | UnionCTE):
3000
+ if isinstance(other, UnionCTE):
3001
+ raise ValueError("cannot merge CTE and union CTE")
2880
3002
  logger.info('Merging two copies of CTE "%s"', self.name)
2881
3003
  if not self.grain == other.grain:
2882
3004
  error = (
@@ -3052,8 +3174,72 @@ class CTE(BaseModel):
3052
3174
  return [c for c in self.output_columns if c.address in self.source_map]
3053
3175
 
3054
3176
 
3055
- def merge_ctes(ctes: List[CTE]) -> List[CTE]:
3056
- final_ctes_dict: Dict[str, CTE] = {}
3177
+ class UnionCTE(BaseModel):
3178
+ name: str
3179
+ source: QueryDatasource
3180
+ parent_ctes: list[CTE | UnionCTE]
3181
+ internal_ctes: list[CTE | UnionCTE]
3182
+ output_columns: List[Concept]
3183
+ grain: Grain
3184
+ operator: str = "UNION ALL"
3185
+ order_by: Optional[OrderBy] = None
3186
+ limit: Optional[int] = None
3187
+ hidden_concepts: list[Concept] = Field(default_factory=list)
3188
+ partial_concepts: list[Concept] = Field(default_factory=list)
3189
+ existence_source_map: Dict[str, list[str]] = Field(default_factory=dict)
3190
+
3191
+ @computed_field # type: ignore
3192
+ @property
3193
+ def output_lcl(self) -> LooseConceptList:
3194
+ return LooseConceptList(concepts=self.output_columns)
3195
+
3196
+ def get_alias(self, concept: Concept, source: str | None = None) -> str:
3197
+ for cte in self.parent_ctes:
3198
+ if concept.address in cte.output_columns:
3199
+ if source and source != cte.name:
3200
+ continue
3201
+ return concept.safe_address
3202
+ return "INVALID_ALIAS"
3203
+
3204
+ def get_concept(self, address: str) -> Concept | None:
3205
+ for cte in self.internal_ctes:
3206
+ if address in cte.output_columns:
3207
+ match = [x for x in cte.output_columns if x.address == address].pop()
3208
+ return match
3209
+
3210
+ match_list = [x for x in self.output_columns if x.address == address]
3211
+ if match_list:
3212
+ return match_list.pop()
3213
+ return None
3214
+
3215
+ @property
3216
+ def source_map(self):
3217
+ return {x.address: [] for x in self.output_columns}
3218
+
3219
+ @property
3220
+ def condition(self):
3221
+ return None
3222
+
3223
+ @condition.setter
3224
+ def condition(self, value):
3225
+ raise NotImplementedError
3226
+
3227
+ @property
3228
+ def safe_identifier(self):
3229
+ return self.name
3230
+
3231
+ @property
3232
+ def group_to_grain(self) -> bool:
3233
+ return False
3234
+
3235
+ def __add__(self, other):
3236
+ if not isinstance(other, UnionCTE) or not other.name == self.name:
3237
+ raise SyntaxError("Cannot merge union CTEs")
3238
+ return self
3239
+
3240
+
3241
+ def merge_ctes(ctes: List[CTE | UnionCTE]) -> List[CTE | UnionCTE]:
3242
+ final_ctes_dict: Dict[str, CTE | UnionCTE] = {}
3057
3243
  # merge CTEs
3058
3244
  for cte in ctes:
3059
3245
  if cte.name not in final_ctes_dict:
@@ -3078,7 +3264,6 @@ class JoinKey(BaseModel):
3078
3264
 
3079
3265
 
3080
3266
  class Join(BaseModel):
3081
-
3082
3267
  right_cte: CTE
3083
3268
  jointype: JoinType
3084
3269
  left_cte: CTE | None = None
@@ -3127,134 +3312,24 @@ class Join(BaseModel):
3127
3312
  class UndefinedConcept(Concept, Mergeable, Namespaced):
3128
3313
  model_config = ConfigDict(arbitrary_types_allowed=True)
3129
3314
  name: str
3130
- environment: "EnvironmentConceptDict"
3131
3315
  line_no: int | None = None
3132
3316
  datatype: DataType | ListType | StructType | MapType | NumericType = (
3133
3317
  DataType.UNKNOWN
3134
3318
  )
3135
- purpose: Purpose = Purpose.KEY
3136
-
3137
- def with_merge(
3138
- self, source: Concept, target: Concept, modifiers: List[Modifier]
3139
- ) -> "UndefinedConcept" | Concept:
3140
- if self.address == source.address:
3141
- new = target.with_grain(self.grain.with_merge(source, target, modifiers))
3142
- new.pseudonyms.add(self.address)
3143
- return new
3144
- return self.__class__(
3145
- name=self.name,
3146
- datatype=self.datatype,
3147
- purpose=self.purpose,
3148
- metadata=self.metadata,
3149
- lineage=(
3150
- self.lineage.with_merge(source, target, modifiers)
3151
- if self.lineage
3152
- else None
3153
- ),
3154
- grain=self.grain.with_merge(source, target, modifiers),
3155
- namespace=self.namespace,
3156
- keys=(
3157
- tuple(x.with_merge(source, target, modifiers) for x in self.keys)
3158
- if self.keys
3159
- else None
3160
- ),
3161
- environment=self.environment,
3162
- line_no=self.line_no,
3163
- )
3164
-
3165
- def with_namespace(self, namespace: str) -> "UndefinedConcept":
3166
- return self.__class__(
3167
- name=self.name,
3168
- datatype=self.datatype,
3169
- purpose=self.purpose,
3170
- metadata=self.metadata,
3171
- lineage=self.lineage.with_namespace(namespace) if self.lineage else None,
3172
- grain=(
3173
- self.grain.with_namespace(namespace)
3174
- if self.grain
3175
- else Grain(components=[])
3176
- ),
3177
- namespace=namespace,
3178
- keys=self.keys,
3179
- environment=self.environment,
3180
- line_no=self.line_no,
3181
- )
3319
+ purpose: Purpose = Purpose.UNKNOWN
3182
3320
 
3183
3321
  def with_select_context(
3184
3322
  self,
3185
- grain: Optional["Grain"] = None,
3186
- conditional: Conditional | Comparison | Parenthetical | None = None,
3187
- environment: Environment | None = None,
3188
- ) -> "UndefinedConcept":
3189
- if not all([isinstance(x, Concept) for x in self.keys or []]):
3190
- raise ValueError(f"Invalid keys {self.keys} for concept {self.address}")
3191
- new_grain = grain or Grain(components=[])
3192
- if self.lineage:
3193
- new_lineage = self.lineage
3194
- if isinstance(self.lineage, SelectContext):
3195
- new_lineage = self.lineage.with_select_context(
3196
- new_grain, conditional, environment
3197
- )
3198
- else:
3199
- new_lineage = None
3200
- return self.__class__(
3201
- name=self.name,
3202
- datatype=self.datatype,
3203
- purpose=self.purpose,
3204
- metadata=self.metadata,
3205
- lineage=new_lineage,
3206
- grain=new_grain,
3207
- namespace=self.namespace,
3208
- keys=self.keys,
3209
- environment=self.environment,
3210
- )
3211
-
3212
- def with_grain(self, grain: Optional["Grain"] = None) -> "UndefinedConcept":
3213
- return self.__class__(
3214
- name=self.name,
3215
- datatype=self.datatype,
3216
- purpose=self.purpose,
3217
- metadata=self.metadata,
3218
- lineage=self.lineage,
3219
- grain=grain or Grain(components=[]),
3220
- namespace=self.namespace,
3221
- keys=self.keys,
3222
- environment=self.environment,
3223
- line_no=self.line_no,
3224
- )
3323
+ local_concepts: dict[str, Concept],
3324
+ grain: Grain,
3325
+ environment: Environment,
3326
+ ) -> "Concept":
3327
+ if self.address in local_concepts:
3328
+ rval = local_concepts[self.address]
3329
+ rval = rval.with_select_context(local_concepts, grain, environment)
3225
3330
 
3226
- def with_default_grain(self) -> "UndefinedConcept":
3227
- if self.purpose == Purpose.KEY:
3228
- # we need to make this abstract
3229
- grain = Grain(components=[self.with_grain(Grain())], nested=True)
3230
- elif self.purpose == Purpose.PROPERTY:
3231
- components: List[Concept] = []
3232
- if self.keys:
3233
- components = [*self.keys]
3234
- if self.lineage:
3235
- for item in self.lineage.arguments:
3236
- if isinstance(item, Concept):
3237
- if item.keys and not all(c in components for c in item.keys):
3238
- components += item.sources
3239
- else:
3240
- components += item.sources
3241
- grain = Grain(components=components)
3242
- elif self.purpose == Purpose.METRIC:
3243
- grain = Grain()
3244
- else:
3245
- grain = self.grain # type: ignore
3246
- return self.__class__(
3247
- name=self.name,
3248
- datatype=self.datatype,
3249
- purpose=self.purpose,
3250
- metadata=self.metadata,
3251
- lineage=self.lineage,
3252
- grain=grain,
3253
- keys=self.keys,
3254
- namespace=self.namespace,
3255
- environment=self.environment,
3256
- line_no=self.line_no,
3257
- )
3331
+ return rval
3332
+ environment.concepts.raise_undefined(self.address, line_no=self.line_no)
3258
3333
 
3259
3334
 
3260
3335
  class EnvironmentDatasourceDict(dict):
@@ -3277,78 +3352,10 @@ class EnvironmentDatasourceDict(dict):
3277
3352
  def items(self) -> ItemsView[str, Datasource]: # type: ignore
3278
3353
  return super().items()
3279
3354
 
3280
-
3281
- class EnvironmentConceptDict(dict):
3282
- def __init__(self, *args, **kwargs) -> None:
3283
- super().__init__(self, *args, **kwargs)
3284
- self.undefined: dict[str, UndefinedConcept] = {}
3285
- self.fail_on_missing: bool = True
3286
- self.populate_default_concepts()
3287
-
3288
- def populate_default_concepts(self):
3289
- from trilogy.core.internal import DEFAULT_CONCEPTS
3290
-
3291
- for concept in DEFAULT_CONCEPTS.values():
3292
- self[concept.address] = concept
3293
-
3294
- def values(self) -> ValuesView[Concept]: # type: ignore
3295
- return super().values()
3296
-
3297
- def get(self, key: str, default: Concept | None = None) -> Concept | None: # type: ignore
3298
- try:
3299
- return self.__getitem__(key)
3300
- except UndefinedConceptException:
3301
- return default
3302
-
3303
- def __getitem__(
3304
- self, key, line_no: int | None = None, file: Path | None = None
3305
- ) -> Concept | UndefinedConcept:
3306
- try:
3307
- return super(EnvironmentConceptDict, self).__getitem__(key)
3308
-
3309
- except KeyError:
3310
- if "." in key and key.split(".", 1)[0] == DEFAULT_NAMESPACE:
3311
- return self.__getitem__(key.split(".", 1)[1], line_no)
3312
- if DEFAULT_NAMESPACE + "." + key in self:
3313
- return self.__getitem__(DEFAULT_NAMESPACE + "." + key, line_no)
3314
- if not self.fail_on_missing:
3315
- if key in self.undefined:
3316
- return self.undefined[key]
3317
- undefined = UndefinedConcept(
3318
- name=key,
3319
- line_no=line_no,
3320
- environment=self,
3321
- datatype=DataType.UNKNOWN,
3322
- purpose=Purpose.KEY,
3323
- )
3324
- self.undefined[key] = undefined
3325
- return undefined
3326
- matches = self._find_similar_concepts(key)
3327
- message = f"Undefined concept: {key}."
3328
- if matches:
3329
- message += f" Suggestions: {matches}"
3330
-
3331
- if line_no:
3332
- if file:
3333
- raise UndefinedConceptException(
3334
- f"{file}: {line_no}: " + message, matches
3335
- )
3336
- raise UndefinedConceptException(f"line: {line_no}: " + message, matches)
3337
- raise UndefinedConceptException(message, matches)
3338
-
3339
- def _find_similar_concepts(self, concept_name: str):
3340
- def strip_local(input: str):
3341
- if input.startswith(f"{DEFAULT_NAMESPACE}."):
3342
- return input[len(DEFAULT_NAMESPACE) + 1 :]
3343
- return input
3344
-
3345
- matches = difflib.get_close_matches(
3346
- strip_local(concept_name), [strip_local(x) for x in self.keys()]
3347
- )
3348
- return matches
3349
-
3350
- def items(self) -> ItemsView[str, Concept]: # type: ignore
3351
- return super().items()
3355
+ def duplicate(self) -> "EnvironmentDatasourceDict":
3356
+ new = EnvironmentDatasourceDict()
3357
+ new.update({k: v.duplicate() for k, v in self.items()})
3358
+ return new
3352
3359
 
3353
3360
 
3354
3361
  class ImportStatement(HasUUID, BaseModel):
@@ -3405,10 +3412,31 @@ class Environment(BaseModel):
3405
3412
  materialized_concepts: set[str] = Field(default_factory=set)
3406
3413
  alias_origin_lookup: Dict[str, Concept] = Field(default_factory=dict)
3407
3414
  # TODO: support freezing environments to avoid mutation
3408
- # frozen: bool = False
3415
+ frozen: bool = False
3416
+
3417
+ def freeze(self):
3418
+ self.frozen = True
3419
+
3420
+ def thaw(self):
3421
+ self.frozen = False
3409
3422
 
3410
3423
  def duplicate(self):
3411
- return self.model_copy(deep=True)
3424
+ return Environment.model_construct(
3425
+ datasources=self.datasources.duplicate(),
3426
+ concepts=self.concepts.duplicate(),
3427
+ functions=dict(self.functions),
3428
+ data_types=dict(self.data_types),
3429
+ imports=dict(self.imports),
3430
+ namespace=self.namespace,
3431
+ working_path=self.working_path,
3432
+ environment_config=self.environment_config,
3433
+ version=self.version,
3434
+ cte_name_map=dict(self.cte_name_map),
3435
+ materialized_concepts=set(self.materialized_concepts),
3436
+ alias_origin_lookup={
3437
+ k: v.duplicate() for k, v in self.alias_origin_lookup.items()
3438
+ },
3439
+ )
3412
3440
 
3413
3441
  def __init__(self, **data):
3414
3442
  super().__init__(**data)
@@ -3540,6 +3568,8 @@ class Environment(BaseModel):
3540
3568
  def add_import(
3541
3569
  self, alias: str, source: Environment, imp_stm: ImportStatement | None = None
3542
3570
  ):
3571
+ if self.frozen:
3572
+ raise ValueError("Environment is frozen, cannot add imports")
3543
3573
  exists = False
3544
3574
  existing = self.imports[alias]
3545
3575
  if imp_stm:
@@ -3560,6 +3590,9 @@ class Environment(BaseModel):
3560
3590
  # we can't exit early
3561
3591
  # as there may be new concepts
3562
3592
  for k, concept in source.concepts.items():
3593
+ # skip internal namespace
3594
+ if INTERNAL_NAMESPACE in concept.address:
3595
+ continue
3563
3596
  if same_namespace:
3564
3597
  new = self.add_concept(concept, _ignore_cache=True)
3565
3598
  else:
@@ -3592,9 +3625,11 @@ class Environment(BaseModel):
3592
3625
  def add_file_import(
3593
3626
  self, path: str | Path, alias: str, env: Environment | None = None
3594
3627
  ):
3628
+ if self.frozen:
3629
+ raise ValueError("Environment is frozen, cannot add imports")
3595
3630
  from trilogy.parsing.parse_engine import (
3596
- ParseToObjects,
3597
3631
  PARSER,
3632
+ ParseToObjects,
3598
3633
  gen_cache_lookup,
3599
3634
  )
3600
3635
 
@@ -3626,6 +3661,7 @@ class Environment(BaseModel):
3626
3661
  )
3627
3662
  nparser.set_text(text)
3628
3663
  nparser.transform(PARSER.parse(text))
3664
+ nparser.hydrate_missing()
3629
3665
 
3630
3666
  except Exception as e:
3631
3667
  raise ImportError(
@@ -3676,6 +3712,8 @@ class Environment(BaseModel):
3676
3712
  add_derived: bool = True,
3677
3713
  _ignore_cache: bool = False,
3678
3714
  ):
3715
+ if self.frozen:
3716
+ raise ValueError("Environment is frozen, cannot add concepts")
3679
3717
  if not force:
3680
3718
  existing = self.validate_concept(concept, meta=meta)
3681
3719
  if existing:
@@ -3694,6 +3732,8 @@ class Environment(BaseModel):
3694
3732
  meta: Meta | None = None,
3695
3733
  _ignore_cache: bool = False,
3696
3734
  ):
3735
+ if self.frozen:
3736
+ raise ValueError("Environment is frozen, cannot add datasource")
3697
3737
  self.datasources[datasource.identifier] = datasource
3698
3738
 
3699
3739
  eligible_to_promote_roots = datasource.non_partial_for is None
@@ -3745,6 +3785,8 @@ class Environment(BaseModel):
3745
3785
  address: str,
3746
3786
  meta: Meta | None = None,
3747
3787
  ) -> bool:
3788
+ if self.frozen:
3789
+ raise ValueError("Environment is frozen, cannot delete datsources")
3748
3790
  if address in self.datasources:
3749
3791
  del self.datasources[address]
3750
3792
  self.gen_concept_list_caches()
@@ -3752,17 +3794,22 @@ class Environment(BaseModel):
3752
3794
  return False
3753
3795
 
3754
3796
  def merge_concept(
3755
- self, source: Concept, target: Concept, modifiers: List[Modifier]
3756
- ):
3797
+ self,
3798
+ source: Concept,
3799
+ target: Concept,
3800
+ modifiers: List[Modifier],
3801
+ force: bool = False,
3802
+ ) -> bool:
3803
+ if self.frozen:
3804
+ raise ValueError("Environment is frozen, cannot merge concepts")
3757
3805
  replacements = {}
3758
3806
 
3759
3807
  # exit early if we've run this
3760
- if source.address in self.alias_origin_lookup:
3808
+ if source.address in self.alias_origin_lookup and not force:
3761
3809
  if self.concepts[source.address] == target:
3762
- return
3810
+ return False
3763
3811
  self.alias_origin_lookup[source.address] = source
3764
3812
  for k, v in self.concepts.items():
3765
-
3766
3813
  if v.address == target.address:
3767
3814
  v.pseudonyms.add(source.address)
3768
3815
  if v.address == source.address:
@@ -3776,6 +3823,7 @@ class Environment(BaseModel):
3776
3823
  for k, ds in self.datasources.items():
3777
3824
  if source.address in ds.output_lcl:
3778
3825
  ds.merge_concept(source, target, modifiers=modifiers)
3826
+ return True
3779
3827
 
3780
3828
 
3781
3829
  class LazyEnvironment(Environment):
@@ -3849,6 +3897,17 @@ class Comparison(
3849
3897
  ]
3850
3898
  operator: ComparisonOperator
3851
3899
 
3900
+ def hydrate_missing(self, concepts: EnvironmentConceptDict):
3901
+ if isinstance(self.left, UndefinedConcept) and self.left.address in concepts:
3902
+ self.left = concepts[self.left.address]
3903
+ if isinstance(self.right, UndefinedConcept) and self.right.address in concepts:
3904
+ self.right = concepts[self.right.address]
3905
+ if isinstance(self.left, Mergeable):
3906
+ self.left.hydrate_missing(concepts)
3907
+ if isinstance(self.right, Mergeable):
3908
+ self.right.hydrate_missing(concepts)
3909
+ return self
3910
+
3852
3911
  def __init__(self, *args, **kwargs) -> None:
3853
3912
  super().__init__(*args, **kwargs)
3854
3913
  if self.operator in (ComparisonOperator.IS, ComparisonOperator.IS_NOT):
@@ -3963,20 +4022,17 @@ class Comparison(
3963
4022
  )
3964
4023
 
3965
4024
  def with_select_context(
3966
- self,
3967
- grain: Grain,
3968
- conditional: Conditional | Comparison | Parenthetical | None,
3969
- environment: Environment | None = None,
4025
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
3970
4026
  ):
3971
4027
  return self.__class__(
3972
4028
  left=(
3973
- self.left.with_select_context(grain, conditional, environment)
4029
+ self.left.with_select_context(local_concepts, grain, environment)
3974
4030
  if isinstance(self.left, SelectContext)
3975
4031
  else self.left
3976
4032
  ),
3977
4033
  # the right side does NOT need to inherit select grain
3978
4034
  right=(
3979
- self.right.with_select_context(grain, conditional, environment)
4035
+ self.right.with_select_context(local_concepts, grain, environment)
3980
4036
  if isinstance(self.right, SelectContext)
3981
4037
  else self.right
3982
4038
  ),
@@ -4042,7 +4098,6 @@ class Comparison(
4042
4098
 
4043
4099
 
4044
4100
  class SubselectComparison(Comparison):
4045
-
4046
4101
  def __eq__(self, other):
4047
4102
  if not isinstance(other, SubselectComparison):
4048
4103
  return False
@@ -4064,14 +4119,14 @@ class SubselectComparison(Comparison):
4064
4119
 
4065
4120
  def with_select_context(
4066
4121
  self,
4122
+ local_concepts: dict[str, Concept],
4067
4123
  grain: Grain,
4068
- conditional: Conditional | Comparison | Parenthetical | None,
4069
- environment: Environment | None = None,
4124
+ environment: Environment,
4070
4125
  ):
4071
4126
  # there's no need to pass the select grain through to a subselect comparison on the right
4072
4127
  return self.__class__(
4073
4128
  left=(
4074
- self.left.with_select_context(grain, conditional, environment)
4129
+ self.left.with_select_context(local_concepts, grain, environment)
4075
4130
  if isinstance(self.left, SelectContext)
4076
4131
  else self.left
4077
4132
  ),
@@ -4108,17 +4163,14 @@ class CaseWhen(Namespaced, SelectContext, BaseModel):
4108
4163
  )
4109
4164
 
4110
4165
  def with_select_context(
4111
- self,
4112
- grain: Grain,
4113
- conditional: Conditional | Comparison | Parenthetical | None,
4114
- environment: Environment | None = None,
4166
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
4115
4167
  ) -> CaseWhen:
4116
4168
  return CaseWhen(
4117
4169
  comparison=self.comparison.with_select_context(
4118
- grain, conditional, environment
4170
+ local_concepts, grain, environment
4119
4171
  ),
4120
4172
  expr=(
4121
- (self.expr.with_select_context(grain, conditional, environment))
4173
+ (self.expr.with_select_context(local_concepts, grain, environment))
4122
4174
  if isinstance(self.expr, SelectContext)
4123
4175
  else self.expr
4124
4176
  ),
@@ -4136,14 +4188,14 @@ class CaseElse(Namespaced, SelectContext, BaseModel):
4136
4188
 
4137
4189
  def with_select_context(
4138
4190
  self,
4191
+ local_concepts: dict[str, Concept],
4139
4192
  grain: Grain,
4140
- conditional: Conditional | Comparison | Parenthetical | None,
4141
- environment: Environment | None = None,
4142
- ) -> CaseElse:
4193
+ environment: Environment,
4194
+ ):
4143
4195
  return CaseElse(
4144
4196
  discriminant=self.discriminant,
4145
4197
  expr=(
4146
- self.expr.with_select_context(grain, conditional, environment)
4198
+ self.expr.with_select_context(local_concepts, grain, environment)
4147
4199
  if isinstance(
4148
4200
  self.expr,
4149
4201
  SelectContext,
@@ -4215,7 +4267,6 @@ class Conditional(
4215
4267
  return f"{str(self.left)} {self.operator.value} {str(self.right)}"
4216
4268
 
4217
4269
  def __eq__(self, other):
4218
-
4219
4270
  if not isinstance(other, Conditional):
4220
4271
  return False
4221
4272
  return (
@@ -4283,19 +4334,16 @@ class Conditional(
4283
4334
  )
4284
4335
 
4285
4336
  def with_select_context(
4286
- self,
4287
- grain: Grain,
4288
- conditional: Conditional | Comparison | Parenthetical | None,
4289
- environment: Environment | None = None,
4337
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
4290
4338
  ):
4291
4339
  return Conditional(
4292
4340
  left=(
4293
- self.left.with_select_context(grain, conditional, environment)
4341
+ self.left.with_select_context(local_concepts, grain, environment)
4294
4342
  if isinstance(self.left, SelectContext)
4295
4343
  else self.left
4296
4344
  ),
4297
4345
  right=(
4298
- self.right.with_select_context(grain, conditional, environment)
4346
+ self.right.with_select_context(local_concepts, grain, environment)
4299
4347
  if isinstance(self.right, SelectContext)
4300
4348
  else self.right
4301
4349
  ),
@@ -4404,16 +4452,16 @@ class AggregateWrapper(Mergeable, Namespaced, SelectContext, BaseModel):
4404
4452
  )
4405
4453
 
4406
4454
  def with_select_context(
4407
- self,
4408
- grain: Grain,
4409
- conditional: Conditional | Comparison | Parenthetical | None,
4410
- environment: Environment | None = None,
4455
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
4411
4456
  ) -> AggregateWrapper:
4412
4457
  if not self.by:
4413
4458
  by = grain.components_copy
4414
4459
  else:
4415
- by = self.by
4416
- parent = self.function.with_select_context(grain, conditional, environment)
4460
+ by = [
4461
+ x.with_select_context(local_concepts, grain, environment)
4462
+ for x in self.by
4463
+ ]
4464
+ parent = self.function.with_select_context(local_concepts, grain, environment)
4417
4465
  return AggregateWrapper(function=parent, by=by)
4418
4466
 
4419
4467
 
@@ -4448,14 +4496,11 @@ class WhereClause(Mergeable, ConceptArgs, Namespaced, SelectContext, BaseModel):
4448
4496
  return WhereClause(conditional=self.conditional.with_namespace(namespace))
4449
4497
 
4450
4498
  def with_select_context(
4451
- self,
4452
- grain: Grain,
4453
- conditional: Conditional | Comparison | Parenthetical | None,
4454
- environment: Environment | None = None,
4499
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
4455
4500
  ) -> WhereClause:
4456
- return WhereClause(
4501
+ return self.__class__(
4457
4502
  conditional=self.conditional.with_select_context(
4458
- grain, conditional, environment
4503
+ local_concepts, grain, environment
4459
4504
  )
4460
4505
  )
4461
4506
 
@@ -4485,6 +4530,18 @@ class WhereClause(Mergeable, ConceptArgs, Namespaced, SelectContext, BaseModel):
4485
4530
  class HavingClause(WhereClause):
4486
4531
  pass
4487
4532
 
4533
+ def hydrate_missing(self, concepts: EnvironmentConceptDict):
4534
+ self.conditional.hydrate_missing(concepts)
4535
+
4536
+ def with_select_context(
4537
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
4538
+ ) -> HavingClause:
4539
+ return HavingClause(
4540
+ conditional=self.conditional.with_select_context(
4541
+ local_concepts, grain, environment
4542
+ )
4543
+ )
4544
+
4488
4545
 
4489
4546
  class MaterializedDataset(BaseModel):
4490
4547
  address: Address
@@ -4497,8 +4554,8 @@ class MaterializedDataset(BaseModel):
4497
4554
 
4498
4555
  class ProcessedQuery(BaseModel):
4499
4556
  output_columns: List[Concept]
4500
- ctes: List[CTE]
4501
- base: CTE
4557
+ ctes: List[CTE | UnionCTE]
4558
+ base: CTE | UnionCTE
4502
4559
  joins: List[Join]
4503
4560
  grain: Grain
4504
4561
  hidden_columns: List[Concept] = Field(default_factory=list)
@@ -4506,6 +4563,9 @@ class ProcessedQuery(BaseModel):
4506
4563
  where_clause: Optional[WhereClause] = None
4507
4564
  having_clause: Optional[HavingClause] = None
4508
4565
  order_by: Optional[OrderBy] = None
4566
+ local_concepts: Annotated[
4567
+ EnvironmentConceptDict, PlainValidator(validate_concepts)
4568
+ ] = Field(default_factory=EnvironmentConceptDict)
4509
4569
 
4510
4570
 
4511
4571
  class PersistQueryMixin(BaseModel):
@@ -4606,7 +4666,6 @@ class RowsetDerivationStatement(HasUUID, Namespaced, BaseModel):
4606
4666
  components=[orig[c.address] for c in x.grain.components_copy]
4607
4667
  )
4608
4668
  else:
4609
-
4610
4669
  x.grain = default_grain
4611
4670
  return output
4612
4671
 
@@ -4730,14 +4789,11 @@ class Parenthetical(
4730
4789
  )
4731
4790
 
4732
4791
  def with_select_context(
4733
- self,
4734
- grain: Grain,
4735
- conditional: Conditional | Comparison | Parenthetical | None,
4736
- environment: Environment | None = None,
4792
+ self, local_concepts: dict[str, Concept], grain: Grain, environment: Environment
4737
4793
  ):
4738
4794
  return Parenthetical(
4739
4795
  content=(
4740
- self.content.with_select_context(grain, conditional, environment)
4796
+ self.content.with_select_context(local_concepts, grain, environment)
4741
4797
  if isinstance(self.content, SelectContext)
4742
4798
  else self.content
4743
4799
  )