pytrilogy 0.0.3.94__py3-none-any.whl → 0.0.3.96__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pytrilogy might be problematic. Click here for more details.

Files changed (35) hide show
  1. {pytrilogy-0.0.3.94.dist-info → pytrilogy-0.0.3.96.dist-info}/METADATA +184 -136
  2. {pytrilogy-0.0.3.94.dist-info → pytrilogy-0.0.3.96.dist-info}/RECORD +35 -30
  3. trilogy/__init__.py +1 -1
  4. trilogy/authoring/__init__.py +61 -43
  5. trilogy/core/enums.py +13 -0
  6. trilogy/core/env_processor.py +19 -10
  7. trilogy/core/environment_helpers.py +111 -0
  8. trilogy/core/exceptions.py +21 -1
  9. trilogy/core/functions.py +6 -1
  10. trilogy/core/graph_models.py +11 -37
  11. trilogy/core/internal.py +18 -0
  12. trilogy/core/models/core.py +3 -0
  13. trilogy/core/models/environment.py +28 -0
  14. trilogy/core/models/execute.py +7 -0
  15. trilogy/core/processing/node_generators/select_merge_node.py +2 -2
  16. trilogy/core/query_processor.py +2 -1
  17. trilogy/core/statements/author.py +18 -3
  18. trilogy/core/statements/common.py +0 -10
  19. trilogy/core/statements/execute.py +73 -16
  20. trilogy/core/validation/common.py +110 -0
  21. trilogy/core/validation/concept.py +125 -0
  22. trilogy/core/validation/datasource.py +194 -0
  23. trilogy/core/validation/environment.py +71 -0
  24. trilogy/dialect/base.py +48 -21
  25. trilogy/dialect/metadata.py +233 -0
  26. trilogy/dialect/sql_server.py +3 -1
  27. trilogy/engine.py +25 -7
  28. trilogy/executor.py +94 -162
  29. trilogy/parsing/parse_engine.py +34 -3
  30. trilogy/parsing/trilogy.lark +11 -5
  31. {pytrilogy-0.0.3.94.dist-info → pytrilogy-0.0.3.96.dist-info}/WHEEL +0 -0
  32. {pytrilogy-0.0.3.94.dist-info → pytrilogy-0.0.3.96.dist-info}/entry_points.txt +0 -0
  33. {pytrilogy-0.0.3.94.dist-info → pytrilogy-0.0.3.96.dist-info}/licenses/LICENSE.md +0 -0
  34. {pytrilogy-0.0.3.94.dist-info → pytrilogy-0.0.3.96.dist-info}/top_level.txt +0 -0
  35. /trilogy/{compiler.py → core/validation/__init__.py} +0 -0
@@ -41,7 +41,9 @@ from trilogy.core.models.core import (
41
41
  DataType,
42
42
  ListWrapper,
43
43
  MapType,
44
+ NumericType,
44
45
  StructType,
46
+ TraitDataType,
45
47
  )
46
48
  from trilogy.core.models.datasource import Address, Datasource, DatasourceMetadata
47
49
  from trilogy.core.models.environment import Environment
@@ -58,66 +60,82 @@ from trilogy.core.statements.author import (
58
60
  RowsetDerivationStatement,
59
61
  SelectItem,
60
62
  SelectStatement,
63
+ ShowCategory,
64
+ ShowStatement,
65
+ ValidateStatement,
61
66
  )
62
67
  from trilogy.parsing.common import arbitrary_to_concept, arg_to_datatype
63
68
 
64
69
  __all__ = [
65
- "Concept",
66
- "Function",
67
- "WhereClause",
68
- "Comparison",
69
- "FilterItem",
70
- "CaseWhen",
71
- "CaseElse",
72
- "AggregateWrapper",
73
- "WindowItem",
74
- "WindowOrder",
75
- "WindowType",
76
- "WindowItemOrder",
77
- "WindowItemOver",
78
- "DataType",
79
- "StructType",
80
- "ArrayType",
81
- "Grain",
82
- "RowsetDerivationStatement",
83
- "MapType",
84
- "ListWrapper",
70
+ # trilogy.constants
71
+ "DEFAULT_NAMESPACE",
72
+ # trilogy.core.enums
73
+ "BooleanOperator",
74
+ "ComparisonOperator",
75
+ "FunctionClass",
85
76
  "FunctionType",
77
+ "InfiniteFunctionArgs",
78
+ "Ordering",
79
+ "Purpose",
80
+ # trilogy.core.functions
86
81
  "FunctionFactory",
87
- "ConceptDeclarationStatement",
88
- "ConceptTransform",
89
- "SelectItem",
90
- "SelectStatement",
91
- "Environment",
82
+ # trilogy.core.models.author
83
+ "AggregateWrapper",
84
+ "CaseElse",
85
+ "CaseWhen",
86
+ "Comparison",
87
+ "Concept",
92
88
  "ConceptRef",
89
+ "Conditional",
90
+ "FilterItem",
91
+ "Function",
92
+ "FunctionCallWrapper",
93
93
  "HavingClause",
94
94
  "MagicConstants",
95
95
  "Metadata",
96
+ "MultiSelectLineage",
96
97
  "OrderBy",
97
98
  "OrderItem",
98
99
  "Parenthetical",
100
+ "RowsetItem",
99
101
  "SubselectComparison",
100
- "Conditional",
101
- "BooleanOperator",
102
- "ComparisonOperator",
103
- "FunctionClass",
104
- "FunctionType",
105
- "InfiniteFunctionArgs",
106
- "Ordering",
107
- "Purpose",
108
- "DEFAULT_NAMESPACE",
109
- "arbitrary_to_concept",
110
- "arg_to_datatype",
111
- "MultiSelectStatement",
112
- "PersistStatement",
113
- "RawSQLStatement",
102
+ "WhereClause",
103
+ "WindowItem",
104
+ "WindowItemOrder",
105
+ "WindowItemOver",
106
+ "WindowOrder",
107
+ "WindowType",
108
+ # trilogy.core.models.core
109
+ "ArrayType",
110
+ "DataType",
111
+ "ListWrapper",
112
+ "MapType",
113
+ "NumericType",
114
+ "StructType",
115
+ "TraitDataType",
116
+ # trilogy.core.models.datasource
117
+ "Address",
114
118
  "Datasource",
115
119
  "DatasourceMetadata",
116
- "MultiSelectLineage",
117
- "RowsetItem",
118
- "FunctionCallWrapper",
120
+ # trilogy.core.models.environment
121
+ "Environment",
122
+ # trilogy.core.statements.author
123
+ "ConceptDeclarationStatement",
124
+ "ConceptTransform",
119
125
  "CopyStatement",
126
+ "Grain",
120
127
  "HasUUID",
121
128
  "ImportStatement",
122
- "Address",
129
+ "MultiSelectStatement",
130
+ "PersistStatement",
131
+ "RawSQLStatement",
132
+ "RowsetDerivationStatement",
133
+ "SelectItem",
134
+ "SelectStatement",
135
+ "ShowCategory",
136
+ "ShowStatement",
137
+ "ValidateStatement",
138
+ # trilogy.parsing.common
139
+ "arbitrary_to_concept",
140
+ "arg_to_datatype",
123
141
  ]
trilogy/core/enums.py CHANGED
@@ -240,6 +240,7 @@ class FunctionType(Enum):
240
240
  # CONSTANTS
241
241
  CURRENT_DATE = "current_date"
242
242
  CURRENT_DATETIME = "current_datetime"
243
+ CURRENT_TIMESTAMP = "current_timestamp"
243
244
 
244
245
 
245
246
  class FunctionClass(Enum):
@@ -378,3 +379,15 @@ class IOType(Enum):
378
379
  if isinstance(value, str) and value.lower() != value:
379
380
  return IOType(value.lower())
380
381
  return super()._missing_(value)
382
+
383
+
384
class ValidationScope(Enum):
    """Scopes a validation can target: everything, concepts, or datasources.

    Lookup by value tolerates upper/mixed-case strings: ``ValidationScope("ALL")``
    resolves to ``ValidationScope.ALL``.
    """

    ALL = "all"
    CONCEPTS = "concepts"
    DATASOURCES = "datasources"

    @classmethod
    def _missing_(cls, value):
        # Only strings that actually change when lowercased get a second
        # lookup; everything else falls through to Enum's default handling
        # (which ends in ValueError).
        lowered = value.lower() if isinstance(value, str) else None
        if lowered is None or lowered == value:
            return super()._missing_(value)
        return cls(lowered)
@@ -20,7 +20,8 @@ def add_concept(
20
20
  if node_name in seen:
21
21
  return
22
22
  seen.add(node_name)
23
- g.add_node(concept)
23
+ g.concepts[node_name] = concept
24
+ g.add_node(node_name)
24
25
  if concept.concept_arguments:
25
26
  for source in concept.concept_arguments:
26
27
  if not isinstance(source, BuildConcept):
@@ -28,9 +29,10 @@ def add_concept(
28
29
  f"Invalid non-build concept {source} passed into graph generation from {concept}"
29
30
  )
30
31
  generic = get_default_grain_concept(source, default_concept_graph)
32
+ generic_node = concept_to_node(generic)
31
33
  add_concept(generic, g, concept_mapping, default_concept_graph, seen)
32
34
 
33
- g.add_edge(generic, node_name)
35
+ g.add_edge(generic_node, node_name, fast=True)
34
36
  for ps_address in concept.pseudonyms:
35
37
  if ps_address not in concept_mapping:
36
38
  raise SyntaxError(f"Concept {concept} has invalid pseudonym {ps_address}")
@@ -44,8 +46,8 @@ def add_concept(
44
46
  continue
45
47
  if pseudonym_node.split("@")[0] == node_name.split("@")[0]:
46
48
  continue
47
- g.add_edge(pseudonym_node, node_name)
48
- g.add_edge(node_name, pseudonym_node)
49
+ g.add_edge(pseudonym_node, node_name, fast=True)
50
+ g.add_edge(node_name, pseudonym_node, fast=True)
49
51
  g.pseudonyms.add((pseudonym_node, node_name))
50
52
  g.pseudonyms.add((node_name, pseudonym_node))
51
53
  add_concept(pseudonym, g, concept_mapping, default_concept_graph, seen)
@@ -82,20 +84,27 @@ def generate_adhoc_graph(
82
84
 
83
85
  for dataset in datasources:
84
86
  node = datasource_to_node(dataset)
85
- g.add_node(dataset, type="datasource", datasource=dataset)
87
+ g.add_datasource_node(node, dataset)
86
88
  for concept in dataset.concepts:
89
+ cnode = concept_to_node(concept)
90
+ g.concepts[cnode] = concept
91
+ g.add_node(cnode)
87
92
  if restrict_to_listed:
88
- if concept_to_node(concept) not in g.nodes:
93
+ if cnode not in g.nodes:
89
94
  continue
90
- g.add_edge(node, concept)
91
- g.add_edge(concept, node)
95
+ g.add_edge(node, cnode, fast=True)
96
+ g.add_edge(cnode, node, fast=True)
92
97
  # if there is a key on a table at a different grain
93
98
  # add an FK edge to the canonical source, if it exists
94
99
  # for example, order ID on order product table
95
100
  default = get_default_grain_concept(concept, default_concept_graph)
101
+
96
102
  if concept != default:
97
- g.add_edge(concept, default)
98
- g.add_edge(default, concept)
103
+ dcnode = concept_to_node(default)
104
+ g.concepts[dcnode] = default
105
+ g.add_node(dcnode)
106
+ g.add_edge(cnode, dcnode, fast=True)
107
+ g.add_edge(dcnode, cnode, fast=True)
99
108
  return g
100
109
 
101
110
 
@@ -169,6 +169,112 @@ def generate_key_concepts(concept: Concept, environment: Environment):
169
169
  environment.add_concept(new_concept, add_derived=False)
170
170
 
171
171
 
172
def remove_date_concepts(concept: Concept, environment: Environment):
    """Drop the auto-derived date parts that hang off ``concept``.

    Candidates live at ``<concept.address>.<suffix>`` for each date-part and
    date-grain suffix below. A candidate is removed only when its metadata
    marks it as ``ConceptSource.AUTO_DERIVED`` and ``concept.address`` is one
    of its keys; anything else found at that address is left in place.
    """
    suffixes = (
        "month",
        "year",
        "quarter",
        "day",
        "day_of_week",
        "month_start",
        "year_start",
    )
    for suffix in suffixes:
        candidate_address = concept.address + f".{suffix}"
        if candidate_address not in environment.concepts:
            continue
        candidate = environment.concepts[candidate_address]
        meta = candidate.metadata
        if not meta or not (meta.concept_source == ConceptSource.AUTO_DERIVED):
            continue
        # Guard against a same-named concept that was not derived from this one.
        if not candidate.keys or concept.address not in candidate.keys:
            continue
        environment.remove_concept(candidate_address)
190
+
191
+
192
def remove_datetime_concepts(concept: Concept, environment: Environment):
    """Drop the auto-derived time-of-day parts that hang off ``concept``.

    Candidates live at ``<concept.address>.<suffix>`` for ``date``, ``hour``,
    ``minute`` and ``second``. A candidate is removed only when its metadata
    marks it as ``ConceptSource.AUTO_DERIVED`` and ``concept.address`` is one
    of its keys.
    """
    for suffix in ("date", "hour", "minute", "second"):
        candidate_address = concept.address + f".{suffix}"
        if candidate_address not in environment.concepts:
            continue
        candidate = environment.concepts[candidate_address]
        meta = candidate.metadata
        if not meta or not (meta.concept_source == ConceptSource.AUTO_DERIVED):
            continue
        # Guard against a same-named concept that was not derived from this one.
        if not candidate.keys or concept.address not in candidate.keys:
            continue
        environment.remove_concept(candidate_address)
209
+
210
+
211
def remove_key_concepts(concept: Concept, environment: Environment):
    """Drop the auto-derived ``<concept.address>.count`` concept, if present.

    The candidate is removed only when its metadata marks it as
    ``ConceptSource.AUTO_DERIVED``; unlike the date helpers, its keys are not
    inspected.
    """
    for suffix in ("count",):
        candidate_address = concept.address + f".{suffix}"
        if candidate_address not in environment.concepts:
            continue
        meta = environment.concepts[candidate_address].metadata
        if meta and meta.concept_source == ConceptSource.AUTO_DERIVED:
            environment.remove_concept(candidate_address)
225
+
226
+
227
def remove_struct_concepts(concept: Concept, environment: Environment):
    """Drop the auto-derived field-access concepts of a struct-typed ``concept``.

    Does nothing unless ``concept.datatype`` is a ``StructType``. Field
    concepts are looked for in the namespace ``<environment.namespace>.<name>``
    (or just ``<name>`` when the environment uses the default/empty
    namespace) and must be auto-derived ``ATTR_ACCESS`` functions whose first
    argument is ``concept.reference``.
    """
    if not isinstance(concept.datatype, StructType):
        return

    if environment.namespace and environment.namespace != DEFAULT_NAMESPACE:
        target_namespace = environment.namespace + "." + concept.name
    else:
        target_namespace = concept.name

    def _is_field_of_concept(candidate) -> bool:
        """Return True when ``candidate`` is an auto-derived field of ``concept``."""
        if not (candidate.namespace == target_namespace):
            return False
        meta = candidate.metadata
        if not meta or not (meta.concept_source == ConceptSource.AUTO_DERIVED):
            return False
        lineage = candidate.lineage
        if not isinstance(lineage, Function):
            return False
        if not (lineage.operator == FunctionType.ATTR_ACCESS):
            return False
        if not len(lineage.arguments) >= 1:
            return False
        return bool(lineage.arguments[0] == concept.reference)

    # Collect first, delete afterwards: removing while iterating would mutate
    # the mapping being walked.
    doomed = [
        address
        for address, candidate in environment.concepts.items()
        if _is_field_of_concept(candidate)
    ]
    for address in doomed:
        environment.remove_concept(address)
254
+
255
+
256
def remove_related_concepts(concept: Concept, environment: Environment):
    """Remove every auto-generated concept that was derived from ``concept``.

    Dispatches to the purpose- and datatype-specific helpers:
    key concepts lose their derived count, DATE concepts lose their date
    parts, DATETIME/TIMESTAMP concepts lose both date and time parts, and
    struct concepts lose their field-access concepts.
    """
    if concept.purpose == Purpose.KEY:
        remove_key_concepts(concept, environment)

    datatype = concept.datatype
    if datatype == DataType.DATE:
        remove_date_concepts(concept, environment)
    elif datatype == DataType.DATETIME or datatype == DataType.TIMESTAMP:
        # Both temporal types carry the date parts plus the time-of-day parts.
        remove_date_concepts(concept, environment)
        remove_datetime_concepts(concept, environment)

    if isinstance(datatype, StructType):
        remove_struct_concepts(concept, environment)
276
+
277
+
172
278
  def generate_related_concepts(
173
279
  concept: Concept,
174
280
  environment: Environment,
@@ -183,6 +289,7 @@ def generate_related_concepts(
183
289
  if concept.datatype == DataType.DATE and add_derived:
184
290
  generate_date_concepts(concept, environment)
185
291
  elif concept.datatype == DataType.DATETIME and add_derived:
292
+
186
293
  generate_date_concepts(concept, environment)
187
294
  generate_datetime_concepts(concept, environment)
188
295
  elif concept.datatype == DataType.TIMESTAMP and add_derived:
@@ -203,6 +310,10 @@ def generate_related_concepts(
203
310
  ),
204
311
  lineage=AttrAccess([concept.reference, key], environment=environment),
205
312
  grain=concept.grain,
313
+ metadata=Metadata(
314
+ concept_source=ConceptSource.AUTO_DERIVED,
315
+ ),
316
+ keys=concept.keys,
206
317
  )
207
318
  environment.add_concept(auto, meta=meta)
208
319
  if isinstance(value, Concept):
@@ -1,4 +1,4 @@
1
- from typing import List
1
+ from typing import List, Sequence
2
2
 
3
3
 
4
4
  class UndefinedConceptException(Exception):
@@ -24,6 +24,26 @@ class NoDatasourceException(UnresolvableQueryException):
24
24
  pass
25
25
 
26
26
 
27
+ class ModelValidationError(Exception):
28
+ def __init__(
29
+ self,
30
+ message,
31
+ children: Sequence["ModelValidationError"] | None = None,
32
+ **kwargs
33
+ ):
34
+ super().__init__(self, message, **kwargs)
35
+ self.message = message
36
+ self.children = children
37
+
38
+
39
+ class DatasourceModelValidationError(ModelValidationError):
40
+ pass
41
+
42
+
43
+ class ConceptModelValidationError(ModelValidationError):
44
+ pass
45
+
46
+
27
47
  class AmbiguousRelationshipResolutionException(UnresolvableQueryException):
28
48
  def __init__(self, message, parents: List[set[str]]):
29
49
  super().__init__(self, message)
trilogy/core/functions.py CHANGED
@@ -380,7 +380,12 @@ FUNCTION_REGISTRY: dict[FunctionType, FunctionConfig] = {
380
380
  ),
381
381
  FunctionType.CURRENT_DATETIME: FunctionConfig(
382
382
  output_purpose=Purpose.CONSTANT,
383
- output_type=DataType.DATE,
383
+ output_type=DataType.DATETIME,
384
+ arg_count=0,
385
+ ),
386
+ FunctionType.CURRENT_TIMESTAMP: FunctionConfig(
387
+ output_purpose=Purpose.CONSTANT,
388
+ output_type=DataType.TIMESTAMP,
384
389
  arg_count=0,
385
390
  ),
386
391
  FunctionType.BOOL: FunctionConfig(
@@ -64,13 +64,13 @@ def datasource_to_node(input: BuildDatasource) -> str:
64
64
 
65
65
 
66
66
  class ReferenceGraph(nx.DiGraph):
67
- def __init__(self, *args, **kwargs):
67
+ def __init__(self, *args, **kwargs) -> None:
68
68
  super().__init__(*args, **kwargs)
69
69
  self.concepts: dict[str, BuildConcept] = {}
70
70
  self.datasources: dict[str, BuildDatasource] = {}
71
71
  self.pseudonyms: set[tuple[str, str]] = set()
72
72
 
73
- def copy(self):
73
+ def copy(self) -> "ReferenceGraph":
74
74
  g = ReferenceGraph()
75
75
  g.concepts = self.concepts.copy()
76
76
  g.datasources = self.datasources.copy()
@@ -83,7 +83,7 @@ class ReferenceGraph(nx.DiGraph):
83
83
  # g.add_edges_from(self.edges(data=True))
84
84
  return g
85
85
 
86
- def remove_node(self, n):
86
+ def remove_node(self, n) -> None:
87
87
  if n in self.concepts:
88
88
  del self.concepts[n]
89
89
  if n in self.datasources:
@@ -93,40 +93,14 @@ class ReferenceGraph(nx.DiGraph):
93
93
  def add_node(self, node_for_adding, fast: bool = False, **attr):
94
94
  if fast:
95
95
  return super().add_node(node_for_adding, **attr)
96
- if isinstance(node_for_adding, BuildConcept):
97
- node_name = concept_to_node(node_for_adding)
98
- self.concepts[node_name] = node_for_adding
99
- elif isinstance(node_for_adding, BuildDatasource):
100
- node_name = datasource_to_node(node_for_adding)
101
- self.datasources[node_name] = node_for_adding
102
- else:
103
- node_name = node_for_adding
104
- if attr.get("datasource"):
105
- self.datasources[node_name] = attr["datasource"]
96
+ node_name = node_for_adding
97
+ if attr.get("datasource"):
98
+ self.datasources[node_name] = attr["datasource"]
106
99
  super().add_node(node_name, **attr)
107
100
 
101
def add_datasource_node(self, node_name, datasource) -> None:
    """Record ``datasource`` under ``node_name`` and add that node to the graph.

    The datasource is stored both in ``self.datasources`` and as the node's
    ``datasource`` attribute. The parent class's ``add_node`` is called
    directly, so this class's own ``add_node`` override is not involved.
    """
    self.datasources.update({node_name: datasource})
    super().add_node(node_name, datasource=datasource)
104
+
108
105
  def add_edge(self, u_of_edge, v_of_edge, fast: bool = False, **attr):
109
- if fast:
110
- return super().add_edge(u_of_edge, v_of_edge, **attr)
111
- if isinstance(u_of_edge, BuildConcept):
112
- orig = u_of_edge
113
- u_of_edge = concept_to_node(u_of_edge)
114
- if u_of_edge not in self.nodes:
115
- self.add_node(orig)
116
- elif isinstance(u_of_edge, BuildDatasource):
117
- origd = u_of_edge
118
- u_of_edge = datasource_to_node(u_of_edge)
119
- if u_of_edge not in self.nodes:
120
- self.add_node(origd)
121
-
122
- if isinstance(v_of_edge, BuildConcept):
123
- orig = v_of_edge
124
- v_of_edge = concept_to_node(v_of_edge)
125
- if v_of_edge not in self.nodes:
126
- self.add_node(orig)
127
- elif isinstance(v_of_edge, BuildDatasource):
128
- origd = v_of_edge
129
- v_of_edge = datasource_to_node(v_of_edge)
130
- if v_of_edge not in self.nodes:
131
- self.add_node(origd)
132
- super().add_edge(u_of_edge, v_of_edge)
106
+ return super().add_edge(u_of_edge, v_of_edge, **attr)
trilogy/core/internal.py CHANGED
@@ -64,4 +64,22 @@ DEFAULT_CONCEPTS = {
64
64
  granularity=Granularity.SINGLE_ROW,
65
65
  derivation=Derivation.CONSTANT,
66
66
  ),
67
+ "label": Concept(
68
+ name="label",
69
+ namespace=INTERNAL_NAMESPACE,
70
+ datatype=DataType.STRING,
71
+ purpose=Purpose.KEY,
72
+ grain=Grain(),
73
+ granularity=Granularity.SINGLE_ROW,
74
+ derivation=Derivation.CONSTANT,
75
+ ),
76
+ "expected": Concept(
77
+ name="expected_value",
78
+ namespace=INTERNAL_NAMESPACE,
79
+ datatype=DataType.STRING,
80
+ purpose=Purpose.KEY,
81
+ grain=Grain(),
82
+ granularity=Granularity.SINGLE_ROW,
83
+ derivation=Derivation.CONSTANT,
84
+ ),
67
85
  }
@@ -103,6 +103,9 @@ class DataType(Enum):
103
103
  def data_type(self):
104
104
  return self
105
105
 
106
+ def __str__(self) -> str:
107
+ return self.name
108
+
106
109
 
107
110
  class TraitDataType(BaseModel):
108
111
  type: DataType | NumericType | StructType | ArrayType | MapType
@@ -571,6 +571,34 @@ class Environment(BaseModel):
571
571
 
572
572
  return concept
573
573
 
574
def remove_concept(
    self,
    concept: Concept | str,
) -> bool:
    """Remove a concept, and the concepts auto-derived from it, from this environment.

    Args:
        concept: The concept to remove, or its address.

    Returns:
        True if an entry for the address was deleted from ``self.concepts``;
        False otherwise (including when an address string is not found).

    Raises:
        FrozenEnvironmentException: If the environment is frozen.
    """
    if self.frozen:
        raise FrozenEnvironmentException(
            "Environment is frozen, cannot remove concepts"
        )
    if isinstance(concept, Concept):
        address = concept.address
        c_instance = concept
    else:
        address = concept
        c_instance_check = self.concepts.get(address)
        if not c_instance_check:
            return False
        c_instance = c_instance_check
    # Imported at call time; presumably this avoids a circular import, since
    # the helpers module itself works with Environment -- TODO confirm.
    from trilogy.core.environment_helpers import remove_related_concepts

    # Derived concepts go first so nothing is left pointing at the removed one.
    remove_related_concepts(c_instance, self)

    removed = address in self.concepts
    if removed:
        del self.concepts[address]
    # Always clear the alias bookkeeping for this address. Previously an early
    # return after deleting from ``self.concepts`` skipped this cleanup and
    # could leave a stale alias entry behind.
    if address in self.alias_origin_lookup:
        del self.alias_origin_lookup[address]

    return removed
601
+
574
602
  def add_datasource(
575
603
  self,
576
604
  datasource: Datasource,
@@ -23,6 +23,7 @@ from trilogy.core.constants import CONSTANT_DATASET
23
23
  from trilogy.core.enums import (
24
24
  ComparisonOperator,
25
25
  Derivation,
26
+ FunctionClass,
26
27
  FunctionType,
27
28
  JoinType,
28
29
  Modifier,
@@ -375,6 +376,12 @@ class CTE(BaseModel):
375
376
  return check_is_not_in_group(c.lineage.content)
376
377
  if c.derivation == Derivation.CONSTANT:
377
378
  return True
379
+ if (
380
+ c.purpose == Purpose.CONSTANT
381
+ and isinstance(c.lineage, BuildFunction)
382
+ and c.lineage.operator in FunctionClass.AGGREGATE_FUNCTIONS.value
383
+ ):
384
+ return True
378
385
  if c.purpose == Purpose.METRIC:
379
386
  return True
380
387
 
@@ -123,7 +123,7 @@ def create_pruned_concept_graph(
123
123
  common: set[BuildConcept] = set.intersection(
124
124
  *[set(x.output_concepts) for x in ds_list]
125
125
  )
126
- g.add_node(node_address, datasource=ds_list)
126
+ g.add_datasource_node(node_address, ds_list)
127
127
  for c in common:
128
128
  cnode = concept_to_node(c)
129
129
  g.add_edge(node_address, cnode)
@@ -214,7 +214,7 @@ def create_pruned_concept_graph(
214
214
  relevant = set(relevant_concepts + relevent_datasets)
215
215
  for edge in orig_g.edges():
216
216
  if edge[0] in relevant and edge[1] in relevant:
217
- g.add_edge(edge[0], edge[1])
217
+ g.add_edge(edge[0], edge[1], fast=True)
218
218
  # if we have no ds nodes at all, for non constant, we can't find it
219
219
  if not any([n.startswith("ds~") for n in g.nodes]):
220
220
  logger.info(
@@ -40,8 +40,8 @@ from trilogy.core.statements.author import (
40
40
  PersistStatement,
41
41
  SelectStatement,
42
42
  )
43
- from trilogy.core.statements.common import MaterializedDataset
44
43
  from trilogy.core.statements.execute import (
44
+ MaterializedDataset,
45
45
  ProcessedCopyStatement,
46
46
  ProcessedQuery,
47
47
  ProcessedQueryPersist,
@@ -567,4 +567,5 @@ def process_query(
567
567
  base=root_cte,
568
568
  hidden_columns=set([x for x in statement.hidden_components]),
569
569
  local_concepts=statement.local_concepts,
570
+ locally_derived=statement.locally_derived,
570
571
  )
@@ -12,6 +12,7 @@ from trilogy.core.enums import (
12
12
  IOType,
13
13
  Modifier,
14
14
  ShowCategory,
15
+ ValidationScope,
15
16
  )
16
17
  from trilogy.core.models.author import (
17
18
  AggregateWrapper,
@@ -147,11 +148,13 @@ class SelectStatement(HasUUID, SelectTypeMixin, BaseModel):
147
148
  continue
148
149
  if CONFIG.parsing.select_as_definition and not environment.frozen:
149
150
  if x.concept.address not in environment.concepts:
150
- environment.add_concept(x.content.output)
151
+ environment.add_concept(x.content.output, add_derived=False)
151
152
  elif x.concept.address in environment.concepts:
152
153
  version = environment.concepts[x.concept.address]
153
154
  if version.metadata.concept_source == ConceptSource.SELECT:
154
- environment.add_concept(x.content.output, force=True)
155
+ environment.add_concept(
156
+ x.content.output, force=True, add_derived=False
157
+ )
155
158
  x.content.output = x.content.output.set_select_grain(
156
159
  output.grain, environment
157
160
  )
@@ -378,6 +381,13 @@ class MultiSelectStatement(HasUUID, SelectTypeMixin, BaseModel):
378
381
  output = output.union(select.hidden_components)
379
382
  return output
380
383
 
384
@property
def locally_derived(self) -> set[str]:
    """Addresses derived by this statement or by any of its member selects.

    Starts from the addresses of ``self.derived_concepts`` and folds in each
    select's own ``locally_derived`` set. A new set is built on every access;
    the selects' sets are not modified.
    """
    addresses: set[str] = {derived.address for derived in self.derived_concepts}
    for select in self.selects:
        addresses.update(select.locally_derived)
    return addresses
390
+
381
391
 
382
392
  class RowsetDerivationStatement(HasUUID, BaseModel):
383
393
  name: str
@@ -428,8 +438,13 @@ class PersistStatement(HasUUID, BaseModel):
428
438
  return self.datasource.address
429
439
 
430
440
 
441
class ValidateStatement(BaseModel):
    """Statement model pairing a validation scope with optional targets."""

    # Which part of the model to validate (all / concepts / datasources).
    scope: ValidationScope
    # Identifiers to restrict the validation to; None when none were given.
    targets: Optional[List[str]] = None
444
+
445
+
431
446
  class ShowStatement(BaseModel):
432
- content: SelectStatement | PersistStatement | ShowCategory
447
+ content: SelectStatement | PersistStatement | ValidateStatement | ShowCategory
433
448
 
434
449
 
435
450
  class Limit(BaseModel):
@@ -4,7 +4,6 @@ from pydantic import BaseModel, Field
4
4
 
5
5
  from trilogy.core.enums import IOType
6
6
  from trilogy.core.models.author import ConceptRef, HavingClause, WhereClause
7
- from trilogy.core.models.datasource import Address, Datasource
8
7
 
9
8
 
10
9
  class CopyQueryMixin(BaseModel):
@@ -12,15 +11,6 @@ class CopyQueryMixin(BaseModel):
12
11
  target_type: IOType
13
12
 
14
13
 
15
- class MaterializedDataset(BaseModel):
16
- address: Address
17
-
18
-
19
- class PersistQueryMixin(BaseModel):
20
- output_to: MaterializedDataset
21
- datasource: Datasource
22
-
23
-
24
14
  class SelectTypeMixin(BaseModel):
25
15
  where_clause: Union["WhereClause", None] = Field(default=None)
26
16
  having_clause: Union["HavingClause", None] = Field(default=None)