pytrilogy 0.0.3.94__py3-none-any.whl → 0.0.3.96__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.3.94.dist-info → pytrilogy-0.0.3.96.dist-info}/METADATA +184 -136
- {pytrilogy-0.0.3.94.dist-info → pytrilogy-0.0.3.96.dist-info}/RECORD +35 -30
- trilogy/__init__.py +1 -1
- trilogy/authoring/__init__.py +61 -43
- trilogy/core/enums.py +13 -0
- trilogy/core/env_processor.py +19 -10
- trilogy/core/environment_helpers.py +111 -0
- trilogy/core/exceptions.py +21 -1
- trilogy/core/functions.py +6 -1
- trilogy/core/graph_models.py +11 -37
- trilogy/core/internal.py +18 -0
- trilogy/core/models/core.py +3 -0
- trilogy/core/models/environment.py +28 -0
- trilogy/core/models/execute.py +7 -0
- trilogy/core/processing/node_generators/select_merge_node.py +2 -2
- trilogy/core/query_processor.py +2 -1
- trilogy/core/statements/author.py +18 -3
- trilogy/core/statements/common.py +0 -10
- trilogy/core/statements/execute.py +73 -16
- trilogy/core/validation/common.py +110 -0
- trilogy/core/validation/concept.py +125 -0
- trilogy/core/validation/datasource.py +194 -0
- trilogy/core/validation/environment.py +71 -0
- trilogy/dialect/base.py +48 -21
- trilogy/dialect/metadata.py +233 -0
- trilogy/dialect/sql_server.py +3 -1
- trilogy/engine.py +25 -7
- trilogy/executor.py +94 -162
- trilogy/parsing/parse_engine.py +34 -3
- trilogy/parsing/trilogy.lark +11 -5
- {pytrilogy-0.0.3.94.dist-info → pytrilogy-0.0.3.96.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.3.94.dist-info → pytrilogy-0.0.3.96.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.3.94.dist-info → pytrilogy-0.0.3.96.dist-info}/licenses/LICENSE.md +0 -0
- {pytrilogy-0.0.3.94.dist-info → pytrilogy-0.0.3.96.dist-info}/top_level.txt +0 -0
- /trilogy/{compiler.py → core/validation/__init__.py} +0 -0
trilogy/authoring/__init__.py
CHANGED
|
@@ -41,7 +41,9 @@ from trilogy.core.models.core import (
|
|
|
41
41
|
DataType,
|
|
42
42
|
ListWrapper,
|
|
43
43
|
MapType,
|
|
44
|
+
NumericType,
|
|
44
45
|
StructType,
|
|
46
|
+
TraitDataType,
|
|
45
47
|
)
|
|
46
48
|
from trilogy.core.models.datasource import Address, Datasource, DatasourceMetadata
|
|
47
49
|
from trilogy.core.models.environment import Environment
|
|
@@ -58,66 +60,82 @@ from trilogy.core.statements.author import (
|
|
|
58
60
|
RowsetDerivationStatement,
|
|
59
61
|
SelectItem,
|
|
60
62
|
SelectStatement,
|
|
63
|
+
ShowCategory,
|
|
64
|
+
ShowStatement,
|
|
65
|
+
ValidateStatement,
|
|
61
66
|
)
|
|
62
67
|
from trilogy.parsing.common import arbitrary_to_concept, arg_to_datatype
|
|
63
68
|
|
|
64
69
|
__all__ = [
|
|
65
|
-
|
|
66
|
-
"
|
|
67
|
-
|
|
68
|
-
"
|
|
69
|
-
"
|
|
70
|
-
"
|
|
71
|
-
"CaseElse",
|
|
72
|
-
"AggregateWrapper",
|
|
73
|
-
"WindowItem",
|
|
74
|
-
"WindowOrder",
|
|
75
|
-
"WindowType",
|
|
76
|
-
"WindowItemOrder",
|
|
77
|
-
"WindowItemOver",
|
|
78
|
-
"DataType",
|
|
79
|
-
"StructType",
|
|
80
|
-
"ArrayType",
|
|
81
|
-
"Grain",
|
|
82
|
-
"RowsetDerivationStatement",
|
|
83
|
-
"MapType",
|
|
84
|
-
"ListWrapper",
|
|
70
|
+
# trilogy.constants
|
|
71
|
+
"DEFAULT_NAMESPACE",
|
|
72
|
+
# trilogy.core.enums
|
|
73
|
+
"BooleanOperator",
|
|
74
|
+
"ComparisonOperator",
|
|
75
|
+
"FunctionClass",
|
|
85
76
|
"FunctionType",
|
|
77
|
+
"InfiniteFunctionArgs",
|
|
78
|
+
"Ordering",
|
|
79
|
+
"Purpose",
|
|
80
|
+
# trilogy.core.functions
|
|
86
81
|
"FunctionFactory",
|
|
87
|
-
|
|
88
|
-
"
|
|
89
|
-
"
|
|
90
|
-
"
|
|
91
|
-
"
|
|
82
|
+
# trilogy.core.models.author
|
|
83
|
+
"AggregateWrapper",
|
|
84
|
+
"CaseElse",
|
|
85
|
+
"CaseWhen",
|
|
86
|
+
"Comparison",
|
|
87
|
+
"Concept",
|
|
92
88
|
"ConceptRef",
|
|
89
|
+
"Conditional",
|
|
90
|
+
"FilterItem",
|
|
91
|
+
"Function",
|
|
92
|
+
"FunctionCallWrapper",
|
|
93
93
|
"HavingClause",
|
|
94
94
|
"MagicConstants",
|
|
95
95
|
"Metadata",
|
|
96
|
+
"MultiSelectLineage",
|
|
96
97
|
"OrderBy",
|
|
97
98
|
"OrderItem",
|
|
98
99
|
"Parenthetical",
|
|
100
|
+
"RowsetItem",
|
|
99
101
|
"SubselectComparison",
|
|
100
|
-
"
|
|
101
|
-
"
|
|
102
|
-
"
|
|
103
|
-
"
|
|
104
|
-
"
|
|
105
|
-
"
|
|
106
|
-
|
|
107
|
-
"
|
|
108
|
-
"
|
|
109
|
-
"
|
|
110
|
-
"
|
|
111
|
-
"
|
|
112
|
-
"
|
|
113
|
-
"
|
|
102
|
+
"WhereClause",
|
|
103
|
+
"WindowItem",
|
|
104
|
+
"WindowItemOrder",
|
|
105
|
+
"WindowItemOver",
|
|
106
|
+
"WindowOrder",
|
|
107
|
+
"WindowType",
|
|
108
|
+
# trilogy.core.models.core
|
|
109
|
+
"ArrayType",
|
|
110
|
+
"DataType",
|
|
111
|
+
"ListWrapper",
|
|
112
|
+
"MapType",
|
|
113
|
+
"NumericType",
|
|
114
|
+
"StructType",
|
|
115
|
+
"TraitDataType",
|
|
116
|
+
# trilogy.core.models.datasource
|
|
117
|
+
"Address",
|
|
114
118
|
"Datasource",
|
|
115
119
|
"DatasourceMetadata",
|
|
116
|
-
|
|
117
|
-
"
|
|
118
|
-
|
|
120
|
+
# trilogy.core.models.environment
|
|
121
|
+
"Environment",
|
|
122
|
+
# trilogy.core.statements.author
|
|
123
|
+
"ConceptDeclarationStatement",
|
|
124
|
+
"ConceptTransform",
|
|
119
125
|
"CopyStatement",
|
|
126
|
+
"Grain",
|
|
120
127
|
"HasUUID",
|
|
121
128
|
"ImportStatement",
|
|
122
|
-
"
|
|
129
|
+
"MultiSelectStatement",
|
|
130
|
+
"PersistStatement",
|
|
131
|
+
"RawSQLStatement",
|
|
132
|
+
"RowsetDerivationStatement",
|
|
133
|
+
"SelectItem",
|
|
134
|
+
"SelectStatement",
|
|
135
|
+
"ShowCategory",
|
|
136
|
+
"ShowStatement",
|
|
137
|
+
"ValidateStatement",
|
|
138
|
+
# trilogy.parsing.common
|
|
139
|
+
"arbitrary_to_concept",
|
|
140
|
+
"arg_to_datatype",
|
|
123
141
|
]
|
trilogy/core/enums.py
CHANGED
|
@@ -240,6 +240,7 @@ class FunctionType(Enum):
|
|
|
240
240
|
# CONSTANTS
|
|
241
241
|
CURRENT_DATE = "current_date"
|
|
242
242
|
CURRENT_DATETIME = "current_datetime"
|
|
243
|
+
CURRENT_TIMESTAMP = "current_timestamp"
|
|
243
244
|
|
|
244
245
|
|
|
245
246
|
class FunctionClass(Enum):
|
|
@@ -378,3 +379,15 @@ class IOType(Enum):
|
|
|
378
379
|
if isinstance(value, str) and value.lower() != value:
|
|
379
380
|
return IOType(value.lower())
|
|
380
381
|
return super()._missing_(value)
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
class ValidationScope(Enum):
|
|
385
|
+
ALL = "all"
|
|
386
|
+
CONCEPTS = "concepts"
|
|
387
|
+
DATASOURCES = "datasources"
|
|
388
|
+
|
|
389
|
+
@classmethod
|
|
390
|
+
def _missing_(cls, value):
|
|
391
|
+
if isinstance(value, str) and value.lower() != value:
|
|
392
|
+
return ValidationScope(value.lower())
|
|
393
|
+
return super()._missing_(value)
|
trilogy/core/env_processor.py
CHANGED
|
@@ -20,7 +20,8 @@ def add_concept(
|
|
|
20
20
|
if node_name in seen:
|
|
21
21
|
return
|
|
22
22
|
seen.add(node_name)
|
|
23
|
-
g.
|
|
23
|
+
g.concepts[node_name] = concept
|
|
24
|
+
g.add_node(node_name)
|
|
24
25
|
if concept.concept_arguments:
|
|
25
26
|
for source in concept.concept_arguments:
|
|
26
27
|
if not isinstance(source, BuildConcept):
|
|
@@ -28,9 +29,10 @@ def add_concept(
|
|
|
28
29
|
f"Invalid non-build concept {source} passed into graph generation from {concept}"
|
|
29
30
|
)
|
|
30
31
|
generic = get_default_grain_concept(source, default_concept_graph)
|
|
32
|
+
generic_node = concept_to_node(generic)
|
|
31
33
|
add_concept(generic, g, concept_mapping, default_concept_graph, seen)
|
|
32
34
|
|
|
33
|
-
g.add_edge(
|
|
35
|
+
g.add_edge(generic_node, node_name, fast=True)
|
|
34
36
|
for ps_address in concept.pseudonyms:
|
|
35
37
|
if ps_address not in concept_mapping:
|
|
36
38
|
raise SyntaxError(f"Concept {concept} has invalid pseudonym {ps_address}")
|
|
@@ -44,8 +46,8 @@ def add_concept(
|
|
|
44
46
|
continue
|
|
45
47
|
if pseudonym_node.split("@")[0] == node_name.split("@")[0]:
|
|
46
48
|
continue
|
|
47
|
-
g.add_edge(pseudonym_node, node_name)
|
|
48
|
-
g.add_edge(node_name, pseudonym_node)
|
|
49
|
+
g.add_edge(pseudonym_node, node_name, fast=True)
|
|
50
|
+
g.add_edge(node_name, pseudonym_node, fast=True)
|
|
49
51
|
g.pseudonyms.add((pseudonym_node, node_name))
|
|
50
52
|
g.pseudonyms.add((node_name, pseudonym_node))
|
|
51
53
|
add_concept(pseudonym, g, concept_mapping, default_concept_graph, seen)
|
|
@@ -82,20 +84,27 @@ def generate_adhoc_graph(
|
|
|
82
84
|
|
|
83
85
|
for dataset in datasources:
|
|
84
86
|
node = datasource_to_node(dataset)
|
|
85
|
-
g.
|
|
87
|
+
g.add_datasource_node(node, dataset)
|
|
86
88
|
for concept in dataset.concepts:
|
|
89
|
+
cnode = concept_to_node(concept)
|
|
90
|
+
g.concepts[cnode] = concept
|
|
91
|
+
g.add_node(cnode)
|
|
87
92
|
if restrict_to_listed:
|
|
88
|
-
if
|
|
93
|
+
if cnode not in g.nodes:
|
|
89
94
|
continue
|
|
90
|
-
g.add_edge(node,
|
|
91
|
-
g.add_edge(
|
|
95
|
+
g.add_edge(node, cnode, fast=True)
|
|
96
|
+
g.add_edge(cnode, node, fast=True)
|
|
92
97
|
# if there is a key on a table at a different grain
|
|
93
98
|
# add an FK edge to the canonical source, if it exists
|
|
94
99
|
# for example, order ID on order product table
|
|
95
100
|
default = get_default_grain_concept(concept, default_concept_graph)
|
|
101
|
+
|
|
96
102
|
if concept != default:
|
|
97
|
-
|
|
98
|
-
g.
|
|
103
|
+
dcnode = concept_to_node(default)
|
|
104
|
+
g.concepts[dcnode] = default
|
|
105
|
+
g.add_node(dcnode)
|
|
106
|
+
g.add_edge(cnode, dcnode, fast=True)
|
|
107
|
+
g.add_edge(dcnode, cnode, fast=True)
|
|
99
108
|
return g
|
|
100
109
|
|
|
101
110
|
|
|
@@ -169,6 +169,112 @@ def generate_key_concepts(concept: Concept, environment: Environment):
|
|
|
169
169
|
environment.add_concept(new_concept, add_derived=False)
|
|
170
170
|
|
|
171
171
|
|
|
172
|
+
def remove_date_concepts(concept: Concept, environment: Environment):
|
|
173
|
+
"""Remove auto-generated date-related concepts for the given concept"""
|
|
174
|
+
date_suffixes = ["month", "year", "quarter", "day", "day_of_week"]
|
|
175
|
+
grain_suffixes = ["month_start", "year_start"]
|
|
176
|
+
|
|
177
|
+
for suffix in date_suffixes + grain_suffixes:
|
|
178
|
+
address = concept.address + f".{suffix}"
|
|
179
|
+
if address in environment.concepts:
|
|
180
|
+
derived_concept = environment.concepts[address]
|
|
181
|
+
# Only remove if it was auto-derived from this concept
|
|
182
|
+
if (
|
|
183
|
+
derived_concept.metadata
|
|
184
|
+
and derived_concept.metadata.concept_source
|
|
185
|
+
== ConceptSource.AUTO_DERIVED
|
|
186
|
+
and derived_concept.keys
|
|
187
|
+
and concept.address in derived_concept.keys
|
|
188
|
+
):
|
|
189
|
+
environment.remove_concept(address)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def remove_datetime_concepts(concept: Concept, environment: Environment):
|
|
193
|
+
"""Remove auto-generated datetime-related concepts for the given concept"""
|
|
194
|
+
datetime_suffixes = ["date", "hour", "minute", "second"]
|
|
195
|
+
|
|
196
|
+
for suffix in datetime_suffixes:
|
|
197
|
+
address = concept.address + f".{suffix}"
|
|
198
|
+
if address in environment.concepts:
|
|
199
|
+
derived_concept = environment.concepts[address]
|
|
200
|
+
# Only remove if it was auto-derived from this concept
|
|
201
|
+
if (
|
|
202
|
+
derived_concept.metadata
|
|
203
|
+
and derived_concept.metadata.concept_source
|
|
204
|
+
== ConceptSource.AUTO_DERIVED
|
|
205
|
+
and derived_concept.keys
|
|
206
|
+
and concept.address in derived_concept.keys
|
|
207
|
+
):
|
|
208
|
+
environment.remove_concept(address)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def remove_key_concepts(concept: Concept, environment: Environment):
|
|
212
|
+
"""Remove auto-generated key-related concepts for the given concept"""
|
|
213
|
+
key_suffixes = ["count"]
|
|
214
|
+
|
|
215
|
+
for suffix in key_suffixes:
|
|
216
|
+
address = concept.address + f".{suffix}"
|
|
217
|
+
if address in environment.concepts:
|
|
218
|
+
derived_concept = environment.concepts[address]
|
|
219
|
+
if (
|
|
220
|
+
derived_concept.metadata
|
|
221
|
+
and derived_concept.metadata.concept_source
|
|
222
|
+
== ConceptSource.AUTO_DERIVED
|
|
223
|
+
):
|
|
224
|
+
environment.remove_concept(address)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def remove_struct_concepts(concept: Concept, environment: Environment):
|
|
228
|
+
"""Remove auto-generated struct field concepts for the given concept"""
|
|
229
|
+
if not isinstance(concept.datatype, StructType):
|
|
230
|
+
return
|
|
231
|
+
|
|
232
|
+
target_namespace = (
|
|
233
|
+
environment.namespace + "." + concept.name
|
|
234
|
+
if environment.namespace and environment.namespace != DEFAULT_NAMESPACE
|
|
235
|
+
else concept.name
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
# Get all concepts in the target namespace that were auto-derived
|
|
239
|
+
concepts_to_remove = []
|
|
240
|
+
for address, derived_concept in environment.concepts.items():
|
|
241
|
+
if (
|
|
242
|
+
derived_concept.namespace == target_namespace
|
|
243
|
+
and derived_concept.metadata
|
|
244
|
+
and derived_concept.metadata.concept_source == ConceptSource.AUTO_DERIVED
|
|
245
|
+
and isinstance(derived_concept.lineage, Function)
|
|
246
|
+
and derived_concept.lineage.operator == FunctionType.ATTR_ACCESS
|
|
247
|
+
and len(derived_concept.lineage.arguments) >= 1
|
|
248
|
+
and derived_concept.lineage.arguments[0] == concept.reference
|
|
249
|
+
):
|
|
250
|
+
concepts_to_remove.append(address)
|
|
251
|
+
|
|
252
|
+
for address in concepts_to_remove:
|
|
253
|
+
environment.remove_concept(address)
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def remove_related_concepts(concept: Concept, environment: Environment):
|
|
257
|
+
"""Remove all auto-generated concepts that were derived from the given concept"""
|
|
258
|
+
|
|
259
|
+
# Remove key-related concepts
|
|
260
|
+
if concept.purpose == Purpose.KEY:
|
|
261
|
+
remove_key_concepts(concept, environment)
|
|
262
|
+
|
|
263
|
+
# Remove datatype-specific concepts
|
|
264
|
+
if concept.datatype == DataType.DATE:
|
|
265
|
+
remove_date_concepts(concept, environment)
|
|
266
|
+
elif concept.datatype == DataType.DATETIME:
|
|
267
|
+
remove_date_concepts(concept, environment)
|
|
268
|
+
remove_datetime_concepts(concept, environment)
|
|
269
|
+
elif concept.datatype == DataType.TIMESTAMP:
|
|
270
|
+
remove_date_concepts(concept, environment)
|
|
271
|
+
remove_datetime_concepts(concept, environment)
|
|
272
|
+
|
|
273
|
+
# Remove struct field concepts
|
|
274
|
+
if isinstance(concept.datatype, StructType):
|
|
275
|
+
remove_struct_concepts(concept, environment)
|
|
276
|
+
|
|
277
|
+
|
|
172
278
|
def generate_related_concepts(
|
|
173
279
|
concept: Concept,
|
|
174
280
|
environment: Environment,
|
|
@@ -183,6 +289,7 @@ def generate_related_concepts(
|
|
|
183
289
|
if concept.datatype == DataType.DATE and add_derived:
|
|
184
290
|
generate_date_concepts(concept, environment)
|
|
185
291
|
elif concept.datatype == DataType.DATETIME and add_derived:
|
|
292
|
+
|
|
186
293
|
generate_date_concepts(concept, environment)
|
|
187
294
|
generate_datetime_concepts(concept, environment)
|
|
188
295
|
elif concept.datatype == DataType.TIMESTAMP and add_derived:
|
|
@@ -203,6 +310,10 @@ def generate_related_concepts(
|
|
|
203
310
|
),
|
|
204
311
|
lineage=AttrAccess([concept.reference, key], environment=environment),
|
|
205
312
|
grain=concept.grain,
|
|
313
|
+
metadata=Metadata(
|
|
314
|
+
concept_source=ConceptSource.AUTO_DERIVED,
|
|
315
|
+
),
|
|
316
|
+
keys=concept.keys,
|
|
206
317
|
)
|
|
207
318
|
environment.add_concept(auto, meta=meta)
|
|
208
319
|
if isinstance(value, Concept):
|
trilogy/core/exceptions.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List
|
|
1
|
+
from typing import List, Sequence
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
class UndefinedConceptException(Exception):
|
|
@@ -24,6 +24,26 @@ class NoDatasourceException(UnresolvableQueryException):
|
|
|
24
24
|
pass
|
|
25
25
|
|
|
26
26
|
|
|
27
|
+
class ModelValidationError(Exception):
|
|
28
|
+
def __init__(
|
|
29
|
+
self,
|
|
30
|
+
message,
|
|
31
|
+
children: Sequence["ModelValidationError"] | None = None,
|
|
32
|
+
**kwargs
|
|
33
|
+
):
|
|
34
|
+
super().__init__(self, message, **kwargs)
|
|
35
|
+
self.message = message
|
|
36
|
+
self.children = children
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class DatasourceModelValidationError(ModelValidationError):
|
|
40
|
+
pass
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ConceptModelValidationError(ModelValidationError):
|
|
44
|
+
pass
|
|
45
|
+
|
|
46
|
+
|
|
27
47
|
class AmbiguousRelationshipResolutionException(UnresolvableQueryException):
|
|
28
48
|
def __init__(self, message, parents: List[set[str]]):
|
|
29
49
|
super().__init__(self, message)
|
trilogy/core/functions.py
CHANGED
|
@@ -380,7 +380,12 @@ FUNCTION_REGISTRY: dict[FunctionType, FunctionConfig] = {
|
|
|
380
380
|
),
|
|
381
381
|
FunctionType.CURRENT_DATETIME: FunctionConfig(
|
|
382
382
|
output_purpose=Purpose.CONSTANT,
|
|
383
|
-
output_type=DataType.
|
|
383
|
+
output_type=DataType.DATETIME,
|
|
384
|
+
arg_count=0,
|
|
385
|
+
),
|
|
386
|
+
FunctionType.CURRENT_TIMESTAMP: FunctionConfig(
|
|
387
|
+
output_purpose=Purpose.CONSTANT,
|
|
388
|
+
output_type=DataType.TIMESTAMP,
|
|
384
389
|
arg_count=0,
|
|
385
390
|
),
|
|
386
391
|
FunctionType.BOOL: FunctionConfig(
|
trilogy/core/graph_models.py
CHANGED
|
@@ -64,13 +64,13 @@ def datasource_to_node(input: BuildDatasource) -> str:
|
|
|
64
64
|
|
|
65
65
|
|
|
66
66
|
class ReferenceGraph(nx.DiGraph):
|
|
67
|
-
def __init__(self, *args, **kwargs):
|
|
67
|
+
def __init__(self, *args, **kwargs) -> None:
|
|
68
68
|
super().__init__(*args, **kwargs)
|
|
69
69
|
self.concepts: dict[str, BuildConcept] = {}
|
|
70
70
|
self.datasources: dict[str, BuildDatasource] = {}
|
|
71
71
|
self.pseudonyms: set[tuple[str, str]] = set()
|
|
72
72
|
|
|
73
|
-
def copy(self):
|
|
73
|
+
def copy(self) -> "ReferenceGraph":
|
|
74
74
|
g = ReferenceGraph()
|
|
75
75
|
g.concepts = self.concepts.copy()
|
|
76
76
|
g.datasources = self.datasources.copy()
|
|
@@ -83,7 +83,7 @@ class ReferenceGraph(nx.DiGraph):
|
|
|
83
83
|
# g.add_edges_from(self.edges(data=True))
|
|
84
84
|
return g
|
|
85
85
|
|
|
86
|
-
def remove_node(self, n):
|
|
86
|
+
def remove_node(self, n) -> None:
|
|
87
87
|
if n in self.concepts:
|
|
88
88
|
del self.concepts[n]
|
|
89
89
|
if n in self.datasources:
|
|
@@ -93,40 +93,14 @@ class ReferenceGraph(nx.DiGraph):
|
|
|
93
93
|
def add_node(self, node_for_adding, fast: bool = False, **attr):
|
|
94
94
|
if fast:
|
|
95
95
|
return super().add_node(node_for_adding, **attr)
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
self.
|
|
99
|
-
elif isinstance(node_for_adding, BuildDatasource):
|
|
100
|
-
node_name = datasource_to_node(node_for_adding)
|
|
101
|
-
self.datasources[node_name] = node_for_adding
|
|
102
|
-
else:
|
|
103
|
-
node_name = node_for_adding
|
|
104
|
-
if attr.get("datasource"):
|
|
105
|
-
self.datasources[node_name] = attr["datasource"]
|
|
96
|
+
node_name = node_for_adding
|
|
97
|
+
if attr.get("datasource"):
|
|
98
|
+
self.datasources[node_name] = attr["datasource"]
|
|
106
99
|
super().add_node(node_name, **attr)
|
|
107
100
|
|
|
101
|
+
def add_datasource_node(self, node_name, datasource) -> None:
|
|
102
|
+
self.datasources[node_name] = datasource
|
|
103
|
+
super().add_node(node_name, datasource=datasource)
|
|
104
|
+
|
|
108
105
|
def add_edge(self, u_of_edge, v_of_edge, fast: bool = False, **attr):
|
|
109
|
-
|
|
110
|
-
return super().add_edge(u_of_edge, v_of_edge, **attr)
|
|
111
|
-
if isinstance(u_of_edge, BuildConcept):
|
|
112
|
-
orig = u_of_edge
|
|
113
|
-
u_of_edge = concept_to_node(u_of_edge)
|
|
114
|
-
if u_of_edge not in self.nodes:
|
|
115
|
-
self.add_node(orig)
|
|
116
|
-
elif isinstance(u_of_edge, BuildDatasource):
|
|
117
|
-
origd = u_of_edge
|
|
118
|
-
u_of_edge = datasource_to_node(u_of_edge)
|
|
119
|
-
if u_of_edge not in self.nodes:
|
|
120
|
-
self.add_node(origd)
|
|
121
|
-
|
|
122
|
-
if isinstance(v_of_edge, BuildConcept):
|
|
123
|
-
orig = v_of_edge
|
|
124
|
-
v_of_edge = concept_to_node(v_of_edge)
|
|
125
|
-
if v_of_edge not in self.nodes:
|
|
126
|
-
self.add_node(orig)
|
|
127
|
-
elif isinstance(v_of_edge, BuildDatasource):
|
|
128
|
-
origd = v_of_edge
|
|
129
|
-
v_of_edge = datasource_to_node(v_of_edge)
|
|
130
|
-
if v_of_edge not in self.nodes:
|
|
131
|
-
self.add_node(origd)
|
|
132
|
-
super().add_edge(u_of_edge, v_of_edge)
|
|
106
|
+
return super().add_edge(u_of_edge, v_of_edge, **attr)
|
trilogy/core/internal.py
CHANGED
|
@@ -64,4 +64,22 @@ DEFAULT_CONCEPTS = {
|
|
|
64
64
|
granularity=Granularity.SINGLE_ROW,
|
|
65
65
|
derivation=Derivation.CONSTANT,
|
|
66
66
|
),
|
|
67
|
+
"label": Concept(
|
|
68
|
+
name="label",
|
|
69
|
+
namespace=INTERNAL_NAMESPACE,
|
|
70
|
+
datatype=DataType.STRING,
|
|
71
|
+
purpose=Purpose.KEY,
|
|
72
|
+
grain=Grain(),
|
|
73
|
+
granularity=Granularity.SINGLE_ROW,
|
|
74
|
+
derivation=Derivation.CONSTANT,
|
|
75
|
+
),
|
|
76
|
+
"expected": Concept(
|
|
77
|
+
name="expected_value",
|
|
78
|
+
namespace=INTERNAL_NAMESPACE,
|
|
79
|
+
datatype=DataType.STRING,
|
|
80
|
+
purpose=Purpose.KEY,
|
|
81
|
+
grain=Grain(),
|
|
82
|
+
granularity=Granularity.SINGLE_ROW,
|
|
83
|
+
derivation=Derivation.CONSTANT,
|
|
84
|
+
),
|
|
67
85
|
}
|
trilogy/core/models/core.py
CHANGED
|
@@ -571,6 +571,34 @@ class Environment(BaseModel):
|
|
|
571
571
|
|
|
572
572
|
return concept
|
|
573
573
|
|
|
574
|
+
def remove_concept(
|
|
575
|
+
self,
|
|
576
|
+
concept: Concept | str,
|
|
577
|
+
) -> bool:
|
|
578
|
+
if self.frozen:
|
|
579
|
+
raise FrozenEnvironmentException(
|
|
580
|
+
"Environment is frozen, cannot remove concepts"
|
|
581
|
+
)
|
|
582
|
+
if isinstance(concept, Concept):
|
|
583
|
+
address = concept.address
|
|
584
|
+
c_instance = concept
|
|
585
|
+
else:
|
|
586
|
+
address = concept
|
|
587
|
+
c_instance_check = self.concepts.get(address)
|
|
588
|
+
if not c_instance_check:
|
|
589
|
+
return False
|
|
590
|
+
c_instance = c_instance_check
|
|
591
|
+
from trilogy.core.environment_helpers import remove_related_concepts
|
|
592
|
+
|
|
593
|
+
remove_related_concepts(c_instance, self)
|
|
594
|
+
if address in self.concepts:
|
|
595
|
+
del self.concepts[address]
|
|
596
|
+
return True
|
|
597
|
+
if address in self.alias_origin_lookup:
|
|
598
|
+
del self.alias_origin_lookup[address]
|
|
599
|
+
|
|
600
|
+
return False
|
|
601
|
+
|
|
574
602
|
def add_datasource(
|
|
575
603
|
self,
|
|
576
604
|
datasource: Datasource,
|
trilogy/core/models/execute.py
CHANGED
|
@@ -23,6 +23,7 @@ from trilogy.core.constants import CONSTANT_DATASET
|
|
|
23
23
|
from trilogy.core.enums import (
|
|
24
24
|
ComparisonOperator,
|
|
25
25
|
Derivation,
|
|
26
|
+
FunctionClass,
|
|
26
27
|
FunctionType,
|
|
27
28
|
JoinType,
|
|
28
29
|
Modifier,
|
|
@@ -375,6 +376,12 @@ class CTE(BaseModel):
|
|
|
375
376
|
return check_is_not_in_group(c.lineage.content)
|
|
376
377
|
if c.derivation == Derivation.CONSTANT:
|
|
377
378
|
return True
|
|
379
|
+
if (
|
|
380
|
+
c.purpose == Purpose.CONSTANT
|
|
381
|
+
and isinstance(c.lineage, BuildFunction)
|
|
382
|
+
and c.lineage.operator in FunctionClass.AGGREGATE_FUNCTIONS.value
|
|
383
|
+
):
|
|
384
|
+
return True
|
|
378
385
|
if c.purpose == Purpose.METRIC:
|
|
379
386
|
return True
|
|
380
387
|
|
|
@@ -123,7 +123,7 @@ def create_pruned_concept_graph(
|
|
|
123
123
|
common: set[BuildConcept] = set.intersection(
|
|
124
124
|
*[set(x.output_concepts) for x in ds_list]
|
|
125
125
|
)
|
|
126
|
-
g.
|
|
126
|
+
g.add_datasource_node(node_address, ds_list)
|
|
127
127
|
for c in common:
|
|
128
128
|
cnode = concept_to_node(c)
|
|
129
129
|
g.add_edge(node_address, cnode)
|
|
@@ -214,7 +214,7 @@ def create_pruned_concept_graph(
|
|
|
214
214
|
relevant = set(relevant_concepts + relevent_datasets)
|
|
215
215
|
for edge in orig_g.edges():
|
|
216
216
|
if edge[0] in relevant and edge[1] in relevant:
|
|
217
|
-
g.add_edge(edge[0], edge[1])
|
|
217
|
+
g.add_edge(edge[0], edge[1], fast=True)
|
|
218
218
|
# if we have no ds nodes at all, for non constant, we can't find it
|
|
219
219
|
if not any([n.startswith("ds~") for n in g.nodes]):
|
|
220
220
|
logger.info(
|
trilogy/core/query_processor.py
CHANGED
|
@@ -40,8 +40,8 @@ from trilogy.core.statements.author import (
|
|
|
40
40
|
PersistStatement,
|
|
41
41
|
SelectStatement,
|
|
42
42
|
)
|
|
43
|
-
from trilogy.core.statements.common import MaterializedDataset
|
|
44
43
|
from trilogy.core.statements.execute import (
|
|
44
|
+
MaterializedDataset,
|
|
45
45
|
ProcessedCopyStatement,
|
|
46
46
|
ProcessedQuery,
|
|
47
47
|
ProcessedQueryPersist,
|
|
@@ -567,4 +567,5 @@ def process_query(
|
|
|
567
567
|
base=root_cte,
|
|
568
568
|
hidden_columns=set([x for x in statement.hidden_components]),
|
|
569
569
|
local_concepts=statement.local_concepts,
|
|
570
|
+
locally_derived=statement.locally_derived,
|
|
570
571
|
)
|
|
@@ -12,6 +12,7 @@ from trilogy.core.enums import (
|
|
|
12
12
|
IOType,
|
|
13
13
|
Modifier,
|
|
14
14
|
ShowCategory,
|
|
15
|
+
ValidationScope,
|
|
15
16
|
)
|
|
16
17
|
from trilogy.core.models.author import (
|
|
17
18
|
AggregateWrapper,
|
|
@@ -147,11 +148,13 @@ class SelectStatement(HasUUID, SelectTypeMixin, BaseModel):
|
|
|
147
148
|
continue
|
|
148
149
|
if CONFIG.parsing.select_as_definition and not environment.frozen:
|
|
149
150
|
if x.concept.address not in environment.concepts:
|
|
150
|
-
environment.add_concept(x.content.output)
|
|
151
|
+
environment.add_concept(x.content.output, add_derived=False)
|
|
151
152
|
elif x.concept.address in environment.concepts:
|
|
152
153
|
version = environment.concepts[x.concept.address]
|
|
153
154
|
if version.metadata.concept_source == ConceptSource.SELECT:
|
|
154
|
-
environment.add_concept(
|
|
155
|
+
environment.add_concept(
|
|
156
|
+
x.content.output, force=True, add_derived=False
|
|
157
|
+
)
|
|
155
158
|
x.content.output = x.content.output.set_select_grain(
|
|
156
159
|
output.grain, environment
|
|
157
160
|
)
|
|
@@ -378,6 +381,13 @@ class MultiSelectStatement(HasUUID, SelectTypeMixin, BaseModel):
|
|
|
378
381
|
output = output.union(select.hidden_components)
|
|
379
382
|
return output
|
|
380
383
|
|
|
384
|
+
@property
|
|
385
|
+
def locally_derived(self) -> set[str]:
|
|
386
|
+
locally_derived: set[str] = set([x.address for x in self.derived_concepts])
|
|
387
|
+
for select in self.selects:
|
|
388
|
+
locally_derived = locally_derived.union(select.locally_derived)
|
|
389
|
+
return locally_derived
|
|
390
|
+
|
|
381
391
|
|
|
382
392
|
class RowsetDerivationStatement(HasUUID, BaseModel):
|
|
383
393
|
name: str
|
|
@@ -428,8 +438,13 @@ class PersistStatement(HasUUID, BaseModel):
|
|
|
428
438
|
return self.datasource.address
|
|
429
439
|
|
|
430
440
|
|
|
441
|
+
class ValidateStatement(BaseModel):
|
|
442
|
+
scope: ValidationScope
|
|
443
|
+
targets: Optional[List[str]] = None # list of identifiers
|
|
444
|
+
|
|
445
|
+
|
|
431
446
|
class ShowStatement(BaseModel):
|
|
432
|
-
content: SelectStatement | PersistStatement | ShowCategory
|
|
447
|
+
content: SelectStatement | PersistStatement | ValidateStatement | ShowCategory
|
|
433
448
|
|
|
434
449
|
|
|
435
450
|
class Limit(BaseModel):
|
|
@@ -4,7 +4,6 @@ from pydantic import BaseModel, Field
|
|
|
4
4
|
|
|
5
5
|
from trilogy.core.enums import IOType
|
|
6
6
|
from trilogy.core.models.author import ConceptRef, HavingClause, WhereClause
|
|
7
|
-
from trilogy.core.models.datasource import Address, Datasource
|
|
8
7
|
|
|
9
8
|
|
|
10
9
|
class CopyQueryMixin(BaseModel):
|
|
@@ -12,15 +11,6 @@ class CopyQueryMixin(BaseModel):
|
|
|
12
11
|
target_type: IOType
|
|
13
12
|
|
|
14
13
|
|
|
15
|
-
class MaterializedDataset(BaseModel):
|
|
16
|
-
address: Address
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class PersistQueryMixin(BaseModel):
|
|
20
|
-
output_to: MaterializedDataset
|
|
21
|
-
datasource: Datasource
|
|
22
|
-
|
|
23
|
-
|
|
24
14
|
class SelectTypeMixin(BaseModel):
|
|
25
15
|
where_clause: Union["WhereClause", None] = Field(default=None)
|
|
26
16
|
having_clause: Union["HavingClause", None] = Field(default=None)
|