pytrilogy 0.0.3.93__py3-none-any.whl → 0.0.3.95__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.3.93.dist-info → pytrilogy-0.0.3.95.dist-info}/METADATA +170 -145
- {pytrilogy-0.0.3.93.dist-info → pytrilogy-0.0.3.95.dist-info}/RECORD +38 -34
- trilogy/__init__.py +1 -1
- trilogy/authoring/__init__.py +4 -0
- trilogy/core/enums.py +13 -0
- trilogy/core/env_processor.py +21 -10
- trilogy/core/environment_helpers.py +111 -0
- trilogy/core/exceptions.py +21 -1
- trilogy/core/functions.py +6 -1
- trilogy/core/graph_models.py +60 -67
- trilogy/core/internal.py +18 -0
- trilogy/core/models/author.py +16 -25
- trilogy/core/models/build.py +5 -4
- trilogy/core/models/core.py +3 -0
- trilogy/core/models/environment.py +28 -0
- trilogy/core/models/execute.py +7 -0
- trilogy/core/processing/node_generators/node_merge_node.py +30 -28
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +25 -11
- trilogy/core/processing/node_generators/select_merge_node.py +68 -82
- trilogy/core/query_processor.py +2 -1
- trilogy/core/statements/author.py +18 -3
- trilogy/core/statements/common.py +0 -10
- trilogy/core/statements/execute.py +71 -16
- trilogy/core/validation/__init__.py +0 -0
- trilogy/core/validation/common.py +109 -0
- trilogy/core/validation/concept.py +122 -0
- trilogy/core/validation/datasource.py +192 -0
- trilogy/core/validation/environment.py +71 -0
- trilogy/dialect/base.py +40 -21
- trilogy/dialect/sql_server.py +3 -1
- trilogy/engine.py +25 -7
- trilogy/executor.py +145 -83
- trilogy/parsing/parse_engine.py +35 -4
- trilogy/parsing/trilogy.lark +11 -5
- trilogy/core/processing/node_generators/select_merge_node_v2.py +0 -792
- {pytrilogy-0.0.3.93.dist-info → pytrilogy-0.0.3.95.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.3.93.dist-info → pytrilogy-0.0.3.95.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.3.93.dist-info → pytrilogy-0.0.3.95.dist-info}/licenses/LICENSE.md +0 -0
- {pytrilogy-0.0.3.93.dist-info → pytrilogy-0.0.3.95.dist-info}/top_level.txt +0 -0
|
@@ -6,7 +6,11 @@ from networkx.algorithms import approximation as ax
|
|
|
6
6
|
from trilogy.constants import logger
|
|
7
7
|
from trilogy.core.enums import Derivation
|
|
8
8
|
from trilogy.core.exceptions import AmbiguousRelationshipResolutionException
|
|
9
|
-
from trilogy.core.graph_models import
|
|
9
|
+
from trilogy.core.graph_models import (
|
|
10
|
+
ReferenceGraph,
|
|
11
|
+
concept_to_node,
|
|
12
|
+
prune_sources_for_conditions,
|
|
13
|
+
)
|
|
10
14
|
from trilogy.core.models.build import BuildConcept, BuildConditional, BuildWhereClause
|
|
11
15
|
from trilogy.core.models.build_environment import BuildEnvironment
|
|
12
16
|
from trilogy.core.processing.nodes import History, MergeNode, StrategyNode
|
|
@@ -17,11 +21,12 @@ LOGGER_PREFIX = "[GEN_MERGE_NODE]"
|
|
|
17
21
|
AMBIGUITY_CHECK_LIMIT = 20
|
|
18
22
|
|
|
19
23
|
|
|
20
|
-
def filter_pseudonyms_for_source(
|
|
24
|
+
def filter_pseudonyms_for_source(
|
|
25
|
+
ds_graph: nx.DiGraph, node: str, pseudonyms: set[tuple[str, str]]
|
|
26
|
+
):
|
|
21
27
|
to_remove = set()
|
|
22
|
-
|
|
23
28
|
for edge in ds_graph.edges:
|
|
24
|
-
if
|
|
29
|
+
if edge in pseudonyms:
|
|
25
30
|
lengths = {}
|
|
26
31
|
for n in edge:
|
|
27
32
|
lengths[n] = nx.shortest_path_length(ds_graph, node, n)
|
|
@@ -52,12 +57,14 @@ def filter_unique_graphs(graphs: list[list[str]]) -> list[list[str]]:
|
|
|
52
57
|
return [list(x) for x in unique_graphs]
|
|
53
58
|
|
|
54
59
|
|
|
55
|
-
def extract_ds_components(
|
|
60
|
+
def extract_ds_components(
|
|
61
|
+
g: nx.DiGraph, nodelist: list[str], pseudonyms: set[tuple[str, str]]
|
|
62
|
+
) -> list[list[str]]:
|
|
56
63
|
graphs = []
|
|
57
64
|
for node in g.nodes:
|
|
58
65
|
if node.startswith("ds~"):
|
|
59
66
|
local = g.copy()
|
|
60
|
-
filter_pseudonyms_for_source(local, node)
|
|
67
|
+
filter_pseudonyms_for_source(local, node, pseudonyms)
|
|
61
68
|
ds_graph: nx.DiGraph = nx.ego_graph(local, node, radius=10).copy()
|
|
62
69
|
graphs.append(
|
|
63
70
|
[
|
|
@@ -78,7 +85,7 @@ def extract_ds_components(g: nx.DiGraph, nodelist: list[str]) -> list[list[str]]
|
|
|
78
85
|
|
|
79
86
|
|
|
80
87
|
def determine_induced_minimal_nodes(
|
|
81
|
-
G:
|
|
88
|
+
G: ReferenceGraph,
|
|
82
89
|
nodelist: list[str],
|
|
83
90
|
environment: BuildEnvironment,
|
|
84
91
|
filter_downstream: bool,
|
|
@@ -86,23 +93,19 @@ def determine_induced_minimal_nodes(
|
|
|
86
93
|
) -> nx.DiGraph | None:
|
|
87
94
|
H: nx.Graph = nx.to_undirected(G).copy()
|
|
88
95
|
nodes_to_remove = []
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
if
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
)
|
|
102
|
-
nodes_to_remove.append(node)
|
|
103
|
-
# purge a node if we're already looking for all it's parents
|
|
104
|
-
if filter_downstream and lookup.derivation not in (Derivation.ROOT,):
|
|
105
|
-
nodes_to_remove.append(node)
|
|
96
|
+
for node, lookup in G.concepts.items():
|
|
97
|
+
# inclusion of aggregates can create ambiguous node relation chains
|
|
98
|
+
# there may be a better way to handle this
|
|
99
|
+
# can be revisited if we need to connect a derived synonym based on an aggregate
|
|
100
|
+
if lookup.derivation in (
|
|
101
|
+
Derivation.CONSTANT,
|
|
102
|
+
Derivation.AGGREGATE,
|
|
103
|
+
Derivation.FILTER,
|
|
104
|
+
):
|
|
105
|
+
nodes_to_remove.append(node)
|
|
106
|
+
# purge a node if we're already looking for all it's parents
|
|
107
|
+
if filter_downstream and lookup.derivation not in (Derivation.ROOT,):
|
|
108
|
+
nodes_to_remove.append(node)
|
|
106
109
|
if nodes_to_remove:
|
|
107
110
|
# logger.debug(f"Removing nodes {nodes_to_remove} from graph")
|
|
108
111
|
H.remove_nodes_from(nodes_to_remove)
|
|
@@ -259,7 +262,7 @@ def filter_duplicate_subgraphs(
|
|
|
259
262
|
def resolve_weak_components(
|
|
260
263
|
all_concepts: List[BuildConcept],
|
|
261
264
|
environment: BuildEnvironment,
|
|
262
|
-
environment_graph:
|
|
265
|
+
environment_graph: ReferenceGraph,
|
|
263
266
|
filter_downstream: bool = True,
|
|
264
267
|
accept_partial: bool = False,
|
|
265
268
|
search_conditions: BuildWhereClause | None = None,
|
|
@@ -316,8 +319,6 @@ def resolve_weak_components(
|
|
|
316
319
|
]
|
|
317
320
|
new = [x for x in all_graph_concepts if x.address not in all_concepts]
|
|
318
321
|
|
|
319
|
-
new_addresses = set([x.address for x in new if x.address not in synonyms])
|
|
320
|
-
|
|
321
322
|
if not new:
|
|
322
323
|
break_flag = True
|
|
323
324
|
# remove our new nodes for the next search path
|
|
@@ -329,6 +330,7 @@ def resolve_weak_components(
|
|
|
329
330
|
# from trilogy.hooks.graph_hook import GraphHook
|
|
330
331
|
# GraphHook().query_graph_built(g, highlight_nodes=[concept_to_node(c.with_default_grain()) for c in all_concepts if "__preql_internal" not in c.address])
|
|
331
332
|
found.append(g)
|
|
333
|
+
new_addresses = set([x.address for x in new if x.address not in synonyms])
|
|
332
334
|
reduced_concept_sets.append(new_addresses)
|
|
333
335
|
|
|
334
336
|
except nx.exception.NetworkXNoPath:
|
|
@@ -346,7 +348,7 @@ def resolve_weak_components(
|
|
|
346
348
|
subgraphs: list[list[BuildConcept]] = []
|
|
347
349
|
# components = nx.strongly_connected_components(g)
|
|
348
350
|
node_list = [x for x in g.nodes if x.startswith("c~")]
|
|
349
|
-
components = extract_ds_components(g, node_list)
|
|
351
|
+
components = extract_ds_components(g, node_list, environment_graph.pseudonyms)
|
|
350
352
|
logger.debug(f"Extracted components {components} from {node_list}")
|
|
351
353
|
for component in components:
|
|
352
354
|
# we need to take unique again as different addresses may map to the same concept
|
|
@@ -128,17 +128,32 @@ def simplify_conditions(
|
|
|
128
128
|
for condition in conditions:
|
|
129
129
|
if not isinstance(condition, BuildComparison):
|
|
130
130
|
return False
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
if
|
|
136
|
-
|
|
131
|
+
left_is_concept = False
|
|
132
|
+
left_is_reducable = False
|
|
133
|
+
right_is_concept = False
|
|
134
|
+
right_is_reducable = False
|
|
135
|
+
if isinstance(condition.left, BuildConcept):
|
|
136
|
+
left_is_concept = True
|
|
137
|
+
elif isinstance(condition.left, REDUCABLE_TYPES):
|
|
138
|
+
left_is_reducable = True
|
|
139
|
+
|
|
140
|
+
if isinstance(condition.right, BuildConcept):
|
|
141
|
+
right_is_concept = True
|
|
142
|
+
elif isinstance(condition.right, REDUCABLE_TYPES):
|
|
143
|
+
right_is_reducable = True
|
|
144
|
+
|
|
145
|
+
if not (
|
|
146
|
+
(left_is_concept and right_is_reducable)
|
|
147
|
+
or (right_is_concept and left_is_reducable)
|
|
137
148
|
):
|
|
138
149
|
return False
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
150
|
+
if left_is_concept:
|
|
151
|
+
concept = condition.left
|
|
152
|
+
raw_comparison = condition.right
|
|
153
|
+
else:
|
|
154
|
+
concept = condition.right
|
|
155
|
+
raw_comparison = condition.left
|
|
156
|
+
|
|
142
157
|
if isinstance(raw_comparison, BuildFunction):
|
|
143
158
|
if not raw_comparison.operator == FunctionType.CONSTANT:
|
|
144
159
|
return False
|
|
@@ -154,7 +169,7 @@ def simplify_conditions(
|
|
|
154
169
|
if not isinstance(comparison, REDUCABLE_TYPES):
|
|
155
170
|
return False
|
|
156
171
|
|
|
157
|
-
var = concept
|
|
172
|
+
var: BuildConcept = concept # type: ignore
|
|
158
173
|
op = condition.operator
|
|
159
174
|
grouped[var].append((op, comparison))
|
|
160
175
|
|
|
@@ -240,7 +255,6 @@ def get_union_sources(
|
|
|
240
255
|
assocs[merge_key.address].append(x)
|
|
241
256
|
final: list[list[BuildDatasource]] = []
|
|
242
257
|
for _, dses in assocs.items():
|
|
243
|
-
|
|
244
258
|
conditions = [c.non_partial_for.conditional for c in dses if c.non_partial_for]
|
|
245
259
|
if simplify_conditions(conditions):
|
|
246
260
|
final.append(dses)
|
|
@@ -6,6 +6,7 @@ import networkx as nx
|
|
|
6
6
|
from trilogy.constants import logger
|
|
7
7
|
from trilogy.core.enums import Derivation
|
|
8
8
|
from trilogy.core.graph_models import (
|
|
9
|
+
ReferenceGraph,
|
|
9
10
|
concept_to_node,
|
|
10
11
|
get_graph_exact_match,
|
|
11
12
|
prune_sources_for_conditions,
|
|
@@ -41,77 +42,68 @@ def extract_address(node: str):
|
|
|
41
42
|
|
|
42
43
|
|
|
43
44
|
def get_graph_partial_nodes(
|
|
44
|
-
g:
|
|
45
|
+
g: ReferenceGraph, conditions: BuildWhereClause | None
|
|
45
46
|
) -> dict[str, list[str]]:
|
|
46
|
-
datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
|
|
47
|
-
nx.get_node_attributes(g, "datasource")
|
|
48
|
-
)
|
|
49
47
|
partial: dict[str, list[str]] = {}
|
|
50
|
-
for node in g.
|
|
51
|
-
if node in datasources:
|
|
52
|
-
ds = datasources[node]
|
|
53
|
-
if not isinstance(ds, list):
|
|
54
|
-
|
|
55
|
-
if ds.non_partial_for and conditions == ds.non_partial_for:
|
|
56
|
-
partial[node] = []
|
|
57
|
-
continue
|
|
58
|
-
partial[node] = [concept_to_node(c) for c in ds.partial_concepts]
|
|
59
|
-
ds = [ds]
|
|
60
|
-
# assume union sources have no partial
|
|
61
|
-
else:
|
|
62
|
-
partial[node] = []
|
|
48
|
+
for node, ds in g.datasources.items():
|
|
63
49
|
|
|
50
|
+
if not isinstance(ds, list):
|
|
51
|
+
|
|
52
|
+
if ds.non_partial_for and conditions == ds.non_partial_for:
|
|
53
|
+
partial[node] = []
|
|
54
|
+
continue
|
|
55
|
+
partial[node] = [concept_to_node(c) for c in ds.partial_concepts]
|
|
56
|
+
# assume union sources have no partial
|
|
57
|
+
else:
|
|
58
|
+
partial[node] = []
|
|
64
59
|
return partial
|
|
65
60
|
|
|
66
61
|
|
|
67
|
-
def get_graph_grains(g:
|
|
68
|
-
datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
|
|
69
|
-
nx.get_node_attributes(g, "datasource")
|
|
70
|
-
)
|
|
62
|
+
def get_graph_grains(g: ReferenceGraph) -> dict[str, list[str]]:
|
|
71
63
|
grain_length: dict[str, list[str]] = {}
|
|
72
|
-
for node in g.
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
)
|
|
64
|
+
for node, lookup in g.datasources.items():
|
|
65
|
+
|
|
66
|
+
base: set[str] = set()
|
|
67
|
+
if not isinstance(lookup, list):
|
|
68
|
+
flookup = [lookup]
|
|
69
|
+
else:
|
|
70
|
+
flookup = lookup
|
|
71
|
+
assert isinstance(flookup, list)
|
|
72
|
+
grain_length[node] = reduce(
|
|
73
|
+
lambda x, y: x.union(y.grain.components), flookup, base # type: ignore
|
|
74
|
+
)
|
|
82
75
|
return grain_length
|
|
83
76
|
|
|
84
77
|
|
|
85
78
|
def subgraph_is_complete(
|
|
86
79
|
nodes: list[str], targets: set[str], mapping: dict[str, str], g: nx.DiGraph
|
|
87
80
|
) -> bool:
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
if not
|
|
81
|
+
# Check if all targets are present in mapped nodes
|
|
82
|
+
mapped = {mapping.get(n, n) for n in nodes}
|
|
83
|
+
if not targets.issubset(mapped):
|
|
91
84
|
# logger.info(
|
|
92
85
|
# f"Subgraph {nodes} is not complete, missing targets {targets} - mapped {mapped}"
|
|
93
86
|
# )
|
|
94
87
|
return False
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
return all(has_ds_edge.values()) and passed
|
|
88
|
+
|
|
89
|
+
# Check if at least one concept node has a datasource edge
|
|
90
|
+
has_ds_edge = {target: False for target in targets}
|
|
91
|
+
|
|
92
|
+
for node in nodes:
|
|
93
|
+
if node.startswith("c~"):
|
|
94
|
+
mapped_node = mapping.get(node, node)
|
|
95
|
+
if mapped_node in targets and not has_ds_edge[mapped_node]:
|
|
96
|
+
# Only check neighbors if we haven't found a ds edge for this mapped node yet
|
|
97
|
+
if any(
|
|
98
|
+
neighbor.startswith("ds~") for neighbor in nx.neighbors(g, node)
|
|
99
|
+
):
|
|
100
|
+
has_ds_edge[mapped_node] = True
|
|
101
|
+
|
|
102
|
+
return all(has_ds_edge.values())
|
|
111
103
|
|
|
112
104
|
|
|
113
105
|
def create_pruned_concept_graph(
|
|
114
|
-
g:
|
|
106
|
+
g: ReferenceGraph,
|
|
115
107
|
all_concepts: List[BuildConcept],
|
|
116
108
|
datasources: list[BuildDatasource],
|
|
117
109
|
accept_partial: bool = False,
|
|
@@ -131,16 +123,15 @@ def create_pruned_concept_graph(
|
|
|
131
123
|
common: set[BuildConcept] = set.intersection(
|
|
132
124
|
*[set(x.output_concepts) for x in ds_list]
|
|
133
125
|
)
|
|
134
|
-
g.
|
|
126
|
+
g.add_datasource_node(node_address, ds_list)
|
|
135
127
|
for c in common:
|
|
136
|
-
|
|
137
|
-
g.add_edge(
|
|
128
|
+
cnode = concept_to_node(c)
|
|
129
|
+
g.add_edge(node_address, cnode)
|
|
130
|
+
g.add_edge(cnode, node_address)
|
|
138
131
|
prune_sources_for_conditions(g, accept_partial, conditions)
|
|
139
132
|
target_addresses = set([c.address for c in all_concepts])
|
|
140
|
-
concepts: dict[str, BuildConcept] =
|
|
141
|
-
datasource_map: dict[str, BuildDatasource
|
|
142
|
-
nx.get_node_attributes(orig_g, "datasource")
|
|
143
|
-
)
|
|
133
|
+
concepts: dict[str, BuildConcept] = orig_g.concepts
|
|
134
|
+
datasource_map: dict[str, BuildDatasource] = orig_g.datasources
|
|
144
135
|
relevant_concepts_pre = {
|
|
145
136
|
n: x.address
|
|
146
137
|
for n in g.nodes()
|
|
@@ -170,31 +161,27 @@ def create_pruned_concept_graph(
|
|
|
170
161
|
for edge in to_remove:
|
|
171
162
|
g.remove_edge(*edge)
|
|
172
163
|
|
|
173
|
-
for n in g.
|
|
174
|
-
if
|
|
175
|
-
continue
|
|
176
|
-
actual_neighbors = [
|
|
177
|
-
x for x in relevant_concepts if x in (nx.all_neighbors(g, n))
|
|
178
|
-
]
|
|
179
|
-
if actual_neighbors:
|
|
164
|
+
for n in g.datasources:
|
|
165
|
+
if any([[n, x] in g.edges for x in relevant_concepts]):
|
|
180
166
|
relevent_datasets.append(n)
|
|
167
|
+
continue
|
|
181
168
|
|
|
182
169
|
# for injecting extra join concepts that are shared between datasets
|
|
183
170
|
# use the original graph, pre-partial pruning
|
|
184
|
-
for n in orig_g.
|
|
171
|
+
for n in orig_g.concepts:
|
|
185
172
|
# readd ignoring grain
|
|
186
173
|
# we want to join inclusive of all concepts
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
174
|
+
if n not in relevant_concepts:
|
|
175
|
+
n_neighbors = nx.all_neighbors(orig_g, n)
|
|
176
|
+
# check if the irrelevant concept is a join between
|
|
177
|
+
# two relevant datasets
|
|
178
|
+
neighbors = set()
|
|
179
|
+
for neighbor in n_neighbors:
|
|
192
180
|
if neighbor in relevent_datasets:
|
|
193
181
|
neighbors.add(neighbor)
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
182
|
+
if len(neighbors) > 1:
|
|
183
|
+
relevant_concepts.append(n)
|
|
184
|
+
continue
|
|
198
185
|
g.remove_nodes_from(
|
|
199
186
|
[
|
|
200
187
|
n
|
|
@@ -227,7 +214,7 @@ def create_pruned_concept_graph(
|
|
|
227
214
|
relevant = set(relevant_concepts + relevent_datasets)
|
|
228
215
|
for edge in orig_g.edges():
|
|
229
216
|
if edge[0] in relevant and edge[1] in relevant:
|
|
230
|
-
g.add_edge(edge[0], edge[1])
|
|
217
|
+
g.add_edge(edge[0], edge[1], fast=True)
|
|
231
218
|
# if we have no ds nodes at all, for non constant, we can't find it
|
|
232
219
|
if not any([n.startswith("ds~") for n in g.nodes]):
|
|
233
220
|
logger.info(
|
|
@@ -238,7 +225,7 @@ def create_pruned_concept_graph(
|
|
|
238
225
|
|
|
239
226
|
|
|
240
227
|
def resolve_subgraphs(
|
|
241
|
-
g:
|
|
228
|
+
g: ReferenceGraph,
|
|
242
229
|
relevant: list[BuildConcept],
|
|
243
230
|
accept_partial: bool,
|
|
244
231
|
conditions: BuildWhereClause | None,
|
|
@@ -261,7 +248,7 @@ def resolve_subgraphs(
|
|
|
261
248
|
partial_map = get_graph_partial_nodes(g, conditions)
|
|
262
249
|
exact_map = get_graph_exact_match(g, accept_partial, conditions)
|
|
263
250
|
grain_length = get_graph_grains(g)
|
|
264
|
-
concepts: dict[str, BuildConcept] =
|
|
251
|
+
concepts: dict[str, BuildConcept] = g.concepts
|
|
265
252
|
non_partial_map = {
|
|
266
253
|
ds: [concepts[c].address for c in subgraphs[ds] if c not in partial_map[ds]]
|
|
267
254
|
for ds in datasources
|
|
@@ -460,7 +447,7 @@ def create_select_node(
|
|
|
460
447
|
ds_name: str,
|
|
461
448
|
subgraph: list[str],
|
|
462
449
|
accept_partial: bool,
|
|
463
|
-
g,
|
|
450
|
+
g: ReferenceGraph,
|
|
464
451
|
environment: BuildEnvironment,
|
|
465
452
|
depth: int,
|
|
466
453
|
conditions: BuildWhereClause | None = None,
|
|
@@ -486,9 +473,7 @@ def create_select_node(
|
|
|
486
473
|
preexisting_conditions=conditions.conditional if conditions else None,
|
|
487
474
|
)
|
|
488
475
|
|
|
489
|
-
datasource:
|
|
490
|
-
nx.get_node_attributes(g, "datasource")[ds_name]
|
|
491
|
-
)
|
|
476
|
+
datasource: BuildDatasource = g.datasources[ds_name]
|
|
492
477
|
if isinstance(datasource, BuildDatasource):
|
|
493
478
|
bcandidate, force_group = create_datasource_node(
|
|
494
479
|
datasource,
|
|
@@ -581,6 +566,7 @@ def gen_select_merge_node(
|
|
|
581
566
|
logger.info(
|
|
582
567
|
f"{padding(depth)}{LOGGER_PREFIX} searching for root source graph for concepts {[c.address for c in all_concepts]} and conditions {conditions}"
|
|
583
568
|
)
|
|
569
|
+
pruned_concept_graph = None
|
|
584
570
|
for attempt in attempts:
|
|
585
571
|
pruned_concept_graph = create_pruned_concept_graph(
|
|
586
572
|
g,
|
trilogy/core/query_processor.py
CHANGED
|
@@ -40,8 +40,8 @@ from trilogy.core.statements.author import (
|
|
|
40
40
|
PersistStatement,
|
|
41
41
|
SelectStatement,
|
|
42
42
|
)
|
|
43
|
-
from trilogy.core.statements.common import MaterializedDataset
|
|
44
43
|
from trilogy.core.statements.execute import (
|
|
44
|
+
MaterializedDataset,
|
|
45
45
|
ProcessedCopyStatement,
|
|
46
46
|
ProcessedQuery,
|
|
47
47
|
ProcessedQueryPersist,
|
|
@@ -567,4 +567,5 @@ def process_query(
|
|
|
567
567
|
base=root_cte,
|
|
568
568
|
hidden_columns=set([x for x in statement.hidden_components]),
|
|
569
569
|
local_concepts=statement.local_concepts,
|
|
570
|
+
locally_derived=statement.locally_derived,
|
|
570
571
|
)
|
|
@@ -12,6 +12,7 @@ from trilogy.core.enums import (
|
|
|
12
12
|
IOType,
|
|
13
13
|
Modifier,
|
|
14
14
|
ShowCategory,
|
|
15
|
+
ValidationScope,
|
|
15
16
|
)
|
|
16
17
|
from trilogy.core.models.author import (
|
|
17
18
|
AggregateWrapper,
|
|
@@ -147,11 +148,13 @@ class SelectStatement(HasUUID, SelectTypeMixin, BaseModel):
|
|
|
147
148
|
continue
|
|
148
149
|
if CONFIG.parsing.select_as_definition and not environment.frozen:
|
|
149
150
|
if x.concept.address not in environment.concepts:
|
|
150
|
-
environment.add_concept(x.content.output)
|
|
151
|
+
environment.add_concept(x.content.output, add_derived=False)
|
|
151
152
|
elif x.concept.address in environment.concepts:
|
|
152
153
|
version = environment.concepts[x.concept.address]
|
|
153
154
|
if version.metadata.concept_source == ConceptSource.SELECT:
|
|
154
|
-
environment.add_concept(
|
|
155
|
+
environment.add_concept(
|
|
156
|
+
x.content.output, force=True, add_derived=False
|
|
157
|
+
)
|
|
155
158
|
x.content.output = x.content.output.set_select_grain(
|
|
156
159
|
output.grain, environment
|
|
157
160
|
)
|
|
@@ -378,6 +381,13 @@ class MultiSelectStatement(HasUUID, SelectTypeMixin, BaseModel):
|
|
|
378
381
|
output = output.union(select.hidden_components)
|
|
379
382
|
return output
|
|
380
383
|
|
|
384
|
+
@property
|
|
385
|
+
def locally_derived(self) -> set[str]:
|
|
386
|
+
locally_derived: set[str] = set([x.address for x in self.derived_concepts])
|
|
387
|
+
for select in self.selects:
|
|
388
|
+
locally_derived = locally_derived.union(select.locally_derived)
|
|
389
|
+
return locally_derived
|
|
390
|
+
|
|
381
391
|
|
|
382
392
|
class RowsetDerivationStatement(HasUUID, BaseModel):
|
|
383
393
|
name: str
|
|
@@ -428,8 +438,13 @@ class PersistStatement(HasUUID, BaseModel):
|
|
|
428
438
|
return self.datasource.address
|
|
429
439
|
|
|
430
440
|
|
|
441
|
+
class ValidateStatement(BaseModel):
|
|
442
|
+
scope: ValidationScope
|
|
443
|
+
targets: Optional[List[str]] = None # list of identifiers
|
|
444
|
+
|
|
445
|
+
|
|
431
446
|
class ShowStatement(BaseModel):
|
|
432
|
-
content: SelectStatement | PersistStatement | ShowCategory
|
|
447
|
+
content: SelectStatement | PersistStatement | ValidateStatement | ShowCategory
|
|
433
448
|
|
|
434
449
|
|
|
435
450
|
class Limit(BaseModel):
|
|
@@ -4,7 +4,6 @@ from pydantic import BaseModel, Field
|
|
|
4
4
|
|
|
5
5
|
from trilogy.core.enums import IOType
|
|
6
6
|
from trilogy.core.models.author import ConceptRef, HavingClause, WhereClause
|
|
7
|
-
from trilogy.core.models.datasource import Address, Datasource
|
|
8
7
|
|
|
9
8
|
|
|
10
9
|
class CopyQueryMixin(BaseModel):
|
|
@@ -12,15 +11,6 @@ class CopyQueryMixin(BaseModel):
|
|
|
12
11
|
target_type: IOType
|
|
13
12
|
|
|
14
13
|
|
|
15
|
-
class MaterializedDataset(BaseModel):
|
|
16
|
-
address: Address
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class PersistQueryMixin(BaseModel):
|
|
20
|
-
output_to: MaterializedDataset
|
|
21
|
-
datasource: Datasource
|
|
22
|
-
|
|
23
|
-
|
|
24
14
|
class SelectTypeMixin(BaseModel):
|
|
25
15
|
where_clause: Union["WhereClause", None] = Field(default=None)
|
|
26
16
|
having_clause: Union["HavingClause", None] = Field(default=None)
|
|
@@ -1,49 +1,104 @@
|
|
|
1
|
-
from
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from typing import List, Optional, Union
|
|
2
3
|
|
|
3
|
-
from
|
|
4
|
-
from
|
|
5
|
-
|
|
6
|
-
from trilogy.core.models.author import ConceptRef
|
|
4
|
+
from trilogy.core.enums import IOType, ValidationScope
|
|
5
|
+
from trilogy.core.models.author import ConceptRef, HavingClause, WhereClause
|
|
7
6
|
from trilogy.core.models.build import (
|
|
8
7
|
BuildConcept,
|
|
9
8
|
BuildDatasource,
|
|
10
9
|
BuildOrderBy,
|
|
11
10
|
)
|
|
12
|
-
from trilogy.core.models.
|
|
11
|
+
from trilogy.core.models.datasource import Address, Datasource
|
|
12
|
+
from trilogy.core.models.environment import EnvironmentConceptDict
|
|
13
13
|
from trilogy.core.models.execute import CTE, UnionCTE
|
|
14
|
-
from trilogy.core.statements.common import CopyQueryMixin, PersistQueryMixin
|
|
15
14
|
|
|
16
15
|
|
|
17
|
-
|
|
16
|
+
@dataclass
|
|
17
|
+
class CopyQueryMixin:
|
|
18
|
+
target: str
|
|
19
|
+
target_type: IOType
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
|
|
23
|
+
class MaterializedDataset:
|
|
24
|
+
address: Address
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
|
|
28
|
+
class PersistQueryMixin:
|
|
29
|
+
output_to: MaterializedDataset
|
|
30
|
+
datasource: Datasource
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
|
|
34
|
+
class SelectTypeMixin:
|
|
35
|
+
where_clause: Union["WhereClause", None] = field(default=None)
|
|
36
|
+
having_clause: Union["HavingClause", None] = field(default=None)
|
|
37
|
+
|
|
38
|
+
@property
|
|
39
|
+
def output_components(self) -> List[ConceptRef]:
|
|
40
|
+
raise NotImplementedError
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
|
|
44
|
+
class ProcessedQuery:
|
|
18
45
|
output_columns: List[ConceptRef]
|
|
19
46
|
ctes: List[CTE | UnionCTE]
|
|
20
47
|
base: CTE | UnionCTE
|
|
21
|
-
hidden_columns: set[str] =
|
|
48
|
+
hidden_columns: set[str] = field(default_factory=set)
|
|
22
49
|
limit: Optional[int] = None
|
|
23
50
|
order_by: Optional[BuildOrderBy] = None
|
|
24
|
-
local_concepts:
|
|
25
|
-
EnvironmentConceptDict
|
|
26
|
-
|
|
51
|
+
local_concepts: EnvironmentConceptDict = field(
|
|
52
|
+
default_factory=EnvironmentConceptDict
|
|
53
|
+
)
|
|
54
|
+
locally_derived: set[str] = field(default_factory=set)
|
|
27
55
|
|
|
28
56
|
|
|
57
|
+
@dataclass
|
|
29
58
|
class ProcessedQueryPersist(ProcessedQuery, PersistQueryMixin):
|
|
30
59
|
pass
|
|
31
60
|
|
|
32
61
|
|
|
62
|
+
@dataclass
|
|
33
63
|
class ProcessedCopyStatement(ProcessedQuery, CopyQueryMixin):
|
|
34
64
|
pass
|
|
35
65
|
|
|
36
66
|
|
|
37
|
-
|
|
67
|
+
@dataclass
|
|
68
|
+
class ProcessedRawSQLStatement:
|
|
38
69
|
text: str
|
|
39
70
|
|
|
40
71
|
|
|
41
|
-
|
|
72
|
+
@dataclass
|
|
73
|
+
class ProcessedValidateStatement:
|
|
74
|
+
scope: ValidationScope
|
|
75
|
+
targets: Optional[List[str]]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass
|
|
79
|
+
class ProcessedStaticValueOutput:
|
|
42
80
|
values: List[dict]
|
|
43
81
|
|
|
44
82
|
|
|
45
|
-
|
|
83
|
+
@dataclass
|
|
84
|
+
class ProcessedShowStatement:
|
|
46
85
|
output_columns: List[ConceptRef]
|
|
47
86
|
output_values: List[
|
|
48
|
-
Union[
|
|
87
|
+
Union[
|
|
88
|
+
BuildConcept,
|
|
89
|
+
BuildDatasource,
|
|
90
|
+
ProcessedQuery,
|
|
91
|
+
ProcessedValidateStatement,
|
|
92
|
+
ProcessedStaticValueOutput,
|
|
93
|
+
]
|
|
49
94
|
]
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
PROCESSED_STATEMENT_TYPES = (
|
|
98
|
+
ProcessedCopyStatement
|
|
99
|
+
| ProcessedQuery
|
|
100
|
+
| ProcessedRawSQLStatement
|
|
101
|
+
| ProcessedQueryPersist
|
|
102
|
+
| ProcessedShowStatement
|
|
103
|
+
| ProcessedValidateStatement
|
|
104
|
+
)
|
|
File without changes
|