pytrilogy 0.0.1.117__py3-none-any.whl → 0.0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.1.117.dist-info → pytrilogy-0.0.2.1.dist-info}/METADATA +1 -1
- pytrilogy-0.0.2.1.dist-info/RECORD +82 -0
- {pytrilogy-0.0.1.117.dist-info → pytrilogy-0.0.2.1.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +1 -1
- trilogy/constants.py +6 -0
- trilogy/core/enums.py +7 -2
- trilogy/core/env_processor.py +43 -19
- trilogy/core/functions.py +11 -0
- trilogy/core/models.py +737 -146
- trilogy/core/optimization.py +31 -28
- trilogy/core/optimizations/inline_constant.py +4 -1
- trilogy/core/optimizations/inline_datasource.py +25 -4
- trilogy/core/optimizations/predicate_pushdown.py +94 -54
- trilogy/core/processing/concept_strategies_v3.py +69 -39
- trilogy/core/processing/graph_utils.py +3 -3
- trilogy/core/processing/node_generators/__init__.py +0 -2
- trilogy/core/processing/node_generators/basic_node.py +30 -17
- trilogy/core/processing/node_generators/filter_node.py +3 -1
- trilogy/core/processing/node_generators/node_merge_node.py +345 -96
- trilogy/core/processing/node_generators/rowset_node.py +18 -16
- trilogy/core/processing/node_generators/select_node.py +44 -83
- trilogy/core/processing/nodes/__init__.py +2 -0
- trilogy/core/processing/nodes/base_node.py +22 -5
- trilogy/core/processing/nodes/filter_node.py +3 -0
- trilogy/core/processing/nodes/group_node.py +20 -2
- trilogy/core/processing/nodes/merge_node.py +32 -18
- trilogy/core/processing/nodes/select_node_v2.py +17 -3
- trilogy/core/processing/utility.py +100 -8
- trilogy/core/query_processor.py +77 -24
- trilogy/dialect/base.py +11 -46
- trilogy/dialect/bigquery.py +1 -1
- trilogy/dialect/common.py +11 -0
- trilogy/dialect/duckdb.py +1 -1
- trilogy/dialect/presto.py +1 -0
- trilogy/executor.py +29 -0
- trilogy/hooks/graph_hook.py +50 -5
- trilogy/hooks/query_debugger.py +1 -0
- trilogy/parsing/common.py +8 -5
- trilogy/parsing/parse_engine.py +48 -27
- trilogy/parsing/render.py +13 -6
- trilogy/parsing/trilogy.lark +12 -7
- pytrilogy-0.0.1.117.dist-info/RECORD +0 -83
- trilogy/core/processing/node_generators/concept_merge_node.py +0 -214
- {pytrilogy-0.0.1.117.dist-info → pytrilogy-0.0.2.1.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.1.117.dist-info → pytrilogy-0.0.2.1.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.1.117.dist-info → pytrilogy-0.0.2.1.dist-info}/top_level.txt +0 -0
|
@@ -4,7 +4,7 @@ from typing import List
|
|
|
4
4
|
from trilogy.core.models import (
|
|
5
5
|
Concept,
|
|
6
6
|
)
|
|
7
|
-
from trilogy.core.processing.nodes import StrategyNode, History
|
|
7
|
+
from trilogy.core.processing.nodes import StrategyNode, History
|
|
8
8
|
from trilogy.core.processing.node_generators.common import (
|
|
9
9
|
resolve_function_parent_concepts,
|
|
10
10
|
)
|
|
@@ -30,16 +30,25 @@ def gen_basic_node(
|
|
|
30
30
|
f"{depth_prefix}{LOGGER_PREFIX} basic node for {concept} has parents {[x.address for x in parent_concepts]}"
|
|
31
31
|
)
|
|
32
32
|
|
|
33
|
-
|
|
34
|
-
|
|
33
|
+
local_optional_redundant = [x for x in local_optional if x in parent_concepts]
|
|
34
|
+
attempts = [(parent_concepts, [concept] + local_optional_redundant)]
|
|
35
|
+
from itertools import combinations
|
|
35
36
|
|
|
36
|
-
attempts = [(parent_concepts, [concept])]
|
|
37
37
|
if local_optional:
|
|
38
|
-
|
|
38
|
+
for combo in range(1, len(local_optional) + 1):
|
|
39
|
+
combos = combinations(local_optional, combo)
|
|
40
|
+
for optional_set in combos:
|
|
41
|
+
attempts.append(
|
|
42
|
+
(
|
|
43
|
+
unique(parent_concepts + list(optional_set), "address"),
|
|
44
|
+
list(optional_set) + [concept],
|
|
45
|
+
)
|
|
46
|
+
)
|
|
39
47
|
|
|
40
|
-
for attempt,
|
|
48
|
+
for attempt, basic_output in reversed(attempts):
|
|
49
|
+
partials = []
|
|
41
50
|
attempt = unique(attempt, "address")
|
|
42
|
-
parent_node = source_concepts(
|
|
51
|
+
parent_node: StrategyNode = source_concepts(
|
|
43
52
|
mandatory_list=attempt,
|
|
44
53
|
environment=environment,
|
|
45
54
|
g=g,
|
|
@@ -49,24 +58,28 @@ def gen_basic_node(
|
|
|
49
58
|
if not parent_node:
|
|
50
59
|
continue
|
|
51
60
|
parents: List[StrategyNode] = [parent_node]
|
|
52
|
-
for x in
|
|
61
|
+
for x in basic_output:
|
|
53
62
|
sources = [p for p in parents if x in p.output_concepts]
|
|
54
63
|
if not sources:
|
|
55
64
|
continue
|
|
56
65
|
if all(x in source.partial_concepts for source in sources):
|
|
57
66
|
partials.append(x)
|
|
67
|
+
outputs = parent_node.output_concepts + [concept]
|
|
58
68
|
logger.info(
|
|
59
|
-
f"{depth_prefix}{LOGGER_PREFIX} Returning basic select for {concept} with attempted extra {[x.address for x in attempt]}"
|
|
69
|
+
f"{depth_prefix}{LOGGER_PREFIX} Returning basic select for {concept} with attempted extra {[x.address for x in attempt]}, output {[x.address for x in outputs]}"
|
|
60
70
|
)
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
71
|
+
# parents.resolve()
|
|
72
|
+
|
|
73
|
+
parent_node.add_output_concept(concept)
|
|
74
|
+
|
|
75
|
+
parent_node.remove_output_concepts(
|
|
76
|
+
[
|
|
77
|
+
x
|
|
78
|
+
for x in parent_node.output_concepts
|
|
79
|
+
if x.address not in [y.address for y in basic_output]
|
|
80
|
+
]
|
|
69
81
|
)
|
|
82
|
+
return parent_node
|
|
70
83
|
logger.info(
|
|
71
84
|
f"{depth_prefix}{LOGGER_PREFIX} No basic node could be generated for {concept}"
|
|
72
85
|
)
|
|
@@ -2,7 +2,7 @@ from typing import List
|
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
from trilogy.core.enums import JoinType
|
|
5
|
-
from trilogy.core.models import Concept, Environment, FilterItem
|
|
5
|
+
from trilogy.core.models import Concept, Environment, FilterItem, Grain
|
|
6
6
|
from trilogy.core.processing.nodes import (
|
|
7
7
|
FilterNode,
|
|
8
8
|
MergeNode,
|
|
@@ -65,7 +65,9 @@ def gen_filter_node(
|
|
|
65
65
|
else:
|
|
66
66
|
parent.conditions = where.conditional
|
|
67
67
|
parent.output_concepts = [concept]
|
|
68
|
+
parent.grain = Grain(components=[concept])
|
|
68
69
|
parent.rebuild_cache()
|
|
70
|
+
|
|
69
71
|
logger.info(
|
|
70
72
|
f"{padding(depth)}{LOGGER_PREFIX} returning optimized filter node with pushdown to parent with condition {where.conditional}"
|
|
71
73
|
)
|
|
@@ -1,79 +1,301 @@
|
|
|
1
1
|
from typing import List, Optional
|
|
2
2
|
|
|
3
|
-
from trilogy.core.models import Concept, Environment,
|
|
4
|
-
from trilogy.core.processing.nodes import MergeNode, History
|
|
3
|
+
from trilogy.core.models import Concept, Environment, Conditional
|
|
4
|
+
from trilogy.core.processing.nodes import MergeNode, History, StrategyNode
|
|
5
5
|
import networkx as nx
|
|
6
|
-
from trilogy.core.graph_models import concept_to_node
|
|
6
|
+
from trilogy.core.graph_models import concept_to_node
|
|
7
7
|
from trilogy.core.processing.utility import PathInfo
|
|
8
8
|
from trilogy.constants import logger
|
|
9
9
|
from trilogy.utility import unique
|
|
10
10
|
from trilogy.core.exceptions import AmbiguousRelationshipResolutionException
|
|
11
11
|
from trilogy.core.processing.utility import padding
|
|
12
|
-
from
|
|
12
|
+
from networkx.algorithms import approximation as ax
|
|
13
13
|
from trilogy.core.enums import PurposeLineage
|
|
14
14
|
|
|
15
|
+
|
|
15
16
|
LOGGER_PREFIX = "[GEN_MERGE_NODE]"
|
|
17
|
+
AMBIGUITY_CHECK_LIMIT = 20
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def filter_pseudonyms_for_source(ds_graph: nx.DiGraph, node: str):
|
|
21
|
+
to_remove = set()
|
|
22
|
+
|
|
23
|
+
for edge in ds_graph.edges:
|
|
24
|
+
if ds_graph.edges[edge].get("pseudonym", False):
|
|
25
|
+
lengths = {}
|
|
26
|
+
for n in edge:
|
|
27
|
+
lengths[n] = nx.shortest_path_length(ds_graph, node, n)
|
|
28
|
+
to_remove.add(max(lengths, key=lambda x: lengths.get(x, 0)))
|
|
29
|
+
for node in to_remove:
|
|
30
|
+
ds_graph.remove_node(node)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def extract_address(node: str):
|
|
34
|
+
return node.split("~")[1].split("@")[0]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def extract_concept(node: str, env: Environment):
|
|
38
|
+
if node in env.alias_origin_lookup:
|
|
39
|
+
return env.alias_origin_lookup[node]
|
|
40
|
+
return env.concepts[node]
|
|
16
41
|
|
|
17
42
|
|
|
18
|
-
def
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
final: List[Concept] = unique(concept_nodes, "address")
|
|
24
|
-
return set([x.address for x in final])
|
|
43
|
+
def filter_unique_graphs(graphs: list[list[str]]) -> list[list[str]]:
|
|
44
|
+
unique_graphs: list[set[str]] = []
|
|
45
|
+
for graph in graphs:
|
|
46
|
+
if not any(set(graph).issubset(x) for x in unique_graphs):
|
|
47
|
+
unique_graphs.append(set(graph))
|
|
25
48
|
|
|
49
|
+
return [list(x) for x in unique_graphs]
|
|
26
50
|
|
|
27
|
-
|
|
51
|
+
|
|
52
|
+
def extract_ds_components(g: nx.DiGraph, nodelist: list[str]) -> list[list[str]]:
|
|
53
|
+
graphs = []
|
|
54
|
+
for node in g.nodes:
|
|
55
|
+
if node.startswith("ds~"):
|
|
56
|
+
local = g.copy()
|
|
57
|
+
filter_pseudonyms_for_source(local, node)
|
|
58
|
+
ds_graph: nx.DiGraph = nx.ego_graph(local, node, radius=10).copy()
|
|
59
|
+
graphs.append(
|
|
60
|
+
[
|
|
61
|
+
extract_address(x)
|
|
62
|
+
for x in ds_graph.nodes
|
|
63
|
+
if not str(x).startswith("ds~")
|
|
64
|
+
]
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
graphs = filter_unique_graphs(graphs)
|
|
68
|
+
for node in nodelist:
|
|
69
|
+
parsed = extract_address(node)
|
|
70
|
+
if not any(parsed in x for x in graphs):
|
|
71
|
+
graphs.append([parsed])
|
|
72
|
+
return graphs
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def determine_induced_minimal_nodes(
|
|
76
|
+
G: nx.DiGraph,
|
|
77
|
+
nodelist: list[str],
|
|
78
|
+
environment: Environment,
|
|
79
|
+
filter_downstream: bool,
|
|
80
|
+
accept_partial: bool = False,
|
|
81
|
+
) -> nx.DiGraph | None:
|
|
82
|
+
H: nx.Graph = nx.to_undirected(G).copy()
|
|
83
|
+
nodes_to_remove = []
|
|
84
|
+
concepts = nx.get_node_attributes(G, "concept")
|
|
85
|
+
for node in G.nodes:
|
|
86
|
+
if concepts.get(node):
|
|
87
|
+
lookup = concepts[node]
|
|
88
|
+
if lookup.derivation not in (PurposeLineage.BASIC, PurposeLineage.ROOT):
|
|
89
|
+
nodes_to_remove.append(node)
|
|
90
|
+
elif lookup.derivation == PurposeLineage.BASIC and G.out_degree(node) == 0:
|
|
91
|
+
nodes_to_remove.append(node)
|
|
92
|
+
# purge a node if we're already looking for all it's parents
|
|
93
|
+
elif filter_downstream and lookup.derivation == PurposeLineage.BASIC:
|
|
94
|
+
nodes_to_remove.append(node)
|
|
95
|
+
|
|
96
|
+
H.remove_nodes_from(nodes_to_remove)
|
|
97
|
+
|
|
98
|
+
H.remove_nodes_from(list(nx.isolates(H)))
|
|
99
|
+
|
|
100
|
+
zero_out = list(x for x in H.nodes if G.out_degree(x) == 0 and x not in nodelist)
|
|
101
|
+
while zero_out:
|
|
102
|
+
H.remove_nodes_from(zero_out)
|
|
103
|
+
zero_out = list(
|
|
104
|
+
x for x in H.nodes if G.out_degree(x) == 0 and x not in nodelist
|
|
105
|
+
)
|
|
106
|
+
try:
|
|
107
|
+
paths = nx.multi_source_dijkstra_path(H, nodelist)
|
|
108
|
+
except nx.exception.NodeNotFound:
|
|
109
|
+
return None
|
|
110
|
+
H.remove_nodes_from(list(x for x in H.nodes if x not in paths))
|
|
111
|
+
sG: nx.Graph = ax.steinertree.steiner_tree(H, nodelist).copy()
|
|
112
|
+
final: nx.DiGraph = nx.subgraph(G, sG.nodes).copy()
|
|
113
|
+
for edge in G.edges:
|
|
114
|
+
if edge[1] in final.nodes and edge[0].startswith("ds~"):
|
|
115
|
+
ds_name = extract_address(edge[0])
|
|
116
|
+
ds = environment.datasources[ds_name]
|
|
117
|
+
concept = environment.concepts[extract_address(edge[1])]
|
|
118
|
+
if concept.address in [x.address for x in ds.partial_concepts]:
|
|
119
|
+
if not accept_partial:
|
|
120
|
+
continue
|
|
121
|
+
final.add_edge(*edge)
|
|
122
|
+
# all concept nodes must have a parent
|
|
123
|
+
|
|
124
|
+
if not all(
|
|
125
|
+
[final.in_degree(node) > 0 for node in final.nodes if node.startswith("c~")]
|
|
126
|
+
):
|
|
127
|
+
return None
|
|
128
|
+
if not all([node in final.nodes for node in nodelist]):
|
|
129
|
+
return None
|
|
130
|
+
return final
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def detect_ambiguity_and_raise(all_concepts, reduced_concept_sets) -> None:
|
|
134
|
+
final_candidates: list[set[str]] = []
|
|
135
|
+
common: set[str] = set()
|
|
136
|
+
# find all values that show up in every join_additions
|
|
137
|
+
for ja in reduced_concept_sets:
|
|
138
|
+
if not common:
|
|
139
|
+
common = ja
|
|
140
|
+
else:
|
|
141
|
+
common = common.intersection(ja)
|
|
142
|
+
if all(set(ja).issubset(y) for y in reduced_concept_sets):
|
|
143
|
+
final_candidates.append(ja)
|
|
144
|
+
|
|
145
|
+
if not final_candidates:
|
|
146
|
+
filtered_paths = [x.difference(common) for x in reduced_concept_sets]
|
|
147
|
+
raise AmbiguousRelationshipResolutionException(
|
|
148
|
+
message=f"Multiple possible concept injections found for {[x.address for x in all_concepts]}, got {' or '.join([str(x) for x in reduced_concept_sets])}",
|
|
149
|
+
parents=filtered_paths,
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def resolve_weak_components(
|
|
28
154
|
all_concepts: List[Concept],
|
|
29
|
-
|
|
30
|
-
|
|
155
|
+
environment: Environment,
|
|
156
|
+
environment_graph: nx.DiGraph,
|
|
157
|
+
filter_downstream: bool = True,
|
|
31
158
|
accept_partial: bool = False,
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
159
|
+
) -> list[list[Concept]] | None:
|
|
160
|
+
|
|
161
|
+
break_flag = False
|
|
162
|
+
found = []
|
|
163
|
+
search_graph = environment_graph.copy()
|
|
164
|
+
reduced_concept_sets: list[set[str]] = []
|
|
165
|
+
|
|
166
|
+
# loop through, removing new nodes we find
|
|
167
|
+
# to ensure there are not ambiguous loops
|
|
168
|
+
# (if we did not care about raising ambiguity errors, we could just use the first one)
|
|
169
|
+
count = 0
|
|
170
|
+
node_list = [
|
|
171
|
+
concept_to_node(c.with_default_grain())
|
|
172
|
+
for c in all_concepts
|
|
173
|
+
if "__preql_internal" not in c.address
|
|
174
|
+
]
|
|
175
|
+
while break_flag is not True:
|
|
176
|
+
count += 1
|
|
177
|
+
if count > AMBIGUITY_CHECK_LIMIT:
|
|
178
|
+
break_flag = True
|
|
40
179
|
try:
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
180
|
+
g = determine_induced_minimal_nodes(
|
|
181
|
+
search_graph,
|
|
182
|
+
node_list,
|
|
183
|
+
filter_downstream=filter_downstream,
|
|
184
|
+
accept_partial=accept_partial,
|
|
185
|
+
environment=environment,
|
|
45
186
|
)
|
|
46
|
-
|
|
47
|
-
if
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
187
|
+
|
|
188
|
+
if not g or not g.nodes:
|
|
189
|
+
break_flag = True
|
|
190
|
+
continue
|
|
191
|
+
if not nx.is_weakly_connected(g):
|
|
192
|
+
break_flag = True
|
|
193
|
+
continue
|
|
194
|
+
|
|
195
|
+
all_graph_concepts = [
|
|
196
|
+
extract_concept(extract_address(node), environment)
|
|
197
|
+
for node in g.nodes
|
|
198
|
+
if node.startswith("c~")
|
|
199
|
+
]
|
|
200
|
+
new = [
|
|
201
|
+
x
|
|
202
|
+
for x in all_graph_concepts
|
|
203
|
+
if x.address not in [y.address for y in all_concepts]
|
|
204
|
+
]
|
|
205
|
+
|
|
206
|
+
new_addresses = set([x.address for x in new])
|
|
207
|
+
if not new:
|
|
208
|
+
break_flag = True
|
|
209
|
+
# remove our new nodes for the next search path
|
|
210
|
+
for n in new:
|
|
211
|
+
node = concept_to_node(n)
|
|
212
|
+
if node in search_graph:
|
|
213
|
+
search_graph.remove_node(node)
|
|
214
|
+
# TODO: figure out better place for debugging
|
|
215
|
+
# from trilogy.hooks.graph_hook import GraphHook
|
|
216
|
+
# GraphHook().query_graph_built(g, highlight_nodes=[concept_to_node(c.with_default_grain()) for c in all_concepts if "__preql_internal" not in c.address])
|
|
217
|
+
found.append(g)
|
|
218
|
+
reduced_concept_sets.append(new_addresses)
|
|
219
|
+
|
|
55
220
|
except nx.exception.NetworkXNoPath:
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
]
|
|
67
|
-
if partial and not accept_partial:
|
|
68
|
-
return None
|
|
221
|
+
break_flag = True
|
|
222
|
+
if g and not g.nodes:
|
|
223
|
+
break_flag = True
|
|
224
|
+
if not found:
|
|
225
|
+
return None
|
|
226
|
+
|
|
227
|
+
detect_ambiguity_and_raise(all_concepts, reduced_concept_sets)
|
|
228
|
+
|
|
229
|
+
# take our first one as the actual graph
|
|
230
|
+
g = found[0]
|
|
69
231
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
232
|
+
subgraphs: list[list[Concept]] = []
|
|
233
|
+
# components = nx.strongly_connected_components(g)
|
|
234
|
+
components = extract_ds_components(g, node_list)
|
|
235
|
+
for component in components:
|
|
236
|
+
# we need to take unique again as different addresses may map to the same concept
|
|
237
|
+
sub_component = unique(
|
|
238
|
+
# sorting here is required for reproducibility
|
|
239
|
+
# todo: we should sort in an optimized order
|
|
240
|
+
[extract_concept(x, environment) for x in sorted(component)],
|
|
241
|
+
"address",
|
|
75
242
|
)
|
|
76
|
-
|
|
243
|
+
if not sub_component:
|
|
244
|
+
continue
|
|
245
|
+
subgraphs.append(sub_component)
|
|
246
|
+
return subgraphs
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def subgraphs_to_merge_node(
|
|
250
|
+
concept_subgraphs: list[list[Concept]],
|
|
251
|
+
depth: int,
|
|
252
|
+
all_concepts: List[Concept],
|
|
253
|
+
environment,
|
|
254
|
+
g,
|
|
255
|
+
source_concepts,
|
|
256
|
+
history,
|
|
257
|
+
conditions,
|
|
258
|
+
):
|
|
259
|
+
parents: List[StrategyNode] = []
|
|
260
|
+
logger.info(
|
|
261
|
+
f"{padding(depth)}{LOGGER_PREFIX} fetching subgraphs {[[c.address for c in subgraph] for subgraph in concept_subgraphs]}"
|
|
262
|
+
)
|
|
263
|
+
for graph in concept_subgraphs:
|
|
264
|
+
logger.info(
|
|
265
|
+
f"{padding(depth)}{LOGGER_PREFIX} fetching subgraph {[c.address for c in graph]}"
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
parent: StrategyNode | None = source_concepts(
|
|
269
|
+
mandatory_list=graph,
|
|
270
|
+
environment=environment,
|
|
271
|
+
g=g,
|
|
272
|
+
depth=depth + 1,
|
|
273
|
+
history=history,
|
|
274
|
+
)
|
|
275
|
+
if not parent:
|
|
276
|
+
logger.info(
|
|
277
|
+
f"{padding(depth)}{LOGGER_PREFIX} Unable to instantiate target subgraph"
|
|
278
|
+
)
|
|
279
|
+
return None
|
|
280
|
+
logger.info(
|
|
281
|
+
f"{padding(depth)}{LOGGER_PREFIX} finished subgraph fetch for {[c.address for c in graph]}, have parent {type(parent)} w/ {[c.address for c in parent.output_concepts]}"
|
|
282
|
+
)
|
|
283
|
+
parents.append(parent)
|
|
284
|
+
input_c = []
|
|
285
|
+
for x in parents:
|
|
286
|
+
for y in x.output_concepts:
|
|
287
|
+
input_c.append(y)
|
|
288
|
+
|
|
289
|
+
return MergeNode(
|
|
290
|
+
input_concepts=unique(input_c, "address"),
|
|
291
|
+
output_concepts=[x for x in all_concepts],
|
|
292
|
+
environment=environment,
|
|
293
|
+
g=g,
|
|
294
|
+
parents=parents,
|
|
295
|
+
depth=depth,
|
|
296
|
+
conditions=conditions,
|
|
297
|
+
# node_joins=[]
|
|
298
|
+
)
|
|
77
299
|
|
|
78
300
|
|
|
79
301
|
def gen_merge_node(
|
|
@@ -87,15 +309,32 @@ def gen_merge_node(
|
|
|
87
309
|
conditions: Conditional | None = None,
|
|
88
310
|
) -> Optional[MergeNode]:
|
|
89
311
|
join_candidates: List[PathInfo] = []
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
312
|
+
|
|
313
|
+
# inject new concepts into search, and identify if two dses can reach there
|
|
314
|
+
if not join_candidates:
|
|
315
|
+
for filter_downstream in [True, False]:
|
|
316
|
+
weak_resolve = resolve_weak_components(
|
|
317
|
+
all_concepts,
|
|
318
|
+
environment,
|
|
319
|
+
g,
|
|
320
|
+
filter_downstream=filter_downstream,
|
|
321
|
+
accept_partial=accept_partial,
|
|
322
|
+
)
|
|
323
|
+
if weak_resolve:
|
|
324
|
+
log_graph = [[y.address for y in x] for x in weak_resolve]
|
|
325
|
+
logger.info(
|
|
326
|
+
f"{padding(depth)}{LOGGER_PREFIX} Was able to resolve graph through weak component resolution - final graph {log_graph}"
|
|
327
|
+
)
|
|
328
|
+
return subgraphs_to_merge_node(
|
|
329
|
+
weak_resolve,
|
|
330
|
+
depth=depth,
|
|
331
|
+
all_concepts=all_concepts,
|
|
332
|
+
environment=environment,
|
|
333
|
+
g=g,
|
|
334
|
+
source_concepts=source_concepts,
|
|
335
|
+
history=history,
|
|
336
|
+
conditions=conditions,
|
|
337
|
+
)
|
|
99
338
|
if not join_candidates:
|
|
100
339
|
return None
|
|
101
340
|
join_additions: list[set[str]] = []
|
|
@@ -129,41 +368,51 @@ def gen_merge_node(
|
|
|
129
368
|
key=lambda x: len(x.reduced_concepts),
|
|
130
369
|
)[0]
|
|
131
370
|
logger.info(f"{padding(depth)}{LOGGER_PREFIX} final path is {shortest.paths}")
|
|
132
|
-
# logger.info(f'{padding(depth)}{LOGGER_PREFIX} final reduced concepts are {shortest.concs}')
|
|
133
|
-
parents = []
|
|
134
|
-
for graph in shortest.concept_subgraphs:
|
|
135
|
-
logger.info(
|
|
136
|
-
f"{padding(depth)}{LOGGER_PREFIX} fetching subgraph {[c.address for c in graph]}"
|
|
137
|
-
)
|
|
138
|
-
parent = source_concepts(
|
|
139
|
-
mandatory_list=graph,
|
|
140
|
-
environment=environment,
|
|
141
|
-
g=g,
|
|
142
|
-
depth=depth + 1,
|
|
143
|
-
history=history,
|
|
144
|
-
)
|
|
145
|
-
if not parent:
|
|
146
|
-
logger.info(
|
|
147
|
-
f"{padding(depth)}{LOGGER_PREFIX} Unable to instantiate target subgraph"
|
|
148
|
-
)
|
|
149
|
-
return None
|
|
150
|
-
logger.info(
|
|
151
|
-
f"{padding(depth)}{LOGGER_PREFIX} finished subgraph fetch for {[c.address for c in graph]}, have parent {type(parent)}"
|
|
152
|
-
)
|
|
153
|
-
parents.append(parent)
|
|
154
371
|
|
|
155
|
-
return
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
if environment.concepts[x].derivation != PurposeLineage.MERGE
|
|
160
|
-
],
|
|
161
|
-
output_concepts=[
|
|
162
|
-
x for x in all_concepts if x.derivation != PurposeLineage.MERGE
|
|
163
|
-
],
|
|
372
|
+
return subgraphs_to_merge_node(
|
|
373
|
+
shortest.concept_subgraphs,
|
|
374
|
+
depth=depth,
|
|
375
|
+
all_concepts=all_concepts,
|
|
164
376
|
environment=environment,
|
|
165
377
|
g=g,
|
|
166
|
-
|
|
167
|
-
|
|
378
|
+
source_concepts=source_concepts,
|
|
379
|
+
history=history,
|
|
168
380
|
conditions=conditions,
|
|
169
381
|
)
|
|
382
|
+
# parents = []
|
|
383
|
+
# for graph in shortest.concept_subgraphs:
|
|
384
|
+
# logger.info(
|
|
385
|
+
# f"{padding(depth)}{LOGGER_PREFIX} fetching subgraph {[c.address for c in graph]}"
|
|
386
|
+
# )
|
|
387
|
+
# parent = source_concepts(
|
|
388
|
+
# mandatory_list=graph,
|
|
389
|
+
# environment=environment,
|
|
390
|
+
# g=g,
|
|
391
|
+
# depth=depth + 1,
|
|
392
|
+
# history=history,
|
|
393
|
+
# )
|
|
394
|
+
# if not parent:
|
|
395
|
+
# logger.info(
|
|
396
|
+
# f"{padding(depth)}{LOGGER_PREFIX} Unable to instantiate target subgraph"
|
|
397
|
+
# )
|
|
398
|
+
# return None
|
|
399
|
+
# logger.info(
|
|
400
|
+
# f"{padding(depth)}{LOGGER_PREFIX} finished subgraph fetch for {[c.address for c in graph]}, have parent {type(parent)}"
|
|
401
|
+
# )
|
|
402
|
+
# parents.append(parent)
|
|
403
|
+
|
|
404
|
+
# return MergeNode(
|
|
405
|
+
# input_concepts=[
|
|
406
|
+
# environment.concepts[x]
|
|
407
|
+
# for x in shortest.reduced_concepts
|
|
408
|
+
# if environment.concepts[x].derivation != PurposeLineage.MERGE
|
|
409
|
+
# ],
|
|
410
|
+
# output_concepts=[
|
|
411
|
+
# x for x in all_concepts if x.derivation != PurposeLineage.MERGE
|
|
412
|
+
# ],
|
|
413
|
+
# environment=environment,
|
|
414
|
+
# g=g,
|
|
415
|
+
# parents=parents,
|
|
416
|
+
# depth=depth,
|
|
417
|
+
# conditions=conditions,
|
|
418
|
+
# )
|
|
@@ -35,7 +35,7 @@ def gen_rowset_node(
|
|
|
35
35
|
lineage: RowsetItem = concept.lineage
|
|
36
36
|
rowset: RowsetDerivationStatement = lineage.rowset
|
|
37
37
|
select: SelectStatement | MultiSelectStatement = lineage.rowset.select
|
|
38
|
-
|
|
38
|
+
existence_parents: List[StrategyNode] = []
|
|
39
39
|
if where := select.where_clause:
|
|
40
40
|
targets = select.output_components + where.conditional.row_arguments
|
|
41
41
|
for sub_select in where.conditional.existence_arguments:
|
|
@@ -54,7 +54,7 @@ def gen_rowset_node(
|
|
|
54
54
|
f"{padding(depth)}{LOGGER_PREFIX} Cannot generate parent existence node for rowset node for {concept}"
|
|
55
55
|
)
|
|
56
56
|
return None
|
|
57
|
-
|
|
57
|
+
existence_parents.append(parent_check)
|
|
58
58
|
else:
|
|
59
59
|
targets = select.output_components
|
|
60
60
|
node: StrategyNode = source_concepts(
|
|
@@ -65,18 +65,22 @@ def gen_rowset_node(
|
|
|
65
65
|
history=history,
|
|
66
66
|
)
|
|
67
67
|
|
|
68
|
-
# add our existence concepts in
|
|
69
|
-
if parents:
|
|
70
|
-
node.parents += parents
|
|
71
|
-
for parent in parents:
|
|
72
|
-
for x in parent.output_concepts:
|
|
73
|
-
if x.address not in node.output_lcl:
|
|
74
|
-
node.existence_concepts.append(x)
|
|
75
68
|
if not node:
|
|
76
69
|
logger.info(
|
|
77
70
|
f"{padding(depth)}{LOGGER_PREFIX} Cannot generate rowset node for {concept}"
|
|
78
71
|
)
|
|
79
72
|
return None
|
|
73
|
+
# add our existence concepts in
|
|
74
|
+
if existence_parents:
|
|
75
|
+
node.parents += existence_parents
|
|
76
|
+
# we don't need to join to any existence parents
|
|
77
|
+
if isinstance(node, MergeNode):
|
|
78
|
+
node.node_joins = []
|
|
79
|
+
for parent in existence_parents:
|
|
80
|
+
for x in parent.output_concepts:
|
|
81
|
+
if x.address not in node.output_lcl:
|
|
82
|
+
node.existence_concepts.append(x)
|
|
83
|
+
|
|
80
84
|
node.conditions = select.where_clause.conditional if select.where_clause else None
|
|
81
85
|
enrichment = set([x.address for x in local_optional])
|
|
82
86
|
rowset_relevant = [x for x in rowset.derived_concepts]
|
|
@@ -91,22 +95,21 @@ def gen_rowset_node(
|
|
|
91
95
|
x for x in select.output_components if x.address in enrichment
|
|
92
96
|
]
|
|
93
97
|
# add in other other concepts
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
for item in additional_relevant:
|
|
97
|
-
node.output_concepts.append(item)
|
|
98
|
+
|
|
99
|
+
node.add_output_concepts(rowset_relevant + additional_relevant)
|
|
98
100
|
if select.where_clause:
|
|
99
101
|
for item in additional_relevant:
|
|
100
102
|
node.partial_concepts.append(item)
|
|
101
|
-
|
|
103
|
+
|
|
104
|
+
final_hidden = rowset_hidden + [
|
|
102
105
|
x
|
|
103
106
|
for x in node.output_concepts
|
|
104
107
|
if x.address not in [y.address for y in local_optional + [concept]]
|
|
105
108
|
and x.derivation != PurposeLineage.ROWSET
|
|
106
109
|
]
|
|
110
|
+
node.hide_output_concepts(final_hidden)
|
|
107
111
|
# assume grain to be output of select
|
|
108
112
|
# but don't include anything aggregate at this point
|
|
109
|
-
node.rebuild_cache()
|
|
110
113
|
assert node.resolution_cache
|
|
111
114
|
|
|
112
115
|
node.resolution_cache.grain = concept_list_to_grain(
|
|
@@ -143,7 +146,6 @@ def gen_rowset_node(
|
|
|
143
146
|
f"{padding(depth)}{LOGGER_PREFIX} Cannot generate rowset enrichment node for {concept} with optional {local_optional}, returning just rowset node"
|
|
144
147
|
)
|
|
145
148
|
return node
|
|
146
|
-
|
|
147
149
|
return MergeNode(
|
|
148
150
|
input_concepts=enrich_node.output_concepts + node.output_concepts,
|
|
149
151
|
output_concepts=node.output_concepts + local_optional,
|