pytrilogy 0.0.2.58__py3-none-any.whl → 0.0.3.1__py3-none-any.whl
This diff shows the changes between publicly released versions of this package, as published to their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of pytrilogy might be problematic.
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/METADATA +9 -2
- pytrilogy-0.0.3.1.dist-info/RECORD +99 -0
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +2 -2
- trilogy/core/enums.py +1 -7
- trilogy/core/env_processor.py +17 -5
- trilogy/core/environment_helpers.py +11 -25
- trilogy/core/exceptions.py +4 -0
- trilogy/core/functions.py +695 -261
- trilogy/core/graph_models.py +10 -10
- trilogy/core/internal.py +11 -2
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2110 -0
- trilogy/core/models/build.py +1859 -0
- trilogy/core/models/build_environment.py +151 -0
- trilogy/core/models/core.py +370 -0
- trilogy/core/models/datasource.py +297 -0
- trilogy/core/models/environment.py +701 -0
- trilogy/core/models/execute.py +931 -0
- trilogy/core/optimization.py +14 -16
- trilogy/core/optimizations/base_optimization.py +1 -1
- trilogy/core/optimizations/inline_constant.py +6 -6
- trilogy/core/optimizations/inline_datasource.py +17 -11
- trilogy/core/optimizations/predicate_pushdown.py +17 -16
- trilogy/core/processing/concept_strategies_v3.py +178 -145
- trilogy/core/processing/graph_utils.py +1 -1
- trilogy/core/processing/node_generators/basic_node.py +19 -18
- trilogy/core/processing/node_generators/common.py +50 -44
- trilogy/core/processing/node_generators/filter_node.py +26 -13
- trilogy/core/processing/node_generators/group_node.py +26 -21
- trilogy/core/processing/node_generators/group_to_node.py +11 -8
- trilogy/core/processing/node_generators/multiselect_node.py +60 -43
- trilogy/core/processing/node_generators/node_merge_node.py +76 -38
- trilogy/core/processing/node_generators/rowset_node.py +55 -36
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
- trilogy/core/processing/node_generators/select_merge_node.py +161 -64
- trilogy/core/processing/node_generators/select_node.py +13 -13
- trilogy/core/processing/node_generators/union_node.py +12 -11
- trilogy/core/processing/node_generators/unnest_node.py +9 -7
- trilogy/core/processing/node_generators/window_node.py +18 -16
- trilogy/core/processing/nodes/__init__.py +21 -18
- trilogy/core/processing/nodes/base_node.py +82 -66
- trilogy/core/processing/nodes/filter_node.py +19 -13
- trilogy/core/processing/nodes/group_node.py +50 -35
- trilogy/core/processing/nodes/merge_node.py +45 -36
- trilogy/core/processing/nodes/select_node_v2.py +53 -39
- trilogy/core/processing/nodes/union_node.py +5 -7
- trilogy/core/processing/nodes/unnest_node.py +7 -11
- trilogy/core/processing/nodes/window_node.py +9 -4
- trilogy/core/processing/utility.py +103 -75
- trilogy/core/query_processor.py +70 -47
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +413 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +30 -0
- trilogy/core/statements/execute.py +42 -0
- trilogy/dialect/base.py +148 -106
- trilogy/dialect/common.py +9 -10
- trilogy/dialect/duckdb.py +1 -1
- trilogy/dialect/enums.py +4 -2
- trilogy/dialect/presto.py +1 -1
- trilogy/dialect/sql_server.py +1 -1
- trilogy/executor.py +44 -32
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +6 -4
- trilogy/hooks/query_debugger.py +113 -97
- trilogy/parser.py +1 -1
- trilogy/parsing/common.py +307 -64
- trilogy/parsing/parse_engine.py +277 -618
- trilogy/parsing/render.py +50 -26
- trilogy/scripts/trilogy.py +2 -1
- pytrilogy-0.0.2.58.dist-info/RECORD +0 -87
- trilogy/core/models.py +0 -4960
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/top_level.txt +0 -0
--- a/trilogy/core/processing/node_generators/multiselect_node.py
+++ b/trilogy/core/processing/node_generators/multiselect_node.py
@@ -3,15 +3,14 @@ from itertools import combinations
 from typing import List
 
 from trilogy.constants import logger
-from trilogy.core.enums import …
-from trilogy.core.models import (
-    …
-    MultiSelectStatement,
-    WhereClause,
+from trilogy.core.enums import JoinType, Purpose
+from trilogy.core.models.build import (
+    BuildConcept,
+    BuildGrain,
+    BuildMultiSelectLineage,
+    BuildWhereClause,
 )
+from trilogy.core.models.build_environment import BuildEnvironment
 from trilogy.core.processing.node_generators.common import resolve_join_order
 from trilogy.core.processing.nodes import History, MergeNode, NodeJoin
 from trilogy.core.processing.nodes.base_node import StrategyNode
@@ -21,12 +20,14 @@ LOGGER_PREFIX = "[GEN_MULTISELECT_NODE]"
 
 
 def extra_align_joins(
-    base: …
+    base: BuildMultiSelectLineage,
+    environment: BuildEnvironment,
+    parents: List[StrategyNode],
 ) -> List[NodeJoin]:
     node_merge_concept_map = defaultdict(list)
     output = []
     for align in base.align.items:
-        jc = align.…
+        jc = environment.concepts[align.aligned_concept]
         if jc.purpose == Purpose.CONSTANT:
             continue
         for node in parents:
@@ -52,51 +53,43 @@ def extra_align_joins(
 
 
 def gen_multiselect_node(
-    concept: …
-    local_optional: List[…
-    environment: …
+    concept: BuildConcept,
+    local_optional: List[BuildConcept],
+    environment: BuildEnvironment,
     g,
     depth: int,
     source_concepts,
-    history: History…
-    conditions: …
+    history: History,
+    conditions: BuildWhereClause | None = None,
 ) -> MergeNode | None:
-    …
+    from trilogy.core.query_processor import get_query_node
+
+    if not isinstance(concept.lineage, BuildMultiSelectLineage):
         logger.info(
             f"{padding(depth)}{LOGGER_PREFIX} Cannot generate multiselect node for {concept}"
         )
         return None
-    lineage: …
+    lineage: BuildMultiSelectLineage = concept.lineage
 
     base_parents: List[StrategyNode] = []
     partial = []
     for select in lineage.selects:
-        …
-            depth…
-            history=history,
-            conditions=select.where_clause,
+
+        snode: StrategyNode = get_query_node(history.base_environment, select)
+        # raise SyntaxError(select.output_components)
+        logger.info(
+            f"{padding(depth)}{LOGGER_PREFIX} Fetched parent node with outputs {select.output_components}"
         )
         if not snode:
             logger.info(
                 f"{padding(depth)}{LOGGER_PREFIX} Cannot generate multiselect node for {concept}"
             )
             return None
-        if select.having_clause:
-            if snode.conditions:
-                snode.conditions = Conditional(
-                    left=snode.conditions,
-                    right=select.having_clause.conditional,
-                    operator=BooleanOperator.AND,
-                )
-            else:
-                snode.conditions = select.having_clause.conditional
         merge_concepts = []
         for x in [*snode.output_concepts]:
-            …
-            if …
+            merge_name = lineage.get_merge_concept(x)
+            if merge_name:
+                merge = environment.concepts[merge_name]
                 snode.output_concepts.append(merge)
                 merge_concepts.append(merge)
         # clear cache so QPS
@@ -107,24 +100,46 @@ def gen_multiselect_node(
         if select.where_clause:
             for item in select.output_components:
                 partial.append(item)
+        logger.info(snode.hidden_concepts)
 
-    node_joins = extra_align_joins(lineage, base_parents)
+    node_joins = extra_align_joins(lineage, environment, base_parents)
+    logger.info(
+        f"Non-hidden {[x for y in base_parents for x in y.output_concepts if x.address not in y.hidden_concepts]}"
+    )
     node = MergeNode(
-        input_concepts=[
-            …
+        input_concepts=[
+            x
+            for y in base_parents
+            for x in y.output_concepts
+            if x.address not in y.hidden_concepts
+        ],
+        output_concepts=[
+            x
+            for y in base_parents
+            for x in y.output_concepts
+            if x.address not in y.hidden_concepts
+        ],
         environment=environment,
        depth=depth,
         parents=base_parents,
         node_joins=node_joins,
-        …
+        grain=BuildGrain.from_concepts(
+            [
+                x
+                for y in base_parents
+                for x in y.output_concepts
+                if x.address not in y.hidden_concepts
+            ],
+            environment=environment,
+        ),
    )
 
     enrichment = set([x.address for x in local_optional])
 
     multiselect_relevant = [
-        x
+        environment.concepts[x]
         for x in lineage.derived_concepts
-        if x…
+        if x == concept.address or x in enrichment
     ]
     additional_relevant = [x for x in node.output_concepts if x.address in enrichment]
     # add in other other concepts
@@ -135,7 +150,7 @@ def gen_multiselect_node(
     # if select.where_clause:
     #     for item in additional_relevant:
     #         node.partial_concepts.append(item)
-    node.grain = …
+    node.grain = BuildGrain.from_concepts(node.output_concepts, environment=environment)
     node.rebuild_cache()
     # we need a better API for refreshing a nodes QDS
     possible_joins = concept_to_relevant_joins(additional_relevant)
@@ -156,7 +171,9 @@ def gen_multiselect_node(
             f"{padding(depth)}{LOGGER_PREFIX} all enriched concepts returned from base rowset node; exiting early"
         )
         return node
-    …
+    logger.info(
+        f"{padding(depth)}{LOGGER_PREFIX} Missing required concepts {[x for x in local_optional if x.address not in [y.address for y in node.output_concepts]]}"
+    )
     enrich_node: MergeNode = source_concepts(  # this fetches the parent + join keys
         # to then connect to the rest of the query
         mandatory_list=additional_relevant + local_optional,
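Note: the refactor above applies one idiom three times when building the MergeNode: take every output concept of every parent, skipping anything the parent has hidden. A minimal runnable sketch of that idiom, using a hypothetical Parent stand-in rather than the package's real StrategyNode type:

    # Hypothetical stand-in for StrategyNode; field names mirror the diff above,
    # but the class itself is illustrative, not trilogy's actual node type.
    from dataclasses import dataclass, field

    @dataclass
    class Parent:
        output_concepts: list[str]
        hidden_concepts: set[str] = field(default_factory=set)

    def visible_outputs(parents: list[Parent]) -> list[str]:
        # every parent output that the parent has not marked hidden
        return [
            x
            for y in parents
            for x in y.output_concepts
            if x not in y.hidden_concepts
        ]

    parents = [
        Parent(["order.id", "order.internal_key"], hidden_concepts={"order.internal_key"}),
        Parent(["customer.id"]),
    ]
    print(visible_outputs(parents))  # ['order.id', 'customer.id']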
--- a/trilogy/core/processing/node_generators/node_merge_node.py
+++ b/trilogy/core/processing/node_generators/node_merge_node.py
@@ -4,10 +4,11 @@ import networkx as nx
 from networkx.algorithms import approximation as ax
 
 from trilogy.constants import logger
-from trilogy.core.enums import …
+from trilogy.core.enums import Derivation
 from trilogy.core.exceptions import AmbiguousRelationshipResolutionException
 from trilogy.core.graph_models import concept_to_node
-from trilogy.core.models import …
+from trilogy.core.models.build import BuildConcept, BuildConditional, BuildWhereClause
+from trilogy.core.models.build_environment import BuildEnvironment
 from trilogy.core.processing.nodes import History, MergeNode, StrategyNode
 from trilogy.core.processing.utility import padding
 from trilogy.utility import unique
@@ -33,7 +34,7 @@ def extract_address(node: str):
     return node.split("~")[1].split("@")[0]
 
 
-def extract_concept(node: str, env: …
+def extract_concept(node: str, env: BuildEnvironment):
     if node in env.alias_origin_lookup:
         return env.alias_origin_lookup[node]
     return env.concepts[node]
@@ -76,7 +77,7 @@ def extract_ds_components(g: nx.DiGraph, nodelist: list[str]) -> list[list[str]]
 def determine_induced_minimal_nodes(
     G: nx.DiGraph,
     nodelist: list[str],
-    environment: …
+    environment: BuildEnvironment,
     filter_downstream: bool,
     accept_partial: bool = False,
 ) -> nx.DiGraph | None:
@@ -86,11 +87,11 @@ def determine_induced_minimal_nodes(
 
     for node in G.nodes:
         if concepts.get(node):
-            lookup: …
-            if lookup.derivation in (…
+            lookup: BuildConcept = concepts[node]
+            if lookup.derivation in (Derivation.CONSTANT,):
                 nodes_to_remove.append(node)
             # purge a node if we're already looking for all it's parents
-            if filter_downstream and lookup.derivation not in (…
+            if filter_downstream and lookup.derivation not in (Derivation.ROOT,):
                 nodes_to_remove.append(node)
 
     H.remove_nodes_from(nodes_to_remove)
@@ -106,8 +107,8 @@ def determine_induced_minimal_nodes(
 
     try:
         paths = nx.multi_source_dijkstra_path(H, nodelist)
-    except nx.exception.NodeNotFound:
-        logger.debug(f"Unable to find paths for {nodelist}")
+    except nx.exception.NodeNotFound as e:
+        logger.debug(f"Unable to find paths for {nodelist}- {str(e)}")
         return None
     H.remove_nodes_from(list(x for x in H.nodes if x not in paths))
     sG: nx.Graph = ax.steinertree.steiner_tree(H, nodelist).copy()
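For context on the graph calls in determine_induced_minimal_nodes above: the function prunes the search graph to nodes reachable from the targets via nx.multi_source_dijkstra_path, then approximates a minimal connecting subgraph with a Steiner tree. A toy sketch with illustrative node names (not trilogy's c~/ds~ graph naming):

    import networkx as nx
    from networkx.algorithms import approximation as ax

    G = nx.Graph()
    G.add_edges_from([("a", "b"), ("b", "c"), ("c", "d"), ("b", "e")])
    terminals = ["a", "d"]

    # keep only nodes reachable from the terminal set
    paths = nx.multi_source_dijkstra_path(G, set(terminals))
    G.remove_nodes_from([n for n in list(G.nodes) if n not in paths])

    # approximate the minimal subgraph connecting all terminals
    tree = ax.steinertree.steiner_tree(G, terminals)
    print(sorted(tree.nodes))  # ['a', 'b', 'c', 'd'] -- 'e' is not needed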
@@ -140,14 +141,17 @@ def determine_induced_minimal_nodes(
         return None
 
     if not all([node in final.nodes for node in nodelist]):
-        …
+        missing = [node for node in nodelist if node not in final.nodes]
+        logger.debug(
+            f"Skipping graph for {nodelist} as missing nodes {missing} from {final.nodes}"
+        )
         return None
     logger.debug(f"Found final graph {final.nodes}")
     return final
 
 
 def detect_ambiguity_and_raise(
-    all_concepts: list[…
+    all_concepts: list[BuildConcept], reduced_concept_sets: list[set[str]]
 ) -> None:
     final_candidates: list[set[str]] = []
     common: set[str] = set()
@@ -167,7 +171,7 @@ def detect_ambiguity_and_raise(
     )
 
 
-def has_synonym(concept: …
+def has_synonym(concept: BuildConcept, others: list[list[BuildConcept]]) -> bool:
     return any(
         c.address in concept.pseudonyms or concept.address in c.pseudonyms
         for sublist in others
@@ -175,7 +179,9 @@ def has_synonym(concept: Concept, others: list[list[Concept]]) -> bool:
     )
 
 
-def filter_relevant_subgraphs(…
+def filter_relevant_subgraphs(
+    subgraphs: list[list[BuildConcept]],
+) -> list[list[BuildConcept]]:
     return [
         subgraph
         for subgraph in subgraphs
@@ -187,20 +193,39 @@ def filter_relevant_subgraphs(subgraphs: list[list[Concept]]) -> list[list[Conce
     ]
 
 
+def filter_duplicate_subgraphs(
+    subgraphs: list[list[BuildConcept]],
+) -> list[list[BuildConcept]]:
+    seen: list[set[str]] = []
+
+    for graph in subgraphs:
+        seen.append(set([x.address for x in graph]))
+    final = []
+    # sometimes w can get two subcomponents that are the same
+    # due to alias resolution
+    # if so, drop any that are strict subsets.
+    for graph in subgraphs:
+        set_x = set([x.address for x in graph])
+        if any([set_x.issubset(y) and set_x != y for y in seen]):
+            continue
+        final.append(graph)
+    return final
+
+
 def resolve_weak_components(
-    all_concepts: List[…
-    environment: …
+    all_concepts: List[BuildConcept],
+    environment: BuildEnvironment,
     environment_graph: nx.DiGraph,
     filter_downstream: bool = True,
     accept_partial: bool = False,
-) -> list[list[…
+) -> list[list[BuildConcept]] | None:
     break_flag = False
     found = []
     search_graph = environment_graph.copy()
     reduced_concept_sets: list[set[str]] = []
 
     # loop through, removing new nodes we find
-    # to ensure there are not ambiguous
+    # to ensure there are not ambiguous discovery paths
     # (if we did not care about raising ambiguity errors, we could just use the first one)
     count = 0
     node_list = [
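The new filter_duplicate_subgraphs helper keeps a subgraph only if its address set is not a strict subset of another subgraph's, dropping duplicates introduced by alias resolution. The same set logic on plain address strings (hypothetical data, not concepts from the package):

    subgraphs = [
        ["order.id", "order.total"],                  # strict subset of the graph below
        ["order.id", "order.total", "customer.id"],
        ["customer.id"],                              # also a strict subset
    ]
    seen = [set(g) for g in subgraphs]
    final = [
        g for g in subgraphs
        if not any(set(g) < other for other in seen)  # `<` is strict-subset on sets
    ]
    print(final)  # [['order.id', 'order.total', 'customer.id']]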
@@ -211,6 +236,8 @@ def resolve_weak_components(
     synonyms: set[str] = set()
     for x in all_concepts:
         synonyms = synonyms.union(x.pseudonyms)
+    # from trilogy.hooks.graph_hook import GraphHook
+    # GraphHook().query_graph_built(search_graph, highlight_nodes=[concept_to_node(c.with_default_grain()) for c in all_concepts if "__preql_internal" not in c.address])
     while break_flag is not True:
         count += 1
         if count > AMBIGUITY_CHECK_LIMIT:
@@ -239,6 +266,7 @@ def resolve_weak_components(
         new = [x for x in all_graph_concepts if x.address not in all_concepts]
 
         new_addresses = set([x.address for x in new if x.address not in synonyms])
+
         if not new:
             break_flag = True
             # remove our new nodes for the next search path
@@ -264,7 +292,7 @@ def resolve_weak_components(
     # take our first one as the actual graph
     g = found[0]
 
-    subgraphs: list[list[…
+    subgraphs: list[list[BuildConcept]] = []
     # components = nx.strongly_connected_components(g)
     node_list = [x for x in g.nodes if x.startswith("c~")]
     components = extract_ds_components(g, node_list)
@@ -280,20 +308,22 @@ def resolve_weak_components(
         if not sub_component:
             continue
         subgraphs.append(sub_component)
-    …
+    final = filter_duplicate_subgraphs(subgraphs)
+    return final
     # return filter_relevant_subgraphs(subgraphs)
 
 
 def subgraphs_to_merge_node(
-    concept_subgraphs: list[list[…
+    concept_subgraphs: list[list[BuildConcept]],
     depth: int,
-    all_concepts: List[…
+    all_concepts: List[BuildConcept],
     environment,
     g,
     source_concepts,
     history,
     conditions,
-    …
+    output_concepts: List[BuildConcept],
+    search_conditions: BuildWhereClause | None = None,
     enable_early_exit: bool = True,
 ):
     parents: List[StrategyNode] = []
@@ -331,16 +361,13 @@ def subgraphs_to_merge_node(
             f"{padding(depth)}{LOGGER_PREFIX} only one parent node, exiting early w/ {[c.address for c in parents[0].output_concepts]}"
         )
         return parents[0]
-    base_output = [x for x in all_concepts]
-    # for x in base_output:
-    #     if x not in input_c:
-    #         input_c.append(x)
     return MergeNode(
         input_concepts=unique(input_c, "address"),
-        output_concepts=…
+        output_concepts=output_concepts,
         environment=environment,
         parents=parents,
         depth=depth,
+        # hidden_concepts=[]
         # conditions=conditions,
         # conditions=search_conditions.conditional,
         # preexisting_conditions=search_conditions.conditional,
@@ -349,33 +376,41 @@ def subgraphs_to_merge_node(
 
 
 def gen_merge_node(
-    all_concepts: List[…
+    all_concepts: List[BuildConcept],
     g: nx.DiGraph,
-    environment: …
+    environment: BuildEnvironment,
     depth: int,
     source_concepts,
     accept_partial: bool = False,
     history: History | None = None,
-    conditions: …
-    search_conditions: …
+    conditions: BuildConditional | None = None,
+    search_conditions: BuildWhereClause | None = None,
 ) -> Optional[MergeNode]:
     if search_conditions:
-        …
+        all_search_concepts = unique(
+            all_concepts + list(search_conditions.row_arguments), "address"
+        )
+    else:
+        all_search_concepts = all_concepts
     for filter_downstream in [True, False]:
         weak_resolve = resolve_weak_components(
-            …
+            all_search_concepts,
            environment,
             g,
             filter_downstream=filter_downstream,
             accept_partial=accept_partial,
         )
-        if weak_resolve:
+        if not weak_resolve:
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} wasn't able to resolve graph through intermediate concept injection with accept_partial {accept_partial}, filter_downstream {filter_downstream}"
+            )
+        else:
             log_graph = [[y.address for y in x] for x in weak_resolve]
             logger.info(
                 f"{padding(depth)}{LOGGER_PREFIX} Was able to resolve graph through weak component resolution - final graph {log_graph}"
             )
             for flat in log_graph:
-                if set(flat) == set([x.address for x in …
+                if set(flat) == set([x.address for x in all_search_concepts]):
                     logger.info(
                         f"{padding(depth)}{LOGGER_PREFIX} expanded concept resolution was identical to search resolution; breaking to avoid recursion error."
                     )
@@ -383,17 +418,19 @@ def gen_merge_node(
             return subgraphs_to_merge_node(
                 weak_resolve,
                 depth=depth,
-                all_concepts=…
+                all_concepts=all_search_concepts,
                 environment=environment,
                 g=g,
                 source_concepts=source_concepts,
                 history=history,
                 conditions=conditions,
                 search_conditions=search_conditions,
+                output_concepts=all_concepts,
             )
+
     # one concept handling may need to be kicked to alias
-    if len(…
-        concept = …
+    if len(all_search_concepts) == 1:
+        concept = all_search_concepts[0]
         for v in concept.pseudonyms:
             test = subgraphs_to_merge_node(
                 [[concept, environment.alias_origin_lookup[v]]],
@@ -406,6 +443,7 @@ def gen_merge_node(
                 conditions=conditions,
                 enable_early_exit=False,
                 search_conditions=search_conditions,
+                output_concepts=[concept],
             )
             if test:
                 return test
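gen_merge_node now widens the search set with the where-clause row arguments and dedupes by address via trilogy.utility.unique. A hedged sketch of what a unique-by-attribute helper like that looks like (illustrative implementation, not the package's actual code):

    from types import SimpleNamespace
    from typing import Iterable, List, TypeVar

    T = TypeVar("T")

    def unique_by(items: Iterable[T], attribute: str) -> List[T]:
        seen: set = set()
        out: List[T] = []
        for item in items:
            key = getattr(item, attribute)
            if key in seen:
                continue  # first occurrence wins; input order is preserved
            seen.add(key)
            out.append(item)
        return out

    a = SimpleNamespace(address="order.id")
    b = SimpleNamespace(address="order.id")  # duplicate address, dropped
    print(len(unique_by([a, b], "address")))  # 1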
--- a/trilogy/core/processing/node_generators/rowset_node.py
+++ b/trilogy/core/processing/node_generators/rowset_node.py
@@ -1,17 +1,18 @@
 from typing import List
 
 from trilogy.constants import logger
-from trilogy.core.enums import …
-from trilogy.core.…
-    …
+from trilogy.core.enums import Derivation
+from trilogy.core.exceptions import UnresolvableQueryException
+from trilogy.core.models.author import MultiSelectLineage, SelectLineage
+from trilogy.core.models.build import (
+    BuildConcept,
+    BuildGrain,
+    BuildRowsetItem,
+    BuildRowsetLineage,
+    BuildWhereClause,
+    Factory,
 )
+from trilogy.core.models.build_environment import BuildEnvironment
 from trilogy.core.processing.nodes import History, MergeNode, StrategyNode
 from trilogy.core.processing.utility import concept_to_relevant_joins, padding
 
@@ -19,62 +20,74 @@ LOGGER_PREFIX = "[GEN_ROWSET_NODE]"
 
 
 def gen_rowset_node(
-    concept: …
-    local_optional: List[…
-    environment: …
+    concept: BuildConcept,
+    local_optional: List[BuildConcept],
+    environment: BuildEnvironment,
     g,
     depth: int,
     source_concepts,
-    history: History…
-    conditions: …
+    history: History,
+    conditions: BuildWhereClause | None = None,
 ) -> StrategyNode | None:
     from trilogy.core.query_processor import get_query_node
 
-    if not isinstance(concept.lineage, …
+    if not isinstance(concept.lineage, BuildRowsetItem):
         raise SyntaxError(
-            f"Invalid lineage passed into rowset fetch, got {type(concept.lineage)}, expected {…
+            f"Invalid lineage passed into rowset fetch, got {type(concept.lineage)}, expected {BuildRowsetItem}"
         )
-    lineage: …
-    rowset: …
-    select: …
+    lineage: BuildRowsetItem = concept.lineage
+    rowset: BuildRowsetLineage = lineage.rowset
+    select: SelectLineage | MultiSelectLineage = lineage.rowset.select
 
-    node = get_query_node(…
+    node = get_query_node(history.base_environment, select)
 
     if not node:
         logger.info(
             f"{padding(depth)}{LOGGER_PREFIX} Cannot generate parent rowset node for {concept}"
         )
-        …
+        raise UnresolvableQueryException(
+            f"Cannot generate parent select for concept {concept} in rowset {rowset.name}; ensure the rowset is a valid statement."
+        )
     enrichment = set([x.address for x in local_optional])
-    …
+
+    factory = Factory(environment=history.base_environment, grain=select.grain)
+    rowset_relevant: list[BuildConcept] = [
+        v
+        for v in environment.concepts.values()
+        if v.address in lineage.rowset.derived_concepts
+    ]
+
+    select_hidden = node.hidden_concepts
     rowset_hidden = [
         x
-        for x in …
-        if …
+        for x in rowset_relevant
+        if x.address in lineage.rowset.derived_concepts
+        and isinstance(x.lineage, BuildRowsetItem)
         and x.lineage.content.address in select_hidden
     ]
     additional_relevant = [
-        x for x in select.output_components if x.address in enrichment
+        factory.build(x) for x in select.output_components if x.address in enrichment
     ]
     # add in other other concepts
-    …
     node.add_output_concepts(rowset_relevant + additional_relevant)
     if select.where_clause:
         for item in additional_relevant:
+            logger.info(
+                f"{padding(depth)}{LOGGER_PREFIX} adding {item} to partial concepts"
+            )
             node.partial_concepts.append(item)
 
     final_hidden = rowset_hidden + [
         x
         for x in node.output_concepts
         if x.address not in local_optional + [concept]
-        and x.derivation != …
+        and x.derivation != Derivation.ROWSET
     ]
     node.hide_output_concepts(final_hidden)
     assert node.resolution_cache
     # assume grain to be output of select
     # but don't include anything hidden(the non-rowset concepts)
-    node.grain = …
+    node.grain = BuildGrain.from_concepts(
         [
             x
             for x in node.output_concepts
@@ -82,29 +95,35 @@ def gen_rowset_node(
             not in [
                 y
                 for y in node.hidden_concepts
-                if …
+                if y in environment.concepts
+                and environment.concepts[y].derivation != Derivation.ROWSET
             ]
         ],
     )
 
     node.rebuild_cache()
-    …
     if not local_optional or all(
-        x.address in node.output_concepts…
+        x.address in node.output_concepts and x.address not in node.partial_concepts
+        for x in local_optional
     ):
         logger.info(
             f"{padding(depth)}{LOGGER_PREFIX} no enrichment required for rowset node as all optional {[x.address for x in local_optional]} found or no optional; exiting early."
         )
         return node
+    remaining = [
+        x
+        for x in local_optional
+        if x not in node.output_concepts or x in node.partial_concepts
+    ]
     possible_joins = concept_to_relevant_joins(
-        [x for x in node.output_concepts if x.derivation != …
+        [x for x in node.output_concepts if x.derivation != Derivation.ROWSET]
     )
     if not possible_joins:
         logger.info(
             f"{padding(depth)}{LOGGER_PREFIX} no possible joins for rowset node to get {[x.address for x in local_optional]}; have {[x.address for x in node.output_concepts]}"
         )
         return node
-    if any(x.derivation == …
+    if any(x.derivation == Derivation.ROWSET for x in possible_joins):
 
         logger.info(
             f"{padding(depth)}{LOGGER_PREFIX} cannot enrich rowset node with rowset concepts; exiting early"
@@ -113,7 +132,7 @@ def gen_rowset_node(
     logger.info([x.address for x in possible_joins + local_optional])
     enrich_node: MergeNode = source_concepts(  # this fetches the parent + join keys
         # to then connect to the rest of the query
-        mandatory_list=possible_joins + …
+        mandatory_list=possible_joins + remaining,
         environment=environment,
         g=g,
         depth=depth + 1,