pytrilogy 0.0.2.58__py3-none-any.whl → 0.0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/METADATA +9 -2
- pytrilogy-0.0.3.1.dist-info/RECORD +99 -0
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/WHEEL +1 -1
- trilogy/__init__.py +2 -2
- trilogy/core/enums.py +1 -7
- trilogy/core/env_processor.py +17 -5
- trilogy/core/environment_helpers.py +11 -25
- trilogy/core/exceptions.py +4 -0
- trilogy/core/functions.py +695 -261
- trilogy/core/graph_models.py +10 -10
- trilogy/core/internal.py +11 -2
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2110 -0
- trilogy/core/models/build.py +1859 -0
- trilogy/core/models/build_environment.py +151 -0
- trilogy/core/models/core.py +370 -0
- trilogy/core/models/datasource.py +297 -0
- trilogy/core/models/environment.py +701 -0
- trilogy/core/models/execute.py +931 -0
- trilogy/core/optimization.py +14 -16
- trilogy/core/optimizations/base_optimization.py +1 -1
- trilogy/core/optimizations/inline_constant.py +6 -6
- trilogy/core/optimizations/inline_datasource.py +17 -11
- trilogy/core/optimizations/predicate_pushdown.py +17 -16
- trilogy/core/processing/concept_strategies_v3.py +178 -145
- trilogy/core/processing/graph_utils.py +1 -1
- trilogy/core/processing/node_generators/basic_node.py +19 -18
- trilogy/core/processing/node_generators/common.py +50 -44
- trilogy/core/processing/node_generators/filter_node.py +26 -13
- trilogy/core/processing/node_generators/group_node.py +26 -21
- trilogy/core/processing/node_generators/group_to_node.py +11 -8
- trilogy/core/processing/node_generators/multiselect_node.py +60 -43
- trilogy/core/processing/node_generators/node_merge_node.py +76 -38
- trilogy/core/processing/node_generators/rowset_node.py +55 -36
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +27 -34
- trilogy/core/processing/node_generators/select_merge_node.py +161 -64
- trilogy/core/processing/node_generators/select_node.py +13 -13
- trilogy/core/processing/node_generators/union_node.py +12 -11
- trilogy/core/processing/node_generators/unnest_node.py +9 -7
- trilogy/core/processing/node_generators/window_node.py +18 -16
- trilogy/core/processing/nodes/__init__.py +21 -18
- trilogy/core/processing/nodes/base_node.py +82 -66
- trilogy/core/processing/nodes/filter_node.py +19 -13
- trilogy/core/processing/nodes/group_node.py +50 -35
- trilogy/core/processing/nodes/merge_node.py +45 -36
- trilogy/core/processing/nodes/select_node_v2.py +53 -39
- trilogy/core/processing/nodes/union_node.py +5 -7
- trilogy/core/processing/nodes/unnest_node.py +7 -11
- trilogy/core/processing/nodes/window_node.py +9 -4
- trilogy/core/processing/utility.py +103 -75
- trilogy/core/query_processor.py +70 -47
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +413 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +30 -0
- trilogy/core/statements/execute.py +42 -0
- trilogy/dialect/base.py +148 -106
- trilogy/dialect/common.py +9 -10
- trilogy/dialect/duckdb.py +1 -1
- trilogy/dialect/enums.py +4 -2
- trilogy/dialect/presto.py +1 -1
- trilogy/dialect/sql_server.py +1 -1
- trilogy/executor.py +44 -32
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +6 -4
- trilogy/hooks/query_debugger.py +113 -97
- trilogy/parser.py +1 -1
- trilogy/parsing/common.py +307 -64
- trilogy/parsing/parse_engine.py +277 -618
- trilogy/parsing/render.py +50 -26
- trilogy/scripts/trilogy.py +2 -1
- pytrilogy-0.0.2.58.dist-info/RECORD +0 -87
- trilogy/core/models.py +0 -4960
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/LICENSE.md +0 -0
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.2.58.dist-info → pytrilogy-0.0.3.1.dist-info}/top_level.txt +0 -0
|
@@ -2,24 +2,23 @@ from collections import defaultdict
|
|
|
2
2
|
from datetime import date, datetime, timedelta
|
|
3
3
|
from typing import List, Tuple, TypeVar
|
|
4
4
|
|
|
5
|
-
from trilogy.core.enums import ComparisonOperator
|
|
6
|
-
from trilogy.core.models import (
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
FunctionType,
|
|
14
|
-
Parenthetical,
|
|
5
|
+
from trilogy.core.enums import ComparisonOperator, FunctionType
|
|
6
|
+
from trilogy.core.models.build import (
|
|
7
|
+
BuildComparison,
|
|
8
|
+
BuildConcept,
|
|
9
|
+
BuildConditional,
|
|
10
|
+
BuildDatasource,
|
|
11
|
+
BuildFunction,
|
|
12
|
+
BuildParenthetical,
|
|
15
13
|
)
|
|
14
|
+
from trilogy.core.models.core import DataType
|
|
16
15
|
|
|
17
16
|
# Define a generic type that ensures start and end are the same type
|
|
18
17
|
T = TypeVar("T", int, date, datetime)
|
|
19
18
|
|
|
20
19
|
|
|
21
20
|
def reduce_expression(
|
|
22
|
-
var:
|
|
21
|
+
var: BuildConcept, group_tuple: list[tuple[ComparisonOperator, T]]
|
|
23
22
|
) -> bool:
|
|
24
23
|
# Track ranges
|
|
25
24
|
lower_check: T
|
|
@@ -95,27 +94,27 @@ def reduce_expression(
|
|
|
95
94
|
|
|
96
95
|
|
|
97
96
|
def simplify_conditions(
|
|
98
|
-
conditions: list[
|
|
97
|
+
conditions: list[BuildComparison | BuildConditional | BuildParenthetical],
|
|
99
98
|
) -> bool:
|
|
100
99
|
# Group conditions by variable
|
|
101
|
-
grouped: dict[
|
|
102
|
-
|
|
103
|
-
)
|
|
100
|
+
grouped: dict[
|
|
101
|
+
BuildConcept, list[tuple[ComparisonOperator, datetime | int | date]]
|
|
102
|
+
] = defaultdict(list)
|
|
104
103
|
for condition in conditions:
|
|
105
|
-
if not isinstance(condition,
|
|
104
|
+
if not isinstance(condition, BuildComparison):
|
|
106
105
|
return False
|
|
107
106
|
if not isinstance(
|
|
108
|
-
condition.left, (int, date, datetime,
|
|
109
|
-
) and not isinstance(condition.right, (int, date, datetime,
|
|
107
|
+
condition.left, (int, date, datetime, BuildFunction)
|
|
108
|
+
) and not isinstance(condition.right, (int, date, datetime, BuildFunction)):
|
|
110
109
|
return False
|
|
111
|
-
if not isinstance(condition.left,
|
|
112
|
-
condition.right,
|
|
110
|
+
if not isinstance(condition.left, BuildConcept) and not isinstance(
|
|
111
|
+
condition.right, BuildConcept
|
|
113
112
|
):
|
|
114
113
|
return False
|
|
115
114
|
vars = [condition.left, condition.right]
|
|
116
|
-
concept = [x for x in vars if isinstance(x,
|
|
117
|
-
comparison = [x for x in vars if not isinstance(x,
|
|
118
|
-
if isinstance(comparison,
|
|
115
|
+
concept = [x for x in vars if isinstance(x, BuildConcept)][0]
|
|
116
|
+
comparison = [x for x in vars if not isinstance(x, BuildConcept)][0]
|
|
117
|
+
if isinstance(comparison, BuildFunction):
|
|
119
118
|
if not comparison.operator == FunctionType.CONSTANT:
|
|
120
119
|
return False
|
|
121
120
|
first_arg = comparison.arguments[0]
|
|
@@ -159,26 +158,19 @@ def is_fully_covered(
|
|
|
159
158
|
|
|
160
159
|
# Check for gaps
|
|
161
160
|
current_end = start
|
|
162
|
-
print(ranges)
|
|
163
161
|
for r_start, r_end in ranges:
|
|
164
|
-
print(r_start, r_end)
|
|
165
162
|
# If there's a gap between the current range and the previous coverage
|
|
166
|
-
print(r_start - current_end)
|
|
167
163
|
if (r_start - current_end) > increment: # type: ignore
|
|
168
|
-
print("gap")
|
|
169
164
|
return False
|
|
170
|
-
print("okay")
|
|
171
165
|
# Extend the current coverage
|
|
172
166
|
current_end = max(current_end, r_end)
|
|
173
167
|
|
|
174
168
|
# If the loop ends and we haven't reached the end, return False
|
|
175
|
-
print(current_end, end)
|
|
176
|
-
print(current_end >= end)
|
|
177
169
|
return current_end >= end
|
|
178
170
|
|
|
179
171
|
|
|
180
|
-
def get_union_sources(datasources: list[
|
|
181
|
-
candidates: list[
|
|
172
|
+
def get_union_sources(datasources: list[BuildDatasource], concepts: list[BuildConcept]):
|
|
173
|
+
candidates: list[BuildDatasource] = []
|
|
182
174
|
for x in datasources:
|
|
183
175
|
if all([c.address in x.output_concepts for c in concepts]):
|
|
184
176
|
if (
|
|
@@ -187,7 +179,7 @@ def get_union_sources(datasources: list[Datasource], concepts: list[Concept]):
|
|
|
187
179
|
):
|
|
188
180
|
candidates.append(x)
|
|
189
181
|
|
|
190
|
-
assocs: dict[str, list[
|
|
182
|
+
assocs: dict[str, list[BuildDatasource]] = defaultdict(list[BuildDatasource])
|
|
191
183
|
for x in candidates:
|
|
192
184
|
if not x.non_partial_for:
|
|
193
185
|
continue
|
|
@@ -195,8 +187,9 @@ def get_union_sources(datasources: list[Datasource], concepts: list[Concept]):
|
|
|
195
187
|
continue
|
|
196
188
|
merge_key = x.non_partial_for.concept_arguments[0]
|
|
197
189
|
assocs[merge_key.address].append(x)
|
|
198
|
-
final: list[list[
|
|
190
|
+
final: list[list[BuildDatasource]] = []
|
|
199
191
|
for _, dses in assocs.items():
|
|
192
|
+
|
|
200
193
|
conditions = [c.non_partial_for.conditional for c in dses if c.non_partial_for]
|
|
201
194
|
if simplify_conditions(conditions):
|
|
202
195
|
final.append(dses)
|
|
@@ -1,18 +1,19 @@
|
|
|
1
|
+
from functools import reduce
|
|
1
2
|
from typing import List, Optional
|
|
2
3
|
|
|
3
4
|
import networkx as nx
|
|
4
5
|
|
|
5
6
|
from trilogy.constants import logger
|
|
6
|
-
from trilogy.core.enums import
|
|
7
|
+
from trilogy.core.enums import Derivation
|
|
7
8
|
from trilogy.core.graph_models import concept_to_node
|
|
8
|
-
from trilogy.core.models import (
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
WhereClause,
|
|
9
|
+
from trilogy.core.models.build import (
|
|
10
|
+
BuildConcept,
|
|
11
|
+
BuildDatasource,
|
|
12
|
+
BuildGrain,
|
|
13
|
+
BuildWhereClause,
|
|
14
|
+
LooseBuildConceptList,
|
|
15
15
|
)
|
|
16
|
+
from trilogy.core.models.build_environment import BuildEnvironment
|
|
16
17
|
from trilogy.core.processing.node_generators.select_helpers.datasource_injection import (
|
|
17
18
|
get_union_sources,
|
|
18
19
|
)
|
|
@@ -33,16 +34,17 @@ def extract_address(node: str):
|
|
|
33
34
|
|
|
34
35
|
|
|
35
36
|
def get_graph_partial_nodes(
|
|
36
|
-
g: nx.DiGraph, conditions:
|
|
37
|
+
g: nx.DiGraph, conditions: BuildWhereClause | None
|
|
37
38
|
) -> dict[str, list[str]]:
|
|
38
|
-
datasources: dict[str,
|
|
39
|
-
g, "datasource"
|
|
39
|
+
datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
|
|
40
|
+
nx.get_node_attributes(g, "datasource")
|
|
40
41
|
)
|
|
41
42
|
partial: dict[str, list[str]] = {}
|
|
42
43
|
for node in g.nodes:
|
|
43
44
|
if node in datasources:
|
|
44
45
|
ds = datasources[node]
|
|
45
46
|
if not isinstance(ds, list):
|
|
47
|
+
|
|
46
48
|
if ds.non_partial_for and conditions == ds.non_partial_for:
|
|
47
49
|
partial[node] = []
|
|
48
50
|
continue
|
|
@@ -55,36 +57,58 @@ def get_graph_partial_nodes(
|
|
|
55
57
|
return partial
|
|
56
58
|
|
|
57
59
|
|
|
58
|
-
def
|
|
59
|
-
|
|
60
|
-
|
|
60
|
+
def get_graph_exact_match(
|
|
61
|
+
g: nx.DiGraph, conditions: BuildWhereClause | None
|
|
62
|
+
) -> set[str]:
|
|
63
|
+
datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
|
|
64
|
+
nx.get_node_attributes(g, "datasource")
|
|
61
65
|
)
|
|
62
|
-
|
|
66
|
+
exact: set[str] = set()
|
|
63
67
|
for node in g.nodes:
|
|
64
68
|
if node in datasources:
|
|
69
|
+
ds = datasources[node]
|
|
70
|
+
if not isinstance(ds, list):
|
|
71
|
+
if ds.non_partial_for and conditions == ds.non_partial_for:
|
|
72
|
+
exact.add(node)
|
|
73
|
+
continue
|
|
74
|
+
else:
|
|
75
|
+
continue
|
|
76
|
+
|
|
77
|
+
return exact
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def get_graph_grains(g: nx.DiGraph) -> dict[str, list[str]]:
|
|
81
|
+
datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
|
|
82
|
+
nx.get_node_attributes(g, "datasource")
|
|
83
|
+
)
|
|
84
|
+
grain_length: dict[str, list[str]] = {}
|
|
85
|
+
for node in g.nodes:
|
|
86
|
+
if node in datasources:
|
|
87
|
+
base: set[str] = set()
|
|
65
88
|
lookup = datasources[node]
|
|
66
89
|
if not isinstance(lookup, list):
|
|
67
90
|
lookup = [lookup]
|
|
68
91
|
assert isinstance(lookup, list)
|
|
69
|
-
grain_length[node] =
|
|
92
|
+
grain_length[node] = reduce(
|
|
93
|
+
lambda x, y: x.union(y.grain.components), lookup, base # type: ignore
|
|
94
|
+
)
|
|
70
95
|
return grain_length
|
|
71
96
|
|
|
72
97
|
|
|
73
98
|
def create_pruned_concept_graph(
|
|
74
99
|
g: nx.DiGraph,
|
|
75
|
-
all_concepts: List[
|
|
76
|
-
datasources: list[
|
|
100
|
+
all_concepts: List[BuildConcept],
|
|
101
|
+
datasources: list[BuildDatasource],
|
|
77
102
|
accept_partial: bool = False,
|
|
78
|
-
conditions:
|
|
103
|
+
conditions: BuildWhereClause | None = None,
|
|
79
104
|
depth: int = 0,
|
|
80
105
|
) -> nx.DiGraph:
|
|
81
106
|
orig_g = g
|
|
82
107
|
g = g.copy()
|
|
83
|
-
|
|
84
108
|
union_options = get_union_sources(datasources, all_concepts)
|
|
85
109
|
for ds_list in union_options:
|
|
86
110
|
node_address = "ds~" + "-".join([x.name for x in ds_list])
|
|
87
|
-
common: set[
|
|
111
|
+
common: set[BuildConcept] = set.intersection(
|
|
88
112
|
*[set(x.output_concepts) for x in ds_list]
|
|
89
113
|
)
|
|
90
114
|
g.add_node(node_address, datasource=ds_list)
|
|
@@ -92,9 +116,9 @@ def create_pruned_concept_graph(
|
|
|
92
116
|
g.add_edge(node_address, concept_to_node(c))
|
|
93
117
|
|
|
94
118
|
target_addresses = set([c.address for c in all_concepts])
|
|
95
|
-
concepts: dict[str,
|
|
96
|
-
datasource_map: dict[str,
|
|
97
|
-
orig_g, "datasource"
|
|
119
|
+
concepts: dict[str, BuildConcept] = nx.get_node_attributes(orig_g, "concept")
|
|
120
|
+
datasource_map: dict[str, BuildDatasource | list[BuildDatasource]] = (
|
|
121
|
+
nx.get_node_attributes(orig_g, "datasource")
|
|
98
122
|
)
|
|
99
123
|
relevant_concepts_pre = {
|
|
100
124
|
n: x.address
|
|
@@ -149,6 +173,7 @@ def create_pruned_concept_graph(
|
|
|
149
173
|
if len(neighbors) > 1:
|
|
150
174
|
relevant_concepts.append(n)
|
|
151
175
|
roots[root] = set()
|
|
176
|
+
|
|
152
177
|
g.remove_nodes_from(
|
|
153
178
|
[
|
|
154
179
|
n
|
|
@@ -163,6 +188,8 @@ def create_pruned_concept_graph(
|
|
|
163
188
|
f"{padding(depth)}{LOGGER_PREFIX} cannot resolve root graph - no subgraphs after node prune"
|
|
164
189
|
)
|
|
165
190
|
return None
|
|
191
|
+
# from trilogy.hooks.graph_hook import GraphHook
|
|
192
|
+
# GraphHook().query_graph_built(g)
|
|
166
193
|
if subgraphs and len(subgraphs) != 1:
|
|
167
194
|
logger.info(
|
|
168
195
|
f"{padding(depth)}{LOGGER_PREFIX} cannot resolve root graph - subgraphs are split - have {len(subgraphs)} from {subgraphs}"
|
|
@@ -183,15 +210,26 @@ def create_pruned_concept_graph(
|
|
|
183
210
|
|
|
184
211
|
|
|
185
212
|
def resolve_subgraphs(
|
|
186
|
-
g: nx.DiGraph, conditions:
|
|
213
|
+
g: nx.DiGraph, relevant: list[BuildConcept], conditions: BuildWhereClause | None
|
|
187
214
|
) -> dict[str, list[str]]:
|
|
215
|
+
"""When we have multiple distinct subgraphs within our matched
|
|
216
|
+
nodes that can satisfy a query, resolve which one of those we should
|
|
217
|
+
ultimately use.
|
|
218
|
+
This should generally return one subgraph for each
|
|
219
|
+
unique set of sub concepts that can be referenced,
|
|
220
|
+
discarding duplicates.
|
|
221
|
+
Duplicate subgraphs will be resolved based on which
|
|
222
|
+
ones are most 'optimal' to use, a heuristic
|
|
223
|
+
that can evolve in the future but is currently based on
|
|
224
|
+
cardinality."""
|
|
188
225
|
datasources = [n for n in g.nodes if n.startswith("ds~")]
|
|
189
226
|
subgraphs: dict[str, list[str]] = {
|
|
190
227
|
ds: list(set(list(nx.all_neighbors(g, ds)))) for ds in datasources
|
|
191
228
|
}
|
|
192
229
|
partial_map = get_graph_partial_nodes(g, conditions)
|
|
193
|
-
|
|
194
|
-
|
|
230
|
+
exact_map = get_graph_exact_match(g, conditions)
|
|
231
|
+
grain_length = get_graph_grains(g)
|
|
232
|
+
concepts: dict[str, BuildConcept] = nx.get_node_attributes(g, "concept")
|
|
195
233
|
non_partial_map = {
|
|
196
234
|
ds: [concepts[c].address for c in subgraphs[ds] if c not in partial_map[ds]]
|
|
197
235
|
for ds in datasources
|
|
@@ -200,7 +238,27 @@ def resolve_subgraphs(
|
|
|
200
238
|
ds: [concepts[c].address for c in subgraphs[ds]] for ds in datasources
|
|
201
239
|
}
|
|
202
240
|
pruned_subgraphs = {}
|
|
241
|
+
|
|
242
|
+
def score_node(input: str):
|
|
243
|
+
logger.debug(f"scoring node {input}")
|
|
244
|
+
grain = grain_length[input]
|
|
245
|
+
# first - go for lowest grain
|
|
246
|
+
# but if the object we want is in the grain, treat that as "free"
|
|
247
|
+
# ex - pick source with grain(product_id) over grain(order_id)
|
|
248
|
+
# when going for product_id
|
|
249
|
+
score = (
|
|
250
|
+
len(list(grain)) - sum([1 for x in concept_map[input] if x in grain]),
|
|
251
|
+
# then check if it's an exact condition match
|
|
252
|
+
0 if input in exact_map else 0.5,
|
|
253
|
+
# last, number of concepts
|
|
254
|
+
len(subgraphs[input]),
|
|
255
|
+
input,
|
|
256
|
+
)
|
|
257
|
+
logger.debug(score)
|
|
258
|
+
return score
|
|
259
|
+
|
|
203
260
|
for key, nodes in subgraphs.items():
|
|
261
|
+
|
|
204
262
|
value = non_partial_map[key]
|
|
205
263
|
all_concepts = concept_map[key]
|
|
206
264
|
is_subset = False
|
|
@@ -219,31 +277,66 @@ def resolve_subgraphs(
|
|
|
219
277
|
logger.debug(
|
|
220
278
|
f"Dropping subgraph {key} with {value} as it is a subset of {other_key} with {other_value}"
|
|
221
279
|
)
|
|
222
|
-
break
|
|
223
280
|
elif len(value) == len(other_value) and len(all_concepts) == len(
|
|
224
281
|
other_all_concepts
|
|
225
282
|
):
|
|
226
283
|
matches.add(other_key)
|
|
227
284
|
matches.add(key)
|
|
228
|
-
if matches:
|
|
229
|
-
|
|
285
|
+
if matches and not is_subset:
|
|
286
|
+
min_node = min(matches, key=score_node)
|
|
287
|
+
logger.debug(f"minimum source score is {min_node}")
|
|
288
|
+
is_subset = key is not min(matches, key=score_node)
|
|
230
289
|
if not is_subset:
|
|
231
290
|
pruned_subgraphs[key] = nodes
|
|
291
|
+
|
|
292
|
+
final_nodes: set[str] = set([n for v in pruned_subgraphs.values() for n in v])
|
|
293
|
+
relevant_concepts_pre = {
|
|
294
|
+
n: x.address
|
|
295
|
+
for n in g.nodes()
|
|
296
|
+
# filter out synonyms
|
|
297
|
+
if (x := concepts.get(n, None)) and x.address in relevant
|
|
298
|
+
}
|
|
299
|
+
for node in final_nodes:
|
|
300
|
+
keep = True
|
|
301
|
+
if node.startswith("c~") and node not in relevant_concepts_pre:
|
|
302
|
+
keep = (
|
|
303
|
+
sum(
|
|
304
|
+
[
|
|
305
|
+
1 if node in sub_nodes else 0
|
|
306
|
+
for _, sub_nodes in pruned_subgraphs.items()
|
|
307
|
+
]
|
|
308
|
+
)
|
|
309
|
+
> 1
|
|
310
|
+
)
|
|
311
|
+
if not keep:
|
|
312
|
+
logger.debug(f"Pruning node {node} as irrelevant after subgraph resolution")
|
|
313
|
+
pruned_subgraphs = {
|
|
314
|
+
k: [n for n in v if n != node] for k, v in pruned_subgraphs.items()
|
|
315
|
+
}
|
|
316
|
+
|
|
232
317
|
return pruned_subgraphs
|
|
233
318
|
|
|
234
319
|
|
|
235
320
|
def create_datasource_node(
|
|
236
|
-
datasource:
|
|
237
|
-
all_concepts: List[
|
|
321
|
+
datasource: BuildDatasource,
|
|
322
|
+
all_concepts: List[BuildConcept],
|
|
238
323
|
accept_partial: bool,
|
|
239
|
-
environment:
|
|
324
|
+
environment: BuildEnvironment,
|
|
240
325
|
depth: int,
|
|
241
|
-
conditions:
|
|
326
|
+
conditions: BuildWhereClause | None = None,
|
|
242
327
|
) -> tuple[StrategyNode, bool]:
|
|
243
|
-
|
|
328
|
+
logger.info(all_concepts)
|
|
329
|
+
target_grain = BuildGrain.from_concepts(all_concepts, environment=environment)
|
|
244
330
|
force_group = False
|
|
245
331
|
if not datasource.grain.issubset(target_grain):
|
|
332
|
+
logger.info(
|
|
333
|
+
f"{padding(depth)}{LOGGER_PREFIX}_DS_NODE Select node must be wrapped in group, {datasource.grain} not subset of target grain {target_grain}"
|
|
334
|
+
)
|
|
246
335
|
force_group = True
|
|
336
|
+
else:
|
|
337
|
+
logger.info(
|
|
338
|
+
f"{padding(depth)}{LOGGER_PREFIX}_DS_NODE Select node grain {datasource.grain} is subset of target grain {target_grain}, no group required"
|
|
339
|
+
)
|
|
247
340
|
if not datasource.grain.components:
|
|
248
341
|
force_group = True
|
|
249
342
|
partial_concepts = [
|
|
@@ -251,14 +344,17 @@ def create_datasource_node(
|
|
|
251
344
|
for c in datasource.columns
|
|
252
345
|
if not c.is_complete and c.concept.address in all_concepts
|
|
253
346
|
]
|
|
254
|
-
partial_lcl =
|
|
347
|
+
partial_lcl = LooseBuildConceptList(concepts=partial_concepts)
|
|
255
348
|
nullable_concepts = [
|
|
256
349
|
c.concept
|
|
257
350
|
for c in datasource.columns
|
|
258
351
|
if c.is_nullable and c.concept.address in all_concepts
|
|
259
352
|
]
|
|
260
|
-
nullable_lcl =
|
|
353
|
+
nullable_lcl = LooseBuildConceptList(concepts=nullable_concepts)
|
|
261
354
|
partial_is_full = conditions and (conditions == datasource.non_partial_for)
|
|
355
|
+
|
|
356
|
+
datasource_conditions = datasource.where.conditional if datasource.where else None
|
|
357
|
+
|
|
262
358
|
return (
|
|
263
359
|
SelectNode(
|
|
264
360
|
input_concepts=[c.concept for c in datasource.columns],
|
|
@@ -272,8 +368,8 @@ def create_datasource_node(
|
|
|
272
368
|
nullable_concepts=[c for c in all_concepts if c in nullable_lcl],
|
|
273
369
|
accept_partial=accept_partial,
|
|
274
370
|
datasource=datasource,
|
|
275
|
-
grain=
|
|
276
|
-
conditions=
|
|
371
|
+
grain=datasource.grain,
|
|
372
|
+
conditions=datasource_conditions,
|
|
277
373
|
preexisting_conditions=(
|
|
278
374
|
conditions.conditional if partial_is_full and conditions else None
|
|
279
375
|
),
|
|
@@ -287,16 +383,16 @@ def create_select_node(
|
|
|
287
383
|
subgraph: list[str],
|
|
288
384
|
accept_partial: bool,
|
|
289
385
|
g,
|
|
290
|
-
environment:
|
|
386
|
+
environment: BuildEnvironment,
|
|
291
387
|
depth: int,
|
|
292
|
-
conditions:
|
|
388
|
+
conditions: BuildWhereClause | None = None,
|
|
293
389
|
) -> StrategyNode:
|
|
294
390
|
|
|
295
391
|
all_concepts = [
|
|
296
392
|
environment.concepts[extract_address(c)] for c in subgraph if c.startswith("c~")
|
|
297
393
|
]
|
|
298
394
|
|
|
299
|
-
if all([c.derivation ==
|
|
395
|
+
if all([c.derivation == Derivation.CONSTANT for c in all_concepts]):
|
|
300
396
|
logger.info(
|
|
301
397
|
f"{padding(depth)}{LOGGER_PREFIX} All concepts {[x.address for x in all_concepts]} are constants, returning constant node"
|
|
302
398
|
)
|
|
@@ -311,10 +407,10 @@ def create_select_node(
|
|
|
311
407
|
force_group=False,
|
|
312
408
|
)
|
|
313
409
|
|
|
314
|
-
datasource: dict[str,
|
|
315
|
-
g, "datasource"
|
|
316
|
-
)
|
|
317
|
-
if isinstance(datasource,
|
|
410
|
+
datasource: dict[str, BuildDatasource | list[BuildDatasource]] = (
|
|
411
|
+
nx.get_node_attributes(g, "datasource")[ds_name]
|
|
412
|
+
)
|
|
413
|
+
if isinstance(datasource, BuildDatasource):
|
|
318
414
|
bcandidate, force_group = create_datasource_node(
|
|
319
415
|
datasource,
|
|
320
416
|
all_concepts,
|
|
@@ -325,6 +421,9 @@ def create_select_node(
|
|
|
325
421
|
)
|
|
326
422
|
|
|
327
423
|
elif isinstance(datasource, list):
|
|
424
|
+
logger.info(
|
|
425
|
+
f"{padding(depth)}{LOGGER_PREFIX} generating union node parents with condition {conditions}"
|
|
426
|
+
)
|
|
328
427
|
from trilogy.core.processing.nodes.union_node import UnionNode
|
|
329
428
|
|
|
330
429
|
force_group = False
|
|
@@ -340,6 +439,7 @@ def create_select_node(
|
|
|
340
439
|
)
|
|
341
440
|
parents.append(subnode)
|
|
342
441
|
force_group = force_group or fg
|
|
442
|
+
logger.info(f"{padding(depth)}{LOGGER_PREFIX} generating union node")
|
|
343
443
|
bcandidate = UnionNode(
|
|
344
444
|
output_concepts=all_concepts,
|
|
345
445
|
input_concepts=all_concepts,
|
|
@@ -373,15 +473,15 @@ def create_select_node(
|
|
|
373
473
|
|
|
374
474
|
|
|
375
475
|
def gen_select_merge_node(
|
|
376
|
-
all_concepts: List[
|
|
476
|
+
all_concepts: List[BuildConcept],
|
|
377
477
|
g: nx.DiGraph,
|
|
378
|
-
environment:
|
|
478
|
+
environment: BuildEnvironment,
|
|
379
479
|
depth: int,
|
|
380
480
|
accept_partial: bool = False,
|
|
381
|
-
conditions:
|
|
481
|
+
conditions: BuildWhereClause | None = None,
|
|
382
482
|
) -> Optional[StrategyNode]:
|
|
383
|
-
non_constant = [c for c in all_concepts if c.derivation !=
|
|
384
|
-
constants = [c for c in all_concepts if c.derivation ==
|
|
483
|
+
non_constant = [c for c in all_concepts if c.derivation != Derivation.CONSTANT]
|
|
484
|
+
constants = [c for c in all_concepts if c.derivation == Derivation.CONSTANT]
|
|
385
485
|
if not non_constant and constants:
|
|
386
486
|
return ConstantNode(
|
|
387
487
|
output_concepts=constants,
|
|
@@ -398,7 +498,7 @@ def gen_select_merge_node(
|
|
|
398
498
|
non_constant,
|
|
399
499
|
accept_partial=attempt,
|
|
400
500
|
conditions=conditions,
|
|
401
|
-
datasources=list(environment.datasources.values()),
|
|
501
|
+
datasources=list([x for x in environment.datasources.values()]),
|
|
402
502
|
depth=depth,
|
|
403
503
|
)
|
|
404
504
|
if pruned_concept_graph:
|
|
@@ -411,9 +511,12 @@ def gen_select_merge_node(
|
|
|
411
511
|
logger.info(f"{padding(depth)}{LOGGER_PREFIX} no covering graph found.")
|
|
412
512
|
return None
|
|
413
513
|
|
|
414
|
-
sub_nodes = resolve_subgraphs(
|
|
514
|
+
sub_nodes = resolve_subgraphs(
|
|
515
|
+
pruned_concept_graph, relevant=non_constant, conditions=conditions
|
|
516
|
+
)
|
|
415
517
|
|
|
416
518
|
logger.info(f"{padding(depth)}{LOGGER_PREFIX} fetching subgraphs {sub_nodes}")
|
|
519
|
+
|
|
417
520
|
parents = [
|
|
418
521
|
create_select_node(
|
|
419
522
|
k,
|
|
@@ -444,6 +547,10 @@ def gen_select_merge_node(
|
|
|
444
547
|
|
|
445
548
|
if len(parents) == 1:
|
|
446
549
|
return parents[0]
|
|
550
|
+
logger.info(
|
|
551
|
+
f"{padding(depth)}{LOGGER_PREFIX} Multiple parent DS nodes resolved - {[type(x) for x in parents]}, wrapping in merge"
|
|
552
|
+
)
|
|
553
|
+
|
|
447
554
|
preexisting_conditions = None
|
|
448
555
|
if conditions and all(
|
|
449
556
|
[
|
|
@@ -461,15 +568,5 @@ def gen_select_merge_node(
|
|
|
461
568
|
parents=parents,
|
|
462
569
|
preexisting_conditions=preexisting_conditions,
|
|
463
570
|
)
|
|
464
|
-
|
|
465
|
-
if not base.resolve().grain.issubset(target_grain):
|
|
466
|
-
return GroupNode(
|
|
467
|
-
output_concepts=all_concepts,
|
|
468
|
-
input_concepts=all_concepts,
|
|
469
|
-
environment=environment,
|
|
470
|
-
parents=[base],
|
|
471
|
-
depth=depth,
|
|
472
|
-
preexisting_conditions=preexisting_conditions,
|
|
473
|
-
partial_concepts=base.partial_concepts,
|
|
474
|
-
)
|
|
571
|
+
|
|
475
572
|
return base
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
from trilogy.constants import logger
|
|
2
|
-
from trilogy.core.enums import
|
|
2
|
+
from trilogy.core.enums import Derivation
|
|
3
3
|
from trilogy.core.exceptions import NoDatasourceException
|
|
4
|
-
from trilogy.core.models import (
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
WhereClause,
|
|
4
|
+
from trilogy.core.models.build import (
|
|
5
|
+
BuildConcept,
|
|
6
|
+
BuildWhereClause,
|
|
7
|
+
LooseBuildConceptList,
|
|
9
8
|
)
|
|
9
|
+
from trilogy.core.models.build_environment import BuildEnvironment
|
|
10
10
|
from trilogy.core.processing.node_generators.select_merge_node import (
|
|
11
11
|
gen_select_merge_node,
|
|
12
12
|
)
|
|
@@ -19,23 +19,23 @@ LOGGER_PREFIX = "[GEN_SELECT_NODE]"
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
def gen_select_node(
|
|
22
|
-
concept:
|
|
23
|
-
local_optional: list[
|
|
24
|
-
environment:
|
|
22
|
+
concept: BuildConcept,
|
|
23
|
+
local_optional: list[BuildConcept],
|
|
24
|
+
environment: BuildEnvironment,
|
|
25
25
|
g,
|
|
26
26
|
depth: int,
|
|
27
27
|
accept_partial: bool = False,
|
|
28
28
|
fail_if_not_found: bool = True,
|
|
29
|
-
conditions:
|
|
29
|
+
conditions: BuildWhereClause | None = None,
|
|
30
30
|
) -> StrategyNode | None:
|
|
31
31
|
all_concepts = [concept] + local_optional
|
|
32
|
-
all_lcl =
|
|
33
|
-
materialized_lcl =
|
|
32
|
+
all_lcl = LooseBuildConceptList(concepts=all_concepts)
|
|
33
|
+
materialized_lcl = LooseBuildConceptList(
|
|
34
34
|
concepts=[
|
|
35
35
|
x
|
|
36
36
|
for x in all_concepts
|
|
37
37
|
if x.address in environment.materialized_concepts
|
|
38
|
-
or x.derivation ==
|
|
38
|
+
or x.derivation == Derivation.CONSTANT
|
|
39
39
|
]
|
|
40
40
|
)
|
|
41
41
|
if materialized_lcl != all_lcl:
|
|
@@ -2,26 +2,29 @@ from typing import List
|
|
|
2
2
|
|
|
3
3
|
from trilogy.constants import logger
|
|
4
4
|
from trilogy.core.enums import FunctionType, Purpose
|
|
5
|
-
from trilogy.core.models import
|
|
5
|
+
from trilogy.core.models.build import BuildConcept, BuildFunction, BuildWhereClause
|
|
6
6
|
from trilogy.core.processing.nodes import History, StrategyNode, UnionNode
|
|
7
7
|
from trilogy.core.processing.utility import padding
|
|
8
8
|
|
|
9
9
|
LOGGER_PREFIX = "[GEN_UNION_NODE]"
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
def is_union(c:
|
|
13
|
-
return
|
|
12
|
+
def is_union(c: BuildConcept):
|
|
13
|
+
return (
|
|
14
|
+
isinstance(c.lineage, BuildFunction)
|
|
15
|
+
and c.lineage.operator == FunctionType.UNION
|
|
16
|
+
)
|
|
14
17
|
|
|
15
18
|
|
|
16
19
|
def gen_union_node(
|
|
17
|
-
concept:
|
|
18
|
-
local_optional: List[
|
|
20
|
+
concept: BuildConcept,
|
|
21
|
+
local_optional: List[BuildConcept],
|
|
19
22
|
environment,
|
|
20
23
|
g,
|
|
21
24
|
depth: int,
|
|
22
25
|
source_concepts,
|
|
23
26
|
history: History | None = None,
|
|
24
|
-
conditions:
|
|
27
|
+
conditions: BuildWhereClause | None = None,
|
|
25
28
|
) -> StrategyNode | None:
|
|
26
29
|
all_unions = [x for x in local_optional if is_union(x)] + [concept]
|
|
27
30
|
|
|
@@ -30,15 +33,13 @@ def gen_union_node(
|
|
|
30
33
|
base = keys.pop()
|
|
31
34
|
remaining = [x for x in all_unions if x.address != base.address]
|
|
32
35
|
arguments = []
|
|
33
|
-
if isinstance(base.lineage,
|
|
36
|
+
if isinstance(base.lineage, BuildFunction):
|
|
34
37
|
arguments = base.lineage.concept_arguments
|
|
35
38
|
for arg in arguments:
|
|
36
|
-
relevant_parents: list[
|
|
39
|
+
relevant_parents: list[BuildConcept] = []
|
|
37
40
|
for other_union in remaining:
|
|
38
41
|
assert other_union.lineage
|
|
39
|
-
potential_parents = [
|
|
40
|
-
z for z in other_union.lineage.arguments if isinstance(z, Concept)
|
|
41
|
-
]
|
|
42
|
+
potential_parents = [z for z in other_union.lineage.concept_arguments]
|
|
42
43
|
relevant_parents += [
|
|
43
44
|
x for x in potential_parents if x.keys and arg.address in x.keys
|
|
44
45
|
]
|