pytrilogy 0.0.3.92__py3-none-any.whl → 0.0.3.94__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pytrilogy-0.0.3.92.dist-info → pytrilogy-0.0.3.94.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.3.92.dist-info → pytrilogy-0.0.3.94.dist-info}/RECORD +18 -18
- trilogy/__init__.py +1 -1
- trilogy/core/env_processor.py +4 -2
- trilogy/core/graph_models.py +63 -44
- trilogy/core/models/author.py +17 -26
- trilogy/core/models/build.py +141 -151
- trilogy/core/models/build_environment.py +2 -6
- trilogy/core/models/execute.py +3 -3
- trilogy/core/processing/node_generators/group_node.py +3 -7
- trilogy/core/processing/node_generators/node_merge_node.py +30 -28
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +25 -11
- trilogy/core/processing/node_generators/select_merge_node.py +66 -80
- trilogy/parsing/parse_engine.py +1 -1
- {pytrilogy-0.0.3.92.dist-info → pytrilogy-0.0.3.94.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.3.92.dist-info → pytrilogy-0.0.3.94.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.3.92.dist-info → pytrilogy-0.0.3.94.dist-info}/licenses/LICENSE.md +0 -0
- {pytrilogy-0.0.3.92.dist-info → pytrilogy-0.0.3.94.dist-info}/top_level.txt +0 -0
trilogy/core/models/build_environment.py
CHANGED

@@ -90,9 +90,7 @@ def validate_concepts(v) -> BuildEnvironmentConceptDict:
     if isinstance(v, BuildEnvironmentConceptDict):
         return v
     elif isinstance(v, dict):
-        return BuildEnvironmentConceptDict(
-            **{x: BuildConcept.model_validate(y) for x, y in v.items()}
-        )
+        return BuildEnvironmentConceptDict(**{x: y for x, y in v.items()})
     raise ValueError
@@ -100,9 +98,7 @@ def validate_datasources(v) -> BuildEnvironmentDatasourceDict:
     if isinstance(v, BuildEnvironmentDatasourceDict):
         return v
     elif isinstance(v, dict):
-        return BuildEnvironmentDatasourceDict(
-            **{x: BuildDatasource.model_validate(y) for x, y in v.items()}
-        )
+        return BuildEnvironmentDatasourceDict(**{x: y for x, y in v.items()})
     raise ValueError
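Aside on the two validator hunks above: dict values are now passed through as-is instead of being re-validated entry by entry. A minimal sketch of the behavioral difference, using a generic pydantic model (Item is an illustrative stand-in, not a pytrilogy class):

    from pydantic import BaseModel

    class Item(BaseModel):
        name: str

    values = {"a": Item(name="a"), "b": Item(name="b")}

    # old shape: re-run validation on every value
    revalidated = {k: Item.model_validate(v) for k, v in values.items()}

    # new shape: trust already-constructed instances and pass them through
    passed_through = {k: v for k, v in values.items()}

    assert passed_through["a"] is values["a"]  # same objects, no re-validation cost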
trilogy/core/models/execute.py
CHANGED
@@ -859,7 +859,7 @@ class RecursiveCTE(CTE):
                 ),
                 BuildCaseElse(expr=False),
             ],
-
+            output_data_type=DataType.BOOL,
             output_purpose=Purpose.KEY,
         ),
     )
@@ -884,7 +884,7 @@ class RecursiveCTE(CTE):
                 ),
                 BuildCaseElse(expr=False),
             ],
-
+            output_data_type=DataType.BOOL,
             output_purpose=Purpose.KEY,
         ),
     )
@@ -909,7 +909,7 @@ class RecursiveCTE(CTE):
                 ),
                 BuildCaseElse(expr=right_recurse_concept),
             ],
-
+            output_data_type=recursive_derived.datatype,
             output_purpose=recursive_derived.purpose,
         ),
     )
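Aside on the three RecursiveCTE hunks above: each pins an explicit output_data_type on the generated CASE function rather than leaving it unset. A toy sketch of the pattern (FunctionNode and DataType here are illustrative stand-ins, not trilogy's classes):

    from dataclasses import dataclass
    from enum import Enum

    class DataType(Enum):
        BOOL = "bool"

    @dataclass
    class FunctionNode:
        arguments: list
        output_data_type: DataType  # carried on the node, never re-inferred downstream

    # a CASE WHEN ... ELSE false END is known to be boolean at build time
    is_recursion_edge = FunctionNode(
        arguments=["<case when>", "<case else>"],
        output_data_type=DataType.BOOL,
    )
    assert is_recursion_edge.output_data_type is DataType.BOOL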
trilogy/core/processing/node_generators/group_node.py
CHANGED

@@ -61,13 +61,13 @@ def gen_group_node(

     # if the aggregation has a grain, we need to ensure these are the ONLY optional in the output of the select
     output_concepts = [concept]
-
+    grain_components = [environment.concepts[c] for c in concept.grain.components]
     if (
         concept.grain
         and len(concept.grain.components) > 0
         and not concept.grain.abstract
     ):
-
+
         parent_concepts += grain_components
         build_grain_parents = get_aggregate_grain(concept, environment)
         output_concepts += grain_components
@@ -131,10 +131,6 @@ def gen_group_node(
     else:
         parents = []

-    # the keys we group by
-    # are what we can use for enrichment
-    group_key_parents = [environment.concepts[c] for c in concept.grain.components]
-
     group_node = GroupNode(
         output_concepts=output_concepts,
         input_concepts=parent_concepts,
@@ -164,7 +160,7 @@ def gen_group_node(
     )
     return gen_enrichment_node(
         group_node,
-        join_keys=group_key_parents,
+        join_keys=grain_components,
         local_optional=local_optional,
         environment=environment,
         g=g,
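Aside on the group_node change: grain_components is now resolved once near the top and reused for parent_concepts, output_concepts, and the enrichment join_keys, replacing the separately built group_key_parents. A toy sketch of the lookup pattern (dict-backed stand-in for environment.concepts):

    class Grain:
        def __init__(self, components):
            self.components = components

    # stand-in for environment.concepts: address -> concept object
    concepts = {"order.id": object(), "order.date": object()}
    grain = Grain(["order.id", "order.date"])

    # resolved once...
    grain_components = [concepts[c] for c in grain.components]

    # ...and reused everywhere that previously re-derived it
    parent_concepts = [] + grain_components
    output_concepts = ["<agg concept>"] + grain_components
    join_keys = grain_components
    assert join_keys[0] is parent_concepts[0]  # same resolved objects throughout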
trilogy/core/processing/node_generators/node_merge_node.py
CHANGED

@@ -6,7 +6,11 @@ from networkx.algorithms import approximation as ax
 from trilogy.constants import logger
 from trilogy.core.enums import Derivation
 from trilogy.core.exceptions import AmbiguousRelationshipResolutionException
-from trilogy.core.graph_models import concept_to_node, prune_sources_for_conditions
+from trilogy.core.graph_models import (
+    ReferenceGraph,
+    concept_to_node,
+    prune_sources_for_conditions,
+)
 from trilogy.core.models.build import BuildConcept, BuildConditional, BuildWhereClause
 from trilogy.core.models.build_environment import BuildEnvironment
 from trilogy.core.processing.nodes import History, MergeNode, StrategyNode
@@ -17,11 +21,12 @@ LOGGER_PREFIX = "[GEN_MERGE_NODE]"
 AMBIGUITY_CHECK_LIMIT = 20


-def filter_pseudonyms_for_source(ds_graph: nx.DiGraph, node: str):
+def filter_pseudonyms_for_source(
+    ds_graph: nx.DiGraph, node: str, pseudonyms: set[tuple[str, str]]
+):
     to_remove = set()
-
     for edge in ds_graph.edges:
-        if
+        if edge in pseudonyms:
             lengths = {}
             for n in edge:
                 lengths[n] = nx.shortest_path_length(ds_graph, node, n)
@@ -52,12 +57,14 @@ def filter_unique_graphs(graphs: list[list[str]]) -> list[list[str]]:
     return [list(x) for x in unique_graphs]


-def extract_ds_components(g: nx.DiGraph, nodelist: list[str]) -> list[list[str]]:
+def extract_ds_components(
+    g: nx.DiGraph, nodelist: list[str], pseudonyms: set[tuple[str, str]]
+) -> list[list[str]]:
     graphs = []
     for node in g.nodes:
         if node.startswith("ds~"):
             local = g.copy()
-            filter_pseudonyms_for_source(local, node)
+            filter_pseudonyms_for_source(local, node, pseudonyms)
             ds_graph: nx.DiGraph = nx.ego_graph(local, node, radius=10).copy()
             graphs.append(
                 [
@@ -78,7 +85,7 @@ def extract_ds_components(g: nx.DiGraph, nodelist: list[str]) -> list[list[str]]


 def determine_induced_minimal_nodes(
-    G: nx.DiGraph,
+    G: ReferenceGraph,
     nodelist: list[str],
     environment: BuildEnvironment,
     filter_downstream: bool,
@@ -86,23 +93,19 @@ def determine_induced_minimal_nodes(
 ) -> nx.DiGraph | None:
     H: nx.Graph = nx.to_undirected(G).copy()
     nodes_to_remove = []
-
-
-
-        if
-
-
-
-
-
-
-
-
-        )
-            nodes_to_remove.append(node)
-        # purge a node if we're already looking for all it's parents
-        if filter_downstream and lookup.derivation not in (Derivation.ROOT,):
-            nodes_to_remove.append(node)
+    for node, lookup in G.concepts.items():
+        # inclusion of aggregates can create ambiguous node relation chains
+        # there may be a better way to handle this
+        # can be revisited if we need to connect a derived synonym based on an aggregate
+        if lookup.derivation in (
+            Derivation.CONSTANT,
+            Derivation.AGGREGATE,
+            Derivation.FILTER,
+        ):
+            nodes_to_remove.append(node)
+        # purge a node if we're already looking for all it's parents
+        if filter_downstream and lookup.derivation not in (Derivation.ROOT,):
+            nodes_to_remove.append(node)
     if nodes_to_remove:
         # logger.debug(f"Removing nodes {nodes_to_remove} from graph")
         H.remove_nodes_from(nodes_to_remove)
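Aside on the pruning loop above: concept nodes are now walked via the typed G.concepts mapping and dropped by derivation kind before pathfinding. A self-contained sketch of the same filter (toy Derivation enum and node names; not the real lookup objects):

    from enum import Enum, auto

    class Derivation(Enum):
        ROOT = auto()
        CONSTANT = auto()
        AGGREGATE = auto()
        FILTER = auto()

    # stand-in for G.concepts: node -> derivation of its concept
    concepts = {
        "c~order.id": Derivation.ROOT,
        "c~order.count": Derivation.AGGREGATE,
        "c~pi": Derivation.CONSTANT,
    }

    filter_downstream = True
    nodes_to_remove = []
    for node, derivation in concepts.items():
        # aggregates/constants/filters can create ambiguous relation chains
        if derivation in (Derivation.CONSTANT, Derivation.AGGREGATE, Derivation.FILTER):
            nodes_to_remove.append(node)
        # purge anything non-root when all its parents are already being searched
        if filter_downstream and derivation not in (Derivation.ROOT,):
            nodes_to_remove.append(node)

    print(sorted(set(nodes_to_remove)))  # ['c~order.count', 'c~pi']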
@@ -259,7 +262,7 @@ def filter_duplicate_subgraphs(
 def resolve_weak_components(
     all_concepts: List[BuildConcept],
     environment: BuildEnvironment,
-    environment_graph: nx.DiGraph,
+    environment_graph: ReferenceGraph,
     filter_downstream: bool = True,
     accept_partial: bool = False,
     search_conditions: BuildWhereClause | None = None,
@@ -316,8 +319,6 @@ def resolve_weak_components(
         ]
         new = [x for x in all_graph_concepts if x.address not in all_concepts]

-        new_addresses = set([x.address for x in new if x.address not in synonyms])
-
         if not new:
             break_flag = True
         # remove our new nodes for the next search path
@@ -329,6 +330,7 @@ def resolve_weak_components(
         # from trilogy.hooks.graph_hook import GraphHook
         # GraphHook().query_graph_built(g, highlight_nodes=[concept_to_node(c.with_default_grain()) for c in all_concepts if "__preql_internal" not in c.address])
         found.append(g)
+        new_addresses = set([x.address for x in new if x.address not in synonyms])
         reduced_concept_sets.append(new_addresses)

     except nx.exception.NetworkXNoPath:
@@ -346,7 +348,7 @@ def resolve_weak_components(
     subgraphs: list[list[BuildConcept]] = []
     # components = nx.strongly_connected_components(g)
     node_list = [x for x in g.nodes if x.startswith("c~")]
-    components = extract_ds_components(g, node_list)
+    components = extract_ds_components(g, node_list, environment_graph.pseudonyms)
     logger.debug(f"Extracted components {components} from {node_list}")
     for component in components:
         # we need to take unique again as different addresses may map to the same concept
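Aside on the pseudonym threading above: pseudonym edges now travel alongside the graph as plain (node, node) tuples (environment_graph.pseudonyms) and are membership-tested against edges, instead of being re-read from graph attributes on every copy. A runnable sketch of the new membership test (toy node names):

    import networkx as nx

    # toy graph: one datasource node and two concept nodes that are synonyms
    g = nx.DiGraph()
    g.add_edge("ds~orders", "c~order.id")
    g.add_edge("c~order.id", "c~order.identifier")

    # pseudonym edges carried as a plain set of tuples, mirroring the new parameter
    pseudonyms = {("c~order.id", "c~order.identifier")}

    for edge in g.edges:
        if edge in pseudonyms:  # the new check from the diff
            print("pseudonym edge:", edge)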
trilogy/core/processing/node_generators/select_helpers/datasource_injection.py
CHANGED

@@ -128,17 +128,32 @@ def simplify_conditions(
     for condition in conditions:
         if not isinstance(condition, BuildComparison):
             return False
-
-
-
-
-        if
-
+        left_is_concept = False
+        left_is_reducable = False
+        right_is_concept = False
+        right_is_reducable = False
+        if isinstance(condition.left, BuildConcept):
+            left_is_concept = True
+        elif isinstance(condition.left, REDUCABLE_TYPES):
+            left_is_reducable = True
+
+        if isinstance(condition.right, BuildConcept):
+            right_is_concept = True
+        elif isinstance(condition.right, REDUCABLE_TYPES):
+            right_is_reducable = True
+
+        if not (
+            (left_is_concept and right_is_reducable)
+            or (right_is_concept and left_is_reducable)
         ):
             return False
-
-
-
+        if left_is_concept:
+            concept = condition.left
+            raw_comparison = condition.right
+        else:
+            concept = condition.right
+            raw_comparison = condition.left
+
         if isinstance(raw_comparison, BuildFunction):
             if not raw_comparison.operator == FunctionType.CONSTANT:
                 return False
@@ -154,7 +169,7 @@ def simplify_conditions(
         if not isinstance(comparison, REDUCABLE_TYPES):
             return False

-        var = concept
+        var: BuildConcept = concept  # type: ignore
         op = condition.operator
         grouped[var].append((op, comparison))

@@ -240,7 +255,6 @@ def get_union_sources(
             assocs[merge_key.address].append(x)
     final: list[list[BuildDatasource]] = []
     for _, dses in assocs.items():
-
         conditions = [c.non_partial_for.conditional for c in dses if c.non_partial_for]
         if simplify_conditions(conditions):
             final.append(dses)
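Aside on the simplify_conditions rewrite: the new flags accept a comparison when exactly one side is a concept and the other side is a reducible literal, regardless of which side is which. A self-contained sketch of that symmetry check (Concept and REDUCABLE_TYPES here are stand-ins for the trilogy types):

    class Concept:
        def __init__(self, address):
            self.address = address

    REDUCABLE_TYPES = (int, float, str, bool)

    def split_comparison(left, right):
        # mirror of the flag logic in the diff: normalize to (concept, literal)
        left_is_concept = isinstance(left, Concept)
        left_is_reducable = not left_is_concept and isinstance(left, REDUCABLE_TYPES)
        right_is_concept = isinstance(right, Concept)
        right_is_reducable = not right_is_concept and isinstance(right, REDUCABLE_TYPES)

        if left_is_concept and right_is_reducable:
            return left, right
        if right_is_concept and left_is_reducable:
            return right, left
        return None  # not a simplifiable concept-vs-literal comparison

    c = Concept("order.year")
    assert split_comparison(c, 2024) == (c, 2024)  # concept on the left
    assert split_comparison(2024, c) == (c, 2024)  # concept on the right
    assert split_comparison(2024, 2025) is None    # no concept involved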
trilogy/core/processing/node_generators/select_merge_node.py
CHANGED

@@ -6,6 +6,7 @@ import networkx as nx
 from trilogy.constants import logger
 from trilogy.core.enums import Derivation
 from trilogy.core.graph_models import (
+    ReferenceGraph,
     concept_to_node,
     get_graph_exact_match,
     prune_sources_for_conditions,
@@ -41,77 +42,68 @@ def extract_address(node: str):


 def get_graph_partial_nodes(
-    g: nx.DiGraph, conditions: BuildWhereClause | None
+    g: ReferenceGraph, conditions: BuildWhereClause | None
 ) -> dict[str, list[str]]:
-    datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
-        nx.get_node_attributes(g, "datasource")
-    )
     partial: dict[str, list[str]] = {}
-    for node in g.nodes:
-        if node in datasources:
-            ds = datasources[node]
-            if not isinstance(ds, list):
-
-                if ds.non_partial_for and conditions == ds.non_partial_for:
-                    partial[node] = []
-                    continue
-                partial[node] = [concept_to_node(c) for c in ds.partial_concepts]
-                ds = [ds]
-            # assume union sources have no partial
-            else:
-                partial[node] = []
+    for node, ds in g.datasources.items():

+        if not isinstance(ds, list):
+
+            if ds.non_partial_for and conditions == ds.non_partial_for:
+                partial[node] = []
+                continue
+            partial[node] = [concept_to_node(c) for c in ds.partial_concepts]
+        # assume union sources have no partial
+        else:
+            partial[node] = []
     return partial


-def get_graph_grains(g: nx.DiGraph) -> dict[str, list[str]]:
-    datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
-        nx.get_node_attributes(g, "datasource")
-    )
+def get_graph_grains(g: ReferenceGraph) -> dict[str, list[str]]:
     grain_length: dict[str, list[str]] = {}
-    for node in g.nodes:
-
-
-
-
-
-
-
-
-        )
+    for node, lookup in g.datasources.items():
+
+        base: set[str] = set()
+        if not isinstance(lookup, list):
+            flookup = [lookup]
+        else:
+            flookup = lookup
+        assert isinstance(flookup, list)
+        grain_length[node] = reduce(
+            lambda x, y: x.union(y.grain.components), flookup, base  # type: ignore
+        )
     return grain_length


 def subgraph_is_complete(
     nodes: list[str], targets: set[str], mapping: dict[str, str], g: nx.DiGraph
 ) -> bool:
-
-
-    if not
+    # Check if all targets are present in mapped nodes
+    mapped = {mapping.get(n, n) for n in nodes}
+    if not targets.issubset(mapped):
         # logger.info(
         #     f"Subgraph {nodes} is not complete, missing targets {targets} - mapped {mapped}"
         # )
         return False
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        return all(has_ds_edge.values()) and passed
+
+    # Check if at least one concept node has a datasource edge
+    has_ds_edge = {target: False for target in targets}
+
+    for node in nodes:
+        if node.startswith("c~"):
+            mapped_node = mapping.get(node, node)
+            if mapped_node in targets and not has_ds_edge[mapped_node]:
+                # Only check neighbors if we haven't found a ds edge for this mapped node yet
+                if any(
+                    neighbor.startswith("ds~") for neighbor in nx.neighbors(g, node)
+                ):
+                    has_ds_edge[mapped_node] = True
+
+    return all(has_ds_edge.values())


 def create_pruned_concept_graph(
-    g: nx.DiGraph,
+    g: ReferenceGraph,
     all_concepts: List[BuildConcept],
     datasources: list[BuildDatasource],
     accept_partial: bool = False,
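Aside on the rewritten subgraph_is_complete above: it reduces to two checks, target coverage under the mapping, then at least one datasource edge per target concept. A runnable condensation of the same logic (toy node names, undirected toy graph):

    import networkx as nx

    def subgraph_is_complete(nodes, targets, mapping, g):
        # every target concept must appear among the mapped nodes
        mapped = {mapping.get(n, n) for n in nodes}
        if not targets.issubset(mapped):
            return False
        # and each target must touch at least one datasource node
        has_ds_edge = {t: False for t in targets}
        for node in nodes:
            if node.startswith("c~"):
                mapped_node = mapping.get(node, node)
                if mapped_node in targets and not has_ds_edge[mapped_node]:
                    if any(nb.startswith("ds~") for nb in nx.neighbors(g, node)):
                        has_ds_edge[mapped_node] = True
        return all(has_ds_edge.values())

    g = nx.Graph()
    g.add_edges_from([("ds~orders", "c~order.id"), ("ds~orders", "c~order.date")])
    print(subgraph_is_complete(
        nodes=["ds~orders", "c~order.id", "c~order.date"],
        targets={"c~order.id", "c~order.date"},
        mapping={},
        g=g,
    ))  # True: both concepts are covered and hang off a datasource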
@@ -133,14 +125,13 @@ def create_pruned_concept_graph(
             )
             g.add_node(node_address, datasource=ds_list)
             for c in common:
-
-                g.add_edge(
+                cnode = concept_to_node(c)
+                g.add_edge(node_address, cnode)
+                g.add_edge(cnode, node_address)
     prune_sources_for_conditions(g, accept_partial, conditions)
     target_addresses = set([c.address for c in all_concepts])
-    concepts: dict[str, BuildConcept] =
-    datasource_map: dict[str, BuildDatasource] = (
-        nx.get_node_attributes(orig_g, "datasource")
-    )
+    concepts: dict[str, BuildConcept] = orig_g.concepts
+    datasource_map: dict[str, BuildDatasource] = orig_g.datasources
     relevant_concepts_pre = {
         n: x.address
         for n in g.nodes()
@@ -170,31 +161,27 @@ def create_pruned_concept_graph(
     for edge in to_remove:
         g.remove_edge(*edge)

-    for n in g.nodes:
-        if
-            continue
-        actual_neighbors = [
-            x for x in relevant_concepts if x in (nx.all_neighbors(g, n))
-        ]
-        if actual_neighbors:
+    for n in g.datasources:
+        if any([[n, x] in g.edges for x in relevant_concepts]):
             relevent_datasets.append(n)
+            continue

     # for injecting extra join concepts that are shared between datasets
     # use the original graph, pre-partial pruning
-    for n in orig_g.nodes:
+    for n in orig_g.concepts:
         # readd ignoring grain
         # we want to join inclusive of all concepts
-
-
-
-
-
+        if n not in relevant_concepts:
+            n_neighbors = nx.all_neighbors(orig_g, n)
+            # check if the irrelevant concept is a join between
+            # two relevant datasets
+            neighbors = set()
+            for neighbor in n_neighbors:
                 if neighbor in relevent_datasets:
                     neighbors.add(neighbor)
-
-
-
-
+            if len(neighbors) > 1:
+                relevant_concepts.append(n)
+                continue
     g.remove_nodes_from(
         [
             n
@@ -238,7 +225,7 @@ def create_pruned_concept_graph(


 def resolve_subgraphs(
-    g: nx.DiGraph,
+    g: ReferenceGraph,
     relevant: list[BuildConcept],
     accept_partial: bool,
     conditions: BuildWhereClause | None,
@@ -261,7 +248,7 @@ def resolve_subgraphs(
     partial_map = get_graph_partial_nodes(g, conditions)
     exact_map = get_graph_exact_match(g, accept_partial, conditions)
     grain_length = get_graph_grains(g)
-    concepts: dict[str, BuildConcept] =
+    concepts: dict[str, BuildConcept] = g.concepts
     non_partial_map = {
         ds: [concepts[c].address for c in subgraphs[ds] if c not in partial_map[ds]]
         for ds in datasources
@@ -460,7 +447,7 @@ def create_select_node(
     ds_name: str,
     subgraph: list[str],
     accept_partial: bool,
-    g,
+    g: ReferenceGraph,
     environment: BuildEnvironment,
     depth: int,
     conditions: BuildWhereClause | None = None,
@@ -486,9 +473,7 @@ def create_select_node(
         preexisting_conditions=conditions.conditional if conditions else None,
     )

-    datasource: BuildDatasource = (
-        nx.get_node_attributes(g, "datasource")[ds_name]
-    )
+    datasource: BuildDatasource = g.datasources[ds_name]
     if isinstance(datasource, BuildDatasource):
         bcandidate, force_group = create_datasource_node(
             datasource,
@@ -581,6 +566,7 @@ def gen_select_merge_node(
     logger.info(
         f"{padding(depth)}{LOGGER_PREFIX} searching for root source graph for concepts {[c.address for c in all_concepts]} and conditions {conditions}"
     )
+    pruned_concept_graph = None
     for attempt in attempts:
        pruned_concept_graph = create_pruned_concept_graph(
            g,
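Aside on the recurring substitution through this file: every nx.get_node_attributes(g, "datasource") call becomes a typed accessor on ReferenceGraph (g.datasources, g.concepts, g.pseudonyms). A minimal sketch of that accessor pattern as an nx.DiGraph subclass; the real ReferenceGraph in trilogy.core.graph_models may be implemented differently, this only shows the shape the call sites rely on:

    import networkx as nx

    class ReferenceGraph(nx.DiGraph):
        # typed views over node attributes, replacing repeated
        # nx.get_node_attributes(...) calls at every call site
        @property
        def concepts(self):
            return nx.get_node_attributes(self, "concept")

        @property
        def datasources(self):
            return nx.get_node_attributes(self, "datasource")

    g = ReferenceGraph()
    g.add_node("c~order.id", concept="<BuildConcept order.id>")
    g.add_node("ds~orders", datasource="<BuildDatasource orders>")

    print(g.concepts)     # {'c~order.id': '<BuildConcept order.id>'}
    print(g.datasources)  # {'ds~orders': '<BuildDatasource orders>'}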
trilogy/parsing/parse_engine.py
CHANGED
@@ -244,7 +244,7 @@ def unwrap_transformation(
     elif isinstance(input, Parenthetical):
         return unwrap_transformation(input.content, environment)
     else:
-        return Function(
+        return Function.model_construct(
             operator=FunctionType.CONSTANT,
             output_datatype=arg_to_datatype(input),
             output_purpose=Purpose.CONSTANT,
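Aside on the parse_engine change: model_construct builds a pydantic model without running validators, presumably to skip validation overhead for a constant wrapper whose fields are already known-good. A generic pydantic sketch of the behavior difference (toy Function class, not trilogy's):

    from pydantic import BaseModel, field_validator

    class Function(BaseModel):
        operator: str

        @field_validator("operator")
        @classmethod
        def check(cls, v):
            # illustrative: always fails, to show validators run on Function(...)
            raise ValueError("validators run on normal construction")

    # model_construct bypasses validation entirely
    f = Function.model_construct(operator="CONSTANT")
    print(f.operator)  # CONSTANT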
The remaining packaging files (WHEEL, entry_points.txt, licenses/LICENSE.md, top_level.txt) are unchanged between versions.