pytrilogy 0.0.3.93__py3-none-any.whl → 0.0.3.94__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic. Click here for more details.
- {pytrilogy-0.0.3.93.dist-info → pytrilogy-0.0.3.94.dist-info}/METADATA +1 -1
- {pytrilogy-0.0.3.93.dist-info → pytrilogy-0.0.3.94.dist-info}/RECORD +15 -16
- trilogy/__init__.py +1 -1
- trilogy/core/env_processor.py +4 -2
- trilogy/core/graph_models.py +63 -44
- trilogy/core/models/author.py +16 -25
- trilogy/core/models/build.py +5 -4
- trilogy/core/processing/node_generators/node_merge_node.py +30 -28
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +25 -11
- trilogy/core/processing/node_generators/select_merge_node.py +66 -80
- trilogy/parsing/parse_engine.py +1 -1
- trilogy/core/processing/node_generators/select_merge_node_v2.py +0 -792
- {pytrilogy-0.0.3.93.dist-info → pytrilogy-0.0.3.94.dist-info}/WHEEL +0 -0
- {pytrilogy-0.0.3.93.dist-info → pytrilogy-0.0.3.94.dist-info}/entry_points.txt +0 -0
- {pytrilogy-0.0.3.93.dist-info → pytrilogy-0.0.3.94.dist-info}/licenses/LICENSE.md +0 -0
- {pytrilogy-0.0.3.93.dist-info → pytrilogy-0.0.3.94.dist-info}/top_level.txt +0 -0
|
@@ -6,6 +6,7 @@ import networkx as nx
|
|
|
6
6
|
from trilogy.constants import logger
|
|
7
7
|
from trilogy.core.enums import Derivation
|
|
8
8
|
from trilogy.core.graph_models import (
|
|
9
|
+
ReferenceGraph,
|
|
9
10
|
concept_to_node,
|
|
10
11
|
get_graph_exact_match,
|
|
11
12
|
prune_sources_for_conditions,
|
|
@@ -41,77 +42,68 @@ def extract_address(node: str):
|
|
|
41
42
|
|
|
42
43
|
|
|
43
44
|
def get_graph_partial_nodes(
|
|
44
|
-
g:
|
|
45
|
+
g: ReferenceGraph, conditions: BuildWhereClause | None
|
|
45
46
|
) -> dict[str, list[str]]:
|
|
46
|
-
datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
|
|
47
|
-
nx.get_node_attributes(g, "datasource")
|
|
48
|
-
)
|
|
49
47
|
partial: dict[str, list[str]] = {}
|
|
50
|
-
for node in g.
|
|
51
|
-
if node in datasources:
|
|
52
|
-
ds = datasources[node]
|
|
53
|
-
if not isinstance(ds, list):
|
|
54
|
-
|
|
55
|
-
if ds.non_partial_for and conditions == ds.non_partial_for:
|
|
56
|
-
partial[node] = []
|
|
57
|
-
continue
|
|
58
|
-
partial[node] = [concept_to_node(c) for c in ds.partial_concepts]
|
|
59
|
-
ds = [ds]
|
|
60
|
-
# assume union sources have no partial
|
|
61
|
-
else:
|
|
62
|
-
partial[node] = []
|
|
48
|
+
for node, ds in g.datasources.items():
|
|
63
49
|
|
|
50
|
+
if not isinstance(ds, list):
|
|
51
|
+
|
|
52
|
+
if ds.non_partial_for and conditions == ds.non_partial_for:
|
|
53
|
+
partial[node] = []
|
|
54
|
+
continue
|
|
55
|
+
partial[node] = [concept_to_node(c) for c in ds.partial_concepts]
|
|
56
|
+
# assume union sources have no partial
|
|
57
|
+
else:
|
|
58
|
+
partial[node] = []
|
|
64
59
|
return partial
|
|
65
60
|
|
|
66
61
|
|
|
67
|
-
def get_graph_grains(g:
|
|
68
|
-
datasources: dict[str, BuildDatasource | list[BuildDatasource]] = (
|
|
69
|
-
nx.get_node_attributes(g, "datasource")
|
|
70
|
-
)
|
|
62
|
+
def get_graph_grains(g: ReferenceGraph) -> dict[str, list[str]]:
|
|
71
63
|
grain_length: dict[str, list[str]] = {}
|
|
72
|
-
for node in g.
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
)
|
|
64
|
+
for node, lookup in g.datasources.items():
|
|
65
|
+
|
|
66
|
+
base: set[str] = set()
|
|
67
|
+
if not isinstance(lookup, list):
|
|
68
|
+
flookup = [lookup]
|
|
69
|
+
else:
|
|
70
|
+
flookup = lookup
|
|
71
|
+
assert isinstance(flookup, list)
|
|
72
|
+
grain_length[node] = reduce(
|
|
73
|
+
lambda x, y: x.union(y.grain.components), flookup, base # type: ignore
|
|
74
|
+
)
|
|
82
75
|
return grain_length
|
|
83
76
|
|
|
84
77
|
|
|
85
78
|
def subgraph_is_complete(
|
|
86
79
|
nodes: list[str], targets: set[str], mapping: dict[str, str], g: nx.DiGraph
|
|
87
80
|
) -> bool:
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
if not
|
|
81
|
+
# Check if all targets are present in mapped nodes
|
|
82
|
+
mapped = {mapping.get(n, n) for n in nodes}
|
|
83
|
+
if not targets.issubset(mapped):
|
|
91
84
|
# logger.info(
|
|
92
85
|
# f"Subgraph {nodes} is not complete, missing targets {targets} - mapped {mapped}"
|
|
93
86
|
# )
|
|
94
87
|
return False
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
return all(has_ds_edge.values()) and passed
|
|
88
|
+
|
|
89
|
+
# Check if at least one concept node has a datasource edge
|
|
90
|
+
has_ds_edge = {target: False for target in targets}
|
|
91
|
+
|
|
92
|
+
for node in nodes:
|
|
93
|
+
if node.startswith("c~"):
|
|
94
|
+
mapped_node = mapping.get(node, node)
|
|
95
|
+
if mapped_node in targets and not has_ds_edge[mapped_node]:
|
|
96
|
+
# Only check neighbors if we haven't found a ds edge for this mapped node yet
|
|
97
|
+
if any(
|
|
98
|
+
neighbor.startswith("ds~") for neighbor in nx.neighbors(g, node)
|
|
99
|
+
):
|
|
100
|
+
has_ds_edge[mapped_node] = True
|
|
101
|
+
|
|
102
|
+
return all(has_ds_edge.values())
|
|
111
103
|
|
|
112
104
|
|
|
113
105
|
def create_pruned_concept_graph(
|
|
114
|
-
g:
|
|
106
|
+
g: ReferenceGraph,
|
|
115
107
|
all_concepts: List[BuildConcept],
|
|
116
108
|
datasources: list[BuildDatasource],
|
|
117
109
|
accept_partial: bool = False,
|
|
@@ -133,14 +125,13 @@ def create_pruned_concept_graph(
|
|
|
133
125
|
)
|
|
134
126
|
g.add_node(node_address, datasource=ds_list)
|
|
135
127
|
for c in common:
|
|
136
|
-
|
|
137
|
-
g.add_edge(
|
|
128
|
+
cnode = concept_to_node(c)
|
|
129
|
+
g.add_edge(node_address, cnode)
|
|
130
|
+
g.add_edge(cnode, node_address)
|
|
138
131
|
prune_sources_for_conditions(g, accept_partial, conditions)
|
|
139
132
|
target_addresses = set([c.address for c in all_concepts])
|
|
140
|
-
concepts: dict[str, BuildConcept] =
|
|
141
|
-
datasource_map: dict[str, BuildDatasource
|
|
142
|
-
nx.get_node_attributes(orig_g, "datasource")
|
|
143
|
-
)
|
|
133
|
+
concepts: dict[str, BuildConcept] = orig_g.concepts
|
|
134
|
+
datasource_map: dict[str, BuildDatasource] = orig_g.datasources
|
|
144
135
|
relevant_concepts_pre = {
|
|
145
136
|
n: x.address
|
|
146
137
|
for n in g.nodes()
|
|
@@ -170,31 +161,27 @@ def create_pruned_concept_graph(
|
|
|
170
161
|
for edge in to_remove:
|
|
171
162
|
g.remove_edge(*edge)
|
|
172
163
|
|
|
173
|
-
for n in g.
|
|
174
|
-
if
|
|
175
|
-
continue
|
|
176
|
-
actual_neighbors = [
|
|
177
|
-
x for x in relevant_concepts if x in (nx.all_neighbors(g, n))
|
|
178
|
-
]
|
|
179
|
-
if actual_neighbors:
|
|
164
|
+
for n in g.datasources:
|
|
165
|
+
if any([[n, x] in g.edges for x in relevant_concepts]):
|
|
180
166
|
relevent_datasets.append(n)
|
|
167
|
+
continue
|
|
181
168
|
|
|
182
169
|
# for injecting extra join concepts that are shared between datasets
|
|
183
170
|
# use the original graph, pre-partial pruning
|
|
184
|
-
for n in orig_g.
|
|
171
|
+
for n in orig_g.concepts:
|
|
185
172
|
# readd ignoring grain
|
|
186
173
|
# we want to join inclusive of all concepts
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
174
|
+
if n not in relevant_concepts:
|
|
175
|
+
n_neighbors = nx.all_neighbors(orig_g, n)
|
|
176
|
+
# check if the irrelevant concept is a join between
|
|
177
|
+
# two relevant datasets
|
|
178
|
+
neighbors = set()
|
|
179
|
+
for neighbor in n_neighbors:
|
|
192
180
|
if neighbor in relevent_datasets:
|
|
193
181
|
neighbors.add(neighbor)
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
182
|
+
if len(neighbors) > 1:
|
|
183
|
+
relevant_concepts.append(n)
|
|
184
|
+
continue
|
|
198
185
|
g.remove_nodes_from(
|
|
199
186
|
[
|
|
200
187
|
n
|
|
@@ -238,7 +225,7 @@ def create_pruned_concept_graph(
|
|
|
238
225
|
|
|
239
226
|
|
|
240
227
|
def resolve_subgraphs(
|
|
241
|
-
g:
|
|
228
|
+
g: ReferenceGraph,
|
|
242
229
|
relevant: list[BuildConcept],
|
|
243
230
|
accept_partial: bool,
|
|
244
231
|
conditions: BuildWhereClause | None,
|
|
@@ -261,7 +248,7 @@ def resolve_subgraphs(
|
|
|
261
248
|
partial_map = get_graph_partial_nodes(g, conditions)
|
|
262
249
|
exact_map = get_graph_exact_match(g, accept_partial, conditions)
|
|
263
250
|
grain_length = get_graph_grains(g)
|
|
264
|
-
concepts: dict[str, BuildConcept] =
|
|
251
|
+
concepts: dict[str, BuildConcept] = g.concepts
|
|
265
252
|
non_partial_map = {
|
|
266
253
|
ds: [concepts[c].address for c in subgraphs[ds] if c not in partial_map[ds]]
|
|
267
254
|
for ds in datasources
|
|
@@ -460,7 +447,7 @@ def create_select_node(
|
|
|
460
447
|
ds_name: str,
|
|
461
448
|
subgraph: list[str],
|
|
462
449
|
accept_partial: bool,
|
|
463
|
-
g,
|
|
450
|
+
g: ReferenceGraph,
|
|
464
451
|
environment: BuildEnvironment,
|
|
465
452
|
depth: int,
|
|
466
453
|
conditions: BuildWhereClause | None = None,
|
|
@@ -486,9 +473,7 @@ def create_select_node(
|
|
|
486
473
|
preexisting_conditions=conditions.conditional if conditions else None,
|
|
487
474
|
)
|
|
488
475
|
|
|
489
|
-
datasource:
|
|
490
|
-
nx.get_node_attributes(g, "datasource")[ds_name]
|
|
491
|
-
)
|
|
476
|
+
datasource: BuildDatasource = g.datasources[ds_name]
|
|
492
477
|
if isinstance(datasource, BuildDatasource):
|
|
493
478
|
bcandidate, force_group = create_datasource_node(
|
|
494
479
|
datasource,
|
|
@@ -581,6 +566,7 @@ def gen_select_merge_node(
|
|
|
581
566
|
logger.info(
|
|
582
567
|
f"{padding(depth)}{LOGGER_PREFIX} searching for root source graph for concepts {[c.address for c in all_concepts]} and conditions {conditions}"
|
|
583
568
|
)
|
|
569
|
+
pruned_concept_graph = None
|
|
584
570
|
for attempt in attempts:
|
|
585
571
|
pruned_concept_graph = create_pruned_concept_graph(
|
|
586
572
|
g,
|
trilogy/parsing/parse_engine.py
CHANGED
|
@@ -244,7 +244,7 @@ def unwrap_transformation(
|
|
|
244
244
|
elif isinstance(input, Parenthetical):
|
|
245
245
|
return unwrap_transformation(input.content, environment)
|
|
246
246
|
else:
|
|
247
|
-
return Function(
|
|
247
|
+
return Function.model_construct(
|
|
248
248
|
operator=FunctionType.CONSTANT,
|
|
249
249
|
output_datatype=arg_to_datatype(input),
|
|
250
250
|
output_purpose=Purpose.CONSTANT,
|