napistu 0.2.5.dev7__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__main__.py +126 -96
- napistu/constants.py +35 -41
- napistu/context/__init__.py +10 -0
- napistu/context/discretize.py +462 -0
- napistu/context/filtering.py +387 -0
- napistu/gcs/__init__.py +1 -1
- napistu/identifiers.py +74 -15
- napistu/indices.py +68 -0
- napistu/ingestion/__init__.py +1 -1
- napistu/ingestion/bigg.py +47 -62
- napistu/ingestion/constants.py +18 -133
- napistu/ingestion/gtex.py +113 -0
- napistu/ingestion/hpa.py +147 -0
- napistu/ingestion/sbml.py +0 -97
- napistu/ingestion/string.py +2 -2
- napistu/matching/__init__.py +10 -0
- napistu/matching/constants.py +18 -0
- napistu/matching/interactions.py +518 -0
- napistu/matching/mount.py +529 -0
- napistu/matching/species.py +510 -0
- napistu/mcp/__init__.py +7 -4
- napistu/mcp/__main__.py +128 -72
- napistu/mcp/client.py +16 -25
- napistu/mcp/codebase.py +201 -145
- napistu/mcp/component_base.py +170 -0
- napistu/mcp/config.py +223 -0
- napistu/mcp/constants.py +45 -2
- napistu/mcp/documentation.py +253 -136
- napistu/mcp/documentation_utils.py +13 -48
- napistu/mcp/execution.py +372 -305
- napistu/mcp/health.py +47 -65
- napistu/mcp/profiles.py +10 -6
- napistu/mcp/server.py +161 -80
- napistu/mcp/tutorials.py +139 -87
- napistu/modify/__init__.py +1 -1
- napistu/modify/gaps.py +1 -1
- napistu/network/__init__.py +1 -1
- napistu/network/constants.py +101 -34
- napistu/network/data_handling.py +388 -0
- napistu/network/ig_utils.py +351 -0
- napistu/network/napistu_graph_core.py +354 -0
- napistu/network/neighborhoods.py +40 -40
- napistu/network/net_create.py +373 -309
- napistu/network/net_propagation.py +47 -19
- napistu/network/{net_utils.py → ng_utils.py} +124 -272
- napistu/network/paths.py +67 -51
- napistu/network/precompute.py +11 -11
- napistu/ontologies/__init__.py +10 -0
- napistu/ontologies/constants.py +129 -0
- napistu/ontologies/dogma.py +243 -0
- napistu/ontologies/genodexito.py +649 -0
- napistu/ontologies/mygene.py +369 -0
- napistu/ontologies/renaming.py +198 -0
- napistu/rpy2/__init__.py +229 -86
- napistu/rpy2/callr.py +47 -77
- napistu/rpy2/constants.py +24 -23
- napistu/rpy2/rids.py +61 -648
- napistu/sbml_dfs_core.py +587 -222
- napistu/scverse/__init__.py +15 -0
- napistu/scverse/constants.py +28 -0
- napistu/scverse/loading.py +727 -0
- napistu/utils.py +118 -10
- {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dist-info}/METADATA +8 -3
- napistu-0.3.1.dist-info/RECORD +133 -0
- tests/conftest.py +22 -0
- tests/test_context_discretize.py +56 -0
- tests/test_context_filtering.py +267 -0
- tests/test_identifiers.py +100 -0
- tests/test_indices.py +65 -0
- tests/{test_edgelist.py → test_ingestion_napistu_edgelist.py} +2 -2
- tests/test_matching_interactions.py +108 -0
- tests/test_matching_mount.py +305 -0
- tests/test_matching_species.py +394 -0
- tests/test_mcp_config.py +193 -0
- tests/test_mcp_documentation_utils.py +12 -3
- tests/test_mcp_server.py +156 -19
- tests/test_network_data_handling.py +397 -0
- tests/test_network_ig_utils.py +23 -0
- tests/test_network_neighborhoods.py +19 -0
- tests/test_network_net_create.py +459 -0
- tests/test_network_ng_utils.py +30 -0
- tests/test_network_paths.py +56 -0
- tests/{test_precomputed_distances.py → test_network_precompute.py} +8 -6
- tests/test_ontologies_genodexito.py +58 -0
- tests/test_ontologies_mygene.py +39 -0
- tests/test_ontologies_renaming.py +110 -0
- tests/test_rpy2_callr.py +79 -0
- tests/test_rpy2_init.py +151 -0
- tests/test_sbml.py +0 -31
- tests/test_sbml_dfs_core.py +134 -10
- tests/test_scverse_loading.py +778 -0
- tests/test_set_coverage.py +2 -2
- tests/test_utils.py +121 -1
- napistu/mechanism_matching.py +0 -1353
- napistu/rpy2/netcontextr.py +0 -467
- napistu-0.2.5.dev7.dist-info/RECORD +0 -98
- tests/test_igraph.py +0 -367
- tests/test_mechanism_matching.py +0 -784
- tests/test_net_utils.py +0 -149
- tests/test_netcontextr.py +0 -105
- tests/test_rpy2.py +0 -61
- /napistu/ingestion/{cpr_edgelist.py → napistu_edgelist.py} +0 -0
- {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dist-info}/WHEEL +0 -0
- {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dist-info}/entry_points.txt +0 -0
- {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dist-info}/top_level.txt +0 -0
- /tests/{test_obo.py → test_ingestion_obo.py} +0 -0
napistu/network/net_create.py
CHANGED
@@ -13,108 +13,85 @@ from pydantic import BaseModel
|
|
13
13
|
|
14
14
|
from napistu import sbml_dfs_core
|
15
15
|
from napistu import utils
|
16
|
+
from napistu.network.napistu_graph_core import NapistuGraph
|
16
17
|
|
17
|
-
from napistu.constants import DEFAULT_WT_TRANS
|
18
|
-
from napistu.constants import DEFINED_WEIGHT_TRANSFORMATION
|
19
18
|
from napistu.constants import MINI_SBO_FROM_NAME
|
20
19
|
from napistu.constants import MINI_SBO_TO_NAME
|
21
20
|
from napistu.constants import SBML_DFS
|
22
21
|
from napistu.constants import SBO_MODIFIER_NAMES
|
23
|
-
from napistu.constants import SCORE_CALIBRATION_POINTS_DICT
|
24
22
|
from napistu.constants import ENTITIES_W_DATA
|
25
|
-
|
26
|
-
|
27
|
-
from napistu.network.constants import
|
28
|
-
from napistu.network.constants import
|
29
|
-
from napistu.network.constants import
|
30
|
-
from napistu.network.constants import
|
31
|
-
from napistu.network.constants import
|
32
|
-
from napistu.network.constants import CPR_GRAPH_TYPES
|
33
|
-
from napistu.network.constants import CPR_WEIGHTING_STRATEGIES
|
23
|
+
|
24
|
+
from napistu.network.constants import NAPISTU_GRAPH_NODES
|
25
|
+
from napistu.network.constants import NAPISTU_GRAPH_EDGES
|
26
|
+
from napistu.network.constants import NAPISTU_GRAPH_EDGE_DIRECTIONS
|
27
|
+
from napistu.network.constants import NAPISTU_GRAPH_NODE_TYPES
|
28
|
+
from napistu.network.constants import NAPISTU_GRAPH_TYPES
|
29
|
+
from napistu.network.constants import NAPISTU_WEIGHTING_STRATEGIES
|
34
30
|
from napistu.network.constants import SBOTERM_NAMES
|
35
31
|
from napistu.network.constants import REGULATORY_GRAPH_HIERARCHY
|
36
32
|
from napistu.network.constants import SURROGATE_GRAPH_HIERARCHY
|
37
|
-
from napistu.network.constants import
|
33
|
+
from napistu.network.constants import VALID_NAPISTU_GRAPH_TYPES
|
38
34
|
from napistu.network.constants import VALID_WEIGHTING_STRATEGIES
|
35
|
+
from napistu.network.constants import DEFAULT_WT_TRANS
|
36
|
+
from napistu.network.constants import DEFINED_WEIGHT_TRANSFORMATION
|
37
|
+
from napistu.network.constants import SCORE_CALIBRATION_POINTS_DICT
|
38
|
+
from napistu.network.constants import SOURCE_VARS_DICT
|
39
|
+
|
39
40
|
|
40
41
|
logger = logging.getLogger(__name__)
|
41
42
|
|
42
43
|
|
43
|
-
def
|
44
|
+
def create_napistu_graph(
|
44
45
|
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
45
46
|
reaction_graph_attrs: Optional[dict] = None,
|
46
47
|
directed: bool = True,
|
47
48
|
edge_reversed: bool = False,
|
48
|
-
graph_type: str =
|
49
|
+
graph_type: str = NAPISTU_GRAPH_TYPES.REGULATORY,
|
49
50
|
verbose: bool = False,
|
50
51
|
custom_transformations: Optional[dict] = None,
|
51
|
-
) ->
|
52
|
+
) -> NapistuGraph:
|
52
53
|
"""
|
53
|
-
Create
|
54
|
-
|
55
|
-
Create an igraph network from a mechanistic network using one of a set of graph_types.
|
54
|
+
Create a NapistuGraph network from a mechanistic network using one of a set of graph_types.
|
56
55
|
|
57
56
|
Parameters
|
58
57
|
----------
|
59
|
-
sbml_dfs : SBML_dfs
|
60
|
-
A model formed by aggregating pathways
|
61
|
-
reaction_graph_attrs: dict
|
62
|
-
Dictionary containing attributes to pull out of reaction_data and
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
-
|
71
|
-
-
|
72
|
-
|
73
|
-
|
74
|
-
not modified by a substrate per-se).
|
75
|
-
verbose : bool
|
76
|
-
Extra reporting
|
58
|
+
sbml_dfs : sbml_dfs_core.SBML_dfs
|
59
|
+
A model formed by aggregating pathways.
|
60
|
+
reaction_graph_attrs : dict, optional
|
61
|
+
Dictionary containing attributes to pull out of reaction_data and a weighting scheme for the graph.
|
62
|
+
directed : bool, optional
|
63
|
+
Should a directed (True) or undirected graph be made (False). Default is True.
|
64
|
+
edge_reversed : bool, optional
|
65
|
+
Should the directions of edges be reversed or not (False). Default is False.
|
66
|
+
graph_type : str, optional
|
67
|
+
Type of graph to create. Valid values are:
|
68
|
+
- 'bipartite': substrates and modifiers point to the reaction they drive, this reaction points to products
|
69
|
+
- 'regulatory': non-enzymatic modifiers point to enzymes, enzymes point to substrates and products
|
70
|
+
- 'surrogate': non-enzymatic modifiers -> substrates -> enzymes -> reaction -> products
|
71
|
+
verbose : bool, optional
|
72
|
+
Extra reporting. Default is False.
|
77
73
|
custom_transformations : dict, optional
|
78
74
|
Dictionary of custom transformation functions to use for attribute transformation.
|
79
75
|
|
80
|
-
Returns
|
81
|
-
|
82
|
-
|
76
|
+
Returns
|
77
|
+
-------
|
78
|
+
NapistuGraph
|
79
|
+
A NapistuGraph network (subclass of igraph.Graph).
|
83
80
|
"""
|
84
81
|
|
85
82
|
if reaction_graph_attrs is None:
|
86
83
|
reaction_graph_attrs = {}
|
87
84
|
|
88
|
-
if not
|
89
|
-
raise TypeError(
|
90
|
-
f"sbml_dfs must be a sbml_dfs_core.SBML_dfs, but was {type(sbml_dfs)}"
|
91
|
-
)
|
92
|
-
|
93
|
-
if not isinstance(reaction_graph_attrs, dict):
|
94
|
-
raise TypeError(
|
95
|
-
f"reaction_graph_attrs must be a dict, but was {type(reaction_graph_attrs)}"
|
96
|
-
)
|
97
|
-
|
98
|
-
if not isinstance(directed, bool):
|
99
|
-
raise TypeError(f"directed must be a bool, but was {type(directed)}")
|
100
|
-
|
101
|
-
if not isinstance(edge_reversed, bool):
|
102
|
-
raise TypeError(f"edge_reverse must be a bool, but was {type(edge_reversed)}")
|
103
|
-
|
104
|
-
if not isinstance(graph_type, str):
|
105
|
-
raise TypeError(f"graph_type must be a str, but was {type(verbose)}")
|
106
|
-
|
107
|
-
if graph_type not in VALID_CPR_GRAPH_TYPES:
|
85
|
+
if graph_type not in VALID_NAPISTU_GRAPH_TYPES:
|
108
86
|
raise ValueError(
|
109
|
-
f"graph_type is not a valid value ({graph_type}), valid values are {','.join(
|
87
|
+
f"graph_type is not a valid value ({graph_type}), valid values are {','.join(VALID_NAPISTU_GRAPH_TYPES)}"
|
110
88
|
)
|
111
89
|
|
112
|
-
if not
|
113
|
-
raise TypeError(f"verbose must be a bool, but was {type(verbose)}")
|
114
|
-
|
115
|
-
# fail fast in reaction_graph_attrs is not properly formatted
|
90
|
+
# fail fast if reaction_graph_attrs is not properly formatted
|
116
91
|
for k in reaction_graph_attrs.keys():
|
117
|
-
_validate_entity_attrs(
|
92
|
+
_validate_entity_attrs(
|
93
|
+
reaction_graph_attrs[k], custom_transformations=custom_transformations
|
94
|
+
)
|
118
95
|
|
119
96
|
working_sbml_dfs = copy.deepcopy(sbml_dfs)
|
120
97
|
reaction_species_counts = working_sbml_dfs.reaction_species.value_counts(
|
@@ -149,26 +126,26 @@ def create_cpr_graph(
|
|
149
126
|
[SBML_DFS.SC_ID, SBML_DFS.SC_NAME]
|
150
127
|
]
|
151
128
|
.rename(columns={SBML_DFS.SC_ID: "node_id", SBML_DFS.SC_NAME: "node_name"})
|
152
|
-
.assign(node_type=
|
129
|
+
.assign(node_type=NAPISTU_GRAPH_NODE_TYPES.SPECIES)
|
153
130
|
)
|
154
131
|
network_nodes.append(
|
155
132
|
working_sbml_dfs.reactions.reset_index()[[SBML_DFS.R_ID, SBML_DFS.R_NAME]]
|
156
133
|
.rename(columns={SBML_DFS.R_ID: "node_id", SBML_DFS.R_NAME: "node_name"})
|
157
|
-
.assign(node_type=
|
134
|
+
.assign(node_type=NAPISTU_GRAPH_NODE_TYPES.REACTION)
|
158
135
|
)
|
159
136
|
|
160
137
|
# rename nodes to name since it is treated specially
|
161
138
|
network_nodes_df = pd.concat(network_nodes).rename(
|
162
|
-
columns={"node_id":
|
139
|
+
columns={"node_id": NAPISTU_GRAPH_NODES.NAME}
|
163
140
|
)
|
164
141
|
|
165
142
|
logger.info(f"Formatting edges as a {graph_type} graph")
|
166
143
|
|
167
|
-
if graph_type ==
|
168
|
-
network_edges =
|
169
|
-
elif graph_type in [
|
144
|
+
if graph_type == NAPISTU_GRAPH_TYPES.BIPARTITE:
|
145
|
+
network_edges = _create_napistu_graph_bipartite(working_sbml_dfs)
|
146
|
+
elif graph_type in [NAPISTU_GRAPH_TYPES.REGULATORY, NAPISTU_GRAPH_TYPES.SURROGATE]:
|
170
147
|
# pass graph_type so that an appropriate tiered schema can be used.
|
171
|
-
network_edges =
|
148
|
+
network_edges = _create_napistu_graph_tiered(working_sbml_dfs, graph_type)
|
172
149
|
else:
|
173
150
|
raise NotImplementedError("Invalid graph_type")
|
174
151
|
|
@@ -188,20 +165,24 @@ def create_cpr_graph(
|
|
188
165
|
[
|
189
166
|
# assign forward edges
|
190
167
|
augmented_network_edges.assign(
|
191
|
-
|
168
|
+
**{
|
169
|
+
NAPISTU_GRAPH_EDGES.DIRECTION: NAPISTU_GRAPH_EDGE_DIRECTIONS.FORWARD
|
170
|
+
}
|
192
171
|
),
|
193
|
-
# create reverse edges for
|
172
|
+
# create reverse edges for reversible reactions
|
194
173
|
_reverse_network_edges(augmented_network_edges),
|
195
174
|
]
|
196
175
|
)
|
197
176
|
else:
|
198
177
|
directed_network_edges = augmented_network_edges.assign(
|
199
|
-
|
178
|
+
**{NAPISTU_GRAPH_EDGES.DIRECTION: NAPISTU_GRAPH_EDGE_DIRECTIONS.UNDIRECTED}
|
200
179
|
)
|
201
180
|
|
202
181
|
# de-duplicate edges
|
203
182
|
unique_edges = (
|
204
|
-
directed_network_edges.groupby(
|
183
|
+
directed_network_edges.groupby(
|
184
|
+
[NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO]
|
185
|
+
)
|
205
186
|
.first()
|
206
187
|
.reset_index()
|
207
188
|
)
|
@@ -216,7 +197,7 @@ def create_cpr_graph(
|
|
216
197
|
if verbose:
|
217
198
|
# report duplicated edges
|
218
199
|
grouped_edges = directed_network_edges.groupby(
|
219
|
-
[
|
200
|
+
[NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO]
|
220
201
|
)
|
221
202
|
duplicated_edges = [
|
222
203
|
grouped_edges.get_group(x)
|
@@ -229,93 +210,80 @@ def create_cpr_graph(
|
|
229
210
|
|
230
211
|
logger.warning(utils.style_df(example_duplicates, headers="keys"))
|
231
212
|
|
232
|
-
# reverse edge directions if edge_reversed is True:
|
233
|
-
|
234
|
-
if edge_reversed:
|
235
|
-
rev_unique_edges_df = unique_edges.copy()
|
236
|
-
rev_unique_edges_df[CPR_GRAPH_EDGES.FROM] = unique_edges[CPR_GRAPH_EDGES.TO]
|
237
|
-
rev_unique_edges_df[CPR_GRAPH_EDGES.TO] = unique_edges[CPR_GRAPH_EDGES.FROM]
|
238
|
-
rev_unique_edges_df[CPR_GRAPH_EDGES.SC_PARENTS] = unique_edges[
|
239
|
-
CPR_GRAPH_EDGES.SC_CHILDREN
|
240
|
-
]
|
241
|
-
rev_unique_edges_df[CPR_GRAPH_EDGES.SC_CHILDREN] = unique_edges[
|
242
|
-
CPR_GRAPH_EDGES.SC_PARENTS
|
243
|
-
]
|
244
|
-
rev_unique_edges_df[CPR_GRAPH_EDGES.STOICHOMETRY] = unique_edges[
|
245
|
-
CPR_GRAPH_EDGES.STOICHOMETRY
|
246
|
-
] * (-1)
|
247
|
-
|
248
|
-
rev_unique_edges_df[CPR_GRAPH_EDGES.DIRECTION] = unique_edges[
|
249
|
-
CPR_GRAPH_EDGES.DIRECTION
|
250
|
-
].replace(
|
251
|
-
{
|
252
|
-
CPR_GRAPH_EDGE_DIRECTIONS.REVERSE: CPR_GRAPH_EDGE_DIRECTIONS.FORWARD,
|
253
|
-
CPR_GRAPH_EDGE_DIRECTIONS.FORWARD: CPR_GRAPH_EDGE_DIRECTIONS.REVERSE,
|
254
|
-
}
|
255
|
-
)
|
256
|
-
else:
|
257
|
-
# unchanged if edge_reversed is False:
|
258
|
-
rev_unique_edges_df = unique_edges
|
259
|
-
|
260
213
|
# convert nodes and edgelist into an igraph network
|
261
|
-
|
262
214
|
logger.info("Formatting cpr_graph output")
|
263
|
-
|
215
|
+
napistu_ig_graph = ig.Graph.DictList(
|
264
216
|
vertices=network_nodes_df.to_dict("records"),
|
265
|
-
edges=
|
217
|
+
edges=unique_edges.to_dict("records"),
|
266
218
|
directed=directed,
|
267
|
-
vertex_name_attr=
|
268
|
-
edge_foreign_keys=(
|
219
|
+
vertex_name_attr=NAPISTU_GRAPH_NODES.NAME,
|
220
|
+
edge_foreign_keys=(NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO),
|
269
221
|
)
|
270
222
|
|
271
|
-
return
|
223
|
+
# Always return NapistuGraph
|
224
|
+
napistu_graph = NapistuGraph.from_igraph(
|
225
|
+
napistu_ig_graph, graph_type=graph_type, is_reversed=edge_reversed
|
226
|
+
)
|
227
|
+
|
228
|
+
if edge_reversed:
|
229
|
+
logger.info("Applying edge reversal using reversal utilities")
|
230
|
+
napistu_graph.reverse_edges()
|
272
231
|
|
232
|
+
return napistu_graph
|
273
233
|
|
274
|
-
|
234
|
+
|
235
|
+
def process_napistu_graph(
|
275
236
|
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
276
237
|
reaction_graph_attrs: Optional[dict] = None,
|
277
238
|
directed: bool = True,
|
278
239
|
edge_reversed: bool = False,
|
279
|
-
graph_type: str =
|
280
|
-
weighting_strategy: str =
|
240
|
+
graph_type: str = NAPISTU_GRAPH_TYPES.BIPARTITE,
|
241
|
+
weighting_strategy: str = NAPISTU_WEIGHTING_STRATEGIES.UNWEIGHTED,
|
281
242
|
verbose: bool = False,
|
282
243
|
custom_transformations: dict = None,
|
283
|
-
) ->
|
244
|
+
) -> NapistuGraph:
|
284
245
|
"""
|
285
246
|
Process Consensus Graph
|
286
247
|
|
287
|
-
Setup
|
248
|
+
Setup a NapistuGraph network and then add weights and other malleable attributes.
|
288
249
|
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
250
|
+
Parameters
|
251
|
+
----------
|
252
|
+
sbml_dfs : sbml_dfs_core.SBML_dfs
|
253
|
+
A model formed by aggregating pathways.
|
254
|
+
reaction_graph_attrs : dict, optional
|
255
|
+
Dictionary containing attributes to pull out of reaction_data and a weighting scheme for the graph.
|
256
|
+
directed : bool, optional
|
257
|
+
Should a directed (True) or undirected graph be made (False). Default is True.
|
258
|
+
edge_reversed : bool, optional
|
259
|
+
Should directions of edges be reversed (False). Default is False.
|
260
|
+
graph_type : str, optional
|
261
|
+
Type of graph to create. Valid values are:
|
262
|
+
- 'bipartite': substrates and modifiers point to the reaction they drive, this reaction points to products
|
263
|
+
- 'regulatory': non-enzymatic modifiers point to enzymes, enzymes point to substrates and products
|
264
|
+
- 'surrogate': non-enzymatic modifiers -> substrates -> enzymes -> reaction -> products
|
265
|
+
weighting_strategy : str, optional
|
266
|
+
A network weighting strategy with options:
|
267
|
+
- 'unweighted': all weights (and upstream_weights for directed graphs) are set to 1.
|
268
|
+
- 'topology': weight edges by the degree of the source nodes favoring nodes with few connections.
|
269
|
+
- 'mixed': transform edges with a quantitative score based on reaction_attrs; and set edges without quantitative score as a source-specific weight.
|
270
|
+
- 'calibrated': transform edges with a quantitative score based on reaction_attrs and combine them with topology scores to generate a consensus.
|
271
|
+
verbose : bool, optional
|
272
|
+
Extra reporting. Default is False.
|
273
|
+
custom_transformations : dict, optional
|
274
|
+
Dictionary of custom transformation functions to use for attribute transformation.
|
309
275
|
|
310
|
-
Returns
|
311
|
-
|
276
|
+
Returns
|
277
|
+
-------
|
278
|
+
NapistuGraph
|
279
|
+
A weighted NapistuGraph network (subclass of igraph.Graph).
|
312
280
|
"""
|
313
281
|
|
314
282
|
if reaction_graph_attrs is None:
|
315
283
|
reaction_graph_attrs = {}
|
316
284
|
|
317
285
|
logging.info("Constructing network")
|
318
|
-
|
286
|
+
napistu_graph = create_napistu_graph(
|
319
287
|
sbml_dfs,
|
320
288
|
reaction_graph_attrs,
|
321
289
|
directed=directed,
|
@@ -332,13 +300,13 @@ def process_cpr_graph(
|
|
332
300
|
|
333
301
|
logging.info(f"Adding edge weights with an {weighting_strategy} strategy")
|
334
302
|
|
335
|
-
|
336
|
-
|
303
|
+
weighted_napistu_graph = add_graph_weights(
|
304
|
+
napistu_graph=napistu_graph,
|
337
305
|
reaction_attrs=reaction_attrs,
|
338
306
|
weighting_strategy=weighting_strategy,
|
339
307
|
)
|
340
308
|
|
341
|
-
return
|
309
|
+
return weighted_napistu_graph
|
342
310
|
|
343
311
|
|
344
312
|
def pluck_entity_data(
|
@@ -484,7 +452,9 @@ def apply_weight_transformations(
|
|
484
452
|
return transformed_edges_df
|
485
453
|
|
486
454
|
|
487
|
-
def summarize_weight_calibration(
|
455
|
+
def summarize_weight_calibration(
|
456
|
+
napistu_graph: NapistuGraph, reaction_attrs: dict
|
457
|
+
) -> None:
|
488
458
|
"""
|
489
459
|
Summarize Weight Calibration
|
490
460
|
|
@@ -493,7 +463,7 @@ def summarize_weight_calibration(cpr_graph: ig.Graph, reaction_attrs: dict) -> N
|
|
493
463
|
"dubious" weights.
|
494
464
|
|
495
465
|
Args:
|
496
|
-
|
466
|
+
napistu_graph (ig.Graph): A graph where edge weights have already been calibrated.
|
497
467
|
reaction_attrs (dict): a dictionary summarizing the types of weights that
|
498
468
|
exist and how they are transformed for calibration.
|
499
469
|
|
@@ -507,7 +477,7 @@ def summarize_weight_calibration(cpr_graph: ig.Graph, reaction_attrs: dict) -> N
|
|
507
477
|
score_calibration_df, reaction_attrs
|
508
478
|
)
|
509
479
|
|
510
|
-
calibrated_edges =
|
480
|
+
calibrated_edges = napistu_graph.get_edge_dataframe()
|
511
481
|
|
512
482
|
_summarize_weight_calibration_table(
|
513
483
|
calibrated_edges, score_calibration_df, score_calibration_df_calibrated
|
@@ -521,31 +491,36 @@ def summarize_weight_calibration(cpr_graph: ig.Graph, reaction_attrs: dict) -> N
|
|
521
491
|
|
522
492
|
|
523
493
|
def add_graph_weights(
|
524
|
-
|
494
|
+
napistu_graph: NapistuGraph,
|
525
495
|
reaction_attrs: dict,
|
526
|
-
weighting_strategy: str =
|
527
|
-
) ->
|
496
|
+
weighting_strategy: str = NAPISTU_WEIGHTING_STRATEGIES.UNWEIGHTED,
|
497
|
+
) -> NapistuGraph:
|
528
498
|
"""
|
529
499
|
Add Graph Weights
|
530
500
|
|
531
|
-
Apply a weighting strategy to generate edge weights on a
|
532
|
-
be generated as well which should be used when searching for a node's ancestors.
|
501
|
+
Apply a weighting strategy to generate edge weights on a NapistuGraph. For directed graphs, "upstream_weights" will
|
502
|
+
be generated as well, which should be used when searching for a node's ancestors.
|
533
503
|
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
-
|
544
|
-
|
504
|
+
Parameters
|
505
|
+
----------
|
506
|
+
napistu_graph : NapistuGraph
|
507
|
+
A graphical network of molecules/reactions (nodes) and edges linking them (subclass of igraph.Graph).
|
508
|
+
reaction_attrs : dict
|
509
|
+
An optional dict of reaction attributes.
|
510
|
+
weighting_strategy : str, optional
|
511
|
+
A network weighting strategy with options:
|
512
|
+
- 'unweighted': all weights (and upstream_weights for directed graphs) are set to 1.
|
513
|
+
- 'topology': weight edges by the degree of the source nodes favoring nodes emerging from nodes with few connections.
|
514
|
+
- 'mixed': transform edges with a quantitative score based on reaction_attrs; and set edges without quantitative score as a source-specific weight.
|
515
|
+
- 'calibrated': transform edges with a quantitative score based on reaction_attrs and combine them with topology scores to generate a consensus.
|
545
516
|
|
517
|
+
Returns
|
518
|
+
-------
|
519
|
+
NapistuGraph
|
520
|
+
The weighted NapistuGraph.
|
546
521
|
"""
|
547
522
|
|
548
|
-
|
523
|
+
napistu_graph_updated = copy.deepcopy(napistu_graph)
|
549
524
|
|
550
525
|
_validate_entity_attrs(reaction_attrs)
|
551
526
|
|
@@ -556,36 +531,36 @@ def add_graph_weights(
|
|
556
531
|
)
|
557
532
|
|
558
533
|
# count parents and children and create weights based on them
|
559
|
-
topology_weighted_graph = _create_topology_weights(
|
534
|
+
topology_weighted_graph = _create_topology_weights(napistu_graph_updated)
|
560
535
|
|
561
|
-
if weighting_strategy ==
|
562
|
-
topology_weighted_graph.es[
|
536
|
+
if weighting_strategy == NAPISTU_WEIGHTING_STRATEGIES.TOPOLOGY:
|
537
|
+
topology_weighted_graph.es[NAPISTU_GRAPH_EDGES.WEIGHTS] = (
|
563
538
|
topology_weighted_graph.es["topo_weights"]
|
564
539
|
)
|
565
|
-
if
|
566
|
-
topology_weighted_graph.es[
|
540
|
+
if napistu_graph_updated.is_directed():
|
541
|
+
topology_weighted_graph.es[NAPISTU_GRAPH_EDGES.UPSTREAM_WEIGHTS] = (
|
567
542
|
topology_weighted_graph.es["upstream_topo_weights"]
|
568
543
|
)
|
569
544
|
|
570
545
|
return topology_weighted_graph
|
571
546
|
|
572
|
-
if weighting_strategy ==
|
547
|
+
if weighting_strategy == NAPISTU_WEIGHTING_STRATEGIES.UNWEIGHTED:
|
573
548
|
# set weights as a constant
|
574
|
-
topology_weighted_graph.es[
|
575
|
-
if
|
576
|
-
topology_weighted_graph.es[
|
549
|
+
topology_weighted_graph.es[NAPISTU_GRAPH_EDGES.WEIGHTS] = 1
|
550
|
+
if napistu_graph_updated.is_directed():
|
551
|
+
topology_weighted_graph.es[NAPISTU_GRAPH_EDGES.UPSTREAM_WEIGHTS] = 1
|
577
552
|
return topology_weighted_graph
|
578
553
|
|
579
|
-
if weighting_strategy ==
|
554
|
+
if weighting_strategy == NAPISTU_WEIGHTING_STRATEGIES.MIXED:
|
580
555
|
return _add_graph_weights_mixed(topology_weighted_graph, reaction_attrs)
|
581
556
|
|
582
|
-
if weighting_strategy ==
|
557
|
+
if weighting_strategy == NAPISTU_WEIGHTING_STRATEGIES.CALIBRATED:
|
583
558
|
return _add_graph_weights_calibration(topology_weighted_graph, reaction_attrs)
|
584
559
|
|
585
560
|
raise ValueError(f"No logic implemented for {weighting_strategy}")
|
586
561
|
|
587
562
|
|
588
|
-
def
|
563
|
+
def _create_napistu_graph_bipartite(sbml_dfs: sbml_dfs_core.SBML_dfs) -> pd.DataFrame:
|
589
564
|
"""Turn an sbml_dfs model into a bipartite graph linking molecules to reactions."""
|
590
565
|
|
591
566
|
# setup edges
|
@@ -596,18 +571,20 @@ def _create_cpr_graph_bipartite(sbml_dfs: sbml_dfs_core.SBML_dfs) -> pd.DataFram
|
|
596
571
|
# rename species and reactions to reflect from -> to edges
|
597
572
|
.rename(
|
598
573
|
columns={
|
599
|
-
SBML_DFS.SC_ID:
|
600
|
-
SBML_DFS.R_ID:
|
574
|
+
SBML_DFS.SC_ID: NAPISTU_GRAPH_NODE_TYPES.SPECIES,
|
575
|
+
SBML_DFS.R_ID: NAPISTU_GRAPH_NODE_TYPES.REACTION,
|
601
576
|
}
|
602
577
|
)
|
603
578
|
)
|
604
579
|
# add back an r_id variable so that each edge is annotated by a reaction
|
605
|
-
network_edges[
|
580
|
+
network_edges[NAPISTU_GRAPH_EDGES.R_ID] = network_edges[
|
581
|
+
NAPISTU_GRAPH_NODE_TYPES.REACTION
|
582
|
+
]
|
606
583
|
|
607
584
|
# add edge weights
|
608
585
|
cspecies_features = sbml_dfs.get_cspecies_features()
|
609
586
|
network_edges = network_edges.merge(
|
610
|
-
cspecies_features, left_on=
|
587
|
+
cspecies_features, left_on=NAPISTU_GRAPH_NODE_TYPES.SPECIES, right_index=True
|
611
588
|
)
|
612
589
|
|
613
590
|
# if directed then flip substrates and modifiers to the origin edge
|
@@ -616,16 +593,16 @@ def _create_cpr_graph_bipartite(sbml_dfs: sbml_dfs_core.SBML_dfs) -> pd.DataFram
|
|
616
593
|
origins = network_edges[network_edges[SBML_DFS.STOICHIOMETRY] <= 0]
|
617
594
|
origin_edges = origins.loc[:, [edge_vars[1], edge_vars[0]] + edge_vars[2:]].rename(
|
618
595
|
columns={
|
619
|
-
|
620
|
-
|
596
|
+
NAPISTU_GRAPH_NODE_TYPES.SPECIES: NAPISTU_GRAPH_EDGES.FROM,
|
597
|
+
NAPISTU_GRAPH_NODE_TYPES.REACTION: NAPISTU_GRAPH_EDGES.TO,
|
621
598
|
}
|
622
599
|
)
|
623
600
|
|
624
601
|
dests = network_edges[network_edges[SBML_DFS.STOICHIOMETRY] > 0]
|
625
602
|
dest_edges = dests.rename(
|
626
603
|
columns={
|
627
|
-
|
628
|
-
|
604
|
+
NAPISTU_GRAPH_NODE_TYPES.REACTION: NAPISTU_GRAPH_EDGES.FROM,
|
605
|
+
NAPISTU_GRAPH_NODE_TYPES.SPECIES: NAPISTU_GRAPH_EDGES.TO,
|
629
606
|
}
|
630
607
|
)
|
631
608
|
|
@@ -634,7 +611,7 @@ def _create_cpr_graph_bipartite(sbml_dfs: sbml_dfs_core.SBML_dfs) -> pd.DataFram
|
|
634
611
|
return network_edges
|
635
612
|
|
636
613
|
|
637
|
-
def
|
614
|
+
def _create_napistu_graph_tiered(
|
638
615
|
sbml_dfs: sbml_dfs_core.SBML_dfs, graph_type: str
|
639
616
|
) -> pd.DataFrame:
|
640
617
|
"""Turn an sbml_dfs model into a tiered graph which links upstream entities to downstream ones."""
|
@@ -698,21 +675,23 @@ def _create_cpr_graph_tiered(
|
|
698
675
|
# not the bipartite network which can be trivially obtained from the pathway
|
699
676
|
# specification
|
700
677
|
unique_edges = (
|
701
|
-
all_reaction_edges_df.groupby(
|
678
|
+
all_reaction_edges_df.groupby(
|
679
|
+
[NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO]
|
680
|
+
)
|
702
681
|
.first()
|
703
682
|
.reset_index()
|
704
683
|
)
|
705
684
|
|
706
685
|
# children
|
707
686
|
n_children = (
|
708
|
-
unique_edges[
|
687
|
+
unique_edges[NAPISTU_GRAPH_EDGES.FROM]
|
709
688
|
.value_counts()
|
710
689
|
# rename values to the child name
|
711
|
-
.to_frame(name=
|
690
|
+
.to_frame(name=NAPISTU_GRAPH_EDGES.SC_CHILDREN)
|
712
691
|
.reset_index()
|
713
692
|
.rename(
|
714
693
|
{
|
715
|
-
|
694
|
+
NAPISTU_GRAPH_EDGES.FROM: SBML_DFS.SC_ID,
|
716
695
|
},
|
717
696
|
axis=1,
|
718
697
|
)
|
@@ -720,14 +699,14 @@ def _create_cpr_graph_tiered(
|
|
720
699
|
|
721
700
|
# parents
|
722
701
|
n_parents = (
|
723
|
-
unique_edges[
|
702
|
+
unique_edges[NAPISTU_GRAPH_EDGES.TO]
|
724
703
|
.value_counts()
|
725
704
|
# rename values to the parent name
|
726
|
-
.to_frame(name=
|
705
|
+
.to_frame(name=NAPISTU_GRAPH_EDGES.SC_PARENTS)
|
727
706
|
.reset_index()
|
728
707
|
.rename(
|
729
708
|
{
|
730
|
-
|
709
|
+
NAPISTU_GRAPH_EDGES.TO: SBML_DFS.SC_ID,
|
731
710
|
},
|
732
711
|
axis=1,
|
733
712
|
)
|
@@ -735,9 +714,9 @@ def _create_cpr_graph_tiered(
|
|
735
714
|
|
736
715
|
graph_degree_by_edgelist = n_children.merge(n_parents, how="outer").fillna(int(0))
|
737
716
|
|
738
|
-
graph_degree_by_edgelist[
|
739
|
-
graph_degree_by_edgelist[
|
740
|
-
+ graph_degree_by_edgelist[
|
717
|
+
graph_degree_by_edgelist[NAPISTU_GRAPH_EDGES.SC_DEGREE] = (
|
718
|
+
graph_degree_by_edgelist[NAPISTU_GRAPH_EDGES.SC_CHILDREN]
|
719
|
+
+ graph_degree_by_edgelist[NAPISTU_GRAPH_EDGES.SC_PARENTS]
|
741
720
|
)
|
742
721
|
graph_degree_by_edgelist = (
|
743
722
|
graph_degree_by_edgelist[
|
@@ -750,9 +729,9 @@ def _create_cpr_graph_tiered(
|
|
750
729
|
cspecies_features = (
|
751
730
|
cspecies_features.drop(
|
752
731
|
[
|
753
|
-
|
754
|
-
|
755
|
-
|
732
|
+
NAPISTU_GRAPH_EDGES.SC_DEGREE,
|
733
|
+
NAPISTU_GRAPH_EDGES.SC_CHILDREN,
|
734
|
+
NAPISTU_GRAPH_EDGES.SC_PARENTS,
|
756
735
|
],
|
757
736
|
axis=1,
|
758
737
|
)
|
@@ -760,10 +739,10 @@ def _create_cpr_graph_tiered(
|
|
760
739
|
.fillna(int(0))
|
761
740
|
)
|
762
741
|
|
763
|
-
is_from_reaction = all_reaction_edges_df[
|
742
|
+
is_from_reaction = all_reaction_edges_df[NAPISTU_GRAPH_EDGES.FROM].isin(
|
764
743
|
sbml_dfs.reactions.index.tolist()
|
765
744
|
)
|
766
|
-
is_from_reaction = all_reaction_edges_df[
|
745
|
+
is_from_reaction = all_reaction_edges_df[NAPISTU_GRAPH_EDGES.FROM].isin(
|
767
746
|
sbml_dfs.reactions.index
|
768
747
|
)
|
769
748
|
# add substrate weight whenever "from" edge is a molecule
|
@@ -771,10 +750,10 @@ def _create_cpr_graph_tiered(
|
|
771
750
|
decorated_all_reaction_edges_df = pd.concat(
|
772
751
|
[
|
773
752
|
all_reaction_edges_df[~is_from_reaction].merge(
|
774
|
-
cspecies_features, left_on=
|
753
|
+
cspecies_features, left_on=NAPISTU_GRAPH_EDGES.FROM, right_index=True
|
775
754
|
),
|
776
755
|
all_reaction_edges_df[is_from_reaction].merge(
|
777
|
-
cspecies_features, left_on=
|
756
|
+
cspecies_features, left_on=NAPISTU_GRAPH_EDGES.TO, right_index=True
|
778
757
|
),
|
779
758
|
]
|
780
759
|
).sort_index()
|
@@ -841,8 +820,8 @@ def _format_tiered_reaction_species(
|
|
841
820
|
.merge(graph_hierarchy_df)
|
842
821
|
),
|
843
822
|
graph_hierarchy_df[
|
844
|
-
graph_hierarchy_df[
|
845
|
-
==
|
823
|
+
graph_hierarchy_df[NAPISTU_GRAPH_EDGES.SBO_NAME]
|
824
|
+
== NAPISTU_GRAPH_NODE_TYPES.REACTION
|
846
825
|
].assign(entity_id=r_id, r_id=r_id),
|
847
826
|
]
|
848
827
|
)
|
@@ -856,7 +835,8 @@ def _format_tiered_reaction_species(
|
|
856
835
|
|
857
836
|
# which tier is the reaction?
|
858
837
|
reaction_tier = graph_hierarchy_df["tier"][
|
859
|
-
graph_hierarchy_df[
|
838
|
+
graph_hierarchy_df[NAPISTU_GRAPH_EDGES.SBO_NAME]
|
839
|
+
== NAPISTU_GRAPH_NODE_TYPES.REACTION
|
860
840
|
].tolist()[0]
|
861
841
|
|
862
842
|
rxn_edges = list()
|
@@ -876,10 +856,10 @@ def _format_tiered_reaction_species(
|
|
876
856
|
rxn_edges_df = (
|
877
857
|
pd.concat(rxn_edges)[
|
878
858
|
[
|
879
|
-
|
880
|
-
|
881
|
-
|
882
|
-
|
859
|
+
NAPISTU_GRAPH_EDGES.FROM,
|
860
|
+
NAPISTU_GRAPH_EDGES.TO,
|
861
|
+
NAPISTU_GRAPH_EDGES.STOICHIOMETRY,
|
862
|
+
NAPISTU_GRAPH_EDGES.SBO_TERM,
|
883
863
|
]
|
884
864
|
]
|
885
865
|
.reset_index(drop=True)
|
@@ -927,12 +907,12 @@ def _format_tier_combo(
|
|
927
907
|
|
928
908
|
formatted_tier_combo = (
|
929
909
|
upstream_tier[upstream_fields]
|
930
|
-
.rename({"entity_id":
|
910
|
+
.rename({"entity_id": NAPISTU_GRAPH_EDGES.FROM}, axis=1)
|
931
911
|
.assign(_joiner=1)
|
932
912
|
).merge(
|
933
913
|
(
|
934
914
|
downstream_tier[downstream_fields]
|
935
|
-
.rename({"entity_id":
|
915
|
+
.rename({"entity_id": NAPISTU_GRAPH_EDGES.TO}, axis=1)
|
936
916
|
.assign(_joiner=1)
|
937
917
|
),
|
938
918
|
left_on="_joiner",
|
@@ -957,9 +937,9 @@ def _create_graph_hierarchy_df(graph_type: str) -> pd.DataFrame:
|
|
957
937
|
|
958
938
|
"""
|
959
939
|
|
960
|
-
if graph_type ==
|
940
|
+
if graph_type == NAPISTU_GRAPH_TYPES.REGULATORY:
|
961
941
|
sbo_names_hierarchy = REGULATORY_GRAPH_HIERARCHY
|
962
|
-
elif graph_type ==
|
942
|
+
elif graph_type == NAPISTU_GRAPH_TYPES.SURROGATE:
|
963
943
|
sbo_names_hierarchy = SURROGATE_GRAPH_HIERARCHY
|
964
944
|
else:
|
965
945
|
raise NotImplementedError(f"{graph_type} is not a valid graph_type")
|
@@ -972,23 +952,41 @@ def _create_graph_hierarchy_df(graph_type: str) -> pd.DataFrame:
|
|
972
952
|
]
|
973
953
|
).reset_index(drop=True)
|
974
954
|
graph_hierarchy_df[SBML_DFS.SBO_TERM] = graph_hierarchy_df["sbo_name"].apply(
|
975
|
-
lambda x:
|
955
|
+
lambda x: (
|
956
|
+
MINI_SBO_FROM_NAME[x] if x != NAPISTU_GRAPH_NODE_TYPES.REACTION else ""
|
957
|
+
)
|
976
958
|
)
|
977
959
|
|
978
960
|
# ensure that the output is expected
|
979
961
|
utils.match_pd_vars(
|
980
962
|
graph_hierarchy_df,
|
981
|
-
req_vars={
|
963
|
+
req_vars={NAPISTU_GRAPH_EDGES.SBO_NAME, "tier", SBML_DFS.SBO_TERM},
|
982
964
|
allow_series=False,
|
983
965
|
).assert_present()
|
984
966
|
|
985
967
|
return graph_hierarchy_df
|
986
968
|
|
987
969
|
|
988
|
-
def _add_graph_weights_mixed(
|
989
|
-
|
970
|
+
def _add_graph_weights_mixed(
|
971
|
+
napistu_graph: NapistuGraph, reaction_attrs: dict
|
972
|
+
) -> NapistuGraph:
|
973
|
+
"""
|
974
|
+
Weight a NapistuGraph using a mixed approach combining source-specific weights and existing edge weights.
|
990
975
|
|
991
|
-
|
976
|
+
Parameters
|
977
|
+
----------
|
978
|
+
napistu_graph : NapistuGraph
|
979
|
+
The network to weight (subclass of igraph.Graph).
|
980
|
+
reaction_attrs : dict
|
981
|
+
Dictionary of reaction attributes to use for weighting.
|
982
|
+
|
983
|
+
Returns
|
984
|
+
-------
|
985
|
+
NapistuGraph
|
986
|
+
The weighted NapistuGraph.
|
987
|
+
"""
|
988
|
+
|
989
|
+
edges_df = napistu_graph.get_edge_dataframe()
|
992
990
|
|
993
991
|
calibrated_edges = apply_weight_transformations(edges_df, reaction_attrs)
|
994
992
|
calibrated_edges = _create_source_weights(calibrated_edges, "source_wt")
|
@@ -1000,26 +998,42 @@ def _add_graph_weights_mixed(cpr_graph: ig.Graph, reaction_attrs: dict) -> ig.Gr
|
|
1000
998
|
|
1001
999
|
calibrated_edges["weights"] = calibrated_edges[score_vars].min(axis=1)
|
1002
1000
|
|
1003
|
-
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1001
|
+
napistu_graph.es[NAPISTU_GRAPH_EDGES.WEIGHTS] = calibrated_edges[
|
1002
|
+
NAPISTU_GRAPH_EDGES.WEIGHTS
|
1003
|
+
]
|
1004
|
+
if napistu_graph.is_directed():
|
1005
|
+
napistu_graph.es[NAPISTU_GRAPH_EDGES.UPSTREAM_WEIGHTS] = calibrated_edges[
|
1006
|
+
NAPISTU_GRAPH_EDGES.WEIGHTS
|
1007
1007
|
]
|
1008
1008
|
|
1009
1009
|
# add other attributes and update transformed attributes
|
1010
|
-
|
1010
|
+
napistu_graph.es["source_wt"] = calibrated_edges["source_wt"]
|
1011
1011
|
for k in reaction_attrs.keys():
|
1012
|
-
|
1012
|
+
napistu_graph.es[k] = calibrated_edges[k]
|
1013
1013
|
|
1014
|
-
return
|
1014
|
+
return napistu_graph
|
1015
1015
|
|
1016
1016
|
|
1017
1017
|
def _add_graph_weights_calibration(
|
1018
|
-
|
1019
|
-
) ->
|
1020
|
-
"""
|
1018
|
+
napistu_graph: NapistuGraph, reaction_attrs: dict
|
1019
|
+
) -> NapistuGraph:
|
1020
|
+
"""
|
1021
|
+
Weight a NapistuGraph using a calibrated strategy which aims to roughly align qualitatively similar weights from different sources.
|
1022
|
+
|
1023
|
+
Parameters
|
1024
|
+
----------
|
1025
|
+
napistu_graph : NapistuGraph
|
1026
|
+
The network to weight (subclass of igraph.Graph).
|
1027
|
+
reaction_attrs : dict
|
1028
|
+
Dictionary of reaction attributes to use for weighting.
|
1029
|
+
|
1030
|
+
Returns
|
1031
|
+
-------
|
1032
|
+
NapistuGraph
|
1033
|
+
The weighted NapistuGraph.
|
1034
|
+
"""
|
1021
1035
|
|
1022
|
-
edges_df =
|
1036
|
+
edges_df = napistu_graph.get_edge_dataframe()
|
1023
1037
|
|
1024
1038
|
calibrated_edges = apply_weight_transformations(edges_df, reaction_attrs)
|
1025
1039
|
|
@@ -1027,57 +1041,58 @@ def _add_graph_weights_calibration(
|
|
1027
1041
|
score_vars.append("topo_weights")
|
1028
1042
|
|
1029
1043
|
logger.info(f"Creating calibrated scores based on {', '.join(score_vars)}")
|
1030
|
-
|
1044
|
+
napistu_graph.es["weights"] = calibrated_edges[score_vars].min(axis=1)
|
1031
1045
|
|
1032
|
-
if
|
1046
|
+
if napistu_graph.is_directed():
|
1033
1047
|
score_vars = list(reaction_attrs.keys())
|
1034
1048
|
score_vars.append("upstream_topo_weights")
|
1035
|
-
|
1049
|
+
napistu_graph.es["upstream_weights"] = calibrated_edges[score_vars].min(axis=1)
|
1036
1050
|
|
1037
1051
|
# add other attributes and update transformed attributes
|
1038
1052
|
for k in reaction_attrs.keys():
|
1039
|
-
|
1053
|
+
napistu_graph.es[k] = calibrated_edges[k]
|
1040
1054
|
|
1041
|
-
return
|
1055
|
+
return napistu_graph
|
1042
1056
|
|
1043
1057
|
|
1044
1058
|
def _add_edge_attr_to_vertex_graph(
|
1045
|
-
|
1059
|
+
napistu_graph: NapistuGraph,
|
1046
1060
|
edge_attr_list: list,
|
1047
1061
|
shared_node_key: str = "r_id",
|
1048
|
-
) ->
|
1062
|
+
) -> NapistuGraph:
|
1049
1063
|
"""
|
1050
|
-
Merge edge attribute(s) from edge_attr_list to
|
1064
|
+
Merge edge attribute(s) from edge_attr_list to vertices of a NapistuGraph.
|
1051
1065
|
|
1052
1066
|
Parameters
|
1053
1067
|
----------
|
1054
|
-
|
1055
|
-
A graph generated by
|
1056
|
-
edge_attr_list: list
|
1057
|
-
A list containing attributes to pull out of edges, then to add to vertices
|
1058
|
-
shared_node_key : str
|
1059
|
-
|
1068
|
+
napistu_graph : NapistuGraph
|
1069
|
+
A graph generated by create_napistu_graph() (subclass of igraph.Graph).
|
1070
|
+
edge_attr_list : list
|
1071
|
+
A list containing attributes to pull out of edges, then to add to vertices.
|
1072
|
+
shared_node_key : str, optional
|
1073
|
+
Key in edge that is shared with vertex, to map edge ids to corresponding vertex ids. Default is "r_id".
|
1060
1074
|
|
1061
|
-
Returns
|
1062
|
-
|
1063
|
-
|
1075
|
+
Returns
|
1076
|
+
-------
|
1077
|
+
NapistuGraph
|
1078
|
+
The input NapistuGraph with additional vertex attributes added from edge attributes.
|
1064
1079
|
"""
|
1065
1080
|
|
1066
1081
|
if len(edge_attr_list) == 0:
|
1067
1082
|
logger.warning(
|
1068
1083
|
"No edge attributes were passed, " "thus return the input graph."
|
1069
1084
|
)
|
1070
|
-
return
|
1085
|
+
return napistu_graph
|
1071
1086
|
|
1072
|
-
graph_vertex_df =
|
1073
|
-
graph_edge_df =
|
1087
|
+
graph_vertex_df = napistu_graph.get_vertex_dataframe()
|
1088
|
+
graph_edge_df = napistu_graph.get_edge_dataframe()
|
1074
1089
|
|
1075
1090
|
if shared_node_key not in graph_edge_df.columns.to_list():
|
1076
1091
|
logger.warning(
|
1077
1092
|
f"{shared_node_key} is not in the current edge attributes. "
|
1078
1093
|
"shared_node_key must be an existing edge attribute"
|
1079
1094
|
)
|
1080
|
-
return
|
1095
|
+
return napistu_graph
|
1081
1096
|
|
1082
1097
|
graph_edge_df_sub = graph_edge_df.loc[:, [shared_node_key] + edge_attr_list].copy()
|
1083
1098
|
|
@@ -1112,7 +1127,7 @@ def _add_edge_attr_to_vertex_graph(
|
|
1112
1127
|
)
|
1113
1128
|
|
1114
1129
|
# rename shared_node_key to vertex key 'name'
|
1115
|
-
# as in net_create.
|
1130
|
+
# as in net_create.create_napistu_graph(), vertex_name_attr is set to 'name'
|
1116
1131
|
graph_edge_df_sub_no_duplicate = graph_edge_df_sub_no_duplicate.rename(
|
1117
1132
|
columns={shared_node_key: "name"},
|
1118
1133
|
)
|
@@ -1134,12 +1149,12 @@ def _add_edge_attr_to_vertex_graph(
|
|
1134
1149
|
"Please assign proper values to those vertex attributes."
|
1135
1150
|
)
|
1136
1151
|
|
1137
|
-
# assign the edge_attrs from edge_attr_list to
|
1152
|
+
# assign the edge_attrs from edge_attr_list to napistu_graph's vertices:
|
1138
1153
|
# keep the same edge attribute names:
|
1139
1154
|
for col_name in edge_attr_list:
|
1140
|
-
|
1155
|
+
napistu_graph.vs[col_name] = graph_vertex_df_w_edge_attr[col_name]
|
1141
1156
|
|
1142
|
-
return
|
1157
|
+
return napistu_graph
|
1143
1158
|
|
1144
1159
|
|
1145
1160
|
def _summarize_weight_calibration_table(
|
@@ -1343,23 +1358,23 @@ def _format_interactors_for_tiered_graph(
|
|
1343
1358
|
|
1344
1359
|
|
1345
1360
|
def _add_graph_species_attribute(
|
1346
|
-
|
1361
|
+
napistu_graph: NapistuGraph,
|
1347
1362
|
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
1348
1363
|
species_graph_attrs: dict,
|
1349
1364
|
custom_transformations: Optional[dict] = None,
|
1350
|
-
) ->
|
1365
|
+
) -> NapistuGraph:
|
1351
1366
|
"""
|
1352
|
-
Add meta-data from species_data to existing
|
1367
|
+
Add meta-data from species_data to existing NapistuGraph's vertices.
|
1353
1368
|
|
1354
|
-
This function augments the vertices of
|
1369
|
+
This function augments the vertices of a NapistuGraph network with additional attributes
|
1355
1370
|
derived from the species-level data in the provided SBML_dfs object. The attributes
|
1356
1371
|
to add are specified in the species_graph_attrs dictionary, and can be transformed
|
1357
1372
|
using either built-in or user-supplied transformation functions.
|
1358
1373
|
|
1359
1374
|
Parameters
|
1360
1375
|
----------
|
1361
|
-
|
1362
|
-
The
|
1376
|
+
napistu_graph : NapistuGraph
|
1377
|
+
The NapistuGraph network to augment (subclass of igraph.Graph).
|
1363
1378
|
sbml_dfs : sbml_dfs_core.SBML_dfs
|
1364
1379
|
The SBML_dfs object containing species data.
|
1365
1380
|
species_graph_attrs : dict
|
@@ -1371,8 +1386,8 @@ def _add_graph_species_attribute(
|
|
1371
1386
|
|
1372
1387
|
Returns
|
1373
1388
|
-------
|
1374
|
-
|
1375
|
-
The input
|
1389
|
+
NapistuGraph
|
1390
|
+
The input NapistuGraph with additional vertex attributes added from species_data.
|
1376
1391
|
"""
|
1377
1392
|
if not isinstance(species_graph_attrs, dict):
|
1378
1393
|
raise TypeError(
|
@@ -1396,7 +1411,7 @@ def _add_graph_species_attribute(
|
|
1396
1411
|
|
1397
1412
|
logger.info("Adding meta-data from species_data")
|
1398
1413
|
|
1399
|
-
curr_network_nodes_df =
|
1414
|
+
curr_network_nodes_df = napistu_graph.get_vertex_dataframe()
|
1400
1415
|
|
1401
1416
|
# add species-level attributes to nodes dataframe
|
1402
1417
|
augmented_network_nodes_df = _augment_network_nodes(
|
@@ -1409,9 +1424,9 @@ def _add_graph_species_attribute(
|
|
1409
1424
|
for vs_attr in flat_sp_node_attr_list:
|
1410
1425
|
# in case more than one vs_attr in the flat_sp_node_attr_list
|
1411
1426
|
logger.info(f"Adding new attribute {vs_attr} to vertices")
|
1412
|
-
|
1427
|
+
napistu_graph.vs[vs_attr] = augmented_network_nodes_df[vs_attr].values
|
1413
1428
|
|
1414
|
-
return
|
1429
|
+
return napistu_graph
|
1415
1430
|
|
1416
1431
|
|
1417
1432
|
def _augment_network_nodes(
|
@@ -1462,10 +1477,10 @@ def _augment_network_nodes(
|
|
1462
1477
|
)
|
1463
1478
|
|
1464
1479
|
# include matching s_ids and c_ids of sc_ids
|
1465
|
-
|
1466
|
-
network_nodes_sid = pd.merge(
|
1480
|
+
network_nodes_sid = utils._merge_and_log_overwrites(
|
1467
1481
|
network_nodes,
|
1468
1482
|
sbml_dfs.compartmentalized_species[["s_id", "c_id"]],
|
1483
|
+
"network nodes",
|
1469
1484
|
left_on="name",
|
1470
1485
|
right_index=True,
|
1471
1486
|
how="left",
|
@@ -1481,8 +1496,13 @@ def _augment_network_nodes(
|
|
1481
1496
|
|
1482
1497
|
if species_graph_data is not None:
|
1483
1498
|
# add species_graph_data to the network_nodes df, based on s_id
|
1484
|
-
network_nodes_wdata =
|
1485
|
-
|
1499
|
+
network_nodes_wdata = utils._merge_and_log_overwrites(
|
1500
|
+
network_nodes_sid,
|
1501
|
+
species_graph_data,
|
1502
|
+
"species graph data",
|
1503
|
+
left_on="s_id",
|
1504
|
+
right_index=True,
|
1505
|
+
how="left",
|
1486
1506
|
)
|
1487
1507
|
else:
|
1488
1508
|
network_nodes_wdata = network_nodes_sid
|
@@ -1566,7 +1586,8 @@ def _reverse_network_edges(augmented_network_edges: pd.DataFrame) -> pd.DataFram
|
|
1566
1586
|
"""Flip reversible reactions to derive the reverse reaction."""
|
1567
1587
|
|
1568
1588
|
# validate inputs
|
1569
|
-
|
1589
|
+
required_vars = {NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO}
|
1590
|
+
missing_required_vars = required_vars.difference(
|
1570
1591
|
set(augmented_network_edges.columns.tolist())
|
1571
1592
|
)
|
1572
1593
|
|
@@ -1576,16 +1597,23 @@ def _reverse_network_edges(augmented_network_edges: pd.DataFrame) -> pd.DataFram
|
|
1576
1597
|
f"{', '.join(missing_required_vars)}"
|
1577
1598
|
)
|
1578
1599
|
|
1600
|
+
# Check if direction already exists
|
1601
|
+
if NAPISTU_GRAPH_EDGES.DIRECTION in augmented_network_edges.columns:
|
1602
|
+
logger.warning(
|
1603
|
+
f"{NAPISTU_GRAPH_EDGES.DIRECTION} field already exists in augmented_network_edges. "
|
1604
|
+
"This is unexpected and may indicate an issue in the graph creation process."
|
1605
|
+
)
|
1606
|
+
|
1579
1607
|
# select all edges derived from reversible reactions
|
1580
1608
|
reversible_reaction_edges = augmented_network_edges[
|
1581
|
-
augmented_network_edges[
|
1609
|
+
augmented_network_edges[NAPISTU_GRAPH_EDGES.R_ISREVERSIBLE]
|
1582
1610
|
]
|
1583
1611
|
|
1584
1612
|
r_reaction_edges = (
|
1585
1613
|
# ignore edges which start in a regulator or catalyst; even for a reversible reaction it
|
1586
1614
|
# doesn't make sense for a regulator to be impacted by a target
|
1587
1615
|
reversible_reaction_edges[
|
1588
|
-
~reversible_reaction_edges[
|
1616
|
+
~reversible_reaction_edges[NAPISTU_GRAPH_EDGES.SBO_TERM].isin(
|
1589
1617
|
[
|
1590
1618
|
MINI_SBO_FROM_NAME[x]
|
1591
1619
|
for x in SBO_MODIFIER_NAMES.union({SBOTERM_NAMES.CATALYST})
|
@@ -1595,18 +1623,18 @@ def _reverse_network_edges(augmented_network_edges: pd.DataFrame) -> pd.DataFram
|
|
1595
1623
|
# flip parent and child attributes
|
1596
1624
|
.rename(
|
1597
1625
|
{
|
1598
|
-
|
1599
|
-
|
1600
|
-
|
1601
|
-
|
1626
|
+
NAPISTU_GRAPH_EDGES.FROM: NAPISTU_GRAPH_EDGES.TO,
|
1627
|
+
NAPISTU_GRAPH_EDGES.TO: NAPISTU_GRAPH_EDGES.FROM,
|
1628
|
+
NAPISTU_GRAPH_EDGES.SC_CHILDREN: NAPISTU_GRAPH_EDGES.SC_PARENTS,
|
1629
|
+
NAPISTU_GRAPH_EDGES.SC_PARENTS: NAPISTU_GRAPH_EDGES.SC_CHILDREN,
|
1602
1630
|
},
|
1603
1631
|
axis=1,
|
1604
1632
|
)
|
1605
1633
|
)
|
1606
1634
|
|
1607
1635
|
# switch substrates and products
|
1608
|
-
r_reaction_edges[
|
1609
|
-
|
1636
|
+
r_reaction_edges[NAPISTU_GRAPH_EDGES.STOICHIOMETRY] = r_reaction_edges[
|
1637
|
+
NAPISTU_GRAPH_EDGES.STOICHIOMETRY
|
1610
1638
|
].apply(
|
1611
1639
|
# the ifelse statement prevents 0 being converted to -0 ...
|
1612
1640
|
lambda x: -1 * x if x != 0 else 0
|
@@ -1616,18 +1644,18 @@ def _reverse_network_edges(augmented_network_edges: pd.DataFrame) -> pd.DataFram
|
|
1616
1644
|
[
|
1617
1645
|
(
|
1618
1646
|
r_reaction_edges[
|
1619
|
-
r_reaction_edges[
|
1647
|
+
r_reaction_edges[NAPISTU_GRAPH_EDGES.SBO_TERM]
|
1620
1648
|
== MINI_SBO_FROM_NAME[SBOTERM_NAMES.REACTANT]
|
1621
1649
|
].assign(sbo_term=MINI_SBO_FROM_NAME[SBOTERM_NAMES.PRODUCT])
|
1622
1650
|
),
|
1623
1651
|
(
|
1624
1652
|
r_reaction_edges[
|
1625
|
-
r_reaction_edges[
|
1653
|
+
r_reaction_edges[NAPISTU_GRAPH_EDGES.SBO_TERM]
|
1626
1654
|
== MINI_SBO_FROM_NAME[SBOTERM_NAMES.PRODUCT]
|
1627
1655
|
].assign(sbo_term=MINI_SBO_FROM_NAME[SBOTERM_NAMES.REACTANT])
|
1628
1656
|
),
|
1629
1657
|
r_reaction_edges[
|
1630
|
-
~r_reaction_edges[
|
1658
|
+
~r_reaction_edges[NAPISTU_GRAPH_EDGES.SBO_TERM].isin(
|
1631
1659
|
[
|
1632
1660
|
MINI_SBO_FROM_NAME[SBOTERM_NAMES.REACTANT],
|
1633
1661
|
MINI_SBO_FROM_NAME[SBOTERM_NAMES.PRODUCT],
|
@@ -1643,12 +1671,12 @@ def _reverse_network_edges(augmented_network_edges: pd.DataFrame) -> pd.DataFram
|
|
1643
1671
|
)
|
1644
1672
|
|
1645
1673
|
return transformed_r_reaction_edges.assign(
|
1646
|
-
|
1674
|
+
**{NAPISTU_GRAPH_EDGES.DIRECTION: NAPISTU_GRAPH_EDGE_DIRECTIONS.REVERSE}
|
1647
1675
|
)
|
1648
1676
|
|
1649
1677
|
|
1650
1678
|
def _create_topology_weights(
|
1651
|
-
|
1679
|
+
napistu_graph: ig.Graph,
|
1652
1680
|
base_score: float = 2,
|
1653
1681
|
protein_multiplier: int = 1,
|
1654
1682
|
metabolite_multiplier: int = 3,
|
@@ -1665,7 +1693,7 @@ def _create_topology_weights(
|
|
1665
1693
|
schemes.
|
1666
1694
|
|
1667
1695
|
Args:
|
1668
|
-
|
1696
|
+
napistu_graph (ig.Graph): a graph containing connections between molecules, proteins, and reactions.
|
1669
1697
|
base_score (float): offset which will be added to all weights.
|
1670
1698
|
protein_multiplier (int): multiplier for non-metabolite species (lower weight paths will tend to be selected).
|
1671
1699
|
metabolite_multiplier (int): multiplier for metabolites [defined a species with a ChEBI ID).
|
@@ -1675,20 +1703,22 @@ def _create_topology_weights(
|
|
1675
1703
|
size and sparsity.
|
1676
1704
|
|
1677
1705
|
Returns:
|
1678
|
-
|
1706
|
+
napistu_graph (ig.Graph): graph with added topology weights
|
1679
1707
|
|
1680
1708
|
"""
|
1681
1709
|
|
1682
1710
|
# check for required attribute before proceeding
|
1683
1711
|
|
1684
1712
|
required_attrs = {
|
1685
|
-
|
1686
|
-
|
1687
|
-
|
1688
|
-
|
1713
|
+
NAPISTU_GRAPH_EDGES.SC_DEGREE,
|
1714
|
+
NAPISTU_GRAPH_EDGES.SC_CHILDREN,
|
1715
|
+
NAPISTU_GRAPH_EDGES.SC_PARENTS,
|
1716
|
+
NAPISTU_GRAPH_EDGES.SPECIES_TYPE,
|
1689
1717
|
}
|
1690
1718
|
|
1691
|
-
missing_required_attrs = required_attrs.difference(
|
1719
|
+
missing_required_attrs = required_attrs.difference(
|
1720
|
+
set(napistu_graph.es.attributes())
|
1721
|
+
)
|
1692
1722
|
if len(missing_required_attrs) != 0:
|
1693
1723
|
raise ValueError(
|
1694
1724
|
f"model is missing {len(missing_required_attrs)} required attributes: {', '.join(missing_required_attrs)}"
|
@@ -1713,10 +1743,18 @@ def _create_topology_weights(
|
|
1713
1743
|
|
1714
1744
|
weight_table = pd.DataFrame(
|
1715
1745
|
{
|
1716
|
-
|
1717
|
-
|
1718
|
-
|
1719
|
-
|
1746
|
+
NAPISTU_GRAPH_EDGES.SC_DEGREE: napistu_graph.es[
|
1747
|
+
NAPISTU_GRAPH_EDGES.SC_DEGREE
|
1748
|
+
],
|
1749
|
+
NAPISTU_GRAPH_EDGES.SC_CHILDREN: napistu_graph.es[
|
1750
|
+
NAPISTU_GRAPH_EDGES.SC_CHILDREN
|
1751
|
+
],
|
1752
|
+
NAPISTU_GRAPH_EDGES.SC_PARENTS: napistu_graph.es[
|
1753
|
+
NAPISTU_GRAPH_EDGES.SC_PARENTS
|
1754
|
+
],
|
1755
|
+
NAPISTU_GRAPH_EDGES.SPECIES_TYPE: napistu_graph.es[
|
1756
|
+
NAPISTU_GRAPH_EDGES.SPECIES_TYPE
|
1757
|
+
],
|
1720
1758
|
}
|
1721
1759
|
)
|
1722
1760
|
|
@@ -1735,18 +1773,20 @@ def _create_topology_weights(
|
|
1735
1773
|
# for interpretability and filtering, we can rescale topology weights by the
|
1736
1774
|
# average degree of nodes
|
1737
1775
|
if scale_multiplier_by_meandegree:
|
1738
|
-
mean_degree = len(
|
1739
|
-
if not
|
1776
|
+
mean_degree = len(napistu_graph.es) / len(napistu_graph.vs)
|
1777
|
+
if not napistu_graph.is_directed():
|
1740
1778
|
# for a directed network in- and out-degree are separately treated while
|
1741
1779
|
# an undirected network's degree will be the sum of these two measures.
|
1742
1780
|
mean_degree = mean_degree * 2
|
1743
1781
|
|
1744
1782
|
weight_table["multiplier"] = weight_table["multiplier"] / mean_degree
|
1745
1783
|
|
1746
|
-
if
|
1747
|
-
weight_table["connection_weight"] = weight_table[
|
1784
|
+
if napistu_graph.is_directed():
|
1785
|
+
weight_table["connection_weight"] = weight_table[
|
1786
|
+
NAPISTU_GRAPH_EDGES.SC_CHILDREN
|
1787
|
+
]
|
1748
1788
|
else:
|
1749
|
-
weight_table["connection_weight"] = weight_table[
|
1789
|
+
weight_table["connection_weight"] = weight_table[NAPISTU_GRAPH_EDGES.SC_DEGREE]
|
1750
1790
|
|
1751
1791
|
# weight traveling through a species based on
|
1752
1792
|
# - a constant
|
@@ -1756,17 +1796,19 @@ def _create_topology_weights(
|
|
1756
1796
|
base_score + (x * y)
|
1757
1797
|
for x, y in zip(weight_table["multiplier"], weight_table["connection_weight"])
|
1758
1798
|
]
|
1759
|
-
|
1799
|
+
napistu_graph.es["topo_weights"] = weight_table["topo_weights"]
|
1760
1800
|
|
1761
1801
|
# if directed and we want to use travel upstream define a corresponding weighting scheme
|
1762
|
-
if
|
1802
|
+
if napistu_graph.is_directed():
|
1763
1803
|
weight_table["upstream_topo_weights"] = [
|
1764
1804
|
base_score + (x * y)
|
1765
1805
|
for x, y in zip(weight_table["multiplier"], weight_table["sc_parents"])
|
1766
1806
|
]
|
1767
|
-
|
1807
|
+
napistu_graph.es["upstream_topo_weights"] = weight_table[
|
1808
|
+
"upstream_topo_weights"
|
1809
|
+
]
|
1768
1810
|
|
1769
|
-
return
|
1811
|
+
return napistu_graph
|
1770
1812
|
|
1771
1813
|
|
1772
1814
|
def _validate_entity_attrs(
|
@@ -1774,23 +1816,45 @@ def _validate_entity_attrs(
|
|
1774
1816
|
validate_transformations: bool = True,
|
1775
1817
|
custom_transformations: Optional[dict] = None,
|
1776
1818
|
) -> None:
|
1777
|
-
"""Validate that graph attributes are a valid format.
|
1819
|
+
"""Validate that graph attributes are a valid format.
|
1820
|
+
|
1821
|
+
Parameters
|
1822
|
+
----------
|
1823
|
+
entity_attrs : dict
|
1824
|
+
Dictionary of entity attributes to validate
|
1825
|
+
validate_transformations : bool, optional
|
1826
|
+
Whether to validate transformation names, by default True
|
1827
|
+
custom_transformations : Optional[dict], optional
|
1828
|
+
Dictionary of custom transformation functions, by default None
|
1829
|
+
Keys are transformation names, values are transformation functions
|
1830
|
+
|
1831
|
+
Returns
|
1832
|
+
-------
|
1833
|
+
None
|
1834
|
+
|
1835
|
+
Raises
|
1836
|
+
------
|
1837
|
+
AssertionError
|
1838
|
+
If entity_attrs is not a dictionary
|
1839
|
+
ValueError
|
1840
|
+
If a transformation is not found in DEFINED_WEIGHT_TRANSFORMATION or custom_transformations
|
1841
|
+
"""
|
1842
|
+
assert isinstance(entity_attrs, dict), "entity_attrs must be a dictionary"
|
1778
1843
|
|
1779
|
-
|
1780
|
-
for v in entity_attrs.values():
|
1844
|
+
for k, v in entity_attrs.items():
|
1781
1845
|
# check structure against pydantic config
|
1782
|
-
|
1846
|
+
validated_attrs = _EntityAttrValidator(**v).model_dump()
|
1783
1847
|
|
1784
1848
|
if validate_transformations:
|
1785
|
-
trans_name =
|
1849
|
+
trans_name = validated_attrs.get("trans", DEFAULT_WT_TRANS)
|
1786
1850
|
valid_trans = set(DEFINED_WEIGHT_TRANSFORMATION.keys())
|
1787
1851
|
if custom_transformations:
|
1788
1852
|
valid_trans = valid_trans.union(set(custom_transformations.keys()))
|
1789
1853
|
if trans_name not in valid_trans:
|
1790
1854
|
raise ValueError(
|
1791
|
-
f"transformation {trans_name} was not defined as an alias in "
|
1855
|
+
f"transformation '{trans_name}' was not defined as an alias in "
|
1792
1856
|
"DEFINED_WEIGHT_TRANSFORMATION or custom_transformations. The defined transformations "
|
1793
|
-
f"are {', '.join(valid_trans)}"
|
1857
|
+
f"are {', '.join(sorted(valid_trans))}"
|
1794
1858
|
)
|
1795
1859
|
|
1796
1860
|
return None
|