napistu 0.3.7__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__main__.py +8 -4
- napistu/constants.py +30 -35
- napistu/gcs/constants.py +11 -11
- napistu/ingestion/napistu_edgelist.py +4 -4
- napistu/matching/interactions.py +41 -39
- napistu/modify/gaps.py +2 -1
- napistu/network/constants.py +61 -45
- napistu/network/data_handling.py +1 -1
- napistu/network/neighborhoods.py +3 -3
- napistu/network/net_create.py +440 -616
- napistu/network/net_create_utils.py +734 -0
- napistu/network/net_propagation.py +1 -1
- napistu/network/{napistu_graph_core.py → ng_core.py} +57 -15
- napistu/network/ng_utils.py +28 -21
- napistu/network/paths.py +4 -4
- napistu/network/precompute.py +35 -74
- napistu/ontologies/id_tables.py +282 -0
- napistu/sbml_dfs_core.py +53 -63
- napistu/sbml_dfs_utils.py +126 -16
- napistu/utils.py +80 -5
- {napistu-0.3.7.dist-info → napistu-0.4.1.dist-info}/METADATA +7 -2
- {napistu-0.3.7.dist-info → napistu-0.4.1.dist-info}/RECORD +39 -34
- tests/conftest.py +102 -1
- tests/test_network_data_handling.py +5 -2
- tests/test_network_net_create.py +92 -201
- tests/test_network_net_create_utils.py +538 -0
- tests/test_network_ng_core.py +19 -0
- tests/test_network_ng_utils.py +1 -1
- tests/test_network_precompute.py +4 -3
- tests/test_ontologies_id_tables.py +198 -0
- tests/test_rpy2_callr.py +0 -1
- tests/test_rpy2_init.py +0 -1
- tests/test_sbml_dfs_core.py +30 -19
- tests/test_sbml_dfs_utils.py +115 -0
- tests/test_utils.py +26 -2
- {napistu-0.3.7.dist-info → napistu-0.4.1.dist-info}/WHEEL +0 -0
- {napistu-0.3.7.dist-info → napistu-0.4.1.dist-info}/entry_points.txt +0 -0
- {napistu-0.3.7.dist-info → napistu-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.3.7.dist-info → napistu-0.4.1.dist-info}/top_level.txt +0 -0
napistu/network/net_create.py
CHANGED
@@ -13,29 +13,33 @@ from pydantic import BaseModel
|
|
13
13
|
|
14
14
|
from napistu import sbml_dfs_core
|
15
15
|
from napistu import utils
|
16
|
-
from napistu.network
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
from napistu.constants import
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
from napistu.network.constants import
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
16
|
+
from napistu.network import net_create_utils
|
17
|
+
from napistu.network.ng_core import NapistuGraph
|
18
|
+
|
19
|
+
|
20
|
+
from napistu.constants import (
|
21
|
+
MINI_SBO_FROM_NAME,
|
22
|
+
SBO_MODIFIER_NAMES,
|
23
|
+
SBOTERM_NAMES,
|
24
|
+
SBML_DFS,
|
25
|
+
ENTITIES_W_DATA,
|
26
|
+
)
|
27
|
+
|
28
|
+
from napistu.network.constants import (
|
29
|
+
NAPISTU_GRAPH_NODES,
|
30
|
+
NAPISTU_GRAPH_EDGES,
|
31
|
+
NAPISTU_GRAPH_EDGE_DIRECTIONS,
|
32
|
+
NAPISTU_GRAPH_NODE_TYPES,
|
33
|
+
GRAPH_WIRING_APPROACHES,
|
34
|
+
NAPISTU_WEIGHTING_STRATEGIES,
|
35
|
+
VALID_GRAPH_WIRING_APPROACHES,
|
36
|
+
VALID_WEIGHTING_STRATEGIES,
|
37
|
+
DEFAULT_WT_TRANS,
|
38
|
+
DEFINED_WEIGHT_TRANSFORMATION,
|
39
|
+
SCORE_CALIBRATION_POINTS_DICT,
|
40
|
+
SOURCE_VARS_DICT,
|
41
|
+
DROP_REACTIONS_WHEN,
|
42
|
+
)
|
39
43
|
|
40
44
|
|
41
45
|
logger = logging.getLogger(__name__)
|
@@ -46,12 +50,13 @@ def create_napistu_graph(
|
|
46
50
|
reaction_graph_attrs: Optional[dict] = None,
|
47
51
|
directed: bool = True,
|
48
52
|
edge_reversed: bool = False,
|
49
|
-
|
53
|
+
wiring_approach: str = GRAPH_WIRING_APPROACHES.REGULATORY,
|
54
|
+
drop_reactions_when: str = DROP_REACTIONS_WHEN.SAME_TIER,
|
50
55
|
verbose: bool = False,
|
51
56
|
custom_transformations: Optional[dict] = None,
|
52
57
|
) -> NapistuGraph:
|
53
58
|
"""
|
54
|
-
Create a NapistuGraph network from a mechanistic network using one of a set of
|
59
|
+
Create a NapistuGraph network from a mechanistic network using one of a set of wiring approaches.
|
55
60
|
|
56
61
|
Parameters
|
57
62
|
----------
|
@@ -60,14 +65,20 @@ def create_napistu_graph(
|
|
60
65
|
reaction_graph_attrs : dict, optional
|
61
66
|
Dictionary containing attributes to pull out of reaction_data and a weighting scheme for the graph.
|
62
67
|
directed : bool, optional
|
63
|
-
|
68
|
+
Whether to create a directed (True) or undirected (False) graph. Default is True.
|
64
69
|
edge_reversed : bool, optional
|
65
|
-
|
66
|
-
|
70
|
+
Whether to reverse the directions of edges. Default is False.
|
71
|
+
wiring_approach : str, optional
|
67
72
|
Type of graph to create. Valid values are:
|
68
73
|
- 'bipartite': substrates and modifiers point to the reaction they drive, this reaction points to products
|
69
74
|
- 'regulatory': non-enzymatic modifiers point to enzymes, enzymes point to substrates and products
|
70
75
|
- 'surrogate': non-enzymatic modifiers -> substrates -> enzymes -> reaction -> products
|
76
|
+
- 'bipartite_og': old method for generating a true bipartite graph. Retained primarily for regression testing.
|
77
|
+
drop_reactions_when : str, optional
|
78
|
+
The condition under which to drop reactions as a network vertex. Valid values are:
|
79
|
+
- 'same_tier': drop reactions when all participants are on the same tier of a wiring hierarchy
|
80
|
+
- 'edgelist': drop reactions when the reaction species are only 2 (1 reactant + 1 product)
|
81
|
+
- 'always': drop reactions regardless of tiers
|
71
82
|
verbose : bool, optional
|
72
83
|
Extra reporting. Default is False.
|
73
84
|
custom_transformations : dict, optional
|
@@ -77,14 +88,19 @@ def create_napistu_graph(
|
|
77
88
|
-------
|
78
89
|
NapistuGraph
|
79
90
|
A NapistuGraph network (subclass of igraph.Graph).
|
91
|
+
|
92
|
+
Raises
|
93
|
+
------
|
94
|
+
ValueError
|
95
|
+
If wiring_approach is not valid or if required attributes are missing.
|
80
96
|
"""
|
81
97
|
|
82
98
|
if reaction_graph_attrs is None:
|
83
99
|
reaction_graph_attrs = {}
|
84
100
|
|
85
|
-
if
|
101
|
+
if wiring_approach not in VALID_GRAPH_WIRING_APPROACHES + ["bipartite_og"]:
|
86
102
|
raise ValueError(
|
87
|
-
f"
|
103
|
+
f"wiring_approach is not a valid value ({wiring_approach}), valid values are {','.join(VALID_GRAPH_WIRING_APPROACHES)}"
|
88
104
|
)
|
89
105
|
|
90
106
|
# fail fast if reaction_graph_attrs is not properly formatted
|
@@ -139,15 +155,17 @@ def create_napistu_graph(
|
|
139
155
|
columns={"node_id": NAPISTU_GRAPH_NODES.NAME}
|
140
156
|
)
|
141
157
|
|
142
|
-
logger.info(f"Formatting edges as a {
|
158
|
+
logger.info(f"Formatting edges as a {wiring_approach} graph")
|
143
159
|
|
144
|
-
if
|
160
|
+
if wiring_approach == "bipartite_og":
|
145
161
|
network_edges = _create_napistu_graph_bipartite(working_sbml_dfs)
|
146
|
-
elif
|
147
|
-
# pass
|
148
|
-
network_edges =
|
162
|
+
elif wiring_approach in VALID_GRAPH_WIRING_APPROACHES:
|
163
|
+
# pass wiring_approach so that an appropriate tiered schema can be used.
|
164
|
+
network_edges = create_napistu_graph_wiring(
|
165
|
+
working_sbml_dfs, wiring_approach, drop_reactions_when
|
166
|
+
)
|
149
167
|
else:
|
150
|
-
raise NotImplementedError("Invalid
|
168
|
+
raise NotImplementedError("Invalid wiring_approach")
|
151
169
|
|
152
170
|
logger.info("Adding reversibility and other meta-data from reactions_data")
|
153
171
|
augmented_network_edges = _augment_network_edges(
|
@@ -220,11 +238,16 @@ def create_napistu_graph(
|
|
220
238
|
edge_foreign_keys=(NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO),
|
221
239
|
)
|
222
240
|
|
241
|
+
# delete singleton nodes (most of these will be reaction nodes associated with pairwise interactions)
|
242
|
+
|
223
243
|
# Always return NapistuGraph
|
224
244
|
napistu_graph = NapistuGraph.from_igraph(
|
225
|
-
napistu_ig_graph,
|
245
|
+
napistu_ig_graph, wiring_approach=wiring_approach, is_reversed=edge_reversed
|
226
246
|
)
|
227
247
|
|
248
|
+
# remove singleton nodes (mostly reactions that are not part of any interaction)
|
249
|
+
napistu_graph.remove_isolated_vertices()
|
250
|
+
|
228
251
|
if edge_reversed:
|
229
252
|
logger.info("Applying edge reversal using reversal utilities")
|
230
253
|
napistu_graph.reverse_edges()
|
@@ -237,15 +260,15 @@ def process_napistu_graph(
|
|
237
260
|
reaction_graph_attrs: Optional[dict] = None,
|
238
261
|
directed: bool = True,
|
239
262
|
edge_reversed: bool = False,
|
240
|
-
|
263
|
+
wiring_approach: str = GRAPH_WIRING_APPROACHES.BIPARTITE,
|
241
264
|
weighting_strategy: str = NAPISTU_WEIGHTING_STRATEGIES.UNWEIGHTED,
|
242
265
|
verbose: bool = False,
|
243
266
|
custom_transformations: dict = None,
|
244
267
|
) -> NapistuGraph:
|
245
268
|
"""
|
246
|
-
Process Consensus Graph
|
269
|
+
Process Consensus Graph.
|
247
270
|
|
248
|
-
|
271
|
+
Sets up a NapistuGraph network and then adds weights and other malleable attributes.
|
249
272
|
|
250
273
|
Parameters
|
251
274
|
----------
|
@@ -254,16 +277,13 @@ def process_napistu_graph(
|
|
254
277
|
reaction_graph_attrs : dict, optional
|
255
278
|
Dictionary containing attributes to pull out of reaction_data and a weighting scheme for the graph.
|
256
279
|
directed : bool, optional
|
257
|
-
|
280
|
+
Whether to create a directed (True) or undirected (False) graph. Default is True.
|
258
281
|
edge_reversed : bool, optional
|
259
|
-
|
260
|
-
|
261
|
-
Type of graph to create.
|
262
|
-
- 'bipartite': substrates and modifiers point to the reaction they drive, this reaction points to products
|
263
|
-
- 'regulatory': non-enzymatic modifiers point to enzymes, enzymes point to substrates and products
|
264
|
-
- 'surrogate': non-enzymatic modifiers -> substrates -> enzymes -> reaction -> products
|
282
|
+
Whether to reverse the directions of edges. Default is False.
|
283
|
+
wiring_approach : str, optional
|
284
|
+
Type of graph to create. See `create_napistu_graph` for valid values.
|
265
285
|
weighting_strategy : str, optional
|
266
|
-
A network weighting strategy
|
286
|
+
A network weighting strategy. Options:
|
267
287
|
- 'unweighted': all weights (and upstream_weights for directed graphs) are set to 1.
|
268
288
|
- 'topology': weight edges by the degree of the source nodes favoring nodes with few connections.
|
269
289
|
- 'mixed': transform edges with a quantitative score based on reaction_attrs; and set edges without quantitative score as a source-specific weight.
|
@@ -288,7 +308,7 @@ def process_napistu_graph(
|
|
288
308
|
reaction_graph_attrs,
|
289
309
|
directed=directed,
|
290
310
|
edge_reversed=edge_reversed,
|
291
|
-
|
311
|
+
wiring_approach=wiring_approach,
|
292
312
|
verbose=verbose,
|
293
313
|
custom_transformations=custom_transformations,
|
294
314
|
)
|
@@ -309,6 +329,151 @@ def process_napistu_graph(
|
|
309
329
|
return weighted_napistu_graph
|
310
330
|
|
311
331
|
|
332
|
+
def create_napistu_graph_wiring(
|
333
|
+
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
334
|
+
wiring_approach: str,
|
335
|
+
drop_reactions_when: str = DROP_REACTIONS_WHEN.SAME_TIER,
|
336
|
+
) -> pd.DataFrame:
|
337
|
+
"""
|
338
|
+
Turn an sbml_dfs model into a tiered graph which links upstream entities to downstream ones.
|
339
|
+
|
340
|
+
Parameters
|
341
|
+
----------
|
342
|
+
sbml_dfs : sbml_dfs_core.SBML_dfs
|
343
|
+
The SBML_dfs object containing the model data.
|
344
|
+
wiring_approach : str
|
345
|
+
The wiring approach to use for the graph.
|
346
|
+
drop_reactions_when : str, optional
|
347
|
+
The condition under which to drop reactions as a network vertex. Default is 'same_tier'.
|
348
|
+
|
349
|
+
Returns
|
350
|
+
-------
|
351
|
+
pd.DataFrame
|
352
|
+
DataFrame representing the tiered network edges.
|
353
|
+
|
354
|
+
Raises
|
355
|
+
------
|
356
|
+
ValueError
|
357
|
+
If invalid SBO terms are present or required attributes are missing.
|
358
|
+
"""
|
359
|
+
|
360
|
+
# organize reaction species for defining connections
|
361
|
+
logger.info(
|
362
|
+
f"Turning {sbml_dfs.reaction_species.shape[0]} reactions species into edges."
|
363
|
+
)
|
364
|
+
|
365
|
+
all_reaction_edges_df = net_create_utils.wire_reaction_species(
|
366
|
+
sbml_dfs.reaction_species, wiring_approach, drop_reactions_when
|
367
|
+
)
|
368
|
+
|
369
|
+
logger.info(
|
370
|
+
"Adding additional attributes to edges, e.g., # of children and parents."
|
371
|
+
)
|
372
|
+
|
373
|
+
# add compartmentalized species summaries to weight edges
|
374
|
+
cspecies_features = sbml_dfs.get_cspecies_features()
|
375
|
+
|
376
|
+
# calculate undirected and directed degrees (i.e., # of parents and children)
|
377
|
+
# based on a network's edgelist. this used when the network representation is
|
378
|
+
# not the bipartite network which can be trivially obtained from the pathway
|
379
|
+
# specification
|
380
|
+
unique_edges = (
|
381
|
+
all_reaction_edges_df.groupby(
|
382
|
+
[NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO]
|
383
|
+
)
|
384
|
+
.first()
|
385
|
+
.reset_index()
|
386
|
+
)
|
387
|
+
|
388
|
+
# children
|
389
|
+
n_children = (
|
390
|
+
unique_edges[NAPISTU_GRAPH_EDGES.FROM]
|
391
|
+
.value_counts()
|
392
|
+
# rename values to the child name
|
393
|
+
.to_frame(name=NAPISTU_GRAPH_EDGES.SC_CHILDREN)
|
394
|
+
.reset_index()
|
395
|
+
.rename(
|
396
|
+
{
|
397
|
+
NAPISTU_GRAPH_EDGES.FROM: SBML_DFS.SC_ID,
|
398
|
+
},
|
399
|
+
axis=1,
|
400
|
+
)
|
401
|
+
)
|
402
|
+
|
403
|
+
# parents
|
404
|
+
n_parents = (
|
405
|
+
unique_edges[NAPISTU_GRAPH_EDGES.TO]
|
406
|
+
.value_counts()
|
407
|
+
# rename values to the parent name
|
408
|
+
.to_frame(name=NAPISTU_GRAPH_EDGES.SC_PARENTS)
|
409
|
+
.reset_index()
|
410
|
+
.rename(
|
411
|
+
{
|
412
|
+
NAPISTU_GRAPH_EDGES.TO: SBML_DFS.SC_ID,
|
413
|
+
},
|
414
|
+
axis=1,
|
415
|
+
)
|
416
|
+
)
|
417
|
+
|
418
|
+
graph_degree_by_edgelist = n_children.merge(n_parents, how="outer").fillna(int(0))
|
419
|
+
|
420
|
+
graph_degree_by_edgelist[NAPISTU_GRAPH_EDGES.SC_DEGREE] = (
|
421
|
+
graph_degree_by_edgelist[NAPISTU_GRAPH_EDGES.SC_CHILDREN]
|
422
|
+
+ graph_degree_by_edgelist[NAPISTU_GRAPH_EDGES.SC_PARENTS]
|
423
|
+
)
|
424
|
+
graph_degree_by_edgelist = (
|
425
|
+
graph_degree_by_edgelist[
|
426
|
+
~graph_degree_by_edgelist[SBML_DFS.SC_ID].str.contains("R[0-9]{8}")
|
427
|
+
]
|
428
|
+
.set_index(SBML_DFS.SC_ID)
|
429
|
+
.sort_index()
|
430
|
+
)
|
431
|
+
|
432
|
+
cspecies_features = (
|
433
|
+
cspecies_features.drop(
|
434
|
+
[
|
435
|
+
NAPISTU_GRAPH_EDGES.SC_DEGREE,
|
436
|
+
NAPISTU_GRAPH_EDGES.SC_CHILDREN,
|
437
|
+
NAPISTU_GRAPH_EDGES.SC_PARENTS,
|
438
|
+
],
|
439
|
+
axis=1,
|
440
|
+
)
|
441
|
+
.join(graph_degree_by_edgelist)
|
442
|
+
.fillna(int(0))
|
443
|
+
)
|
444
|
+
|
445
|
+
is_from_reaction = all_reaction_edges_df[NAPISTU_GRAPH_EDGES.FROM].isin(
|
446
|
+
sbml_dfs.reactions.index.tolist()
|
447
|
+
)
|
448
|
+
is_from_reaction = all_reaction_edges_df[NAPISTU_GRAPH_EDGES.FROM].isin(
|
449
|
+
sbml_dfs.reactions.index
|
450
|
+
)
|
451
|
+
# add substrate weight whenever "from" edge is a molecule
|
452
|
+
# and product weight when the "from" edge is a reaction
|
453
|
+
decorated_all_reaction_edges_df = pd.concat(
|
454
|
+
[
|
455
|
+
all_reaction_edges_df[~is_from_reaction].merge(
|
456
|
+
cspecies_features, left_on=NAPISTU_GRAPH_EDGES.FROM, right_index=True
|
457
|
+
),
|
458
|
+
all_reaction_edges_df[is_from_reaction].merge(
|
459
|
+
cspecies_features, left_on=NAPISTU_GRAPH_EDGES.TO, right_index=True
|
460
|
+
),
|
461
|
+
]
|
462
|
+
).sort_index()
|
463
|
+
|
464
|
+
if all_reaction_edges_df.shape[0] != decorated_all_reaction_edges_df.shape[0]:
|
465
|
+
msg = (
|
466
|
+
"'decorated_all_reaction_edges_df' and 'all_reaction_edges_df' should\n"
|
467
|
+
"have the same number of rows but they did not"
|
468
|
+
)
|
469
|
+
|
470
|
+
raise ValueError(msg)
|
471
|
+
|
472
|
+
logger.info(f"Done preparing {wiring_approach} graph")
|
473
|
+
|
474
|
+
return decorated_all_reaction_edges_df
|
475
|
+
|
476
|
+
|
312
477
|
def pluck_entity_data(
|
313
478
|
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
314
479
|
graph_attrs: dict[str, dict],
|
@@ -316,31 +481,35 @@ def pluck_entity_data(
|
|
316
481
|
custom_transformations: Optional[dict[str, callable]] = None,
|
317
482
|
) -> pd.DataFrame | None:
|
318
483
|
"""
|
319
|
-
Pluck Entity Attributes
|
484
|
+
Pluck Entity Attributes from an sbml_dfs based on a set of tables and variables to look for.
|
320
485
|
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
A mechanistic model
|
327
|
-
graph_attrs: dict
|
486
|
+
Parameters
|
487
|
+
----------
|
488
|
+
sbml_dfs : sbml_dfs_core.SBML_dfs
|
489
|
+
A mechanistic model.
|
490
|
+
graph_attrs : dict
|
328
491
|
A dictionary of species/reaction attributes to pull out. If the requested
|
329
492
|
data_type ("species" or "reactions") is not present as a key, or if the value
|
330
493
|
is an empty dict, this function will return None (no error).
|
331
|
-
data_type: str
|
332
|
-
"species" or "reactions" to pull out species_data or reactions_data
|
333
|
-
custom_transformations: dict[str, callable], optional
|
494
|
+
data_type : str
|
495
|
+
"species" or "reactions" to pull out species_data or reactions_data.
|
496
|
+
custom_transformations : dict[str, callable], optional
|
334
497
|
A dictionary mapping transformation names to functions. If provided, these
|
335
498
|
will be checked before built-in transformations. Example:
|
336
499
|
custom_transformations = {"square": lambda x: x**2}
|
337
500
|
|
338
|
-
Returns
|
501
|
+
Returns
|
502
|
+
-------
|
503
|
+
pd.DataFrame or None
|
339
504
|
A table where all extracted attributes are merged based on a common index or None
|
340
505
|
if no attributes were extracted. If the requested data_type is not present in
|
341
506
|
graph_attrs, or if the attribute dict is empty, returns None. This is intended
|
342
507
|
to allow optional annotation blocks.
|
343
508
|
|
509
|
+
Raises
|
510
|
+
------
|
511
|
+
ValueError
|
512
|
+
If data_type is not valid or if requested tables/variables are missing.
|
344
513
|
"""
|
345
514
|
|
346
515
|
if data_type not in ENTITIES_W_DATA:
|
@@ -408,22 +577,29 @@ def apply_weight_transformations(
|
|
408
577
|
edges_df: pd.DataFrame, reaction_attrs: dict, custom_transformations: dict = None
|
409
578
|
):
|
410
579
|
"""
|
411
|
-
Apply Weight Transformations
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
580
|
+
Apply Weight Transformations to edge attributes.
|
581
|
+
|
582
|
+
Parameters
|
583
|
+
----------
|
584
|
+
edges_df : pd.DataFrame
|
585
|
+
A table of edges and their attributes extracted from a cpr_graph.
|
586
|
+
reaction_attrs : dict
|
587
|
+
A dictionary of attributes identifying weighting attributes within
|
588
|
+
an sbml_df's reaction_data, how they will be named in edges_df (the keys),
|
589
|
+
and how they should be transformed (the "trans" aliases).
|
590
|
+
custom_transformations : dict, optional
|
591
|
+
A dictionary mapping transformation names to functions. If provided, these
|
592
|
+
will be checked before built-in transformations.
|
593
|
+
|
594
|
+
Returns
|
595
|
+
-------
|
596
|
+
pd.DataFrame
|
597
|
+
edges_df with weight variables transformed.
|
426
598
|
|
599
|
+
Raises
|
600
|
+
------
|
601
|
+
ValueError
|
602
|
+
If a weighting variable is missing or transformation is not found.
|
427
603
|
"""
|
428
604
|
|
429
605
|
_validate_entity_attrs(
|
@@ -456,20 +632,18 @@ def summarize_weight_calibration(
|
|
456
632
|
napistu_graph: NapistuGraph, reaction_attrs: dict
|
457
633
|
) -> None:
|
458
634
|
"""
|
459
|
-
Summarize Weight Calibration
|
635
|
+
Summarize Weight Calibration for a network with multiple sources for edge weights.
|
460
636
|
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
reaction_attrs (dict): a dictionary summarizing the types of weights that
|
468
|
-
exist and how they are transformed for calibration.
|
469
|
-
|
470
|
-
Returns:
|
471
|
-
None
|
637
|
+
Parameters
|
638
|
+
----------
|
639
|
+
napistu_graph : NapistuGraph
|
640
|
+
A graph where edge weights have already been calibrated.
|
641
|
+
reaction_attrs : dict
|
642
|
+
A dictionary summarizing the types of weights that exist and how they are transformed for calibration.
|
472
643
|
|
644
|
+
Returns
|
645
|
+
-------
|
646
|
+
None
|
473
647
|
"""
|
474
648
|
|
475
649
|
score_calibration_df = pd.DataFrame(SCORE_CALIBRATION_POINTS_DICT)
|
@@ -496,10 +670,7 @@ def add_graph_weights(
|
|
496
670
|
weighting_strategy: str = NAPISTU_WEIGHTING_STRATEGIES.UNWEIGHTED,
|
497
671
|
) -> NapistuGraph:
|
498
672
|
"""
|
499
|
-
Add Graph Weights
|
500
|
-
|
501
|
-
Apply a weighting strategy to generate edge weights on a NapistuGraph. For directed graphs, "upstream_weights" will
|
502
|
-
be generated as well, which should be used when searching for a node's ancestors.
|
673
|
+
Add Graph Weights to a NapistuGraph using a specified weighting strategy.
|
503
674
|
|
504
675
|
Parameters
|
505
676
|
----------
|
@@ -508,7 +679,7 @@ def add_graph_weights(
|
|
508
679
|
reaction_attrs : dict
|
509
680
|
An optional dict of reaction attributes.
|
510
681
|
weighting_strategy : str, optional
|
511
|
-
A network weighting strategy
|
682
|
+
A network weighting strategy. Options:
|
512
683
|
- 'unweighted': all weights (and upstream_weights for directed graphs) are set to 1.
|
513
684
|
- 'topology': weight edges by the degree of the source nodes favoring nodes emerging from nodes with few connections.
|
514
685
|
- 'mixed': transform edges with a quantitative score based on reaction_attrs; and set edges without quantitative score as a source-specific weight.
|
@@ -518,6 +689,11 @@ def add_graph_weights(
|
|
518
689
|
-------
|
519
690
|
NapistuGraph
|
520
691
|
The weighted NapistuGraph.
|
692
|
+
|
693
|
+
Raises
|
694
|
+
------
|
695
|
+
ValueError
|
696
|
+
If weighting_strategy is not valid.
|
521
697
|
"""
|
522
698
|
|
523
699
|
napistu_graph_updated = copy.deepcopy(napistu_graph)
|
@@ -561,7 +737,19 @@ def add_graph_weights(
|
|
561
737
|
|
562
738
|
|
563
739
|
def _create_napistu_graph_bipartite(sbml_dfs: sbml_dfs_core.SBML_dfs) -> pd.DataFrame:
|
564
|
-
"""
|
740
|
+
"""
|
741
|
+
Turn an sbml_dfs model into a bipartite graph linking molecules to reactions.
|
742
|
+
|
743
|
+
Parameters
|
744
|
+
----------
|
745
|
+
sbml_dfs : sbml_dfs_core.SBML_dfs
|
746
|
+
The SBML_dfs object containing the model data.
|
747
|
+
|
748
|
+
Returns
|
749
|
+
-------
|
750
|
+
pd.DataFrame
|
751
|
+
DataFrame representing the bipartite network edges.
|
752
|
+
"""
|
565
753
|
|
566
754
|
# setup edges
|
567
755
|
network_edges = (
|
@@ -611,362 +799,6 @@ def _create_napistu_graph_bipartite(sbml_dfs: sbml_dfs_core.SBML_dfs) -> pd.Data
|
|
611
799
|
return network_edges
|
612
800
|
|
613
801
|
|
614
|
-
def _create_napistu_graph_tiered(
|
615
|
-
sbml_dfs: sbml_dfs_core.SBML_dfs, graph_type: str
|
616
|
-
) -> pd.DataFrame:
|
617
|
-
"""Turn an sbml_dfs model into a tiered graph which links upstream entities to downstream ones."""
|
618
|
-
|
619
|
-
# check whether all expect SBO terms are present
|
620
|
-
invalid_sbo_terms = sbml_dfs.reaction_species[
|
621
|
-
~sbml_dfs.reaction_species[SBML_DFS.SBO_TERM].isin(MINI_SBO_TO_NAME.keys())
|
622
|
-
]
|
623
|
-
|
624
|
-
if invalid_sbo_terms.shape[0] != 0:
|
625
|
-
invalid_counts = invalid_sbo_terms.value_counts(SBML_DFS.SBO_TERM).to_frame("N")
|
626
|
-
if not isinstance(invalid_counts, pd.DataFrame):
|
627
|
-
raise TypeError("invalid_counts must be a pandas DataFrame")
|
628
|
-
logger.warning(utils.style_df(invalid_counts, headers="keys")) # type: ignore
|
629
|
-
raise ValueError("Some reaction species have unusable SBO terms")
|
630
|
-
|
631
|
-
# load and validate the schema of graph_type
|
632
|
-
graph_hierarchy_df = _create_graph_hierarchy_df(graph_type)
|
633
|
-
|
634
|
-
# organize reaction species for defining connections
|
635
|
-
sorted_reaction_species = sbml_dfs.reaction_species.set_index(
|
636
|
-
[SBML_DFS.R_ID, SBML_DFS.SBO_TERM]
|
637
|
-
).sort_index()
|
638
|
-
|
639
|
-
logger.info(
|
640
|
-
f"Formatting {sorted_reaction_species.shape[0]} reactions species as "
|
641
|
-
"tiered edges."
|
642
|
-
)
|
643
|
-
|
644
|
-
# infer tiered edges in each reaction
|
645
|
-
all_reaction_edges = [
|
646
|
-
_format_tiered_reaction_species(
|
647
|
-
r, sorted_reaction_species, sbml_dfs, graph_hierarchy_df
|
648
|
-
)
|
649
|
-
for r in sorted_reaction_species.index.get_level_values(SBML_DFS.R_ID).unique()
|
650
|
-
]
|
651
|
-
all_reaction_edges_df = pd.concat(all_reaction_edges).reset_index(drop=True)
|
652
|
-
|
653
|
-
# test for reactions missing substrates
|
654
|
-
r_id_list = sorted_reaction_species.index.get_level_values(0).unique()
|
655
|
-
r_id_reactant_only = [
|
656
|
-
x for x in r_id_list if len(sorted_reaction_species.loc[x]) == 1
|
657
|
-
]
|
658
|
-
|
659
|
-
if len(r_id_reactant_only) > 0:
|
660
|
-
logger.warning(f"{len(r_id_reactant_only)} reactions are missing substrates")
|
661
|
-
all_reaction_edges_df_pre = all_reaction_edges_df.copy()
|
662
|
-
all_reaction_edges_df = all_reaction_edges_df_pre[
|
663
|
-
~all_reaction_edges_df_pre[SBML_DFS.R_ID].isin(r_id_reactant_only)
|
664
|
-
]
|
665
|
-
|
666
|
-
logger.info(
|
667
|
-
"Adding additional attributes to edges, e.g., # of children and parents."
|
668
|
-
)
|
669
|
-
|
670
|
-
# add compartmentalized species summaries to weight edges
|
671
|
-
cspecies_features = sbml_dfs.get_cspecies_features()
|
672
|
-
|
673
|
-
# calculate undirected and directed degrees (i.e., # of parents and children)
|
674
|
-
# based on a network's edgelist. this used when the network representation is
|
675
|
-
# not the bipartite network which can be trivially obtained from the pathway
|
676
|
-
# specification
|
677
|
-
unique_edges = (
|
678
|
-
all_reaction_edges_df.groupby(
|
679
|
-
[NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO]
|
680
|
-
)
|
681
|
-
.first()
|
682
|
-
.reset_index()
|
683
|
-
)
|
684
|
-
|
685
|
-
# children
|
686
|
-
n_children = (
|
687
|
-
unique_edges[NAPISTU_GRAPH_EDGES.FROM]
|
688
|
-
.value_counts()
|
689
|
-
# rename values to the child name
|
690
|
-
.to_frame(name=NAPISTU_GRAPH_EDGES.SC_CHILDREN)
|
691
|
-
.reset_index()
|
692
|
-
.rename(
|
693
|
-
{
|
694
|
-
NAPISTU_GRAPH_EDGES.FROM: SBML_DFS.SC_ID,
|
695
|
-
},
|
696
|
-
axis=1,
|
697
|
-
)
|
698
|
-
)
|
699
|
-
|
700
|
-
# parents
|
701
|
-
n_parents = (
|
702
|
-
unique_edges[NAPISTU_GRAPH_EDGES.TO]
|
703
|
-
.value_counts()
|
704
|
-
# rename values to the parent name
|
705
|
-
.to_frame(name=NAPISTU_GRAPH_EDGES.SC_PARENTS)
|
706
|
-
.reset_index()
|
707
|
-
.rename(
|
708
|
-
{
|
709
|
-
NAPISTU_GRAPH_EDGES.TO: SBML_DFS.SC_ID,
|
710
|
-
},
|
711
|
-
axis=1,
|
712
|
-
)
|
713
|
-
)
|
714
|
-
|
715
|
-
graph_degree_by_edgelist = n_children.merge(n_parents, how="outer").fillna(int(0))
|
716
|
-
|
717
|
-
graph_degree_by_edgelist[NAPISTU_GRAPH_EDGES.SC_DEGREE] = (
|
718
|
-
graph_degree_by_edgelist[NAPISTU_GRAPH_EDGES.SC_CHILDREN]
|
719
|
-
+ graph_degree_by_edgelist[NAPISTU_GRAPH_EDGES.SC_PARENTS]
|
720
|
-
)
|
721
|
-
graph_degree_by_edgelist = (
|
722
|
-
graph_degree_by_edgelist[
|
723
|
-
~graph_degree_by_edgelist[SBML_DFS.SC_ID].str.contains("R[0-9]{8}")
|
724
|
-
]
|
725
|
-
.set_index(SBML_DFS.SC_ID)
|
726
|
-
.sort_index()
|
727
|
-
)
|
728
|
-
|
729
|
-
cspecies_features = (
|
730
|
-
cspecies_features.drop(
|
731
|
-
[
|
732
|
-
NAPISTU_GRAPH_EDGES.SC_DEGREE,
|
733
|
-
NAPISTU_GRAPH_EDGES.SC_CHILDREN,
|
734
|
-
NAPISTU_GRAPH_EDGES.SC_PARENTS,
|
735
|
-
],
|
736
|
-
axis=1,
|
737
|
-
)
|
738
|
-
.join(graph_degree_by_edgelist)
|
739
|
-
.fillna(int(0))
|
740
|
-
)
|
741
|
-
|
742
|
-
is_from_reaction = all_reaction_edges_df[NAPISTU_GRAPH_EDGES.FROM].isin(
|
743
|
-
sbml_dfs.reactions.index.tolist()
|
744
|
-
)
|
745
|
-
is_from_reaction = all_reaction_edges_df[NAPISTU_GRAPH_EDGES.FROM].isin(
|
746
|
-
sbml_dfs.reactions.index
|
747
|
-
)
|
748
|
-
# add substrate weight whenever "from" edge is a molecule
|
749
|
-
# and product weight when the "from" edge is a reaction
|
750
|
-
decorated_all_reaction_edges_df = pd.concat(
|
751
|
-
[
|
752
|
-
all_reaction_edges_df[~is_from_reaction].merge(
|
753
|
-
cspecies_features, left_on=NAPISTU_GRAPH_EDGES.FROM, right_index=True
|
754
|
-
),
|
755
|
-
all_reaction_edges_df[is_from_reaction].merge(
|
756
|
-
cspecies_features, left_on=NAPISTU_GRAPH_EDGES.TO, right_index=True
|
757
|
-
),
|
758
|
-
]
|
759
|
-
).sort_index()
|
760
|
-
|
761
|
-
if all_reaction_edges_df.shape[0] != decorated_all_reaction_edges_df.shape[0]:
|
762
|
-
msg = (
|
763
|
-
"'decorated_all_reaction_edges_df' and 'all_reaction_edges_df' should\n"
|
764
|
-
"have the same number of rows but they did not"
|
765
|
-
)
|
766
|
-
|
767
|
-
raise ValueError(msg)
|
768
|
-
|
769
|
-
logger.info(f"Done preparing {graph_type} graph")
|
770
|
-
|
771
|
-
return decorated_all_reaction_edges_df
|
772
|
-
|
773
|
-
|
774
|
-
def _format_tiered_reaction_species(
|
775
|
-
r_id: str,
|
776
|
-
sorted_reaction_species: pd.DataFrame,
|
777
|
-
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
778
|
-
graph_hierarchy_df: pd.DataFrame,
|
779
|
-
) -> pd.DataFrame:
|
780
|
-
"""
|
781
|
-
Format Tiered Reaction Species
|
782
|
-
|
783
|
-
Refactor a reaction's species into tiered edges between substrates, products, enzymes and allosteric regulators.
|
784
|
-
"""
|
785
|
-
|
786
|
-
rxn_species = sorted_reaction_species.loc[r_id]
|
787
|
-
if not isinstance(rxn_species, pd.DataFrame):
|
788
|
-
raise TypeError("rxn_species must be a pandas DataFrame")
|
789
|
-
if list(rxn_species.index.names) != [SBML_DFS.SBO_TERM]:
|
790
|
-
raise ValueError("rxn_species index names must be [SBML_DFS.SBO_TERM]")
|
791
|
-
if rxn_species.columns.tolist() != [SBML_DFS.SC_ID, SBML_DFS.STOICHIOMETRY]:
|
792
|
-
raise ValueError(
|
793
|
-
"rxn_species columns must be [SBML_DFS.SC_ID, SBML_DFS.STOICHIOMETRY]"
|
794
|
-
)
|
795
|
-
|
796
|
-
rxn_sbo_terms = set(rxn_species.index.unique())
|
797
|
-
# map to common names
|
798
|
-
rxn_sbo_names = {MINI_SBO_TO_NAME[x] for x in rxn_sbo_terms}
|
799
|
-
|
800
|
-
# is the reaction a general purpose interaction
|
801
|
-
if len(rxn_sbo_names) == 1:
|
802
|
-
if list(rxn_sbo_names)[0] == SBOTERM_NAMES.INTERACTOR:
|
803
|
-
# further validation happens in the function - e.g., exactly two interactors
|
804
|
-
return _format_interactors_for_tiered_graph(r_id, rxn_species, sbml_dfs)
|
805
|
-
|
806
|
-
if SBOTERM_NAMES.INTERACTOR in rxn_sbo_names:
|
807
|
-
logger.warning(
|
808
|
-
f"Invalid combinations of SBO_terms in {str(r_id)} : {sbml_dfs.reactions.loc[r_id][SBML_DFS.R_NAME]}. "
|
809
|
-
"If interactors are present then there can't be any other types of reaction species. "
|
810
|
-
f"The following roles were defined: {', '.join(rxn_sbo_names)}"
|
811
|
-
)
|
812
|
-
|
813
|
-
# reorganize molecules and the reaction itself into tiers
|
814
|
-
entities_ordered_by_tier = (
|
815
|
-
pd.concat(
|
816
|
-
[
|
817
|
-
(
|
818
|
-
rxn_species.reset_index()
|
819
|
-
.rename({SBML_DFS.SC_ID: "entity_id"}, axis=1)
|
820
|
-
.merge(graph_hierarchy_df)
|
821
|
-
),
|
822
|
-
graph_hierarchy_df[
|
823
|
-
graph_hierarchy_df[NAPISTU_GRAPH_EDGES.SBO_NAME]
|
824
|
-
== NAPISTU_GRAPH_NODE_TYPES.REACTION
|
825
|
-
].assign(entity_id=r_id, r_id=r_id),
|
826
|
-
]
|
827
|
-
)
|
828
|
-
.sort_values(["tier"])
|
829
|
-
.set_index("tier")
|
830
|
-
)
|
831
|
-
ordered_tiers = entities_ordered_by_tier.index.get_level_values("tier").unique()
|
832
|
-
|
833
|
-
if len(ordered_tiers) <= 1:
|
834
|
-
raise ValueError("ordered_tiers must have more than one element")
|
835
|
-
|
836
|
-
# which tier is the reaction?
|
837
|
-
reaction_tier = graph_hierarchy_df["tier"][
|
838
|
-
graph_hierarchy_df[NAPISTU_GRAPH_EDGES.SBO_NAME]
|
839
|
-
== NAPISTU_GRAPH_NODE_TYPES.REACTION
|
840
|
-
].tolist()[0]
|
841
|
-
|
842
|
-
rxn_edges = list()
|
843
|
-
past_reaction = False
|
844
|
-
for i in range(0, len(ordered_tiers) - 1):
|
845
|
-
formatted_tier_combo = _format_tier_combo(
|
846
|
-
entities_ordered_by_tier.loc[[ordered_tiers[i]]],
|
847
|
-
entities_ordered_by_tier.loc[[ordered_tiers[i + 1]]],
|
848
|
-
past_reaction,
|
849
|
-
)
|
850
|
-
|
851
|
-
if ordered_tiers[i + 1] == reaction_tier:
|
852
|
-
past_reaction = True
|
853
|
-
|
854
|
-
rxn_edges.append(formatted_tier_combo)
|
855
|
-
|
856
|
-
rxn_edges_df = (
|
857
|
-
pd.concat(rxn_edges)[
|
858
|
-
[
|
859
|
-
NAPISTU_GRAPH_EDGES.FROM,
|
860
|
-
NAPISTU_GRAPH_EDGES.TO,
|
861
|
-
NAPISTU_GRAPH_EDGES.STOICHIOMETRY,
|
862
|
-
NAPISTU_GRAPH_EDGES.SBO_TERM,
|
863
|
-
]
|
864
|
-
]
|
865
|
-
.reset_index(drop=True)
|
866
|
-
.assign(r_id=r_id)
|
867
|
-
)
|
868
|
-
|
869
|
-
return rxn_edges_df
|
870
|
-
|
871
|
-
|
872
|
-
def _format_tier_combo(
|
873
|
-
upstream_tier: pd.DataFrame, downstream_tier: pd.DataFrame, past_reaction: bool
|
874
|
-
) -> pd.DataFrame:
|
875
|
-
"""
|
876
|
-
Format Tier Combo
|
877
|
-
|
878
|
-
Create a set of edges crossing two tiers of a tiered graph. This will involve an
|
879
|
-
all x all combination of entries. Tiers form an ordering along the molecular entities
|
880
|
-
in a reaction plus a tier for the reaction itself. Attributes such as stoichiometry
|
881
|
-
and sbo_term will be passed from the tier which is furthest from the reaction tier
|
882
|
-
to ensure that each tier of molecular data applies its attributes to a single set of
|
883
|
-
edges while the "reaction" tier does not. Reaction entities have neither a
|
884
|
-
stoichiometery or sbo_term annotation.
|
885
|
-
|
886
|
-
Args:
|
887
|
-
upstream_tier (pd.DataFrame): A table containing upstream entities in a reaction,
|
888
|
-
e.g., regulators.
|
889
|
-
downstream_tier (pd.DataFrame): A table containing downstream entities in a reaction,
|
890
|
-
e.g., catalysts.
|
891
|
-
past_reaction (bool): if True then attributes will be taken from downstream_tier and
|
892
|
-
if False they will come from upstream_tier.
|
893
|
-
|
894
|
-
Returns:
|
895
|
-
formatted_tier_combo (pd.DataFrame): A table of edges containing (from, to, stoichiometry, sbo_term, r_id). The
|
896
|
-
number of edges is the product of the number of entities in the upstream tier
|
897
|
-
times the number in the downstream tier.
|
898
|
-
|
899
|
-
"""
|
900
|
-
|
901
|
-
upstream_fields = ["entity_id", SBML_DFS.STOICHIOMETRY, SBML_DFS.SBO_TERM]
|
902
|
-
downstream_fields = ["entity_id"]
|
903
|
-
|
904
|
-
if past_reaction:
|
905
|
-
# swap fields
|
906
|
-
upstream_fields, downstream_fields = downstream_fields, upstream_fields
|
907
|
-
|
908
|
-
formatted_tier_combo = (
|
909
|
-
upstream_tier[upstream_fields]
|
910
|
-
.rename({"entity_id": NAPISTU_GRAPH_EDGES.FROM}, axis=1)
|
911
|
-
.assign(_joiner=1)
|
912
|
-
).merge(
|
913
|
-
(
|
914
|
-
downstream_tier[downstream_fields]
|
915
|
-
.rename({"entity_id": NAPISTU_GRAPH_EDGES.TO}, axis=1)
|
916
|
-
.assign(_joiner=1)
|
917
|
-
),
|
918
|
-
left_on="_joiner",
|
919
|
-
right_on="_joiner",
|
920
|
-
)
|
921
|
-
|
922
|
-
return formatted_tier_combo
|
923
|
-
|
924
|
-
|
925
|
-
def _create_graph_hierarchy_df(graph_type: str) -> pd.DataFrame:
|
926
|
-
"""
|
927
|
-
Create Graph Hierarchy DataFrame
|
928
|
-
|
929
|
-
Format a graph hierarchy list of lists and a pd.DataFrame
|
930
|
-
|
931
|
-
Args:
|
932
|
-
graph_type (str):
|
933
|
-
The type of tiered graph to work with. Each type has its own specification in constants.py.
|
934
|
-
|
935
|
-
Returns:
|
936
|
-
A pandas DataFrame with sbo_name, tier, and sbo_term.
|
937
|
-
|
938
|
-
"""
|
939
|
-
|
940
|
-
if graph_type == NAPISTU_GRAPH_TYPES.REGULATORY:
|
941
|
-
sbo_names_hierarchy = REGULATORY_GRAPH_HIERARCHY
|
942
|
-
elif graph_type == NAPISTU_GRAPH_TYPES.SURROGATE:
|
943
|
-
sbo_names_hierarchy = SURROGATE_GRAPH_HIERARCHY
|
944
|
-
else:
|
945
|
-
raise NotImplementedError(f"{graph_type} is not a valid graph_type")
|
946
|
-
|
947
|
-
# format as a DF
|
948
|
-
graph_hierarchy_df = pd.concat(
|
949
|
-
[
|
950
|
-
pd.DataFrame({"sbo_name": sbo_names_hierarchy[i]}).assign(tier=i)
|
951
|
-
for i in range(0, len(sbo_names_hierarchy))
|
952
|
-
]
|
953
|
-
).reset_index(drop=True)
|
954
|
-
graph_hierarchy_df[SBML_DFS.SBO_TERM] = graph_hierarchy_df["sbo_name"].apply(
|
955
|
-
lambda x: (
|
956
|
-
MINI_SBO_FROM_NAME[x] if x != NAPISTU_GRAPH_NODE_TYPES.REACTION else ""
|
957
|
-
)
|
958
|
-
)
|
959
|
-
|
960
|
-
# ensure that the output is expected
|
961
|
-
utils.match_pd_vars(
|
962
|
-
graph_hierarchy_df,
|
963
|
-
req_vars={NAPISTU_GRAPH_EDGES.SBO_NAME, "tier", SBML_DFS.SBO_TERM},
|
964
|
-
allow_series=False,
|
965
|
-
).assert_present()
|
966
|
-
|
967
|
-
return graph_hierarchy_df
|
968
|
-
|
969
|
-
|
970
802
|
def _add_graph_weights_mixed(
|
971
803
|
napistu_graph: NapistuGraph, reaction_attrs: dict
|
972
804
|
) -> NapistuGraph:
|
@@ -1162,7 +994,23 @@ def _summarize_weight_calibration_table(
|
|
1162
994
|
score_calibration_df: pd.DataFrame,
|
1163
995
|
score_calibration_df_calibrated: pd.DataFrame,
|
1164
996
|
):
|
1165
|
-
"""
|
997
|
+
"""
|
998
|
+
Create a table comparing edge weights from multiple sources.
|
999
|
+
|
1000
|
+
Parameters
|
1001
|
+
----------
|
1002
|
+
calibrated_edges : pd.DataFrame
|
1003
|
+
DataFrame of calibrated edge weights.
|
1004
|
+
score_calibration_df : pd.DataFrame
|
1005
|
+
DataFrame of raw calibration points.
|
1006
|
+
score_calibration_df_calibrated : pd.DataFrame
|
1007
|
+
DataFrame of calibrated calibration points.
|
1008
|
+
|
1009
|
+
Returns
|
1010
|
+
-------
|
1011
|
+
pd.DataFrame
|
1012
|
+
Styled DataFrame summarizing calibration points and quantiles.
|
1013
|
+
"""
|
1166
1014
|
|
1167
1015
|
# generate a table summarizing different scoring measures
|
1168
1016
|
#
|
@@ -1214,7 +1062,20 @@ def _summarize_weight_calibration_table(
|
|
1214
1062
|
def _summarize_weight_calibration_plots(
|
1215
1063
|
calibrated_edges: pd.DataFrame, score_calibration_df_calibrated: pd.DataFrame
|
1216
1064
|
) -> None:
|
1217
|
-
"""
|
1065
|
+
"""
|
1066
|
+
Create plots summarizing the relationships between different scoring measures.
|
1067
|
+
|
1068
|
+
Parameters
|
1069
|
+
----------
|
1070
|
+
calibrated_edges : pd.DataFrame
|
1071
|
+
DataFrame of calibrated edge weights.
|
1072
|
+
score_calibration_df_calibrated : pd.DataFrame
|
1073
|
+
DataFrame of calibrated calibration points.
|
1074
|
+
|
1075
|
+
Returns
|
1076
|
+
-------
|
1077
|
+
None
|
1078
|
+
"""
|
1218
1079
|
|
1219
1080
|
# set up a 2 x 1 plot
|
1220
1081
|
f, (ax1, ax2) = plt.subplots(1, 2)
|
@@ -1240,28 +1101,24 @@ def _create_source_weights(
|
|
1240
1101
|
source_vars_dict: dict = SOURCE_VARS_DICT,
|
1241
1102
|
source_wt_default: int = 1,
|
1242
1103
|
) -> pd.DataFrame:
|
1243
|
-
"""
|
1244
|
-
Create
|
1245
|
-
|
1246
|
-
|
1247
|
-
|
1248
|
-
|
1249
|
-
|
1250
|
-
|
1251
|
-
|
1252
|
-
|
1253
|
-
|
1254
|
-
|
1255
|
-
|
1256
|
-
to that attribute. This value is generally the largest weight that can be assigned to an
|
1257
|
-
edge so that the numeric weight is chosen over the default.
|
1258
|
-
source_wt_default: int
|
1259
|
-
The default weight to assign to an edge if no other weight attribute is found.
|
1260
|
-
|
1261
|
-
Returns:
|
1262
|
-
pd.DataFrame
|
1263
|
-
The edges dataframe with the source weights added.
|
1104
|
+
"""
|
1105
|
+
Create weights based on an edge's source.
|
1106
|
+
|
1107
|
+
Parameters
|
1108
|
+
----------
|
1109
|
+
edges_df : pd.DataFrame
|
1110
|
+
The edges dataframe to add the source weights to.
|
1111
|
+
source_wt_var : str, optional
|
1112
|
+
The name of the column to store the source weights. Default is "source_wt".
|
1113
|
+
source_vars_dict : dict, optional
|
1114
|
+
Dictionary with keys indicating edge attributes and values indicating the weight to assign to that attribute. Default is SOURCE_VARS_DICT.
|
1115
|
+
source_wt_default : int, optional
|
1116
|
+
The default weight to assign to an edge if no other weight attribute is found. Default is 1.
|
1264
1117
|
|
1118
|
+
Returns
|
1119
|
+
-------
|
1120
|
+
pd.DataFrame
|
1121
|
+
The edges dataframe with the source weights added.
|
1265
1122
|
"""
|
1266
1123
|
|
1267
1124
|
logger.warning(
|
@@ -1299,134 +1156,59 @@ def _create_source_weights(
|
|
1299
1156
|
|
1300
1157
|
|
1301
1158
|
def _wt_transformation_identity(x):
|
1302
|
-
"""
|
1159
|
+
"""
|
1160
|
+
Identity transformation for weights.
|
1161
|
+
|
1162
|
+
Parameters
|
1163
|
+
----------
|
1164
|
+
x : any
|
1165
|
+
Input value.
|
1166
|
+
|
1167
|
+
Returns
|
1168
|
+
-------
|
1169
|
+
any
|
1170
|
+
The input value unchanged.
|
1171
|
+
"""
|
1303
1172
|
return x
|
1304
1173
|
|
1305
1174
|
|
1306
1175
|
def _wt_transformation_string(x):
|
1307
|
-
"""
|
1176
|
+
"""
|
1177
|
+
Map STRING scores to a similar scale as topology weights.
|
1178
|
+
|
1179
|
+
Parameters
|
1180
|
+
----------
|
1181
|
+
x : float
|
1182
|
+
STRING score.
|
1308
1183
|
|
1184
|
+
Returns
|
1185
|
+
-------
|
1186
|
+
float
|
1187
|
+
Transformed STRING score.
|
1188
|
+
"""
|
1309
1189
|
return 250000 / np.power(x, 1.7)
|
1310
1190
|
|
1311
1191
|
|
1312
1192
|
def _wt_transformation_string_inv(x):
|
1313
|
-
"""Map STRING scores so they work with source weights."""
|
1314
|
-
|
1315
|
-
# string scores are bounded on [0, 1000]
|
1316
|
-
# and score/1000 is roughly a probability that
|
1317
|
-
# there is a real interaction (physical, genetic, ...)
|
1318
|
-
# reported string scores are currently on [150, 1000]
|
1319
|
-
# so this transformation will map these onto {6.67, 1}
|
1320
|
-
|
1321
|
-
return 1 / (x / 1000)
|
1322
|
-
|
1323
|
-
|
1324
|
-
def _format_interactors_for_tiered_graph(
|
1325
|
-
r_id: str, rxn_species: pd.DataFrame, sbml_dfs: sbml_dfs_core.SBML_dfs
|
1326
|
-
) -> pd.DataFrame:
|
1327
|
-
"""Format an undirected interactions for tiered graph so interactions are linked even though they would be on the same tier."""
|
1328
|
-
|
1329
|
-
interactor_data = rxn_species.loc[MINI_SBO_FROM_NAME["interactor"]]
|
1330
|
-
if interactor_data.shape[0] != 2:
|
1331
|
-
raise ValueError(
|
1332
|
-
f"{interactor_data.shape[0]} interactors present for {str(r_id)} : "
|
1333
|
-
f"{sbml_dfs.reactions.loc[r_id]['r_name']}. "
|
1334
|
-
"Reactions with interactors must have exactly two interactors"
|
1335
|
-
)
|
1336
|
-
|
1337
|
-
if not (interactor_data["stoichiometry"] == 0).any():
|
1338
|
-
raise ValueError(
|
1339
|
-
f"Interactors had non-zero stoichiometry for {str(r_id)} : {sbml_dfs.reactions.loc[r_id]['r_name']}. "
|
1340
|
-
"If stoichiometry is important for this reaction then it should use other SBO terms "
|
1341
|
-
"(e.g., substrate and product)."
|
1342
|
-
)
|
1343
|
-
|
1344
|
-
# set the first entry as "from" and second as "to" if stoi is zero.
|
1345
|
-
# the reverse reaction will generally be added later because these
|
1346
|
-
# reactions should be reversible
|
1347
|
-
|
1348
|
-
return pd.DataFrame(
|
1349
|
-
{
|
1350
|
-
"from": interactor_data["sc_id"].iloc[0],
|
1351
|
-
"to": interactor_data["sc_id"].iloc[1],
|
1352
|
-
"sbo_term": MINI_SBO_FROM_NAME["interactor"],
|
1353
|
-
"stoichiometry": 0,
|
1354
|
-
"r_id": r_id,
|
1355
|
-
},
|
1356
|
-
index=[0],
|
1357
|
-
)
|
1358
|
-
|
1359
|
-
|
1360
|
-
def _add_graph_species_attribute(
|
1361
|
-
napistu_graph: NapistuGraph,
|
1362
|
-
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
1363
|
-
species_graph_attrs: dict,
|
1364
|
-
custom_transformations: Optional[dict] = None,
|
1365
|
-
) -> NapistuGraph:
|
1366
1193
|
"""
|
1367
|
-
|
1368
|
-
|
1369
|
-
This function augments the vertices of a NapistuGraph network with additional attributes
|
1370
|
-
derived from the species-level data in the provided SBML_dfs object. The attributes
|
1371
|
-
to add are specified in the species_graph_attrs dictionary, and can be transformed
|
1372
|
-
using either built-in or user-supplied transformation functions.
|
1194
|
+
Map STRING scores so they work with source weights.
|
1373
1195
|
|
1374
1196
|
Parameters
|
1375
1197
|
----------
|
1376
|
-
|
1377
|
-
|
1378
|
-
sbml_dfs : sbml_dfs_core.SBML_dfs
|
1379
|
-
The SBML_dfs object containing species data.
|
1380
|
-
species_graph_attrs : dict
|
1381
|
-
Dictionary specifying which attributes to pull from species_data and how to transform them.
|
1382
|
-
The structure should be {attribute_name: {"table": ..., "variable": ..., "trans": ...}}.
|
1383
|
-
custom_transformations : dict, optional
|
1384
|
-
Dictionary mapping transformation names to functions. If provided, these will be checked
|
1385
|
-
before built-in transformations. Example: {"square": lambda x: x**2}
|
1198
|
+
x : float
|
1199
|
+
STRING score.
|
1386
1200
|
|
1387
1201
|
Returns
|
1388
1202
|
-------
|
1389
|
-
|
1390
|
-
|
1203
|
+
float
|
1204
|
+
Inverse transformed STRING score.
|
1391
1205
|
"""
|
1392
|
-
|
1393
|
-
|
1394
|
-
|
1395
|
-
|
1396
|
-
|
1397
|
-
|
1398
|
-
# also flatten attribute list to be added to vertex nodes
|
1399
|
-
sp_graph_key_list = []
|
1400
|
-
sp_node_attr_list = []
|
1401
|
-
for k in species_graph_attrs.keys():
|
1402
|
-
_validate_entity_attrs(
|
1403
|
-
species_graph_attrs[k], custom_transformations=custom_transformations
|
1404
|
-
)
|
1405
|
-
|
1406
|
-
sp_graph_key_list.append(k)
|
1407
|
-
sp_node_attr_list.append(list(species_graph_attrs[k].keys()))
|
1408
|
-
|
1409
|
-
# flatten sp_node_attr_list
|
1410
|
-
flat_sp_node_attr_list = [item for items in sp_node_attr_list for item in items]
|
1411
|
-
|
1412
|
-
logger.info("Adding meta-data from species_data")
|
1413
|
-
|
1414
|
-
curr_network_nodes_df = napistu_graph.get_vertex_dataframe()
|
1415
|
-
|
1416
|
-
# add species-level attributes to nodes dataframe
|
1417
|
-
augmented_network_nodes_df = _augment_network_nodes(
|
1418
|
-
curr_network_nodes_df,
|
1419
|
-
sbml_dfs,
|
1420
|
-
species_graph_attrs,
|
1421
|
-
custom_transformations=custom_transformations,
|
1422
|
-
)
|
1423
|
-
|
1424
|
-
for vs_attr in flat_sp_node_attr_list:
|
1425
|
-
# in case more than one vs_attr in the flat_sp_node_attr_list
|
1426
|
-
logger.info(f"Adding new attribute {vs_attr} to vertices")
|
1427
|
-
napistu_graph.vs[vs_attr] = augmented_network_nodes_df[vs_attr].values
|
1428
|
-
|
1429
|
-
return napistu_graph
|
1206
|
+
# string scores are bounded on [0, 1000]
|
1207
|
+
# and score/1000 is roughly a probability that
|
1208
|
+
# there is a real interaction (physical, genetic, ...)
|
1209
|
+
# reported string scores are currently on [150, 1000]
|
1210
|
+
# so this transformation will map these onto {6.67, 1}
|
1211
|
+
return 1 / (x / 1000)
|
1430
1212
|
|
1431
1213
|
|
1432
1214
|
def _augment_network_nodes(
|
@@ -1448,7 +1230,7 @@ def _augment_network_nodes(
|
|
1448
1230
|
DataFrame of network nodes. Must include columns 'name', 'node_name', and 'node_type'.
|
1449
1231
|
sbml_dfs : sbml_dfs_core.SBML_dfs
|
1450
1232
|
The SBML_dfs object containing species data.
|
1451
|
-
species_graph_attrs : dict
|
1233
|
+
species_graph_attrs : dict, optional
|
1452
1234
|
Dictionary specifying which attributes to pull from species_data and how to transform them.
|
1453
1235
|
The structure should be {attribute_name: {"table": ..., "variable": ..., "trans": ...}}.
|
1454
1236
|
custom_transformations : dict, optional
|
@@ -1459,6 +1241,11 @@ def _augment_network_nodes(
|
|
1459
1241
|
-------
|
1460
1242
|
pd.DataFrame
|
1461
1243
|
The input network_nodes DataFrame with additional columns for each extracted and transformed attribute.
|
1244
|
+
|
1245
|
+
Raises
|
1246
|
+
------
|
1247
|
+
ValueError
|
1248
|
+
If required attributes are missing from network_nodes.
|
1462
1249
|
"""
|
1463
1250
|
REQUIRED_NETWORK_NODE_ATTRS = {
|
1464
1251
|
"name",
|
@@ -1522,7 +1309,8 @@ def _augment_network_edges(
|
|
1522
1309
|
reaction_graph_attrs: dict = dict(),
|
1523
1310
|
custom_transformations: Optional[dict] = None,
|
1524
1311
|
) -> pd.DataFrame:
|
1525
|
-
"""
|
1312
|
+
"""
|
1313
|
+
Add reversibility and other metadata from reactions.
|
1526
1314
|
|
1527
1315
|
Parameters
|
1528
1316
|
----------
|
@@ -1530,10 +1318,20 @@ def _augment_network_edges(
|
|
1530
1318
|
DataFrame of network edges.
|
1531
1319
|
sbml_dfs : sbml_dfs_core.SBML_dfs
|
1532
1320
|
The SBML_dfs object containing reaction data.
|
1533
|
-
reaction_graph_attrs : dict
|
1321
|
+
reaction_graph_attrs : dict, optional
|
1534
1322
|
Dictionary of reaction attributes to add.
|
1535
1323
|
custom_transformations : dict, optional
|
1536
1324
|
Dictionary of custom transformation functions to use for attribute transformation.
|
1325
|
+
|
1326
|
+
Returns
|
1327
|
+
-------
|
1328
|
+
pd.DataFrame
|
1329
|
+
DataFrame of network edges with additional metadata.
|
1330
|
+
|
1331
|
+
Raises
|
1332
|
+
------
|
1333
|
+
ValueError
|
1334
|
+
If required attributes are missing from network_edges.
|
1537
1335
|
"""
|
1538
1336
|
REQUIRED_NETWORK_EDGE_ATTRS = {
|
1539
1337
|
"from",
|
@@ -1583,7 +1381,24 @@ def _augment_network_edges(
|
|
1583
1381
|
|
1584
1382
|
|
1585
1383
|
def _reverse_network_edges(augmented_network_edges: pd.DataFrame) -> pd.DataFrame:
|
1586
|
-
"""
|
1384
|
+
"""
|
1385
|
+
Flip reversible reactions to derive the reverse reaction.
|
1386
|
+
|
1387
|
+
Parameters
|
1388
|
+
----------
|
1389
|
+
augmented_network_edges : pd.DataFrame
|
1390
|
+
DataFrame of network edges with metadata.
|
1391
|
+
|
1392
|
+
Returns
|
1393
|
+
-------
|
1394
|
+
pd.DataFrame
|
1395
|
+
DataFrame with reversed edges for reversible reactions.
|
1396
|
+
|
1397
|
+
Raises
|
1398
|
+
------
|
1399
|
+
ValueError
|
1400
|
+
If required variables are missing or if the transformation fails.
|
1401
|
+
"""
|
1587
1402
|
|
1588
1403
|
# validate inputs
|
1589
1404
|
required_vars = {NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO}
|
@@ -1684,27 +1499,36 @@ def _create_topology_weights(
|
|
1684
1499
|
scale_multiplier_by_meandegree: bool = True,
|
1685
1500
|
) -> ig.Graph:
|
1686
1501
|
"""
|
1687
|
-
Create Topology Weights
|
1688
|
-
|
1689
|
-
|
1690
|
-
|
1691
|
-
|
1692
|
-
flawed heuristic which can be combined with more principled weighting
|
1693
|
-
schemes.
|
1694
|
-
|
1695
|
-
Args:
|
1696
|
-
napistu_graph (ig.Graph): a graph containing connections between molecules, proteins, and reactions.
|
1697
|
-
base_score (float): offset which will be added to all weights.
|
1698
|
-
protein_multiplier (int): multiplier for non-metabolite species (lower weight paths will tend to be selected).
|
1699
|
-
metabolite_multiplier (int): multiplier for metabolites [defined a species with a ChEBI ID).
|
1700
|
-
unknown_multiplier (int): multiplier for species without any identifier. See sbml_dfs_utils.species_type_types.
|
1701
|
-
scale_multiplier_by_meandegree (bool): if True then multipliers will be rescaled by the average number of
|
1702
|
-
connections a node has (i.e., its degree) so that weights will be relatively similar regardless of network
|
1703
|
-
size and sparsity.
|
1704
|
-
|
1705
|
-
Returns:
|
1706
|
-
napistu_graph (ig.Graph): graph with added topology weights
|
1502
|
+
Create Topology Weights for a network based on its topology.
|
1503
|
+
|
1504
|
+
Edges downstream of nodes with many connections receive a higher weight suggesting that any one
|
1505
|
+
of them is less likely to be regulatory. This is a simple and clearly flawed heuristic which can be
|
1506
|
+
combined with more principled weighting schemes.
|
1707
1507
|
|
1508
|
+
Parameters
|
1509
|
+
----------
|
1510
|
+
napistu_graph : ig.Graph
|
1511
|
+
A graph containing connections between molecules, proteins, and reactions.
|
1512
|
+
base_score : float, optional
|
1513
|
+
Offset which will be added to all weights. Default is 2.
|
1514
|
+
protein_multiplier : int, optional
|
1515
|
+
Multiplier for non-metabolite species. Default is 1.
|
1516
|
+
metabolite_multiplier : int, optional
|
1517
|
+
Multiplier for metabolites. Default is 3.
|
1518
|
+
unknown_multiplier : int, optional
|
1519
|
+
Multiplier for species without any identifier. Default is 10.
|
1520
|
+
scale_multiplier_by_meandegree : bool, optional
|
1521
|
+
If True, multipliers will be rescaled by the average number of connections a node has. Default is True.
|
1522
|
+
|
1523
|
+
Returns
|
1524
|
+
-------
|
1525
|
+
ig.Graph
|
1526
|
+
Graph with added topology weights.
|
1527
|
+
|
1528
|
+
Raises
|
1529
|
+
------
|
1530
|
+
ValueError
|
1531
|
+
If required attributes are missing or if parameters are invalid.
|
1708
1532
|
"""
|
1709
1533
|
|
1710
1534
|
# check for required attribute before proceeding
|
@@ -1816,17 +1640,17 @@ def _validate_entity_attrs(
|
|
1816
1640
|
validate_transformations: bool = True,
|
1817
1641
|
custom_transformations: Optional[dict] = None,
|
1818
1642
|
) -> None:
|
1819
|
-
"""
|
1643
|
+
"""
|
1644
|
+
Validate that graph attributes are a valid format.
|
1820
1645
|
|
1821
1646
|
Parameters
|
1822
1647
|
----------
|
1823
1648
|
entity_attrs : dict
|
1824
|
-
Dictionary of entity attributes to validate
|
1649
|
+
Dictionary of entity attributes to validate.
|
1825
1650
|
validate_transformations : bool, optional
|
1826
|
-
Whether to validate transformation names, by default True
|
1827
|
-
custom_transformations :
|
1828
|
-
Dictionary of custom transformation functions, by default None
|
1829
|
-
Keys are transformation names, values are transformation functions
|
1651
|
+
Whether to validate transformation names, by default True.
|
1652
|
+
custom_transformations : dict, optional
|
1653
|
+
Dictionary of custom transformation functions, by default None. Keys are transformation names, values are transformation functions.
|
1830
1654
|
|
1831
1655
|
Returns
|
1832
1656
|
-------
|
@@ -1835,9 +1659,9 @@ def _validate_entity_attrs(
|
|
1835
1659
|
Raises
|
1836
1660
|
------
|
1837
1661
|
AssertionError
|
1838
|
-
If entity_attrs is not a dictionary
|
1662
|
+
If entity_attrs is not a dictionary.
|
1839
1663
|
ValueError
|
1840
|
-
If a transformation is not found in DEFINED_WEIGHT_TRANSFORMATION or custom_transformations
|
1664
|
+
If a transformation is not found in DEFINED_WEIGHT_TRANSFORMATION or custom_transformations.
|
1841
1665
|
"""
|
1842
1666
|
assert isinstance(entity_attrs, dict), "entity_attrs must be a dictionary"
|
1843
1667
|
|