napistu 0.3.6__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. napistu/__main__.py +28 -13
  2. napistu/consensus.py +19 -25
  3. napistu/constants.py +102 -83
  4. napistu/indices.py +3 -1
  5. napistu/ingestion/napistu_edgelist.py +4 -4
  6. napistu/ingestion/sbml.py +298 -295
  7. napistu/ingestion/string.py +14 -18
  8. napistu/ingestion/trrust.py +22 -27
  9. napistu/matching/interactions.py +41 -39
  10. napistu/matching/species.py +1 -1
  11. napistu/modify/gaps.py +2 -1
  12. napistu/network/constants.py +61 -45
  13. napistu/network/data_handling.py +1 -1
  14. napistu/network/neighborhoods.py +3 -3
  15. napistu/network/net_create.py +440 -616
  16. napistu/network/net_create_utils.py +734 -0
  17. napistu/network/net_propagation.py +1 -1
  18. napistu/network/{napistu_graph_core.py → ng_core.py} +57 -15
  19. napistu/network/ng_utils.py +28 -21
  20. napistu/network/paths.py +4 -4
  21. napistu/network/precompute.py +35 -74
  22. napistu/ontologies/genodexito.py +5 -1
  23. napistu/ontologies/renaming.py +4 -0
  24. napistu/sbml_dfs_core.py +127 -64
  25. napistu/sbml_dfs_utils.py +50 -0
  26. napistu/utils.py +132 -46
  27. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/METADATA +2 -2
  28. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/RECORD +47 -44
  29. tests/conftest.py +171 -13
  30. tests/test_consensus.py +74 -5
  31. tests/test_gaps.py +26 -15
  32. tests/test_network_data_handling.py +5 -2
  33. tests/test_network_net_create.py +93 -202
  34. tests/test_network_net_create_utils.py +538 -0
  35. tests/test_network_ng_core.py +19 -0
  36. tests/test_network_ng_utils.py +1 -1
  37. tests/test_network_precompute.py +5 -4
  38. tests/test_ontologies_renaming.py +28 -24
  39. tests/test_rpy2_callr.py +0 -1
  40. tests/test_rpy2_init.py +0 -1
  41. tests/test_sbml_dfs_core.py +165 -15
  42. tests/test_sbml_dfs_utils.py +45 -0
  43. tests/test_utils.py +45 -2
  44. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/WHEEL +0 -0
  45. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/entry_points.txt +0 -0
  46. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/licenses/LICENSE +0 -0
  47. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/top_level.txt +0 -0
@@ -13,29 +13,33 @@ from pydantic import BaseModel
13
13
 
14
14
  from napistu import sbml_dfs_core
15
15
  from napistu import utils
16
- from napistu.network.napistu_graph_core import NapistuGraph
17
-
18
- from napistu.constants import MINI_SBO_FROM_NAME
19
- from napistu.constants import MINI_SBO_TO_NAME
20
- from napistu.constants import SBML_DFS
21
- from napistu.constants import SBO_MODIFIER_NAMES
22
- from napistu.constants import ENTITIES_W_DATA
23
-
24
- from napistu.network.constants import NAPISTU_GRAPH_NODES
25
- from napistu.network.constants import NAPISTU_GRAPH_EDGES
26
- from napistu.network.constants import NAPISTU_GRAPH_EDGE_DIRECTIONS
27
- from napistu.network.constants import NAPISTU_GRAPH_NODE_TYPES
28
- from napistu.network.constants import NAPISTU_GRAPH_TYPES
29
- from napistu.network.constants import NAPISTU_WEIGHTING_STRATEGIES
30
- from napistu.network.constants import SBOTERM_NAMES
31
- from napistu.network.constants import REGULATORY_GRAPH_HIERARCHY
32
- from napistu.network.constants import SURROGATE_GRAPH_HIERARCHY
33
- from napistu.network.constants import VALID_NAPISTU_GRAPH_TYPES
34
- from napistu.network.constants import VALID_WEIGHTING_STRATEGIES
35
- from napistu.network.constants import DEFAULT_WT_TRANS
36
- from napistu.network.constants import DEFINED_WEIGHT_TRANSFORMATION
37
- from napistu.network.constants import SCORE_CALIBRATION_POINTS_DICT
38
- from napistu.network.constants import SOURCE_VARS_DICT
16
+ from napistu.network import net_create_utils
17
+ from napistu.network.ng_core import NapistuGraph
18
+
19
+
20
+ from napistu.constants import (
21
+ MINI_SBO_FROM_NAME,
22
+ SBO_MODIFIER_NAMES,
23
+ SBOTERM_NAMES,
24
+ SBML_DFS,
25
+ ENTITIES_W_DATA,
26
+ )
27
+
28
+ from napistu.network.constants import (
29
+ NAPISTU_GRAPH_NODES,
30
+ NAPISTU_GRAPH_EDGES,
31
+ NAPISTU_GRAPH_EDGE_DIRECTIONS,
32
+ NAPISTU_GRAPH_NODE_TYPES,
33
+ GRAPH_WIRING_APPROACHES,
34
+ NAPISTU_WEIGHTING_STRATEGIES,
35
+ VALID_GRAPH_WIRING_APPROACHES,
36
+ VALID_WEIGHTING_STRATEGIES,
37
+ DEFAULT_WT_TRANS,
38
+ DEFINED_WEIGHT_TRANSFORMATION,
39
+ SCORE_CALIBRATION_POINTS_DICT,
40
+ SOURCE_VARS_DICT,
41
+ DROP_REACTIONS_WHEN,
42
+ )
39
43
 
40
44
 
41
45
  logger = logging.getLogger(__name__)
@@ -46,12 +50,13 @@ def create_napistu_graph(
46
50
  reaction_graph_attrs: Optional[dict] = None,
47
51
  directed: bool = True,
48
52
  edge_reversed: bool = False,
49
- graph_type: str = NAPISTU_GRAPH_TYPES.REGULATORY,
53
+ wiring_approach: str = GRAPH_WIRING_APPROACHES.REGULATORY,
54
+ drop_reactions_when: str = DROP_REACTIONS_WHEN.SAME_TIER,
50
55
  verbose: bool = False,
51
56
  custom_transformations: Optional[dict] = None,
52
57
  ) -> NapistuGraph:
53
58
  """
54
- Create a NapistuGraph network from a mechanistic network using one of a set of graph_types.
59
+ Create a NapistuGraph network from a mechanistic network using one of a set of wiring approaches.
55
60
 
56
61
  Parameters
57
62
  ----------
@@ -60,14 +65,20 @@ def create_napistu_graph(
60
65
  reaction_graph_attrs : dict, optional
61
66
  Dictionary containing attributes to pull out of reaction_data and a weighting scheme for the graph.
62
67
  directed : bool, optional
63
- Should a directed (True) or undirected graph be made (False). Default is True.
68
+ Whether to create a directed (True) or undirected (False) graph. Default is True.
64
69
  edge_reversed : bool, optional
65
- Should the directions of edges be reversed or not (False). Default is False.
66
- graph_type : str, optional
70
+ Whether to reverse the directions of edges. Default is False.
71
+ wiring_approach : str, optional
67
72
  Type of graph to create. Valid values are:
68
73
  - 'bipartite': substrates and modifiers point to the reaction they drive, this reaction points to products
69
74
  - 'regulatory': non-enzymatic modifiers point to enzymes, enzymes point to substrates and products
70
75
  - 'surrogate': non-enzymatic modifiers -> substrates -> enzymes -> reaction -> products
76
+ - 'bipartite_og': old method for generating a true bipartite graph. Retained primarily for regression testing.
77
+ drop_reactions_when : str, optional
78
+ The condition under which to drop reactions as a network vertex. Valid values are:
79
+ - 'same_tier': drop reactions when all participants are on the same tier of a wiring hierarchy
80
+ - 'edgelist': drop reactions when the reaction species are only 2 (1 reactant + 1 product)
81
+ - 'always': drop reactions regardless of tiers
71
82
  verbose : bool, optional
72
83
  Extra reporting. Default is False.
73
84
  custom_transformations : dict, optional
@@ -77,14 +88,19 @@ def create_napistu_graph(
77
88
  -------
78
89
  NapistuGraph
79
90
  A NapistuGraph network (subclass of igraph.Graph).
91
+
92
+ Raises
93
+ ------
94
+ ValueError
95
+ If wiring_approach is not valid or if required attributes are missing.
80
96
  """
81
97
 
82
98
  if reaction_graph_attrs is None:
83
99
  reaction_graph_attrs = {}
84
100
 
85
- if graph_type not in VALID_NAPISTU_GRAPH_TYPES:
101
+ if wiring_approach not in VALID_GRAPH_WIRING_APPROACHES + ["bipartite_og"]:
86
102
  raise ValueError(
87
- f"graph_type is not a valid value ({graph_type}), valid values are {','.join(VALID_NAPISTU_GRAPH_TYPES)}"
103
+ f"wiring_approach is not a valid value ({wiring_approach}), valid values are {','.join(VALID_GRAPH_WIRING_APPROACHES)}"
88
104
  )
89
105
 
90
106
  # fail fast if reaction_graph_attrs is not properly formatted
@@ -139,15 +155,17 @@ def create_napistu_graph(
139
155
  columns={"node_id": NAPISTU_GRAPH_NODES.NAME}
140
156
  )
141
157
 
142
- logger.info(f"Formatting edges as a {graph_type} graph")
158
+ logger.info(f"Formatting edges as a {wiring_approach} graph")
143
159
 
144
- if graph_type == NAPISTU_GRAPH_TYPES.BIPARTITE:
160
+ if wiring_approach == "bipartite_og":
145
161
  network_edges = _create_napistu_graph_bipartite(working_sbml_dfs)
146
- elif graph_type in [NAPISTU_GRAPH_TYPES.REGULATORY, NAPISTU_GRAPH_TYPES.SURROGATE]:
147
- # pass graph_type so that an appropriate tiered schema can be used.
148
- network_edges = _create_napistu_graph_tiered(working_sbml_dfs, graph_type)
162
+ elif wiring_approach in VALID_GRAPH_WIRING_APPROACHES:
163
+ # pass wiring_approach so that an appropriate tiered schema can be used.
164
+ network_edges = create_napistu_graph_wiring(
165
+ working_sbml_dfs, wiring_approach, drop_reactions_when
166
+ )
149
167
  else:
150
- raise NotImplementedError("Invalid graph_type")
168
+ raise NotImplementedError("Invalid wiring_approach")
151
169
 
152
170
  logger.info("Adding reversibility and other meta-data from reactions_data")
153
171
  augmented_network_edges = _augment_network_edges(
@@ -220,11 +238,16 @@ def create_napistu_graph(
220
238
  edge_foreign_keys=(NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO),
221
239
  )
222
240
 
241
+ # delete singleton nodes (most of these will be reaction nodes associated with pairwise interactions)
242
+
223
243
  # Always return NapistuGraph
224
244
  napistu_graph = NapistuGraph.from_igraph(
225
- napistu_ig_graph, graph_type=graph_type, is_reversed=edge_reversed
245
+ napistu_ig_graph, wiring_approach=wiring_approach, is_reversed=edge_reversed
226
246
  )
227
247
 
248
+ # remove singleton nodes (mostly reactions that are not part of any interaction)
249
+ napistu_graph.remove_isolated_vertices()
250
+
228
251
  if edge_reversed:
229
252
  logger.info("Applying edge reversal using reversal utilities")
230
253
  napistu_graph.reverse_edges()
@@ -237,15 +260,15 @@ def process_napistu_graph(
237
260
  reaction_graph_attrs: Optional[dict] = None,
238
261
  directed: bool = True,
239
262
  edge_reversed: bool = False,
240
- graph_type: str = NAPISTU_GRAPH_TYPES.BIPARTITE,
263
+ wiring_approach: str = GRAPH_WIRING_APPROACHES.BIPARTITE,
241
264
  weighting_strategy: str = NAPISTU_WEIGHTING_STRATEGIES.UNWEIGHTED,
242
265
  verbose: bool = False,
243
266
  custom_transformations: dict = None,
244
267
  ) -> NapistuGraph:
245
268
  """
246
- Process Consensus Graph
269
+ Process Consensus Graph.
247
270
 
248
- Setup a NapistuGraph network and then add weights and other malleable attributes.
271
+ Sets up a NapistuGraph network and then adds weights and other malleable attributes.
249
272
 
250
273
  Parameters
251
274
  ----------
@@ -254,16 +277,13 @@ def process_napistu_graph(
254
277
  reaction_graph_attrs : dict, optional
255
278
  Dictionary containing attributes to pull out of reaction_data and a weighting scheme for the graph.
256
279
  directed : bool, optional
257
- Should a directed (True) or undirected graph be made (False). Default is True.
280
+ Whether to create a directed (True) or undirected (False) graph. Default is True.
258
281
  edge_reversed : bool, optional
259
- Should directions of edges be reversed (False). Default is False.
260
- graph_type : str, optional
261
- Type of graph to create. Valid values are:
262
- - 'bipartite': substrates and modifiers point to the reaction they drive, this reaction points to products
263
- - 'regulatory': non-enzymatic modifiers point to enzymes, enzymes point to substrates and products
264
- - 'surrogate': non-enzymatic modifiers -> substrates -> enzymes -> reaction -> products
282
+ Whether to reverse the directions of edges. Default is False.
283
+ wiring_approach : str, optional
284
+ Type of graph to create. See `create_napistu_graph` for valid values.
265
285
  weighting_strategy : str, optional
266
- A network weighting strategy with options:
286
+ A network weighting strategy. Options:
267
287
  - 'unweighted': all weights (and upstream_weights for directed graphs) are set to 1.
268
288
  - 'topology': weight edges by the degree of the source nodes favoring nodes with few connections.
269
289
  - 'mixed': transform edges with a quantitative score based on reaction_attrs; and set edges without quantitative score as a source-specific weight.
@@ -288,7 +308,7 @@ def process_napistu_graph(
288
308
  reaction_graph_attrs,
289
309
  directed=directed,
290
310
  edge_reversed=edge_reversed,
291
- graph_type=graph_type,
311
+ wiring_approach=wiring_approach,
292
312
  verbose=verbose,
293
313
  custom_transformations=custom_transformations,
294
314
  )
@@ -309,6 +329,151 @@ def process_napistu_graph(
309
329
  return weighted_napistu_graph
310
330
 
311
331
 
332
+ def create_napistu_graph_wiring(
333
+ sbml_dfs: sbml_dfs_core.SBML_dfs,
334
+ wiring_approach: str,
335
+ drop_reactions_when: str = DROP_REACTIONS_WHEN.SAME_TIER,
336
+ ) -> pd.DataFrame:
337
+ """
338
+ Turn an sbml_dfs model into a tiered graph which links upstream entities to downstream ones.
339
+
340
+ Parameters
341
+ ----------
342
+ sbml_dfs : sbml_dfs_core.SBML_dfs
343
+ The SBML_dfs object containing the model data.
344
+ wiring_approach : str
345
+ The wiring approach to use for the graph.
346
+ drop_reactions_when : str, optional
347
+ The condition under which to drop reactions as a network vertex. Default is 'same_tier'.
348
+
349
+ Returns
350
+ -------
351
+ pd.DataFrame
352
+ DataFrame representing the tiered network edges.
353
+
354
+ Raises
355
+ ------
356
+ ValueError
357
+ If invalid SBO terms are present or required attributes are missing.
358
+ """
359
+
360
+ # organize reaction species for defining connections
361
+ logger.info(
362
+ f"Turning {sbml_dfs.reaction_species.shape[0]} reactions species into edges."
363
+ )
364
+
365
+ all_reaction_edges_df = net_create_utils.wire_reaction_species(
366
+ sbml_dfs.reaction_species, wiring_approach, drop_reactions_when
367
+ )
368
+
369
+ logger.info(
370
+ "Adding additional attributes to edges, e.g., # of children and parents."
371
+ )
372
+
373
+ # add compartmentalized species summaries to weight edges
374
+ cspecies_features = sbml_dfs.get_cspecies_features()
375
+
376
+ # calculate undirected and directed degrees (i.e., # of parents and children)
377
+ # based on a network's edgelist. this used when the network representation is
378
+ # not the bipartite network which can be trivially obtained from the pathway
379
+ # specification
380
+ unique_edges = (
381
+ all_reaction_edges_df.groupby(
382
+ [NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO]
383
+ )
384
+ .first()
385
+ .reset_index()
386
+ )
387
+
388
+ # children
389
+ n_children = (
390
+ unique_edges[NAPISTU_GRAPH_EDGES.FROM]
391
+ .value_counts()
392
+ # rename values to the child name
393
+ .to_frame(name=NAPISTU_GRAPH_EDGES.SC_CHILDREN)
394
+ .reset_index()
395
+ .rename(
396
+ {
397
+ NAPISTU_GRAPH_EDGES.FROM: SBML_DFS.SC_ID,
398
+ },
399
+ axis=1,
400
+ )
401
+ )
402
+
403
+ # parents
404
+ n_parents = (
405
+ unique_edges[NAPISTU_GRAPH_EDGES.TO]
406
+ .value_counts()
407
+ # rename values to the parent name
408
+ .to_frame(name=NAPISTU_GRAPH_EDGES.SC_PARENTS)
409
+ .reset_index()
410
+ .rename(
411
+ {
412
+ NAPISTU_GRAPH_EDGES.TO: SBML_DFS.SC_ID,
413
+ },
414
+ axis=1,
415
+ )
416
+ )
417
+
418
+ graph_degree_by_edgelist = n_children.merge(n_parents, how="outer").fillna(int(0))
419
+
420
+ graph_degree_by_edgelist[NAPISTU_GRAPH_EDGES.SC_DEGREE] = (
421
+ graph_degree_by_edgelist[NAPISTU_GRAPH_EDGES.SC_CHILDREN]
422
+ + graph_degree_by_edgelist[NAPISTU_GRAPH_EDGES.SC_PARENTS]
423
+ )
424
+ graph_degree_by_edgelist = (
425
+ graph_degree_by_edgelist[
426
+ ~graph_degree_by_edgelist[SBML_DFS.SC_ID].str.contains("R[0-9]{8}")
427
+ ]
428
+ .set_index(SBML_DFS.SC_ID)
429
+ .sort_index()
430
+ )
431
+
432
+ cspecies_features = (
433
+ cspecies_features.drop(
434
+ [
435
+ NAPISTU_GRAPH_EDGES.SC_DEGREE,
436
+ NAPISTU_GRAPH_EDGES.SC_CHILDREN,
437
+ NAPISTU_GRAPH_EDGES.SC_PARENTS,
438
+ ],
439
+ axis=1,
440
+ )
441
+ .join(graph_degree_by_edgelist)
442
+ .fillna(int(0))
443
+ )
444
+
445
+ is_from_reaction = all_reaction_edges_df[NAPISTU_GRAPH_EDGES.FROM].isin(
446
+ sbml_dfs.reactions.index.tolist()
447
+ )
448
+ is_from_reaction = all_reaction_edges_df[NAPISTU_GRAPH_EDGES.FROM].isin(
449
+ sbml_dfs.reactions.index
450
+ )
451
+ # add substrate weight whenever "from" edge is a molecule
452
+ # and product weight when the "from" edge is a reaction
453
+ decorated_all_reaction_edges_df = pd.concat(
454
+ [
455
+ all_reaction_edges_df[~is_from_reaction].merge(
456
+ cspecies_features, left_on=NAPISTU_GRAPH_EDGES.FROM, right_index=True
457
+ ),
458
+ all_reaction_edges_df[is_from_reaction].merge(
459
+ cspecies_features, left_on=NAPISTU_GRAPH_EDGES.TO, right_index=True
460
+ ),
461
+ ]
462
+ ).sort_index()
463
+
464
+ if all_reaction_edges_df.shape[0] != decorated_all_reaction_edges_df.shape[0]:
465
+ msg = (
466
+ "'decorated_all_reaction_edges_df' and 'all_reaction_edges_df' should\n"
467
+ "have the same number of rows but they did not"
468
+ )
469
+
470
+ raise ValueError(msg)
471
+
472
+ logger.info(f"Done preparing {wiring_approach} graph")
473
+
474
+ return decorated_all_reaction_edges_df
475
+
476
+
312
477
  def pluck_entity_data(
313
478
  sbml_dfs: sbml_dfs_core.SBML_dfs,
314
479
  graph_attrs: dict[str, dict],
@@ -316,31 +481,35 @@ def pluck_entity_data(
316
481
  custom_transformations: Optional[dict[str, callable]] = None,
317
482
  ) -> pd.DataFrame | None:
318
483
  """
319
- Pluck Entity Attributes
484
+ Pluck Entity Attributes from an sbml_dfs based on a set of tables and variables to look for.
320
485
 
321
- Pull species or reaction attributes out of an sbml_dfs based on a set of
322
- tables and variables to look for.
323
-
324
- Parameters:
325
- sbml_dfs: sbml_dfs_core.SBML_dfs
326
- A mechanistic model
327
- graph_attrs: dict
486
+ Parameters
487
+ ----------
488
+ sbml_dfs : sbml_dfs_core.SBML_dfs
489
+ A mechanistic model.
490
+ graph_attrs : dict
328
491
  A dictionary of species/reaction attributes to pull out. If the requested
329
492
  data_type ("species" or "reactions") is not present as a key, or if the value
330
493
  is an empty dict, this function will return None (no error).
331
- data_type: str
332
- "species" or "reactions" to pull out species_data or reactions_data
333
- custom_transformations: dict[str, callable], optional
494
+ data_type : str
495
+ "species" or "reactions" to pull out species_data or reactions_data.
496
+ custom_transformations : dict[str, callable], optional
334
497
  A dictionary mapping transformation names to functions. If provided, these
335
498
  will be checked before built-in transformations. Example:
336
499
  custom_transformations = {"square": lambda x: x**2}
337
500
 
338
- Returns:
501
+ Returns
502
+ -------
503
+ pd.DataFrame or None
339
504
  A table where all extracted attributes are merged based on a common index or None
340
505
  if no attributes were extracted. If the requested data_type is not present in
341
506
  graph_attrs, or if the attribute dict is empty, returns None. This is intended
342
507
  to allow optional annotation blocks.
343
508
 
509
+ Raises
510
+ ------
511
+ ValueError
512
+ If data_type is not valid or if requested tables/variables are missing.
344
513
  """
345
514
 
346
515
  if data_type not in ENTITIES_W_DATA:
@@ -408,22 +577,29 @@ def apply_weight_transformations(
408
577
  edges_df: pd.DataFrame, reaction_attrs: dict, custom_transformations: dict = None
409
578
  ):
410
579
  """
411
- Apply Weight Transformations
412
-
413
- Args:
414
- edges_df (pd.DataFrame): a table of edges and their attributes extracted
415
- from a cpr_grpah.
416
- reaction_attrs (dict):
417
- A dictionary of attributes identifying weighting attributes within
418
- an sbml_df's reaction_data, how they will be named in edges_df (the keys),
419
- and how they should be transformed (the "trans" aliases")
420
- custom_transformations (dict, optional):
421
- A dictionary mapping transformation names to functions. If provided, these
422
- will be checked before built-in transformations.
423
-
424
- Returns:
425
- transformed_edges_df (pd.DataFrame): edges_df with weight variables transformed.
580
+ Apply Weight Transformations to edge attributes.
581
+
582
+ Parameters
583
+ ----------
584
+ edges_df : pd.DataFrame
585
+ A table of edges and their attributes extracted from a cpr_graph.
586
+ reaction_attrs : dict
587
+ A dictionary of attributes identifying weighting attributes within
588
+ an sbml_df's reaction_data, how they will be named in edges_df (the keys),
589
+ and how they should be transformed (the "trans" aliases).
590
+ custom_transformations : dict, optional
591
+ A dictionary mapping transformation names to functions. If provided, these
592
+ will be checked before built-in transformations.
593
+
594
+ Returns
595
+ -------
596
+ pd.DataFrame
597
+ edges_df with weight variables transformed.
426
598
 
599
+ Raises
600
+ ------
601
+ ValueError
602
+ If a weighting variable is missing or transformation is not found.
427
603
  """
428
604
 
429
605
  _validate_entity_attrs(
@@ -456,20 +632,18 @@ def summarize_weight_calibration(
456
632
  napistu_graph: NapistuGraph, reaction_attrs: dict
457
633
  ) -> None:
458
634
  """
459
- Summarize Weight Calibration
635
+ Summarize Weight Calibration for a network with multiple sources for edge weights.
460
636
 
461
- For a network with multiple sources for edge weights summarize the alignment of
462
- different weighting schemes and how they map onto our notion of "good" versus
463
- "dubious" weights.
464
-
465
- Args:
466
- napistu_graph (ig.Graph): A graph where edge weights have already been calibrated.
467
- reaction_attrs (dict): a dictionary summarizing the types of weights that
468
- exist and how they are transformed for calibration.
469
-
470
- Returns:
471
- None
637
+ Parameters
638
+ ----------
639
+ napistu_graph : NapistuGraph
640
+ A graph where edge weights have already been calibrated.
641
+ reaction_attrs : dict
642
+ A dictionary summarizing the types of weights that exist and how they are transformed for calibration.
472
643
 
644
+ Returns
645
+ -------
646
+ None
473
647
  """
474
648
 
475
649
  score_calibration_df = pd.DataFrame(SCORE_CALIBRATION_POINTS_DICT)
@@ -496,10 +670,7 @@ def add_graph_weights(
496
670
  weighting_strategy: str = NAPISTU_WEIGHTING_STRATEGIES.UNWEIGHTED,
497
671
  ) -> NapistuGraph:
498
672
  """
499
- Add Graph Weights
500
-
501
- Apply a weighting strategy to generate edge weights on a NapistuGraph. For directed graphs, "upstream_weights" will
502
- be generated as well, which should be used when searching for a node's ancestors.
673
+ Add Graph Weights to a NapistuGraph using a specified weighting strategy.
503
674
 
504
675
  Parameters
505
676
  ----------
@@ -508,7 +679,7 @@ def add_graph_weights(
508
679
  reaction_attrs : dict
509
680
  An optional dict of reaction attributes.
510
681
  weighting_strategy : str, optional
511
- A network weighting strategy with options:
682
+ A network weighting strategy. Options:
512
683
  - 'unweighted': all weights (and upstream_weights for directed graphs) are set to 1.
513
684
  - 'topology': weight edges by the degree of the source nodes favoring nodes emerging from nodes with few connections.
514
685
  - 'mixed': transform edges with a quantitative score based on reaction_attrs; and set edges without quantitative score as a source-specific weight.
@@ -518,6 +689,11 @@ def add_graph_weights(
518
689
  -------
519
690
  NapistuGraph
520
691
  The weighted NapistuGraph.
692
+
693
+ Raises
694
+ ------
695
+ ValueError
696
+ If weighting_strategy is not valid.
521
697
  """
522
698
 
523
699
  napistu_graph_updated = copy.deepcopy(napistu_graph)
@@ -561,7 +737,19 @@ def add_graph_weights(
561
737
 
562
738
 
563
739
  def _create_napistu_graph_bipartite(sbml_dfs: sbml_dfs_core.SBML_dfs) -> pd.DataFrame:
564
- """Turn an sbml_dfs model into a bipartite graph linking molecules to reactions."""
740
+ """
741
+ Turn an sbml_dfs model into a bipartite graph linking molecules to reactions.
742
+
743
+ Parameters
744
+ ----------
745
+ sbml_dfs : sbml_dfs_core.SBML_dfs
746
+ The SBML_dfs object containing the model data.
747
+
748
+ Returns
749
+ -------
750
+ pd.DataFrame
751
+ DataFrame representing the bipartite network edges.
752
+ """
565
753
 
566
754
  # setup edges
567
755
  network_edges = (
@@ -611,362 +799,6 @@ def _create_napistu_graph_bipartite(sbml_dfs: sbml_dfs_core.SBML_dfs) -> pd.Data
611
799
  return network_edges
612
800
 
613
801
 
614
- def _create_napistu_graph_tiered(
615
- sbml_dfs: sbml_dfs_core.SBML_dfs, graph_type: str
616
- ) -> pd.DataFrame:
617
- """Turn an sbml_dfs model into a tiered graph which links upstream entities to downstream ones."""
618
-
619
- # check whether all expect SBO terms are present
620
- invalid_sbo_terms = sbml_dfs.reaction_species[
621
- ~sbml_dfs.reaction_species[SBML_DFS.SBO_TERM].isin(MINI_SBO_TO_NAME.keys())
622
- ]
623
-
624
- if invalid_sbo_terms.shape[0] != 0:
625
- invalid_counts = invalid_sbo_terms.value_counts(SBML_DFS.SBO_TERM).to_frame("N")
626
- if not isinstance(invalid_counts, pd.DataFrame):
627
- raise TypeError("invalid_counts must be a pandas DataFrame")
628
- logger.warning(utils.style_df(invalid_counts, headers="keys")) # type: ignore
629
- raise ValueError("Some reaction species have unusable SBO terms")
630
-
631
- # load and validate the schema of graph_type
632
- graph_hierarchy_df = _create_graph_hierarchy_df(graph_type)
633
-
634
- # organize reaction species for defining connections
635
- sorted_reaction_species = sbml_dfs.reaction_species.set_index(
636
- [SBML_DFS.R_ID, SBML_DFS.SBO_TERM]
637
- ).sort_index()
638
-
639
- logger.info(
640
- f"Formatting {sorted_reaction_species.shape[0]} reactions species as "
641
- "tiered edges."
642
- )
643
-
644
- # infer tiered edges in each reaction
645
- all_reaction_edges = [
646
- _format_tiered_reaction_species(
647
- r, sorted_reaction_species, sbml_dfs, graph_hierarchy_df
648
- )
649
- for r in sorted_reaction_species.index.get_level_values(SBML_DFS.R_ID).unique()
650
- ]
651
- all_reaction_edges_df = pd.concat(all_reaction_edges).reset_index(drop=True)
652
-
653
- # test for reactions missing substrates
654
- r_id_list = sorted_reaction_species.index.get_level_values(0).unique()
655
- r_id_reactant_only = [
656
- x for x in r_id_list if len(sorted_reaction_species.loc[x]) == 1
657
- ]
658
-
659
- if len(r_id_reactant_only) > 0:
660
- logger.warning(f"{len(r_id_reactant_only)} reactions are missing substrates")
661
- all_reaction_edges_df_pre = all_reaction_edges_df.copy()
662
- all_reaction_edges_df = all_reaction_edges_df_pre[
663
- ~all_reaction_edges_df_pre[SBML_DFS.R_ID].isin(r_id_reactant_only)
664
- ]
665
-
666
- logger.info(
667
- "Adding additional attributes to edges, e.g., # of children and parents."
668
- )
669
-
670
- # add compartmentalized species summaries to weight edges
671
- cspecies_features = sbml_dfs.get_cspecies_features()
672
-
673
- # calculate undirected and directed degrees (i.e., # of parents and children)
674
- # based on a network's edgelist. this used when the network representation is
675
- # not the bipartite network which can be trivially obtained from the pathway
676
- # specification
677
- unique_edges = (
678
- all_reaction_edges_df.groupby(
679
- [NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO]
680
- )
681
- .first()
682
- .reset_index()
683
- )
684
-
685
- # children
686
- n_children = (
687
- unique_edges[NAPISTU_GRAPH_EDGES.FROM]
688
- .value_counts()
689
- # rename values to the child name
690
- .to_frame(name=NAPISTU_GRAPH_EDGES.SC_CHILDREN)
691
- .reset_index()
692
- .rename(
693
- {
694
- NAPISTU_GRAPH_EDGES.FROM: SBML_DFS.SC_ID,
695
- },
696
- axis=1,
697
- )
698
- )
699
-
700
- # parents
701
- n_parents = (
702
- unique_edges[NAPISTU_GRAPH_EDGES.TO]
703
- .value_counts()
704
- # rename values to the parent name
705
- .to_frame(name=NAPISTU_GRAPH_EDGES.SC_PARENTS)
706
- .reset_index()
707
- .rename(
708
- {
709
- NAPISTU_GRAPH_EDGES.TO: SBML_DFS.SC_ID,
710
- },
711
- axis=1,
712
- )
713
- )
714
-
715
- graph_degree_by_edgelist = n_children.merge(n_parents, how="outer").fillna(int(0))
716
-
717
- graph_degree_by_edgelist[NAPISTU_GRAPH_EDGES.SC_DEGREE] = (
718
- graph_degree_by_edgelist[NAPISTU_GRAPH_EDGES.SC_CHILDREN]
719
- + graph_degree_by_edgelist[NAPISTU_GRAPH_EDGES.SC_PARENTS]
720
- )
721
- graph_degree_by_edgelist = (
722
- graph_degree_by_edgelist[
723
- ~graph_degree_by_edgelist[SBML_DFS.SC_ID].str.contains("R[0-9]{8}")
724
- ]
725
- .set_index(SBML_DFS.SC_ID)
726
- .sort_index()
727
- )
728
-
729
- cspecies_features = (
730
- cspecies_features.drop(
731
- [
732
- NAPISTU_GRAPH_EDGES.SC_DEGREE,
733
- NAPISTU_GRAPH_EDGES.SC_CHILDREN,
734
- NAPISTU_GRAPH_EDGES.SC_PARENTS,
735
- ],
736
- axis=1,
737
- )
738
- .join(graph_degree_by_edgelist)
739
- .fillna(int(0))
740
- )
741
-
742
- is_from_reaction = all_reaction_edges_df[NAPISTU_GRAPH_EDGES.FROM].isin(
743
- sbml_dfs.reactions.index.tolist()
744
- )
745
- is_from_reaction = all_reaction_edges_df[NAPISTU_GRAPH_EDGES.FROM].isin(
746
- sbml_dfs.reactions.index
747
- )
748
- # add substrate weight whenever "from" edge is a molecule
749
- # and product weight when the "from" edge is a reaction
750
- decorated_all_reaction_edges_df = pd.concat(
751
- [
752
- all_reaction_edges_df[~is_from_reaction].merge(
753
- cspecies_features, left_on=NAPISTU_GRAPH_EDGES.FROM, right_index=True
754
- ),
755
- all_reaction_edges_df[is_from_reaction].merge(
756
- cspecies_features, left_on=NAPISTU_GRAPH_EDGES.TO, right_index=True
757
- ),
758
- ]
759
- ).sort_index()
760
-
761
- if all_reaction_edges_df.shape[0] != decorated_all_reaction_edges_df.shape[0]:
762
- msg = (
763
- "'decorated_all_reaction_edges_df' and 'all_reaction_edges_df' should\n"
764
- "have the same number of rows but they did not"
765
- )
766
-
767
- raise ValueError(msg)
768
-
769
- logger.info(f"Done preparing {graph_type} graph")
770
-
771
- return decorated_all_reaction_edges_df
772
-
773
-
774
- def _format_tiered_reaction_species(
775
- r_id: str,
776
- sorted_reaction_species: pd.DataFrame,
777
- sbml_dfs: sbml_dfs_core.SBML_dfs,
778
- graph_hierarchy_df: pd.DataFrame,
779
- ) -> pd.DataFrame:
780
- """
781
- Format Tiered Reaction Species
782
-
783
- Refactor a reaction's species into tiered edges between substrates, products, enzymes and allosteric regulators.
784
- """
785
-
786
- rxn_species = sorted_reaction_species.loc[r_id]
787
- if not isinstance(rxn_species, pd.DataFrame):
788
- raise TypeError("rxn_species must be a pandas DataFrame")
789
- if list(rxn_species.index.names) != [SBML_DFS.SBO_TERM]:
790
- raise ValueError("rxn_species index names must be [SBML_DFS.SBO_TERM]")
791
- if rxn_species.columns.tolist() != [SBML_DFS.SC_ID, SBML_DFS.STOICHIOMETRY]:
792
- raise ValueError(
793
- "rxn_species columns must be [SBML_DFS.SC_ID, SBML_DFS.STOICHIOMETRY]"
794
- )
795
-
796
- rxn_sbo_terms = set(rxn_species.index.unique())
797
- # map to common names
798
- rxn_sbo_names = {MINI_SBO_TO_NAME[x] for x in rxn_sbo_terms}
799
-
800
- # is the reaction a general purpose interaction
801
- if len(rxn_sbo_names) == 1:
802
- if list(rxn_sbo_names)[0] == SBOTERM_NAMES.INTERACTOR:
803
- # further validation happens in the function - e.g., exactly two interactors
804
- return _format_interactors_for_tiered_graph(r_id, rxn_species, sbml_dfs)
805
-
806
- if SBOTERM_NAMES.INTERACTOR in rxn_sbo_names:
807
- logger.warning(
808
- f"Invalid combinations of SBO_terms in {str(r_id)} : {sbml_dfs.reactions.loc[r_id][SBML_DFS.R_NAME]}. "
809
- "If interactors are present then there can't be any other types of reaction species. "
810
- f"The following roles were defined: {', '.join(rxn_sbo_names)}"
811
- )
812
-
813
- # reorganize molecules and the reaction itself into tiers
814
- entities_ordered_by_tier = (
815
- pd.concat(
816
- [
817
- (
818
- rxn_species.reset_index()
819
- .rename({SBML_DFS.SC_ID: "entity_id"}, axis=1)
820
- .merge(graph_hierarchy_df)
821
- ),
822
- graph_hierarchy_df[
823
- graph_hierarchy_df[NAPISTU_GRAPH_EDGES.SBO_NAME]
824
- == NAPISTU_GRAPH_NODE_TYPES.REACTION
825
- ].assign(entity_id=r_id, r_id=r_id),
826
- ]
827
- )
828
- .sort_values(["tier"])
829
- .set_index("tier")
830
- )
831
- ordered_tiers = entities_ordered_by_tier.index.get_level_values("tier").unique()
832
-
833
- if len(ordered_tiers) <= 1:
834
- raise ValueError("ordered_tiers must have more than one element")
835
-
836
- # which tier is the reaction?
837
- reaction_tier = graph_hierarchy_df["tier"][
838
- graph_hierarchy_df[NAPISTU_GRAPH_EDGES.SBO_NAME]
839
- == NAPISTU_GRAPH_NODE_TYPES.REACTION
840
- ].tolist()[0]
841
-
842
- rxn_edges = list()
843
- past_reaction = False
844
- for i in range(0, len(ordered_tiers) - 1):
845
- formatted_tier_combo = _format_tier_combo(
846
- entities_ordered_by_tier.loc[[ordered_tiers[i]]],
847
- entities_ordered_by_tier.loc[[ordered_tiers[i + 1]]],
848
- past_reaction,
849
- )
850
-
851
- if ordered_tiers[i + 1] == reaction_tier:
852
- past_reaction = True
853
-
854
- rxn_edges.append(formatted_tier_combo)
855
-
856
- rxn_edges_df = (
857
- pd.concat(rxn_edges)[
858
- [
859
- NAPISTU_GRAPH_EDGES.FROM,
860
- NAPISTU_GRAPH_EDGES.TO,
861
- NAPISTU_GRAPH_EDGES.STOICHIOMETRY,
862
- NAPISTU_GRAPH_EDGES.SBO_TERM,
863
- ]
864
- ]
865
- .reset_index(drop=True)
866
- .assign(r_id=r_id)
867
- )
868
-
869
- return rxn_edges_df
870
-
871
-
872
- def _format_tier_combo(
873
- upstream_tier: pd.DataFrame, downstream_tier: pd.DataFrame, past_reaction: bool
874
- ) -> pd.DataFrame:
875
- """
876
- Format Tier Combo
877
-
878
- Create a set of edges crossing two tiers of a tiered graph. This will involve an
879
- all x all combination of entries. Tiers form an ordering along the molecular entities
880
- in a reaction plus a tier for the reaction itself. Attributes such as stoichiometry
881
- and sbo_term will be passed from the tier which is furthest from the reaction tier
882
- to ensure that each tier of molecular data applies its attributes to a single set of
883
- edges while the "reaction" tier does not. Reaction entities have neither a
884
- stoichiometery or sbo_term annotation.
885
-
886
- Args:
887
- upstream_tier (pd.DataFrame): A table containing upstream entities in a reaction,
888
- e.g., regulators.
889
- downstream_tier (pd.DataFrame): A table containing downstream entities in a reaction,
890
- e.g., catalysts.
891
- past_reaction (bool): if True then attributes will be taken from downstream_tier and
892
- if False they will come from upstream_tier.
893
-
894
- Returns:
895
- formatted_tier_combo (pd.DataFrame): A table of edges containing (from, to, stoichiometry, sbo_term, r_id). The
896
- number of edges is the product of the number of entities in the upstream tier
897
- times the number in the downstream tier.
898
-
899
- """
900
-
901
- upstream_fields = ["entity_id", SBML_DFS.STOICHIOMETRY, SBML_DFS.SBO_TERM]
902
- downstream_fields = ["entity_id"]
903
-
904
- if past_reaction:
905
- # swap fields
906
- upstream_fields, downstream_fields = downstream_fields, upstream_fields
907
-
908
- formatted_tier_combo = (
909
- upstream_tier[upstream_fields]
910
- .rename({"entity_id": NAPISTU_GRAPH_EDGES.FROM}, axis=1)
911
- .assign(_joiner=1)
912
- ).merge(
913
- (
914
- downstream_tier[downstream_fields]
915
- .rename({"entity_id": NAPISTU_GRAPH_EDGES.TO}, axis=1)
916
- .assign(_joiner=1)
917
- ),
918
- left_on="_joiner",
919
- right_on="_joiner",
920
- )
921
-
922
- return formatted_tier_combo
923
-
924
-
925
- def _create_graph_hierarchy_df(graph_type: str) -> pd.DataFrame:
926
- """
927
- Create Graph Hierarchy DataFrame
928
-
929
- Format a graph hierarchy list of lists and a pd.DataFrame
930
-
931
- Args:
932
- graph_type (str):
933
- The type of tiered graph to work with. Each type has its own specification in constants.py.
934
-
935
- Returns:
936
- A pandas DataFrame with sbo_name, tier, and sbo_term.
937
-
938
- """
939
-
940
- if graph_type == NAPISTU_GRAPH_TYPES.REGULATORY:
941
- sbo_names_hierarchy = REGULATORY_GRAPH_HIERARCHY
942
- elif graph_type == NAPISTU_GRAPH_TYPES.SURROGATE:
943
- sbo_names_hierarchy = SURROGATE_GRAPH_HIERARCHY
944
- else:
945
- raise NotImplementedError(f"{graph_type} is not a valid graph_type")
946
-
947
- # format as a DF
948
- graph_hierarchy_df = pd.concat(
949
- [
950
- pd.DataFrame({"sbo_name": sbo_names_hierarchy[i]}).assign(tier=i)
951
- for i in range(0, len(sbo_names_hierarchy))
952
- ]
953
- ).reset_index(drop=True)
954
- graph_hierarchy_df[SBML_DFS.SBO_TERM] = graph_hierarchy_df["sbo_name"].apply(
955
- lambda x: (
956
- MINI_SBO_FROM_NAME[x] if x != NAPISTU_GRAPH_NODE_TYPES.REACTION else ""
957
- )
958
- )
959
-
960
- # ensure that the output is expected
961
- utils.match_pd_vars(
962
- graph_hierarchy_df,
963
- req_vars={NAPISTU_GRAPH_EDGES.SBO_NAME, "tier", SBML_DFS.SBO_TERM},
964
- allow_series=False,
965
- ).assert_present()
966
-
967
- return graph_hierarchy_df
968
-
969
-
970
802
  def _add_graph_weights_mixed(
971
803
  napistu_graph: NapistuGraph, reaction_attrs: dict
972
804
  ) -> NapistuGraph:
@@ -1162,7 +994,23 @@ def _summarize_weight_calibration_table(
1162
994
  score_calibration_df: pd.DataFrame,
1163
995
  score_calibration_df_calibrated: pd.DataFrame,
1164
996
  ):
1165
- """Create a table comparing edge weights from multiple sources."""
997
+ """
998
+ Create a table comparing edge weights from multiple sources.
999
+
1000
+ Parameters
1001
+ ----------
1002
+ calibrated_edges : pd.DataFrame
1003
+ DataFrame of calibrated edge weights.
1004
+ score_calibration_df : pd.DataFrame
1005
+ DataFrame of raw calibration points.
1006
+ score_calibration_df_calibrated : pd.DataFrame
1007
+ DataFrame of calibrated calibration points.
1008
+
1009
+ Returns
1010
+ -------
1011
+ pd.DataFrame
1012
+ Styled DataFrame summarizing calibration points and quantiles.
1013
+ """
1166
1014
 
1167
1015
  # generate a table summarizing different scoring measures
1168
1016
  #
@@ -1214,7 +1062,20 @@ def _summarize_weight_calibration_table(
1214
1062
  def _summarize_weight_calibration_plots(
1215
1063
  calibrated_edges: pd.DataFrame, score_calibration_df_calibrated: pd.DataFrame
1216
1064
  ) -> None:
1217
- """Create a couple of plots summarizing the relationships between different scoring measures."""
1065
+ """
1066
+ Create plots summarizing the relationships between different scoring measures.
1067
+
1068
+ Parameters
1069
+ ----------
1070
+ calibrated_edges : pd.DataFrame
1071
+ DataFrame of calibrated edge weights.
1072
+ score_calibration_df_calibrated : pd.DataFrame
1073
+ DataFrame of calibrated calibration points.
1074
+
1075
+ Returns
1076
+ -------
1077
+ None
1078
+ """
1218
1079
 
1219
1080
  # set up a 2 x 1 plot
1220
1081
  f, (ax1, ax2) = plt.subplots(1, 2)
@@ -1240,28 +1101,24 @@ def _create_source_weights(
1240
1101
  source_vars_dict: dict = SOURCE_VARS_DICT,
1241
1102
  source_wt_default: int = 1,
1242
1103
  ) -> pd.DataFrame:
1243
- """ "
1244
- Create Source Weights
1245
-
1246
- Create weights based on an edges source. This is a simple but crude way of allowing different
1247
- data sources to have different support if we think that some are more trustworthly than others.
1248
-
1249
- Args:
1250
- edges_df: pd.DataFrame
1251
- The edges dataframe to add the source weights to.
1252
- source_wt_var: str
1253
- The name of the column to store the source weights.
1254
- source_vars_dict: dict
1255
- Dictionary with keys indicating edge attributes and values indicating the weight to assign
1256
- to that attribute. This value is generally the largest weight that can be assigned to an
1257
- edge so that the numeric weight is chosen over the default.
1258
- source_wt_default: int
1259
- The default weight to assign to an edge if no other weight attribute is found.
1260
-
1261
- Returns:
1262
- pd.DataFrame
1263
- The edges dataframe with the source weights added.
1104
+ """
1105
+ Create weights based on an edge's source.
1106
+
1107
+ Parameters
1108
+ ----------
1109
+ edges_df : pd.DataFrame
1110
+ The edges dataframe to add the source weights to.
1111
+ source_wt_var : str, optional
1112
+ The name of the column to store the source weights. Default is "source_wt".
1113
+ source_vars_dict : dict, optional
1114
+ Dictionary with keys indicating edge attributes and values indicating the weight to assign to that attribute. Default is SOURCE_VARS_DICT.
1115
+ source_wt_default : int, optional
1116
+ The default weight to assign to an edge if no other weight attribute is found. Default is 1.
1264
1117
 
1118
+ Returns
1119
+ -------
1120
+ pd.DataFrame
1121
+ The edges dataframe with the source weights added.
1265
1122
  """
1266
1123
 
1267
1124
  logger.warning(
@@ -1299,134 +1156,59 @@ def _create_source_weights(
1299
1156
 
1300
1157
 
1301
1158
  def _wt_transformation_identity(x):
1302
- """Identity"""
1159
+ """
1160
+ Identity transformation for weights.
1161
+
1162
+ Parameters
1163
+ ----------
1164
+ x : any
1165
+ Input value.
1166
+
1167
+ Returns
1168
+ -------
1169
+ any
1170
+ The input value unchanged.
1171
+ """
1303
1172
  return x
1304
1173
 
1305
1174
 
1306
1175
  def _wt_transformation_string(x):
1307
- """Map STRING scores to a similar scale as topology weights."""
1176
+ """
1177
+ Map STRING scores to a similar scale as topology weights.
1178
+
1179
+ Parameters
1180
+ ----------
1181
+ x : float
1182
+ STRING score.
1308
1183
 
1184
+ Returns
1185
+ -------
1186
+ float
1187
+ Transformed STRING score.
1188
+ """
1309
1189
  return 250000 / np.power(x, 1.7)
1310
1190
 
1311
1191
 
1312
1192
  def _wt_transformation_string_inv(x):
1313
- """Map STRING scores so they work with source weights."""
1314
-
1315
- # string scores are bounded on [0, 1000]
1316
- # and score/1000 is roughly a probability that
1317
- # there is a real interaction (physical, genetic, ...)
1318
- # reported string scores are currently on [150, 1000]
1319
- # so this transformation will map these onto {6.67, 1}
1320
-
1321
- return 1 / (x / 1000)
1322
-
1323
-
1324
- def _format_interactors_for_tiered_graph(
1325
- r_id: str, rxn_species: pd.DataFrame, sbml_dfs: sbml_dfs_core.SBML_dfs
1326
- ) -> pd.DataFrame:
1327
- """Format an undirected interactions for tiered graph so interactions are linked even though they would be on the same tier."""
1328
-
1329
- interactor_data = rxn_species.loc[MINI_SBO_FROM_NAME["interactor"]]
1330
- if interactor_data.shape[0] != 2:
1331
- raise ValueError(
1332
- f"{interactor_data.shape[0]} interactors present for {str(r_id)} : "
1333
- f"{sbml_dfs.reactions.loc[r_id]['r_name']}. "
1334
- "Reactions with interactors must have exactly two interactors"
1335
- )
1336
-
1337
- if not (interactor_data["stoichiometry"] == 0).any():
1338
- raise ValueError(
1339
- f"Interactors had non-zero stoichiometry for {str(r_id)} : {sbml_dfs.reactions.loc[r_id]['r_name']}. "
1340
- "If stoichiometry is important for this reaction then it should use other SBO terms "
1341
- "(e.g., substrate and product)."
1342
- )
1343
-
1344
- # set the first entry as "from" and second as "to" if stoi is zero.
1345
- # the reverse reaction will generally be added later because these
1346
- # reactions should be reversible
1347
-
1348
- return pd.DataFrame(
1349
- {
1350
- "from": interactor_data["sc_id"].iloc[0],
1351
- "to": interactor_data["sc_id"].iloc[1],
1352
- "sbo_term": MINI_SBO_FROM_NAME["interactor"],
1353
- "stoichiometry": 0,
1354
- "r_id": r_id,
1355
- },
1356
- index=[0],
1357
- )
1358
-
1359
-
1360
- def _add_graph_species_attribute(
1361
- napistu_graph: NapistuGraph,
1362
- sbml_dfs: sbml_dfs_core.SBML_dfs,
1363
- species_graph_attrs: dict,
1364
- custom_transformations: Optional[dict] = None,
1365
- ) -> NapistuGraph:
1366
1193
  """
1367
- Add meta-data from species_data to existing NapistuGraph's vertices.
1368
-
1369
- This function augments the vertices of a NapistuGraph network with additional attributes
1370
- derived from the species-level data in the provided SBML_dfs object. The attributes
1371
- to add are specified in the species_graph_attrs dictionary, and can be transformed
1372
- using either built-in or user-supplied transformation functions.
1194
+ Map STRING scores so they work with source weights.
1373
1195
 
1374
1196
  Parameters
1375
1197
  ----------
1376
- napistu_graph : NapistuGraph
1377
- The NapistuGraph network to augment (subclass of igraph.Graph).
1378
- sbml_dfs : sbml_dfs_core.SBML_dfs
1379
- The SBML_dfs object containing species data.
1380
- species_graph_attrs : dict
1381
- Dictionary specifying which attributes to pull from species_data and how to transform them.
1382
- The structure should be {attribute_name: {"table": ..., "variable": ..., "trans": ...}}.
1383
- custom_transformations : dict, optional
1384
- Dictionary mapping transformation names to functions. If provided, these will be checked
1385
- before built-in transformations. Example: {"square": lambda x: x**2}
1198
+ x : float
1199
+ STRING score.
1386
1200
 
1387
1201
  Returns
1388
1202
  -------
1389
- NapistuGraph
1390
- The input NapistuGraph with additional vertex attributes added from species_data.
1203
+ float
1204
+ Inverse transformed STRING score.
1391
1205
  """
1392
- if not isinstance(species_graph_attrs, dict):
1393
- raise TypeError(
1394
- f"species_graph_attrs must be a dict, but was {type(species_graph_attrs)}"
1395
- )
1396
-
1397
- # fail fast if species_graph_attrs is not properly formatted
1398
- # also flatten attribute list to be added to vertex nodes
1399
- sp_graph_key_list = []
1400
- sp_node_attr_list = []
1401
- for k in species_graph_attrs.keys():
1402
- _validate_entity_attrs(
1403
- species_graph_attrs[k], custom_transformations=custom_transformations
1404
- )
1405
-
1406
- sp_graph_key_list.append(k)
1407
- sp_node_attr_list.append(list(species_graph_attrs[k].keys()))
1408
-
1409
- # flatten sp_node_attr_list
1410
- flat_sp_node_attr_list = [item for items in sp_node_attr_list for item in items]
1411
-
1412
- logger.info("Adding meta-data from species_data")
1413
-
1414
- curr_network_nodes_df = napistu_graph.get_vertex_dataframe()
1415
-
1416
- # add species-level attributes to nodes dataframe
1417
- augmented_network_nodes_df = _augment_network_nodes(
1418
- curr_network_nodes_df,
1419
- sbml_dfs,
1420
- species_graph_attrs,
1421
- custom_transformations=custom_transformations,
1422
- )
1423
-
1424
- for vs_attr in flat_sp_node_attr_list:
1425
- # in case more than one vs_attr in the flat_sp_node_attr_list
1426
- logger.info(f"Adding new attribute {vs_attr} to vertices")
1427
- napistu_graph.vs[vs_attr] = augmented_network_nodes_df[vs_attr].values
1428
-
1429
- return napistu_graph
1206
+ # string scores are bounded on [0, 1000]
1207
+ # and score/1000 is roughly a probability that
1208
+ # there is a real interaction (physical, genetic, ...)
1209
+ # reported string scores are currently on [150, 1000]
1210
+ # so this transformation will map these onto {6.67, 1}
1211
+ return 1 / (x / 1000)
1430
1212
 
1431
1213
 
1432
1214
  def _augment_network_nodes(
@@ -1448,7 +1230,7 @@ def _augment_network_nodes(
1448
1230
  DataFrame of network nodes. Must include columns 'name', 'node_name', and 'node_type'.
1449
1231
  sbml_dfs : sbml_dfs_core.SBML_dfs
1450
1232
  The SBML_dfs object containing species data.
1451
- species_graph_attrs : dict
1233
+ species_graph_attrs : dict, optional
1452
1234
  Dictionary specifying which attributes to pull from species_data and how to transform them.
1453
1235
  The structure should be {attribute_name: {"table": ..., "variable": ..., "trans": ...}}.
1454
1236
  custom_transformations : dict, optional
@@ -1459,6 +1241,11 @@ def _augment_network_nodes(
1459
1241
  -------
1460
1242
  pd.DataFrame
1461
1243
  The input network_nodes DataFrame with additional columns for each extracted and transformed attribute.
1244
+
1245
+ Raises
1246
+ ------
1247
+ ValueError
1248
+ If required attributes are missing from network_nodes.
1462
1249
  """
1463
1250
  REQUIRED_NETWORK_NODE_ATTRS = {
1464
1251
  "name",
@@ -1522,7 +1309,8 @@ def _augment_network_edges(
1522
1309
  reaction_graph_attrs: dict = dict(),
1523
1310
  custom_transformations: Optional[dict] = None,
1524
1311
  ) -> pd.DataFrame:
1525
- """Add reversibility and other metadata from reactions.
1312
+ """
1313
+ Add reversibility and other metadata from reactions.
1526
1314
 
1527
1315
  Parameters
1528
1316
  ----------
@@ -1530,10 +1318,20 @@ def _augment_network_edges(
1530
1318
  DataFrame of network edges.
1531
1319
  sbml_dfs : sbml_dfs_core.SBML_dfs
1532
1320
  The SBML_dfs object containing reaction data.
1533
- reaction_graph_attrs : dict
1321
+ reaction_graph_attrs : dict, optional
1534
1322
  Dictionary of reaction attributes to add.
1535
1323
  custom_transformations : dict, optional
1536
1324
  Dictionary of custom transformation functions to use for attribute transformation.
1325
+
1326
+ Returns
1327
+ -------
1328
+ pd.DataFrame
1329
+ DataFrame of network edges with additional metadata.
1330
+
1331
+ Raises
1332
+ ------
1333
+ ValueError
1334
+ If required attributes are missing from network_edges.
1537
1335
  """
1538
1336
  REQUIRED_NETWORK_EDGE_ATTRS = {
1539
1337
  "from",
@@ -1583,7 +1381,24 @@ def _augment_network_edges(
1583
1381
 
1584
1382
 
1585
1383
  def _reverse_network_edges(augmented_network_edges: pd.DataFrame) -> pd.DataFrame:
1586
- """Flip reversible reactions to derive the reverse reaction."""
1384
+ """
1385
+ Flip reversible reactions to derive the reverse reaction.
1386
+
1387
+ Parameters
1388
+ ----------
1389
+ augmented_network_edges : pd.DataFrame
1390
+ DataFrame of network edges with metadata.
1391
+
1392
+ Returns
1393
+ -------
1394
+ pd.DataFrame
1395
+ DataFrame with reversed edges for reversible reactions.
1396
+
1397
+ Raises
1398
+ ------
1399
+ ValueError
1400
+ If required variables are missing or if the transformation fails.
1401
+ """
1587
1402
 
1588
1403
  # validate inputs
1589
1404
  required_vars = {NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO}
@@ -1684,27 +1499,36 @@ def _create_topology_weights(
1684
1499
  scale_multiplier_by_meandegree: bool = True,
1685
1500
  ) -> ig.Graph:
1686
1501
  """
1687
- Create Topology Weights
1688
-
1689
- Add weights to a network based on its topology. Edges downstream of nodes
1690
- with many connections receive a higher weight suggesting that any one
1691
- of them is less likely to be regulatory. This is a simple and clearly
1692
- flawed heuristic which can be combined with more principled weighting
1693
- schemes.
1694
-
1695
- Args:
1696
- napistu_graph (ig.Graph): a graph containing connections between molecules, proteins, and reactions.
1697
- base_score (float): offset which will be added to all weights.
1698
- protein_multiplier (int): multiplier for non-metabolite species (lower weight paths will tend to be selected).
1699
- metabolite_multiplier (int): multiplier for metabolites [defined a species with a ChEBI ID).
1700
- unknown_multiplier (int): multiplier for species without any identifier. See sbml_dfs_utils.species_type_types.
1701
- scale_multiplier_by_meandegree (bool): if True then multipliers will be rescaled by the average number of
1702
- connections a node has (i.e., its degree) so that weights will be relatively similar regardless of network
1703
- size and sparsity.
1704
-
1705
- Returns:
1706
- napistu_graph (ig.Graph): graph with added topology weights
1502
+ Create Topology Weights for a network based on its topology.
1503
+
1504
+ Edges downstream of nodes with many connections receive a higher weight suggesting that any one
1505
+ of them is less likely to be regulatory. This is a simple and clearly flawed heuristic which can be
1506
+ combined with more principled weighting schemes.
1707
1507
 
1508
+ Parameters
1509
+ ----------
1510
+ napistu_graph : ig.Graph
1511
+ A graph containing connections between molecules, proteins, and reactions.
1512
+ base_score : float, optional
1513
+ Offset which will be added to all weights. Default is 2.
1514
+ protein_multiplier : int, optional
1515
+ Multiplier for non-metabolite species. Default is 1.
1516
+ metabolite_multiplier : int, optional
1517
+ Multiplier for metabolites. Default is 3.
1518
+ unknown_multiplier : int, optional
1519
+ Multiplier for species without any identifier. Default is 10.
1520
+ scale_multiplier_by_meandegree : bool, optional
1521
+ If True, multipliers will be rescaled by the average number of connections a node has. Default is True.
1522
+
1523
+ Returns
1524
+ -------
1525
+ ig.Graph
1526
+ Graph with added topology weights.
1527
+
1528
+ Raises
1529
+ ------
1530
+ ValueError
1531
+ If required attributes are missing or if parameters are invalid.
1708
1532
  """
1709
1533
 
1710
1534
  # check for required attribute before proceeding
@@ -1816,17 +1640,17 @@ def _validate_entity_attrs(
1816
1640
  validate_transformations: bool = True,
1817
1641
  custom_transformations: Optional[dict] = None,
1818
1642
  ) -> None:
1819
- """Validate that graph attributes are a valid format.
1643
+ """
1644
+ Validate that graph attributes are a valid format.
1820
1645
 
1821
1646
  Parameters
1822
1647
  ----------
1823
1648
  entity_attrs : dict
1824
- Dictionary of entity attributes to validate
1649
+ Dictionary of entity attributes to validate.
1825
1650
  validate_transformations : bool, optional
1826
- Whether to validate transformation names, by default True
1827
- custom_transformations : Optional[dict], optional
1828
- Dictionary of custom transformation functions, by default None
1829
- Keys are transformation names, values are transformation functions
1651
+ Whether to validate transformation names, by default True.
1652
+ custom_transformations : dict, optional
1653
+ Dictionary of custom transformation functions, by default None. Keys are transformation names, values are transformation functions.
1830
1654
 
1831
1655
  Returns
1832
1656
  -------
@@ -1835,9 +1659,9 @@ def _validate_entity_attrs(
1835
1659
  Raises
1836
1660
  ------
1837
1661
  AssertionError
1838
- If entity_attrs is not a dictionary
1662
+ If entity_attrs is not a dictionary.
1839
1663
  ValueError
1840
- If a transformation is not found in DEFINED_WEIGHT_TRANSFORMATION or custom_transformations
1664
+ If a transformation is not found in DEFINED_WEIGHT_TRANSFORMATION or custom_transformations.
1841
1665
  """
1842
1666
  assert isinstance(entity_attrs, dict), "entity_attrs must be a dictionary"
1843
1667