napistu 0.1.0__py3-none-any.whl → 0.2.4.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. napistu/__init__.py +1 -1
  2. napistu/consensus.py +1010 -513
  3. napistu/constants.py +24 -0
  4. napistu/gcs/constants.py +2 -2
  5. napistu/gcs/downloads.py +57 -25
  6. napistu/gcs/utils.py +21 -0
  7. napistu/identifiers.py +105 -6
  8. napistu/ingestion/constants.py +0 -1
  9. napistu/ingestion/obo.py +24 -8
  10. napistu/ingestion/psi_mi.py +20 -5
  11. napistu/ingestion/reactome.py +8 -32
  12. napistu/mcp/__init__.py +69 -0
  13. napistu/mcp/__main__.py +180 -0
  14. napistu/mcp/codebase.py +182 -0
  15. napistu/mcp/codebase_utils.py +298 -0
  16. napistu/mcp/constants.py +72 -0
  17. napistu/mcp/documentation.py +166 -0
  18. napistu/mcp/documentation_utils.py +235 -0
  19. napistu/mcp/execution.py +382 -0
  20. napistu/mcp/profiles.py +73 -0
  21. napistu/mcp/server.py +86 -0
  22. napistu/mcp/tutorials.py +124 -0
  23. napistu/mcp/tutorials_utils.py +230 -0
  24. napistu/mcp/utils.py +47 -0
  25. napistu/mechanism_matching.py +782 -26
  26. napistu/modify/constants.py +41 -0
  27. napistu/modify/curation.py +4 -1
  28. napistu/modify/gaps.py +243 -156
  29. napistu/modify/pathwayannot.py +26 -8
  30. napistu/network/neighborhoods.py +16 -7
  31. napistu/network/net_create.py +209 -54
  32. napistu/network/net_propagation.py +118 -0
  33. napistu/network/net_utils.py +1 -32
  34. napistu/rpy2/netcontextr.py +10 -7
  35. napistu/rpy2/rids.py +7 -5
  36. napistu/sbml_dfs_core.py +46 -29
  37. napistu/sbml_dfs_utils.py +37 -1
  38. napistu/source.py +8 -2
  39. napistu/utils.py +67 -8
  40. napistu-0.2.4.dev3.dist-info/METADATA +84 -0
  41. napistu-0.2.4.dev3.dist-info/RECORD +95 -0
  42. {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/WHEEL +1 -1
  43. tests/conftest.py +11 -5
  44. tests/test_consensus.py +4 -1
  45. tests/test_gaps.py +127 -0
  46. tests/test_gcs.py +3 -2
  47. tests/test_igraph.py +14 -0
  48. tests/test_mcp_documentation_utils.py +13 -0
  49. tests/test_mechanism_matching.py +658 -0
  50. tests/test_net_propagation.py +89 -0
  51. tests/test_net_utils.py +83 -0
  52. tests/test_sbml.py +2 -0
  53. tests/{test_sbml_dfs_create.py → test_sbml_dfs_core.py} +68 -4
  54. tests/test_utils.py +81 -0
  55. napistu-0.1.0.dist-info/METADATA +0 -56
  56. napistu-0.1.0.dist-info/RECORD +0 -77
  57. {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/entry_points.txt +0 -0
  58. {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/licenses/LICENSE +0 -0
  59. {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/top_level.txt +0 -0
@@ -42,11 +42,12 @@ logger = logging.getLogger(__name__)
42
42
 
43
43
  def create_cpr_graph(
44
44
  sbml_dfs: sbml_dfs_core.SBML_dfs,
45
- reaction_graph_attrs: dict = dict(),
45
+ reaction_graph_attrs: Optional[dict] = None,
46
46
  directed: bool = True,
47
47
  edge_reversed: bool = False,
48
48
  graph_type: str = CPR_GRAPH_TYPES.BIPARTITE,
49
49
  verbose: bool = False,
50
+ custom_transformations: Optional[dict] = None,
50
51
  ) -> ig.Graph:
51
52
  """
52
53
  Create CPR Graph
@@ -73,12 +74,17 @@ def create_cpr_graph(
73
74
  not modified by a substrate per-se).
74
75
  verbose : bool
75
76
  Extra reporting
77
+ custom_transformations : dict, optional
78
+ Dictionary of custom transformation functions to use for attribute transformation.
76
79
 
77
80
  Returns:
78
81
  ----------
79
82
  An Igraph network
80
83
  """
81
84
 
85
+ if reaction_graph_attrs is None:
86
+ reaction_graph_attrs = {}
87
+
82
88
  if not isinstance(sbml_dfs, sbml_dfs_core.SBML_dfs):
83
89
  raise TypeError(
84
90
  f"sbml_dfs must be a sbml_dfs_core.SBML_dfs, but was {type(sbml_dfs)}"
@@ -168,7 +174,10 @@ def create_cpr_graph(
168
174
 
169
175
  logger.info("Adding reversibility and other meta-data from reactions_data")
170
176
  augmented_network_edges = _augment_network_edges(
171
- network_edges, working_sbml_dfs, reaction_graph_attrs
177
+ network_edges,
178
+ working_sbml_dfs,
179
+ reaction_graph_attrs,
180
+ custom_transformations=custom_transformations,
172
181
  )
173
182
 
174
183
  logger.info(
@@ -264,12 +273,13 @@ def create_cpr_graph(
264
273
 
265
274
  def process_cpr_graph(
266
275
  sbml_dfs: sbml_dfs_core.SBML_dfs,
267
- reaction_graph_attrs: dict = dict(),
276
+ reaction_graph_attrs: Optional[dict] = None,
268
277
  directed: bool = True,
269
278
  edge_reversed: bool = False,
270
279
  graph_type: str = CPR_GRAPH_TYPES.BIPARTITE,
271
280
  weighting_strategy: str = CPR_WEIGHTING_STRATEGIES.UNWEIGHTED,
272
281
  verbose: bool = False,
282
+ custom_transformations: dict = None,
273
283
  ) -> ig.Graph:
274
284
  """
275
285
  Process Consensus Graph
@@ -294,11 +304,16 @@ def process_cpr_graph(
294
304
  - calibrated: transform edges with a quantitative score based on reaction_attrs and combine them
295
305
  with topology scores to generate a consensus.
296
306
  verbose (bool): Extra reporting
307
+ custom_transformations (dict, optional):
308
+ Dictionary of custom transformation functions to use for attribute transformation.
297
309
 
298
310
  Returns:
299
311
  weighted_graph (ig.Graph): An Igraph network
300
312
  """
301
313
 
314
+ if reaction_graph_attrs is None:
315
+ reaction_graph_attrs = {}
316
+
302
317
  logging.info("Constructing network")
303
318
  cpr_graph = create_cpr_graph(
304
319
  sbml_dfs,
@@ -307,6 +322,7 @@ def process_cpr_graph(
307
322
  edge_reversed=edge_reversed,
308
323
  graph_type=graph_type,
309
324
  verbose=verbose,
325
+ custom_transformations=custom_transformations,
310
326
  )
311
327
 
312
328
  if "reactions" in reaction_graph_attrs.keys():
@@ -326,7 +342,10 @@ def process_cpr_graph(
326
342
 
327
343
 
328
344
  def pluck_entity_data(
329
- sbml_dfs: sbml_dfs_core.SBML_dfs, graph_attrs: dict[str, dict], data_type: str
345
+ sbml_dfs: sbml_dfs_core.SBML_dfs,
346
+ graph_attrs: dict[str, dict],
347
+ data_type: str,
348
+ custom_transformations: Optional[dict[str, callable]] = None,
330
349
  ) -> pd.DataFrame | None:
331
350
  """
332
351
  Pluck Entity Attributes
@@ -338,13 +357,21 @@ def pluck_entity_data(
338
357
  sbml_dfs: sbml_dfs_core.SBML_dfs
339
358
  A mechanistic model
340
359
  graph_attrs: dict
341
- A dictionary of species/reaction attributes to pull out
360
+ A dictionary of species/reaction attributes to pull out. If the requested
361
+ data_type ("species" or "reactions") is not present as a key, or if the value
362
+ is an empty dict, this function will return None (no error).
342
363
  data_type: str
343
364
  "species" or "reactions" to pull out species_data or reactions_data
365
+ custom_transformations: dict[str, callable], optional
366
+ A dictionary mapping transformation names to functions. If provided, these
367
+ will be checked before built-in transformations. Example:
368
+ custom_transformations = {"square": lambda x: x**2}
344
369
 
345
370
  Returns:
346
371
  A table where all extracted attributes are merged based on a common index or None
347
- if no attributes were extracted.
372
+ if no attributes were extracted. If the requested data_type is not present in
373
+ graph_attrs, or if the attribute dict is empty, returns None. This is intended
374
+ to allow optional annotation blocks.
348
375
 
349
376
  """
350
377
 
@@ -361,30 +388,47 @@ def pluck_entity_data(
361
388
 
362
389
  entity_attrs = graph_attrs[data_type]
363
390
  # validating dict
364
- _validate_entity_attrs(entity_attrs)
391
+ _validate_entity_attrs(entity_attrs, custom_transformations=custom_transformations)
392
+
393
+ if len(entity_attrs) == 0:
394
+ logger.info(
395
+ f'No attributes defined for "{data_type}" in graph_attrs; returning None'
396
+ )
397
+ return None
365
398
 
366
399
  data_type_attr = data_type + "_data"
367
400
  entity_data_tbls = getattr(sbml_dfs, data_type_attr)
368
401
 
369
402
  data_list = list()
370
403
  for k, v in entity_attrs.items():
371
- if v["table"] is not None:
372
- # does the data table exist?
373
- if v["table"] not in entity_data_tbls.keys():
374
- raise ValueError(
375
- f"{v['table']} was defined as a table in \"graph_attrs\" but "
376
- f'it is not present in the "{data_type_attr}" of the sbml_dfs'
377
- )
404
+ # v["table"] is always present if entity_attrs is non-empty and validated
405
+ if v["table"] not in entity_data_tbls.keys():
406
+ raise ValueError(
407
+ f"{v['table']} was defined as a table in \"graph_attrs\" but "
408
+ f'it is not present in the "{data_type_attr}" of the sbml_dfs'
409
+ )
378
410
 
379
- if v["variable"] not in entity_data_tbls[v["table"]].columns.tolist():
380
- raise ValueError(
381
- f"{v['variable']} was defined as a variable in \"graph_attrs\" but "
382
- f"it is not present in the {v['table']} of the \"{data_type_attr}\" of "
383
- "the sbml_dfs"
384
- )
411
+ if v["variable"] not in entity_data_tbls[v["table"]].columns.tolist():
412
+ raise ValueError(
413
+ f"{v['variable']} was defined as a variable in \"graph_attrs\" but "
414
+ f"it is not present in the {v['table']} of the \"{data_type_attr}\" of "
415
+ "the sbml_dfs"
416
+ )
385
417
 
386
- entity_series = entity_data_tbls[v["table"]][v["variable"]].rename(k)
387
- data_list.append(entity_series)
418
+ entity_series = entity_data_tbls[v["table"]][v["variable"]].rename(k)
419
+ trans_name = v.get("trans", DEFAULT_WT_TRANS)
420
+ # Look up transformation
421
+ if custom_transformations and trans_name in custom_transformations:
422
+ trans_fxn = custom_transformations[trans_name]
423
+ elif trans_name in DEFINED_WEIGHT_TRANSFORMATION:
424
+ trans_fxn = globals()[DEFINED_WEIGHT_TRANSFORMATION[trans_name]]
425
+ else:
426
+ # This should never be hit if _validate_entity_attrs is called correctly.
427
+ raise ValueError(
428
+ f"Transformation '{trans_name}' not found in custom_transformations or DEFINED_WEIGHT_TRANSFORMATION."
429
+ )
430
+ entity_series = entity_series.apply(trans_fxn)
431
+ data_list.append(entity_series)
388
432
 
389
433
  if len(data_list) == 0:
390
434
  return None
@@ -392,7 +436,9 @@ def pluck_entity_data(
392
436
  return pd.concat(data_list, axis=1)
393
437
 
394
438
 
395
- def apply_weight_transformations(edges_df: pd.DataFrame, reaction_attrs: dict):
439
+ def apply_weight_transformations(
440
+ edges_df: pd.DataFrame, reaction_attrs: dict, custom_transformations: dict = None
441
+ ):
396
442
  """
397
443
  Apply Weight Transformations
398
444
 
@@ -403,22 +449,37 @@ def apply_weight_transformations(edges_df: pd.DataFrame, reaction_attrs: dict):
403
449
  A dictionary of attributes identifying weighting attributes within
404
450
  an sbml_df's reaction_data, how they will be named in edges_df (the keys),
405
451
  and how they should be transformed (the "trans" aliases)
452
+ custom_transformations (dict, optional):
453
+ A dictionary mapping transformation names to functions. If provided, these
454
+ will be checked before built-in transformations.
406
455
 
407
456
  Returns:
408
457
  transformed_edges_df (pd.DataFrame): edges_df with weight variables transformed.
409
458
 
410
459
  """
411
460
 
412
- _validate_entity_attrs(reaction_attrs)
461
+ _validate_entity_attrs(
462
+ reaction_attrs, custom_transformations=custom_transformations
463
+ )
413
464
 
414
465
  transformed_edges_df = copy.deepcopy(edges_df)
415
466
  for k, v in reaction_attrs.items():
416
467
  if k not in transformed_edges_df.columns:
417
468
  raise ValueError(f"A weighting variable {k} was missing from edges_df")
418
469
 
419
- trans_fxn = DEFINED_WEIGHT_TRANSFORMATION[v["trans"]]
470
+ trans_name = v["trans"]
471
+ # Look up transformation
472
+ if custom_transformations and trans_name in custom_transformations:
473
+ trans_fxn = custom_transformations[trans_name]
474
+ elif trans_name in DEFINED_WEIGHT_TRANSFORMATION:
475
+ trans_fxn = globals()[DEFINED_WEIGHT_TRANSFORMATION[trans_name]]
476
+ else:
477
+ # This should never be hit if _validate_entity_attrs is called correctly.
478
+ raise ValueError(
479
+ f"Transformation '{trans_name}' not found in custom_transformations or DEFINED_WEIGHT_TRANSFORMATION."
480
+ )
420
481
 
421
- transformed_edges_df[k] = transformed_edges_df[k].apply(globals()[trans_fxn])
482
+ transformed_edges_df[k] = transformed_edges_df[k].apply(trans_fxn)
422
483
 
423
484
  return transformed_edges_df
424
485
 
@@ -582,12 +643,11 @@ def _create_cpr_graph_tiered(
582
643
  invalid_sbo_terms = sbml_dfs.reaction_species[
583
644
  ~sbml_dfs.reaction_species[SBML_DFS.SBO_TERM].isin(MINI_SBO_TO_NAME.keys())
584
645
  ]
585
- assert isinstance(invalid_sbo_terms, pd.DataFrame)
586
646
 
587
647
  if invalid_sbo_terms.shape[0] != 0:
588
648
  invalid_counts = invalid_sbo_terms.value_counts(SBML_DFS.SBO_TERM).to_frame("N")
589
- assert isinstance(invalid_counts, pd.DataFrame)
590
-
649
+ if not isinstance(invalid_counts, pd.DataFrame):
650
+ raise TypeError("invalid_counts must be a pandas DataFrame")
591
651
  logger.warning(utils.style_df(invalid_counts, headers="keys")) # type: ignore
592
652
  raise ValueError("Some reaction species have unusable SBO terms")
593
653
 
@@ -647,28 +707,33 @@ def _create_cpr_graph_tiered(
647
707
  n_children = (
648
708
  unique_edges[CPR_GRAPH_EDGES.FROM]
649
709
  .value_counts()
650
- .to_frame()
710
+ # rename values to the child name
711
+ .to_frame(name=CPR_GRAPH_EDGES.SC_CHILDREN)
651
712
  .reset_index()
652
713
  .rename(
653
714
  {
654
- "index": SBML_DFS.SC_ID,
655
- CPR_GRAPH_EDGES.FROM: CPR_GRAPH_EDGES.SC_CHILDREN,
715
+ CPR_GRAPH_EDGES.FROM: SBML_DFS.SC_ID,
656
716
  },
657
717
  axis=1,
658
718
  )
659
719
  )
720
+
660
721
  # parents
661
722
  n_parents = (
662
723
  unique_edges[CPR_GRAPH_EDGES.TO]
663
724
  .value_counts()
664
- .to_frame()
725
+ # rename values to the parent name
726
+ .to_frame(name=CPR_GRAPH_EDGES.SC_PARENTS)
665
727
  .reset_index()
666
728
  .rename(
667
- {"index": SBML_DFS.SC_ID, CPR_GRAPH_EDGES.TO: CPR_GRAPH_EDGES.SC_PARENTS},
729
+ {
730
+ CPR_GRAPH_EDGES.TO: SBML_DFS.SC_ID,
731
+ },
668
732
  axis=1,
669
733
  )
670
734
  )
671
- graph_degree_by_edgelist = n_children.merge(n_parents, how="outer").fillna(0)
735
+
736
+ graph_degree_by_edgelist = n_children.merge(n_parents, how="outer").fillna(int(0))
672
737
 
673
738
  graph_degree_by_edgelist[CPR_GRAPH_EDGES.SC_DEGREE] = (
674
739
  graph_degree_by_edgelist[CPR_GRAPH_EDGES.SC_CHILDREN]
@@ -692,7 +757,7 @@ def _create_cpr_graph_tiered(
692
757
  axis=1,
693
758
  )
694
759
  .join(graph_degree_by_edgelist)
695
- .fillna(0)
760
+ .fillna(int(0))
696
761
  )
697
762
 
698
763
  is_from_reaction = all_reaction_edges_df[CPR_GRAPH_EDGES.FROM].isin(
@@ -740,9 +805,14 @@ def _format_tiered_reaction_species(
740
805
  """
741
806
 
742
807
  rxn_species = sorted_reaction_species.loc[r_id]
743
- assert isinstance(rxn_species, pd.DataFrame)
744
- assert list(rxn_species.index.names) == [SBML_DFS.SBO_TERM]
745
- assert rxn_species.columns.tolist() == [SBML_DFS.SC_ID, SBML_DFS.STOICHIOMETRY]
808
+ if not isinstance(rxn_species, pd.DataFrame):
809
+ raise TypeError("rxn_species must be a pandas DataFrame")
810
+ if list(rxn_species.index.names) != [SBML_DFS.SBO_TERM]:
811
+ raise ValueError("rxn_species index names must be [SBML_DFS.SBO_TERM]")
812
+ if rxn_species.columns.tolist() != [SBML_DFS.SC_ID, SBML_DFS.STOICHIOMETRY]:
813
+ raise ValueError(
814
+ "rxn_species columns must be [SBML_DFS.SC_ID, SBML_DFS.STOICHIOMETRY]"
815
+ )
746
816
 
747
817
  rxn_sbo_terms = set(rxn_species.index.unique())
748
818
  # map to common names
@@ -781,7 +851,8 @@ def _format_tiered_reaction_species(
781
851
  )
782
852
  ordered_tiers = entities_ordered_by_tier.index.get_level_values("tier").unique()
783
853
 
784
- assert len(ordered_tiers) > 1
854
+ if len(ordered_tiers) <= 1:
855
+ raise ValueError("ordered_tiers must have more than one element")
785
856
 
786
857
  # which tier is the reaction?
787
858
  reaction_tier = graph_hierarchy_df["tier"][
@@ -1275,9 +1346,34 @@ def _add_graph_species_attribute(
1275
1346
  cpr_graph: ig.Graph,
1276
1347
  sbml_dfs: sbml_dfs_core.SBML_dfs,
1277
1348
  species_graph_attrs: dict,
1349
+ custom_transformations: Optional[dict] = None,
1278
1350
  ) -> ig.Graph:
1279
- """Add meta-data from species_data to existing igraph's vertices."""
1351
+ """
1352
+ Add meta-data from species_data to existing igraph's vertices.
1353
+
1354
+ This function augments the vertices of an igraph network with additional attributes
1355
+ derived from the species-level data in the provided SBML_dfs object. The attributes
1356
+ to add are specified in the species_graph_attrs dictionary, and can be transformed
1357
+ using either built-in or user-supplied transformation functions.
1280
1358
 
1359
+ Parameters
1360
+ ----------
1361
+ cpr_graph : ig.Graph
1362
+ The igraph network to augment.
1363
+ sbml_dfs : sbml_dfs_core.SBML_dfs
1364
+ The SBML_dfs object containing species data.
1365
+ species_graph_attrs : dict
1366
+ Dictionary specifying which attributes to pull from species_data and how to transform them.
1367
+ The structure should be {attribute_name: {"table": ..., "variable": ..., "trans": ...}}.
1368
+ custom_transformations : dict, optional
1369
+ Dictionary mapping transformation names to functions. If provided, these will be checked
1370
+ before built-in transformations. Example: {"square": lambda x: x**2}
1371
+
1372
+ Returns
1373
+ -------
1374
+ ig.Graph
1375
+ The input igraph network with additional vertex attributes added from species_data.
1376
+ """
1281
1377
  if not isinstance(species_graph_attrs, dict):
1282
1378
  raise TypeError(
1283
1379
  f"species_graph_attrs must be a dict, but was {type(species_graph_attrs)}"
@@ -1288,7 +1384,9 @@ def _add_graph_species_attribute(
1288
1384
  sp_graph_key_list = []
1289
1385
  sp_node_attr_list = []
1290
1386
  for k in species_graph_attrs.keys():
1291
- _validate_entity_attrs(species_graph_attrs[k])
1387
+ _validate_entity_attrs(
1388
+ species_graph_attrs[k], custom_transformations=custom_transformations
1389
+ )
1292
1390
 
1293
1391
  sp_graph_key_list.append(k)
1294
1392
  sp_node_attr_list.append(list(species_graph_attrs[k].keys()))
@@ -1305,6 +1403,7 @@ def _add_graph_species_attribute(
1305
1403
  curr_network_nodes_df,
1306
1404
  sbml_dfs,
1307
1405
  species_graph_attrs,
1406
+ custom_transformations=custom_transformations,
1308
1407
  )
1309
1408
 
1310
1409
  for vs_attr in flat_sp_node_attr_list:
@@ -1319,9 +1418,33 @@ def _augment_network_nodes(
1319
1418
  network_nodes: pd.DataFrame,
1320
1419
  sbml_dfs: sbml_dfs_core.SBML_dfs,
1321
1420
  species_graph_attrs: dict = dict(),
1421
+ custom_transformations: Optional[dict] = None,
1322
1422
  ) -> pd.DataFrame:
1323
- """Add species-level attributes, expand network_nodes with s_id and c_id and then map to species-level attributes by s_id."""
1423
+ """
1424
+ Add species-level attributes, expand network_nodes with s_id and c_id and then map to species-level attributes by s_id.
1425
+
1426
+ This function merges species-level attributes from sbml_dfs into the provided network_nodes DataFrame,
1427
+ using the mapping in species_graph_attrs. Optionally, custom transformation functions can be provided
1428
+ to transform the attributes as they are added.
1324
1429
 
1430
+ Parameters
1431
+ ----------
1432
+ network_nodes : pd.DataFrame
1433
+ DataFrame of network nodes. Must include columns 'name', 'node_name', and 'node_type'.
1434
+ sbml_dfs : sbml_dfs_core.SBML_dfs
1435
+ The SBML_dfs object containing species data.
1436
+ species_graph_attrs : dict
1437
+ Dictionary specifying which attributes to pull from species_data and how to transform them.
1438
+ The structure should be {attribute_name: {"table": ..., "variable": ..., "trans": ...}}.
1439
+ custom_transformations : dict, optional
1440
+ Dictionary mapping transformation names to functions. If provided, these will be checked
1441
+ before built-in transformations. Example: {"square": lambda x: x**2}
1442
+
1443
+ Returns
1444
+ -------
1445
+ pd.DataFrame
1446
+ The input network_nodes DataFrame with additional columns for each extracted and transformed attribute.
1447
+ """
1325
1448
  REQUIRED_NETWORK_NODE_ATTRS = {
1326
1449
  "name",
1327
1450
  "node_name",
@@ -1349,18 +1472,26 @@ def _augment_network_nodes(
1349
1472
  )
1350
1473
 
1351
1474
  # assign species_data related attributes to s_id
1352
- species_graph_data = pluck_entity_data(sbml_dfs, species_graph_attrs, "species")
1475
+ species_graph_data = pluck_entity_data(
1476
+ sbml_dfs,
1477
+ species_graph_attrs,
1478
+ "species",
1479
+ custom_transformations=custom_transformations,
1480
+ )
1353
1481
 
1354
1482
  if species_graph_data is not None:
1355
1483
  # add species_graph_data to the network_nodes df, based on s_id
1356
1484
  network_nodes_wdata = network_nodes_sid.merge(
1357
1485
  species_graph_data, left_on="s_id", right_index=True, how="left"
1358
1486
  )
1487
+ else:
1488
+ network_nodes_wdata = network_nodes_sid
1359
1489
 
1360
1490
  # Note: multiple sc_ids with the same s_id will be assign with the same species_graph_data
1361
1491
 
1362
- network_nodes_wdata.fillna(0, inplace=True)
1363
- network_nodes_wdata.drop(columns=["s_id", "c_id"], inplace=True)
1492
+ network_nodes_wdata = network_nodes_wdata.fillna(int(0)).drop(
1493
+ columns=["s_id", "c_id"]
1494
+ )
1364
1495
 
1365
1496
  return network_nodes_wdata
1366
1497
 
@@ -1369,9 +1500,21 @@ def _augment_network_edges(
1369
1500
  network_edges: pd.DataFrame,
1370
1501
  sbml_dfs: sbml_dfs_core.SBML_dfs,
1371
1502
  reaction_graph_attrs: dict = dict(),
1503
+ custom_transformations: Optional[dict] = None,
1372
1504
  ) -> pd.DataFrame:
1373
- """Add reversibility and other metadata from reactions."""
1505
+ """Add reversibility and other metadata from reactions.
1374
1506
 
1507
+ Parameters
1508
+ ----------
1509
+ network_edges : pd.DataFrame
1510
+ DataFrame of network edges.
1511
+ sbml_dfs : sbml_dfs_core.SBML_dfs
1512
+ The SBML_dfs object containing reaction data.
1513
+ reaction_graph_attrs : dict
1514
+ Dictionary of reaction attributes to add.
1515
+ custom_transformations : dict, optional
1516
+ Dictionary of custom transformation functions to use for attribute transformation.
1517
+ """
1375
1518
  REQUIRED_NETWORK_EDGE_ATTRS = {
1376
1519
  "from",
1377
1520
  "to",
@@ -1406,7 +1549,10 @@ def _augment_network_edges(
1406
1549
 
1407
1550
  # add other attributes based on reactions data
1408
1551
  reaction_graph_data = pluck_entity_data(
1409
- sbml_dfs, reaction_graph_attrs, SBML_DFS.REACTIONS
1552
+ sbml_dfs,
1553
+ reaction_graph_attrs,
1554
+ SBML_DFS.REACTIONS,
1555
+ custom_transformations=custom_transformations,
1410
1556
  )
1411
1557
  if reaction_graph_data is not None:
1412
1558
  network_edges = network_edges.merge(
@@ -1491,7 +1637,10 @@ def _reverse_network_edges(augmented_network_edges: pd.DataFrame) -> pd.DataFram
1491
1637
  ]
1492
1638
  )
1493
1639
 
1494
- assert transformed_r_reaction_edges.shape[0] == r_reaction_edges.shape[0]
1640
+ if transformed_r_reaction_edges.shape[0] != r_reaction_edges.shape[0]:
1641
+ raise ValueError(
1642
+ "transformed_r_reaction_edges and r_reaction_edges must have the same number of rows"
1643
+ )
1495
1644
 
1496
1645
  return transformed_r_reaction_edges.assign(
1497
1646
  direction=CPR_GRAPH_EDGE_DIRECTIONS.REVERSE
@@ -1621,7 +1770,9 @@ def _create_topology_weights(
1621
1770
 
1622
1771
 
1623
1772
  def _validate_entity_attrs(
1624
- entity_attrs: dict, validate_transformations: bool = True
1773
+ entity_attrs: dict,
1774
+ validate_transformations: bool = True,
1775
+ custom_transformations: Optional[dict] = None,
1625
1776
  ) -> None:
1626
1777
  """Validate that graph attributes are a valid format."""
1627
1778
 
@@ -1631,11 +1782,15 @@ def _validate_entity_attrs(
1631
1782
  entity_attrs = _EntityAttrValidator(**v).model_dump()
1632
1783
 
1633
1784
  if validate_transformations:
1634
- if v["trans"] not in DEFINED_WEIGHT_TRANSFORMATION.keys():
1785
+ trans_name = v["trans"]
1786
+ valid_trans = set(DEFINED_WEIGHT_TRANSFORMATION.keys())
1787
+ if custom_transformations:
1788
+ valid_trans = valid_trans.union(set(custom_transformations.keys()))
1789
+ if trans_name not in valid_trans:
1635
1790
  raise ValueError(
1636
- f"transformation {v['trans']} was not defined as an alias in "
1637
- "DEFINED_WEIGHT_TRANSFORMATION. The defined transformations "
1638
- f"are {', '.join(DEFINED_WEIGHT_TRANSFORMATION.keys())}"
1791
+ f"transformation {trans_name} was not defined as an alias in "
1792
+ "DEFINED_WEIGHT_TRANSFORMATION or custom_transformations. The defined transformations "
1793
+ f"are {', '.join(valid_trans)}"
1639
1794
  )
1640
1795
 
1641
1796
  return None
@@ -0,0 +1,118 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ import igraph as ig
4
+ import inspect
5
+
6
+ from typing import Optional
7
+
8
+
9
def personalized_pagerank_by_attribute(
    g: "ig.Graph",
    attribute: str,
    damping: float = 0.85,
    calculate_uniform_dist: bool = True,
    additional_propagation_args: Optional[dict] = None,
) -> pd.DataFrame:
    """
    Run personalized PageRank with reset probability proportional to a vertex attribute.

    Optionally also computes a "uniform" personalized PageRank in which the reset
    probability is spread evenly over the vertices with a nonzero attribute value,
    which can serve as a baseline for the attribute-weighted scores.

    Parameters
    ----------
    g : igraph.Graph
        The input graph.
    attribute : str
        The vertex attribute to use for personalization. Missing or None values
        are treated as 0; values must be non-negative and not all zero.
    damping : float, optional
        Damping factor (default 0.85).
    calculate_uniform_dist : bool, optional
        If True, also compute uniform PPR over nonzero attribute nodes.
    additional_propagation_args : dict, optional
        Additional keyword arguments forwarded to igraph's personalized_pagerank.
        Keys must match that method's signature and may not include 'reset' or
        'damping', which are set by this function.

    Returns
    -------
    pd.DataFrame
        DataFrame with columns ['name', 'pagerank_by_attribute', attribute] and,
        when calculate_uniform_dist is True, 'pagerank_uniform'.

    Example
    -------
    >>> import igraph as ig
    >>> from napistu.network.net_propagation import personalized_pagerank_by_attribute
    >>> g = ig.Graph.Full(3)
    >>> g.vs['name'] = ['A', 'B', 'C']
    >>> g.vs['score'] = [1, 0, 2]
    >>> df = personalized_pagerank_by_attribute(g, 'score')
    >>> print(df)
    """
    # Validate and extract the attribute (missing/None treated as 0)
    attr = _ensure_nonnegative_vertex_attribute(g, attribute)

    # Validate additional_propagation_args against the method's signature so a
    # typo fails loudly here rather than deep inside igraph.
    if additional_propagation_args is None:
        additional_propagation_args = {}
    else:
        valid_args = set(inspect.signature(g.personalized_pagerank).parameters.keys())
        for k in additional_propagation_args:
            if k not in valid_args:
                raise ValueError(f"Invalid argument for personalized_pagerank: {k}")
            if k in ("reset", "damping"):
                # these are supplied explicitly below; forwarding them as well
                # would raise an opaque duplicate-keyword TypeError
                raise ValueError(
                    f"'{k}' is set by personalized_pagerank_by_attribute and "
                    "cannot be overridden via additional_propagation_args"
                )

    # Personalized PageRank (no manual normalization; igraph handles it)
    pr_attr = g.personalized_pagerank(
        reset=attr.tolist(), damping=damping, **additional_propagation_args
    )

    # Node names (fall back to integer indices for unnamed graphs)
    names = g.vs["name"] if "name" in g.vs.attributes() else list(range(g.vcount()))

    data = {"name": names, "pagerank_by_attribute": pr_attr, attribute: attr}

    # Uniform PPR over nodes with a nonzero attribute value
    if calculate_uniform_dist:
        used_in_uniform = attr > 0
        n_uniform = used_in_uniform.sum()
        if n_uniform == 0:
            raise ValueError("No nonzero attribute values for uniform PPR.")
        uniform_vec = np.zeros_like(attr, dtype=float)
        uniform_vec[used_in_uniform] = 1.0 / n_uniform
        pr_uniform = g.personalized_pagerank(
            reset=uniform_vec.tolist(), damping=damping, **additional_propagation_args
        )
        data["pagerank_uniform"] = pr_uniform

    return pd.DataFrame(data)
84
+
85
+
86
+ def _ensure_nonnegative_vertex_attribute(g: ig.Graph, attribute: str):
87
+ """
88
+ Utility to check that a vertex attribute is present, numeric, and non-negative.
89
+ Raises ValueError if checks fail.
90
+ Missing or None values are treated as 0.
91
+ Raises ValueError if attribute is missing for all vertices or all values are zero.
92
+ """
93
+
94
+ all_missing = all(
95
+ (attribute not in v.attributes() or v[attribute] is None) for v in g.vs
96
+ )
97
+ if all_missing:
98
+ raise ValueError(f"Vertex attribute '{attribute}' is missing for all vertices.")
99
+
100
+ values = [
101
+ (
102
+ v[attribute]
103
+ if (attribute in v.attributes() and v[attribute] is not None)
104
+ else 0.0
105
+ )
106
+ for v in g.vs
107
+ ]
108
+
109
+ arr = np.array(values, dtype=float)
110
+
111
+ if np.all(arr == 0):
112
+ raise ValueError(
113
+ f"Vertex attribute '{attribute}' is zero for all vertices; cannot use as reset vector."
114
+ )
115
+ if np.any(arr < 0):
116
+ raise ValueError(f"Attribute '{attribute}' contains negative values.")
117
+
118
+ return arr
@@ -18,6 +18,7 @@ from napistu.network import net_create
18
18
  from napistu.constants import SBML_DFS
19
19
  from napistu.constants import SOURCE_SPEC
20
20
 
21
+ from napistu.identifiers import _validate_assets_sbml_ids
21
22
  from napistu.network.constants import CPR_GRAPH_NODES
22
23
  from napistu.network.constants import CPR_GRAPH_TYPES
23
24
 
@@ -520,38 +521,6 @@ def _validate_assets_graph_dist(
520
521
  return None
521
522
 
522
523
 
523
- def _validate_assets_sbml_ids(
524
- sbml_dfs: sbml_dfs_core.SBML_dfs, identifiers_df: pd.DataFrame
525
- ) -> None:
526
- """Check an sbml_dfs file and identifiers table for inconsistencies."""
527
-
528
- joined_species_w_ids = sbml_dfs.species.merge(
529
- identifiers_df[["s_id", "s_name"]].drop_duplicates(),
530
- left_index=True,
531
- right_on="s_id",
532
- )
533
-
534
- inconsistent_names_df = joined_species_w_ids.query("s_name_x != s_name_y").dropna()
535
- inconsistent_names_list = [
536
- f"{x} != {y}"
537
- for x, y in zip(
538
- inconsistent_names_df["s_name_x"], inconsistent_names_df["s_name_y"]
539
- )
540
- ]
541
-
542
- if len(inconsistent_names_list):
543
- example_inconsistent_names = inconsistent_names_list[
544
- 0 : min(10, len(inconsistent_names_list))
545
- ]
546
-
547
- raise ValueError(
548
- f"{len(inconsistent_names_list)} species names do not match between "
549
- f"sbml_dfs and identifiers_df including: {', '.join(example_inconsistent_names)}"
550
- )
551
-
552
- return None
553
-
554
-
555
524
  def _get_top_n_idx(arr: Sequence, n: int, ascending: bool = False) -> Sequence[int]:
556
525
  """Returns the indices of the top n values in an array
557
526