napistu 0.1.0__py3-none-any.whl → 0.2.4.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__init__.py +1 -1
- napistu/consensus.py +1010 -513
- napistu/constants.py +24 -0
- napistu/gcs/constants.py +2 -2
- napistu/gcs/downloads.py +57 -25
- napistu/gcs/utils.py +21 -0
- napistu/identifiers.py +105 -6
- napistu/ingestion/constants.py +0 -1
- napistu/ingestion/obo.py +24 -8
- napistu/ingestion/psi_mi.py +20 -5
- napistu/ingestion/reactome.py +8 -32
- napistu/mcp/__init__.py +69 -0
- napistu/mcp/__main__.py +180 -0
- napistu/mcp/codebase.py +182 -0
- napistu/mcp/codebase_utils.py +298 -0
- napistu/mcp/constants.py +72 -0
- napistu/mcp/documentation.py +166 -0
- napistu/mcp/documentation_utils.py +235 -0
- napistu/mcp/execution.py +382 -0
- napistu/mcp/profiles.py +73 -0
- napistu/mcp/server.py +86 -0
- napistu/mcp/tutorials.py +124 -0
- napistu/mcp/tutorials_utils.py +230 -0
- napistu/mcp/utils.py +47 -0
- napistu/mechanism_matching.py +782 -26
- napistu/modify/constants.py +41 -0
- napistu/modify/curation.py +4 -1
- napistu/modify/gaps.py +243 -156
- napistu/modify/pathwayannot.py +26 -8
- napistu/network/neighborhoods.py +16 -7
- napistu/network/net_create.py +209 -54
- napistu/network/net_propagation.py +118 -0
- napistu/network/net_utils.py +1 -32
- napistu/rpy2/netcontextr.py +10 -7
- napistu/rpy2/rids.py +7 -5
- napistu/sbml_dfs_core.py +46 -29
- napistu/sbml_dfs_utils.py +37 -1
- napistu/source.py +8 -2
- napistu/utils.py +67 -8
- napistu-0.2.4.dev3.dist-info/METADATA +84 -0
- napistu-0.2.4.dev3.dist-info/RECORD +95 -0
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/WHEEL +1 -1
- tests/conftest.py +11 -5
- tests/test_consensus.py +4 -1
- tests/test_gaps.py +127 -0
- tests/test_gcs.py +3 -2
- tests/test_igraph.py +14 -0
- tests/test_mcp_documentation_utils.py +13 -0
- tests/test_mechanism_matching.py +658 -0
- tests/test_net_propagation.py +89 -0
- tests/test_net_utils.py +83 -0
- tests/test_sbml.py +2 -0
- tests/{test_sbml_dfs_create.py → test_sbml_dfs_core.py} +68 -4
- tests/test_utils.py +81 -0
- napistu-0.1.0.dist-info/METADATA +0 -56
- napistu-0.1.0.dist-info/RECORD +0 -77
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/entry_points.txt +0 -0
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev3.dist-info}/top_level.txt +0 -0
napistu/network/net_create.py
CHANGED
@@ -42,11 +42,12 @@ logger = logging.getLogger(__name__)
 
 def create_cpr_graph(
     sbml_dfs: sbml_dfs_core.SBML_dfs,
-    reaction_graph_attrs: dict = dict(),
+    reaction_graph_attrs: Optional[dict] = None,
     directed: bool = True,
     edge_reversed: bool = False,
     graph_type: str = CPR_GRAPH_TYPES.BIPARTITE,
     verbose: bool = False,
+    custom_transformations: Optional[dict] = None,
 ) -> ig.Graph:
     """
     Create CPR Graph
@@ -73,12 +74,17 @@ def create_cpr_graph(
         not modified by a substrate per-se).
     verbose : bool
         Extra reporting
+    custom_transformations : dict, optional
+        Dictionary of custom transformation functions to use for attribute transformation.
 
     Returns:
     ----------
     An Igraph network
     """
 
+    if reaction_graph_attrs is None:
+        reaction_graph_attrs = {}
+
     if not isinstance(sbml_dfs, sbml_dfs_core.SBML_dfs):
         raise TypeError(
             f"sbml_dfs must be a sbml_dfs_core.SBML_dfs, but was {type(sbml_dfs)}"
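Taken together, the two new arguments let callers register transformation aliases beyond the built-ins when building a graph. A minimal usage sketch, not part of the diff itself: the `sbml_dfs` model and the `string` table / `combined_score` column are hypothetical placeholders.

```python
from napistu.network import net_create

# Hypothetical reaction attribute pulled from reactions_data and squared on the way in.
reaction_graph_attrs = {
    "reactions": {
        "string_wt": {"table": "string", "variable": "combined_score", "trans": "square"}
    }
}

cpr_graph = net_create.create_cpr_graph(
    sbml_dfs,  # an existing sbml_dfs_core.SBML_dfs model (placeholder)
    reaction_graph_attrs=reaction_graph_attrs,
    custom_transformations={"square": lambda x: x**2},  # checked before built-in aliases
)
```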
@@ -168,7 +174,10 @@ def create_cpr_graph(
 
     logger.info("Adding reversibility and other meta-data from reactions_data")
     augmented_network_edges = _augment_network_edges(
-        network_edges,
+        network_edges,
+        working_sbml_dfs,
+        reaction_graph_attrs,
+        custom_transformations=custom_transformations,
     )
 
     logger.info(
@@ -264,12 +273,13 @@ def create_cpr_graph(
 
 def process_cpr_graph(
     sbml_dfs: sbml_dfs_core.SBML_dfs,
-    reaction_graph_attrs: dict = dict(),
+    reaction_graph_attrs: Optional[dict] = None,
    directed: bool = True,
     edge_reversed: bool = False,
     graph_type: str = CPR_GRAPH_TYPES.BIPARTITE,
     weighting_strategy: str = CPR_WEIGHTING_STRATEGIES.UNWEIGHTED,
     verbose: bool = False,
+    custom_transformations: dict = None,
 ) -> ig.Graph:
     """
     Process Consensus Graph
@@ -294,11 +304,16 @@ def process_cpr_graph(
     - calibrated: transforme edges with a quantitative score based on reaction_attrs and combine them
       with topology scores to generate a consensus.
     verbose (bool): Extra reporting
+    custom_transformations (dict, optional):
+        Dictionary of custom transformation functions to use for attribute transformation.
 
     Returns:
     weighted_graph (ig.Graph): An Igraph network
     """
 
+    if reaction_graph_attrs is None:
+        reaction_graph_attrs = {}
+
     logging.info("Constructing network")
     cpr_graph = create_cpr_graph(
         sbml_dfs,
@@ -307,6 +322,7 @@ def process_cpr_graph(
         edge_reversed=edge_reversed,
         graph_type=graph_type,
         verbose=verbose,
+        custom_transformations=custom_transformations,
     )
 
     if "reactions" in reaction_graph_attrs.keys():
@@ -326,7 +342,10 @@ def process_cpr_graph(
 
 
 def pluck_entity_data(
-    sbml_dfs: sbml_dfs_core.SBML_dfs,
+    sbml_dfs: sbml_dfs_core.SBML_dfs,
+    graph_attrs: dict[str, dict],
+    data_type: str,
+    custom_transformations: Optional[dict[str, callable]] = None,
 ) -> pd.DataFrame | None:
     """
     Pluck Entity Attributes
@@ -338,13 +357,21 @@ def pluck_entity_data(
     sbml_dfs: sbml_dfs_core.SBML_dfs
         A mechanistic model
     graph_attrs: dict
-        A dictionary of species/reaction attributes to pull out
+        A dictionary of species/reaction attributes to pull out. If the requested
+        data_type ("species" or "reactions") is not present as a key, or if the value
+        is an empty dict, this function will return None (no error).
     data_type: str
         "species" or "reactions" to pull out species_data or reactions_data
+    custom_transformations: dict[str, callable], optional
+        A dictionary mapping transformation names to functions. If provided, these
+        will be checked before built-in transformations. Example:
+        custom_transformations = {"square": lambda x: x**2}
 
     Returns:
     A table where all extracted attributes are merged based on a common index or None
-    if no attributes were extracted.
+    if no attributes were extracted. If the requested data_type is not present in
+    graph_attrs, or if the attribute dict is empty, returns None. This is intended
+    to allow optional annotation blocks.
 
     """
 
@@ -361,30 +388,47 @@ def pluck_entity_data(
 
     entity_attrs = graph_attrs[data_type]
     # validating dict
-    _validate_entity_attrs(entity_attrs)
+    _validate_entity_attrs(entity_attrs, custom_transformations=custom_transformations)
+
+    if len(entity_attrs) == 0:
+        logger.info(
+            f'No attributes defined for "{data_type}" in graph_attrs; returning None'
+        )
+        return None
 
     data_type_attr = data_type + "_data"
     entity_data_tbls = getattr(sbml_dfs, data_type_attr)
 
     data_list = list()
     for k, v in entity_attrs.items():
-
-
-
-
-
-
-        )
+        # v["table"] is always present if entity_attrs is non-empty and validated
+        if v["table"] not in entity_data_tbls.keys():
+            raise ValueError(
+                f"{v['table']} was defined as a table in \"graph_attrs\" but "
+                f'it is not present in the "{data_type_attr}" of the sbml_dfs'
+            )
 
-
-
-
-
-
-
+        if v["variable"] not in entity_data_tbls[v["table"]].columns.tolist():
+            raise ValueError(
+                f"{v['variable']} was defined as a variable in \"graph_attrs\" but "
+                f"it is not present in the {v['table']} of the \"{data_type_attr}\" of "
+                "the sbml_dfs"
+            )
 
-
-
+        entity_series = entity_data_tbls[v["table"]][v["variable"]].rename(k)
+        trans_name = v.get("trans", DEFAULT_WT_TRANS)
+        # Look up transformation
+        if custom_transformations and trans_name in custom_transformations:
+            trans_fxn = custom_transformations[trans_name]
+        elif trans_name in DEFINED_WEIGHT_TRANSFORMATION:
+            trans_fxn = globals()[DEFINED_WEIGHT_TRANSFORMATION[trans_name]]
+        else:
+            # This should never be hit if _validate_entity_attrs is called correctly.
+            raise ValueError(
+                f"Transformation '{trans_name}' not found in custom_transformations or DEFINED_WEIGHT_TRANSFORMATION."
+            )
+        entity_series = entity_series.apply(trans_fxn)
+        data_list.append(entity_series)
 
     if len(data_list) == 0:
         return None
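For reference, the graph_attrs mapping that pluck_entity_data walks is keyed first by data_type and then by the output column name. A sketch with hypothetical table and variable names (the tables would need to exist in the model's species_data/reactions_data):

```python
from napistu.network import net_create

graph_attrs = {
    "species": {
        "expression": {"table": "rnaseq", "variable": "log2fc", "trans": "square"}
    },
    "reactions": {
        "string_wt": {"table": "string", "variable": "combined_score", "trans": "square"}
    },
}

# Returns a DataFrame of transformed species attributes indexed like species_data,
# or None if the "species" block is missing or empty.
species_attrs = net_create.pluck_entity_data(
    sbml_dfs,  # placeholder SBML_dfs whose species_data contains an "rnaseq" table
    graph_attrs,
    "species",
    custom_transformations={"square": lambda x: x**2},
)
```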
@@ -392,7 +436,9 @@ def pluck_entity_data(
     return pd.concat(data_list, axis=1)
 
 
-def apply_weight_transformations(edges_df: pd.DataFrame, reaction_attrs: dict):
+def apply_weight_transformations(
+    edges_df: pd.DataFrame, reaction_attrs: dict, custom_transformations: dict = None
+):
     """
     Apply Weight Transformations
 
@@ -403,22 +449,37 @@ def apply_weight_transformations(edges_df: pd.DataFrame, reaction_attrs: dict):
         A dictionary of attributes identifying weighting attributes within
         an sbml_df's reaction_data, how they will be named in edges_df (the keys),
         and how they should be transformed (the "trans" aliases")
+    custom_transformations (dict, optional):
+        A dictionary mapping transformation names to functions. If provided, these
+        will be checked before built-in transformations.
 
     Returns:
     transformed_edges_df (pd.DataFrame): edges_df with weight variables transformed.
 
     """
 
-    _validate_entity_attrs(reaction_attrs)
+    _validate_entity_attrs(
+        reaction_attrs, custom_transformations=custom_transformations
+    )
 
     transformed_edges_df = copy.deepcopy(edges_df)
     for k, v in reaction_attrs.items():
        if k not in transformed_edges_df.columns:
             raise ValueError(f"A weighting variable {k} was missing from edges_df")
 
-
+        trans_name = v["trans"]
+        # Look up transformation
+        if custom_transformations and trans_name in custom_transformations:
+            trans_fxn = custom_transformations[trans_name]
+        elif trans_name in DEFINED_WEIGHT_TRANSFORMATION:
+            trans_fxn = globals()[DEFINED_WEIGHT_TRANSFORMATION[trans_name]]
+        else:
+            # This should never be hit if _validate_entity_attrs is called correctly.
+            raise ValueError(
+                f"Transformation '{trans_name}' not found in custom_transformations or DEFINED_WEIGHT_TRANSFORMATION."
+            )
 
-        transformed_edges_df[k] = transformed_edges_df[k].apply(
+        transformed_edges_df[k] = transformed_edges_df[k].apply(trans_fxn)
 
     return transformed_edges_df
 
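The alias lookup order is the same as in pluck_entity_data: custom_transformations first, then the built-in DEFINED_WEIGHT_TRANSFORMATION map. A self-contained sketch with a hypothetical weighting column:

```python
import pandas as pd

from napistu.network import net_create

edges_df = pd.DataFrame({"from": ["A"], "to": ["B"], "string_wt": [900.0]})

reaction_attrs = {
    "string_wt": {"table": "string", "variable": "combined_score", "trans": "scaled"}
}

transformed = net_create.apply_weight_transformations(
    edges_df,
    reaction_attrs,
    custom_transformations={"scaled": lambda x: x / 1000},  # custom alias wins the lookup
)
# transformed["string_wt"] is now 0.9
```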
@@ -582,12 +643,11 @@ def _create_cpr_graph_tiered(
     invalid_sbo_terms = sbml_dfs.reaction_species[
         ~sbml_dfs.reaction_species[SBML_DFS.SBO_TERM].isin(MINI_SBO_TO_NAME.keys())
     ]
-    assert isinstance(invalid_sbo_terms, pd.DataFrame)
 
     if invalid_sbo_terms.shape[0] != 0:
         invalid_counts = invalid_sbo_terms.value_counts(SBML_DFS.SBO_TERM).to_frame("N")
-
-
+        if not isinstance(invalid_counts, pd.DataFrame):
+            raise TypeError("invalid_counts must be a pandas DataFrame")
         logger.warning(utils.style_df(invalid_counts, headers="keys"))  # type: ignore
         raise ValueError("Some reaction species have unusable SBO terms")
 
@@ -647,28 +707,33 @@ def _create_cpr_graph_tiered(
     n_children = (
         unique_edges[CPR_GRAPH_EDGES.FROM]
         .value_counts()
-
+        # rename values to the child name
+        .to_frame(name=CPR_GRAPH_EDGES.SC_CHILDREN)
         .reset_index()
         .rename(
             {
-
-                CPR_GRAPH_EDGES.FROM: CPR_GRAPH_EDGES.SC_CHILDREN,
+                CPR_GRAPH_EDGES.FROM: SBML_DFS.SC_ID,
             },
             axis=1,
         )
     )
+
     # parents
     n_parents = (
         unique_edges[CPR_GRAPH_EDGES.TO]
         .value_counts()
-
+        # rename values to the parent name
+        .to_frame(name=CPR_GRAPH_EDGES.SC_PARENTS)
         .reset_index()
         .rename(
-            {
+            {
+                CPR_GRAPH_EDGES.TO: SBML_DFS.SC_ID,
+            },
             axis=1,
         )
     )
-
+
+    graph_degree_by_edgelist = n_children.merge(n_parents, how="outer").fillna(int(0))
 
     graph_degree_by_edgelist[CPR_GRAPH_EDGES.SC_DEGREE] = (
         graph_degree_by_edgelist[CPR_GRAPH_EDGES.SC_CHILDREN]
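The children/parents bookkeeping above is plain pandas; a standalone sketch with hypothetical column names, assuming pandas >= 2.0 where value_counts() names its index after the source column:

```python
import pandas as pd

edges = pd.DataFrame({"from": ["R1", "R1", "R2"], "to": ["A", "B", "A"]})

n_children = (
    edges["from"]
    .value_counts()
    .to_frame(name="sc_children")
    .reset_index()
    .rename({"from": "sc_id"}, axis=1)
)
n_parents = (
    edges["to"]
    .value_counts()
    .to_frame(name="sc_parents")
    .reset_index()
    .rename({"to": "sc_id"}, axis=1)
)

# Outer-join on sc_id so vertices seen on only one side still get a (filled) zero count.
degree = n_children.merge(n_parents, how="outer").fillna(int(0))
degree["sc_degree"] = degree["sc_children"] + degree["sc_parents"]
```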
@@ -692,7 +757,7 @@ def _create_cpr_graph_tiered(
             axis=1,
         )
         .join(graph_degree_by_edgelist)
-        .fillna(0)
+        .fillna(int(0))
     )
 
     is_from_reaction = all_reaction_edges_df[CPR_GRAPH_EDGES.FROM].isin(
@@ -740,9 +805,14 @@ def _format_tiered_reaction_species(
     """
 
     rxn_species = sorted_reaction_species.loc[r_id]
-
-
-
+    if not isinstance(rxn_species, pd.DataFrame):
+        raise TypeError("rxn_species must be a pandas DataFrame")
+    if list(rxn_species.index.names) != [SBML_DFS.SBO_TERM]:
+        raise ValueError("rxn_species index names must be [SBML_DFS.SBO_TERM]")
+    if rxn_species.columns.tolist() != [SBML_DFS.SC_ID, SBML_DFS.STOICHIOMETRY]:
+        raise ValueError(
+            "rxn_species columns must be [SBML_DFS.SC_ID, SBML_DFS.STOICHIOMETRY]"
+        )
 
     rxn_sbo_terms = set(rxn_species.index.unique())
     # map to common names
@@ -781,7 +851,8 @@ def _format_tiered_reaction_species(
     )
     ordered_tiers = entities_ordered_by_tier.index.get_level_values("tier").unique()
 
-
+    if len(ordered_tiers) <= 1:
+        raise ValueError("ordered_tiers must have more than one element")
 
     # which tier is the reaction?
     reaction_tier = graph_hierarchy_df["tier"][
@@ -1275,9 +1346,34 @@ def _add_graph_species_attribute(
     cpr_graph: ig.Graph,
     sbml_dfs: sbml_dfs_core.SBML_dfs,
     species_graph_attrs: dict,
+    custom_transformations: Optional[dict] = None,
 ) -> ig.Graph:
-    """
+    """
+    Add meta-data from species_data to existing igraph's vertices.
+
+    This function augments the vertices of an igraph network with additional attributes
+    derived from the species-level data in the provided SBML_dfs object. The attributes
+    to add are specified in the species_graph_attrs dictionary, and can be transformed
+    using either built-in or user-supplied transformation functions.
 
+    Parameters
+    ----------
+    cpr_graph : ig.Graph
+        The igraph network to augment.
+    sbml_dfs : sbml_dfs_core.SBML_dfs
+        The SBML_dfs object containing species data.
+    species_graph_attrs : dict
+        Dictionary specifying which attributes to pull from species_data and how to transform them.
+        The structure should be {attribute_name: {"table": ..., "variable": ..., "trans": ...}}.
+    custom_transformations : dict, optional
+        Dictionary mapping transformation names to functions. If provided, these will be checked
+        before built-in transformations. Example: {"square": lambda x: x**2}
+
+    Returns
+    -------
+    ig.Graph
+        The input igraph network with additional vertex attributes added from species_data.
+    """
     if not isinstance(species_graph_attrs, dict):
         raise TypeError(
             f"species_graph_attrs must be a dict, but was {type(species_graph_attrs)}"
@@ -1288,7 +1384,9 @@ def _add_graph_species_attribute(
     sp_graph_key_list = []
     sp_node_attr_list = []
     for k in species_graph_attrs.keys():
-        _validate_entity_attrs(species_graph_attrs[k])
+        _validate_entity_attrs(
+            species_graph_attrs[k], custom_transformations=custom_transformations
+        )
 
         sp_graph_key_list.append(k)
         sp_node_attr_list.append(list(species_graph_attrs[k].keys()))
@@ -1305,6 +1403,7 @@ def _add_graph_species_attribute(
         curr_network_nodes_df,
         sbml_dfs,
         species_graph_attrs,
+        custom_transformations=custom_transformations,
     )
 
     for vs_attr in flat_sp_node_attr_list:
@@ -1319,9 +1418,33 @@ def _augment_network_nodes(
     network_nodes: pd.DataFrame,
     sbml_dfs: sbml_dfs_core.SBML_dfs,
     species_graph_attrs: dict = dict(),
+    custom_transformations: Optional[dict] = None,
 ) -> pd.DataFrame:
-    """
+    """
+    Add species-level attributes, expand network_nodes with s_id and c_id and then map to species-level attributes by s_id.
+
+    This function merges species-level attributes from sbml_dfs into the provided network_nodes DataFrame,
+    using the mapping in species_graph_attrs. Optionally, custom transformation functions can be provided
+    to transform the attributes as they are added.
 
+    Parameters
+    ----------
+    network_nodes : pd.DataFrame
+        DataFrame of network nodes. Must include columns 'name', 'node_name', and 'node_type'.
+    sbml_dfs : sbml_dfs_core.SBML_dfs
+        The SBML_dfs object containing species data.
+    species_graph_attrs : dict
+        Dictionary specifying which attributes to pull from species_data and how to transform them.
+        The structure should be {attribute_name: {"table": ..., "variable": ..., "trans": ...}}.
+    custom_transformations : dict, optional
+        Dictionary mapping transformation names to functions. If provided, these will be checked
+        before built-in transformations. Example: {"square": lambda x: x**2}
+
+    Returns
+    -------
+    pd.DataFrame
+        The input network_nodes DataFrame with additional columns for each extracted and transformed attribute.
+    """
     REQUIRED_NETWORK_NODE_ATTRS = {
         "name",
         "node_name",
@@ -1349,18 +1472,26 @@ def _augment_network_nodes(
     )
 
     # assign species_data related attributes to s_id
-    species_graph_data = pluck_entity_data(
+    species_graph_data = pluck_entity_data(
+        sbml_dfs,
+        species_graph_attrs,
+        "species",
+        custom_transformations=custom_transformations,
+    )
 
     if species_graph_data is not None:
         # add species_graph_data to the network_nodes df, based on s_id
         network_nodes_wdata = network_nodes_sid.merge(
             species_graph_data, left_on="s_id", right_index=True, how="left"
         )
+    else:
+        network_nodes_wdata = network_nodes_sid
 
     # Note: multiple sc_ids with the same s_id will be assign with the same species_graph_data
 
-    network_nodes_wdata.fillna(0
-
+    network_nodes_wdata = network_nodes_wdata.fillna(int(0)).drop(
+        columns=["s_id", "c_id"]
+    )
 
     return network_nodes_wdata
 
@@ -1369,9 +1500,21 @@ def _augment_network_edges(
     network_edges: pd.DataFrame,
     sbml_dfs: sbml_dfs_core.SBML_dfs,
     reaction_graph_attrs: dict = dict(),
+    custom_transformations: Optional[dict] = None,
 ) -> pd.DataFrame:
-    """Add reversibility and other metadata from reactions.
+    """Add reversibility and other metadata from reactions.
 
+    Parameters
+    ----------
+    network_edges : pd.DataFrame
+        DataFrame of network edges.
+    sbml_dfs : sbml_dfs_core.SBML_dfs
+        The SBML_dfs object containing reaction data.
+    reaction_graph_attrs : dict
+        Dictionary of reaction attributes to add.
+    custom_transformations : dict, optional
+        Dictionary of custom transformation functions to use for attribute transformation.
+    """
     REQUIRED_NETWORK_EDGE_ATTRS = {
         "from",
         "to",
@@ -1406,7 +1549,10 @@ def _augment_network_edges(
 
     # add other attributes based on reactions data
     reaction_graph_data = pluck_entity_data(
-        sbml_dfs,
+        sbml_dfs,
+        reaction_graph_attrs,
+        SBML_DFS.REACTIONS,
+        custom_transformations=custom_transformations,
     )
     if reaction_graph_data is not None:
         network_edges = network_edges.merge(
@@ -1491,7 +1637,10 @@ def _reverse_network_edges(augmented_network_edges: pd.DataFrame) -> pd.DataFrame:
         ]
     )
 
-
+    if transformed_r_reaction_edges.shape[0] != r_reaction_edges.shape[0]:
+        raise ValueError(
+            "transformed_r_reaction_edges and r_reaction_edges must have the same number of rows"
+        )
 
     return transformed_r_reaction_edges.assign(
         direction=CPR_GRAPH_EDGE_DIRECTIONS.REVERSE
@@ -1621,7 +1770,9 @@ def _create_topology_weights(
 
 
 def _validate_entity_attrs(
-    entity_attrs: dict,
+    entity_attrs: dict,
+    validate_transformations: bool = True,
+    custom_transformations: Optional[dict] = None,
 ) -> None:
     """Validate that graph attributes are a valid format."""
 
@@ -1631,11 +1782,15 @@ def _validate_entity_attrs(
         entity_attrs = _EntityAttrValidator(**v).model_dump()
 
         if validate_transformations:
-
+            trans_name = v["trans"]
+            valid_trans = set(DEFINED_WEIGHT_TRANSFORMATION.keys())
+            if custom_transformations:
+                valid_trans = valid_trans.union(set(custom_transformations.keys()))
+            if trans_name not in valid_trans:
                 raise ValueError(
-                    f"transformation {
-                    "DEFINED_WEIGHT_TRANSFORMATION. The defined transformations "
-                    f"are {', '.join(
+                    f"transformation {trans_name} was not defined as an alias in "
+                    "DEFINED_WEIGHT_TRANSFORMATION or custom_transformations. The defined transformations "
+                    f"are {', '.join(valid_trans)}"
                 )
 
     return None
napistu/network/net_propagation.py
ADDED
@@ -0,0 +1,118 @@
+import pandas as pd
+import numpy as np
+import igraph as ig
+import inspect
+
+from typing import Optional
+
+
+def personalized_pagerank_by_attribute(
+    g: ig.Graph,
+    attribute: str,
+    damping: float = 0.85,
+    calculate_uniform_dist: bool = True,
+    additional_propagation_args: Optional[dict] = None,
+) -> pd.DataFrame:
+    """
+    Run personalized PageRank with reset probability proportional to a vertex attribute.
+    Optionally computes uniform PPR over nonzero attribute nodes.
+
+    Parameters
+    ----------
+    g : igraph.Graph
+        The input graph.
+    attribute : str
+        The vertex attribute to use for personalization.
+    damping : float, optional
+        Damping factor (default 0.85).
+    calculate_uniform_dist : bool, optional
+        If True, also compute uniform PPR over nonzero attribute nodes.
+    additional_propagation_args : dict, optional
+        Additional arguments to pass to igraph's personalized_pagerank. Keys must match the method's signature.
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with columns ['name', 'pagerank_by_attribute', attribute] and optionally 'pagerank_uniform'.
+
+    Example
+    -------
+    >>> import igraph as ig
+    >>> from scraps.utils import personalized_pagerank_by_attribute
+    >>> g = ig.Graph.Full(3)
+    >>> g.vs['name'] = ['A', 'B', 'C']
+    >>> g.vs['score'] = [1, 0, 2]
+    >>> df = personalized_pagerank_by_attribute(g, 'score')
+    >>> print(df)
+    """
+    # Validate and extract attribute (missing/None as 0)
+    attr = _ensure_nonnegative_vertex_attribute(g, attribute)
+
+    # Validate additional_propagation_args
+    if additional_propagation_args is None:
+        additional_propagation_args = {}
+    else:
+        valid_args = set(inspect.signature(g.personalized_pagerank).parameters.keys())
+        for k in additional_propagation_args:
+            if k not in valid_args:
+                raise ValueError(f"Invalid argument for personalized_pagerank: {k}")
+
+    # Personalized PageRank (no normalization, igraph handles it)
+    pr_attr = g.personalized_pagerank(
+        reset=attr.tolist(), damping=damping, **additional_propagation_args
+    )
+
+    # Node names
+    names = g.vs["name"] if "name" in g.vs.attributes() else list(range(g.vcount()))
+
+    data = {"name": names, "pagerank_by_attribute": pr_attr, attribute: attr}
+
+    # Uniform PPR over nonzero attribute nodes
+    if calculate_uniform_dist:
+        used_in_uniform = attr > 0
+        n_uniform = used_in_uniform.sum()
+        if n_uniform == 0:
+            raise ValueError("No nonzero attribute values for uniform PPR.")
+        uniform_vec = np.zeros_like(attr, dtype=float)
+        uniform_vec[used_in_uniform] = 1.0 / n_uniform
+        pr_uniform = g.personalized_pagerank(
+            reset=uniform_vec.tolist(), damping=damping, **additional_propagation_args
+        )
+        data["pagerank_uniform"] = pr_uniform
+
+    return pd.DataFrame(data)
+
+
+def _ensure_nonnegative_vertex_attribute(g: ig.Graph, attribute: str):
+    """
+    Utility to check that a vertex attribute is present, numeric, and non-negative.
+    Raises ValueError if checks fail.
+    Missing or None values are treated as 0.
+    Raises ValueError if attribute is missing for all vertices or all values are zero.
+    """
+
+    all_missing = all(
+        (attribute not in v.attributes() or v[attribute] is None) for v in g.vs
+    )
+    if all_missing:
+        raise ValueError(f"Vertex attribute '{attribute}' is missing for all vertices.")
+
+    values = [
+        (
+            v[attribute]
+            if (attribute in v.attributes() and v[attribute] is not None)
+            else 0.0
+        )
+        for v in g.vs
+    ]
+
+    arr = np.array(values, dtype=float)
+
+    if np.all(arr == 0):
+        raise ValueError(
+            f"Vertex attribute '{attribute}' is zero for all vertices; cannot use as reset vector."
+        )
+    if np.any(arr < 0):
+        raise ValueError(f"Attribute '{attribute}' contains negative values.")
+
+    return arr
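A short usage sketch mirroring the docstring example; note the docstring imports from scraps.utils, but the module ships in this wheel as napistu.network.net_propagation:

```python
import igraph as ig

from napistu.network import net_propagation

g = ig.Graph.Full(3)
g.vs["name"] = ["A", "B", "C"]
g.vs["score"] = [1, 0, 2]

# Reset probability proportional to "score"; "pagerank_uniform" is the baseline
# computed over the vertices with a nonzero score.
df = net_propagation.personalized_pagerank_by_attribute(g, "score", damping=0.85)
print(df[["name", "pagerank_by_attribute", "pagerank_uniform"]])
```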
napistu/network/net_utils.py
CHANGED
@@ -18,6 +18,7 @@ from napistu.network import net_create
 from napistu.constants import SBML_DFS
 from napistu.constants import SOURCE_SPEC
 
+from napistu.identifiers import _validate_assets_sbml_ids
 from napistu.network.constants import CPR_GRAPH_NODES
 from napistu.network.constants import CPR_GRAPH_TYPES
 
@@ -520,38 +521,6 @@ def _validate_assets_graph_dist(
     return None
 
 
-def _validate_assets_sbml_ids(
-    sbml_dfs: sbml_dfs_core.SBML_dfs, identifiers_df: pd.DataFrame
-) -> None:
-    """Check an sbml_dfs file and identifiers table for inconsistencies."""
-
-    joined_species_w_ids = sbml_dfs.species.merge(
-        identifiers_df[["s_id", "s_name"]].drop_duplicates(),
-        left_index=True,
-        right_on="s_id",
-    )
-
-    inconsistent_names_df = joined_species_w_ids.query("s_name_x != s_name_y").dropna()
-    inconsistent_names_list = [
-        f"{x} != {y}"
-        for x, y in zip(
-            inconsistent_names_df["s_name_x"], inconsistent_names_df["s_name_y"]
-        )
-    ]
-
-    if len(inconsistent_names_list):
-        example_inconsistent_names = inconsistent_names_list[
-            0 : min(10, len(inconsistent_names_list))
-        ]
-
-        raise ValueError(
-            f"{len(inconsistent_names_list)} species names do not match between "
-            f"sbml_dfs and identifiers_df including: {', '.join(example_inconsistent_names)}"
-        )
-
-    return None
-
-
 def _get_top_n_idx(arr: Sequence, n: int, ascending: bool = False) -> Sequence[int]:
     """Returns the indices of the top n values in an array
 
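The deleted _validate_assets_sbml_ids helper now lives in napistu.identifiers (see the new import at the top of this file). Assuming the relocated function kept the signature shown in the removed block, callers would use:

```python
from napistu.identifiers import _validate_assets_sbml_ids

# sbml_dfs (sbml_dfs_core.SBML_dfs) and identifiers_df (pd.DataFrame with s_id/s_name
# columns) are placeholders for existing objects.
_validate_assets_sbml_ids(sbml_dfs, identifiers_df)
```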