napistu 0.2.5.dev6__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__main__.py +126 -96
- napistu/constants.py +35 -41
- napistu/context/__init__.py +10 -0
- napistu/context/discretize.py +462 -0
- napistu/context/filtering.py +387 -0
- napistu/gcs/__init__.py +1 -1
- napistu/identifiers.py +74 -15
- napistu/indices.py +68 -0
- napistu/ingestion/__init__.py +1 -1
- napistu/ingestion/bigg.py +47 -62
- napistu/ingestion/constants.py +18 -133
- napistu/ingestion/gtex.py +113 -0
- napistu/ingestion/hpa.py +147 -0
- napistu/ingestion/sbml.py +0 -97
- napistu/ingestion/string.py +2 -2
- napistu/matching/__init__.py +10 -0
- napistu/matching/constants.py +18 -0
- napistu/matching/interactions.py +518 -0
- napistu/matching/mount.py +529 -0
- napistu/matching/species.py +510 -0
- napistu/mcp/__init__.py +7 -4
- napistu/mcp/__main__.py +128 -72
- napistu/mcp/client.py +16 -25
- napistu/mcp/codebase.py +201 -153
- napistu/mcp/component_base.py +170 -0
- napistu/mcp/config.py +223 -0
- napistu/mcp/constants.py +45 -2
- napistu/mcp/documentation.py +253 -136
- napistu/mcp/documentation_utils.py +13 -48
- napistu/mcp/execution.py +372 -305
- napistu/mcp/health.py +49 -67
- napistu/mcp/profiles.py +10 -6
- napistu/mcp/server.py +161 -80
- napistu/mcp/tutorials.py +139 -87
- napistu/modify/__init__.py +1 -1
- napistu/modify/gaps.py +1 -1
- napistu/network/__init__.py +1 -1
- napistu/network/constants.py +101 -34
- napistu/network/data_handling.py +388 -0
- napistu/network/ig_utils.py +351 -0
- napistu/network/napistu_graph_core.py +354 -0
- napistu/network/neighborhoods.py +40 -40
- napistu/network/net_create.py +373 -309
- napistu/network/net_propagation.py +47 -19
- napistu/network/{net_utils.py → ng_utils.py} +124 -272
- napistu/network/paths.py +67 -51
- napistu/network/precompute.py +11 -11
- napistu/ontologies/__init__.py +10 -0
- napistu/ontologies/constants.py +129 -0
- napistu/ontologies/dogma.py +243 -0
- napistu/ontologies/genodexito.py +649 -0
- napistu/ontologies/mygene.py +369 -0
- napistu/ontologies/renaming.py +198 -0
- napistu/rpy2/__init__.py +229 -86
- napistu/rpy2/callr.py +47 -77
- napistu/rpy2/constants.py +24 -23
- napistu/rpy2/rids.py +61 -648
- napistu/sbml_dfs_core.py +587 -222
- napistu/scverse/__init__.py +15 -0
- napistu/scverse/constants.py +28 -0
- napistu/scverse/loading.py +727 -0
- napistu/utils.py +118 -10
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/METADATA +8 -3
- napistu-0.3.1.dist-info/RECORD +133 -0
- tests/conftest.py +22 -0
- tests/test_context_discretize.py +56 -0
- tests/test_context_filtering.py +267 -0
- tests/test_identifiers.py +100 -0
- tests/test_indices.py +65 -0
- tests/{test_edgelist.py → test_ingestion_napistu_edgelist.py} +2 -2
- tests/test_matching_interactions.py +108 -0
- tests/test_matching_mount.py +305 -0
- tests/test_matching_species.py +394 -0
- tests/test_mcp_config.py +193 -0
- tests/test_mcp_documentation_utils.py +12 -3
- tests/test_mcp_server.py +356 -0
- tests/test_network_data_handling.py +397 -0
- tests/test_network_ig_utils.py +23 -0
- tests/test_network_neighborhoods.py +19 -0
- tests/test_network_net_create.py +459 -0
- tests/test_network_ng_utils.py +30 -0
- tests/test_network_paths.py +56 -0
- tests/{test_precomputed_distances.py → test_network_precompute.py} +8 -6
- tests/test_ontologies_genodexito.py +58 -0
- tests/test_ontologies_mygene.py +39 -0
- tests/test_ontologies_renaming.py +110 -0
- tests/test_rpy2_callr.py +79 -0
- tests/test_rpy2_init.py +151 -0
- tests/test_sbml.py +0 -31
- tests/test_sbml_dfs_core.py +134 -10
- tests/test_scverse_loading.py +778 -0
- tests/test_set_coverage.py +2 -2
- tests/test_utils.py +121 -1
- napistu/mechanism_matching.py +0 -1353
- napistu/rpy2/netcontextr.py +0 -467
- napistu-0.2.5.dev6.dist-info/RECORD +0 -97
- tests/test_igraph.py +0 -367
- tests/test_mechanism_matching.py +0 -784
- tests/test_net_utils.py +0 -149
- tests/test_netcontextr.py +0 -105
- tests/test_rpy2.py +0 -61
- /napistu/ingestion/{cpr_edgelist.py → napistu_edgelist.py} +0 -0
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/WHEEL +0 -0
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/entry_points.txt +0 -0
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/top_level.txt +0 -0
- /tests/{test_obo.py → test_ingestion_obo.py} +0 -0
@@ -1,26 +1,29 @@
|
|
1
|
+
"""
|
2
|
+
Utilities specific to NapistuGraph objects and the wider Napistu ecosystem.
|
3
|
+
|
4
|
+
This module contains utilities that are specific to NapistuGraph subclasses
|
5
|
+
and require knowledge of the Napistu data model (SBML_dfs objects, etc.).
|
6
|
+
"""
|
7
|
+
|
1
8
|
from __future__ import annotations
|
2
9
|
|
3
10
|
import logging
|
4
11
|
import os
|
5
|
-
import random
|
6
|
-
import textwrap
|
7
12
|
import yaml
|
8
|
-
from typing import
|
9
|
-
from typing import Sequence
|
13
|
+
from typing import Optional, Union
|
10
14
|
|
11
15
|
import igraph as ig
|
12
|
-
import numpy as np
|
13
16
|
import pandas as pd
|
14
17
|
from napistu import sbml_dfs_core
|
15
18
|
from napistu import source
|
16
19
|
from napistu.network import net_create
|
20
|
+
from napistu.network.napistu_graph_core import NapistuGraph
|
17
21
|
|
18
22
|
from napistu.constants import SBML_DFS
|
19
23
|
from napistu.constants import SOURCE_SPEC
|
20
|
-
|
21
24
|
from napistu.identifiers import _validate_assets_sbml_ids
|
22
|
-
from napistu.network.constants import
|
23
|
-
from napistu.network.constants import
|
25
|
+
from napistu.network.constants import NAPISTU_GRAPH_TYPES
|
26
|
+
from napistu.network.constants import NAPISTU_GRAPH_DIRECTEDNESS
|
24
27
|
|
25
28
|
logger = logging.getLogger(__name__)
|
26
29
|
|
@@ -44,7 +47,6 @@ def compartmentalize_species(
|
|
44
47
|
-------
|
45
48
|
pd.DataFrame containings the s_id and sc_id pairs
|
46
49
|
"""
|
47
|
-
|
48
50
|
if isinstance(species, str):
|
49
51
|
species = [species]
|
50
52
|
if not isinstance(species, list):
|
@@ -79,7 +81,6 @@ def compartmentalize_species_pairs(
|
|
79
81
|
-------
|
80
82
|
pd.DataFrame containing pairs of origin and destination compartmentalized species
|
81
83
|
"""
|
82
|
-
|
83
84
|
compartmentalized_origins = compartmentalize_species(
|
84
85
|
sbml_dfs, origin_species
|
85
86
|
).rename(columns={SBML_DFS.SC_ID: "sc_id_origin", SBML_DFS.S_ID: "s_id_origin"})
|
@@ -114,7 +115,6 @@ def get_minimal_sources_edges(
|
|
114
115
|
vertices: pd.DataFrame, sbml_dfs: sbml_dfs_core.SBML_dfs
|
115
116
|
) -> pd.DataFrame | None:
|
116
117
|
"""Assign edges to a set of sources."""
|
117
|
-
|
118
118
|
nodes = vertices["node"].tolist()
|
119
119
|
present_reactions = sbml_dfs.reactions[sbml_dfs.reactions.index.isin(nodes)]
|
120
120
|
|
@@ -133,65 +133,15 @@ def get_minimal_sources_edges(
|
|
133
133
|
]
|
134
134
|
|
135
135
|
|
136
|
-
def get_graph_summary(graph: ig.Graph) -> dict[str, Any]:
|
137
|
-
"""Calculates common summary statistics for a network
|
138
|
-
|
139
|
-
Args:
|
140
|
-
graph (ig.Graph): An igraph
|
141
|
-
|
142
|
-
returns:
|
143
|
-
dict: A dictionary of summary statistics with values
|
144
|
-
n_edges [int]: number of edges
|
145
|
-
n_vertices [int]: number of vertices
|
146
|
-
n_components [int]: number of weakly connected components
|
147
|
-
(i.e. without considering edge directionality)
|
148
|
-
stats_component_sizes [dict[str, float]]: summary statistics for the component sizes
|
149
|
-
top10_large_components [list[dict[str, Any]]]: the top 10 largest components with 10 example vertices
|
150
|
-
top10_smallest_components [list[dict[str, Any]]]: the top 10 smallest components with 10 example vertices
|
151
|
-
average_path_length [float]: the average shortest path length between all vertices
|
152
|
-
top10_betweenness [list[dict[str, Any]]]: the top 10 vertices by betweenness centrality.
|
153
|
-
Roughly: measures how many shortest paths go through a vertices
|
154
|
-
top10_harmonic_centrality [list[dict[str, Any]]]: the top 10 vertices by harmonic centrality:
|
155
|
-
Roughly: mean inverse distance to all other vertices
|
156
|
-
"""
|
157
|
-
stats = {}
|
158
|
-
stats["n_edges"] = graph.ecount()
|
159
|
-
stats["n_vertices"] = graph.vcount()
|
160
|
-
components = graph.components(mode="weak")
|
161
|
-
stats["n_components"] = len(components)
|
162
|
-
component_sizes = [len(c) for c in components]
|
163
|
-
stats["stats_component_sizes"] = pd.Series(component_sizes).describe().to_dict()
|
164
|
-
# get the top 10 largest components and 10 example nodes
|
165
|
-
|
166
|
-
stats["top10_large_components"] = _get_top_n_component_stats(
|
167
|
-
graph, components, component_sizes, n=10, ascending=False
|
168
|
-
)
|
169
|
-
|
170
|
-
stats["top10_smallest_components"] = _get_top_n_component_stats(
|
171
|
-
graph, components, component_sizes, n=10, ascending=True
|
172
|
-
)
|
173
|
-
|
174
|
-
stats["average_path_length"] = graph.average_path_length()
|
175
|
-
|
176
|
-
between = list(graph.betweenness(directed=False))
|
177
|
-
stats["top10_betweenness"] = _get_top_n_nodes(
|
178
|
-
graph, between, "betweenness", n=10, ascending=False
|
179
|
-
)
|
180
|
-
|
181
|
-
harmonic_centrality = list(graph.harmonic_centrality())
|
182
|
-
stats["top10_harmonic_centrality"] = _get_top_n_nodes(
|
183
|
-
graph, harmonic_centrality, "harmonic_centrality", n=10, ascending=False
|
184
|
-
)
|
185
|
-
|
186
|
-
return stats
|
187
|
-
|
188
|
-
|
189
136
|
def export_networks(
|
190
137
|
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
191
138
|
model_prefix: str,
|
192
139
|
outdir: str,
|
193
140
|
directeds: list[bool] = [True, False],
|
194
|
-
graph_types: list[str] = [
|
141
|
+
graph_types: list[str] = [
|
142
|
+
NAPISTU_GRAPH_TYPES.BIPARTITE,
|
143
|
+
NAPISTU_GRAPH_TYPES.REGULATORY,
|
144
|
+
],
|
195
145
|
) -> None:
|
196
146
|
"""
|
197
147
|
Exports Networks
|
@@ -217,7 +167,6 @@ def export_networks(
|
|
217
167
|
----------
|
218
168
|
None
|
219
169
|
"""
|
220
|
-
|
221
170
|
if not isinstance(sbml_dfs, sbml_dfs_core.SBML_dfs):
|
222
171
|
raise TypeError(
|
223
172
|
f"sbml_dfs must be a sbml_dfs_core.SBML_dfs, but was {type(sbml_dfs)}"
|
@@ -242,7 +191,7 @@ def export_networks(
|
|
242
191
|
)
|
243
192
|
print(f"Exporting {graph_type} network to {export_pkl_path}")
|
244
193
|
|
245
|
-
network_graph = net_create.
|
194
|
+
network_graph = net_create.process_napistu_graph(
|
246
195
|
sbml_dfs=sbml_dfs,
|
247
196
|
directed=directed,
|
248
197
|
graph_type=graph_type,
|
@@ -259,7 +208,7 @@ def read_network_pkl(
|
|
259
208
|
network_dir: str,
|
260
209
|
graph_type: str,
|
261
210
|
directed: bool = True,
|
262
|
-
) ->
|
211
|
+
) -> NapistuGraph:
|
263
212
|
"""
|
264
213
|
Read Network Pickle
|
265
214
|
|
@@ -280,11 +229,10 @@ def read_network_pkl(
|
|
280
229
|
|
281
230
|
Returns
|
282
231
|
-------
|
283
|
-
network_graph:
|
284
|
-
|
232
|
+
network_graph: NapistuGraph
|
233
|
+
A NapistuGraph network of the pathway
|
285
234
|
|
286
235
|
"""
|
287
|
-
|
288
236
|
if not isinstance(model_prefix, str):
|
289
237
|
raise TypeError(f"model_prefix was a {type(model_prefix)} and must be a str")
|
290
238
|
if not os.path.isdir(network_dir):
|
@@ -306,104 +254,102 @@ def read_network_pkl(
|
|
306
254
|
return network_graph
|
307
255
|
|
308
256
|
|
309
|
-
def filter_to_largest_subgraph(cpr_graph: ig.Graph) -> ig.Graph:
|
310
|
-
"""Filter a graph to its largest weakly connected component."""
|
311
|
-
|
312
|
-
component_members = cpr_graph.components(mode="weak")
|
313
|
-
component_sizes = [len(x) for x in component_members]
|
314
|
-
|
315
|
-
top_component_members = [
|
316
|
-
m
|
317
|
-
for s, m in zip(component_sizes, component_members)
|
318
|
-
if s == max(component_sizes)
|
319
|
-
][0]
|
320
|
-
|
321
|
-
largest_subgraph = cpr_graph.induced_subgraph(top_component_members)
|
322
|
-
|
323
|
-
return largest_subgraph
|
324
|
-
|
325
|
-
|
326
257
|
def validate_assets(
|
327
258
|
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
328
|
-
|
329
|
-
precomputed_distances: pd.DataFrame,
|
330
|
-
identifiers_df: pd.DataFrame,
|
259
|
+
napistu_graph: Optional[Union[NapistuGraph, ig.Graph]] = None,
|
260
|
+
precomputed_distances: Optional[pd.DataFrame] = None,
|
261
|
+
identifiers_df: Optional[pd.DataFrame] = None,
|
331
262
|
) -> None:
|
332
263
|
"""
|
333
264
|
Validate Assets
|
334
265
|
|
335
266
|
Perform a few quick checks of inputs to catch inconsistencies.
|
336
267
|
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
268
|
+
Parameters
|
269
|
+
----------
|
270
|
+
sbml_dfs : sbml_dfs_core.SBML_dfs
|
271
|
+
A pathway representation. (Required)
|
272
|
+
napistu_graph : NapistuGraph, optional
|
273
|
+
A network-based representation of `sbml_dfs`. NapistuGraph is a subclass of igraph.Graph.
|
274
|
+
precomputed_distances : pandas.DataFrame, optional
|
275
|
+
Precomputed distances between vertices in `napistu_graph`.
|
276
|
+
identifiers_df : pandas.DataFrame, optional
|
277
|
+
A table of systematic identifiers for compartmentalized species in `sbml_dfs`.
|
346
278
|
|
347
|
-
Returns
|
348
|
-
|
279
|
+
Returns
|
280
|
+
-------
|
281
|
+
None
|
349
282
|
|
283
|
+
Warns
|
284
|
+
-----
|
285
|
+
If only sbml_dfs is provided and no other assets are given, a warning is logged.
|
350
286
|
|
287
|
+
Raises
|
288
|
+
------
|
289
|
+
ValueError
|
290
|
+
If precomputed_distances is provided but napistu_graph is not.
|
351
291
|
"""
|
292
|
+
if (
|
293
|
+
napistu_graph is None
|
294
|
+
and precomputed_distances is None
|
295
|
+
and identifiers_df is None
|
296
|
+
):
|
297
|
+
logger.warning(
|
298
|
+
"validate_assets: Only sbml_dfs was provided; nothing to validate."
|
299
|
+
)
|
300
|
+
return None
|
352
301
|
|
353
|
-
#
|
354
|
-
|
355
|
-
|
302
|
+
# Validate napistu_graph if provided
|
303
|
+
if napistu_graph is not None:
|
304
|
+
_validate_assets_sbml_graph(sbml_dfs, napistu_graph)
|
356
305
|
|
357
|
-
#
|
358
|
-
|
359
|
-
|
306
|
+
# Validate precomputed_distances if provided (requires napistu_graph)
|
307
|
+
if precomputed_distances is not None:
|
308
|
+
if napistu_graph is None:
|
309
|
+
raise ValueError(
|
310
|
+
"napistu_graph must be provided if precomputed_distances is provided."
|
311
|
+
)
|
312
|
+
_validate_assets_graph_dist(napistu_graph, precomputed_distances)
|
360
313
|
|
361
|
-
#
|
362
|
-
|
363
|
-
|
314
|
+
# Validate identifiers_df if provided
|
315
|
+
if identifiers_df is not None:
|
316
|
+
_validate_assets_sbml_ids(sbml_dfs, identifiers_df)
|
364
317
|
|
365
318
|
return None
|
366
319
|
|
367
320
|
|
368
|
-
def
|
321
|
+
def napistu_graph_to_pandas_dfs(
|
322
|
+
napistu_graph: Union[NapistuGraph, ig.Graph],
|
323
|
+
) -> tuple[pd.DataFrame, pd.DataFrame]:
|
369
324
|
"""
|
370
|
-
|
371
|
-
|
372
|
-
Take an igraph representation of a network and turn it into vertices and edges tables.
|
325
|
+
Convert a NapistuGraph to Pandas DataFrames for vertices and edges.
|
373
326
|
|
374
|
-
|
375
|
-
|
327
|
+
Parameters
|
328
|
+
----------
|
329
|
+
napistu_graph : NapistuGraph
|
330
|
+
A NapistuGraph network (subclass of igraph.Graph).
|
376
331
|
|
377
|
-
Returns
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
332
|
+
Returns
|
333
|
+
-------
|
334
|
+
vertices : pandas.DataFrame
|
335
|
+
A table with one row per vertex.
|
336
|
+
edges : pandas.DataFrame
|
337
|
+
A table with one row per edge.
|
382
338
|
"""
|
383
|
-
|
384
339
|
vertices = pd.DataFrame(
|
385
|
-
[{**{"index": v.index}, **v.attributes()} for v in
|
340
|
+
[{**{"index": v.index}, **v.attributes()} for v in napistu_graph.vs]
|
386
341
|
)
|
387
342
|
edges = pd.DataFrame(
|
388
343
|
[
|
389
344
|
{**{"source": e.source, "target": e.target}, **e.attributes()}
|
390
|
-
for e in
|
345
|
+
for e in napistu_graph.es
|
391
346
|
]
|
392
347
|
)
|
393
|
-
|
394
348
|
return vertices, edges
|
395
349
|
|
396
350
|
|
397
|
-
def safe_fill(x, fill_width=15):
|
398
|
-
if x == "":
|
399
|
-
return ""
|
400
|
-
else:
|
401
|
-
return textwrap.fill(x, 15)
|
402
|
-
|
403
|
-
|
404
351
|
def read_graph_attrs_spec(graph_attrs_spec_uri: str) -> dict:
|
405
|
-
"""Read a YAML file containing the specification for adding reaction- and/or species-attributes to a
|
406
|
-
|
352
|
+
"""Read a YAML file containing the specification for adding reaction- and/or species-attributes to a napistu_graph."""
|
407
353
|
with open(graph_attrs_spec_uri) as f:
|
408
354
|
graph_attrs_spec = yaml.safe_load(f)
|
409
355
|
|
@@ -426,13 +372,14 @@ def read_graph_attrs_spec(graph_attrs_spec_uri: str) -> dict:
|
|
426
372
|
return graph_attrs_spec
|
427
373
|
|
428
374
|
|
375
|
+
# Internal utility functions
|
429
376
|
def _create_network_save_string(
|
430
377
|
model_prefix: str, outdir: str, directed: bool, graph_type: str
|
431
378
|
) -> str:
|
432
379
|
if directed:
|
433
|
-
directed_str =
|
380
|
+
directed_str = NAPISTU_GRAPH_DIRECTEDNESS.DIRECTED
|
434
381
|
else:
|
435
|
-
directed_str =
|
382
|
+
directed_str = NAPISTU_GRAPH_DIRECTEDNESS.UNDIRECTED
|
436
383
|
|
437
384
|
export_pkl_path = os.path.join(
|
438
385
|
outdir, model_prefix + "_network_" + graph_type + "_" + directed_str + ".pkl"
|
@@ -441,34 +388,31 @@ def _create_network_save_string(
|
|
441
388
|
return export_pkl_path
|
442
389
|
|
443
390
|
|
444
|
-
def
|
445
|
-
|
446
|
-
) ->
|
447
|
-
"""
|
448
|
-
Utility function for creating subgraphs including a set of vertices and their connections
|
449
|
-
|
391
|
+
def _validate_assets_sbml_graph(
|
392
|
+
sbml_dfs: sbml_dfs_core.SBML_dfs, napistu_graph: Union[NapistuGraph, ig.Graph]
|
393
|
+
) -> None:
|
450
394
|
"""
|
395
|
+
Check an sbml_dfs model and NapistuGraph for inconsistencies.
|
451
396
|
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
subgraph = cpr_graph.induced_subgraph(selected_vertices)
|
459
|
-
|
460
|
-
return subgraph
|
461
|
-
|
397
|
+
Parameters
|
398
|
+
----------
|
399
|
+
sbml_dfs : sbml_dfs_core.SBML_dfs
|
400
|
+
The pathway representation.
|
401
|
+
napistu_graph : NapistuGraph
|
402
|
+
The network representation (subclass of igraph.Graph).
|
462
403
|
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
""" "Check an sbml_dfs model and cpr_graph for inconsistencies."""
|
404
|
+
Returns
|
405
|
+
-------
|
406
|
+
None
|
467
407
|
|
408
|
+
Raises
|
409
|
+
------
|
410
|
+
ValueError
|
411
|
+
If species names do not match between sbml_dfs and napistu_graph.
|
412
|
+
"""
|
468
413
|
vertices = pd.DataFrame(
|
469
|
-
[{**{"index": v.index}, **v.attributes()} for v in
|
414
|
+
[{**{"index": v.index}, **v.attributes()} for v in napistu_graph.vs]
|
470
415
|
)
|
471
|
-
|
472
416
|
matched_cspecies = sbml_dfs.compartmentalized_species.reset_index()[
|
473
417
|
["sc_id", "sc_name"]
|
474
418
|
].merge(
|
@@ -476,146 +420,54 @@ def _validate_assets_sbml_graph(
|
|
476
420
|
left_on=["sc_id"],
|
477
421
|
right_on=["name"],
|
478
422
|
)
|
479
|
-
|
480
423
|
mismatched_names = [
|
481
424
|
f"{x} != {y}"
|
482
425
|
for x, y in zip(matched_cspecies["sc_name"], matched_cspecies["node_name"])
|
483
426
|
if x != y
|
484
427
|
]
|
485
|
-
|
486
428
|
if len(mismatched_names) > 0:
|
487
429
|
example_names = mismatched_names[: min(10, len(mismatched_names))]
|
488
|
-
|
489
430
|
raise ValueError(
|
490
|
-
f"{len(mismatched_names)} species names do not match between sbml_dfs and
|
431
|
+
f"{len(mismatched_names)} species names do not match between sbml_dfs and napistu_graph: {example_names}"
|
491
432
|
)
|
492
|
-
|
493
433
|
return None
|
494
434
|
|
495
435
|
|
496
436
|
def _validate_assets_graph_dist(
|
497
|
-
|
437
|
+
napistu_graph: NapistuGraph, precomputed_distances: pd.DataFrame
|
498
438
|
) -> None:
|
499
|
-
"""
|
439
|
+
"""
|
440
|
+
Check a NapistuGraph and precomputed distances table for inconsistencies.
|
500
441
|
|
442
|
+
Parameters
|
443
|
+
----------
|
444
|
+
napistu_graph : NapistuGraph
|
445
|
+
The network representation (subclass of igraph.Graph).
|
446
|
+
precomputed_distances : pandas.DataFrame
|
447
|
+
Precomputed distances between vertices in the network.
|
448
|
+
|
449
|
+
Returns
|
450
|
+
-------
|
451
|
+
None
|
452
|
+
|
453
|
+
Warns
|
454
|
+
-----
|
455
|
+
If edge weights are inconsistent between the graph and precomputed distances.
|
456
|
+
"""
|
501
457
|
edges = pd.DataFrame(
|
502
|
-
[{**{"index": e.index}, **e.attributes()} for e in
|
458
|
+
[{**{"index": e.index}, **e.attributes()} for e in napistu_graph.es]
|
503
459
|
)
|
504
|
-
|
505
460
|
direct_interactions = precomputed_distances.query("path_length == 1")
|
506
|
-
|
507
461
|
edges_with_distances = direct_interactions.merge(
|
508
462
|
edges[["from", "to", "weights", "upstream_weights"]],
|
509
463
|
left_on=["sc_id_origin", "sc_id_dest"],
|
510
464
|
right_on=["from", "to"],
|
511
465
|
)
|
512
|
-
|
513
466
|
inconsistent_weights = edges_with_distances.query("path_weights != weights")
|
514
467
|
if inconsistent_weights.shape[0] > 0:
|
515
468
|
logger.warning(
|
516
469
|
f"{inconsistent_weights.shape[0]} edges' weights are inconsistent between",
|
517
|
-
"edges in the
|
470
|
+
"edges in the napistu_graph and length 1 paths in precomputed_distances."
|
518
471
|
f"This is {inconsistent_weights.shape[0] / edges_with_distances.shape[0]:.2%} of all edges.",
|
519
472
|
)
|
520
|
-
|
521
|
-
return None
|
522
|
-
|
523
|
-
|
524
|
-
def _get_top_n_idx(arr: Sequence, n: int, ascending: bool = False) -> Sequence[int]:
|
525
|
-
"""Returns the indices of the top n values in an array
|
526
|
-
|
527
|
-
Args:
|
528
|
-
arr (Sequence): An array of values
|
529
|
-
n (int): The number of top values to return
|
530
|
-
ascending (bool, optional): Whether to return the top or bottom n values. Defaults to False.
|
531
|
-
|
532
|
-
Returns:
|
533
|
-
Sequence[int]: The indices of the top n values
|
534
|
-
"""
|
535
|
-
order = np.argsort(arr)
|
536
|
-
if ascending:
|
537
|
-
return order[:n] # type: ignore
|
538
|
-
else:
|
539
|
-
return order[-n:][::-1] # type: ignore
|
540
|
-
|
541
|
-
|
542
|
-
def _get_top_n_objects(
|
543
|
-
object_vals: Sequence, objects: Sequence, n: int = 10, ascending: bool = False
|
544
|
-
) -> list:
|
545
|
-
"""Get the top N objects based on a ranking measure."""
|
546
|
-
|
547
|
-
idxs = _get_top_n_idx(object_vals, n, ascending=ascending)
|
548
|
-
top_objects = [objects[idx] for idx in idxs]
|
549
|
-
return top_objects
|
550
|
-
|
551
|
-
|
552
|
-
def _get_top_n_component_stats(
|
553
|
-
graph: ig.Graph,
|
554
|
-
components,
|
555
|
-
component_sizes: Sequence[int],
|
556
|
-
n: int = 10,
|
557
|
-
ascending: bool = False,
|
558
|
-
) -> list[dict[str, Any]]:
|
559
|
-
"""Summarize the top N components' network properties."""
|
560
|
-
|
561
|
-
top_components = _get_top_n_objects(component_sizes, components, n, ascending)
|
562
|
-
top_component_stats = [
|
563
|
-
{"n": len(c), "examples": [graph.vs[n].attributes() for n in c[:10]]}
|
564
|
-
for c in top_components
|
565
|
-
]
|
566
|
-
return top_component_stats
|
567
|
-
|
568
|
-
|
569
|
-
def _get_top_n_nodes(
|
570
|
-
graph: ig.Graph, vals: Sequence, val_name: str, n: int = 10, ascending: bool = False
|
571
|
-
) -> list[dict[str, Any]]:
|
572
|
-
"""Get the top N nodes by a node attribute."""
|
573
|
-
|
574
|
-
top_idxs = _get_top_n_idx(vals, n, ascending=ascending)
|
575
|
-
top_node_attrs = [graph.vs[idx].attributes() for idx in top_idxs]
|
576
|
-
top_vals = [vals[idx] for idx in top_idxs]
|
577
|
-
return [{val_name: val, **node} for val, node in zip(top_vals, top_node_attrs)]
|
578
|
-
|
579
|
-
|
580
|
-
def _validate_edge_attributes(graph: ig.Graph, edge_attributes: list[str]) -> None:
|
581
|
-
"""Check for the existence of one or more edge attributes."""
|
582
|
-
|
583
|
-
if isinstance(edge_attributes, list):
|
584
|
-
attrs = edge_attributes
|
585
|
-
elif isinstance(edge_attributes, str):
|
586
|
-
attrs = [edge_attributes]
|
587
|
-
else:
|
588
|
-
raise TypeError('"edge_attributes" must be a list or str')
|
589
|
-
|
590
|
-
available_attributes = graph.es[0].attributes().keys()
|
591
|
-
missing_attributes = set(attrs).difference(available_attributes)
|
592
|
-
n_missing_attrs = len(missing_attributes)
|
593
|
-
|
594
|
-
if n_missing_attrs > 0:
|
595
|
-
raise ValueError(
|
596
|
-
f"{n_missing_attrs} edge attributes were missing ({', '.join(missing_attributes)}). The available edge attributes are {', '.join(available_attributes)}"
|
597
|
-
)
|
598
|
-
|
599
|
-
return None
|
600
|
-
|
601
|
-
|
602
|
-
def _validate_vertex_attributes(graph: ig.Graph, vertex_attributes: list[str]) -> None:
|
603
|
-
"""Check for the existence of one or more vertex attributes."""
|
604
|
-
|
605
|
-
if isinstance(vertex_attributes, list):
|
606
|
-
attrs = vertex_attributes
|
607
|
-
elif isinstance(vertex_attributes, str):
|
608
|
-
attrs = [vertex_attributes]
|
609
|
-
else:
|
610
|
-
raise TypeError('"vertex_attributes" must be a list or str')
|
611
|
-
|
612
|
-
available_attributes = graph.vs[0].attributes().keys()
|
613
|
-
missing_attributes = set(attrs).difference(available_attributes)
|
614
|
-
n_missing_attrs = len(missing_attributes)
|
615
|
-
|
616
|
-
if n_missing_attrs > 0:
|
617
|
-
raise ValueError(
|
618
|
-
f"{n_missing_attrs} vertex attributes were missing ({', '.join(missing_attributes)}). The available vertex attributes are {', '.join(available_attributes)}"
|
619
|
-
)
|
620
|
-
|
621
473
|
return None
|