napistu 0.2.5.dev6__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. napistu/__main__.py +126 -96
  2. napistu/constants.py +35 -41
  3. napistu/context/__init__.py +10 -0
  4. napistu/context/discretize.py +462 -0
  5. napistu/context/filtering.py +387 -0
  6. napistu/gcs/__init__.py +1 -1
  7. napistu/identifiers.py +74 -15
  8. napistu/indices.py +68 -0
  9. napistu/ingestion/__init__.py +1 -1
  10. napistu/ingestion/bigg.py +47 -62
  11. napistu/ingestion/constants.py +18 -133
  12. napistu/ingestion/gtex.py +113 -0
  13. napistu/ingestion/hpa.py +147 -0
  14. napistu/ingestion/sbml.py +0 -97
  15. napistu/ingestion/string.py +2 -2
  16. napistu/matching/__init__.py +10 -0
  17. napistu/matching/constants.py +18 -0
  18. napistu/matching/interactions.py +518 -0
  19. napistu/matching/mount.py +529 -0
  20. napistu/matching/species.py +510 -0
  21. napistu/mcp/__init__.py +7 -4
  22. napistu/mcp/__main__.py +128 -72
  23. napistu/mcp/client.py +16 -25
  24. napistu/mcp/codebase.py +201 -153
  25. napistu/mcp/component_base.py +170 -0
  26. napistu/mcp/config.py +223 -0
  27. napistu/mcp/constants.py +45 -2
  28. napistu/mcp/documentation.py +253 -136
  29. napistu/mcp/documentation_utils.py +13 -48
  30. napistu/mcp/execution.py +372 -305
  31. napistu/mcp/health.py +49 -67
  32. napistu/mcp/profiles.py +10 -6
  33. napistu/mcp/server.py +161 -80
  34. napistu/mcp/tutorials.py +139 -87
  35. napistu/modify/__init__.py +1 -1
  36. napistu/modify/gaps.py +1 -1
  37. napistu/network/__init__.py +1 -1
  38. napistu/network/constants.py +101 -34
  39. napistu/network/data_handling.py +388 -0
  40. napistu/network/ig_utils.py +351 -0
  41. napistu/network/napistu_graph_core.py +354 -0
  42. napistu/network/neighborhoods.py +40 -40
  43. napistu/network/net_create.py +373 -309
  44. napistu/network/net_propagation.py +47 -19
  45. napistu/network/{net_utils.py → ng_utils.py} +124 -272
  46. napistu/network/paths.py +67 -51
  47. napistu/network/precompute.py +11 -11
  48. napistu/ontologies/__init__.py +10 -0
  49. napistu/ontologies/constants.py +129 -0
  50. napistu/ontologies/dogma.py +243 -0
  51. napistu/ontologies/genodexito.py +649 -0
  52. napistu/ontologies/mygene.py +369 -0
  53. napistu/ontologies/renaming.py +198 -0
  54. napistu/rpy2/__init__.py +229 -86
  55. napistu/rpy2/callr.py +47 -77
  56. napistu/rpy2/constants.py +24 -23
  57. napistu/rpy2/rids.py +61 -648
  58. napistu/sbml_dfs_core.py +587 -222
  59. napistu/scverse/__init__.py +15 -0
  60. napistu/scverse/constants.py +28 -0
  61. napistu/scverse/loading.py +727 -0
  62. napistu/utils.py +118 -10
  63. {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/METADATA +8 -3
  64. napistu-0.3.1.dist-info/RECORD +133 -0
  65. tests/conftest.py +22 -0
  66. tests/test_context_discretize.py +56 -0
  67. tests/test_context_filtering.py +267 -0
  68. tests/test_identifiers.py +100 -0
  69. tests/test_indices.py +65 -0
  70. tests/{test_edgelist.py → test_ingestion_napistu_edgelist.py} +2 -2
  71. tests/test_matching_interactions.py +108 -0
  72. tests/test_matching_mount.py +305 -0
  73. tests/test_matching_species.py +394 -0
  74. tests/test_mcp_config.py +193 -0
  75. tests/test_mcp_documentation_utils.py +12 -3
  76. tests/test_mcp_server.py +356 -0
  77. tests/test_network_data_handling.py +397 -0
  78. tests/test_network_ig_utils.py +23 -0
  79. tests/test_network_neighborhoods.py +19 -0
  80. tests/test_network_net_create.py +459 -0
  81. tests/test_network_ng_utils.py +30 -0
  82. tests/test_network_paths.py +56 -0
  83. tests/{test_precomputed_distances.py → test_network_precompute.py} +8 -6
  84. tests/test_ontologies_genodexito.py +58 -0
  85. tests/test_ontologies_mygene.py +39 -0
  86. tests/test_ontologies_renaming.py +110 -0
  87. tests/test_rpy2_callr.py +79 -0
  88. tests/test_rpy2_init.py +151 -0
  89. tests/test_sbml.py +0 -31
  90. tests/test_sbml_dfs_core.py +134 -10
  91. tests/test_scverse_loading.py +778 -0
  92. tests/test_set_coverage.py +2 -2
  93. tests/test_utils.py +121 -1
  94. napistu/mechanism_matching.py +0 -1353
  95. napistu/rpy2/netcontextr.py +0 -467
  96. napistu-0.2.5.dev6.dist-info/RECORD +0 -97
  97. tests/test_igraph.py +0 -367
  98. tests/test_mechanism_matching.py +0 -784
  99. tests/test_net_utils.py +0 -149
  100. tests/test_netcontextr.py +0 -105
  101. tests/test_rpy2.py +0 -61
  102. /napistu/ingestion/{cpr_edgelist.py → napistu_edgelist.py} +0 -0
  103. {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/WHEEL +0 -0
  104. {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/entry_points.txt +0 -0
  105. {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/licenses/LICENSE +0 -0
  106. {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/top_level.txt +0 -0
  107. /tests/{test_obo.py → test_ingestion_obo.py} +0 -0
@@ -1,26 +1,29 @@
1
+ """
2
+ Utilities specific to NapistuGraph objects and the wider Napistu ecosystem.
3
+
4
+ This module contains utilities that are specific to NapistuGraph subclasses
5
+ and require knowledge of the Napistu data model (SBML_dfs objects, etc.).
6
+ """
7
+
1
8
  from __future__ import annotations
2
9
 
3
10
  import logging
4
11
  import os
5
- import random
6
- import textwrap
7
12
  import yaml
8
- from typing import Any
9
- from typing import Sequence
13
+ from typing import Optional, Union
10
14
 
11
15
  import igraph as ig
12
- import numpy as np
13
16
  import pandas as pd
14
17
  from napistu import sbml_dfs_core
15
18
  from napistu import source
16
19
  from napistu.network import net_create
20
+ from napistu.network.napistu_graph_core import NapistuGraph
17
21
 
18
22
  from napistu.constants import SBML_DFS
19
23
  from napistu.constants import SOURCE_SPEC
20
-
21
24
  from napistu.identifiers import _validate_assets_sbml_ids
22
- from napistu.network.constants import CPR_GRAPH_NODES
23
- from napistu.network.constants import CPR_GRAPH_TYPES
25
+ from napistu.network.constants import NAPISTU_GRAPH_TYPES
26
+ from napistu.network.constants import NAPISTU_GRAPH_DIRECTEDNESS
24
27
 
25
28
  logger = logging.getLogger(__name__)
26
29
 
@@ -44,7 +47,6 @@ def compartmentalize_species(
44
47
  -------
45
48
  pd.DataFrame containings the s_id and sc_id pairs
46
49
  """
47
-
48
50
  if isinstance(species, str):
49
51
  species = [species]
50
52
  if not isinstance(species, list):
@@ -79,7 +81,6 @@ def compartmentalize_species_pairs(
79
81
  -------
80
82
  pd.DataFrame containing pairs of origin and destination compartmentalized species
81
83
  """
82
-
83
84
  compartmentalized_origins = compartmentalize_species(
84
85
  sbml_dfs, origin_species
85
86
  ).rename(columns={SBML_DFS.SC_ID: "sc_id_origin", SBML_DFS.S_ID: "s_id_origin"})
@@ -114,7 +115,6 @@ def get_minimal_sources_edges(
114
115
  vertices: pd.DataFrame, sbml_dfs: sbml_dfs_core.SBML_dfs
115
116
  ) -> pd.DataFrame | None:
116
117
  """Assign edges to a set of sources."""
117
-
118
118
  nodes = vertices["node"].tolist()
119
119
  present_reactions = sbml_dfs.reactions[sbml_dfs.reactions.index.isin(nodes)]
120
120
 
@@ -133,65 +133,15 @@ def get_minimal_sources_edges(
133
133
  ]
134
134
 
135
135
 
136
- def get_graph_summary(graph: ig.Graph) -> dict[str, Any]:
137
- """Calculates common summary statistics for a network
138
-
139
- Args:
140
- graph (ig.Graph): An igraph
141
-
142
- returns:
143
- dict: A dictionary of summary statistics with values
144
- n_edges [int]: number of edges
145
- n_vertices [int]: number of vertices
146
- n_components [int]: number of weakly connected components
147
- (i.e. without considering edge directionality)
148
- stats_component_sizes [dict[str, float]]: summary statistics for the component sizes
149
- top10_large_components [list[dict[str, Any]]]: the top 10 largest components with 10 example vertices
150
- top10_smallest_components [list[dict[str, Any]]]: the top 10 smallest components with 10 example vertices
151
- average_path_length [float]: the average shortest path length between all vertices
152
- top10_betweenness [list[dict[str, Any]]]: the top 10 vertices by betweenness centrality.
153
- Roughly: measures how many shortest paths go through a vertices
154
- top10_harmonic_centrality [list[dict[str, Any]]]: the top 10 vertices by harmonic centrality:
155
- Roughly: mean inverse distance to all other vertices
156
- """
157
- stats = {}
158
- stats["n_edges"] = graph.ecount()
159
- stats["n_vertices"] = graph.vcount()
160
- components = graph.components(mode="weak")
161
- stats["n_components"] = len(components)
162
- component_sizes = [len(c) for c in components]
163
- stats["stats_component_sizes"] = pd.Series(component_sizes).describe().to_dict()
164
- # get the top 10 largest components and 10 example nodes
165
-
166
- stats["top10_large_components"] = _get_top_n_component_stats(
167
- graph, components, component_sizes, n=10, ascending=False
168
- )
169
-
170
- stats["top10_smallest_components"] = _get_top_n_component_stats(
171
- graph, components, component_sizes, n=10, ascending=True
172
- )
173
-
174
- stats["average_path_length"] = graph.average_path_length()
175
-
176
- between = list(graph.betweenness(directed=False))
177
- stats["top10_betweenness"] = _get_top_n_nodes(
178
- graph, between, "betweenness", n=10, ascending=False
179
- )
180
-
181
- harmonic_centrality = list(graph.harmonic_centrality())
182
- stats["top10_harmonic_centrality"] = _get_top_n_nodes(
183
- graph, harmonic_centrality, "harmonic_centrality", n=10, ascending=False
184
- )
185
-
186
- return stats
187
-
188
-
189
136
  def export_networks(
190
137
  sbml_dfs: sbml_dfs_core.SBML_dfs,
191
138
  model_prefix: str,
192
139
  outdir: str,
193
140
  directeds: list[bool] = [True, False],
194
- graph_types: list[str] = [CPR_GRAPH_TYPES.BIPARTITE, CPR_GRAPH_TYPES.REGULATORY],
141
+ graph_types: list[str] = [
142
+ NAPISTU_GRAPH_TYPES.BIPARTITE,
143
+ NAPISTU_GRAPH_TYPES.REGULATORY,
144
+ ],
195
145
  ) -> None:
196
146
  """
197
147
  Exports Networks
@@ -217,7 +167,6 @@ def export_networks(
217
167
  ----------
218
168
  None
219
169
  """
220
-
221
170
  if not isinstance(sbml_dfs, sbml_dfs_core.SBML_dfs):
222
171
  raise TypeError(
223
172
  f"sbml_dfs must be a sbml_dfs_core.SBML_dfs, but was {type(sbml_dfs)}"
@@ -242,7 +191,7 @@ def export_networks(
242
191
  )
243
192
  print(f"Exporting {graph_type} network to {export_pkl_path}")
244
193
 
245
- network_graph = net_create.process_cpr_graph(
194
+ network_graph = net_create.process_napistu_graph(
246
195
  sbml_dfs=sbml_dfs,
247
196
  directed=directed,
248
197
  graph_type=graph_type,
@@ -259,7 +208,7 @@ def read_network_pkl(
259
208
  network_dir: str,
260
209
  graph_type: str,
261
210
  directed: bool = True,
262
- ) -> ig.Graph:
211
+ ) -> NapistuGraph:
263
212
  """
264
213
  Read Network Pickle
265
214
 
@@ -280,11 +229,10 @@ def read_network_pkl(
280
229
 
281
230
  Returns
282
231
  -------
283
- network_graph: igraph.Graph
284
- An igraph network of the pathway
232
+ network_graph: NapistuGraph
233
+ A NapistuGraph network of the pathway
285
234
 
286
235
  """
287
-
288
236
  if not isinstance(model_prefix, str):
289
237
  raise TypeError(f"model_prefix was a {type(model_prefix)} and must be a str")
290
238
  if not os.path.isdir(network_dir):
@@ -306,104 +254,102 @@ def read_network_pkl(
306
254
  return network_graph
307
255
 
308
256
 
309
- def filter_to_largest_subgraph(cpr_graph: ig.Graph) -> ig.Graph:
310
- """Filter a graph to its largest weakly connected component."""
311
-
312
- component_members = cpr_graph.components(mode="weak")
313
- component_sizes = [len(x) for x in component_members]
314
-
315
- top_component_members = [
316
- m
317
- for s, m in zip(component_sizes, component_members)
318
- if s == max(component_sizes)
319
- ][0]
320
-
321
- largest_subgraph = cpr_graph.induced_subgraph(top_component_members)
322
-
323
- return largest_subgraph
324
-
325
-
326
257
  def validate_assets(
327
258
  sbml_dfs: sbml_dfs_core.SBML_dfs,
328
- cpr_graph: ig.Graph,
329
- precomputed_distances: pd.DataFrame,
330
- identifiers_df: pd.DataFrame,
259
+ napistu_graph: Optional[Union[NapistuGraph, ig.Graph]] = None,
260
+ precomputed_distances: Optional[pd.DataFrame] = None,
261
+ identifiers_df: Optional[pd.DataFrame] = None,
331
262
  ) -> None:
332
263
  """
333
264
  Validate Assets
334
265
 
335
266
  Perform a few quick checks of inputs to catch inconsistencies.
336
267
 
337
- Args:
338
- sbml_dfs (sbml_dfs_core.SBML_dfs):
339
- A pathway representation.
340
- cpr_graph (igraph.Graph):
341
- A network-based representation of "sbml_dfs".
342
- precomputed_distances (pd.DataFrame):
343
- Precomputed distances between vertices in "cpr_graph".
344
- identifiers_df (pd.DataFrame):
345
- A table of systematic identifiers for compartmentalized species in "sbml_dfs".
268
+ Parameters
269
+ ----------
270
+ sbml_dfs : sbml_dfs_core.SBML_dfs
271
+ A pathway representation. (Required)
272
+ napistu_graph : NapistuGraph, optional
273
+ A network-based representation of `sbml_dfs`. NapistuGraph is a subclass of igraph.Graph.
274
+ precomputed_distances : pandas.DataFrame, optional
275
+ Precomputed distances between vertices in `napistu_graph`.
276
+ identifiers_df : pandas.DataFrame, optional
277
+ A table of systematic identifiers for compartmentalized species in `sbml_dfs`.
346
278
 
347
- Returns:
348
- None
279
+ Returns
280
+ -------
281
+ None
349
282
 
283
+ Warns
284
+ -----
285
+ If only sbml_dfs is provided and no other assets are given, a warning is logged.
350
286
 
287
+ Raises
288
+ ------
289
+ ValueError
290
+ If precomputed_distances is provided but napistu_graph is not.
351
291
  """
292
+ if (
293
+ napistu_graph is None
294
+ and precomputed_distances is None
295
+ and identifiers_df is None
296
+ ):
297
+ logger.warning(
298
+ "validate_assets: Only sbml_dfs was provided; nothing to validate."
299
+ )
300
+ return None
352
301
 
353
- # compare cpr_graph to sbml_dfs
354
- # test for consistent sc_id to sc_name mappings
355
- _validate_assets_sbml_graph(sbml_dfs, cpr_graph)
302
+ # Validate napistu_graph if provided
303
+ if napistu_graph is not None:
304
+ _validate_assets_sbml_graph(sbml_dfs, napistu_graph)
356
305
 
357
- # compare precomputed_distances to cpr_graph
358
- # test whether dircetly connected sc_ids are in the same reaction
359
- _validate_assets_graph_dist(cpr_graph, precomputed_distances)
306
+ # Validate precomputed_distances if provided (requires napistu_graph)
307
+ if precomputed_distances is not None:
308
+ if napistu_graph is None:
309
+ raise ValueError(
310
+ "napistu_graph must be provided if precomputed_distances is provided."
311
+ )
312
+ _validate_assets_graph_dist(napistu_graph, precomputed_distances)
360
313
 
361
- # compare identifiers_df to sbml_dfs
362
- # do the (sc_id, s_name) tuples in in identifiers match (sc_id, s_name) tuples in sbml_dfs
363
- _validate_assets_sbml_ids(sbml_dfs, identifiers_df)
314
+ # Validate identifiers_df if provided
315
+ if identifiers_df is not None:
316
+ _validate_assets_sbml_ids(sbml_dfs, identifiers_df)
364
317
 
365
318
  return None
366
319
 
367
320
 
368
- def cpr_graph_to_pandas_dfs(cpr_graph: ig.Graph):
321
+ def napistu_graph_to_pandas_dfs(
322
+ napistu_graph: Union[NapistuGraph, ig.Graph],
323
+ ) -> tuple[pd.DataFrame, pd.DataFrame]:
369
324
  """
370
- CPR Graph to Pandas DataFrames
371
-
372
- Take an igraph representation of a network and turn it into vertices and edges tables.
325
+ Convert a NapistuGraph to Pandas DataFrames for vertices and edges.
373
326
 
374
- Args:
375
- cpr_graph(ig.Graph): an igraph network
327
+ Parameters
328
+ ----------
329
+ napistu_graph : NapistuGraph
330
+ A NapistuGraph network (subclass of igraph.Graph).
376
331
 
377
- Returns:
378
- vertices (pd.DataFrame):
379
- A table with one row per vertex.
380
- edges (pd.DataFrame):
381
- A table with one row per edge.
332
+ Returns
333
+ -------
334
+ vertices : pandas.DataFrame
335
+ A table with one row per vertex.
336
+ edges : pandas.DataFrame
337
+ A table with one row per edge.
382
338
  """
383
-
384
339
  vertices = pd.DataFrame(
385
- [{**{"index": v.index}, **v.attributes()} for v in cpr_graph.vs]
340
+ [{**{"index": v.index}, **v.attributes()} for v in napistu_graph.vs]
386
341
  )
387
342
  edges = pd.DataFrame(
388
343
  [
389
344
  {**{"source": e.source, "target": e.target}, **e.attributes()}
390
- for e in cpr_graph.es
345
+ for e in napistu_graph.es
391
346
  ]
392
347
  )
393
-
394
348
  return vertices, edges
395
349
 
396
350
 
397
- def safe_fill(x, fill_width=15):
398
- if x == "":
399
- return ""
400
- else:
401
- return textwrap.fill(x, 15)
402
-
403
-
404
351
  def read_graph_attrs_spec(graph_attrs_spec_uri: str) -> dict:
405
- """Read a YAML file containing the specification for adding reaction- and/or species-attributes to a cpr_graph."""
406
-
352
+ """Read a YAML file containing the specification for adding reaction- and/or species-attributes to a napistu_graph."""
407
353
  with open(graph_attrs_spec_uri) as f:
408
354
  graph_attrs_spec = yaml.safe_load(f)
409
355
 
@@ -426,13 +372,14 @@ def read_graph_attrs_spec(graph_attrs_spec_uri: str) -> dict:
426
372
  return graph_attrs_spec
427
373
 
428
374
 
375
+ # Internal utility functions
429
376
  def _create_network_save_string(
430
377
  model_prefix: str, outdir: str, directed: bool, graph_type: str
431
378
  ) -> str:
432
379
  if directed:
433
- directed_str = "directed"
380
+ directed_str = NAPISTU_GRAPH_DIRECTEDNESS.DIRECTED
434
381
  else:
435
- directed_str = "undirected"
382
+ directed_str = NAPISTU_GRAPH_DIRECTEDNESS.UNDIRECTED
436
383
 
437
384
  export_pkl_path = os.path.join(
438
385
  outdir, model_prefix + "_network_" + graph_type + "_" + directed_str + ".pkl"
@@ -441,34 +388,31 @@ def _create_network_save_string(
441
388
  return export_pkl_path
442
389
 
443
390
 
444
- def _create_induced_subgraph(
445
- cpr_graph: ig.Graph, vertices=None, n_vertices: int = 5000
446
- ) -> ig.Graph:
447
- """
448
- Utility function for creating subgraphs including a set of vertices and their connections
449
-
391
+ def _validate_assets_sbml_graph(
392
+ sbml_dfs: sbml_dfs_core.SBML_dfs, napistu_graph: Union[NapistuGraph, ig.Graph]
393
+ ) -> None:
450
394
  """
395
+ Check an sbml_dfs model and NapistuGraph for inconsistencies.
451
396
 
452
- if vertices is not None:
453
- selected_vertices = vertices
454
- else:
455
- vertex_names = cpr_graph.vs[CPR_GRAPH_NODES.NAME]
456
- selected_vertices = random.sample(vertex_names, n_vertices)
457
-
458
- subgraph = cpr_graph.induced_subgraph(selected_vertices)
459
-
460
- return subgraph
461
-
397
+ Parameters
398
+ ----------
399
+ sbml_dfs : sbml_dfs_core.SBML_dfs
400
+ The pathway representation.
401
+ napistu_graph : NapistuGraph
402
+ The network representation (subclass of igraph.Graph).
462
403
 
463
- def _validate_assets_sbml_graph(
464
- sbml_dfs: sbml_dfs_core.SBML_dfs, cpr_graph: ig.Graph
465
- ) -> None:
466
- """ "Check an sbml_dfs model and cpr_graph for inconsistencies."""
404
+ Returns
405
+ -------
406
+ None
467
407
 
408
+ Raises
409
+ ------
410
+ ValueError
411
+ If species names do not match between sbml_dfs and napistu_graph.
412
+ """
468
413
  vertices = pd.DataFrame(
469
- [{**{"index": v.index}, **v.attributes()} for v in cpr_graph.vs]
414
+ [{**{"index": v.index}, **v.attributes()} for v in napistu_graph.vs]
470
415
  )
471
-
472
416
  matched_cspecies = sbml_dfs.compartmentalized_species.reset_index()[
473
417
  ["sc_id", "sc_name"]
474
418
  ].merge(
@@ -476,146 +420,54 @@ def _validate_assets_sbml_graph(
476
420
  left_on=["sc_id"],
477
421
  right_on=["name"],
478
422
  )
479
-
480
423
  mismatched_names = [
481
424
  f"{x} != {y}"
482
425
  for x, y in zip(matched_cspecies["sc_name"], matched_cspecies["node_name"])
483
426
  if x != y
484
427
  ]
485
-
486
428
  if len(mismatched_names) > 0:
487
429
  example_names = mismatched_names[: min(10, len(mismatched_names))]
488
-
489
430
  raise ValueError(
490
- f"{len(mismatched_names)} species names do not match between sbml_dfs and cpr_graph: {example_names}"
431
+ f"{len(mismatched_names)} species names do not match between sbml_dfs and napistu_graph: {example_names}"
491
432
  )
492
-
493
433
  return None
494
434
 
495
435
 
496
436
  def _validate_assets_graph_dist(
497
- cpr_graph: ig.Graph, precomputed_distances: pd.DataFrame
437
+ napistu_graph: NapistuGraph, precomputed_distances: pd.DataFrame
498
438
  ) -> None:
499
- """ "Check an cpr_graph and precomputed distances table for inconsistencies."""
439
+ """
440
+ Check a NapistuGraph and precomputed distances table for inconsistencies.
500
441
 
442
+ Parameters
443
+ ----------
444
+ napistu_graph : NapistuGraph
445
+ The network representation (subclass of igraph.Graph).
446
+ precomputed_distances : pandas.DataFrame
447
+ Precomputed distances between vertices in the network.
448
+
449
+ Returns
450
+ -------
451
+ None
452
+
453
+ Warns
454
+ -----
455
+ If edge weights are inconsistent between the graph and precomputed distances.
456
+ """
501
457
  edges = pd.DataFrame(
502
- [{**{"index": e.index}, **e.attributes()} for e in cpr_graph.es]
458
+ [{**{"index": e.index}, **e.attributes()} for e in napistu_graph.es]
503
459
  )
504
-
505
460
  direct_interactions = precomputed_distances.query("path_length == 1")
506
-
507
461
  edges_with_distances = direct_interactions.merge(
508
462
  edges[["from", "to", "weights", "upstream_weights"]],
509
463
  left_on=["sc_id_origin", "sc_id_dest"],
510
464
  right_on=["from", "to"],
511
465
  )
512
-
513
466
  inconsistent_weights = edges_with_distances.query("path_weights != weights")
514
467
  if inconsistent_weights.shape[0] > 0:
515
468
  logger.warning(
516
469
  f"{inconsistent_weights.shape[0]} edges' weights are inconsistent between",
517
- "edges in the cpr_graph and length 1 paths in precomputed_distances."
470
+ "edges in the napistu_graph and length 1 paths in precomputed_distances."
518
471
  f"This is {inconsistent_weights.shape[0] / edges_with_distances.shape[0]:.2%} of all edges.",
519
472
  )
520
-
521
- return None
522
-
523
-
524
- def _get_top_n_idx(arr: Sequence, n: int, ascending: bool = False) -> Sequence[int]:
525
- """Returns the indices of the top n values in an array
526
-
527
- Args:
528
- arr (Sequence): An array of values
529
- n (int): The number of top values to return
530
- ascending (bool, optional): Whether to return the top or bottom n values. Defaults to False.
531
-
532
- Returns:
533
- Sequence[int]: The indices of the top n values
534
- """
535
- order = np.argsort(arr)
536
- if ascending:
537
- return order[:n] # type: ignore
538
- else:
539
- return order[-n:][::-1] # type: ignore
540
-
541
-
542
- def _get_top_n_objects(
543
- object_vals: Sequence, objects: Sequence, n: int = 10, ascending: bool = False
544
- ) -> list:
545
- """Get the top N objects based on a ranking measure."""
546
-
547
- idxs = _get_top_n_idx(object_vals, n, ascending=ascending)
548
- top_objects = [objects[idx] for idx in idxs]
549
- return top_objects
550
-
551
-
552
- def _get_top_n_component_stats(
553
- graph: ig.Graph,
554
- components,
555
- component_sizes: Sequence[int],
556
- n: int = 10,
557
- ascending: bool = False,
558
- ) -> list[dict[str, Any]]:
559
- """Summarize the top N components' network properties."""
560
-
561
- top_components = _get_top_n_objects(component_sizes, components, n, ascending)
562
- top_component_stats = [
563
- {"n": len(c), "examples": [graph.vs[n].attributes() for n in c[:10]]}
564
- for c in top_components
565
- ]
566
- return top_component_stats
567
-
568
-
569
- def _get_top_n_nodes(
570
- graph: ig.Graph, vals: Sequence, val_name: str, n: int = 10, ascending: bool = False
571
- ) -> list[dict[str, Any]]:
572
- """Get the top N nodes by a node attribute."""
573
-
574
- top_idxs = _get_top_n_idx(vals, n, ascending=ascending)
575
- top_node_attrs = [graph.vs[idx].attributes() for idx in top_idxs]
576
- top_vals = [vals[idx] for idx in top_idxs]
577
- return [{val_name: val, **node} for val, node in zip(top_vals, top_node_attrs)]
578
-
579
-
580
- def _validate_edge_attributes(graph: ig.Graph, edge_attributes: list[str]) -> None:
581
- """Check for the existence of one or more edge attributes."""
582
-
583
- if isinstance(edge_attributes, list):
584
- attrs = edge_attributes
585
- elif isinstance(edge_attributes, str):
586
- attrs = [edge_attributes]
587
- else:
588
- raise TypeError('"edge_attributes" must be a list or str')
589
-
590
- available_attributes = graph.es[0].attributes().keys()
591
- missing_attributes = set(attrs).difference(available_attributes)
592
- n_missing_attrs = len(missing_attributes)
593
-
594
- if n_missing_attrs > 0:
595
- raise ValueError(
596
- f"{n_missing_attrs} edge attributes were missing ({', '.join(missing_attributes)}). The available edge attributes are {', '.join(available_attributes)}"
597
- )
598
-
599
- return None
600
-
601
-
602
- def _validate_vertex_attributes(graph: ig.Graph, vertex_attributes: list[str]) -> None:
603
- """Check for the existence of one or more vertex attributes."""
604
-
605
- if isinstance(vertex_attributes, list):
606
- attrs = vertex_attributes
607
- elif isinstance(vertex_attributes, str):
608
- attrs = [vertex_attributes]
609
- else:
610
- raise TypeError('"vertex_attributes" must be a list or str')
611
-
612
- available_attributes = graph.vs[0].attributes().keys()
613
- missing_attributes = set(attrs).difference(available_attributes)
614
- n_missing_attrs = len(missing_attributes)
615
-
616
- if n_missing_attrs > 0:
617
- raise ValueError(
618
- f"{n_missing_attrs} vertex attributes were missing ({', '.join(missing_attributes)}). The available vertex attributes are {', '.join(available_attributes)}"
619
- )
620
-
621
473
  return None