napistu 0.2.5.dev6__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. napistu/__main__.py +126 -96
  2. napistu/constants.py +35 -41
  3. napistu/context/__init__.py +10 -0
  4. napistu/context/discretize.py +462 -0
  5. napistu/context/filtering.py +387 -0
  6. napistu/gcs/__init__.py +1 -1
  7. napistu/identifiers.py +74 -15
  8. napistu/indices.py +68 -0
  9. napistu/ingestion/__init__.py +1 -1
  10. napistu/ingestion/bigg.py +47 -62
  11. napistu/ingestion/constants.py +18 -133
  12. napistu/ingestion/gtex.py +113 -0
  13. napistu/ingestion/hpa.py +147 -0
  14. napistu/ingestion/sbml.py +0 -97
  15. napistu/ingestion/string.py +2 -2
  16. napistu/matching/__init__.py +10 -0
  17. napistu/matching/constants.py +18 -0
  18. napistu/matching/interactions.py +518 -0
  19. napistu/matching/mount.py +529 -0
  20. napistu/matching/species.py +510 -0
  21. napistu/mcp/__init__.py +7 -4
  22. napistu/mcp/__main__.py +128 -72
  23. napistu/mcp/client.py +16 -25
  24. napistu/mcp/codebase.py +201 -153
  25. napistu/mcp/component_base.py +170 -0
  26. napistu/mcp/config.py +223 -0
  27. napistu/mcp/constants.py +45 -2
  28. napistu/mcp/documentation.py +253 -136
  29. napistu/mcp/documentation_utils.py +13 -48
  30. napistu/mcp/execution.py +372 -305
  31. napistu/mcp/health.py +49 -67
  32. napistu/mcp/profiles.py +10 -6
  33. napistu/mcp/server.py +161 -80
  34. napistu/mcp/tutorials.py +139 -87
  35. napistu/modify/__init__.py +1 -1
  36. napistu/modify/gaps.py +1 -1
  37. napistu/network/__init__.py +1 -1
  38. napistu/network/constants.py +101 -34
  39. napistu/network/data_handling.py +388 -0
  40. napistu/network/ig_utils.py +351 -0
  41. napistu/network/napistu_graph_core.py +354 -0
  42. napistu/network/neighborhoods.py +40 -40
  43. napistu/network/net_create.py +373 -309
  44. napistu/network/net_propagation.py +47 -19
  45. napistu/network/{net_utils.py → ng_utils.py} +124 -272
  46. napistu/network/paths.py +67 -51
  47. napistu/network/precompute.py +11 -11
  48. napistu/ontologies/__init__.py +10 -0
  49. napistu/ontologies/constants.py +129 -0
  50. napistu/ontologies/dogma.py +243 -0
  51. napistu/ontologies/genodexito.py +649 -0
  52. napistu/ontologies/mygene.py +369 -0
  53. napistu/ontologies/renaming.py +198 -0
  54. napistu/rpy2/__init__.py +229 -86
  55. napistu/rpy2/callr.py +47 -77
  56. napistu/rpy2/constants.py +24 -23
  57. napistu/rpy2/rids.py +61 -648
  58. napistu/sbml_dfs_core.py +587 -222
  59. napistu/scverse/__init__.py +15 -0
  60. napistu/scverse/constants.py +28 -0
  61. napistu/scverse/loading.py +727 -0
  62. napistu/utils.py +118 -10
  63. {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/METADATA +8 -3
  64. napistu-0.3.1.dist-info/RECORD +133 -0
  65. tests/conftest.py +22 -0
  66. tests/test_context_discretize.py +56 -0
  67. tests/test_context_filtering.py +267 -0
  68. tests/test_identifiers.py +100 -0
  69. tests/test_indices.py +65 -0
  70. tests/{test_edgelist.py → test_ingestion_napistu_edgelist.py} +2 -2
  71. tests/test_matching_interactions.py +108 -0
  72. tests/test_matching_mount.py +305 -0
  73. tests/test_matching_species.py +394 -0
  74. tests/test_mcp_config.py +193 -0
  75. tests/test_mcp_documentation_utils.py +12 -3
  76. tests/test_mcp_server.py +356 -0
  77. tests/test_network_data_handling.py +397 -0
  78. tests/test_network_ig_utils.py +23 -0
  79. tests/test_network_neighborhoods.py +19 -0
  80. tests/test_network_net_create.py +459 -0
  81. tests/test_network_ng_utils.py +30 -0
  82. tests/test_network_paths.py +56 -0
  83. tests/{test_precomputed_distances.py → test_network_precompute.py} +8 -6
  84. tests/test_ontologies_genodexito.py +58 -0
  85. tests/test_ontologies_mygene.py +39 -0
  86. tests/test_ontologies_renaming.py +110 -0
  87. tests/test_rpy2_callr.py +79 -0
  88. tests/test_rpy2_init.py +151 -0
  89. tests/test_sbml.py +0 -31
  90. tests/test_sbml_dfs_core.py +134 -10
  91. tests/test_scverse_loading.py +778 -0
  92. tests/test_set_coverage.py +2 -2
  93. tests/test_utils.py +121 -1
  94. napistu/mechanism_matching.py +0 -1353
  95. napistu/rpy2/netcontextr.py +0 -467
  96. napistu-0.2.5.dev6.dist-info/RECORD +0 -97
  97. tests/test_igraph.py +0 -367
  98. tests/test_mechanism_matching.py +0 -784
  99. tests/test_net_utils.py +0 -149
  100. tests/test_netcontextr.py +0 -105
  101. tests/test_rpy2.py +0 -61
  102. /napistu/ingestion/{cpr_edgelist.py → napistu_edgelist.py} +0 -0
  103. {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/WHEEL +0 -0
  104. {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/entry_points.txt +0 -0
  105. {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/licenses/LICENSE +0 -0
  106. {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/top_level.txt +0 -0
  107. /tests/{test_obo.py → test_ingestion_obo.py} +0 -0
napistu/network/paths.py CHANGED
@@ -5,20 +5,25 @@ import math
5
5
  import warnings
6
6
  from typing import Any
7
7
 
8
- import igraph as ig
9
8
  import pandas as pd
9
+
10
10
  from napistu import sbml_dfs_core
11
11
  from napistu import utils
12
+ from napistu.network.napistu_graph_core import NapistuGraph
13
+ from napistu.network.ng_utils import get_minimal_sources_edges
12
14
  from napistu.constants import CPR_PATH_REQ_VARS
13
15
  from napistu.constants import MINI_SBO_NAME_TO_POLARITY
14
16
  from napistu.constants import MINI_SBO_TO_NAME
15
- from napistu.network import net_utils
17
+ from napistu.constants import SBML_DFS
18
+ from napistu.network.constants import NET_POLARITY
19
+ from napistu.network.constants import NAPISTU_GRAPH_EDGES
20
+ from napistu.network.constants import VALID_LINK_POLARITIES
16
21
 
17
22
  logger = logging.getLogger(__name__)
18
23
 
19
24
 
20
25
  def find_shortest_reaction_paths(
21
- cpr_graph: ig.Graph,
26
+ napistu_graph: NapistuGraph,
22
27
  sbml_dfs: sbml_dfs_core.SBML_dfs,
23
28
  origin: str,
24
29
  dest: str | list,
@@ -31,8 +36,8 @@ def find_shortest_reaction_paths(
31
36
 
32
37
  Parameters
33
38
  ----------
34
- cpr_graph : igraph.Graph
35
- A bipartite network connecting molecular species and reactions
39
+ napistu_graph : NapistuGraph
40
+ A network of molecular species and reactions (subclass of igraph.Graph)
36
41
  sbml_dfs : sbml_dfs_core.SBML_dfs
37
42
  A model formed by aggregating pathways
38
43
  origin : str
@@ -63,7 +68,7 @@ def find_shortest_reaction_paths(
63
68
  # igraph throws warnings for each pair of unconnected species
64
69
  warnings.simplefilter("ignore")
65
70
 
66
- shortest_paths = cpr_graph.get_all_shortest_paths(
71
+ shortest_paths = napistu_graph.get_all_shortest_paths(
67
72
  origin, to=dest, weights=weight_var
68
73
  )
69
74
 
@@ -75,24 +80,34 @@ def find_shortest_reaction_paths(
75
80
  # igraph throws warnings for each pair of unconnected species
76
81
  warnings.simplefilter("ignore")
77
82
 
78
- shortest_paths = cpr_graph.get_all_shortest_paths(
83
+ shortest_paths = napistu_graph.get_all_shortest_paths(
79
84
  origin, to=dest, weights=weight_var
80
85
  )
81
86
 
82
87
  # summarize the graph which is being evaluated
83
- cpr_graph_names = [v.attributes()["name"] for v in cpr_graph.vs]
88
+ napistu_graph_names = [v.attributes()["name"] for v in napistu_graph.vs]
84
89
 
85
- cpr_graph_edges = pd.DataFrame(
90
+ napistu_graph_edges = pd.DataFrame(
86
91
  {
87
- "from": cpr_graph.es.get_attribute_values("from"),
88
- "to": cpr_graph.es.get_attribute_values("to"),
89
- "weights": cpr_graph.es.get_attribute_values(weight_var),
90
- "sbo_term": cpr_graph.es.get_attribute_values("sbo_term"),
91
- "direction": cpr_graph.es.get_attribute_values("direction"),
92
+ NAPISTU_GRAPH_EDGES.FROM: napistu_graph.es.get_attribute_values(
93
+ NAPISTU_GRAPH_EDGES.FROM
94
+ ),
95
+ NAPISTU_GRAPH_EDGES.TO: napistu_graph.es.get_attribute_values(
96
+ NAPISTU_GRAPH_EDGES.TO
97
+ ),
98
+ NAPISTU_GRAPH_EDGES.WEIGHTS: napistu_graph.es.get_attribute_values(
99
+ weight_var
100
+ ),
101
+ NAPISTU_GRAPH_EDGES.SBO_TERM: napistu_graph.es.get_attribute_values(
102
+ NAPISTU_GRAPH_EDGES.SBO_TERM
103
+ ),
104
+ NAPISTU_GRAPH_EDGES.DIRECTION: napistu_graph.es.get_attribute_values(
105
+ NAPISTU_GRAPH_EDGES.DIRECTION
106
+ ),
92
107
  }
93
108
  )
94
109
 
95
- directed = cpr_graph.is_directed()
110
+ directed = napistu_graph.is_directed()
96
111
 
97
112
  # format shortest paths
98
113
  # summaries of nodes
@@ -103,7 +118,7 @@ def find_shortest_reaction_paths(
103
118
  entry = 0
104
119
  for path in shortest_paths:
105
120
  path_df = (
106
- pd.DataFrame({"node": [cpr_graph_names[x] for x in path]})
121
+ pd.DataFrame({"node": [napistu_graph_names[x] for x in path]})
107
122
  .reset_index()
108
123
  .rename(columns={"index": "step"})
109
124
  .assign(path=entry)
@@ -119,19 +134,19 @@ def find_shortest_reaction_paths(
119
134
 
120
135
  if directed:
121
136
  path_edges = path_edges.merge(
122
- cpr_graph_edges,
137
+ napistu_graph_edges,
123
138
  left_on=["from", "to"],
124
139
  right_on=["from", "to"],
125
140
  )
126
141
 
127
- path_edges["link_polarity"] = (
142
+ path_edges[NET_POLARITY.LINK_POLARITY] = (
128
143
  path_edges["sbo_term"]
129
144
  .map(MINI_SBO_TO_NAME)
130
145
  .map(MINI_SBO_NAME_TO_POLARITY)
131
146
  )
132
147
  # is the edge predicted to be activating, inhibiting or ambiguous?
133
- path_edges["net_polarity"] = _calculate_net_polarity(
134
- path_edges["link_polarity"]
148
+ path_edges[NET_POLARITY.NET_POLARITY] = _calculate_net_polarity(
149
+ path_edges[NET_POLARITY.LINK_POLARITY]
135
150
  )
136
151
 
137
152
  else:
@@ -148,7 +163,7 @@ def find_shortest_reaction_paths(
148
163
  ]
149
164
  )
150
165
  .merge(
151
- cpr_graph_edges,
166
+ napistu_graph_edges,
152
167
  left_on=["from", "to"],
153
168
  right_on=["from", "to"],
154
169
  # keep at most 1 entry per step
@@ -221,10 +236,10 @@ def find_shortest_reaction_paths(
221
236
 
222
237
 
223
238
  def find_all_shortest_reaction_paths(
224
- cpr_graph: ig.Graph,
239
+ napistu_graph: NapistuGraph,
225
240
  sbml_dfs: sbml_dfs_core.SBML_dfs,
226
241
  target_species_paths: pd.DataFrame,
227
- weight_var: str = "weights",
242
+ weight_var: str = NAPISTU_GRAPH_EDGES.WEIGHTS,
228
243
  precomputed_distances: pd.DataFrame | None = None,
229
244
  ):
230
245
  """
@@ -234,8 +249,8 @@ def find_all_shortest_reaction_paths(
234
249
 
235
250
  Parameters
236
251
  ----------
237
- cpr_graph : igraph.Graph
238
- A bipartite network connecting molecular species and reactions
252
+ napistu_graph : NapistuGraph
253
+ A network interconnecting molecular species and reactions (subclass of igraph.Graph)
239
254
  sbml_dfs : SBML_dfs
240
255
  A model formed by aggregating pathways
241
256
  target_species_paths : pd.DataFrame
@@ -273,7 +288,7 @@ def find_all_shortest_reaction_paths(
273
288
  one_search = target_species_paths.iloc[i]
274
289
 
275
290
  paths = find_shortest_reaction_paths(
276
- cpr_graph,
291
+ napistu_graph,
277
292
  sbml_dfs,
278
293
  origin=one_search["sc_id_origin"],
279
294
  dest=one_search["sc_id_dest"],
@@ -310,9 +325,7 @@ def find_all_shortest_reaction_paths(
310
325
  ).reset_index()
311
326
 
312
327
  # find a minimal set of pathway sources to organize reactions
313
- edge_sources = net_utils.get_minimal_sources_edges(
314
- all_shortest_reaction_paths_df, sbml_dfs
315
- )
328
+ edge_sources = get_minimal_sources_edges(all_shortest_reaction_paths_df, sbml_dfs)
316
329
 
317
330
  # create a new small network of shortest paths
318
331
  unique_path_nodes = (
@@ -322,8 +335,8 @@ def find_all_shortest_reaction_paths(
322
335
  .drop(columns=["index", "step", "path", "origin", "dest"])
323
336
  )
324
337
 
325
- directed = cpr_graph.is_directed()
326
- paths_graph = ig.Graph.DictList(
338
+ directed = napistu_graph.is_directed()
339
+ paths_graph = NapistuGraph.DictList(
327
340
  vertices=unique_path_nodes.to_dict("records"),
328
341
  edges=all_shortest_reaction_path_edges_df.to_dict("records"),
329
342
  directed=directed,
@@ -339,16 +352,16 @@ def find_all_shortest_reaction_paths(
339
352
  )
340
353
 
341
354
 
342
- def plot_shortest_paths(paths_graph: ig.Graph) -> ig.plot:
355
+ def plot_shortest_paths(napistu_graph: NapistuGraph) -> NapistuGraph.plot:
343
356
  """Plot a shortest paths graph."""
344
357
 
345
- if "label" not in paths_graph.vs.attributes():
358
+ if "label" not in napistu_graph.vs.attributes():
346
359
  logger.warning(
347
360
  "label was not defined as a vertex attribute so paths will not be colored"
348
361
  )
349
- paths_graph.vs.set_attribute_values("label", "")
362
+ napistu_graph.vs.set_attribute_values("label", "")
350
363
 
351
- paths_graph_layout = paths_graph.layout("kk")
364
+ paths_graph_layout = napistu_graph.layout("kk")
352
365
 
353
366
  color_dict = {"reaction": "dodgerblue", "species": "firebrick"}
354
367
 
@@ -356,19 +369,21 @@ def plot_shortest_paths(paths_graph: ig.Graph) -> ig.plot:
356
369
  visual_style["background"] = "black"
357
370
  visual_style["vertex_size"] = 10
358
371
  visual_style["vertex_label"] = [
359
- net_utils.safe_fill(x) for x in paths_graph.vs["label"]
372
+ utils.safe_fill(x) for x in napistu_graph.vs["label"]
360
373
  ]
361
374
  visual_style["vertex_label_color"] = "white"
362
375
  visual_style["vertex_label_size"] = 8
363
376
  visual_style["vertex_label_angle"] = 90
364
- visual_style["vertex_color"] = [color_dict[x] for x in paths_graph.vs["node_type"]]
365
- visual_style["edge_width"] = [math.sqrt(x) for x in paths_graph.es["weights"]]
377
+ visual_style["vertex_color"] = [
378
+ color_dict[x] for x in napistu_graph.vs["node_type"]
379
+ ]
380
+ visual_style["edge_width"] = [math.sqrt(x) for x in napistu_graph.es["weights"]]
366
381
  visual_style["edge_color"] = "dimgray"
367
382
  visual_style["layout"] = paths_graph_layout
368
383
  visual_style["bbox"] = (2000, 2000)
369
384
  visual_style["margin"] = 50
370
385
 
371
- return ig.plot(paths_graph, **visual_style)
386
+ return napistu_graph.plot(**visual_style)
372
387
 
373
388
 
374
389
  def _filter_paths_by_precomputed_distances(
@@ -401,27 +416,26 @@ def _calculate_net_polarity(link_polarity_series: pd.Series) -> str:
401
416
  """Determine whether a path implies activation, inhibition, or an ambiguous regulatory relationship."""
402
417
 
403
418
  assert isinstance(link_polarity_series, pd.Series)
404
- assert link_polarity_series.name == "link_polarity"
419
+ assert link_polarity_series.name == NET_POLARITY.LINK_POLARITY
405
420
 
406
421
  # loop through link polarities and
407
422
  # determine the cumulative polarity, accounting for inhibition steps which flip polarity
408
423
  # and ambiguous steps which will add an ambiguous label to the net result
409
424
 
410
425
  observed_polarities = set(link_polarity_series.tolist()) # type: set[str]
411
- valid_polarities = {"activation", "inhibition", "ambiguous"} # type: set[str]
412
426
  invalid_polarities = observed_polarities.difference(
413
- valid_polarities
427
+ VALID_LINK_POLARITIES
414
428
  ) # type: set[str]
415
429
  if len(invalid_polarities) > 0:
416
430
  raise ValueError(
417
431
  f"Some edge polarities were invalid: {', '.join(invalid_polarities)}. "
418
- f"Valid polarities are {', '.join(valid_polarities)}."
432
+ f"Valid polarities are {', '.join(VALID_LINK_POLARITIES)}."
419
433
  )
420
434
 
421
435
  # catch fully ambiguous case
422
436
  if link_polarity_series.eq("ambiguous").all():
423
437
  running_polarity = [
424
- "ambiguous" for i in range(link_polarity_series.shape[0])
438
+ NET_POLARITY.AMBIGUOUS for i in range(link_polarity_series.shape[0])
425
439
  ] # type : list[str]
426
440
  return running_polarity
427
441
 
@@ -430,16 +444,16 @@ def _calculate_net_polarity(link_polarity_series: pd.Series) -> str:
430
444
  ambig_prefix = ""
431
445
 
432
446
  for polarity in link_polarity_series:
433
- if polarity == "ambiguous":
447
+ if polarity == NET_POLARITY.AMBIGUOUS:
434
448
  # once a polarity becomes ambiguous it is stuck
435
449
  ambig_prefix = "ambiguous "
436
- if polarity == "inhibition":
450
+ if polarity == NET_POLARITY.INHIBITION:
437
451
  current_polarity = current_polarity * -1
438
452
 
439
453
  if current_polarity == 1:
440
- running_polarity.append(ambig_prefix + "activation")
454
+ running_polarity.append(ambig_prefix + NET_POLARITY.ACTIVATION)
441
455
  else:
442
- running_polarity.append(ambig_prefix + "inhibition")
456
+ running_polarity.append(ambig_prefix + NET_POLARITY.INHIBITION)
443
457
 
444
458
  return running_polarity
445
459
 
@@ -480,18 +494,20 @@ def _label_path_reactions(sbml_dfs: sbml_dfs_core.SBML_dfs, paths_df: pd.DataFra
480
494
  ]
481
495
  )
482
496
  .to_frame()
483
- .join(sbml_dfs.reactions["r_name"])
497
+ .join(sbml_dfs.reactions[SBML_DFS.R_NAME])
484
498
  )
485
499
 
486
500
  labelled_reactions = (
487
501
  reaction_paths.merge(reaction_info, left_on="node", right_index=True)
488
- .rename(columns={"r_name": "label"})
502
+ .rename(columns={SBML_DFS.R_NAME: "label"})
489
503
  .assign(node_type="reaction")
490
504
  )
491
505
 
492
506
  # add uri urls
493
507
  labelled_reactions = labelled_reactions.merge(
494
- sbml_dfs.get_uri_urls("reactions", labelled_reactions["node"].tolist()),
508
+ sbml_dfs.get_uri_urls(
509
+ SBML_DFS.REACTIONS, labelled_reactions["node"].tolist()
510
+ ),
495
511
  left_on="node",
496
512
  right_index=True,
497
513
  how="left",
@@ -3,17 +3,17 @@ from __future__ import annotations
3
3
  import logging
4
4
  import math
5
5
 
6
- import igraph as ig
7
6
  import numpy as np
8
7
  import pandas as pd
9
8
 
10
- from napistu.network import net_utils
9
+ from napistu.network.napistu_graph_core import NapistuGraph
10
+ from napistu.network.ig_utils import validate_edge_attributes
11
11
 
12
12
  logger = logging.getLogger(__name__)
13
13
 
14
14
 
15
15
  def precompute_distances(
16
- cpr_graph: ig.Graph,
16
+ napistu_graph: NapistuGraph,
17
17
  max_steps: int = -1,
18
18
  max_score_q: float = float(1),
19
19
  partition_size: int = int(5000),
@@ -24,8 +24,8 @@ def precompute_distances(
24
24
 
25
25
  Parameters
26
26
  ----------
27
- cpr_graph: ig.Graph
28
- An igraph network model
27
+ napistu_graph: NapistuGraph
28
+ A NapistuGraph network model (subclass of igraph.Graph)
29
29
  max_steps: int
30
30
  The maximum number of steps between pairs of species to save a distance
31
31
  max_score_q: float
@@ -60,11 +60,11 @@ def precompute_distances(
60
60
  raise ValueError(f"max_score_q must be between 0 and 1 but was {max_score_q}")
61
61
 
62
62
  # make sure weight vars exist
63
- net_utils._validate_edge_attributes(cpr_graph, weights_vars)
63
+ validate_edge_attributes(napistu_graph, weights_vars)
64
64
 
65
65
  # assign molecular species to partitions
66
66
  vs_to_partition = pd.DataFrame(
67
- {"sc_id": cpr_graph.vs["name"], "node_type": cpr_graph.vs["node_type"]}
67
+ {"sc_id": napistu_graph.vs["name"], "node_type": napistu_graph.vs["node_type"]}
68
68
  ).query("node_type == 'species'")
69
69
 
70
70
  n_paritions = math.ceil(vs_to_partition.shape[0] / partition_size)
@@ -79,7 +79,7 @@ def precompute_distances(
79
79
  pd.concat(
80
80
  [
81
81
  _calculate_distances_subset(
82
- cpr_graph,
82
+ napistu_graph,
83
83
  vs_to_partition,
84
84
  vs_to_partition.loc[uq_part],
85
85
  weights_vars=weights_vars,
@@ -103,7 +103,7 @@ def precompute_distances(
103
103
 
104
104
 
105
105
  def _calculate_distances_subset(
106
- cpr_graph: ig.Graph,
106
+ napistu_graph: NapistuGraph,
107
107
  vs_to_partition: pd.DataFrame,
108
108
  one_partition: pd.DataFrame,
109
109
  weights_vars: list[str] = ["weights", "upstream_weights"],
@@ -113,7 +113,7 @@ def _calculate_distances_subset(
113
113
  d_steps = (
114
114
  pd.DataFrame(
115
115
  np.array(
116
- cpr_graph.distances(
116
+ napistu_graph.distances(
117
117
  source=one_partition["sc_id"], target=vs_to_partition["sc_id"]
118
118
  )
119
119
  ),
@@ -131,7 +131,7 @@ def _calculate_distances_subset(
131
131
  d_weights_subset = (
132
132
  pd.DataFrame(
133
133
  np.array(
134
- cpr_graph.distances(
134
+ napistu_graph.distances(
135
135
  source=one_partition["sc_id"],
136
136
  target=vs_to_partition["sc_id"],
137
137
  weights=weight_type,
@@ -0,0 +1,10 @@
1
+ from __future__ import annotations
2
+
3
+ from importlib.metadata import PackageNotFoundError
4
+ from importlib.metadata import version
5
+
6
+ try:
7
+ __version__ = version("napistu")
8
+ except PackageNotFoundError:
9
+ # package is not installed
10
+ pass
@@ -0,0 +1,129 @@
1
+ import logging
2
+
3
+ from typing import Dict
4
+ from types import SimpleNamespace
5
+ from napistu.constants import ONTOLOGIES
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+ # Valid ontologies that can be interconverted
10
+ INTERCONVERTIBLE_GENIC_ONTOLOGIES = {
11
+ ONTOLOGIES.ENSEMBL_GENE,
12
+ ONTOLOGIES.ENSEMBL_TRANSCRIPT,
13
+ ONTOLOGIES.ENSEMBL_PROTEIN,
14
+ ONTOLOGIES.NCBI_ENTREZ_GENE,
15
+ ONTOLOGIES.UNIPROT,
16
+ ONTOLOGIES.GENE_NAME,
17
+ ONTOLOGIES.SYMBOL,
18
+ }
19
+
20
+ GENODEXITO_DEFS = SimpleNamespace(
21
+ BIOCONDUCTOR="bioconductor",
22
+ PYTHON="python",
23
+ )
24
+ GENODEXITO_MAPPERS = {GENODEXITO_DEFS.BIOCONDUCTOR, GENODEXITO_DEFS.PYTHON}
25
+
26
+ # Mapping from our ontology names to MyGene field names
27
+ MYGENE_DEFS = SimpleNamespace(
28
+ ENSEMBL_GENE="ensembl.gene",
29
+ ENSEMBL_TRANSCRIPT="ensembl.transcript",
30
+ ENSEMBL_PROTEIN="ensembl.protein",
31
+ UNIPROT="uniprot.Swiss-Prot",
32
+ SYMBOL="symbol",
33
+ GENE_NAME="name",
34
+ NCBI_ENTREZ_GENE="entrezgene",
35
+ )
36
+
37
+ NAPISTU_TO_MYGENE_FIELDS = {
38
+ ONTOLOGIES.ENSEMBL_GENE: MYGENE_DEFS.ENSEMBL_GENE,
39
+ ONTOLOGIES.ENSEMBL_TRANSCRIPT: MYGENE_DEFS.ENSEMBL_TRANSCRIPT,
40
+ ONTOLOGIES.ENSEMBL_PROTEIN: MYGENE_DEFS.ENSEMBL_PROTEIN,
41
+ ONTOLOGIES.UNIPROT: MYGENE_DEFS.UNIPROT,
42
+ ONTOLOGIES.SYMBOL: MYGENE_DEFS.SYMBOL,
43
+ ONTOLOGIES.GENE_NAME: MYGENE_DEFS.GENE_NAME,
44
+ ONTOLOGIES.NCBI_ENTREZ_GENE: MYGENE_DEFS.NCBI_ENTREZ_GENE,
45
+ }
46
+
47
+ NAPISTU_FROM_MYGENE_FIELDS = {
48
+ MYGENE_DEFS.ENSEMBL_GENE: ONTOLOGIES.ENSEMBL_GENE,
49
+ MYGENE_DEFS.ENSEMBL_TRANSCRIPT: ONTOLOGIES.ENSEMBL_TRANSCRIPT,
50
+ MYGENE_DEFS.ENSEMBL_PROTEIN: ONTOLOGIES.ENSEMBL_PROTEIN,
51
+ MYGENE_DEFS.UNIPROT: ONTOLOGIES.UNIPROT,
52
+ MYGENE_DEFS.SYMBOL: ONTOLOGIES.SYMBOL,
53
+ MYGENE_DEFS.GENE_NAME: ONTOLOGIES.GENE_NAME,
54
+ MYGENE_DEFS.NCBI_ENTREZ_GENE: ONTOLOGIES.NCBI_ENTREZ_GENE,
55
+ }
56
+
57
+
58
+ SPECIES_TO_TAXID: Dict[str, int] = {
59
+ # MyGene.info supported common species (9 species with common names)
60
+ "Homo sapiens": 9606, # human
61
+ "Mus musculus": 10090, # mouse
62
+ "Rattus norvegicus": 10116, # rat
63
+ "Drosophila melanogaster": 7227, # fruitfly
64
+ "Caenorhabditis elegans": 6239, # nematode
65
+ "Danio rerio": 7955, # zebrafish
66
+ "Arabidopsis thaliana": 3702, # thale-cress
67
+ "Xenopus tropicalis": 8364, # frog
68
+ "Xenopus laevis": 8355, # frog (alternative species)
69
+ "Sus scrofa": 9823, # pig
70
+ # Additional commonly used model organisms
71
+ "Saccharomyces cerevisiae": 4932, # yeast
72
+ "Schizosaccharomyces pombe": 4896, # fission yeast
73
+ "Gallus gallus": 9031, # chicken
74
+ "Bos taurus": 9913, # cow/cattle
75
+ "Canis familiaris": 9615, # dog
76
+ "Macaca mulatta": 9544, # rhesus monkey/macaque
77
+ "Pan troglodytes": 9598, # chimpanzee
78
+ "Escherichia coli": 511145, # E. coli (K-12 MG1655)
79
+ # Additional species that might be encountered
80
+ "Anopheles gambiae": 7165, # malaria mosquito
81
+ "Oryza sativa": 4530, # rice
82
+ "Neurospora crassa": 5141, # bread mold
83
+ "Kluyveromyces lactis": 28985, # yeast species
84
+ "Magnaporthe oryzae": 318829, # rice blast fungus
85
+ "Eremothecium gossypii": 33169, # cotton fungus
86
+ }
87
+
88
+ MYGENE_QUERY_DEFS = SimpleNamespace(
89
+ BIOLOGICAL_REGION="type_of_gene:biological-region",
90
+ NCRNA="type_of_gene:ncrna",
91
+ PROTEIN_CODING="type_of_gene:protein-coding",
92
+ PSEUDO="type_of_gene:pseudo",
93
+ SNORNA="type_of_gene:snorna",
94
+ UNKNOWN="type_of_gene:unknown",
95
+ OTHER="type_of_gene:other",
96
+ RRNA="type_of_gene:rrna",
97
+ TRNA="type_of_gene:trna",
98
+ SNRNA="type_of_gene:snrna",
99
+ )
100
+
101
+ MYGENE_QUERY_DEFS_LIST = [
102
+ MYGENE_QUERY_DEFS.BIOLOGICAL_REGION,
103
+ MYGENE_QUERY_DEFS.NCRNA,
104
+ MYGENE_QUERY_DEFS.PROTEIN_CODING,
105
+ MYGENE_QUERY_DEFS.PSEUDO,
106
+ MYGENE_QUERY_DEFS.SNORNA,
107
+ MYGENE_QUERY_DEFS.UNKNOWN,
108
+ MYGENE_QUERY_DEFS.OTHER,
109
+ MYGENE_QUERY_DEFS.RRNA,
110
+ MYGENE_QUERY_DEFS.TRNA,
111
+ MYGENE_QUERY_DEFS.SNRNA,
112
+ ]
113
+
114
+ MYGENE_DEFAULT_QUERIES = [MYGENE_QUERY_DEFS.PROTEIN_CODING, MYGENE_QUERY_DEFS.NCRNA]
115
+
116
+ # bioc ontologies used for linking systematic identifiers
117
+ # (entrez is not part of this list because it forms the gene index)
118
+ PROTEIN_ONTOLOGIES = [ONTOLOGIES.UNIPROT, ONTOLOGIES.ENSEMBL_PROTEIN]
119
+ GENE_ONTOLOGIES = [
120
+ ONTOLOGIES.NCBI_ENTREZ_GENE,
121
+ ONTOLOGIES.ENSEMBL_GENE,
122
+ ONTOLOGIES.ENSEMBL_TRANSCRIPT,
123
+ ]
124
+ NAME_ONTOLOGIES = {
125
+ ONTOLOGIES.GENE_NAME: 0,
126
+ ONTOLOGIES.SYMBOL: 1,
127
+ ONTOLOGIES.UNIPROT: 2,
128
+ ONTOLOGIES.ENSEMBL_PROTEIN: 3,
129
+ }