napistu 0.2.5.dev7__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__main__.py +126 -96
- napistu/constants.py +35 -41
- napistu/context/__init__.py +10 -0
- napistu/context/discretize.py +462 -0
- napistu/context/filtering.py +387 -0
- napistu/gcs/__init__.py +1 -1
- napistu/identifiers.py +74 -15
- napistu/indices.py +68 -0
- napistu/ingestion/__init__.py +1 -1
- napistu/ingestion/bigg.py +47 -62
- napistu/ingestion/constants.py +18 -133
- napistu/ingestion/gtex.py +113 -0
- napistu/ingestion/hpa.py +147 -0
- napistu/ingestion/sbml.py +0 -97
- napistu/ingestion/string.py +2 -2
- napistu/matching/__init__.py +10 -0
- napistu/matching/constants.py +18 -0
- napistu/matching/interactions.py +518 -0
- napistu/matching/mount.py +529 -0
- napistu/matching/species.py +510 -0
- napistu/mcp/__init__.py +7 -4
- napistu/mcp/__main__.py +128 -72
- napistu/mcp/client.py +16 -25
- napistu/mcp/codebase.py +201 -145
- napistu/mcp/component_base.py +170 -0
- napistu/mcp/config.py +223 -0
- napistu/mcp/constants.py +45 -2
- napistu/mcp/documentation.py +253 -136
- napistu/mcp/documentation_utils.py +13 -48
- napistu/mcp/execution.py +372 -305
- napistu/mcp/health.py +47 -65
- napistu/mcp/profiles.py +10 -6
- napistu/mcp/server.py +161 -80
- napistu/mcp/tutorials.py +139 -87
- napistu/modify/__init__.py +1 -1
- napistu/modify/gaps.py +1 -1
- napistu/network/__init__.py +1 -1
- napistu/network/constants.py +101 -34
- napistu/network/data_handling.py +388 -0
- napistu/network/ig_utils.py +351 -0
- napistu/network/napistu_graph_core.py +354 -0
- napistu/network/neighborhoods.py +40 -40
- napistu/network/net_create.py +373 -309
- napistu/network/net_propagation.py +47 -19
- napistu/network/{net_utils.py → ng_utils.py} +124 -272
- napistu/network/paths.py +67 -51
- napistu/network/precompute.py +11 -11
- napistu/ontologies/__init__.py +10 -0
- napistu/ontologies/constants.py +129 -0
- napistu/ontologies/dogma.py +243 -0
- napistu/ontologies/genodexito.py +649 -0
- napistu/ontologies/mygene.py +369 -0
- napistu/ontologies/renaming.py +198 -0
- napistu/rpy2/__init__.py +229 -86
- napistu/rpy2/callr.py +47 -77
- napistu/rpy2/constants.py +24 -23
- napistu/rpy2/rids.py +61 -648
- napistu/sbml_dfs_core.py +587 -222
- napistu/scverse/__init__.py +15 -0
- napistu/scverse/constants.py +28 -0
- napistu/scverse/loading.py +727 -0
- napistu/utils.py +118 -10
- {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dist-info}/METADATA +8 -3
- napistu-0.3.1.dist-info/RECORD +133 -0
- tests/conftest.py +22 -0
- tests/test_context_discretize.py +56 -0
- tests/test_context_filtering.py +267 -0
- tests/test_identifiers.py +100 -0
- tests/test_indices.py +65 -0
- tests/{test_edgelist.py → test_ingestion_napistu_edgelist.py} +2 -2
- tests/test_matching_interactions.py +108 -0
- tests/test_matching_mount.py +305 -0
- tests/test_matching_species.py +394 -0
- tests/test_mcp_config.py +193 -0
- tests/test_mcp_documentation_utils.py +12 -3
- tests/test_mcp_server.py +156 -19
- tests/test_network_data_handling.py +397 -0
- tests/test_network_ig_utils.py +23 -0
- tests/test_network_neighborhoods.py +19 -0
- tests/test_network_net_create.py +459 -0
- tests/test_network_ng_utils.py +30 -0
- tests/test_network_paths.py +56 -0
- tests/{test_precomputed_distances.py → test_network_precompute.py} +8 -6
- tests/test_ontologies_genodexito.py +58 -0
- tests/test_ontologies_mygene.py +39 -0
- tests/test_ontologies_renaming.py +110 -0
- tests/test_rpy2_callr.py +79 -0
- tests/test_rpy2_init.py +151 -0
- tests/test_sbml.py +0 -31
- tests/test_sbml_dfs_core.py +134 -10
- tests/test_scverse_loading.py +778 -0
- tests/test_set_coverage.py +2 -2
- tests/test_utils.py +121 -1
- napistu/mechanism_matching.py +0 -1353
- napistu/rpy2/netcontextr.py +0 -467
- napistu-0.2.5.dev7.dist-info/RECORD +0 -98
- tests/test_igraph.py +0 -367
- tests/test_mechanism_matching.py +0 -784
- tests/test_net_utils.py +0 -149
- tests/test_netcontextr.py +0 -105
- tests/test_rpy2.py +0 -61
- /napistu/ingestion/{cpr_edgelist.py → napistu_edgelist.py} +0 -0
- {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dist-info}/WHEEL +0 -0
- {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dist-info}/entry_points.txt +0 -0
- {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dist-info}/top_level.txt +0 -0
- /tests/{test_obo.py → test_ingestion_obo.py} +0 -0
napistu/network/paths.py
CHANGED
@@ -5,20 +5,25 @@ import math
|
|
5
5
|
import warnings
|
6
6
|
from typing import Any
|
7
7
|
|
8
|
-
import igraph as ig
|
9
8
|
import pandas as pd
|
9
|
+
|
10
10
|
from napistu import sbml_dfs_core
|
11
11
|
from napistu import utils
|
12
|
+
from napistu.network.napistu_graph_core import NapistuGraph
|
13
|
+
from napistu.network.ng_utils import get_minimal_sources_edges
|
12
14
|
from napistu.constants import CPR_PATH_REQ_VARS
|
13
15
|
from napistu.constants import MINI_SBO_NAME_TO_POLARITY
|
14
16
|
from napistu.constants import MINI_SBO_TO_NAME
|
15
|
-
from napistu.
|
17
|
+
from napistu.constants import SBML_DFS
|
18
|
+
from napistu.network.constants import NET_POLARITY
|
19
|
+
from napistu.network.constants import NAPISTU_GRAPH_EDGES
|
20
|
+
from napistu.network.constants import VALID_LINK_POLARITIES
|
16
21
|
|
17
22
|
logger = logging.getLogger(__name__)
|
18
23
|
|
19
24
|
|
20
25
|
def find_shortest_reaction_paths(
|
21
|
-
|
26
|
+
napistu_graph: NapistuGraph,
|
22
27
|
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
23
28
|
origin: str,
|
24
29
|
dest: str | list,
|
@@ -31,8 +36,8 @@ def find_shortest_reaction_paths(
|
|
31
36
|
|
32
37
|
Parameters
|
33
38
|
----------
|
34
|
-
|
35
|
-
A
|
39
|
+
napistu_graph : NapistuGraph
|
40
|
+
A network of molecular species and reactions (subclass of igraph.Graph)
|
36
41
|
sbml_dfs : sbml_dfs_core.SBML_dfs
|
37
42
|
A model formed by aggregating pathways
|
38
43
|
origin : str
|
@@ -63,7 +68,7 @@ def find_shortest_reaction_paths(
|
|
63
68
|
# igraph throws warnings for each pair of unconnected species
|
64
69
|
warnings.simplefilter("ignore")
|
65
70
|
|
66
|
-
shortest_paths =
|
71
|
+
shortest_paths = napistu_graph.get_all_shortest_paths(
|
67
72
|
origin, to=dest, weights=weight_var
|
68
73
|
)
|
69
74
|
|
@@ -75,24 +80,34 @@ def find_shortest_reaction_paths(
|
|
75
80
|
# igraph throws warnings for each pair of unconnected species
|
76
81
|
warnings.simplefilter("ignore")
|
77
82
|
|
78
|
-
shortest_paths =
|
83
|
+
shortest_paths = napistu_graph.get_all_shortest_paths(
|
79
84
|
origin, to=dest, weights=weight_var
|
80
85
|
)
|
81
86
|
|
82
87
|
# summarize the graph which is being evaluated
|
83
|
-
|
88
|
+
napistu_graph_names = [v.attributes()["name"] for v in napistu_graph.vs]
|
84
89
|
|
85
|
-
|
90
|
+
napistu_graph_edges = pd.DataFrame(
|
86
91
|
{
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
+
NAPISTU_GRAPH_EDGES.FROM: napistu_graph.es.get_attribute_values(
|
93
|
+
NAPISTU_GRAPH_EDGES.FROM
|
94
|
+
),
|
95
|
+
NAPISTU_GRAPH_EDGES.TO: napistu_graph.es.get_attribute_values(
|
96
|
+
NAPISTU_GRAPH_EDGES.TO
|
97
|
+
),
|
98
|
+
NAPISTU_GRAPH_EDGES.WEIGHTS: napistu_graph.es.get_attribute_values(
|
99
|
+
weight_var
|
100
|
+
),
|
101
|
+
NAPISTU_GRAPH_EDGES.SBO_TERM: napistu_graph.es.get_attribute_values(
|
102
|
+
NAPISTU_GRAPH_EDGES.SBO_TERM
|
103
|
+
),
|
104
|
+
NAPISTU_GRAPH_EDGES.DIRECTION: napistu_graph.es.get_attribute_values(
|
105
|
+
NAPISTU_GRAPH_EDGES.DIRECTION
|
106
|
+
),
|
92
107
|
}
|
93
108
|
)
|
94
109
|
|
95
|
-
directed =
|
110
|
+
directed = napistu_graph.is_directed()
|
96
111
|
|
97
112
|
# format shortest paths
|
98
113
|
# summaries of nodes
|
@@ -103,7 +118,7 @@ def find_shortest_reaction_paths(
|
|
103
118
|
entry = 0
|
104
119
|
for path in shortest_paths:
|
105
120
|
path_df = (
|
106
|
-
pd.DataFrame({"node": [
|
121
|
+
pd.DataFrame({"node": [napistu_graph_names[x] for x in path]})
|
107
122
|
.reset_index()
|
108
123
|
.rename(columns={"index": "step"})
|
109
124
|
.assign(path=entry)
|
@@ -119,19 +134,19 @@ def find_shortest_reaction_paths(
|
|
119
134
|
|
120
135
|
if directed:
|
121
136
|
path_edges = path_edges.merge(
|
122
|
-
|
137
|
+
napistu_graph_edges,
|
123
138
|
left_on=["from", "to"],
|
124
139
|
right_on=["from", "to"],
|
125
140
|
)
|
126
141
|
|
127
|
-
path_edges[
|
142
|
+
path_edges[NET_POLARITY.LINK_POLARITY] = (
|
128
143
|
path_edges["sbo_term"]
|
129
144
|
.map(MINI_SBO_TO_NAME)
|
130
145
|
.map(MINI_SBO_NAME_TO_POLARITY)
|
131
146
|
)
|
132
147
|
# is the edge predicted to be activating, inhibiting or ambiguous?
|
133
|
-
path_edges[
|
134
|
-
path_edges[
|
148
|
+
path_edges[NET_POLARITY.NET_POLARITY] = _calculate_net_polarity(
|
149
|
+
path_edges[NET_POLARITY.LINK_POLARITY]
|
135
150
|
)
|
136
151
|
|
137
152
|
else:
|
@@ -148,7 +163,7 @@ def find_shortest_reaction_paths(
|
|
148
163
|
]
|
149
164
|
)
|
150
165
|
.merge(
|
151
|
-
|
166
|
+
napistu_graph_edges,
|
152
167
|
left_on=["from", "to"],
|
153
168
|
right_on=["from", "to"],
|
154
169
|
# keep at most 1 entry per step
|
@@ -221,10 +236,10 @@ def find_shortest_reaction_paths(
|
|
221
236
|
|
222
237
|
|
223
238
|
def find_all_shortest_reaction_paths(
|
224
|
-
|
239
|
+
napistu_graph: NapistuGraph,
|
225
240
|
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
226
241
|
target_species_paths: pd.DataFrame,
|
227
|
-
weight_var: str =
|
242
|
+
weight_var: str = NAPISTU_GRAPH_EDGES.WEIGHTS,
|
228
243
|
precomputed_distances: pd.DataFrame | None = None,
|
229
244
|
):
|
230
245
|
"""
|
@@ -234,8 +249,8 @@ def find_all_shortest_reaction_paths(
|
|
234
249
|
|
235
250
|
Parameters
|
236
251
|
----------
|
237
|
-
|
238
|
-
A
|
252
|
+
napistu_graph : NapistuGraph
|
253
|
+
A network interconnecting molecular species and reactions (subclass of igraph.Graph)
|
239
254
|
sbml_dfs : SBML_dfs
|
240
255
|
A model formed by aggregating pathways
|
241
256
|
target_species_paths : pd.DataFrame
|
@@ -273,7 +288,7 @@ def find_all_shortest_reaction_paths(
|
|
273
288
|
one_search = target_species_paths.iloc[i]
|
274
289
|
|
275
290
|
paths = find_shortest_reaction_paths(
|
276
|
-
|
291
|
+
napistu_graph,
|
277
292
|
sbml_dfs,
|
278
293
|
origin=one_search["sc_id_origin"],
|
279
294
|
dest=one_search["sc_id_dest"],
|
@@ -310,9 +325,7 @@ def find_all_shortest_reaction_paths(
|
|
310
325
|
).reset_index()
|
311
326
|
|
312
327
|
# get a minimal set of pathway sources to organize reactions
|
313
|
-
edge_sources =
|
314
|
-
all_shortest_reaction_paths_df, sbml_dfs
|
315
|
-
)
|
328
|
+
edge_sources = get_minimal_sources_edges(all_shortest_reaction_paths_df, sbml_dfs)
|
316
329
|
|
317
330
|
# create a new small network of shortest paths
|
318
331
|
unique_path_nodes = (
|
@@ -322,8 +335,8 @@ def find_all_shortest_reaction_paths(
|
|
322
335
|
.drop(columns=["index", "step", "path", "origin", "dest"])
|
323
336
|
)
|
324
337
|
|
325
|
-
directed =
|
326
|
-
paths_graph =
|
338
|
+
directed = napistu_graph.is_directed()
|
339
|
+
paths_graph = NapistuGraph.DictList(
|
327
340
|
vertices=unique_path_nodes.to_dict("records"),
|
328
341
|
edges=all_shortest_reaction_path_edges_df.to_dict("records"),
|
329
342
|
directed=directed,
|
@@ -339,16 +352,16 @@ def find_all_shortest_reaction_paths(
|
|
339
352
|
)
|
340
353
|
|
341
354
|
|
342
|
-
def plot_shortest_paths(
|
355
|
+
def plot_shortest_paths(napistu_graph: NapistuGraph) -> NapistuGraph.plot:
|
343
356
|
"""Plot a shortest paths graph."""
|
344
357
|
|
345
|
-
if "label" not in
|
358
|
+
if "label" not in napistu_graph.vs.attributes():
|
346
359
|
logger.warning(
|
347
360
|
"label was not defined as a vertex attribute so paths will not be colored"
|
348
361
|
)
|
349
|
-
|
362
|
+
napistu_graph.vs.set_attribute_values("label", "")
|
350
363
|
|
351
|
-
paths_graph_layout =
|
364
|
+
paths_graph_layout = napistu_graph.layout("kk")
|
352
365
|
|
353
366
|
color_dict = {"reaction": "dodgerblue", "species": "firebrick"}
|
354
367
|
|
@@ -356,19 +369,21 @@ def plot_shortest_paths(paths_graph: ig.Graph) -> ig.plot:
|
|
356
369
|
visual_style["background"] = "black"
|
357
370
|
visual_style["vertex_size"] = 10
|
358
371
|
visual_style["vertex_label"] = [
|
359
|
-
|
372
|
+
utils.safe_fill(x) for x in napistu_graph.vs["label"]
|
360
373
|
]
|
361
374
|
visual_style["vertex_label_color"] = "white"
|
362
375
|
visual_style["vertex_label_size"] = 8
|
363
376
|
visual_style["vertex_label_angle"] = 90
|
364
|
-
visual_style["vertex_color"] = [
|
365
|
-
|
377
|
+
visual_style["vertex_color"] = [
|
378
|
+
color_dict[x] for x in napistu_graph.vs["node_type"]
|
379
|
+
]
|
380
|
+
visual_style["edge_width"] = [math.sqrt(x) for x in napistu_graph.es["weights"]]
|
366
381
|
visual_style["edge_color"] = "dimgray"
|
367
382
|
visual_style["layout"] = paths_graph_layout
|
368
383
|
visual_style["bbox"] = (2000, 2000)
|
369
384
|
visual_style["margin"] = 50
|
370
385
|
|
371
|
-
return
|
386
|
+
return napistu_graph.plot(**visual_style)
|
372
387
|
|
373
388
|
|
374
389
|
def _filter_paths_by_precomputed_distances(
|
@@ -401,27 +416,26 @@ def _calculate_net_polarity(link_polarity_series: pd.Series) -> str:
|
|
401
416
|
"""Determine whether a path implies activation, inhibition, or an ambiguous regulatory relationship."""
|
402
417
|
|
403
418
|
assert isinstance(link_polarity_series, pd.Series)
|
404
|
-
assert link_polarity_series.name ==
|
419
|
+
assert link_polarity_series.name == NET_POLARITY.LINK_POLARITY
|
405
420
|
|
406
421
|
# loop through the link polarities and
|
407
422
|
# determine the cumulative polarity, accounting for inhibition steps which flip polarity
|
408
423
|
# and ambiguous steps which will add an ambiguous label to the net result
|
409
424
|
|
410
425
|
observed_polarities = set(link_polarity_series.tolist()) # type: set[str]
|
411
|
-
valid_polarities = {"activation", "inhibition", "ambiguous"} # type: set[str]
|
412
426
|
invalid_polarities = observed_polarities.difference(
|
413
|
-
|
427
|
+
VALID_LINK_POLARITIES
|
414
428
|
) # type: set[str]
|
415
429
|
if len(invalid_polarities) > 0:
|
416
430
|
raise ValueError(
|
417
431
|
f"Some edge polarities were invalid: {', '.join(invalid_polarities)}. "
|
418
|
-
f"Valid polarities are {', '.join(
|
432
|
+
f"Valid polarities are {', '.join(VALID_LINK_POLARITIES)}."
|
419
433
|
)
|
420
434
|
|
421
435
|
# catch fully ambiguous case
|
422
436
|
if link_polarity_series.eq("ambiguous").all():
|
423
437
|
running_polarity = [
|
424
|
-
|
438
|
+
NET_POLARITY.AMBIGUOUS for i in range(link_polarity_series.shape[0])
|
425
439
|
] # type : list[str]
|
426
440
|
return running_polarity
|
427
441
|
|
@@ -430,16 +444,16 @@ def _calculate_net_polarity(link_polarity_series: pd.Series) -> str:
|
|
430
444
|
ambig_prefix = ""
|
431
445
|
|
432
446
|
for polarity in link_polarity_series:
|
433
|
-
if polarity ==
|
447
|
+
if polarity == NET_POLARITY.AMBIGUOUS:
|
434
448
|
# once a polarity becomes ambiguous it is stuck
|
435
449
|
ambig_prefix = "ambiguous "
|
436
|
-
if polarity ==
|
450
|
+
if polarity == NET_POLARITY.INHIBITION:
|
437
451
|
current_polarity = current_polarity * -1
|
438
452
|
|
439
453
|
if current_polarity == 1:
|
440
|
-
running_polarity.append(ambig_prefix +
|
454
|
+
running_polarity.append(ambig_prefix + NET_POLARITY.ACTIVATION)
|
441
455
|
else:
|
442
|
-
running_polarity.append(ambig_prefix +
|
456
|
+
running_polarity.append(ambig_prefix + NET_POLARITY.INHIBITION)
|
443
457
|
|
444
458
|
return running_polarity
|
445
459
|
|
@@ -480,18 +494,20 @@ def _label_path_reactions(sbml_dfs: sbml_dfs_core.SBML_dfs, paths_df: pd.DataFra
|
|
480
494
|
]
|
481
495
|
)
|
482
496
|
.to_frame()
|
483
|
-
.join(sbml_dfs.reactions[
|
497
|
+
.join(sbml_dfs.reactions[SBML_DFS.R_NAME])
|
484
498
|
)
|
485
499
|
|
486
500
|
labelled_reactions = (
|
487
501
|
reaction_paths.merge(reaction_info, left_on="node", right_index=True)
|
488
|
-
.rename(columns={
|
502
|
+
.rename(columns={SBML_DFS.R_NAME: "label"})
|
489
503
|
.assign(node_type="reaction")
|
490
504
|
)
|
491
505
|
|
492
506
|
# add uri urls
|
493
507
|
labelled_reactions = labelled_reactions.merge(
|
494
|
-
sbml_dfs.get_uri_urls(
|
508
|
+
sbml_dfs.get_uri_urls(
|
509
|
+
SBML_DFS.REACTIONS, labelled_reactions["node"].tolist()
|
510
|
+
),
|
495
511
|
left_on="node",
|
496
512
|
right_index=True,
|
497
513
|
how="left",
|
napistu/network/precompute.py
CHANGED
@@ -3,17 +3,17 @@ from __future__ import annotations
|
|
3
3
|
import logging
|
4
4
|
import math
|
5
5
|
|
6
|
-
import igraph as ig
|
7
6
|
import numpy as np
|
8
7
|
import pandas as pd
|
9
8
|
|
10
|
-
from napistu.network import
|
9
|
+
from napistu.network.napistu_graph_core import NapistuGraph
|
10
|
+
from napistu.network.ig_utils import validate_edge_attributes
|
11
11
|
|
12
12
|
logger = logging.getLogger(__name__)
|
13
13
|
|
14
14
|
|
15
15
|
def precompute_distances(
|
16
|
-
|
16
|
+
napistu_graph: NapistuGraph,
|
17
17
|
max_steps: int = -1,
|
18
18
|
max_score_q: float = float(1),
|
19
19
|
partition_size: int = int(5000),
|
@@ -24,8 +24,8 @@ def precompute_distances(
|
|
24
24
|
|
25
25
|
Parameters
|
26
26
|
----------
|
27
|
-
|
28
|
-
An
|
27
|
+
napistu_graph: NapistuGraph
|
28
|
+
A NapistuGraph network model (subclass of igraph.Graph)
|
29
29
|
max_steps: int
|
30
30
|
The maximum number of steps between pairs of species to save a distance
|
31
31
|
max_score_q: float
|
@@ -60,11 +60,11 @@ def precompute_distances(
|
|
60
60
|
raise ValueError(f"max_score_q must be between 0 and 1 but was {max_score_q}")
|
61
61
|
|
62
62
|
# make sure weight vars exist
|
63
|
-
|
63
|
+
validate_edge_attributes(napistu_graph, weights_vars)
|
64
64
|
|
65
65
|
# assign molecular species to partitions
|
66
66
|
vs_to_partition = pd.DataFrame(
|
67
|
-
{"sc_id":
|
67
|
+
{"sc_id": napistu_graph.vs["name"], "node_type": napistu_graph.vs["node_type"]}
|
68
68
|
).query("node_type == 'species'")
|
69
69
|
|
70
70
|
n_paritions = math.ceil(vs_to_partition.shape[0] / partition_size)
|
@@ -79,7 +79,7 @@ def precompute_distances(
|
|
79
79
|
pd.concat(
|
80
80
|
[
|
81
81
|
_calculate_distances_subset(
|
82
|
-
|
82
|
+
napistu_graph,
|
83
83
|
vs_to_partition,
|
84
84
|
vs_to_partition.loc[uq_part],
|
85
85
|
weights_vars=weights_vars,
|
@@ -103,7 +103,7 @@ def precompute_distances(
|
|
103
103
|
|
104
104
|
|
105
105
|
def _calculate_distances_subset(
|
106
|
-
|
106
|
+
napistu_graph: NapistuGraph,
|
107
107
|
vs_to_partition: pd.DataFrame,
|
108
108
|
one_partition: pd.DataFrame,
|
109
109
|
weights_vars: list[str] = ["weights", "upstream_weights"],
|
@@ -113,7 +113,7 @@ def _calculate_distances_subset(
|
|
113
113
|
d_steps = (
|
114
114
|
pd.DataFrame(
|
115
115
|
np.array(
|
116
|
-
|
116
|
+
napistu_graph.distances(
|
117
117
|
source=one_partition["sc_id"], target=vs_to_partition["sc_id"]
|
118
118
|
)
|
119
119
|
),
|
@@ -131,7 +131,7 @@ def _calculate_distances_subset(
|
|
131
131
|
d_weights_subset = (
|
132
132
|
pd.DataFrame(
|
133
133
|
np.array(
|
134
|
-
|
134
|
+
napistu_graph.distances(
|
135
135
|
source=one_partition["sc_id"],
|
136
136
|
target=vs_to_partition["sc_id"],
|
137
137
|
weights=weight_type,
|
@@ -0,0 +1,129 @@
|
|
1
|
+
import logging
|
2
|
+
|
3
|
+
from typing import Dict
|
4
|
+
from types import SimpleNamespace
|
5
|
+
from napistu.constants import ONTOLOGIES
|
6
|
+
|
7
|
+
logger = logging.getLogger(__name__)
|
8
|
+
|
9
|
+
# Valid ontologies that can be interconverted
|
10
|
+
INTERCONVERTIBLE_GENIC_ONTOLOGIES = {
|
11
|
+
ONTOLOGIES.ENSEMBL_GENE,
|
12
|
+
ONTOLOGIES.ENSEMBL_TRANSCRIPT,
|
13
|
+
ONTOLOGIES.ENSEMBL_PROTEIN,
|
14
|
+
ONTOLOGIES.NCBI_ENTREZ_GENE,
|
15
|
+
ONTOLOGIES.UNIPROT,
|
16
|
+
ONTOLOGIES.GENE_NAME,
|
17
|
+
ONTOLOGIES.SYMBOL,
|
18
|
+
}
|
19
|
+
|
20
|
+
GENODEXITO_DEFS = SimpleNamespace(
|
21
|
+
BIOCONDUCTOR="bioconductor",
|
22
|
+
PYTHON="python",
|
23
|
+
)
|
24
|
+
GENODEXITO_MAPPERS = {GENODEXITO_DEFS.BIOCONDUCTOR, GENODEXITO_DEFS.PYTHON}
|
25
|
+
|
26
|
+
# Mapping from our ontology names to MyGene field names
|
27
|
+
MYGENE_DEFS = SimpleNamespace(
|
28
|
+
ENSEMBL_GENE="ensembl.gene",
|
29
|
+
ENSEMBL_TRANSCRIPT="ensembl.transcript",
|
30
|
+
ENSEMBL_PROTEIN="ensembl.protein",
|
31
|
+
UNIPROT="uniprot.Swiss-Prot",
|
32
|
+
SYMBOL="symbol",
|
33
|
+
GENE_NAME="name",
|
34
|
+
NCBI_ENTREZ_GENE="entrezgene",
|
35
|
+
)
|
36
|
+
|
37
|
+
NAPISTU_TO_MYGENE_FIELDS = {
|
38
|
+
ONTOLOGIES.ENSEMBL_GENE: MYGENE_DEFS.ENSEMBL_GENE,
|
39
|
+
ONTOLOGIES.ENSEMBL_TRANSCRIPT: MYGENE_DEFS.ENSEMBL_TRANSCRIPT,
|
40
|
+
ONTOLOGIES.ENSEMBL_PROTEIN: MYGENE_DEFS.ENSEMBL_PROTEIN,
|
41
|
+
ONTOLOGIES.UNIPROT: MYGENE_DEFS.UNIPROT,
|
42
|
+
ONTOLOGIES.SYMBOL: MYGENE_DEFS.SYMBOL,
|
43
|
+
ONTOLOGIES.GENE_NAME: MYGENE_DEFS.GENE_NAME,
|
44
|
+
ONTOLOGIES.NCBI_ENTREZ_GENE: MYGENE_DEFS.NCBI_ENTREZ_GENE,
|
45
|
+
}
|
46
|
+
|
47
|
+
NAPISTU_FROM_MYGENE_FIELDS = {
|
48
|
+
MYGENE_DEFS.ENSEMBL_GENE: ONTOLOGIES.ENSEMBL_GENE,
|
49
|
+
MYGENE_DEFS.ENSEMBL_TRANSCRIPT: ONTOLOGIES.ENSEMBL_TRANSCRIPT,
|
50
|
+
MYGENE_DEFS.ENSEMBL_PROTEIN: ONTOLOGIES.ENSEMBL_PROTEIN,
|
51
|
+
MYGENE_DEFS.UNIPROT: ONTOLOGIES.UNIPROT,
|
52
|
+
MYGENE_DEFS.SYMBOL: ONTOLOGIES.SYMBOL,
|
53
|
+
MYGENE_DEFS.GENE_NAME: ONTOLOGIES.GENE_NAME,
|
54
|
+
MYGENE_DEFS.NCBI_ENTREZ_GENE: ONTOLOGIES.NCBI_ENTREZ_GENE,
|
55
|
+
}
|
56
|
+
|
57
|
+
|
58
|
+
SPECIES_TO_TAXID: Dict[str, int] = {
|
59
|
+
# MyGene.info supported common species (9 species with common names)
|
60
|
+
"Homo sapiens": 9606, # human
|
61
|
+
"Mus musculus": 10090, # mouse
|
62
|
+
"Rattus norvegicus": 10116, # rat
|
63
|
+
"Drosophila melanogaster": 7227, # fruitfly
|
64
|
+
"Caenorhabditis elegans": 6239, # nematode
|
65
|
+
"Danio rerio": 7955, # zebrafish
|
66
|
+
"Arabidopsis thaliana": 3702, # thale-cress
|
67
|
+
"Xenopus tropicalis": 8364, # frog
|
68
|
+
"Xenopus laevis": 8355, # frog (alternative species)
|
69
|
+
"Sus scrofa": 9823, # pig
|
70
|
+
# Additional commonly used model organisms
|
71
|
+
"Saccharomyces cerevisiae": 4932, # yeast
|
72
|
+
"Schizosaccharomyces pombe": 4896, # fission yeast
|
73
|
+
"Gallus gallus": 9031, # chicken
|
74
|
+
"Bos taurus": 9913, # cow/cattle
|
75
|
+
"Canis familiaris": 9615, # dog
|
76
|
+
"Macaca mulatta": 9544, # rhesus monkey/macaque
|
77
|
+
"Pan troglodytes": 9598, # chimpanzee
|
78
|
+
"Escherichia coli": 511145, # E. coli (K-12 MG1655)
|
79
|
+
# Additional species that might be encountered
|
80
|
+
"Anopheles gambiae": 7165, # malaria mosquito
|
81
|
+
"Oryza sativa": 4530, # rice
|
82
|
+
"Neurospora crassa": 5141, # bread mold
|
83
|
+
"Kluyveromyces lactis": 28985, # yeast species
|
84
|
+
"Magnaporthe oryzae": 318829, # rice blast fungus
|
85
|
+
"Eremothecium gossypii": 33169, # cotton fungus
|
86
|
+
}
|
87
|
+
|
88
|
+
MYGENE_QUERY_DEFS = SimpleNamespace(
|
89
|
+
BIOLOGICAL_REGION="type_of_gene:biological-region",
|
90
|
+
NCRNA="type_of_gene:ncrna",
|
91
|
+
PROTEIN_CODING="type_of_gene:protein-coding",
|
92
|
+
PSEUDO="type_of_gene:pseudo",
|
93
|
+
SNORNA="type_of_gene:snorna",
|
94
|
+
UNKNOWN="type_of_gene:unknown",
|
95
|
+
OTHER="type_of_gene:other",
|
96
|
+
RRNA="type_of_gene:rrna",
|
97
|
+
TRNA="type_of_gene:trna",
|
98
|
+
SNRNA="type_of_gene:snrna",
|
99
|
+
)
|
100
|
+
|
101
|
+
MYGENE_QUERY_DEFS_LIST = [
|
102
|
+
MYGENE_QUERY_DEFS.BIOLOGICAL_REGION,
|
103
|
+
MYGENE_QUERY_DEFS.NCRNA,
|
104
|
+
MYGENE_QUERY_DEFS.PROTEIN_CODING,
|
105
|
+
MYGENE_QUERY_DEFS.PSEUDO,
|
106
|
+
MYGENE_QUERY_DEFS.SNORNA,
|
107
|
+
MYGENE_QUERY_DEFS.UNKNOWN,
|
108
|
+
MYGENE_QUERY_DEFS.OTHER,
|
109
|
+
MYGENE_QUERY_DEFS.RRNA,
|
110
|
+
MYGENE_QUERY_DEFS.TRNA,
|
111
|
+
MYGENE_QUERY_DEFS.SNRNA,
|
112
|
+
]
|
113
|
+
|
114
|
+
MYGENE_DEFAULT_QUERIES = [MYGENE_QUERY_DEFS.PROTEIN_CODING, MYGENE_QUERY_DEFS.NCRNA]
|
115
|
+
|
116
|
+
# bioc ontologies used for linking systematic identifiers
|
117
|
+
# (entrez is not part of this list because it forms the gene index)
|
118
|
+
PROTEIN_ONTOLOGIES = [ONTOLOGIES.UNIPROT, ONTOLOGIES.ENSEMBL_PROTEIN]
|
119
|
+
GENE_ONTOLOGIES = [
|
120
|
+
ONTOLOGIES.NCBI_ENTREZ_GENE,
|
121
|
+
ONTOLOGIES.ENSEMBL_GENE,
|
122
|
+
ONTOLOGIES.ENSEMBL_TRANSCRIPT,
|
123
|
+
]
|
124
|
+
NAME_ONTOLOGIES = {
|
125
|
+
ONTOLOGIES.GENE_NAME: 0,
|
126
|
+
ONTOLOGIES.SYMBOL: 1,
|
127
|
+
ONTOLOGIES.UNIPROT: 2,
|
128
|
+
ONTOLOGIES.ENSEMBL_PROTEIN: 3,
|
129
|
+
}
|