napistu 0.2.5.dev6__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__main__.py +126 -96
- napistu/constants.py +35 -41
- napistu/context/__init__.py +10 -0
- napistu/context/discretize.py +462 -0
- napistu/context/filtering.py +387 -0
- napistu/gcs/__init__.py +1 -1
- napistu/identifiers.py +74 -15
- napistu/indices.py +68 -0
- napistu/ingestion/__init__.py +1 -1
- napistu/ingestion/bigg.py +47 -62
- napistu/ingestion/constants.py +18 -133
- napistu/ingestion/gtex.py +113 -0
- napistu/ingestion/hpa.py +147 -0
- napistu/ingestion/sbml.py +0 -97
- napistu/ingestion/string.py +2 -2
- napistu/matching/__init__.py +10 -0
- napistu/matching/constants.py +18 -0
- napistu/matching/interactions.py +518 -0
- napistu/matching/mount.py +529 -0
- napistu/matching/species.py +510 -0
- napistu/mcp/__init__.py +7 -4
- napistu/mcp/__main__.py +128 -72
- napistu/mcp/client.py +16 -25
- napistu/mcp/codebase.py +201 -153
- napistu/mcp/component_base.py +170 -0
- napistu/mcp/config.py +223 -0
- napistu/mcp/constants.py +45 -2
- napistu/mcp/documentation.py +253 -136
- napistu/mcp/documentation_utils.py +13 -48
- napistu/mcp/execution.py +372 -305
- napistu/mcp/health.py +49 -67
- napistu/mcp/profiles.py +10 -6
- napistu/mcp/server.py +161 -80
- napistu/mcp/tutorials.py +139 -87
- napistu/modify/__init__.py +1 -1
- napistu/modify/gaps.py +1 -1
- napistu/network/__init__.py +1 -1
- napistu/network/constants.py +101 -34
- napistu/network/data_handling.py +388 -0
- napistu/network/ig_utils.py +351 -0
- napistu/network/napistu_graph_core.py +354 -0
- napistu/network/neighborhoods.py +40 -40
- napistu/network/net_create.py +373 -309
- napistu/network/net_propagation.py +47 -19
- napistu/network/{net_utils.py → ng_utils.py} +124 -272
- napistu/network/paths.py +67 -51
- napistu/network/precompute.py +11 -11
- napistu/ontologies/__init__.py +10 -0
- napistu/ontologies/constants.py +129 -0
- napistu/ontologies/dogma.py +243 -0
- napistu/ontologies/genodexito.py +649 -0
- napistu/ontologies/mygene.py +369 -0
- napistu/ontologies/renaming.py +198 -0
- napistu/rpy2/__init__.py +229 -86
- napistu/rpy2/callr.py +47 -77
- napistu/rpy2/constants.py +24 -23
- napistu/rpy2/rids.py +61 -648
- napistu/sbml_dfs_core.py +587 -222
- napistu/scverse/__init__.py +15 -0
- napistu/scverse/constants.py +28 -0
- napistu/scverse/loading.py +727 -0
- napistu/utils.py +118 -10
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/METADATA +8 -3
- napistu-0.3.1.dist-info/RECORD +133 -0
- tests/conftest.py +22 -0
- tests/test_context_discretize.py +56 -0
- tests/test_context_filtering.py +267 -0
- tests/test_identifiers.py +100 -0
- tests/test_indices.py +65 -0
- tests/{test_edgelist.py → test_ingestion_napistu_edgelist.py} +2 -2
- tests/test_matching_interactions.py +108 -0
- tests/test_matching_mount.py +305 -0
- tests/test_matching_species.py +394 -0
- tests/test_mcp_config.py +193 -0
- tests/test_mcp_documentation_utils.py +12 -3
- tests/test_mcp_server.py +356 -0
- tests/test_network_data_handling.py +397 -0
- tests/test_network_ig_utils.py +23 -0
- tests/test_network_neighborhoods.py +19 -0
- tests/test_network_net_create.py +459 -0
- tests/test_network_ng_utils.py +30 -0
- tests/test_network_paths.py +56 -0
- tests/{test_precomputed_distances.py → test_network_precompute.py} +8 -6
- tests/test_ontologies_genodexito.py +58 -0
- tests/test_ontologies_mygene.py +39 -0
- tests/test_ontologies_renaming.py +110 -0
- tests/test_rpy2_callr.py +79 -0
- tests/test_rpy2_init.py +151 -0
- tests/test_sbml.py +0 -31
- tests/test_sbml_dfs_core.py +134 -10
- tests/test_scverse_loading.py +778 -0
- tests/test_set_coverage.py +2 -2
- tests/test_utils.py +121 -1
- napistu/mechanism_matching.py +0 -1353
- napistu/rpy2/netcontextr.py +0 -467
- napistu-0.2.5.dev6.dist-info/RECORD +0 -97
- tests/test_igraph.py +0 -367
- tests/test_mechanism_matching.py +0 -784
- tests/test_net_utils.py +0 -149
- tests/test_netcontextr.py +0 -105
- tests/test_rpy2.py +0 -61
- /napistu/ingestion/{cpr_edgelist.py → napistu_edgelist.py} +0 -0
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/WHEEL +0 -0
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/entry_points.txt +0 -0
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/top_level.txt +0 -0
- /tests/{test_obo.py → test_ingestion_obo.py} +0 -0
napistu/ingestion/sbml.py
CHANGED
@@ -15,9 +15,6 @@ from napistu import utils
|
|
15
15
|
|
16
16
|
from napistu.constants import BQB
|
17
17
|
|
18
|
-
from napistu.ingestion.constants import SBML_ANNOTATION_METHOD_GET_COMPARTMENT
|
19
|
-
from napistu.ingestion.constants import SBML_ANNOTATION_METHOD_GET_REACTION
|
20
|
-
from napistu.ingestion.constants import SBML_ANNOTATION_METHOD_GET_SPECIES
|
21
18
|
from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_ID
|
22
19
|
from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_IDENTIFIERS
|
23
20
|
from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_NAME
|
@@ -517,100 +514,6 @@ def setup_cspecies(sbml_model: SBML) -> pd.DataFrame:
|
|
517
514
|
return pd.DataFrame(comp_species).set_index(SMBL_REACTION_SPEC_SC_ID)
|
518
515
|
|
519
516
|
|
520
|
-
def add_sbml_annotations(
    sbml_model: SBML, annotations: pd.DataFrame, save_path: str
) -> None:
    """
    Add SBML Annotations

    Add additional identifiers to an sbml file and save the updated document.
    Every annotation is attached as a biological "is version of" CV term.

    Parameters:
    sbml_model: SBML
        A .sbml model
    annotations: pd.DataFrame
        A table of annotations to add containing an "id" matching the
        primary key of an entity, "type" matching the type of entity
        ("species", "compartment" or "reaction"), and "uri" representing
        the annotation to add.
    save_path: str
        Path to save the model to

    Returns:
    None

    Raises:
    TypeError
        If sbml_model is not an SBML object or annotations is not a pd.DataFrame
    ValueError
        If an annotation type is not recognized, an "id" cannot be found in
        the model, or libsbml reports a failure while adding the annotation
    """

    logger.warning(
        "add_sbml_annotations is deprecated and may be removed in a future version of rcpr; "
        "we are now adding these annotation during ingestion by sbml.sbml_df_from_sbml() rather "
        "than directly appending them to the raw .sbml"
    )

    if not isinstance(sbml_model, SBML):
        raise TypeError("sbml_model must be an SBML object")

    if not isinstance(annotations, pd.DataFrame):
        raise TypeError("annotations must be a pd.DataFrame")

    # name of the model accessor used to look up each kind of entity
    entity_getters = {
        "species": SBML_ANNOTATION_METHOD_GET_SPECIES,
        "compartment": SBML_ANNOTATION_METHOD_GET_COMPARTMENT,
        "reaction": SBML_ANNOTATION_METHOD_GET_REACTION,
    }

    for row_pos in range(annotations.shape[0]):
        # read rows by position; label-based lookups (annotations["type"][i])
        # fail or return the wrong row when the index is not 0..n-1
        annot_type = annotations["type"].iloc[row_pos]

        if annot_type not in entity_getters:
            raise ValueError(
                f"{annot_type} is not a valid annotation type,"
                " valid types are species, compartment, and reaction"
            )

        entity_id = annotations["id"].iloc[row_pos]

        # access the node to modify
        entity_fxn_method = getattr(sbml_model.model, entity_getters[annot_type])
        entity_node = entity_fxn_method(entity_id)

        # an unknown id yields no node; fail with a clear message instead of
        # an AttributeError on the first method call below
        if entity_node is None:
            raise ValueError(
                f"{entity_id} was not found among the {annot_type} entries of the model"
            )

        # set meta-id if there isn't one; required to add a node
        if not entity_node.isSetMetaId():
            add_metaid_code = entity_node.setMetaId(entity_id)

            if add_metaid_code != libsbml.LIBSBML_OPERATION_SUCCESS:
                raise ValueError(
                    f"adding metaId to {entity_id} failed"
                    f" with return code {add_metaid_code} "
                    f"({libsbml.OperationReturnValue_toString(add_metaid_code).strip()})"
                )

        uri = annotations["uri"].iloc[row_pos]

        # create a controlled vocabulary term
        cv = libsbml.CVTerm()
        cv.setQualifierType(libsbml.BIOLOGICAL_QUALIFIER)
        cv.setBiologicalQualifierType(libsbml.BQB_IS_VERSION_OF)

        add_resource_code = cv.addResource(uri)
        if add_resource_code != libsbml.LIBSBML_OPERATION_SUCCESS:
            raise ValueError(
                "adding resource to CV term returned code"
                f" {add_resource_code} "
                f"({libsbml.OperationReturnValue_toString(add_resource_code).strip()})"
                f" rather than {libsbml.LIBSBML_OPERATION_SUCCESS} when "
                f"adding {uri} to {entity_id}"
            )

        add_cv_code = entity_node.addCVTerm(cv)
        if add_cv_code != libsbml.LIBSBML_OPERATION_SUCCESS:
            raise ValueError(
                f"adding CV to entity returned code {add_cv_code} "
                f"({libsbml.OperationReturnValue_toString(add_cv_code).strip()})"
                f" rather than {libsbml.LIBSBML_OPERATION_SUCCESS} when adding"
                f" {uri} to {entity_id}"
            )

    # persist the annotated document only after every row was added successfully
    libsbml.writeSBML(sbml_model.document, save_path)

    return None
|
612
|
-
|
613
|
-
|
614
517
|
def _get_gene_product_dict(gp):
|
615
518
|
"""Read a gene product node from an sbml file."""
|
616
519
|
return {
|
napistu/ingestion/string.py
CHANGED
@@ -10,7 +10,7 @@ from napistu import utils
|
|
10
10
|
from napistu.constants import BQB
|
11
11
|
from napistu.constants import COMPARTMENTS
|
12
12
|
from napistu.constants import MINI_SBO_FROM_NAME
|
13
|
-
from napistu.ingestion import
|
13
|
+
from napistu.ingestion import napistu_edgelist
|
14
14
|
from napistu.ingestion.constants import SBML_SPECIES_DICT_IDENTIFIERS
|
15
15
|
from napistu.ingestion.constants import SBML_SPECIES_DICT_NAME
|
16
16
|
from napistu.ingestion.constants import SMBL_REACTION_DICT_IDENTIFIERS
|
@@ -127,7 +127,7 @@ def convert_string_to_sbml_dfs(
|
|
127
127
|
# remove one edge since reciprocal edges are present; i.e., A-B and B-A
|
128
128
|
# and attributes (e.g., combined_score are the same across both reciprocal
|
129
129
|
# interactions
|
130
|
-
uq_string_edgelist =
|
130
|
+
uq_string_edgelist = napistu_edgelist.remove_reciprocal_interactions(
|
131
131
|
string_edgelist, extra_defining_vars=["combined_score"]
|
132
132
|
)
|
133
133
|
|
@@ -0,0 +1,18 @@
|
|
1
|
+
from types import SimpleNamespace
|
2
|
+
|
3
|
+
# Default name of the column holding feature identifiers.
FEATURE_ID_VAR_DEFAULT = "feature_id"

# Names of the supported aggregation methods
# (presumably consumed when resolving multiple matches - confirm against callers).
RESOLVE_MATCHES_AGGREGATORS = SimpleNamespace(
    WEIGHTED_MEAN="weighted_mean", MEAN="mean", FIRST="first", MAX="max"
)

# Name of a temporary weight column; the dunder-style name makes a clash
# with a user-supplied column unlikely.
RESOLVE_MATCHES_TMP_WEIGHT_COL = "__tmp_weight_for_aggregation__"

# Names of the supported strategies for binding a dict of wide results.
# CONTATENATE is a misspelling kept for backward compatibility; CONCATENATE
# is the correctly spelled alias and carries the same value.
BIND_DICT_OF_WIDE_RESULTS_STRATEGIES = SimpleNamespace(
    CONTATENATE="concatenate",
    CONCATENATE="concatenate",
    MULTIPLE_KEYS="multiple_keys",
    STAGGER="stagger",
)
BIND_DICT_OF_WIDE_RESULTS_STRATEGIES_LIST = [
    BIND_DICT_OF_WIDE_RESULTS_STRATEGIES.CONCATENATE,
    BIND_DICT_OF_WIDE_RESULTS_STRATEGIES.MULTIPLE_KEYS,
    BIND_DICT_OF_WIDE_RESULTS_STRATEGIES.STAGGER,
]
|
@@ -0,0 +1,518 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import logging
|
4
|
+
|
5
|
+
import igraph as ig
|
6
|
+
import pandas as pd
|
7
|
+
|
8
|
+
from napistu import identifiers
|
9
|
+
from napistu import utils
|
10
|
+
from napistu import sbml_dfs_core
|
11
|
+
from napistu.matching.species import features_to_pathway_species
|
12
|
+
from napistu.constants import (
|
13
|
+
CPR_EDGELIST_REQ_VARS,
|
14
|
+
IDENTIFIER_EDGELIST_REQ_VARS,
|
15
|
+
CPR_EDGELIST,
|
16
|
+
SBML_DFS,
|
17
|
+
IDENTIFIERS,
|
18
|
+
)
|
19
|
+
from napistu.network.constants import NAPISTU_GRAPH_EDGES
|
20
|
+
from napistu.matching.constants import FEATURE_ID_VAR_DEFAULT
|
21
|
+
from napistu.network import paths
|
22
|
+
|
23
|
+
logger = logging.getLogger(__name__)
|
24
|
+
|
25
|
+
|
26
|
+
def edgelist_to_pathway_species(
    formatted_edgelist: pd.DataFrame,
    species_identifiers: pd.DataFrame,
    ontologies: set,
    feature_id_var: str = FEATURE_ID_VAR_DEFAULT,
    verbose: bool = False,
) -> pd.DataFrame:
    """
    Edgelist to Pathway Species

    Match the identifiers on both ends of an edgelist to species in a
    pathway representation.

    Parameters:
    formatted_edgelist: pd.DataFrame
        Edgelist containing the "identifier_upstream" and
        "identifier_downstream" variables used to match entries
    species_identifiers: pd.DataFrame
        A table of molecular species identifiers produced from
        sbml_dfs.get_identifiers("species") generally using
        sbml_dfs_core.export_sbml_dfs()
    ontologies: set
        A set of ontologies used to match features to pathway species
    feature_id_var: str, default=FEATURE_ID_VAR_DEFAULT
        Name given to the column of distinct edgelist identifiers which is
        passed to features_to_pathway_species()
    verbose: bool, default=False
        Whether to print verbose output

    Returns:
    edges_on_pathway: pd.DataFrame
        formatted_edgelist with upstream identifiers mapped to
        "s_id_upstream" and downstream identifiers mapped to
        "s_id_downstream". Both joins use pandas' default inner merge, so
        edges with an unmatched end are dropped.

    Raises:
    ValueError
        If either identifier column is missing from formatted_edgelist
    """

    identifier_vars = {
        CPR_EDGELIST.IDENTIFIER_UPSTREAM,
        CPR_EDGELIST.IDENTIFIER_DOWNSTREAM,
    }
    absent_vars = identifier_vars.difference(set(formatted_edgelist.columns.tolist()))

    if absent_vars:
        raise ValueError(
            f"{len(absent_vars)} required variables were "
            "missing from 'formatted_edgelist': "
            f"{', '.join(absent_vars)}"
        )

    # stack both identifier columns and keep each identifier once; the
    # concatenated Series is unnamed so to_frame() labels its column 0
    stacked_identifiers = pd.concat(
        [
            formatted_edgelist[CPR_EDGELIST.IDENTIFIER_UPSTREAM],
            formatted_edgelist[CPR_EDGELIST.IDENTIFIER_DOWNSTREAM],
        ]
    )
    distinct_identifiers = (
        stacked_identifiers.drop_duplicates()
        .reset_index(drop=True)
        .to_frame()
        .rename({0: feature_id_var}, axis=1)
    )

    # map the distinct identifiers onto pathway species (s_ids)
    features_on_pathway = features_to_pathway_species(
        feature_identifiers=distinct_identifiers,
        species_identifiers=species_identifiers,
        ontologies=ontologies,
        feature_identifiers_var=feature_id_var,
        verbose=verbose,
    )

    # one identifier -> s_id lookup per end of the edge
    upstream_lookup = features_on_pathway[
        [SBML_DFS.S_ID, IDENTIFIERS.IDENTIFIER]
    ].rename(
        {
            SBML_DFS.S_ID: CPR_EDGELIST.S_ID_UPSTREAM,
            IDENTIFIERS.IDENTIFIER: CPR_EDGELIST.IDENTIFIER_UPSTREAM,
        },
        axis=1,
    )
    downstream_lookup = features_on_pathway[
        [SBML_DFS.S_ID, IDENTIFIERS.IDENTIFIER]
    ].rename(
        {
            SBML_DFS.S_ID: CPR_EDGELIST.S_ID_DOWNSTREAM,
            IDENTIFIERS.IDENTIFIER: CPR_EDGELIST.IDENTIFIER_DOWNSTREAM,
        },
        axis=1,
    )

    # attach s_ids for the upstream end, then for the downstream end
    edges_w_upstream = formatted_edgelist.merge(upstream_lookup)
    edges_on_pathway = edges_w_upstream.merge(downstream_lookup)

    return edges_on_pathway
|
118
|
+
|
119
|
+
|
120
|
+
def edgelist_to_scids(
    formatted_edgelist: pd.DataFrame,
    sbml_dfs: sbml_dfs_core.SBML_dfs,
    species_identifiers: pd.DataFrame,
    ontologies: set,
):
    """
    Edgelist to Compartmentalized Species IDs

    Map an edgelist of possible mechanistic interactions onto the
    compartmentalized species of a pathway model.

    Parameters:
    formatted_edgelist: pd.DataFrame
        Edgelist containing the "identifier_upstream" and
        "identifier_downstream" variables used to match entries
    sbml_dfs: sbml_dfs_core.SBML_dfs
        A mechanistic model
    species_identifiers: pd.DataFrame
        A table of molecular species identifiers produced from
        sbml_dfs.get_identifiers("species") generally using
        sbml_dfs_core.export_sbml_dfs()
    ontologies: set
        A set of ontologies used to match features to pathway species

    Returns:
    edgelist_w_scids: pd.DataFrame
        formatted_edgelist with upstream features mapped to "sc_id_upstream"
        and downstream species mapped to "sc_id_downstream". An edge is
        repeated for every pairing of compartmentalized species sharing its
        upstream and downstream s_ids.
    """

    identifiers._check_species_identifiers_table(species_identifiers)

    # map edges onto pathway species (s_ids) through shared identifiers
    edges_on_pathway = edgelist_to_pathway_species(
        formatted_edgelist=formatted_edgelist,
        species_identifiers=species_identifiers,
        ontologies=ontologies,
    )

    # distinct species pairs which need to be expanded to cspecies pairs
    s_id_pairs = edges_on_pathway[
        [CPR_EDGELIST.S_ID_UPSTREAM, CPR_EDGELIST.S_ID_DOWNSTREAM]
    ].drop_duplicates()

    # s_id -> sc_id lookup; reset_index() turns the table's index into a column
    cspecies_lookup = sbml_dfs.compartmentalized_species[
        [SBML_DFS.S_ID]
    ].reset_index()
    upstream_cspecies = cspecies_lookup.rename(
        {
            SBML_DFS.S_ID: CPR_EDGELIST.S_ID_UPSTREAM,
            SBML_DFS.SC_ID: CPR_EDGELIST.SC_ID_UPSTREAM,
        },
        axis=1,
    )
    downstream_cspecies = cspecies_lookup.rename(
        {
            SBML_DFS.S_ID: CPR_EDGELIST.S_ID_DOWNSTREAM,
            SBML_DFS.SC_ID: CPR_EDGELIST.SC_ID_DOWNSTREAM,
        },
        axis=1,
    )

    # expand from s_ids to sc_ids on each end of the pair
    sc_id_pairs = s_id_pairs.merge(upstream_cspecies).merge(downstream_cspecies)

    # join the (s_id_upstream, s_id_downstream) -> sc_ids lookup back onto the edges
    edgelist_w_scids = edges_on_pathway.merge(sc_id_pairs)

    n_mapped = edgelist_w_scids.shape[0]
    logger_msg = (
        f"{n_mapped} interactions mapped "
        "onto pairs of compartmentalized species in the mechanistic model"
    )
    # an empty result is reported as a warning rather than routine info
    report = logger.warning if n_mapped == 0 else logger.info
    report(logger_msg)

    return edgelist_w_scids
|
200
|
+
|
201
|
+
|
202
|
+
def filter_to_direct_mechanistic_interactions(
    formatted_edgelist: pd.DataFrame,
    sbml_dfs: sbml_dfs_core.SBML_dfs,
    species_identifiers: pd.DataFrame,
    ontologies: set,
) -> pd.DataFrame:
    """
    Filter to Direct Mechanistic Interactions

    Keep the edges whose upstream and downstream compartmentalized species
    take part in the same reaction of the pathway model.

    Parameters:
    formatted_edgelist: pd.DataFrame
        Edgelist containing the "identifier_upstream" and
        "identifier_downstream" variables used to match entries
    sbml_dfs: sbml_dfs_core.SBML_dfs
        A mechanistic model
    species_identifiers: pd.DataFrame
        A table of molecular species identifiers produced from
        sbml_dfs.get_identifiers("species") generally using
        sbml_dfs_core.export_sbml_dfs()
    ontologies: set
        A set of ontologies used to match features to pathway species

    Returns:
    edgelist_w_direct_mechanistic_interactions: pd.DataFrame
        formatted_edgelist filtered to mechanistic reactions present in the
        pathway representation, with the reaction id and name and both
        species names added
    """

    edgelist_w_scids = _edgelist_to_scids_if_needed(
        formatted_edgelist, sbml_dfs, species_identifiers, ontologies
    )

    # reduce to distinct sc_id pairs
    sc_id_pairs = edgelist_w_scids[list(CPR_EDGELIST_REQ_VARS)].drop_duplicates()

    # split reaction participants by the sign of their stoichiometry:
    # 0 = modifier, < 0 = substrate, > 0 = product
    participant_vars = [SBML_DFS.R_ID, SBML_DFS.SC_ID]
    modifiers = sbml_dfs.reaction_species[
        sbml_dfs.reaction_species[SBML_DFS.STOICHIOMETRY] == 0
    ][participant_vars]
    substrates = sbml_dfs.reaction_species[
        sbml_dfs.reaction_species[SBML_DFS.STOICHIOMETRY] < 0
    ][participant_vars]
    products = sbml_dfs.reaction_species[
        sbml_dfs.reaction_species[SBML_DFS.STOICHIOMETRY] > 0
    ][participant_vars]

    def _as_upstream(participants: pd.DataFrame) -> pd.DataFrame:
        # label participants as the upstream end of an interaction
        return participants.rename(
            {SBML_DFS.SC_ID: CPR_EDGELIST.SC_ID_UPSTREAM}, axis=1
        )

    def _as_downstream(participants: pd.DataFrame) -> pd.DataFrame:
        # label participants as the downstream end of an interaction
        return participants.rename(
            {SBML_DFS.SC_ID: CPR_EDGELIST.SC_ID_DOWNSTREAM}, axis=1
        )

    # define all existing direct regulatory interactions; each merge pairs
    # participants of the same reaction
    pathway_interactions = pd.concat(
        [
            # modifiers affect substrates
            _as_upstream(modifiers).merge(_as_downstream(substrates)),
            # substrates affect products
            _as_upstream(substrates).merge(_as_downstream(products)),
            # modifiers affect products
            _as_upstream(modifiers).merge(_as_downstream(products)),
        ]
    ).reset_index(drop=True)

    # filter pathway interactions based on matches to sc_id_pairs
    matched_interactions = sc_id_pairs.merge(pathway_interactions)

    # add species metadata for matches
    upstream_names = (
        sbml_dfs.species[SBML_DFS.S_NAME]
        .to_frame()
        .rename({SBML_DFS.S_NAME: CPR_EDGELIST.S_NAME_UPSTREAM}, axis=1)
    )
    downstream_names = (
        sbml_dfs.species[SBML_DFS.S_NAME]
        .to_frame()
        .rename({SBML_DFS.S_NAME: CPR_EDGELIST.S_NAME_DOWNSTREAM}, axis=1)
    )
    matched_interactions = matched_interactions.merge(
        upstream_names,
        left_on=CPR_EDGELIST.S_ID_UPSTREAM,
        right_index=True,
    ).merge(
        downstream_names,
        left_on=CPR_EDGELIST.S_ID_DOWNSTREAM,
        right_index=True,
    )

    # add metadata for reactions where interaction occurs
    direct_edge_interactions = matched_interactions.merge(
        sbml_dfs.reactions[SBML_DFS.R_NAME].to_frame(),
        left_on=SBML_DFS.R_ID,
        right_index=True,
    )

    # columns carried back onto the full edgelist
    interaction_vars = [
        CPR_EDGELIST.SC_ID_UPSTREAM,
        CPR_EDGELIST.SC_ID_DOWNSTREAM,
        SBML_DFS.R_ID,
        CPR_EDGELIST.S_NAME_UPSTREAM,
        CPR_EDGELIST.S_NAME_DOWNSTREAM,
        SBML_DFS.R_NAME,
    ]
    edgelist_w_direct_mechanistic_interactions = edgelist_w_scids.merge(
        direct_edge_interactions[interaction_vars]
    )

    return edgelist_w_direct_mechanistic_interactions
|
319
|
+
|
320
|
+
|
321
|
+
def filter_to_indirect_mechanistic_interactions(
    formatted_edgelist: pd.DataFrame,
    sbml_dfs: sbml_dfs_core.SBML_dfs,
    species_identifiers: pd.DataFrame,
    napistu_graph: ig.Graph,
    ontologies: set,
    precomputed_distances=None,
    max_path_length=10,
):
    """
    Filter to Indirect Mechanistic Interactions

    Filter an edgelist to indirect mechanistic interactions. Indirect
    relationships are identified by searching a network for paths from an
    upstream species to a downstream species.

    Parameters:
    formatted_edgelist: pd.DataFrame
        Edgelist containing the "identifier_upstream" and
        "identifier_downstream" variables used to match entries
    sbml_dfs: sbml_dfs_core.SBML_dfs
        A mechanistic model
    species_identifiers: pandas.DataFrame
        A table of molecular species identifiers produced from
        sbml_dfs.get_identifiers("species") generally using
        sbml_dfs_core.export_sbml_dfs()
    napistu_graph: igraph.Graph
        A network representation of the sbml_dfs model
    ontologies: set
        A set of ontologies used to match features to pathway species
    precomputed_distances: None or pd.DataFrame
        Path lengths and weights between pairs of cspecies. When provided,
        the edgelist is filtered with these distances before paths are searched.
    max_path_length: int
        Maximum number of steps to consider; longer paths are skipped.

    Returns:
    indirect_shortest_paths: pd.DataFrame or None
        formatted_edgelist filtered to interactions which can be described
        by an indirect mechanism. The mechanism is described by a path
        weight, length, and a vpath and epath holding the vertices and edges
        traversed to create the path. None is returned (after logging a
        warning) when no pair could be linked.

    Raises:
    ValueError
        If the destination of a path cannot be determined from its final edges
    """

    edgelist_w_scids = _edgelist_to_scids_if_needed(
        formatted_edgelist, sbml_dfs, species_identifiers, ontologies
    )

    if precomputed_distances is not None:
        # rename to match conventions in precomputed_distances, filter by
        # these precomputed distances and then restore naming
        edgelist_as_distances = edgelist_w_scids.rename(
            {
                CPR_EDGELIST.SC_ID_UPSTREAM: CPR_EDGELIST.SC_ID_ORIGIN,
                CPR_EDGELIST.SC_ID_DOWNSTREAM: CPR_EDGELIST.SC_ID_DEST,
            },
            axis=1,
        )
        filtered_distances = paths._filter_paths_by_precomputed_distances(
            edgelist_as_distances,
            precomputed_distances,
        )
        edgelist_w_scids = filtered_distances.rename(
            {
                CPR_EDGELIST.SC_ID_ORIGIN: CPR_EDGELIST.SC_ID_UPSTREAM,
                CPR_EDGELIST.SC_ID_DEST: CPR_EDGELIST.SC_ID_DOWNSTREAM,
            },
            axis=1,
        )

    # paths are searched from 1 upstream sc_id to all of its desired
    # downstream sc_ids (this is the convention with igraph)
    targets_by_origin = edgelist_w_scids.set_index(CPR_EDGELIST.SC_ID_UPSTREAM)

    # origin sc_id -> table summarizing the paths found from that origin
    paths_by_origin = {}
    for origin_sc_id in targets_by_origin.index.unique():
        # selecting a single row yields a pd.Series; convert back to a
        # DataFrame for consistency
        origin_targets = utils.ensure_pd_df(targets_by_origin.loc[origin_sc_id])

        # log entry for debugging
        logger.debug(
            f"finding paths from {origin_sc_id} to "
            f"{origin_targets.shape[0]} target vertices"
        )

        # unique destinations are passed as a list for compatibility with igraph
        destinations = origin_targets[CPR_EDGELIST.SC_ID_DOWNSTREAM].unique().tolist()
        shortest_paths = paths.find_shortest_reaction_paths(
            napistu_graph,
            sbml_dfs,
            origin=origin_sc_id,
            dest=destinations,
            weight_var=NAPISTU_GRAPH_EDGES.WEIGHTS,
        )

        if shortest_paths is None:
            continue

        vertices, edges = shortest_paths
        edges_by_path = edges.set_index("path")
        vertices_by_path = vertices.set_index("path")

        path_summaries = []
        for path_id in edges_by_path.index.unique():
            path_edges = edges_by_path.loc[path_id]

            # make sure that we are working with a DF
            if type(path_edges) is pd.Series:
                path_edges = path_edges.to_frame().T

            n_steps = path_edges.shape[0]
            if n_steps > max_path_length:
                continue

            # Find the destination node. The searched destinations are
            # passed as one batch, so the path does not say which one it
            # reached, and in an undirected graph either end of the last
            # edge could be the destination. It is taken to be the end of
            # the terminal edge which is neither the origin nor part of
            # the penultimate edge.
            ancestor_species = {origin_sc_id}
            if n_steps > 1:
                penultimate_edge = path_edges.iloc[-2]
                ancestor_species = ancestor_species | {
                    penultimate_edge[NAPISTU_GRAPH_EDGES.FROM],
                    penultimate_edge[NAPISTU_GRAPH_EDGES.TO],
                }

            terminal_edge = path_edges.iloc[-1]
            terminal_ends = {
                terminal_edge[NAPISTU_GRAPH_EDGES.FROM],
                terminal_edge[NAPISTU_GRAPH_EDGES.TO],
            }
            candidate_endings = terminal_ends.difference(ancestor_species)

            if len(candidate_endings) != 1:
                raise ValueError(
                    "The terminal edge could not be determined when summarizing paths"
                )
            ending_cspecies = candidate_endings.pop()

            path_summaries.append(
                pd.Series(
                    {
                        NAPISTU_GRAPH_EDGES.FROM: origin_sc_id,
                        NAPISTU_GRAPH_EDGES.TO: ending_cspecies,
                        "weight": sum(path_edges[NAPISTU_GRAPH_EDGES.WEIGHTS]),
                        "path_length": n_steps,
                        "vpath": vertices_by_path.loc[path_id],
                        "epath": path_edges,
                    }
                )
            )

        if path_summaries:
            paths_by_origin[origin_sc_id] = pd.DataFrame(path_summaries)

    if not paths_by_origin:
        logger.warning(
            "None of the provide molecular pairs could be mechanistically linked with a network path"
        )
        return None

    all_shortest_paths = pd.concat(paths_by_origin.values())

    # keep edges whose (upstream, downstream) pair matches a path's (from, to)
    indirect_shortest_paths = edgelist_w_scids.merge(
        all_shortest_paths,
        left_on=[CPR_EDGELIST.SC_ID_UPSTREAM, CPR_EDGELIST.SC_ID_DOWNSTREAM],
        right_on=[NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO],
    )

    return indirect_shortest_paths
|
491
|
+
|
492
|
+
|
493
|
+
def _edgelist_to_scids_if_needed(
    edgelist: pd.DataFrame,
    sbml_dfs: sbml_dfs_core.SBML_dfs,
    species_identifiers: pd.DataFrame,
    ontologies: set,
) -> pd.DataFrame:
    """
    Map a set of edgelist species to cspecies or skip if cspecies were provided.

    Parameters:
    edgelist: pd.DataFrame
        Edgelist containing either all of CPR_EDGELIST_REQ_VARS (returned
        unchanged) or all of IDENTIFIER_EDGELIST_REQ_VARS (matched to the model)
    sbml_dfs: sbml_dfs_core.SBML_dfs
        A mechanistic model
    species_identifiers: pd.DataFrame
        A table of molecular species identifiers
    ontologies: set
        A set of ontologies used to match features to pathway species

    Returns:
    pd.DataFrame
        The original edgelist when it already has the required variables,
        otherwise the result of edgelist_to_scids()
    """

    # nothing to do when the required variables are already present
    if utils.match_pd_vars(edgelist, CPR_EDGELIST_REQ_VARS).are_present:
        logger.info(
            f"An edgelist with {', '.join(CPR_EDGELIST_REQ_VARS)} was provided; identifier matching will be skipped"
        )
        return edgelist

    # identifier matching requires the identifier columns and a valid
    # species identifiers table
    utils.match_pd_vars(edgelist, IDENTIFIER_EDGELIST_REQ_VARS).assert_present()
    identifiers._check_species_identifiers_table(species_identifiers)

    return edgelist_to_scids(
        edgelist,
        sbml_dfs=sbml_dfs,
        species_identifiers=species_identifiers,
        ontologies=ontologies,
    )
|