napistu 0.3.6__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__main__.py +28 -13
- napistu/consensus.py +19 -25
- napistu/constants.py +102 -83
- napistu/indices.py +3 -1
- napistu/ingestion/napistu_edgelist.py +4 -4
- napistu/ingestion/sbml.py +298 -295
- napistu/ingestion/string.py +14 -18
- napistu/ingestion/trrust.py +22 -27
- napistu/matching/interactions.py +41 -39
- napistu/matching/species.py +1 -1
- napistu/modify/gaps.py +2 -1
- napistu/network/constants.py +61 -45
- napistu/network/data_handling.py +1 -1
- napistu/network/neighborhoods.py +3 -3
- napistu/network/net_create.py +440 -616
- napistu/network/net_create_utils.py +734 -0
- napistu/network/net_propagation.py +1 -1
- napistu/network/{napistu_graph_core.py → ng_core.py} +57 -15
- napistu/network/ng_utils.py +28 -21
- napistu/network/paths.py +4 -4
- napistu/network/precompute.py +35 -74
- napistu/ontologies/genodexito.py +5 -1
- napistu/ontologies/renaming.py +4 -0
- napistu/sbml_dfs_core.py +127 -64
- napistu/sbml_dfs_utils.py +50 -0
- napistu/utils.py +132 -46
- {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/METADATA +2 -2
- {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/RECORD +47 -44
- tests/conftest.py +171 -13
- tests/test_consensus.py +74 -5
- tests/test_gaps.py +26 -15
- tests/test_network_data_handling.py +5 -2
- tests/test_network_net_create.py +93 -202
- tests/test_network_net_create_utils.py +538 -0
- tests/test_network_ng_core.py +19 -0
- tests/test_network_ng_utils.py +1 -1
- tests/test_network_precompute.py +5 -4
- tests/test_ontologies_renaming.py +28 -24
- tests/test_rpy2_callr.py +0 -1
- tests/test_rpy2_init.py +0 -1
- tests/test_sbml_dfs_core.py +165 -15
- tests/test_sbml_dfs_utils.py +45 -0
- tests/test_utils.py +45 -2
- {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/WHEEL +0 -0
- {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/entry_points.txt +0 -0
- {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/top_level.txt +0 -0
napistu/ingestion/string.py
CHANGED
@@ -8,15 +8,11 @@ from napistu import sbml_dfs_core
|
|
8
8
|
from napistu import sbml_dfs_utils
|
9
9
|
from napistu import source
|
10
10
|
from napistu import utils
|
11
|
+
from napistu.ingestion import napistu_edgelist
|
11
12
|
from napistu.constants import BQB
|
12
13
|
from napistu.constants import MINI_SBO_FROM_NAME
|
13
|
-
from napistu.
|
14
|
-
from napistu.
|
15
|
-
from napistu.ingestion.constants import SBML_SPECIES_DICT_NAME
|
16
|
-
from napistu.ingestion.constants import SMBL_REACTION_DICT_IDENTIFIERS
|
17
|
-
from napistu.ingestion.constants import SMBL_REACTION_DICT_IS_REVERSIBLE
|
18
|
-
from napistu.ingestion.constants import SMBL_REACTION_DICT_NAME
|
19
|
-
from napistu.ingestion.constants import SMBL_REACTION_SPEC_SBO_TERM
|
14
|
+
from napistu.constants import ONTOLOGIES
|
15
|
+
from napistu.constants import SBML_DFS
|
20
16
|
from napistu.ingestion.constants import STRING_DOWNSTREAM_COMPARTMENT
|
21
17
|
from napistu.ingestion.constants import STRING_DOWNSTREAM_NAME
|
22
18
|
from napistu.ingestion.constants import STRING_PROTEIN_ID
|
@@ -137,10 +133,10 @@ def convert_string_to_sbml_dfs(
|
|
137
133
|
|
138
134
|
# define identifier mapping from aliases to use:
|
139
135
|
alias_to_identifier = {
|
140
|
-
"Ensembl_gene": (
|
141
|
-
"Ensembl_transcript": (
|
142
|
-
"Ensembl_translation": (
|
143
|
-
"Ensembl_UniProt_AC": (
|
136
|
+
"Ensembl_gene": (ONTOLOGIES.ENSEMBL_GENE, BQB.IS_ENCODED_BY),
|
137
|
+
"Ensembl_transcript": (ONTOLOGIES.ENSEMBL_TRANSCRIPT, BQB.IS_ENCODED_BY),
|
138
|
+
"Ensembl_translation": (ONTOLOGIES.ENSEMBL_PROTEIN, BQB.IS),
|
139
|
+
"Ensembl_UniProt_AC": (ONTOLOGIES.UNIPROT, BQB.IS),
|
144
140
|
}
|
145
141
|
|
146
142
|
# filter aliases to only keep required ones
|
@@ -276,17 +272,17 @@ def _build_species_df(
|
|
276
272
|
species_df = (
|
277
273
|
pd.Series(
|
278
274
|
list(set(edgelist[source_col]).union(edgelist[target_col])),
|
279
|
-
name=
|
275
|
+
name=SBML_DFS.S_NAME,
|
280
276
|
)
|
281
277
|
.to_frame()
|
282
|
-
.set_index(
|
278
|
+
.set_index(SBML_DFS.S_NAME, drop=False)
|
283
279
|
.apply(
|
284
280
|
_get_identifiers,
|
285
281
|
alias_to_identifier=alias_to_identifier,
|
286
282
|
dat_alias=aliases,
|
287
283
|
axis=1,
|
288
284
|
)
|
289
|
-
.rename(
|
285
|
+
.rename(SBML_DFS.S_IDENTIFIERS)
|
290
286
|
.reset_index()
|
291
287
|
)
|
292
288
|
return species_df
|
@@ -312,8 +308,8 @@ def _build_interactor_edgelist(
|
|
312
308
|
**{
|
313
309
|
STRING_UPSTREAM_COMPARTMENT: compartment,
|
314
310
|
STRING_DOWNSTREAM_COMPARTMENT: compartment,
|
315
|
-
|
316
|
-
|
311
|
+
SBML_DFS.SBO_TERM: sbo_interactor,
|
312
|
+
SBML_DFS.R_IDENTIFIERS: lambda x: identifiers.Identifiers([]),
|
317
313
|
}
|
318
314
|
)
|
319
315
|
if add_reverse_interactions:
|
@@ -336,10 +332,10 @@ def _build_interactor_edgelist(
|
|
336
332
|
)
|
337
333
|
|
338
334
|
interaction_edgelist = dat
|
339
|
-
interaction_edgelist[
|
335
|
+
interaction_edgelist[SBML_DFS.R_NAME] = _build_string_reaction_name(
|
340
336
|
dat[STRING_UPSTREAM_NAME], dat[STRING_DOWNSTREAM_NAME]
|
341
337
|
)
|
342
|
-
interaction_edgelist[
|
338
|
+
interaction_edgelist[SBML_DFS.R_ISREVERSIBLE] = True
|
343
339
|
|
344
340
|
return interaction_edgelist
|
345
341
|
|
napistu/ingestion/trrust.py
CHANGED
@@ -8,16 +8,11 @@ from napistu import identifiers
|
|
8
8
|
from napistu import sbml_dfs_core
|
9
9
|
from napistu import source
|
10
10
|
from napistu import utils
|
11
|
+
from napistu.constants import BQB
|
12
|
+
from napistu.constants import IDENTIFIERS
|
11
13
|
from napistu.constants import MINI_SBO_FROM_NAME
|
12
14
|
from napistu.constants import SBOTERM_NAMES
|
13
|
-
from napistu.
|
14
|
-
from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_NAME
|
15
|
-
from napistu.ingestion.constants import SBML_SPECIES_DICT_IDENTIFIERS
|
16
|
-
from napistu.ingestion.constants import SBML_SPECIES_DICT_NAME
|
17
|
-
from napistu.ingestion.constants import SMBL_REACTION_DICT_IDENTIFIERS
|
18
|
-
from napistu.ingestion.constants import SMBL_REACTION_DICT_IS_REVERSIBLE
|
19
|
-
from napistu.ingestion.constants import SMBL_REACTION_DICT_NAME
|
20
|
-
from napistu.ingestion.constants import SMBL_REACTION_SPEC_SBO_TERM
|
15
|
+
from napistu.constants import SBML_DFS
|
21
16
|
from napistu.ingestion.constants import SPECIES_FULL_NAME_HUMAN
|
22
17
|
from napistu.ingestion.constants import STRING_DOWNSTREAM_COMPARTMENT
|
23
18
|
from napistu.ingestion.constants import STRING_DOWNSTREAM_NAME
|
@@ -81,16 +76,16 @@ def convert_trrust_to_sbml_dfs(
|
|
81
76
|
species_df = (
|
82
77
|
pd.DataFrame(
|
83
78
|
{
|
84
|
-
|
79
|
+
SBML_DFS.S_NAME: list(
|
85
80
|
{*edge_summaries_df["from"], *edge_summaries_df["to"]}
|
86
81
|
)
|
87
82
|
}
|
88
83
|
)
|
89
84
|
.merge(
|
90
|
-
uniprot_2_symbol.rename({TRRUST_SYMBOL:
|
85
|
+
uniprot_2_symbol.rename({TRRUST_SYMBOL: SBML_DFS.S_NAME}, axis=1),
|
91
86
|
how="left",
|
92
87
|
)
|
93
|
-
.set_index(
|
88
|
+
.set_index(SBML_DFS.S_NAME)
|
94
89
|
)
|
95
90
|
|
96
91
|
# create Identifiers objects for all species with uniprot IDs
|
@@ -106,14 +101,14 @@ def convert_trrust_to_sbml_dfs(
|
|
106
101
|
[
|
107
102
|
identifiers.Identifiers(
|
108
103
|
[
|
109
|
-
identifiers.format_uri(uri=x, biological_qualifier_type=
|
110
|
-
for x in species_w_ids.loc[[ind]][
|
104
|
+
identifiers.format_uri(uri=x, biological_qualifier_type=BQB.IS)
|
105
|
+
for x in species_w_ids.loc[[ind]][IDENTIFIERS.URL].tolist()
|
111
106
|
]
|
112
107
|
)
|
113
108
|
for ind in species_w_ids.index.unique()
|
114
109
|
],
|
115
110
|
index=species_w_ids.index.unique(),
|
116
|
-
).rename(
|
111
|
+
).rename(SBML_DFS.S_IDENTIFIERS)
|
117
112
|
|
118
113
|
# just retain s_name and s_Identifiers
|
119
114
|
# this just needs a source object which will be added later
|
@@ -124,21 +119,21 @@ def convert_trrust_to_sbml_dfs(
|
|
124
119
|
.merge(
|
125
120
|
species_w_ids_series,
|
126
121
|
how="left",
|
127
|
-
left_on=
|
122
|
+
left_on=SBML_DFS.S_NAME,
|
128
123
|
right_index=True,
|
129
124
|
)
|
130
125
|
.reset_index(drop=True)
|
131
126
|
)
|
132
127
|
# stub genes with missing IDs
|
133
|
-
species_df[
|
128
|
+
species_df[SBML_DFS.S_IDENTIFIERS] = species_df[SBML_DFS.S_IDENTIFIERS].fillna( # type: ignore
|
134
129
|
value=identifiers.Identifiers([])
|
135
130
|
)
|
136
131
|
|
137
132
|
# define distinct compartments
|
138
133
|
compartments_df = pd.DataFrame(
|
139
134
|
{
|
140
|
-
|
141
|
-
|
135
|
+
SBML_DFS.C_NAME: TRRUST_COMPARTMENT_NUCLEOPLASM,
|
136
|
+
SBML_DFS.C_IDENTIFIERS: identifiers.Identifiers(
|
142
137
|
[
|
143
138
|
identifiers.format_uri(
|
144
139
|
uri=identifiers.create_uri_url(
|
@@ -159,7 +154,7 @@ def convert_trrust_to_sbml_dfs(
|
|
159
154
|
upstream_compartment=TRRUST_COMPARTMENT_NUCLEOPLASM,
|
160
155
|
downstream_compartment=TRRUST_COMPARTMENT_NUCLEOPLASM,
|
161
156
|
)
|
162
|
-
gene_gene_identifier_edgelist[
|
157
|
+
gene_gene_identifier_edgelist[SBML_DFS.R_NAME] = [
|
163
158
|
f"{x} {y} of {z}"
|
164
159
|
for x, y, z in zip(
|
165
160
|
gene_gene_identifier_edgelist[STRING_UPSTREAM_NAME],
|
@@ -171,15 +166,15 @@ def convert_trrust_to_sbml_dfs(
|
|
171
166
|
# convert relationships to SBO terms
|
172
167
|
interaction_edgelist = gene_gene_identifier_edgelist.replace(
|
173
168
|
{"sign": MINI_SBO_FROM_NAME}
|
174
|
-
).rename({"sign":
|
169
|
+
).rename({"sign": SBML_DFS.SBO_TERM}, axis=1)
|
175
170
|
|
176
171
|
# format pubmed identifiers of interactions
|
177
|
-
interaction_edgelist[
|
172
|
+
interaction_edgelist[SBML_DFS.R_IDENTIFIERS] = [
|
178
173
|
_format_pubmed_for_interactions(x) for x in interaction_edgelist["reference"]
|
179
174
|
]
|
180
175
|
|
181
176
|
# directionality: by default, set r_isreversible to False for TRRUST data
|
182
|
-
interaction_edgelist[
|
177
|
+
interaction_edgelist[SBML_DFS.R_ISREVERSIBLE] = False
|
183
178
|
|
184
179
|
# reduce to essential variables
|
185
180
|
interaction_edgelist = interaction_edgelist[
|
@@ -188,10 +183,10 @@ def convert_trrust_to_sbml_dfs(
|
|
188
183
|
STRING_DOWNSTREAM_NAME,
|
189
184
|
STRING_UPSTREAM_COMPARTMENT,
|
190
185
|
STRING_DOWNSTREAM_COMPARTMENT,
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
186
|
+
SBML_DFS.R_NAME,
|
187
|
+
SBML_DFS.SBO_TERM,
|
188
|
+
SBML_DFS.R_IDENTIFIERS,
|
189
|
+
SBML_DFS.R_ISREVERSIBLE,
|
195
190
|
]
|
196
191
|
]
|
197
192
|
|
@@ -277,7 +272,7 @@ def _format_pubmed_for_interactions(pubmed_set):
|
|
277
272
|
url = identifiers.create_uri_url(ontology="pubmed", identifier=p, strict=False)
|
278
273
|
if url is not None:
|
279
274
|
valid_url = identifiers.format_uri(
|
280
|
-
uri=url, biological_qualifier_type=
|
275
|
+
uri=url, biological_qualifier_type=BQB.IS_DESCRIBED_BY
|
281
276
|
)
|
282
277
|
|
283
278
|
ids.append(valid_url)
|
napistu/matching/interactions.py
CHANGED
@@ -10,9 +10,9 @@ from napistu import utils
|
|
10
10
|
from napistu import sbml_dfs_core
|
11
11
|
from napistu.matching.species import features_to_pathway_species
|
12
12
|
from napistu.constants import (
|
13
|
-
|
13
|
+
NAPISTU_EDGELIST_REQ_VARS,
|
14
14
|
IDENTIFIER_EDGELIST_REQ_VARS,
|
15
|
-
|
15
|
+
NAPISTU_EDGELIST,
|
16
16
|
SBML_DFS,
|
17
17
|
IDENTIFIERS,
|
18
18
|
)
|
@@ -56,8 +56,8 @@ def edgelist_to_pathway_species(
|
|
56
56
|
"""
|
57
57
|
|
58
58
|
required_vars_distinct_features = {
|
59
|
-
|
60
|
-
|
59
|
+
NAPISTU_EDGELIST.IDENTIFIER_UPSTREAM,
|
60
|
+
NAPISTU_EDGELIST.IDENTIFIER_DOWNSTREAM,
|
61
61
|
}
|
62
62
|
missing_required_vars_distinct_features = (
|
63
63
|
required_vars_distinct_features.difference(
|
@@ -76,8 +76,8 @@ def edgelist_to_pathway_species(
|
|
76
76
|
distinct_identifiers = (
|
77
77
|
pd.concat(
|
78
78
|
[
|
79
|
-
formatted_edgelist[
|
80
|
-
formatted_edgelist[
|
79
|
+
formatted_edgelist[NAPISTU_EDGELIST.IDENTIFIER_UPSTREAM],
|
80
|
+
formatted_edgelist[NAPISTU_EDGELIST.IDENTIFIER_DOWNSTREAM],
|
81
81
|
]
|
82
82
|
)
|
83
83
|
.drop_duplicates()
|
@@ -99,16 +99,16 @@ def edgelist_to_pathway_species(
|
|
99
99
|
edges_on_pathway = formatted_edgelist.merge(
|
100
100
|
features_on_pathway[[SBML_DFS.S_ID, IDENTIFIERS.IDENTIFIER]].rename(
|
101
101
|
{
|
102
|
-
SBML_DFS.S_ID:
|
103
|
-
IDENTIFIERS.IDENTIFIER:
|
102
|
+
SBML_DFS.S_ID: NAPISTU_EDGELIST.S_ID_UPSTREAM,
|
103
|
+
IDENTIFIERS.IDENTIFIER: NAPISTU_EDGELIST.IDENTIFIER_UPSTREAM,
|
104
104
|
},
|
105
105
|
axis=1,
|
106
106
|
)
|
107
107
|
).merge(
|
108
108
|
features_on_pathway[[SBML_DFS.S_ID, IDENTIFIERS.IDENTIFIER]].rename(
|
109
109
|
{
|
110
|
-
SBML_DFS.S_ID:
|
111
|
-
IDENTIFIERS.IDENTIFIER:
|
110
|
+
SBML_DFS.S_ID: NAPISTU_EDGELIST.S_ID_DOWNSTREAM,
|
111
|
+
IDENTIFIERS.IDENTIFIER: NAPISTU_EDGELIST.IDENTIFIER_DOWNSTREAM,
|
112
112
|
},
|
113
113
|
axis=1,
|
114
114
|
)
|
@@ -159,15 +159,15 @@ def edgelist_to_scids(
|
|
159
159
|
|
160
160
|
# expand from s_ids to sc_ids
|
161
161
|
s_id_pairs = edges_on_pathway[
|
162
|
-
[
|
162
|
+
[NAPISTU_EDGELIST.S_ID_UPSTREAM, NAPISTU_EDGELIST.S_ID_DOWNSTREAM]
|
163
163
|
].drop_duplicates()
|
164
164
|
sc_id_pairs = s_id_pairs.merge(
|
165
165
|
sbml_dfs.compartmentalized_species[[SBML_DFS.S_ID]]
|
166
166
|
.reset_index()
|
167
167
|
.rename(
|
168
168
|
{
|
169
|
-
SBML_DFS.S_ID:
|
170
|
-
SBML_DFS.SC_ID:
|
169
|
+
SBML_DFS.S_ID: NAPISTU_EDGELIST.S_ID_UPSTREAM,
|
170
|
+
SBML_DFS.SC_ID: NAPISTU_EDGELIST.SC_ID_UPSTREAM,
|
171
171
|
},
|
172
172
|
axis=1,
|
173
173
|
)
|
@@ -176,8 +176,8 @@ def edgelist_to_scids(
|
|
176
176
|
.reset_index()
|
177
177
|
.rename(
|
178
178
|
{
|
179
|
-
SBML_DFS.S_ID:
|
180
|
-
SBML_DFS.SC_ID:
|
179
|
+
SBML_DFS.S_ID: NAPISTU_EDGELIST.S_ID_DOWNSTREAM,
|
180
|
+
SBML_DFS.SC_ID: NAPISTU_EDGELIST.SC_ID_DOWNSTREAM,
|
181
181
|
},
|
182
182
|
axis=1,
|
183
183
|
)
|
@@ -232,7 +232,7 @@ def filter_to_direct_mechanistic_interactions(
|
|
232
232
|
)
|
233
233
|
|
234
234
|
# reduce to distinct sc_id pairs
|
235
|
-
sc_id_pairs = edgelist_w_scids[list(
|
235
|
+
sc_id_pairs = edgelist_w_scids[list(NAPISTU_EDGELIST_REQ_VARS)].drop_duplicates()
|
236
236
|
|
237
237
|
# define all existing direct regulatory interactions
|
238
238
|
pathway_interactions = pd.concat(
|
@@ -241,36 +241,36 @@ def filter_to_direct_mechanistic_interactions(
|
|
241
241
|
sbml_dfs.reaction_species[
|
242
242
|
sbml_dfs.reaction_species[SBML_DFS.STOICHIOMETRY] == 0
|
243
243
|
][[SBML_DFS.R_ID, SBML_DFS.SC_ID]]
|
244
|
-
.rename({SBML_DFS.SC_ID:
|
244
|
+
.rename({SBML_DFS.SC_ID: NAPISTU_EDGELIST.SC_ID_UPSTREAM}, axis=1)
|
245
245
|
.merge(
|
246
246
|
sbml_dfs.reaction_species[
|
247
247
|
sbml_dfs.reaction_species[SBML_DFS.STOICHIOMETRY] < 0
|
248
248
|
][[SBML_DFS.R_ID, SBML_DFS.SC_ID]].rename(
|
249
|
-
{SBML_DFS.SC_ID:
|
249
|
+
{SBML_DFS.SC_ID: NAPISTU_EDGELIST.SC_ID_DOWNSTREAM}, axis=1
|
250
250
|
)
|
251
251
|
),
|
252
252
|
# pair <0 -> >0 # substrates affect products
|
253
253
|
sbml_dfs.reaction_species[
|
254
254
|
sbml_dfs.reaction_species[SBML_DFS.STOICHIOMETRY] < 0
|
255
255
|
][[SBML_DFS.R_ID, SBML_DFS.SC_ID]]
|
256
|
-
.rename({SBML_DFS.SC_ID:
|
256
|
+
.rename({SBML_DFS.SC_ID: NAPISTU_EDGELIST.SC_ID_UPSTREAM}, axis=1)
|
257
257
|
.merge(
|
258
258
|
sbml_dfs.reaction_species[
|
259
259
|
sbml_dfs.reaction_species[SBML_DFS.STOICHIOMETRY] > 0
|
260
260
|
][[SBML_DFS.R_ID, SBML_DFS.SC_ID]].rename(
|
261
|
-
{SBML_DFS.SC_ID:
|
261
|
+
{SBML_DFS.SC_ID: NAPISTU_EDGELIST.SC_ID_DOWNSTREAM}, axis=1
|
262
262
|
)
|
263
263
|
),
|
264
264
|
# pair 0 -> >0 # modifiers affect products
|
265
265
|
sbml_dfs.reaction_species[
|
266
266
|
sbml_dfs.reaction_species[SBML_DFS.STOICHIOMETRY] == 0
|
267
267
|
][[SBML_DFS.R_ID, SBML_DFS.SC_ID]]
|
268
|
-
.rename({SBML_DFS.SC_ID:
|
268
|
+
.rename({SBML_DFS.SC_ID: NAPISTU_EDGELIST.SC_ID_UPSTREAM}, axis=1)
|
269
269
|
.merge(
|
270
270
|
sbml_dfs.reaction_species[
|
271
271
|
sbml_dfs.reaction_species[SBML_DFS.STOICHIOMETRY] > 0
|
272
272
|
][[SBML_DFS.R_ID, SBML_DFS.SC_ID]].rename(
|
273
|
-
{SBML_DFS.SC_ID:
|
273
|
+
{SBML_DFS.SC_ID: NAPISTU_EDGELIST.SC_ID_DOWNSTREAM}, axis=1
|
274
274
|
)
|
275
275
|
),
|
276
276
|
]
|
@@ -282,16 +282,16 @@ def filter_to_direct_mechanistic_interactions(
|
|
282
282
|
.merge(
|
283
283
|
sbml_dfs.species[SBML_DFS.S_NAME]
|
284
284
|
.to_frame()
|
285
|
-
.rename({SBML_DFS.S_NAME:
|
286
|
-
left_on=
|
285
|
+
.rename({SBML_DFS.S_NAME: NAPISTU_EDGELIST.S_NAME_UPSTREAM}, axis=1),
|
286
|
+
left_on=NAPISTU_EDGELIST.S_ID_UPSTREAM,
|
287
287
|
right_index=True,
|
288
288
|
# add species metadata for matches
|
289
289
|
)
|
290
290
|
.merge(
|
291
291
|
sbml_dfs.species[SBML_DFS.S_NAME]
|
292
292
|
.to_frame()
|
293
|
-
.rename({SBML_DFS.S_NAME:
|
294
|
-
left_on=
|
293
|
+
.rename({SBML_DFS.S_NAME: NAPISTU_EDGELIST.S_NAME_DOWNSTREAM}, axis=1),
|
294
|
+
left_on=NAPISTU_EDGELIST.S_ID_DOWNSTREAM,
|
295
295
|
right_index=True,
|
296
296
|
# add metadata for reactions where interaction occurs
|
297
297
|
)
|
@@ -305,11 +305,11 @@ def filter_to_direct_mechanistic_interactions(
|
|
305
305
|
edgelist_w_direct_mechanistic_interactions = edgelist_w_scids.merge(
|
306
306
|
direct_edge_interactions[
|
307
307
|
[
|
308
|
-
|
309
|
-
|
308
|
+
NAPISTU_EDGELIST.SC_ID_UPSTREAM,
|
309
|
+
NAPISTU_EDGELIST.SC_ID_DOWNSTREAM,
|
310
310
|
SBML_DFS.R_ID,
|
311
|
-
|
312
|
-
|
311
|
+
NAPISTU_EDGELIST.S_NAME_UPSTREAM,
|
312
|
+
NAPISTU_EDGELIST.S_NAME_DOWNSTREAM,
|
313
313
|
SBML_DFS.R_NAME,
|
314
314
|
]
|
315
315
|
]
|
@@ -369,23 +369,25 @@ def filter_to_indirect_mechanistic_interactions(
|
|
369
369
|
edgelist_w_scids = paths._filter_paths_by_precomputed_distances(
|
370
370
|
edgelist_w_scids.rename(
|
371
371
|
{
|
372
|
-
|
373
|
-
|
372
|
+
NAPISTU_EDGELIST.SC_ID_UPSTREAM: NAPISTU_EDGELIST.SC_ID_ORIGIN,
|
373
|
+
NAPISTU_EDGELIST.SC_ID_DOWNSTREAM: NAPISTU_EDGELIST.SC_ID_DEST,
|
374
374
|
},
|
375
375
|
axis=1,
|
376
376
|
),
|
377
377
|
precomputed_distances,
|
378
378
|
).rename(
|
379
379
|
{
|
380
|
-
|
381
|
-
|
380
|
+
NAPISTU_EDGELIST.SC_ID_ORIGIN: NAPISTU_EDGELIST.SC_ID_UPSTREAM,
|
381
|
+
NAPISTU_EDGELIST.SC_ID_DEST: NAPISTU_EDGELIST.SC_ID_DOWNSTREAM,
|
382
382
|
},
|
383
383
|
axis=1,
|
384
384
|
)
|
385
385
|
|
386
386
|
# find paths from 1 upstream to all desired downstream sc_ids
|
387
387
|
# (this is the convention with igraph)
|
388
|
-
indexed_origin_vertices = edgelist_w_scids.set_index(
|
388
|
+
indexed_origin_vertices = edgelist_w_scids.set_index(
|
389
|
+
NAPISTU_EDGELIST.SC_ID_UPSTREAM
|
390
|
+
)
|
389
391
|
|
390
392
|
# loop through upstream cspecies and find paths to all downstream species
|
391
393
|
global_dict = dict()
|
@@ -410,7 +412,7 @@ def filter_to_indirect_mechanistic_interactions(
|
|
410
412
|
sbml_dfs,
|
411
413
|
origin=an_origin_index,
|
412
414
|
# find all unique destinations (as a list for compatibility with igraph dest)
|
413
|
-
dest=origin_targets[
|
415
|
+
dest=origin_targets[NAPISTU_EDGELIST.SC_ID_DOWNSTREAM].unique().tolist(),
|
414
416
|
weight_var=NAPISTU_GRAPH_EDGES.WEIGHTS,
|
415
417
|
)
|
416
418
|
|
@@ -483,7 +485,7 @@ def filter_to_indirect_mechanistic_interactions(
|
|
483
485
|
|
484
486
|
indirect_shortest_paths = edgelist_w_scids.merge(
|
485
487
|
all_shortest_paths,
|
486
|
-
left_on=[
|
488
|
+
left_on=[NAPISTU_EDGELIST.SC_ID_UPSTREAM, NAPISTU_EDGELIST.SC_ID_DOWNSTREAM],
|
487
489
|
right_on=[NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO],
|
488
490
|
)
|
489
491
|
|
@@ -498,9 +500,9 @@ def _edgelist_to_scids_if_needed(
|
|
498
500
|
) -> pd.DataFrame:
|
499
501
|
"""Map a set of edgelist species to cspecies or skip if cspecies were provided."""
|
500
502
|
|
501
|
-
if utils.match_pd_vars(edgelist,
|
503
|
+
if utils.match_pd_vars(edgelist, NAPISTU_EDGELIST_REQ_VARS).are_present:
|
502
504
|
logger.info(
|
503
|
-
f"An edgelist with {', '.join(
|
505
|
+
f"An edgelist with {', '.join(NAPISTU_EDGELIST_REQ_VARS)} was provided; identifier matching will be skipped"
|
504
506
|
)
|
505
507
|
return edgelist
|
506
508
|
else:
|
napistu/matching/species.py
CHANGED
@@ -33,7 +33,7 @@ def features_to_pathway_species(
|
|
33
33
|
pd.DataFrame containing a "feature_identifiers_var" variable used to match entries
|
34
34
|
species_identifiers: pd.DataFrame
|
35
35
|
A table of molecular species identifiers produced from sbml_dfs.get_identifiers("species")
|
36
|
-
generally using
|
36
|
+
generally using sbml_dfs.export_sbml_dfs()
|
37
37
|
ontologies: set
|
38
38
|
A set of ontologies used to match features to pathway species
|
39
39
|
feature_identifiers_var: str
|
napistu/modify/gaps.py
CHANGED
@@ -20,6 +20,7 @@ from napistu.constants import MINI_SBO_FROM_NAME
|
|
20
20
|
from napistu.constants import SBOTERM_NAMES
|
21
21
|
from napistu.constants import SOURCE_SPEC
|
22
22
|
from napistu.ingestion.constants import EXCHANGE_COMPARTMENT
|
23
|
+
from napistu.network.constants import GRAPH_WIRING_APPROACHES
|
23
24
|
|
24
25
|
logger = logging.getLogger(__name__)
|
25
26
|
|
@@ -463,7 +464,7 @@ def _identify_species_needing_transport_reactions(
|
|
463
464
|
|
464
465
|
# create a directed graph
|
465
466
|
directed_graph = net_create.create_napistu_graph(
|
466
|
-
sbml_dfs, directed=True,
|
467
|
+
sbml_dfs, directed=True, wiring_approach=GRAPH_WIRING_APPROACHES.BIPARTITE
|
467
468
|
)
|
468
469
|
|
469
470
|
# consider each s_id and protein separately
|
napistu/network/constants.py
CHANGED
@@ -7,17 +7,6 @@ from types import SimpleNamespace
|
|
7
7
|
from napistu.constants import SBML_DFS
|
8
8
|
from napistu.constants import SBOTERM_NAMES
|
9
9
|
|
10
|
-
# Graph types
|
11
|
-
NAPISTU_GRAPH_TYPES = SimpleNamespace(
|
12
|
-
BIPARTITE="bipartite", REGULATORY="regulatory", SURROGATE="surrogate"
|
13
|
-
)
|
14
|
-
|
15
|
-
VALID_NAPISTU_GRAPH_TYPES = [
|
16
|
-
NAPISTU_GRAPH_TYPES.BIPARTITE,
|
17
|
-
NAPISTU_GRAPH_TYPES.REGULATORY,
|
18
|
-
NAPISTU_GRAPH_TYPES.SURROGATE,
|
19
|
-
]
|
20
|
-
|
21
10
|
NAPISTU_GRAPH = SimpleNamespace(VERTICES="vertices", EDGES="edges", METADATA="metadata")
|
22
11
|
|
23
12
|
NAPISTU_GRAPH_DIRECTEDNESS = SimpleNamespace(
|
@@ -57,6 +46,67 @@ VALID_NAPISTU_GRAPH_NODE_TYPES = [
|
|
57
46
|
NAPISTU_GRAPH_NODE_TYPES.SPECIES,
|
58
47
|
]
|
59
48
|
|
49
|
+
# translating an SBML_dfs -> NapistuGraph
|
50
|
+
|
51
|
+
GRAPH_WIRING_APPROACHES = SimpleNamespace(
|
52
|
+
BIPARTITE="bipartite", REGULATORY="regulatory", SURROGATE="surrogate"
|
53
|
+
)
|
54
|
+
|
55
|
+
VALID_GRAPH_WIRING_APPROACHES = list(GRAPH_WIRING_APPROACHES.__dict__.values())
|
56
|
+
|
57
|
+
GRAPH_WIRING_HIERARCHIES = {
|
58
|
+
# three tiers with reactions in the middle
|
59
|
+
# in a bipartite network, molecules are connected to reactions but not to other molecules
|
60
|
+
GRAPH_WIRING_APPROACHES.BIPARTITE: [
|
61
|
+
[
|
62
|
+
SBOTERM_NAMES.CATALYST,
|
63
|
+
SBOTERM_NAMES.INHIBITOR,
|
64
|
+
SBOTERM_NAMES.INTERACTOR,
|
65
|
+
SBOTERM_NAMES.MODIFIER,
|
66
|
+
SBOTERM_NAMES.REACTANT,
|
67
|
+
SBOTERM_NAMES.STIMULATOR,
|
68
|
+
],
|
69
|
+
[NAPISTU_GRAPH_NODE_TYPES.REACTION],
|
70
|
+
[SBOTERM_NAMES.MODIFIED, SBOTERM_NAMES.PRODUCT],
|
71
|
+
],
|
72
|
+
# the regulatory graph defines a hierarchy of upstream and downstream
|
73
|
+
# entities in a reaction
|
74
|
+
# modifier/stimulator/inhibitor -> catalyst -> reactant -> reaction -> product
|
75
|
+
GRAPH_WIRING_APPROACHES.REGULATORY: [
|
76
|
+
[SBOTERM_NAMES.INHIBITOR, SBOTERM_NAMES.MODIFIER, SBOTERM_NAMES.STIMULATOR],
|
77
|
+
[SBOTERM_NAMES.CATALYST],
|
78
|
+
[SBOTERM_NAMES.INTERACTOR, SBOTERM_NAMES.REACTANT],
|
79
|
+
[NAPISTU_GRAPH_NODE_TYPES.REACTION],
|
80
|
+
[SBOTERM_NAMES.MODIFIED, SBOTERM_NAMES.PRODUCT],
|
81
|
+
],
|
82
|
+
# an alternative layout to regulatory where enzymes are downstream of substrates.
|
83
|
+
# this doesn't make much sense from a regulatory perspective because
|
84
|
+
# enzymes modify substrates, not the other way around. but, it's what one might
|
85
|
+
# expect if catalysts are a surrogate for reactions as is the case for metabolic
|
86
|
+
# network layouts
|
87
|
+
GRAPH_WIRING_APPROACHES.SURROGATE: [
|
88
|
+
[SBOTERM_NAMES.INHIBITOR, SBOTERM_NAMES.MODIFIER, SBOTERM_NAMES.STIMULATOR],
|
89
|
+
[SBOTERM_NAMES.INTERACTOR, SBOTERM_NAMES.REACTANT],
|
90
|
+
[SBOTERM_NAMES.CATALYST],
|
91
|
+
[NAPISTU_GRAPH_NODE_TYPES.REACTION],
|
92
|
+
[SBOTERM_NAMES.MODIFIED, SBOTERM_NAMES.PRODUCT],
|
93
|
+
],
|
94
|
+
}
|
95
|
+
|
96
|
+
# when should reaction vertices be excluded from the graph?
|
97
|
+
|
98
|
+
DROP_REACTIONS_WHEN = SimpleNamespace(
|
99
|
+
ALWAYS="always",
|
100
|
+
# if there are 2 participants
|
101
|
+
EDGELIST="edgelist",
|
102
|
+
# if there are 2 participants which are both "interactor"
|
103
|
+
SAME_TIER="same_tier",
|
104
|
+
)
|
105
|
+
|
106
|
+
VALID_DROP_REACTIONS_WHEN = list(DROP_REACTIONS_WHEN.__dict__.values())
|
107
|
+
|
108
|
+
# adding weights to NapistuGraph
|
109
|
+
|
60
110
|
NAPISTU_WEIGHTING_STRATEGIES = SimpleNamespace(
|
61
111
|
CALIBRATED="calibrated", MIXED="mixed", TOPOLOGY="topology", UNWEIGHTED="unweighted"
|
62
112
|
)
|
@@ -116,40 +166,6 @@ VALID_NET_POLARITIES = [
|
|
116
166
|
NET_POLARITY.AMBIGUOUS_INHIBITION,
|
117
167
|
]
|
118
168
|
|
119
|
-
# the regulatory graph defines a hierarchy of upstream and downstream
|
120
|
-
# entities in a reaction
|
121
|
-
# modifier/stimulator/inhibitor -> catalyst -> reactant -> reaction -> product
|
122
|
-
|
123
|
-
REGULATORY_GRAPH_HIERARCHY = [
|
124
|
-
[SBOTERM_NAMES.MODIFIER, SBOTERM_NAMES.STIMULATOR, SBOTERM_NAMES.INHIBITOR],
|
125
|
-
[SBOTERM_NAMES.CATALYST],
|
126
|
-
[SBOTERM_NAMES.REACTANT],
|
127
|
-
[NAPISTU_GRAPH_NODE_TYPES.REACTION],
|
128
|
-
# normally we don't expect interactors to be defined because they are handled by
|
129
|
-
# net_create._format_interactors_for_regulatory_graph() but include them here
|
130
|
-
# until Issue #102 is solved
|
131
|
-
[SBOTERM_NAMES.INTERACTOR],
|
132
|
-
[SBOTERM_NAMES.PRODUCT],
|
133
|
-
]
|
134
|
-
|
135
|
-
# an alternative layout to regulatory where enyzmes are downstream of substrates.
|
136
|
-
# this doesn't make much sense from a regulatory perspective because
|
137
|
-
# enzymes modify substrates not the other way around. but, its what one might
|
138
|
-
# expect if catalysts are a surrogate for reactions as is the case for metabolic
|
139
|
-
# network layouts
|
140
|
-
|
141
|
-
SURROGATE_GRAPH_HIERARCHY = [
|
142
|
-
[SBOTERM_NAMES.MODIFIER, SBOTERM_NAMES.STIMULATOR, SBOTERM_NAMES.INHIBITOR],
|
143
|
-
[SBOTERM_NAMES.REACTANT],
|
144
|
-
[SBOTERM_NAMES.CATALYST],
|
145
|
-
[NAPISTU_GRAPH_NODE_TYPES.REACTION],
|
146
|
-
# normally we don't expect interactors to be defined because they are handled by
|
147
|
-
# net_create._format_interactors_for_regulatory_graph() but include them here
|
148
|
-
# until Issue #102 is solved
|
149
|
-
[SBOTERM_NAMES.INTERACTOR],
|
150
|
-
[SBOTERM_NAMES.PRODUCT],
|
151
|
-
]
|
152
|
-
|
153
169
|
NEIGHBORHOOD_NETWORK_TYPES = SimpleNamespace(
|
154
170
|
DOWNSTREAM="downstream", HOURGLASS="hourglass", UPSTREAM="upstream"
|
155
171
|
)
|
napistu/network/data_handling.py
CHANGED
@@ -6,7 +6,7 @@ import pandas as pd
|
|
6
6
|
|
7
7
|
from napistu import sbml_dfs_core
|
8
8
|
from napistu.network import net_create
|
9
|
-
from napistu.network.
|
9
|
+
from napistu.network.ng_core import NapistuGraph
|
10
10
|
|
11
11
|
from napistu.constants import SBML_DFS, ENTITIES_W_DATA
|
12
12
|
from napistu.network.constants import NAPISTU_GRAPH, DEFAULT_WT_TRANS, WEIGHTING_SPEC
|
napistu/network/neighborhoods.py
CHANGED
@@ -22,7 +22,7 @@ from napistu.constants import SBML_DFS
|
|
22
22
|
from napistu.constants import MINI_SBO_NAME_TO_POLARITY
|
23
23
|
from napistu.constants import MINI_SBO_TO_NAME
|
24
24
|
|
25
|
-
from napistu.network.constants import
|
25
|
+
from napistu.network.constants import GRAPH_WIRING_APPROACHES
|
26
26
|
from napistu.network.constants import NEIGHBORHOOD_NETWORK_TYPES
|
27
27
|
from napistu.network.constants import VALID_NEIGHBORHOOD_NETWORK_TYPES
|
28
28
|
|
@@ -316,7 +316,7 @@ def create_neighborhood_prefix(network_type: str, order: int, top_n: int) -> str
|
|
316
316
|
def load_neighborhoods_by_partition(
|
317
317
|
selected_partition: int,
|
318
318
|
neighborhood_outdir: str,
|
319
|
-
|
319
|
+
wiring_approach: str = GRAPH_WIRING_APPROACHES.REGULATORY,
|
320
320
|
) -> None:
|
321
321
|
"""
|
322
322
|
Load Neighborhoods By Partition
|
@@ -380,7 +380,7 @@ def load_neighborhoods_by_partition(
|
|
380
380
|
model_prefix="curated",
|
381
381
|
network_dir=consensus_outdir,
|
382
382
|
directed=True,
|
383
|
-
|
383
|
+
wiring_approach=wiring_approach,
|
384
384
|
)
|
385
385
|
|
386
386
|
all_neighborhoods_df, neighborhoods_dict = load_neighborhoods(
|