napistu 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__main__.py +38 -27
- napistu/consensus.py +22 -27
- napistu/constants.py +91 -65
- napistu/context/filtering.py +2 -1
- napistu/identifiers.py +3 -6
- napistu/indices.py +3 -1
- napistu/ingestion/bigg.py +6 -6
- napistu/ingestion/sbml.py +298 -295
- napistu/ingestion/string.py +16 -19
- napistu/ingestion/trrust.py +22 -27
- napistu/ingestion/yeast.py +2 -1
- napistu/matching/interactions.py +4 -4
- napistu/matching/species.py +1 -1
- napistu/modify/uncompartmentalize.py +1 -1
- napistu/network/net_create.py +1 -1
- napistu/network/paths.py +1 -1
- napistu/ontologies/dogma.py +2 -1
- napistu/ontologies/genodexito.py +5 -1
- napistu/ontologies/renaming.py +4 -0
- napistu/sbml_dfs_core.py +1343 -2167
- napistu/sbml_dfs_utils.py +1086 -143
- napistu/utils.py +52 -41
- {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/METADATA +2 -2
- {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/RECORD +40 -40
- tests/conftest.py +113 -13
- tests/test_consensus.py +161 -4
- tests/test_context_filtering.py +2 -2
- tests/test_gaps.py +26 -15
- tests/test_network_net_create.py +1 -1
- tests/test_network_precompute.py +1 -1
- tests/test_ontologies_genodexito.py +3 -0
- tests/test_ontologies_mygene.py +3 -0
- tests/test_ontologies_renaming.py +28 -24
- tests/test_sbml_dfs_core.py +260 -211
- tests/test_sbml_dfs_utils.py +194 -36
- tests/test_utils.py +19 -0
- {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/WHEEL +0 -0
- {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/entry_points.txt +0 -0
- {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/top_level.txt +0 -0
napistu/ingestion/string.py
CHANGED
@@ -5,17 +5,14 @@ import logging
|
|
5
5
|
import pandas as pd
|
6
6
|
from napistu import identifiers
|
7
7
|
from napistu import sbml_dfs_core
|
8
|
+
from napistu import sbml_dfs_utils
|
8
9
|
from napistu import source
|
9
10
|
from napistu import utils
|
11
|
+
from napistu.ingestion import napistu_edgelist
|
10
12
|
from napistu.constants import BQB
|
11
13
|
from napistu.constants import MINI_SBO_FROM_NAME
|
12
|
-
from napistu.
|
13
|
-
from napistu.
|
14
|
-
from napistu.ingestion.constants import SBML_SPECIES_DICT_NAME
|
15
|
-
from napistu.ingestion.constants import SMBL_REACTION_DICT_IDENTIFIERS
|
16
|
-
from napistu.ingestion.constants import SMBL_REACTION_DICT_IS_REVERSIBLE
|
17
|
-
from napistu.ingestion.constants import SMBL_REACTION_DICT_NAME
|
18
|
-
from napistu.ingestion.constants import SMBL_REACTION_SPEC_SBO_TERM
|
14
|
+
from napistu.constants import ONTOLOGIES
|
15
|
+
from napistu.constants import SBML_DFS
|
19
16
|
from napistu.ingestion.constants import STRING_DOWNSTREAM_COMPARTMENT
|
20
17
|
from napistu.ingestion.constants import STRING_DOWNSTREAM_NAME
|
21
18
|
from napistu.ingestion.constants import STRING_PROTEIN_ID
|
@@ -136,10 +133,10 @@ def convert_string_to_sbml_dfs(
|
|
136
133
|
|
137
134
|
# define identifier mapping from aliases to use:
|
138
135
|
alias_to_identifier = {
|
139
|
-
"Ensembl_gene": (
|
140
|
-
"Ensembl_transcript": (
|
141
|
-
"Ensembl_translation": (
|
142
|
-
"Ensembl_UniProt_AC": (
|
136
|
+
"Ensembl_gene": (ONTOLOGIES.ENSEMBL_GENE, BQB.IS_ENCODED_BY),
|
137
|
+
"Ensembl_transcript": (ONTOLOGIES.ENSEMBL_TRANSCRIPT, BQB.IS_ENCODED_BY),
|
138
|
+
"Ensembl_translation": (ONTOLOGIES.ENSEMBL_PROTEIN, BQB.IS),
|
139
|
+
"Ensembl_UniProt_AC": (ONTOLOGIES.UNIPROT, BQB.IS),
|
143
140
|
}
|
144
141
|
|
145
142
|
# filter aliases to only keep required ones
|
@@ -158,7 +155,7 @@ def convert_string_to_sbml_dfs(
|
|
158
155
|
# Define compartments
|
159
156
|
# Currently we are mapping everything to the `CELLULAR_COMPONENT`
|
160
157
|
# which is a catch-all GO term for unknown localisation
|
161
|
-
compartments_df =
|
158
|
+
compartments_df = sbml_dfs_utils.stub_compartments()
|
162
159
|
|
163
160
|
# define interactions
|
164
161
|
interaction_edgelist = _build_interactor_edgelist(uq_string_edgelist)
|
@@ -275,17 +272,17 @@ def _build_species_df(
|
|
275
272
|
species_df = (
|
276
273
|
pd.Series(
|
277
274
|
list(set(edgelist[source_col]).union(edgelist[target_col])),
|
278
|
-
name=
|
275
|
+
name=SBML_DFS.S_NAME,
|
279
276
|
)
|
280
277
|
.to_frame()
|
281
|
-
.set_index(
|
278
|
+
.set_index(SBML_DFS.S_NAME, drop=False)
|
282
279
|
.apply(
|
283
280
|
_get_identifiers,
|
284
281
|
alias_to_identifier=alias_to_identifier,
|
285
282
|
dat_alias=aliases,
|
286
283
|
axis=1,
|
287
284
|
)
|
288
|
-
.rename(
|
285
|
+
.rename(SBML_DFS.S_IDENTIFIERS)
|
289
286
|
.reset_index()
|
290
287
|
)
|
291
288
|
return species_df
|
@@ -311,8 +308,8 @@ def _build_interactor_edgelist(
|
|
311
308
|
**{
|
312
309
|
STRING_UPSTREAM_COMPARTMENT: compartment,
|
313
310
|
STRING_DOWNSTREAM_COMPARTMENT: compartment,
|
314
|
-
|
315
|
-
|
311
|
+
SBML_DFS.SBO_TERM: sbo_interactor,
|
312
|
+
SBML_DFS.R_IDENTIFIERS: lambda x: identifiers.Identifiers([]),
|
316
313
|
}
|
317
314
|
)
|
318
315
|
if add_reverse_interactions:
|
@@ -335,10 +332,10 @@ def _build_interactor_edgelist(
|
|
335
332
|
)
|
336
333
|
|
337
334
|
interaction_edgelist = dat
|
338
|
-
interaction_edgelist[
|
335
|
+
interaction_edgelist[SBML_DFS.R_NAME] = _build_string_reaction_name(
|
339
336
|
dat[STRING_UPSTREAM_NAME], dat[STRING_DOWNSTREAM_NAME]
|
340
337
|
)
|
341
|
-
interaction_edgelist[
|
338
|
+
interaction_edgelist[SBML_DFS.R_ISREVERSIBLE] = True
|
342
339
|
|
343
340
|
return interaction_edgelist
|
344
341
|
|
napistu/ingestion/trrust.py
CHANGED
@@ -8,16 +8,11 @@ from napistu import identifiers
|
|
8
8
|
from napistu import sbml_dfs_core
|
9
9
|
from napistu import source
|
10
10
|
from napistu import utils
|
11
|
+
from napistu.constants import BQB
|
12
|
+
from napistu.constants import IDENTIFIERS
|
11
13
|
from napistu.constants import MINI_SBO_FROM_NAME
|
12
14
|
from napistu.constants import SBOTERM_NAMES
|
13
|
-
from napistu.
|
14
|
-
from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_NAME
|
15
|
-
from napistu.ingestion.constants import SBML_SPECIES_DICT_IDENTIFIERS
|
16
|
-
from napistu.ingestion.constants import SBML_SPECIES_DICT_NAME
|
17
|
-
from napistu.ingestion.constants import SMBL_REACTION_DICT_IDENTIFIERS
|
18
|
-
from napistu.ingestion.constants import SMBL_REACTION_DICT_IS_REVERSIBLE
|
19
|
-
from napistu.ingestion.constants import SMBL_REACTION_DICT_NAME
|
20
|
-
from napistu.ingestion.constants import SMBL_REACTION_SPEC_SBO_TERM
|
15
|
+
from napistu.constants import SBML_DFS
|
21
16
|
from napistu.ingestion.constants import SPECIES_FULL_NAME_HUMAN
|
22
17
|
from napistu.ingestion.constants import STRING_DOWNSTREAM_COMPARTMENT
|
23
18
|
from napistu.ingestion.constants import STRING_DOWNSTREAM_NAME
|
@@ -81,16 +76,16 @@ def convert_trrust_to_sbml_dfs(
|
|
81
76
|
species_df = (
|
82
77
|
pd.DataFrame(
|
83
78
|
{
|
84
|
-
|
79
|
+
SBML_DFS.S_NAME: list(
|
85
80
|
{*edge_summaries_df["from"], *edge_summaries_df["to"]}
|
86
81
|
)
|
87
82
|
}
|
88
83
|
)
|
89
84
|
.merge(
|
90
|
-
uniprot_2_symbol.rename({TRRUST_SYMBOL:
|
85
|
+
uniprot_2_symbol.rename({TRRUST_SYMBOL: SBML_DFS.S_NAME}, axis=1),
|
91
86
|
how="left",
|
92
87
|
)
|
93
|
-
.set_index(
|
88
|
+
.set_index(SBML_DFS.S_NAME)
|
94
89
|
)
|
95
90
|
|
96
91
|
# create Identifiers objects for all species with uniprot IDs
|
@@ -106,14 +101,14 @@ def convert_trrust_to_sbml_dfs(
|
|
106
101
|
[
|
107
102
|
identifiers.Identifiers(
|
108
103
|
[
|
109
|
-
identifiers.format_uri(uri=x, biological_qualifier_type=
|
110
|
-
for x in species_w_ids.loc[[ind]][
|
104
|
+
identifiers.format_uri(uri=x, biological_qualifier_type=BQB.IS)
|
105
|
+
for x in species_w_ids.loc[[ind]][IDENTIFIERS.URL].tolist()
|
111
106
|
]
|
112
107
|
)
|
113
108
|
for ind in species_w_ids.index.unique()
|
114
109
|
],
|
115
110
|
index=species_w_ids.index.unique(),
|
116
|
-
).rename(
|
111
|
+
).rename(SBML_DFS.S_IDENTIFIERS)
|
117
112
|
|
118
113
|
# just retain s_name and s_Identifiers
|
119
114
|
# this just needs a source object which will be added later
|
@@ -124,21 +119,21 @@ def convert_trrust_to_sbml_dfs(
|
|
124
119
|
.merge(
|
125
120
|
species_w_ids_series,
|
126
121
|
how="left",
|
127
|
-
left_on=
|
122
|
+
left_on=SBML_DFS.S_NAME,
|
128
123
|
right_index=True,
|
129
124
|
)
|
130
125
|
.reset_index(drop=True)
|
131
126
|
)
|
132
127
|
# stub genes with missing IDs
|
133
|
-
species_df[
|
128
|
+
species_df[SBML_DFS.S_IDENTIFIERS] = species_df[SBML_DFS.S_IDENTIFIERS].fillna( # type: ignore
|
134
129
|
value=identifiers.Identifiers([])
|
135
130
|
)
|
136
131
|
|
137
132
|
# define distinct compartments
|
138
133
|
compartments_df = pd.DataFrame(
|
139
134
|
{
|
140
|
-
|
141
|
-
|
135
|
+
SBML_DFS.C_NAME: TRRUST_COMPARTMENT_NUCLEOPLASM,
|
136
|
+
SBML_DFS.C_IDENTIFIERS: identifiers.Identifiers(
|
142
137
|
[
|
143
138
|
identifiers.format_uri(
|
144
139
|
uri=identifiers.create_uri_url(
|
@@ -159,7 +154,7 @@ def convert_trrust_to_sbml_dfs(
|
|
159
154
|
upstream_compartment=TRRUST_COMPARTMENT_NUCLEOPLASM,
|
160
155
|
downstream_compartment=TRRUST_COMPARTMENT_NUCLEOPLASM,
|
161
156
|
)
|
162
|
-
gene_gene_identifier_edgelist[
|
157
|
+
gene_gene_identifier_edgelist[SBML_DFS.R_NAME] = [
|
163
158
|
f"{x} {y} of {z}"
|
164
159
|
for x, y, z in zip(
|
165
160
|
gene_gene_identifier_edgelist[STRING_UPSTREAM_NAME],
|
@@ -171,15 +166,15 @@ def convert_trrust_to_sbml_dfs(
|
|
171
166
|
# convert relationships to SBO terms
|
172
167
|
interaction_edgelist = gene_gene_identifier_edgelist.replace(
|
173
168
|
{"sign": MINI_SBO_FROM_NAME}
|
174
|
-
).rename({"sign":
|
169
|
+
).rename({"sign": SBML_DFS.SBO_TERM}, axis=1)
|
175
170
|
|
176
171
|
# format pubmed identifiers of interactions
|
177
|
-
interaction_edgelist[
|
172
|
+
interaction_edgelist[SBML_DFS.R_IDENTIFIERS] = [
|
178
173
|
_format_pubmed_for_interactions(x) for x in interaction_edgelist["reference"]
|
179
174
|
]
|
180
175
|
|
181
176
|
# directionality: by default, set r_isreversible to False for TRRUST data
|
182
|
-
interaction_edgelist[
|
177
|
+
interaction_edgelist[SBML_DFS.R_ISREVERSIBLE] = False
|
183
178
|
|
184
179
|
# reduce to essential variables
|
185
180
|
interaction_edgelist = interaction_edgelist[
|
@@ -188,10 +183,10 @@ def convert_trrust_to_sbml_dfs(
|
|
188
183
|
STRING_DOWNSTREAM_NAME,
|
189
184
|
STRING_UPSTREAM_COMPARTMENT,
|
190
185
|
STRING_DOWNSTREAM_COMPARTMENT,
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
186
|
+
SBML_DFS.R_NAME,
|
187
|
+
SBML_DFS.SBO_TERM,
|
188
|
+
SBML_DFS.R_IDENTIFIERS,
|
189
|
+
SBML_DFS.R_ISREVERSIBLE,
|
195
190
|
]
|
196
191
|
]
|
197
192
|
|
@@ -277,7 +272,7 @@ def _format_pubmed_for_interactions(pubmed_set):
|
|
277
272
|
url = identifiers.create_uri_url(ontology="pubmed", identifier=p, strict=False)
|
278
273
|
if url is not None:
|
279
274
|
valid_url = identifiers.format_uri(
|
280
|
-
uri=url, biological_qualifier_type=
|
275
|
+
uri=url, biological_qualifier_type=BQB.IS_DESCRIBED_BY
|
281
276
|
)
|
282
277
|
|
283
278
|
ids.append(valid_url)
|
napistu/ingestion/yeast.py
CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
3
3
|
import pandas as pd
|
4
4
|
from napistu import identifiers
|
5
5
|
from napistu import sbml_dfs_core
|
6
|
+
from napistu import sbml_dfs_utils
|
6
7
|
from napistu import source
|
7
8
|
from napistu import utils
|
8
9
|
from napistu.constants import BQB
|
@@ -105,7 +106,7 @@ def convert_idea_kinetics_to_sbml_dfs(
|
|
105
106
|
# Constant fields (for this data source)
|
106
107
|
|
107
108
|
# setup compartments (just treat this as uncompartmentalized for now)
|
108
|
-
compartments_df =
|
109
|
+
compartments_df = sbml_dfs_utils.stub_compartments()
|
109
110
|
|
110
111
|
# Per convention unaggregated models receive an empty source
|
111
112
|
interaction_source = source.Source(init=True)
|
napistu/matching/interactions.py
CHANGED
@@ -40,7 +40,7 @@ def edgelist_to_pathway_species(
|
|
40
40
|
pd.DataFrame containing "identifier_upstream" and "identifier_downstream" variables used to match entries
|
41
41
|
species_identifiers: pd.DataFrame
|
42
42
|
A table of molecular species identifiers produced from sbml_dfs.get_identifiers("species") generally using
|
43
|
-
|
43
|
+
sbml_dfs.export_sbml_dfs()
|
44
44
|
ontologies: set
|
45
45
|
A set of ontologies used to match features to pathway species
|
46
46
|
feature_id_var: str, default=FEATURE_ID_VAR_DEFAULT
|
@@ -138,7 +138,7 @@ def edgelist_to_scids(
|
|
138
138
|
A mechanistic model
|
139
139
|
species_identifiers: pd.DataFrame
|
140
140
|
A table of molecular species identifiers produced from
|
141
|
-
sbml_dfs.get_identifiers("species") generally using
|
141
|
+
sbml_dfs.get_identifiers("species") generally using sbml_dfs.export_sbml_dfs()
|
142
142
|
ontologies: set
|
143
143
|
A set of ontologies used to match features to pathway species
|
144
144
|
|
@@ -218,7 +218,7 @@ def filter_to_direct_mechanistic_interactions(
|
|
218
218
|
species_identifiers: pd.DataFrame
|
219
219
|
A table of molecular species identifiers
|
220
220
|
produced from sbml_dfs.get_identifiers("species") generally
|
221
|
-
using
|
221
|
+
using sbml_dfs.export_sbml_dfs()
|
222
222
|
ontologies: set
|
223
223
|
A set of ontologies used to match features to pathway species
|
224
224
|
|
@@ -342,7 +342,7 @@ def filter_to_indirect_mechanistic_interactions(
|
|
342
342
|
A mechanistic model
|
343
343
|
species_identifiers: pandas.DataFrame
|
344
344
|
A table of molecular species identifiers produced from
|
345
|
-
sbml_dfs.get_identifiers("species") generally using
|
345
|
+
sbml_dfs.get_identifiers("species") generally using sbml_dfs.export_sbml_dfs()
|
346
346
|
napistu_graph: igraph.Graph
|
347
347
|
A network representation of the sbml_dfs model
|
348
348
|
ontologies: set
|
napistu/matching/species.py
CHANGED
@@ -33,7 +33,7 @@ def features_to_pathway_species(
|
|
33
33
|
pd.DataFrame containing a "feature_identifiers_var" variable used to match entries
|
34
34
|
species_identifiers: pd.DataFrame
|
35
35
|
A table of molecular species identifiers produced from sbml_dfs.get_identifiers("species")
|
36
|
-
generally using
|
36
|
+
generally using sbml_dfs.export_sbml_dfs()
|
37
37
|
ontologies: set
|
38
38
|
A set of ontologies used to match features to pathway species
|
39
39
|
feature_identifiers_var: str
|
@@ -48,7 +48,7 @@ def uncompartmentalize_sbml_dfs(
|
|
48
48
|
)
|
49
49
|
|
50
50
|
# 1. update the compartments table to the stubbed default level: GO CELLULAR_COMPONENT
|
51
|
-
stubbed_compartment =
|
51
|
+
stubbed_compartment = sbml_dfs_utils.stub_compartments().assign(
|
52
52
|
c_Source=_create_stubbed_source()
|
53
53
|
)
|
54
54
|
|
napistu/network/net_create.py
CHANGED
@@ -1697,7 +1697,7 @@ def _create_topology_weights(
|
|
1697
1697
|
base_score (float): offset which will be added to all weights.
|
1698
1698
|
protein_multiplier (int): multiplier for non-metabolite species (lower weight paths will tend to be selected).
|
1699
1699
|
metabolite_multiplier (int): multiplier for metabolites (defined as a species with a ChEBI ID).
|
1700
|
-
unknown_multiplier (int): multiplier for species without any identifier. See
|
1700
|
+
unknown_multiplier (int): multiplier for species without any identifier. See sbml_dfs_utils.species_type_types.
|
1701
1701
|
scale_multiplier_by_meandegree (bool): if True then multipliers will be rescaled by the average number of
|
1702
1702
|
connections a node has (i.e., its degree) so that weights will be relatively similar regardless of network
|
1703
1703
|
size and sparsity.
|
napistu/network/paths.py
CHANGED
@@ -489,7 +489,7 @@ def _label_path_reactions(sbml_dfs: sbml_dfs_core.SBML_dfs, paths_df: pd.DataFra
|
|
489
489
|
reaction_info = (
|
490
490
|
pd.concat(
|
491
491
|
[
|
492
|
-
|
492
|
+
sbml_dfs.reaction_formulas(r_ids=x)
|
493
493
|
for x in set(reaction_paths["node"])
|
494
494
|
]
|
495
495
|
)
|
napistu/ontologies/dogma.py
CHANGED
@@ -4,6 +4,7 @@ import logging
|
|
4
4
|
import pandas as pd
|
5
5
|
|
6
6
|
from napistu import sbml_dfs_core
|
7
|
+
from napistu import sbml_dfs_utils
|
7
8
|
from napistu import source
|
8
9
|
from napistu import identifiers
|
9
10
|
from napistu import utils
|
@@ -59,7 +60,7 @@ def create_dogmatic_sbml_dfs(
|
|
59
60
|
)
|
60
61
|
|
61
62
|
# stub required but invariant variables
|
62
|
-
compartments_df =
|
63
|
+
compartments_df = sbml_dfs_utils.stub_compartments()
|
63
64
|
interaction_source = source.Source(init=True)
|
64
65
|
|
65
66
|
# interactions table. This is required to create the sbml_dfs but we'll drop the info later
|
napistu/ontologies/genodexito.py
CHANGED
@@ -356,7 +356,7 @@ class Genodexito:
|
|
356
356
|
)
|
357
357
|
logger.debug(
|
358
358
|
f"{ids.shape[0] - expanded_ids.shape[0]} "
|
359
|
-
"ids are not included in expanded ids"
|
359
|
+
"ids are not included in expanded ids. These will be filled with empty Identifiers"
|
360
360
|
)
|
361
361
|
else:
|
362
362
|
matched_expanded_ids = expanded_ids
|
@@ -364,6 +364,10 @@ class Genodexito:
|
|
364
364
|
updated_ids = ids.drop(SBML_DFS.S_IDENTIFIERS, axis=1).join(
|
365
365
|
pd.DataFrame(matched_expanded_ids)
|
366
366
|
)
|
367
|
+
# fill missing attributes with empty Identifiers
|
368
|
+
updated_ids[SBML_DFS.S_IDENTIFIERS] = updated_ids[
|
369
|
+
SBML_DFS.S_IDENTIFIERS
|
370
|
+
].fillna(identifiers.Identifiers([]))
|
367
371
|
|
368
372
|
setattr(sbml_dfs, "species", updated_ids)
|
369
373
|
|
napistu/ontologies/renaming.py
CHANGED
@@ -72,6 +72,10 @@ def rename_species_ontologies(
|
|
72
72
|
updated_species = sbml_dfs.species.drop(SBML_DFS.S_IDENTIFIERS, axis=1).join(
|
73
73
|
pd.DataFrame(species_identifiers)
|
74
74
|
)
|
75
|
+
# fill missing attributes with empty Identifiers
|
76
|
+
updated_species[SBML_DFS.S_IDENTIFIERS] = updated_species[
|
77
|
+
SBML_DFS.S_IDENTIFIERS
|
78
|
+
].fillna(identifiers.Identifiers([]))
|
75
79
|
|
76
80
|
setattr(sbml_dfs, "species", updated_species)
|
77
81
|
|