napistu 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. napistu/__main__.py +38 -27
  2. napistu/consensus.py +22 -27
  3. napistu/constants.py +91 -65
  4. napistu/context/filtering.py +2 -1
  5. napistu/identifiers.py +3 -6
  6. napistu/indices.py +3 -1
  7. napistu/ingestion/bigg.py +6 -6
  8. napistu/ingestion/sbml.py +298 -295
  9. napistu/ingestion/string.py +16 -19
  10. napistu/ingestion/trrust.py +22 -27
  11. napistu/ingestion/yeast.py +2 -1
  12. napistu/matching/interactions.py +4 -4
  13. napistu/matching/species.py +1 -1
  14. napistu/modify/uncompartmentalize.py +1 -1
  15. napistu/network/net_create.py +1 -1
  16. napistu/network/paths.py +1 -1
  17. napistu/ontologies/dogma.py +2 -1
  18. napistu/ontologies/genodexito.py +5 -1
  19. napistu/ontologies/renaming.py +4 -0
  20. napistu/sbml_dfs_core.py +1343 -2167
  21. napistu/sbml_dfs_utils.py +1086 -143
  22. napistu/utils.py +52 -41
  23. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/METADATA +2 -2
  24. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/RECORD +40 -40
  25. tests/conftest.py +113 -13
  26. tests/test_consensus.py +161 -4
  27. tests/test_context_filtering.py +2 -2
  28. tests/test_gaps.py +26 -15
  29. tests/test_network_net_create.py +1 -1
  30. tests/test_network_precompute.py +1 -1
  31. tests/test_ontologies_genodexito.py +3 -0
  32. tests/test_ontologies_mygene.py +3 -0
  33. tests/test_ontologies_renaming.py +28 -24
  34. tests/test_sbml_dfs_core.py +260 -211
  35. tests/test_sbml_dfs_utils.py +194 -36
  36. tests/test_utils.py +19 -0
  37. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/WHEEL +0 -0
  38. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/entry_points.txt +0 -0
  39. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/licenses/LICENSE +0 -0
  40. {napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/top_level.txt +0 -0
@@ -5,17 +5,14 @@ import logging
5
5
  import pandas as pd
6
6
  from napistu import identifiers
7
7
  from napistu import sbml_dfs_core
8
+ from napistu import sbml_dfs_utils
8
9
  from napistu import source
9
10
  from napistu import utils
11
+ from napistu.ingestion import napistu_edgelist
10
12
  from napistu.constants import BQB
11
13
  from napistu.constants import MINI_SBO_FROM_NAME
12
- from napistu.ingestion import napistu_edgelist
13
- from napistu.ingestion.constants import SBML_SPECIES_DICT_IDENTIFIERS
14
- from napistu.ingestion.constants import SBML_SPECIES_DICT_NAME
15
- from napistu.ingestion.constants import SMBL_REACTION_DICT_IDENTIFIERS
16
- from napistu.ingestion.constants import SMBL_REACTION_DICT_IS_REVERSIBLE
17
- from napistu.ingestion.constants import SMBL_REACTION_DICT_NAME
18
- from napistu.ingestion.constants import SMBL_REACTION_SPEC_SBO_TERM
14
+ from napistu.constants import ONTOLOGIES
15
+ from napistu.constants import SBML_DFS
19
16
  from napistu.ingestion.constants import STRING_DOWNSTREAM_COMPARTMENT
20
17
  from napistu.ingestion.constants import STRING_DOWNSTREAM_NAME
21
18
  from napistu.ingestion.constants import STRING_PROTEIN_ID
@@ -136,10 +133,10 @@ def convert_string_to_sbml_dfs(
136
133
 
137
134
  # define identifier mapping from aliases to use:
138
135
  alias_to_identifier = {
139
- "Ensembl_gene": ("ensembl_gene", BQB.IS_ENCODED_BY),
140
- "Ensembl_transcript": ("ensembl_transcript", BQB.IS_ENCODED_BY),
141
- "Ensembl_translation": ("ensembl_protein", BQB.IS),
142
- "Ensembl_UniProt_AC": ("uniprot", BQB.IS),
136
+ "Ensembl_gene": (ONTOLOGIES.ENSEMBL_GENE, BQB.IS_ENCODED_BY),
137
+ "Ensembl_transcript": (ONTOLOGIES.ENSEMBL_TRANSCRIPT, BQB.IS_ENCODED_BY),
138
+ "Ensembl_translation": (ONTOLOGIES.ENSEMBL_PROTEIN, BQB.IS),
139
+ "Ensembl_UniProt_AC": (ONTOLOGIES.UNIPROT, BQB.IS),
143
140
  }
144
141
 
145
142
  # filter aliases to only keep required ones
@@ -158,7 +155,7 @@ def convert_string_to_sbml_dfs(
158
155
  # Define compartments
159
156
  # Currently we are mapping everything to the `CELLULAR_COMPONENT`
160
157
  # which is a catch-all go: for unknown localisation
161
- compartments_df = sbml_dfs_core._stub_compartments()
158
+ compartments_df = sbml_dfs_utils.stub_compartments()
162
159
 
163
160
  # define interactions
164
161
  interaction_edgelist = _build_interactor_edgelist(uq_string_edgelist)
@@ -275,17 +272,17 @@ def _build_species_df(
275
272
  species_df = (
276
273
  pd.Series(
277
274
  list(set(edgelist[source_col]).union(edgelist[target_col])),
278
- name=SBML_SPECIES_DICT_NAME,
275
+ name=SBML_DFS.S_NAME,
279
276
  )
280
277
  .to_frame()
281
- .set_index(SBML_SPECIES_DICT_NAME, drop=False)
278
+ .set_index(SBML_DFS.S_NAME, drop=False)
282
279
  .apply(
283
280
  _get_identifiers,
284
281
  alias_to_identifier=alias_to_identifier,
285
282
  dat_alias=aliases,
286
283
  axis=1,
287
284
  )
288
- .rename(SBML_SPECIES_DICT_IDENTIFIERS)
285
+ .rename(SBML_DFS.S_IDENTIFIERS)
289
286
  .reset_index()
290
287
  )
291
288
  return species_df
@@ -311,8 +308,8 @@ def _build_interactor_edgelist(
311
308
  **{
312
309
  STRING_UPSTREAM_COMPARTMENT: compartment,
313
310
  STRING_DOWNSTREAM_COMPARTMENT: compartment,
314
- SMBL_REACTION_SPEC_SBO_TERM: sbo_interactor,
315
- SMBL_REACTION_DICT_IDENTIFIERS: lambda x: identifiers.Identifiers([]),
311
+ SBML_DFS.SBO_TERM: sbo_interactor,
312
+ SBML_DFS.R_IDENTIFIERS: lambda x: identifiers.Identifiers([]),
316
313
  }
317
314
  )
318
315
  if add_reverse_interactions:
@@ -335,10 +332,10 @@ def _build_interactor_edgelist(
335
332
  )
336
333
 
337
334
  interaction_edgelist = dat
338
- interaction_edgelist[SMBL_REACTION_DICT_NAME] = _build_string_reaction_name(
335
+ interaction_edgelist[SBML_DFS.R_NAME] = _build_string_reaction_name(
339
336
  dat[STRING_UPSTREAM_NAME], dat[STRING_DOWNSTREAM_NAME]
340
337
  )
341
- interaction_edgelist[SMBL_REACTION_DICT_IS_REVERSIBLE] = True
338
+ interaction_edgelist[SBML_DFS.R_ISREVERSIBLE] = True
342
339
 
343
340
  return interaction_edgelist
344
341
 
@@ -8,16 +8,11 @@ from napistu import identifiers
8
8
  from napistu import sbml_dfs_core
9
9
  from napistu import source
10
10
  from napistu import utils
11
+ from napistu.constants import BQB
12
+ from napistu.constants import IDENTIFIERS
11
13
  from napistu.constants import MINI_SBO_FROM_NAME
12
14
  from napistu.constants import SBOTERM_NAMES
13
- from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_IDENTIFIERS
14
- from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_NAME
15
- from napistu.ingestion.constants import SBML_SPECIES_DICT_IDENTIFIERS
16
- from napistu.ingestion.constants import SBML_SPECIES_DICT_NAME
17
- from napistu.ingestion.constants import SMBL_REACTION_DICT_IDENTIFIERS
18
- from napistu.ingestion.constants import SMBL_REACTION_DICT_IS_REVERSIBLE
19
- from napistu.ingestion.constants import SMBL_REACTION_DICT_NAME
20
- from napistu.ingestion.constants import SMBL_REACTION_SPEC_SBO_TERM
15
+ from napistu.constants import SBML_DFS
21
16
  from napistu.ingestion.constants import SPECIES_FULL_NAME_HUMAN
22
17
  from napistu.ingestion.constants import STRING_DOWNSTREAM_COMPARTMENT
23
18
  from napistu.ingestion.constants import STRING_DOWNSTREAM_NAME
@@ -81,16 +76,16 @@ def convert_trrust_to_sbml_dfs(
81
76
  species_df = (
82
77
  pd.DataFrame(
83
78
  {
84
- SBML_SPECIES_DICT_NAME: list(
79
+ SBML_DFS.S_NAME: list(
85
80
  {*edge_summaries_df["from"], *edge_summaries_df["to"]}
86
81
  )
87
82
  }
88
83
  )
89
84
  .merge(
90
- uniprot_2_symbol.rename({TRRUST_SYMBOL: SBML_SPECIES_DICT_NAME}, axis=1),
85
+ uniprot_2_symbol.rename({TRRUST_SYMBOL: SBML_DFS.S_NAME}, axis=1),
91
86
  how="left",
92
87
  )
93
- .set_index(SBML_SPECIES_DICT_NAME)
88
+ .set_index(SBML_DFS.S_NAME)
94
89
  )
95
90
 
96
91
  # create Identifiers objects for all species with uniprot IDs
@@ -106,14 +101,14 @@ def convert_trrust_to_sbml_dfs(
106
101
  [
107
102
  identifiers.Identifiers(
108
103
  [
109
- identifiers.format_uri(uri=x, biological_qualifier_type="BQB_IS")
110
- for x in species_w_ids.loc[[ind]]["url"].tolist()
104
+ identifiers.format_uri(uri=x, biological_qualifier_type=BQB.IS)
105
+ for x in species_w_ids.loc[[ind]][IDENTIFIERS.URL].tolist()
111
106
  ]
112
107
  )
113
108
  for ind in species_w_ids.index.unique()
114
109
  ],
115
110
  index=species_w_ids.index.unique(),
116
- ).rename(SBML_SPECIES_DICT_IDENTIFIERS)
111
+ ).rename(SBML_DFS.S_IDENTIFIERS)
117
112
 
118
113
  # just retain s_name and s_Identifiers
119
114
  # this just needs a source object which will be added later
@@ -124,21 +119,21 @@ def convert_trrust_to_sbml_dfs(
124
119
  .merge(
125
120
  species_w_ids_series,
126
121
  how="left",
127
- left_on=SBML_SPECIES_DICT_NAME,
122
+ left_on=SBML_DFS.S_NAME,
128
123
  right_index=True,
129
124
  )
130
125
  .reset_index(drop=True)
131
126
  )
132
127
  # stub genes with missing IDs
133
- species_df[SBML_SPECIES_DICT_IDENTIFIERS] = species_df[SBML_SPECIES_DICT_IDENTIFIERS].fillna( # type: ignore
128
+ species_df[SBML_DFS.S_IDENTIFIERS] = species_df[SBML_DFS.S_IDENTIFIERS].fillna( # type: ignore
134
129
  value=identifiers.Identifiers([])
135
130
  )
136
131
 
137
132
  # define distinct compartments
138
133
  compartments_df = pd.DataFrame(
139
134
  {
140
- SBML_COMPARTMENT_DICT_NAME: TRRUST_COMPARTMENT_NUCLEOPLASM,
141
- SBML_COMPARTMENT_DICT_IDENTIFIERS: identifiers.Identifiers(
135
+ SBML_DFS.C_NAME: TRRUST_COMPARTMENT_NUCLEOPLASM,
136
+ SBML_DFS.C_IDENTIFIERS: identifiers.Identifiers(
142
137
  [
143
138
  identifiers.format_uri(
144
139
  uri=identifiers.create_uri_url(
@@ -159,7 +154,7 @@ def convert_trrust_to_sbml_dfs(
159
154
  upstream_compartment=TRRUST_COMPARTMENT_NUCLEOPLASM,
160
155
  downstream_compartment=TRRUST_COMPARTMENT_NUCLEOPLASM,
161
156
  )
162
- gene_gene_identifier_edgelist[SMBL_REACTION_DICT_NAME] = [
157
+ gene_gene_identifier_edgelist[SBML_DFS.R_NAME] = [
163
158
  f"{x} {y} of {z}"
164
159
  for x, y, z in zip(
165
160
  gene_gene_identifier_edgelist[STRING_UPSTREAM_NAME],
@@ -171,15 +166,15 @@ def convert_trrust_to_sbml_dfs(
171
166
  # convert relationships to SBO terms
172
167
  interaction_edgelist = gene_gene_identifier_edgelist.replace(
173
168
  {"sign": MINI_SBO_FROM_NAME}
174
- ).rename({"sign": SMBL_REACTION_SPEC_SBO_TERM}, axis=1)
169
+ ).rename({"sign": SBML_DFS.SBO_TERM}, axis=1)
175
170
 
176
171
  # format pubmed identifiers of interactions
177
- interaction_edgelist[SMBL_REACTION_DICT_IDENTIFIERS] = [
172
+ interaction_edgelist[SBML_DFS.R_IDENTIFIERS] = [
178
173
  _format_pubmed_for_interactions(x) for x in interaction_edgelist["reference"]
179
174
  ]
180
175
 
181
176
  # directionality: by default, set r_isreversible to False for TRRUST data
182
- interaction_edgelist[SMBL_REACTION_DICT_IS_REVERSIBLE] = False
177
+ interaction_edgelist[SBML_DFS.R_ISREVERSIBLE] = False
183
178
 
184
179
  # reduce to essential variables
185
180
  interaction_edgelist = interaction_edgelist[
@@ -188,10 +183,10 @@ def convert_trrust_to_sbml_dfs(
188
183
  STRING_DOWNSTREAM_NAME,
189
184
  STRING_UPSTREAM_COMPARTMENT,
190
185
  STRING_DOWNSTREAM_COMPARTMENT,
191
- SMBL_REACTION_DICT_NAME,
192
- SMBL_REACTION_SPEC_SBO_TERM,
193
- SMBL_REACTION_DICT_IDENTIFIERS,
194
- SMBL_REACTION_DICT_IS_REVERSIBLE,
186
+ SBML_DFS.R_NAME,
187
+ SBML_DFS.SBO_TERM,
188
+ SBML_DFS.R_IDENTIFIERS,
189
+ SBML_DFS.R_ISREVERSIBLE,
195
190
  ]
196
191
  ]
197
192
 
@@ -277,7 +272,7 @@ def _format_pubmed_for_interactions(pubmed_set):
277
272
  url = identifiers.create_uri_url(ontology="pubmed", identifier=p, strict=False)
278
273
  if url is not None:
279
274
  valid_url = identifiers.format_uri(
280
- uri=url, biological_qualifier_type="BQB_IS_DESCRIBED_BY"
275
+ uri=url, biological_qualifier_type=BQB.IS_DESCRIBED_BY
281
276
  )
282
277
 
283
278
  ids.append(valid_url)
@@ -3,6 +3,7 @@ from __future__ import annotations
3
3
  import pandas as pd
4
4
  from napistu import identifiers
5
5
  from napistu import sbml_dfs_core
6
+ from napistu import sbml_dfs_utils
6
7
  from napistu import source
7
8
  from napistu import utils
8
9
  from napistu.constants import BQB
@@ -105,7 +106,7 @@ def convert_idea_kinetics_to_sbml_dfs(
105
106
  # Constant fields (for this data source)
106
107
 
107
108
  # setup compartments (just treat this as uncompartmentalized for now)
108
- compartments_df = sbml_dfs_core._stub_compartments()
109
+ compartments_df = sbml_dfs_utils.stub_compartments()
109
110
 
110
111
  # Per convention unaggregated models receive an empty source
111
112
  interaction_source = source.Source(init=True)
@@ -40,7 +40,7 @@ def edgelist_to_pathway_species(
40
40
  pd.Dataframe containing a "identifier_upstream" and "identifier_downstream" variables used to to match entries
41
41
  species_identifiers: pd.DataFrame
42
42
  A table of molecular species identifiers produced from sbml_dfs.get_identifiers("species") generally using
43
- sbml_dfs_core.export_sbml_dfs()
43
+ sbml_dfs.export_sbml_dfs()
44
44
  ontologies: set
45
45
  A set of ontologies used to match features to pathway species
46
46
  feature_id_var: str, default=FEATURE_ID_VAR_DEFAULT
@@ -138,7 +138,7 @@ def edgelist_to_scids(
138
138
  A mechanistic model
139
139
  species_identifiers: pd.DataFrame
140
140
  A table of molecular species identifiers produced from
141
- sbml_dfs.get_identifiers("species") generally using sbml_dfs_core.export_sbml_dfs()
141
+ sbml_dfs.get_identifiers("species") generally using sbml_dfs.export_sbml_dfs()
142
142
  ontologies: set
143
143
  A set of ontologies used to match features to pathway species
144
144
 
@@ -218,7 +218,7 @@ def filter_to_direct_mechanistic_interactions(
218
218
  species_identifiers: pd.DataFrame
219
219
  A table of molecular species identifiers
220
220
  produced from sbml_dfs.get_identifiers("species") generally
221
- using sbml_dfs_core.export_sbml_dfs()
221
+ using sbml_dfs.export_sbml_dfs()
222
222
  ontologies: set
223
223
  A set of ontologies used to match features to pathway species
224
224
 
@@ -342,7 +342,7 @@ def filter_to_indirect_mechanistic_interactions(
342
342
  A mechanistic model
343
343
  species_identifiers: pandas.DataFrame
344
344
  A table of molecular species identifiers produced from
345
- sbml_dfs.get_identifiers("species") generally using sbml_dfs_core.export_sbml_dfs()
345
+ sbml_dfs.get_identifiers("species") generally using sbml_dfs.export_sbml_dfs()
346
346
  napistu_graph: igraph.Graph
347
347
  A network representation of the sbml_dfs model
348
348
  ontologies: set
@@ -33,7 +33,7 @@ def features_to_pathway_species(
33
33
  pd.Dataframe containing a "feature_identifiers_var" variable used to match entries
34
34
  species_identifiers: pd.DataFrame
35
35
  A table of molecular species identifiers produced from sbml_dfs.get_identifiers("species")
36
- generally using sbml_dfs_core.export_sbml_dfs()
36
+ generally using sbml_dfs.export_sbml_dfs()
37
37
  ontologies: set
38
38
  A set of ontologies used to match features to pathway species
39
39
  feature_identifiers_var: str
@@ -48,7 +48,7 @@ def uncompartmentalize_sbml_dfs(
48
48
  )
49
49
 
50
50
  # 1. update the compartments table to the stubbed default level: GO CELLULAR_COMPONENT
51
- stubbed_compartment = sbml_dfs_core._stub_compartments().assign(
51
+ stubbed_compartment = sbml_dfs_utils.stub_compartments().assign(
52
52
  c_Source=_create_stubbed_source()
53
53
  )
54
54
 
@@ -1697,7 +1697,7 @@ def _create_topology_weights(
1697
1697
  base_score (float): offset which will be added to all weights.
1698
1698
  protein_multiplier (int): multiplier for non-metabolite species (lower weight paths will tend to be selected).
1699
1699
  metabolite_multiplier (int): multiplier for metabolites [defined a species with a ChEBI ID).
1700
- unknown_multiplier (int): multiplier for species without any identifier. See sbml_dfs_core.species_type_types.
1700
+ unknown_multiplier (int): multiplier for species without any identifier. See sbml_dfs_utils.species_type_types.
1701
1701
  scale_multiplier_by_meandegree (bool): if True then multipliers will be rescaled by the average number of
1702
1702
  connections a node has (i.e., its degree) so that weights will be relatively similar regardless of network
1703
1703
  size and sparsity.
napistu/network/paths.py CHANGED
@@ -489,7 +489,7 @@ def _label_path_reactions(sbml_dfs: sbml_dfs_core.SBML_dfs, paths_df: pd.DataFra
489
489
  reaction_info = (
490
490
  pd.concat(
491
491
  [
492
- sbml_dfs_core.reaction_summaries(sbml_dfs, r_ids=x)
492
+ sbml_dfs.reaction_formulas(r_ids=x)
493
493
  for x in set(reaction_paths["node"])
494
494
  ]
495
495
  )
@@ -4,6 +4,7 @@ import logging
4
4
  import pandas as pd
5
5
 
6
6
  from napistu import sbml_dfs_core
7
+ from napistu import sbml_dfs_utils
7
8
  from napistu import source
8
9
  from napistu import identifiers
9
10
  from napistu import utils
@@ -59,7 +60,7 @@ def create_dogmatic_sbml_dfs(
59
60
  )
60
61
 
61
62
  # stub required but invariant variables
62
- compartments_df = sbml_dfs_core._stub_compartments()
63
+ compartments_df = sbml_dfs_utils.stub_compartments()
63
64
  interaction_source = source.Source(init=True)
64
65
 
65
66
  # interactions table. This is required to create the sbml_dfs but we'll drop the info later
@@ -356,7 +356,7 @@ class Genodexito:
356
356
  )
357
357
  logger.debug(
358
358
  f"{ids.shape[0] - expanded_ids.shape[0]} "
359
- "ids are not included in expanded ids"
359
+ "ids are not included in expanded ids. These will be filled with empty Identifiers"
360
360
  )
361
361
  else:
362
362
  matched_expanded_ids = expanded_ids
@@ -364,6 +364,10 @@ class Genodexito:
364
364
  updated_ids = ids.drop(SBML_DFS.S_IDENTIFIERS, axis=1).join(
365
365
  pd.DataFrame(matched_expanded_ids)
366
366
  )
367
+ # fill missing attributes with empty Identifiers
368
+ updated_ids[SBML_DFS.S_IDENTIFIERS] = updated_ids[
369
+ SBML_DFS.S_IDENTIFIERS
370
+ ].fillna(identifiers.Identifiers([]))
367
371
 
368
372
  setattr(sbml_dfs, "species", updated_ids)
369
373
 
@@ -72,6 +72,10 @@ def rename_species_ontologies(
72
72
  updated_species = sbml_dfs.species.drop(SBML_DFS.S_IDENTIFIERS, axis=1).join(
73
73
  pd.DataFrame(species_identifiers)
74
74
  )
75
+ # fill missing attributes with empty Identifiers
76
+ updated_species[SBML_DFS.S_IDENTIFIERS] = updated_species[
77
+ SBML_DFS.S_IDENTIFIERS
78
+ ].fillna(identifiers.Identifiers([]))
75
79
 
76
80
  setattr(sbml_dfs, "species", updated_species)
77
81