napistu 0.3.6__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. napistu/__main__.py +28 -13
  2. napistu/consensus.py +19 -25
  3. napistu/constants.py +102 -83
  4. napistu/indices.py +3 -1
  5. napistu/ingestion/napistu_edgelist.py +4 -4
  6. napistu/ingestion/sbml.py +298 -295
  7. napistu/ingestion/string.py +14 -18
  8. napistu/ingestion/trrust.py +22 -27
  9. napistu/matching/interactions.py +41 -39
  10. napistu/matching/species.py +1 -1
  11. napistu/modify/gaps.py +2 -1
  12. napistu/network/constants.py +61 -45
  13. napistu/network/data_handling.py +1 -1
  14. napistu/network/neighborhoods.py +3 -3
  15. napistu/network/net_create.py +440 -616
  16. napistu/network/net_create_utils.py +734 -0
  17. napistu/network/net_propagation.py +1 -1
  18. napistu/network/{napistu_graph_core.py → ng_core.py} +57 -15
  19. napistu/network/ng_utils.py +28 -21
  20. napistu/network/paths.py +4 -4
  21. napistu/network/precompute.py +35 -74
  22. napistu/ontologies/genodexito.py +5 -1
  23. napistu/ontologies/renaming.py +4 -0
  24. napistu/sbml_dfs_core.py +127 -64
  25. napistu/sbml_dfs_utils.py +50 -0
  26. napistu/utils.py +132 -46
  27. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/METADATA +2 -2
  28. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/RECORD +47 -44
  29. tests/conftest.py +171 -13
  30. tests/test_consensus.py +74 -5
  31. tests/test_gaps.py +26 -15
  32. tests/test_network_data_handling.py +5 -2
  33. tests/test_network_net_create.py +93 -202
  34. tests/test_network_net_create_utils.py +538 -0
  35. tests/test_network_ng_core.py +19 -0
  36. tests/test_network_ng_utils.py +1 -1
  37. tests/test_network_precompute.py +5 -4
  38. tests/test_ontologies_renaming.py +28 -24
  39. tests/test_rpy2_callr.py +0 -1
  40. tests/test_rpy2_init.py +0 -1
  41. tests/test_sbml_dfs_core.py +165 -15
  42. tests/test_sbml_dfs_utils.py +45 -0
  43. tests/test_utils.py +45 -2
  44. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/WHEEL +0 -0
  45. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/entry_points.txt +0 -0
  46. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/licenses/LICENSE +0 -0
  47. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/top_level.txt +0 -0
@@ -8,15 +8,11 @@ from napistu import sbml_dfs_core
8
8
  from napistu import sbml_dfs_utils
9
9
  from napistu import source
10
10
  from napistu import utils
11
+ from napistu.ingestion import napistu_edgelist
11
12
  from napistu.constants import BQB
12
13
  from napistu.constants import MINI_SBO_FROM_NAME
13
- from napistu.ingestion import napistu_edgelist
14
- from napistu.ingestion.constants import SBML_SPECIES_DICT_IDENTIFIERS
15
- from napistu.ingestion.constants import SBML_SPECIES_DICT_NAME
16
- from napistu.ingestion.constants import SMBL_REACTION_DICT_IDENTIFIERS
17
- from napistu.ingestion.constants import SMBL_REACTION_DICT_IS_REVERSIBLE
18
- from napistu.ingestion.constants import SMBL_REACTION_DICT_NAME
19
- from napistu.ingestion.constants import SMBL_REACTION_SPEC_SBO_TERM
14
+ from napistu.constants import ONTOLOGIES
15
+ from napistu.constants import SBML_DFS
20
16
  from napistu.ingestion.constants import STRING_DOWNSTREAM_COMPARTMENT
21
17
  from napistu.ingestion.constants import STRING_DOWNSTREAM_NAME
22
18
  from napistu.ingestion.constants import STRING_PROTEIN_ID
@@ -137,10 +133,10 @@ def convert_string_to_sbml_dfs(
137
133
 
138
134
  # define identifier mapping from aliases to use:
139
135
  alias_to_identifier = {
140
- "Ensembl_gene": ("ensembl_gene", BQB.IS_ENCODED_BY),
141
- "Ensembl_transcript": ("ensembl_transcript", BQB.IS_ENCODED_BY),
142
- "Ensembl_translation": ("ensembl_protein", BQB.IS),
143
- "Ensembl_UniProt_AC": ("uniprot", BQB.IS),
136
+ "Ensembl_gene": (ONTOLOGIES.ENSEMBL_GENE, BQB.IS_ENCODED_BY),
137
+ "Ensembl_transcript": (ONTOLOGIES.ENSEMBL_TRANSCRIPT, BQB.IS_ENCODED_BY),
138
+ "Ensembl_translation": (ONTOLOGIES.ENSEMBL_PROTEIN, BQB.IS),
139
+ "Ensembl_UniProt_AC": (ONTOLOGIES.UNIPROT, BQB.IS),
144
140
  }
145
141
 
146
142
  # filter aliases to only keep required ones
@@ -276,17 +272,17 @@ def _build_species_df(
276
272
  species_df = (
277
273
  pd.Series(
278
274
  list(set(edgelist[source_col]).union(edgelist[target_col])),
279
- name=SBML_SPECIES_DICT_NAME,
275
+ name=SBML_DFS.S_NAME,
280
276
  )
281
277
  .to_frame()
282
- .set_index(SBML_SPECIES_DICT_NAME, drop=False)
278
+ .set_index(SBML_DFS.S_NAME, drop=False)
283
279
  .apply(
284
280
  _get_identifiers,
285
281
  alias_to_identifier=alias_to_identifier,
286
282
  dat_alias=aliases,
287
283
  axis=1,
288
284
  )
289
- .rename(SBML_SPECIES_DICT_IDENTIFIERS)
285
+ .rename(SBML_DFS.S_IDENTIFIERS)
290
286
  .reset_index()
291
287
  )
292
288
  return species_df
@@ -312,8 +308,8 @@ def _build_interactor_edgelist(
312
308
  **{
313
309
  STRING_UPSTREAM_COMPARTMENT: compartment,
314
310
  STRING_DOWNSTREAM_COMPARTMENT: compartment,
315
- SMBL_REACTION_SPEC_SBO_TERM: sbo_interactor,
316
- SMBL_REACTION_DICT_IDENTIFIERS: lambda x: identifiers.Identifiers([]),
311
+ SBML_DFS.SBO_TERM: sbo_interactor,
312
+ SBML_DFS.R_IDENTIFIERS: lambda x: identifiers.Identifiers([]),
317
313
  }
318
314
  )
319
315
  if add_reverse_interactions:
@@ -336,10 +332,10 @@ def _build_interactor_edgelist(
336
332
  )
337
333
 
338
334
  interaction_edgelist = dat
339
- interaction_edgelist[SMBL_REACTION_DICT_NAME] = _build_string_reaction_name(
335
+ interaction_edgelist[SBML_DFS.R_NAME] = _build_string_reaction_name(
340
336
  dat[STRING_UPSTREAM_NAME], dat[STRING_DOWNSTREAM_NAME]
341
337
  )
342
- interaction_edgelist[SMBL_REACTION_DICT_IS_REVERSIBLE] = True
338
+ interaction_edgelist[SBML_DFS.R_ISREVERSIBLE] = True
343
339
 
344
340
  return interaction_edgelist
345
341
 
@@ -8,16 +8,11 @@ from napistu import identifiers
8
8
  from napistu import sbml_dfs_core
9
9
  from napistu import source
10
10
  from napistu import utils
11
+ from napistu.constants import BQB
12
+ from napistu.constants import IDENTIFIERS
11
13
  from napistu.constants import MINI_SBO_FROM_NAME
12
14
  from napistu.constants import SBOTERM_NAMES
13
- from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_IDENTIFIERS
14
- from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_NAME
15
- from napistu.ingestion.constants import SBML_SPECIES_DICT_IDENTIFIERS
16
- from napistu.ingestion.constants import SBML_SPECIES_DICT_NAME
17
- from napistu.ingestion.constants import SMBL_REACTION_DICT_IDENTIFIERS
18
- from napistu.ingestion.constants import SMBL_REACTION_DICT_IS_REVERSIBLE
19
- from napistu.ingestion.constants import SMBL_REACTION_DICT_NAME
20
- from napistu.ingestion.constants import SMBL_REACTION_SPEC_SBO_TERM
15
+ from napistu.constants import SBML_DFS
21
16
  from napistu.ingestion.constants import SPECIES_FULL_NAME_HUMAN
22
17
  from napistu.ingestion.constants import STRING_DOWNSTREAM_COMPARTMENT
23
18
  from napistu.ingestion.constants import STRING_DOWNSTREAM_NAME
@@ -81,16 +76,16 @@ def convert_trrust_to_sbml_dfs(
81
76
  species_df = (
82
77
  pd.DataFrame(
83
78
  {
84
- SBML_SPECIES_DICT_NAME: list(
79
+ SBML_DFS.S_NAME: list(
85
80
  {*edge_summaries_df["from"], *edge_summaries_df["to"]}
86
81
  )
87
82
  }
88
83
  )
89
84
  .merge(
90
- uniprot_2_symbol.rename({TRRUST_SYMBOL: SBML_SPECIES_DICT_NAME}, axis=1),
85
+ uniprot_2_symbol.rename({TRRUST_SYMBOL: SBML_DFS.S_NAME}, axis=1),
91
86
  how="left",
92
87
  )
93
- .set_index(SBML_SPECIES_DICT_NAME)
88
+ .set_index(SBML_DFS.S_NAME)
94
89
  )
95
90
 
96
91
  # create Identifiers objects for all species with uniprot IDs
@@ -106,14 +101,14 @@ def convert_trrust_to_sbml_dfs(
106
101
  [
107
102
  identifiers.Identifiers(
108
103
  [
109
- identifiers.format_uri(uri=x, biological_qualifier_type="BQB_IS")
110
- for x in species_w_ids.loc[[ind]]["url"].tolist()
104
+ identifiers.format_uri(uri=x, biological_qualifier_type=BQB.IS)
105
+ for x in species_w_ids.loc[[ind]][IDENTIFIERS.URL].tolist()
111
106
  ]
112
107
  )
113
108
  for ind in species_w_ids.index.unique()
114
109
  ],
115
110
  index=species_w_ids.index.unique(),
116
- ).rename(SBML_SPECIES_DICT_IDENTIFIERS)
111
+ ).rename(SBML_DFS.S_IDENTIFIERS)
117
112
 
118
113
  # just retain s_name and s_Identifiers
119
114
  # this just needs a source object which will be added later
@@ -124,21 +119,21 @@ def convert_trrust_to_sbml_dfs(
124
119
  .merge(
125
120
  species_w_ids_series,
126
121
  how="left",
127
- left_on=SBML_SPECIES_DICT_NAME,
122
+ left_on=SBML_DFS.S_NAME,
128
123
  right_index=True,
129
124
  )
130
125
  .reset_index(drop=True)
131
126
  )
132
127
  # stub genes with missing IDs
133
- species_df[SBML_SPECIES_DICT_IDENTIFIERS] = species_df[SBML_SPECIES_DICT_IDENTIFIERS].fillna( # type: ignore
128
+ species_df[SBML_DFS.S_IDENTIFIERS] = species_df[SBML_DFS.S_IDENTIFIERS].fillna( # type: ignore
134
129
  value=identifiers.Identifiers([])
135
130
  )
136
131
 
137
132
  # define distinct compartments
138
133
  compartments_df = pd.DataFrame(
139
134
  {
140
- SBML_COMPARTMENT_DICT_NAME: TRRUST_COMPARTMENT_NUCLEOPLASM,
141
- SBML_COMPARTMENT_DICT_IDENTIFIERS: identifiers.Identifiers(
135
+ SBML_DFS.C_NAME: TRRUST_COMPARTMENT_NUCLEOPLASM,
136
+ SBML_DFS.C_IDENTIFIERS: identifiers.Identifiers(
142
137
  [
143
138
  identifiers.format_uri(
144
139
  uri=identifiers.create_uri_url(
@@ -159,7 +154,7 @@ def convert_trrust_to_sbml_dfs(
159
154
  upstream_compartment=TRRUST_COMPARTMENT_NUCLEOPLASM,
160
155
  downstream_compartment=TRRUST_COMPARTMENT_NUCLEOPLASM,
161
156
  )
162
- gene_gene_identifier_edgelist[SMBL_REACTION_DICT_NAME] = [
157
+ gene_gene_identifier_edgelist[SBML_DFS.R_NAME] = [
163
158
  f"{x} {y} of {z}"
164
159
  for x, y, z in zip(
165
160
  gene_gene_identifier_edgelist[STRING_UPSTREAM_NAME],
@@ -171,15 +166,15 @@ def convert_trrust_to_sbml_dfs(
171
166
  # convert relationships to SBO terms
172
167
  interaction_edgelist = gene_gene_identifier_edgelist.replace(
173
168
  {"sign": MINI_SBO_FROM_NAME}
174
- ).rename({"sign": SMBL_REACTION_SPEC_SBO_TERM}, axis=1)
169
+ ).rename({"sign": SBML_DFS.SBO_TERM}, axis=1)
175
170
 
176
171
  # format pubmed identifiers of interactions
177
- interaction_edgelist[SMBL_REACTION_DICT_IDENTIFIERS] = [
172
+ interaction_edgelist[SBML_DFS.R_IDENTIFIERS] = [
178
173
  _format_pubmed_for_interactions(x) for x in interaction_edgelist["reference"]
179
174
  ]
180
175
 
181
176
  # directionality: by default, set r_isreversible to False for TRRUST data
182
- interaction_edgelist[SMBL_REACTION_DICT_IS_REVERSIBLE] = False
177
+ interaction_edgelist[SBML_DFS.R_ISREVERSIBLE] = False
183
178
 
184
179
  # reduce to essential variables
185
180
  interaction_edgelist = interaction_edgelist[
@@ -188,10 +183,10 @@ def convert_trrust_to_sbml_dfs(
188
183
  STRING_DOWNSTREAM_NAME,
189
184
  STRING_UPSTREAM_COMPARTMENT,
190
185
  STRING_DOWNSTREAM_COMPARTMENT,
191
- SMBL_REACTION_DICT_NAME,
192
- SMBL_REACTION_SPEC_SBO_TERM,
193
- SMBL_REACTION_DICT_IDENTIFIERS,
194
- SMBL_REACTION_DICT_IS_REVERSIBLE,
186
+ SBML_DFS.R_NAME,
187
+ SBML_DFS.SBO_TERM,
188
+ SBML_DFS.R_IDENTIFIERS,
189
+ SBML_DFS.R_ISREVERSIBLE,
195
190
  ]
196
191
  ]
197
192
 
@@ -277,7 +272,7 @@ def _format_pubmed_for_interactions(pubmed_set):
277
272
  url = identifiers.create_uri_url(ontology="pubmed", identifier=p, strict=False)
278
273
  if url is not None:
279
274
  valid_url = identifiers.format_uri(
280
- uri=url, biological_qualifier_type="BQB_IS_DESCRIBED_BY"
275
+ uri=url, biological_qualifier_type=BQB.IS_DESCRIBED_BY
281
276
  )
282
277
 
283
278
  ids.append(valid_url)
@@ -10,9 +10,9 @@ from napistu import utils
10
10
  from napistu import sbml_dfs_core
11
11
  from napistu.matching.species import features_to_pathway_species
12
12
  from napistu.constants import (
13
- CPR_EDGELIST_REQ_VARS,
13
+ NAPISTU_EDGELIST_REQ_VARS,
14
14
  IDENTIFIER_EDGELIST_REQ_VARS,
15
- CPR_EDGELIST,
15
+ NAPISTU_EDGELIST,
16
16
  SBML_DFS,
17
17
  IDENTIFIERS,
18
18
  )
@@ -56,8 +56,8 @@ def edgelist_to_pathway_species(
56
56
  """
57
57
 
58
58
  required_vars_distinct_features = {
59
- CPR_EDGELIST.IDENTIFIER_UPSTREAM,
60
- CPR_EDGELIST.IDENTIFIER_DOWNSTREAM,
59
+ NAPISTU_EDGELIST.IDENTIFIER_UPSTREAM,
60
+ NAPISTU_EDGELIST.IDENTIFIER_DOWNSTREAM,
61
61
  }
62
62
  missing_required_vars_distinct_features = (
63
63
  required_vars_distinct_features.difference(
@@ -76,8 +76,8 @@ def edgelist_to_pathway_species(
76
76
  distinct_identifiers = (
77
77
  pd.concat(
78
78
  [
79
- formatted_edgelist[CPR_EDGELIST.IDENTIFIER_UPSTREAM],
80
- formatted_edgelist[CPR_EDGELIST.IDENTIFIER_DOWNSTREAM],
79
+ formatted_edgelist[NAPISTU_EDGELIST.IDENTIFIER_UPSTREAM],
80
+ formatted_edgelist[NAPISTU_EDGELIST.IDENTIFIER_DOWNSTREAM],
81
81
  ]
82
82
  )
83
83
  .drop_duplicates()
@@ -99,16 +99,16 @@ def edgelist_to_pathway_species(
99
99
  edges_on_pathway = formatted_edgelist.merge(
100
100
  features_on_pathway[[SBML_DFS.S_ID, IDENTIFIERS.IDENTIFIER]].rename(
101
101
  {
102
- SBML_DFS.S_ID: CPR_EDGELIST.S_ID_UPSTREAM,
103
- IDENTIFIERS.IDENTIFIER: CPR_EDGELIST.IDENTIFIER_UPSTREAM,
102
+ SBML_DFS.S_ID: NAPISTU_EDGELIST.S_ID_UPSTREAM,
103
+ IDENTIFIERS.IDENTIFIER: NAPISTU_EDGELIST.IDENTIFIER_UPSTREAM,
104
104
  },
105
105
  axis=1,
106
106
  )
107
107
  ).merge(
108
108
  features_on_pathway[[SBML_DFS.S_ID, IDENTIFIERS.IDENTIFIER]].rename(
109
109
  {
110
- SBML_DFS.S_ID: CPR_EDGELIST.S_ID_DOWNSTREAM,
111
- IDENTIFIERS.IDENTIFIER: CPR_EDGELIST.IDENTIFIER_DOWNSTREAM,
110
+ SBML_DFS.S_ID: NAPISTU_EDGELIST.S_ID_DOWNSTREAM,
111
+ IDENTIFIERS.IDENTIFIER: NAPISTU_EDGELIST.IDENTIFIER_DOWNSTREAM,
112
112
  },
113
113
  axis=1,
114
114
  )
@@ -159,15 +159,15 @@ def edgelist_to_scids(
159
159
 
160
160
  # expand from s_ids to sc_ids
161
161
  s_id_pairs = edges_on_pathway[
162
- [CPR_EDGELIST.S_ID_UPSTREAM, CPR_EDGELIST.S_ID_DOWNSTREAM]
162
+ [NAPISTU_EDGELIST.S_ID_UPSTREAM, NAPISTU_EDGELIST.S_ID_DOWNSTREAM]
163
163
  ].drop_duplicates()
164
164
  sc_id_pairs = s_id_pairs.merge(
165
165
  sbml_dfs.compartmentalized_species[[SBML_DFS.S_ID]]
166
166
  .reset_index()
167
167
  .rename(
168
168
  {
169
- SBML_DFS.S_ID: CPR_EDGELIST.S_ID_UPSTREAM,
170
- SBML_DFS.SC_ID: CPR_EDGELIST.SC_ID_UPSTREAM,
169
+ SBML_DFS.S_ID: NAPISTU_EDGELIST.S_ID_UPSTREAM,
170
+ SBML_DFS.SC_ID: NAPISTU_EDGELIST.SC_ID_UPSTREAM,
171
171
  },
172
172
  axis=1,
173
173
  )
@@ -176,8 +176,8 @@ def edgelist_to_scids(
176
176
  .reset_index()
177
177
  .rename(
178
178
  {
179
- SBML_DFS.S_ID: CPR_EDGELIST.S_ID_DOWNSTREAM,
180
- SBML_DFS.SC_ID: CPR_EDGELIST.SC_ID_DOWNSTREAM,
179
+ SBML_DFS.S_ID: NAPISTU_EDGELIST.S_ID_DOWNSTREAM,
180
+ SBML_DFS.SC_ID: NAPISTU_EDGELIST.SC_ID_DOWNSTREAM,
181
181
  },
182
182
  axis=1,
183
183
  )
@@ -232,7 +232,7 @@ def filter_to_direct_mechanistic_interactions(
232
232
  )
233
233
 
234
234
  # reduce to distinct sc_id pairs
235
- sc_id_pairs = edgelist_w_scids[list(CPR_EDGELIST_REQ_VARS)].drop_duplicates()
235
+ sc_id_pairs = edgelist_w_scids[list(NAPISTU_EDGELIST_REQ_VARS)].drop_duplicates()
236
236
 
237
237
  # define all existing direct regulatory interactions
238
238
  pathway_interactions = pd.concat(
@@ -241,36 +241,36 @@ def filter_to_direct_mechanistic_interactions(
241
241
  sbml_dfs.reaction_species[
242
242
  sbml_dfs.reaction_species[SBML_DFS.STOICHIOMETRY] == 0
243
243
  ][[SBML_DFS.R_ID, SBML_DFS.SC_ID]]
244
- .rename({SBML_DFS.SC_ID: CPR_EDGELIST.SC_ID_UPSTREAM}, axis=1)
244
+ .rename({SBML_DFS.SC_ID: NAPISTU_EDGELIST.SC_ID_UPSTREAM}, axis=1)
245
245
  .merge(
246
246
  sbml_dfs.reaction_species[
247
247
  sbml_dfs.reaction_species[SBML_DFS.STOICHIOMETRY] < 0
248
248
  ][[SBML_DFS.R_ID, SBML_DFS.SC_ID]].rename(
249
- {SBML_DFS.SC_ID: CPR_EDGELIST.SC_ID_DOWNSTREAM}, axis=1
249
+ {SBML_DFS.SC_ID: NAPISTU_EDGELIST.SC_ID_DOWNSTREAM}, axis=1
250
250
  )
251
251
  ),
252
252
  # pair <0 -> >0 # substrates affect products
253
253
  sbml_dfs.reaction_species[
254
254
  sbml_dfs.reaction_species[SBML_DFS.STOICHIOMETRY] < 0
255
255
  ][[SBML_DFS.R_ID, SBML_DFS.SC_ID]]
256
- .rename({SBML_DFS.SC_ID: CPR_EDGELIST.SC_ID_UPSTREAM}, axis=1)
256
+ .rename({SBML_DFS.SC_ID: NAPISTU_EDGELIST.SC_ID_UPSTREAM}, axis=1)
257
257
  .merge(
258
258
  sbml_dfs.reaction_species[
259
259
  sbml_dfs.reaction_species[SBML_DFS.STOICHIOMETRY] > 0
260
260
  ][[SBML_DFS.R_ID, SBML_DFS.SC_ID]].rename(
261
- {SBML_DFS.SC_ID: CPR_EDGELIST.SC_ID_DOWNSTREAM}, axis=1
261
+ {SBML_DFS.SC_ID: NAPISTU_EDGELIST.SC_ID_DOWNSTREAM}, axis=1
262
262
  )
263
263
  ),
264
264
  # pair 0 -> >0 # modifiers affect products
265
265
  sbml_dfs.reaction_species[
266
266
  sbml_dfs.reaction_species[SBML_DFS.STOICHIOMETRY] == 0
267
267
  ][[SBML_DFS.R_ID, SBML_DFS.SC_ID]]
268
- .rename({SBML_DFS.SC_ID: CPR_EDGELIST.SC_ID_UPSTREAM}, axis=1)
268
+ .rename({SBML_DFS.SC_ID: NAPISTU_EDGELIST.SC_ID_UPSTREAM}, axis=1)
269
269
  .merge(
270
270
  sbml_dfs.reaction_species[
271
271
  sbml_dfs.reaction_species[SBML_DFS.STOICHIOMETRY] > 0
272
272
  ][[SBML_DFS.R_ID, SBML_DFS.SC_ID]].rename(
273
- {SBML_DFS.SC_ID: CPR_EDGELIST.SC_ID_DOWNSTREAM}, axis=1
273
+ {SBML_DFS.SC_ID: NAPISTU_EDGELIST.SC_ID_DOWNSTREAM}, axis=1
274
274
  )
275
275
  ),
276
276
  ]
@@ -282,16 +282,16 @@ def filter_to_direct_mechanistic_interactions(
282
282
  .merge(
283
283
  sbml_dfs.species[SBML_DFS.S_NAME]
284
284
  .to_frame()
285
- .rename({SBML_DFS.S_NAME: CPR_EDGELIST.S_NAME_UPSTREAM}, axis=1),
286
- left_on=CPR_EDGELIST.S_ID_UPSTREAM,
285
+ .rename({SBML_DFS.S_NAME: NAPISTU_EDGELIST.S_NAME_UPSTREAM}, axis=1),
286
+ left_on=NAPISTU_EDGELIST.S_ID_UPSTREAM,
287
287
  right_index=True,
288
288
  # add species metadata for matches
289
289
  )
290
290
  .merge(
291
291
  sbml_dfs.species[SBML_DFS.S_NAME]
292
292
  .to_frame()
293
- .rename({SBML_DFS.S_NAME: CPR_EDGELIST.S_NAME_DOWNSTREAM}, axis=1),
294
- left_on=CPR_EDGELIST.S_ID_DOWNSTREAM,
293
+ .rename({SBML_DFS.S_NAME: NAPISTU_EDGELIST.S_NAME_DOWNSTREAM}, axis=1),
294
+ left_on=NAPISTU_EDGELIST.S_ID_DOWNSTREAM,
295
295
  right_index=True,
296
296
  # add metadata for reactions where interaction occurs
297
297
  )
@@ -305,11 +305,11 @@ def filter_to_direct_mechanistic_interactions(
305
305
  edgelist_w_direct_mechanistic_interactions = edgelist_w_scids.merge(
306
306
  direct_edge_interactions[
307
307
  [
308
- CPR_EDGELIST.SC_ID_UPSTREAM,
309
- CPR_EDGELIST.SC_ID_DOWNSTREAM,
308
+ NAPISTU_EDGELIST.SC_ID_UPSTREAM,
309
+ NAPISTU_EDGELIST.SC_ID_DOWNSTREAM,
310
310
  SBML_DFS.R_ID,
311
- CPR_EDGELIST.S_NAME_UPSTREAM,
312
- CPR_EDGELIST.S_NAME_DOWNSTREAM,
311
+ NAPISTU_EDGELIST.S_NAME_UPSTREAM,
312
+ NAPISTU_EDGELIST.S_NAME_DOWNSTREAM,
313
313
  SBML_DFS.R_NAME,
314
314
  ]
315
315
  ]
@@ -369,23 +369,25 @@ def filter_to_indirect_mechanistic_interactions(
369
369
  edgelist_w_scids = paths._filter_paths_by_precomputed_distances(
370
370
  edgelist_w_scids.rename(
371
371
  {
372
- CPR_EDGELIST.SC_ID_UPSTREAM: CPR_EDGELIST.SC_ID_ORIGIN,
373
- CPR_EDGELIST.SC_ID_DOWNSTREAM: CPR_EDGELIST.SC_ID_DEST,
372
+ NAPISTU_EDGELIST.SC_ID_UPSTREAM: NAPISTU_EDGELIST.SC_ID_ORIGIN,
373
+ NAPISTU_EDGELIST.SC_ID_DOWNSTREAM: NAPISTU_EDGELIST.SC_ID_DEST,
374
374
  },
375
375
  axis=1,
376
376
  ),
377
377
  precomputed_distances,
378
378
  ).rename(
379
379
  {
380
- CPR_EDGELIST.SC_ID_ORIGIN: CPR_EDGELIST.SC_ID_UPSTREAM,
381
- CPR_EDGELIST.SC_ID_DEST: CPR_EDGELIST.SC_ID_DOWNSTREAM,
380
+ NAPISTU_EDGELIST.SC_ID_ORIGIN: NAPISTU_EDGELIST.SC_ID_UPSTREAM,
381
+ NAPISTU_EDGELIST.SC_ID_DEST: NAPISTU_EDGELIST.SC_ID_DOWNSTREAM,
382
382
  },
383
383
  axis=1,
384
384
  )
385
385
 
386
386
  # find paths from 1 upstream to all desired downstream sc_ids
387
387
  # (this is the convention with igraph)
388
- indexed_origin_vertices = edgelist_w_scids.set_index(CPR_EDGELIST.SC_ID_UPSTREAM)
388
+ indexed_origin_vertices = edgelist_w_scids.set_index(
389
+ NAPISTU_EDGELIST.SC_ID_UPSTREAM
390
+ )
389
391
 
390
392
  # loop through upstream cspecies and find paths to all downstream species
391
393
  global_dict = dict()
@@ -410,7 +412,7 @@ def filter_to_indirect_mechanistic_interactions(
410
412
  sbml_dfs,
411
413
  origin=an_origin_index,
412
414
  # find all unique destinations (as a list for compatibility with igraph dest)
413
- dest=origin_targets[CPR_EDGELIST.SC_ID_DOWNSTREAM].unique().tolist(),
415
+ dest=origin_targets[NAPISTU_EDGELIST.SC_ID_DOWNSTREAM].unique().tolist(),
414
416
  weight_var=NAPISTU_GRAPH_EDGES.WEIGHTS,
415
417
  )
416
418
 
@@ -483,7 +485,7 @@ def filter_to_indirect_mechanistic_interactions(
483
485
 
484
486
  indirect_shortest_paths = edgelist_w_scids.merge(
485
487
  all_shortest_paths,
486
- left_on=[CPR_EDGELIST.SC_ID_UPSTREAM, CPR_EDGELIST.SC_ID_DOWNSTREAM],
488
+ left_on=[NAPISTU_EDGELIST.SC_ID_UPSTREAM, NAPISTU_EDGELIST.SC_ID_DOWNSTREAM],
487
489
  right_on=[NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO],
488
490
  )
489
491
 
@@ -498,9 +500,9 @@ def _edgelist_to_scids_if_needed(
498
500
  ) -> pd.DataFrame:
499
501
  """Map a set of edgelist species to cspecies or skip if cspecies were provided."""
500
502
 
501
- if utils.match_pd_vars(edgelist, CPR_EDGELIST_REQ_VARS).are_present:
503
+ if utils.match_pd_vars(edgelist, NAPISTU_EDGELIST_REQ_VARS).are_present:
502
504
  logger.info(
503
- f"An edgelist with {', '.join(CPR_EDGELIST_REQ_VARS)} was provided; identifier matching will be skipped"
505
+ f"An edgelist with {', '.join(NAPISTU_EDGELIST_REQ_VARS)} was provided; identifier matching will be skipped"
504
506
  )
505
507
  return edgelist
506
508
  else:
@@ -33,7 +33,7 @@ def features_to_pathway_species(
33
33
  pd.Dataframe containing a "feature_identifiers_var" variable used to match entries
34
34
  species_identifiers: pd.DataFrame
35
35
  A table of molecular species identifiers produced from sbml_dfs.get_identifiers("species")
36
- generally using sbml_dfs_core.export_sbml_dfs()
36
+ generally using sbml_dfs.export_sbml_dfs()
37
37
  ontologies: set
38
38
  A set of ontologies used to match features to pathway species
39
39
  feature_identifiers_var: str
napistu/modify/gaps.py CHANGED
@@ -20,6 +20,7 @@ from napistu.constants import MINI_SBO_FROM_NAME
20
20
  from napistu.constants import SBOTERM_NAMES
21
21
  from napistu.constants import SOURCE_SPEC
22
22
  from napistu.ingestion.constants import EXCHANGE_COMPARTMENT
23
+ from napistu.network.constants import GRAPH_WIRING_APPROACHES
23
24
 
24
25
  logger = logging.getLogger(__name__)
25
26
 
@@ -463,7 +464,7 @@ def _identify_species_needing_transport_reactions(
463
464
 
464
465
  # create a directed graph
465
466
  directed_graph = net_create.create_napistu_graph(
466
- sbml_dfs, directed=True, graph_type="bipartite"
467
+ sbml_dfs, directed=True, wiring_approach=GRAPH_WIRING_APPROACHES.BIPARTITE
467
468
  )
468
469
 
469
470
  # consider each s_id and protein separately
@@ -7,17 +7,6 @@ from types import SimpleNamespace
7
7
  from napistu.constants import SBML_DFS
8
8
  from napistu.constants import SBOTERM_NAMES
9
9
 
10
- # Graph types
11
- NAPISTU_GRAPH_TYPES = SimpleNamespace(
12
- BIPARTITE="bipartite", REGULATORY="regulatory", SURROGATE="surrogate"
13
- )
14
-
15
- VALID_NAPISTU_GRAPH_TYPES = [
16
- NAPISTU_GRAPH_TYPES.BIPARTITE,
17
- NAPISTU_GRAPH_TYPES.REGULATORY,
18
- NAPISTU_GRAPH_TYPES.SURROGATE,
19
- ]
20
-
21
10
  NAPISTU_GRAPH = SimpleNamespace(VERTICES="vertices", EDGES="edges", METADATA="metadata")
22
11
 
23
12
  NAPISTU_GRAPH_DIRECTEDNESS = SimpleNamespace(
@@ -57,6 +46,67 @@ VALID_NAPISTU_GRAPH_NODE_TYPES = [
57
46
  NAPISTU_GRAPH_NODE_TYPES.SPECIES,
58
47
  ]
59
48
 
49
+ # translating an SBML_dfs -> NapistuGraph
50
+
51
+ GRAPH_WIRING_APPROACHES = SimpleNamespace(
52
+ BIPARTITE="bipartite", REGULATORY="regulatory", SURROGATE="surrogate"
53
+ )
54
+
55
+ VALID_GRAPH_WIRING_APPROACHES = list(GRAPH_WIRING_APPROACHES.__dict__.values())
56
+
57
+ GRAPH_WIRING_HIERARCHIES = {
58
+ # three tiers with reactions in the middle
59
+ # in a bipartite network molecules are connected to reactions but not other molecules
60
+ GRAPH_WIRING_APPROACHES.BIPARTITE: [
61
+ [
62
+ SBOTERM_NAMES.CATALYST,
63
+ SBOTERM_NAMES.INHIBITOR,
64
+ SBOTERM_NAMES.INTERACTOR,
65
+ SBOTERM_NAMES.MODIFIER,
66
+ SBOTERM_NAMES.REACTANT,
67
+ SBOTERM_NAMES.STIMULATOR,
68
+ ],
69
+ [NAPISTU_GRAPH_NODE_TYPES.REACTION],
70
+ [SBOTERM_NAMES.MODIFIED, SBOTERM_NAMES.PRODUCT],
71
+ ],
72
+ # the regulatory graph defines a hierarchy of upstream and downstream
73
+ # entities in a reaction
74
+ # modifier/stimulator/inhibitor -> catalyst -> reactant -> reaction -> product
75
+ GRAPH_WIRING_APPROACHES.REGULATORY: [
76
+ [SBOTERM_NAMES.INHIBITOR, SBOTERM_NAMES.MODIFIER, SBOTERM_NAMES.STIMULATOR],
77
+ [SBOTERM_NAMES.CATALYST],
78
+ [SBOTERM_NAMES.INTERACTOR, SBOTERM_NAMES.REACTANT],
79
+ [NAPISTU_GRAPH_NODE_TYPES.REACTION],
80
+ [SBOTERM_NAMES.MODIFIED, SBOTERM_NAMES.PRODUCT],
81
+ ],
82
+ # an alternative layout to regulatory where enzymes are downstream of substrates.
83
+ # this doesn't make much sense from a regulatory perspective because
84
+ # enzymes modify substrates not the other way around. but, it's what one might
85
+ # expect if catalysts are a surrogate for reactions as is the case for metabolic
86
+ # network layouts
87
+ GRAPH_WIRING_APPROACHES.SURROGATE: [
88
+ [SBOTERM_NAMES.INHIBITOR, SBOTERM_NAMES.MODIFIER, SBOTERM_NAMES.STIMULATOR],
89
+ [SBOTERM_NAMES.INTERACTOR, SBOTERM_NAMES.REACTANT],
90
+ [SBOTERM_NAMES.CATALYST],
91
+ [NAPISTU_GRAPH_NODE_TYPES.REACTION],
92
+ [SBOTERM_NAMES.MODIFIED, SBOTERM_NAMES.PRODUCT],
93
+ ],
94
+ }
95
+
96
+ # when should reaction vertices be excluded from the graph?
97
+
98
+ DROP_REACTIONS_WHEN = SimpleNamespace(
99
+ ALWAYS="always",
100
+ # if there are 2 participants
101
+ EDGELIST="edgelist",
102
+ # if there are 2 participants which are both "interactor"
103
+ SAME_TIER="same_tier",
104
+ )
105
+
106
+ VALID_DROP_REACTIONS_WHEN = list(DROP_REACTIONS_WHEN.__dict__.values())
107
+
108
+ # adding weights to NapistuGraph
109
+
60
110
  NAPISTU_WEIGHTING_STRATEGIES = SimpleNamespace(
61
111
  CALIBRATED="calibrated", MIXED="mixed", TOPOLOGY="topology", UNWEIGHTED="unweighted"
62
112
  )
@@ -116,40 +166,6 @@ VALID_NET_POLARITIES = [
116
166
  NET_POLARITY.AMBIGUOUS_INHIBITION,
117
167
  ]
118
168
 
119
- # the regulatory graph defines a hierarchy of upstream and downstream
120
- # entities in a reaction
121
- # modifier/stimulator/inhibitor -> catalyst -> reactant -> reaction -> product
122
-
123
- REGULATORY_GRAPH_HIERARCHY = [
124
- [SBOTERM_NAMES.MODIFIER, SBOTERM_NAMES.STIMULATOR, SBOTERM_NAMES.INHIBITOR],
125
- [SBOTERM_NAMES.CATALYST],
126
- [SBOTERM_NAMES.REACTANT],
127
- [NAPISTU_GRAPH_NODE_TYPES.REACTION],
128
- # normally we don't expect interactors to be defined because they are handled by
129
- # net_create._format_interactors_for_regulatory_graph() but include them here
130
- # until Issue #102 is solved
131
- [SBOTERM_NAMES.INTERACTOR],
132
- [SBOTERM_NAMES.PRODUCT],
133
- ]
134
-
135
- # an alternative layout to regulatory where enyzmes are downstream of substrates.
136
- # this doesn't make much sense from a regulatory perspective because
137
- # enzymes modify substrates not the other way around. but, its what one might
138
- # expect if catalysts are a surrogate for reactions as is the case for metabolic
139
- # network layouts
140
-
141
- SURROGATE_GRAPH_HIERARCHY = [
142
- [SBOTERM_NAMES.MODIFIER, SBOTERM_NAMES.STIMULATOR, SBOTERM_NAMES.INHIBITOR],
143
- [SBOTERM_NAMES.REACTANT],
144
- [SBOTERM_NAMES.CATALYST],
145
- [NAPISTU_GRAPH_NODE_TYPES.REACTION],
146
- # normally we don't expect interactors to be defined because they are handled by
147
- # net_create._format_interactors_for_regulatory_graph() but include them here
148
- # until Issue #102 is solved
149
- [SBOTERM_NAMES.INTERACTOR],
150
- [SBOTERM_NAMES.PRODUCT],
151
- ]
152
-
153
169
  NEIGHBORHOOD_NETWORK_TYPES = SimpleNamespace(
154
170
  DOWNSTREAM="downstream", HOURGLASS="hourglass", UPSTREAM="upstream"
155
171
  )
@@ -6,7 +6,7 @@ import pandas as pd
6
6
 
7
7
  from napistu import sbml_dfs_core
8
8
  from napistu.network import net_create
9
- from napistu.network.napistu_graph_core import NapistuGraph
9
+ from napistu.network.ng_core import NapistuGraph
10
10
 
11
11
  from napistu.constants import SBML_DFS, ENTITIES_W_DATA
12
12
  from napistu.network.constants import NAPISTU_GRAPH, DEFAULT_WT_TRANS, WEIGHTING_SPEC
@@ -22,7 +22,7 @@ from napistu.constants import SBML_DFS
22
22
  from napistu.constants import MINI_SBO_NAME_TO_POLARITY
23
23
  from napistu.constants import MINI_SBO_TO_NAME
24
24
 
25
- from napistu.network.constants import NAPISTU_GRAPH_TYPES
25
+ from napistu.network.constants import GRAPH_WIRING_APPROACHES
26
26
  from napistu.network.constants import NEIGHBORHOOD_NETWORK_TYPES
27
27
  from napistu.network.constants import VALID_NEIGHBORHOOD_NETWORK_TYPES
28
28
 
@@ -316,7 +316,7 @@ def create_neighborhood_prefix(network_type: str, order: int, top_n: int) -> str
316
316
  def load_neighborhoods_by_partition(
317
317
  selected_partition: int,
318
318
  neighborhood_outdir: str,
319
- graph_type: str = NAPISTU_GRAPH_TYPES.REGULATORY,
319
+ wiring_approach: str = GRAPH_WIRING_APPROACHES.REGULATORY,
320
320
  ) -> None:
321
321
  """
322
322
  Load Neighborhoods By Partition
@@ -380,7 +380,7 @@ def load_neighborhoods_by_partition(
380
380
  model_prefix="curated",
381
381
  network_dir=consensus_outdir,
382
382
  directed=True,
383
- graph_type=graph_type,
383
+ wiring_approach=wiring_approach,
384
384
  )
385
385
 
386
386
  all_neighborhoods_df, neighborhoods_dict = load_neighborhoods(