napistu 0.2.5.dev7__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. napistu/__main__.py +126 -96
  2. napistu/constants.py +35 -41
  3. napistu/context/__init__.py +10 -0
  4. napistu/context/discretize.py +462 -0
  5. napistu/context/filtering.py +387 -0
  6. napistu/gcs/__init__.py +1 -1
  7. napistu/identifiers.py +74 -15
  8. napistu/indices.py +68 -0
  9. napistu/ingestion/__init__.py +1 -1
  10. napistu/ingestion/bigg.py +47 -62
  11. napistu/ingestion/constants.py +18 -133
  12. napistu/ingestion/gtex.py +113 -0
  13. napistu/ingestion/hpa.py +147 -0
  14. napistu/ingestion/sbml.py +0 -97
  15. napistu/ingestion/string.py +2 -2
  16. napistu/matching/__init__.py +10 -0
  17. napistu/matching/constants.py +18 -0
  18. napistu/matching/interactions.py +518 -0
  19. napistu/matching/mount.py +529 -0
  20. napistu/matching/species.py +510 -0
  21. napistu/mcp/__init__.py +7 -4
  22. napistu/mcp/__main__.py +128 -72
  23. napistu/mcp/client.py +16 -25
  24. napistu/mcp/codebase.py +201 -145
  25. napistu/mcp/component_base.py +170 -0
  26. napistu/mcp/config.py +223 -0
  27. napistu/mcp/constants.py +45 -2
  28. napistu/mcp/documentation.py +253 -136
  29. napistu/mcp/documentation_utils.py +13 -48
  30. napistu/mcp/execution.py +372 -305
  31. napistu/mcp/health.py +47 -65
  32. napistu/mcp/profiles.py +10 -6
  33. napistu/mcp/server.py +161 -80
  34. napistu/mcp/tutorials.py +139 -87
  35. napistu/modify/__init__.py +1 -1
  36. napistu/modify/gaps.py +1 -1
  37. napistu/network/__init__.py +1 -1
  38. napistu/network/constants.py +101 -34
  39. napistu/network/data_handling.py +388 -0
  40. napistu/network/ig_utils.py +351 -0
  41. napistu/network/napistu_graph_core.py +354 -0
  42. napistu/network/neighborhoods.py +40 -40
  43. napistu/network/net_create.py +373 -309
  44. napistu/network/net_propagation.py +47 -19
  45. napistu/network/{net_utils.py → ng_utils.py} +124 -272
  46. napistu/network/paths.py +67 -51
  47. napistu/network/precompute.py +11 -11
  48. napistu/ontologies/__init__.py +10 -0
  49. napistu/ontologies/constants.py +129 -0
  50. napistu/ontologies/dogma.py +243 -0
  51. napistu/ontologies/genodexito.py +649 -0
  52. napistu/ontologies/mygene.py +369 -0
  53. napistu/ontologies/renaming.py +198 -0
  54. napistu/rpy2/__init__.py +229 -86
  55. napistu/rpy2/callr.py +47 -77
  56. napistu/rpy2/constants.py +24 -23
  57. napistu/rpy2/rids.py +61 -648
  58. napistu/sbml_dfs_core.py +587 -222
  59. napistu/scverse/__init__.py +15 -0
  60. napistu/scverse/constants.py +28 -0
  61. napistu/scverse/loading.py +727 -0
  62. napistu/utils.py +118 -10
  63. {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dist-info}/METADATA +8 -3
  64. napistu-0.3.1.dist-info/RECORD +133 -0
  65. tests/conftest.py +22 -0
  66. tests/test_context_discretize.py +56 -0
  67. tests/test_context_filtering.py +267 -0
  68. tests/test_identifiers.py +100 -0
  69. tests/test_indices.py +65 -0
  70. tests/{test_edgelist.py → test_ingestion_napistu_edgelist.py} +2 -2
  71. tests/test_matching_interactions.py +108 -0
  72. tests/test_matching_mount.py +305 -0
  73. tests/test_matching_species.py +394 -0
  74. tests/test_mcp_config.py +193 -0
  75. tests/test_mcp_documentation_utils.py +12 -3
  76. tests/test_mcp_server.py +156 -19
  77. tests/test_network_data_handling.py +397 -0
  78. tests/test_network_ig_utils.py +23 -0
  79. tests/test_network_neighborhoods.py +19 -0
  80. tests/test_network_net_create.py +459 -0
  81. tests/test_network_ng_utils.py +30 -0
  82. tests/test_network_paths.py +56 -0
  83. tests/{test_precomputed_distances.py → test_network_precompute.py} +8 -6
  84. tests/test_ontologies_genodexito.py +58 -0
  85. tests/test_ontologies_mygene.py +39 -0
  86. tests/test_ontologies_renaming.py +110 -0
  87. tests/test_rpy2_callr.py +79 -0
  88. tests/test_rpy2_init.py +151 -0
  89. tests/test_sbml.py +0 -31
  90. tests/test_sbml_dfs_core.py +134 -10
  91. tests/test_scverse_loading.py +778 -0
  92. tests/test_set_coverage.py +2 -2
  93. tests/test_utils.py +121 -1
  94. napistu/mechanism_matching.py +0 -1353
  95. napistu/rpy2/netcontextr.py +0 -467
  96. napistu-0.2.5.dev7.dist-info/RECORD +0 -98
  97. tests/test_igraph.py +0 -367
  98. tests/test_mechanism_matching.py +0 -784
  99. tests/test_net_utils.py +0 -149
  100. tests/test_netcontextr.py +0 -105
  101. tests/test_rpy2.py +0 -61
  102. /napistu/ingestion/{cpr_edgelist.py → napistu_edgelist.py} +0 -0
  103. {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dist-info}/WHEEL +0 -0
  104. {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dist-info}/entry_points.txt +0 -0
  105. {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dist-info}/licenses/LICENSE +0 -0
  106. {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dist-info}/top_level.txt +0 -0
  107. /tests/{test_obo.py → test_ingestion_obo.py} +0 -0
napistu/ingestion/sbml.py CHANGED
@@ -15,9 +15,6 @@ from napistu import utils
15
15
 
16
16
  from napistu.constants import BQB
17
17
 
18
- from napistu.ingestion.constants import SBML_ANNOTATION_METHOD_GET_COMPARTMENT
19
- from napistu.ingestion.constants import SBML_ANNOTATION_METHOD_GET_REACTION
20
- from napistu.ingestion.constants import SBML_ANNOTATION_METHOD_GET_SPECIES
21
18
  from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_ID
22
19
  from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_IDENTIFIERS
23
20
  from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_NAME
@@ -517,100 +514,6 @@ def setup_cspecies(sbml_model: SBML) -> pd.DataFrame:
517
514
  return pd.DataFrame(comp_species).set_index(SMBL_REACTION_SPEC_SC_ID)
518
515
 
519
516
 
520
- def add_sbml_annotations(
521
- sbml_model: SBML, annotations: pd.DataFrame, save_path: str
522
- ) -> None:
523
- """
524
- Add SBML Annotations
525
-
526
- Add additional identifiers to an sbml file and save the updated document
527
-
528
- Parameters:
529
- sbml_model: SBML
530
- A .sbml model
531
- annotations: pd.DataFrame
532
- A table of annotations to add containing an "id" matching the
533
- primary key of an entity, "type" matching the type of entity,
534
- and "uri" representing the annotation to add.
535
- save_path: str
536
- Path to save the model to
537
-
538
- Returns:
539
- None
540
- """
541
-
542
- logger.warning(
543
- "add_sbml_annotations is deprecated and may be removed in a future version of rcpr; "
544
- "we are now adding these annotation during ingestion by sbml.sbml_df_from_sbml() rather "
545
- "than directly appending them to the raw .sbml"
546
- )
547
-
548
- if not isinstance(sbml_model, SBML):
549
- raise TypeError("sbml_model must be an SBML object")
550
-
551
- if not isinstance(annotations, pd.DataFrame):
552
- raise TypeError("annotations must be a pd.DataFrame")
553
-
554
- for i in range(0, annotations.shape[0]):
555
- annot_type = annotations["type"][i]
556
-
557
- if annot_type == "species":
558
- entity_fxn = SBML_ANNOTATION_METHOD_GET_SPECIES
559
- elif annot_type == "compartment":
560
- entity_fxn = SBML_ANNOTATION_METHOD_GET_COMPARTMENT
561
- elif annot_type == "reaction":
562
- entity_fxn = SBML_ANNOTATION_METHOD_GET_REACTION
563
- else:
564
- raise ValueError(
565
- f"{annot_type} is not a valid annotation type,"
566
- " valid types are species, compartment, and reaction"
567
- )
568
- # access the node to modify
569
- entity_fxn_method = getattr(sbml_model.model, entity_fxn)
570
- entity_node = entity_fxn_method(annotations["id"][i])
571
-
572
- # TO DO - check for a valid entity_node in case id is not found
573
-
574
- # set meta-id if there isn't one; required to add a node
575
- if not entity_node.isSetMetaId():
576
- add_metaid_code = entity_node.setMetaId(annotations["id"][i])
577
-
578
- if add_metaid_code != libsbml.LIBSBML_OPERATION_SUCCESS:
579
- raise ValueError(
580
- f"adding metaId to {annotations['id'][i]} failed"
581
- f" with return code {add_metaid_code} "
582
- f"({libsbml.OperationReturnValue_toString(add_metaid_code).strip()})"
583
- )
584
-
585
- # create a controlled vocabulary term
586
- cv = libsbml.CVTerm()
587
- cv.setQualifierType(libsbml.BIOLOGICAL_QUALIFIER)
588
- cv.setBiologicalQualifierType(libsbml.BQB_IS_VERSION_OF)
589
-
590
- add_resource_code = cv.addResource(annotations["uri"][i])
591
- if add_resource_code != libsbml.LIBSBML_OPERATION_SUCCESS:
592
- raise ValueError(
593
- "adding resource to CV term returned code"
594
- f" {add_resource_code} "
595
- f"({libsbml.OperationReturnValue_toString(add_resource_code).strip()})"
596
- f" rather than {libsbml.LIBSBML_OPERATION_SUCCESS} when "
597
- f"adding {annotations['uri'][i]} to {annotations['id'][i]}"
598
- )
599
-
600
- add_cv_code = entity_node.addCVTerm(cv)
601
- if add_cv_code != libsbml.LIBSBML_OPERATION_SUCCESS:
602
- raise ValueError(
603
- f"adding CV to entity returned code {add_cv_code} "
604
- f"({libsbml.OperationReturnValue_toString(add_cv_code).strip()})"
605
- f" rather than {libsbml.LIBSBML_OPERATION_SUCCESS} when adding"
606
- f" {annotations['uri'][i]} to {annotations['id'][i]}"
607
- )
608
-
609
- libsbml.writeSBML(sbml_model.document, save_path)
610
-
611
- return None
612
-
613
-
614
517
  def _get_gene_product_dict(gp):
615
518
  """Read a gene product node from an sbml file."""
616
519
  return {
@@ -10,7 +10,7 @@ from napistu import utils
10
10
  from napistu.constants import BQB
11
11
  from napistu.constants import COMPARTMENTS
12
12
  from napistu.constants import MINI_SBO_FROM_NAME
13
- from napistu.ingestion import cpr_edgelist
13
+ from napistu.ingestion import napistu_edgelist
14
14
  from napistu.ingestion.constants import SBML_SPECIES_DICT_IDENTIFIERS
15
15
  from napistu.ingestion.constants import SBML_SPECIES_DICT_NAME
16
16
  from napistu.ingestion.constants import SMBL_REACTION_DICT_IDENTIFIERS
@@ -127,7 +127,7 @@ def convert_string_to_sbml_dfs(
127
127
  # remove one edge since reciprocal edges are present; i.e., A-B and B-A
128
128
  # and attributes (e.g., combined_score are the same across both reciprocal
129
129
  # interactions
130
- uq_string_edgelist = cpr_edgelist.remove_reciprocal_interactions(
130
+ uq_string_edgelist = napistu_edgelist.remove_reciprocal_interactions(
131
131
  string_edgelist, extra_defining_vars=["combined_score"]
132
132
  )
133
133
 
@@ -0,0 +1,10 @@
1
+ from __future__ import annotations
2
+
3
+ from importlib.metadata import PackageNotFoundError
4
+ from importlib.metadata import version
5
+
6
+ try:
7
+ __version__ = version("napistu")
8
+ except PackageNotFoundError:
9
+ # package is not installed
10
+ pass
@@ -0,0 +1,18 @@
1
+ from types import SimpleNamespace
2
+
3
# Default name of the column holding feature identifiers.
FEATURE_ID_VAR_DEFAULT = "feature_id"

# Names of the aggregation methods accepted when resolving matches.
RESOLVE_MATCHES_AGGREGATORS = SimpleNamespace(
    WEIGHTED_MEAN="weighted_mean", MEAN="mean", FIRST="first", MAX="max"
)

# Name of a temporary weight column used during aggregation.
RESOLVE_MATCHES_TMP_WEIGHT_COL = "__tmp_weight_for_aggregation__"

# Names of the strategies for binding a dict of wide results.
BIND_DICT_OF_WIDE_RESULTS_STRATEGIES = SimpleNamespace(
    CONCATENATE="concatenate",
    # Misspelled key kept as an alias so existing callers keep working.
    CONTATENATE="concatenate",
    MULTIPLE_KEYS="multiple_keys",
    STAGGER="stagger",
)
BIND_DICT_OF_WIDE_RESULTS_STRATEGIES_LIST = [
    BIND_DICT_OF_WIDE_RESULTS_STRATEGIES.CONCATENATE,
    BIND_DICT_OF_WIDE_RESULTS_STRATEGIES.MULTIPLE_KEYS,
    BIND_DICT_OF_WIDE_RESULTS_STRATEGIES.STAGGER,
]
+ ]
@@ -0,0 +1,518 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+
5
+ import igraph as ig
6
+ import pandas as pd
7
+
8
+ from napistu import identifiers
9
+ from napistu import utils
10
+ from napistu import sbml_dfs_core
11
+ from napistu.matching.species import features_to_pathway_species
12
+ from napistu.constants import (
13
+ CPR_EDGELIST_REQ_VARS,
14
+ IDENTIFIER_EDGELIST_REQ_VARS,
15
+ CPR_EDGELIST,
16
+ SBML_DFS,
17
+ IDENTIFIERS,
18
+ )
19
+ from napistu.network.constants import NAPISTU_GRAPH_EDGES
20
+ from napistu.matching.constants import FEATURE_ID_VAR_DEFAULT
21
+ from napistu.network import paths
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
def edgelist_to_pathway_species(
    formatted_edgelist: pd.DataFrame,
    species_identifiers: pd.DataFrame,
    ontologies: set,
    feature_id_var: str = FEATURE_ID_VAR_DEFAULT,
    verbose: bool = False,
) -> pd.DataFrame:
    """
    Edgelist to Pathway Species

    Match an edgelist of molecular species pairs to their corresponding
    species in a pathway representation.

    Parameters:
    formatted_edgelist: pd.DataFrame
        Table containing "identifier_upstream" and "identifier_downstream"
        variables used to match entries
    species_identifiers: pd.DataFrame
        A table of molecular species identifiers produced from
        sbml_dfs.get_identifiers("species") generally using
        sbml_dfs_core.export_sbml_dfs()
    ontologies: set
        A set of ontologies used to match features to pathway species
    feature_id_var: str, default=FEATURE_ID_VAR_DEFAULT
        Name given to the column of distinct identifiers which is passed to
        features_to_pathway_species()
    verbose: bool, default=False
        Whether to print verbose output

    Returns:
    edges_on_pathway: pd.DataFrame
        formatted_edgelist with upstream features mapped to "s_id_upstream"
        and downstream species mapped to "s_id_downstream". Both merges use
        pandas' default inner join, so rows without a match on either side
        are dropped.

    Raises:
    ValueError
        If either identifier column is missing from formatted_edgelist
    """

    required_vars = {
        CPR_EDGELIST.IDENTIFIER_UPSTREAM,
        CPR_EDGELIST.IDENTIFIER_DOWNSTREAM,
    }
    missing_vars = required_vars.difference(set(formatted_edgelist.columns))

    if missing_vars:
        # sort so the error message is deterministic (set order is not)
        raise ValueError(
            f"{len(missing_vars)} required variables were "
            "missing from 'formatted_edgelist': "
            f"{', '.join(sorted(missing_vars))}"
        )

    # define all distinct identifiers in edgelist; name the column explicitly
    # rather than relying on the implicit label of an unnamed Series
    distinct_identifiers = (
        pd.concat(
            [
                formatted_edgelist[CPR_EDGELIST.IDENTIFIER_UPSTREAM],
                formatted_edgelist[CPR_EDGELIST.IDENTIFIER_DOWNSTREAM],
            ]
        )
        .drop_duplicates()
        .reset_index(drop=True)
        .to_frame(name=feature_id_var)
    )

    # merge edgelist identifiers with pathway identifiers to map s_ids to identifiers
    features_on_pathway = features_to_pathway_species(
        feature_identifiers=distinct_identifiers,
        species_identifiers=species_identifiers,
        ontologies=ontologies,
        feature_identifiers_var=feature_id_var,
        verbose=verbose,
    )

    identifier_to_s_id = features_on_pathway[[SBML_DFS.S_ID, IDENTIFIERS.IDENTIFIER]]

    def _lookup_for(s_id_col: str, identifier_col: str) -> pd.DataFrame:
        """Relabel the identifier -> s_id lookup for one side of the edge."""
        return identifier_to_s_id.rename(
            columns={
                SBML_DFS.S_ID: s_id_col,
                IDENTIFIERS.IDENTIFIER: identifier_col,
            }
        )

    # add s_ids of both upstream and downstream edges to pathway
    edges_on_pathway = formatted_edgelist.merge(
        _lookup_for(CPR_EDGELIST.S_ID_UPSTREAM, CPR_EDGELIST.IDENTIFIER_UPSTREAM)
    ).merge(
        _lookup_for(CPR_EDGELIST.S_ID_DOWNSTREAM, CPR_EDGELIST.IDENTIFIER_DOWNSTREAM)
    )

    return edges_on_pathway
118
+
119
+
120
def edgelist_to_scids(
    formatted_edgelist: pd.DataFrame,
    sbml_dfs: sbml_dfs_core.SBML_dfs,
    species_identifiers: pd.DataFrame,
    ontologies: set,
):
    """
    Edgelist to Compartmentalized Species IDs

    Map an edgelist of possible mechanistic interactions onto the
    compartmentalized species of a pathway model.

    Parameters:
    formatted_edgelist: pd.DataFrame
        Table containing "identifier_upstream" and "identifier_downstream"
        variables used to match entries
    sbml_dfs: sbml_dfs_core.SBML_dfs
        A mechanistic model
    species_identifiers: pd.DataFrame
        A table of molecular species identifiers produced from
        sbml_dfs.get_identifiers("species") generally using
        sbml_dfs_core.export_sbml_dfs()
    ontologies: set
        A set of ontologies used to match features to pathway species

    Returns:
    edgelist_w_scids: pd.DataFrame
        formatted_edgelist with upstream features mapped to "sc_id_upstream"
        and downstream species mapped to "sc_id_downstream"
    """

    identifiers._check_species_identifiers_table(species_identifiers)

    # step 1: identifiers -> species ids (s_ids)
    edges_on_pathway = edgelist_to_pathway_species(
        formatted_edgelist=formatted_edgelist,
        species_identifiers=species_identifiers,
        ontologies=ontologies,
    )

    # step 2: s_ids -> compartmentalized species ids (sc_ids)
    # NOTE(review): presumably the index of compartmentalized_species is the
    # sc_id, which reset_index() exposes as a column - confirm against SBML_dfs
    cspecies_lookup = sbml_dfs.compartmentalized_species[
        [SBML_DFS.S_ID]
    ].reset_index()
    upstream_lookup = cspecies_lookup.rename(
        columns={
            SBML_DFS.S_ID: CPR_EDGELIST.S_ID_UPSTREAM,
            SBML_DFS.SC_ID: CPR_EDGELIST.SC_ID_UPSTREAM,
        }
    )
    downstream_lookup = cspecies_lookup.rename(
        columns={
            SBML_DFS.S_ID: CPR_EDGELIST.S_ID_DOWNSTREAM,
            SBML_DFS.SC_ID: CPR_EDGELIST.SC_ID_DOWNSTREAM,
        }
    )

    distinct_s_id_pairs = edges_on_pathway[
        [CPR_EDGELIST.S_ID_UPSTREAM, CPR_EDGELIST.S_ID_DOWNSTREAM]
    ].drop_duplicates()
    sc_id_pairs = distinct_s_id_pairs.merge(upstream_lookup).merge(downstream_lookup)

    # step 3: attach the (s_id pair -> sc_id pair) lookup back onto the edges
    edgelist_w_scids = edges_on_pathway.merge(sc_id_pairs)

    n_mapped = edgelist_w_scids.shape[0]
    logger_msg = (
        f"{n_mapped} interactions mapped "
        "onto pairs of compartmentalized species in the mechanistic model"
    )
    # an empty result is surfaced as a warning rather than an info message
    log_fxn = logger.warning if n_mapped == 0 else logger.info
    log_fxn(logger_msg)

    return edgelist_w_scids
200
+
201
+
202
def filter_to_direct_mechanistic_interactions(
    formatted_edgelist: pd.DataFrame,
    sbml_dfs: sbml_dfs_core.SBML_dfs,
    species_identifiers: pd.DataFrame,
    ontologies: set,
) -> pd.DataFrame:
    """
    Filter to Direct Mechanistic Interactions

    Keep only those edges whose upstream and downstream compartmentalized
    species participate in the same reaction of the pathway model.

    Parameters:
    formatted_edgelist: pd.DataFrame
        Table containing "identifier_upstream" and "identifier_downstream"
        variables used to match entries
    sbml_dfs: sbml_dfs_core.SBML_dfs
        A mechanistic model
    species_identifiers: pd.DataFrame
        A table of molecular species identifiers produced from
        sbml_dfs.get_identifiers("species") generally using
        sbml_dfs_core.export_sbml_dfs()
    ontologies: set
        A set of ontologies used to match features to pathway species

    Returns:
    edgelist_w_direct_mechanistic_interactions: pd.DataFrame
        formatted_edgelist filtered to mechanistic reactions present in the
        pathway representation, annotated with the reaction id and name and
        the upstream/downstream species names
    """

    edgelist_w_scids = _edgelist_to_scids_if_needed(
        formatted_edgelist, sbml_dfs, species_identifiers, ontologies
    )

    # reduce to distinct sc_id pairs
    sc_id_pairs = edgelist_w_scids[list(CPR_EDGELIST_REQ_VARS)].drop_duplicates()

    reaction_species = sbml_dfs.reaction_species
    stoichiometry = reaction_species[SBML_DFS.STOICHIOMETRY]
    participant_cols = [SBML_DFS.R_ID, SBML_DFS.SC_ID]

    def _as_upstream(row_mask) -> pd.DataFrame:
        """Selected reaction participants labelled as the upstream cspecies."""
        return reaction_species[row_mask][participant_cols].rename(
            columns={SBML_DFS.SC_ID: CPR_EDGELIST.SC_ID_UPSTREAM}
        )

    def _as_downstream(row_mask) -> pd.DataFrame:
        """Selected reaction participants labelled as the downstream cspecies."""
        return reaction_species[row_mask][participant_cols].rename(
            columns={SBML_DFS.SC_ID: CPR_EDGELIST.SC_ID_DOWNSTREAM}
        )

    is_modifier = stoichiometry == 0
    is_substrate = stoichiometry < 0
    is_product = stoichiometry > 0

    # define all existing direct regulatory interactions; each merge pairs
    # upstream and downstream participants of the same reaction
    pathway_interactions = pd.concat(
        [
            # modifiers affect substrates
            _as_upstream(is_modifier).merge(_as_downstream(is_substrate)),
            # substrates affect products
            _as_upstream(is_substrate).merge(_as_downstream(is_product)),
            # modifiers affect products
            _as_upstream(is_modifier).merge(_as_downstream(is_product)),
        ]
    ).reset_index(drop=True)

    species_names = sbml_dfs.species[SBML_DFS.S_NAME].to_frame()
    upstream_names = species_names.rename(
        columns={SBML_DFS.S_NAME: CPR_EDGELIST.S_NAME_UPSTREAM}
    )
    downstream_names = species_names.rename(
        columns={SBML_DFS.S_NAME: CPR_EDGELIST.S_NAME_DOWNSTREAM}
    )
    reaction_names = sbml_dfs.reactions[SBML_DFS.R_NAME].to_frame()

    # filter pathway interactions based on matches to sc_id_pairs
    direct_edge_interactions = sc_id_pairs.merge(pathway_interactions)
    # add species metadata for matches
    direct_edge_interactions = direct_edge_interactions.merge(
        upstream_names,
        left_on=CPR_EDGELIST.S_ID_UPSTREAM,
        right_index=True,
    )
    direct_edge_interactions = direct_edge_interactions.merge(
        downstream_names,
        left_on=CPR_EDGELIST.S_ID_DOWNSTREAM,
        right_index=True,
    )
    # add metadata for reactions where interaction occurs
    direct_edge_interactions = direct_edge_interactions.merge(
        reaction_names,
        left_on=SBML_DFS.R_ID,
        right_index=True,
    )

    annotation_cols = [
        CPR_EDGELIST.SC_ID_UPSTREAM,
        CPR_EDGELIST.SC_ID_DOWNSTREAM,
        SBML_DFS.R_ID,
        CPR_EDGELIST.S_NAME_UPSTREAM,
        CPR_EDGELIST.S_NAME_DOWNSTREAM,
        SBML_DFS.R_NAME,
    ]
    edgelist_w_direct_mechanistic_interactions = edgelist_w_scids.merge(
        direct_edge_interactions[annotation_cols]
    )

    return edgelist_w_direct_mechanistic_interactions
319
+
320
+
321
def filter_to_indirect_mechanistic_interactions(
    formatted_edgelist: pd.DataFrame,
    sbml_dfs: sbml_dfs_core.SBML_dfs,
    species_identifiers: pd.DataFrame,
    napistu_graph: ig.Graph,
    ontologies: set,
    precomputed_distances=None,
    max_path_length=10,
) -> pd.DataFrame | None:
    """
    Filter to Indirect Mechanistic Interactions

    Filter an edgelist to indirect mechanistic interactions.
    Indirect relationships are identified by searching a
    network for paths from an upstream species to a downstream species

    Parameters:
    formatted_edgelist: pd.DataFrame
        Table containing "identifier_upstream" and "identifier_downstream"
        variables used to match entries
    sbml_dfs: sbml_dfs_core.SBML_dfs
        A mechanistic model
    species_identifiers: pandas.DataFrame
        A table of molecular species identifiers produced from
        sbml_dfs.get_identifiers("species") generally using
        sbml_dfs_core.export_sbml_dfs()
    napistu_graph: igraph.Graph
        A network representation of the sbml_dfs model
    ontologies: set
        A set of ontologies used to match features to pathway species
    precomputed_distances: None or pd.DataFrame
        Path lengths and weights between pairs of cspecies. When provided,
        the edgelist is pre-filtered with
        paths._filter_paths_by_precomputed_distances() before path finding.
    max_path_length: int
        Maximum number of steps to consider; longer paths are discarded.

    Returns:
    edgelist_w_indirect_mechanistic_interactions: pd.DataFrame or None
        formatted_edgelist filtered to mechanistic reactions which can be
        described by an indirect mechanism. The mechanism is described by a
        path weight, length, and a vpath and epath list of vertices and edges
        which were traversed to create the path. None is returned (after a
        warning is logged) if no pair could be linked by a path.

    Raises:
    ValueError
        If the destination of a path cannot be determined from its final edge
    """

    edgelist_w_scids = _edgelist_to_scids_if_needed(
        formatted_edgelist, sbml_dfs, species_identifiers, ontologies
    )

    if precomputed_distances is not None:
        # rename to match conventions in precomputed_distances
        # filter by these precomputed distances and then restore naming
        edgelist_w_scids = paths._filter_paths_by_precomputed_distances(
            edgelist_w_scids.rename(
                {
                    CPR_EDGELIST.SC_ID_UPSTREAM: CPR_EDGELIST.SC_ID_ORIGIN,
                    CPR_EDGELIST.SC_ID_DOWNSTREAM: CPR_EDGELIST.SC_ID_DEST,
                },
                axis=1,
            ),
            precomputed_distances,
        ).rename(
            {
                CPR_EDGELIST.SC_ID_ORIGIN: CPR_EDGELIST.SC_ID_UPSTREAM,
                CPR_EDGELIST.SC_ID_DEST: CPR_EDGELIST.SC_ID_DOWNSTREAM,
            },
            axis=1,
        )

    # find paths from 1 upstream to all desired downstream sc_ids
    # (this is the convention with igraph)
    indexed_origin_vertices = edgelist_w_scids.set_index(CPR_EDGELIST.SC_ID_UPSTREAM)

    # loop through upstream cspecies and find paths to all downstream species
    paths_by_origin = {}
    for an_origin_index in indexed_origin_vertices.index.unique():
        # if indexing only a single entry pd.DataFrame becomes a pd.Series
        # convert back to DataFrame for consistency
        origin_targets = utils.ensure_pd_df(
            indexed_origin_vertices.loc[an_origin_index]
        )

        # log entry for debugging
        logger.debug(
            f"finding paths from {an_origin_index} to "
            f"{origin_targets.shape[0]} target vertices"
        )

        # find all paths from indexed_origin to desired destination
        shortest_paths = paths.find_shortest_reaction_paths(
            napistu_graph,
            sbml_dfs,
            origin=an_origin_index,
            # find all unique destinations (as a list for compatibility with igraph dest)
            dest=origin_targets[CPR_EDGELIST.SC_ID_DOWNSTREAM].unique().tolist(),
            weight_var=NAPISTU_GRAPH_EDGES.WEIGHTS,
        )

        if shortest_paths is None:
            continue

        vertices, edges = shortest_paths
        origin_path_summaries = _summarize_paths_from_origin(
            an_origin_index, vertices, edges, max_path_length
        )

        if origin_path_summaries:
            paths_by_origin[an_origin_index] = pd.DataFrame(origin_path_summaries)

    if not paths_by_origin:
        logger.warning(
            "None of the provide molecular pairs could be mechanistically linked with a network path"
        )
        return None

    all_shortest_paths = pd.concat(paths_by_origin.values())

    indirect_shortest_paths = edgelist_w_scids.merge(
        all_shortest_paths,
        left_on=[CPR_EDGELIST.SC_ID_UPSTREAM, CPR_EDGELIST.SC_ID_DOWNSTREAM],
        right_on=[NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO],
    )

    return indirect_shortest_paths


def _summarize_paths_from_origin(
    origin, vertices: pd.DataFrame, edges: pd.DataFrame, max_path_length: int
) -> list:
    """Summarize each path leaving `origin` as a pd.Series (from, to, weight, path_length, vpath, epath)."""

    indexed_edges = edges.set_index("path")
    indexed_vertices = vertices.set_index("path")

    path_summaries = []
    for path_id in indexed_edges.index.unique():
        one_path = indexed_edges.loc[path_id]

        # a single-edge path is returned by .loc as a Series; make sure that
        # we are working with a DF
        if isinstance(one_path, pd.Series):
            one_path = one_path.to_frame().T

        n_steps = one_path.shape[0]
        if n_steps > max_path_length:
            continue

        # find the destination node
        # this is annoying because if the graph is undirected
        # its not clear if the from or to edge is the actual destination
        # when taking advantage of the fact that igraph lets you
        # look up multiple destinations at once this information is lost
        ancestor_species = {origin}
        if n_steps > 1:
            penultimate_edge = one_path.iloc[n_steps - 2]
            ancestor_species = ancestor_species.union(
                {
                    penultimate_edge[NAPISTU_GRAPH_EDGES.FROM],
                    penultimate_edge[NAPISTU_GRAPH_EDGES.TO],
                }
            )

        terminal_edge = one_path.iloc[n_steps - 1]
        # the destination is the end of the final edge not already visited
        ending_cspecies = {
            terminal_edge[NAPISTU_GRAPH_EDGES.FROM],
            terminal_edge[NAPISTU_GRAPH_EDGES.TO],
        }.difference(ancestor_species)

        if len(ending_cspecies) != 1:
            raise ValueError(
                "The terminal edge could not be determined when summarizing paths"
            )

        path_summaries.append(
            pd.Series(
                {
                    NAPISTU_GRAPH_EDGES.FROM: origin,
                    NAPISTU_GRAPH_EDGES.TO: ending_cspecies.pop(),
                    "weight": sum(one_path[NAPISTU_GRAPH_EDGES.WEIGHTS]),
                    "path_length": n_steps,
                    "vpath": indexed_vertices.loc[path_id],
                    "epath": one_path,
                }
            )
        )

    return path_summaries
491
+
492
+
493
def _edgelist_to_scids_if_needed(
    edgelist: pd.DataFrame,
    sbml_dfs: sbml_dfs_core.SBML_dfs,
    species_identifiers: pd.DataFrame,
    ontologies: set,
) -> pd.DataFrame:
    """
    Return an edgelist keyed by compartmentalized species.

    If the edgelist already carries every variable in CPR_EDGELIST_REQ_VARS
    it is returned untouched. Otherwise it must carry the variables in
    IDENTIFIER_EDGELIST_REQ_VARS and is mapped with edgelist_to_scids().
    """

    already_mapped = utils.match_pd_vars(edgelist, CPR_EDGELIST_REQ_VARS).are_present
    if already_mapped:
        logger.info(
            f"An edgelist with {', '.join(CPR_EDGELIST_REQ_VARS)} was provided; identifier matching will be skipped"
        )
        return edgelist

    # validate inputs before attempting identifier-based matching
    utils.match_pd_vars(edgelist, IDENTIFIER_EDGELIST_REQ_VARS).assert_present()
    identifiers._check_species_identifiers_table(species_identifiers)

    return edgelist_to_scids(
        edgelist,
        sbml_dfs=sbml_dfs,
        species_identifiers=species_identifiers,
        ontologies=ontologies,
    )