napistu 0.2.5.dev7__py3-none-any.whl → 0.3.1.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. napistu/__init__.py +1 -3
  2. napistu/__main__.py +126 -96
  3. napistu/constants.py +35 -41
  4. napistu/context/__init__.py +10 -0
  5. napistu/context/discretize.py +462 -0
  6. napistu/context/filtering.py +387 -0
  7. napistu/gcs/__init__.py +1 -1
  8. napistu/identifiers.py +74 -15
  9. napistu/indices.py +68 -0
  10. napistu/ingestion/__init__.py +1 -1
  11. napistu/ingestion/bigg.py +47 -62
  12. napistu/ingestion/constants.py +18 -133
  13. napistu/ingestion/gtex.py +113 -0
  14. napistu/ingestion/hpa.py +147 -0
  15. napistu/ingestion/sbml.py +0 -97
  16. napistu/ingestion/string.py +2 -2
  17. napistu/matching/__init__.py +10 -0
  18. napistu/matching/constants.py +18 -0
  19. napistu/matching/interactions.py +518 -0
  20. napistu/matching/mount.py +529 -0
  21. napistu/matching/species.py +510 -0
  22. napistu/mcp/__init__.py +7 -4
  23. napistu/mcp/__main__.py +128 -72
  24. napistu/mcp/client.py +16 -25
  25. napistu/mcp/codebase.py +201 -145
  26. napistu/mcp/component_base.py +170 -0
  27. napistu/mcp/config.py +223 -0
  28. napistu/mcp/constants.py +45 -2
  29. napistu/mcp/documentation.py +253 -136
  30. napistu/mcp/documentation_utils.py +13 -48
  31. napistu/mcp/execution.py +372 -305
  32. napistu/mcp/health.py +47 -65
  33. napistu/mcp/profiles.py +10 -6
  34. napistu/mcp/server.py +161 -80
  35. napistu/mcp/tutorials.py +139 -87
  36. napistu/modify/__init__.py +1 -1
  37. napistu/modify/gaps.py +1 -1
  38. napistu/network/__init__.py +1 -1
  39. napistu/network/constants.py +101 -34
  40. napistu/network/data_handling.py +388 -0
  41. napistu/network/ig_utils.py +351 -0
  42. napistu/network/napistu_graph_core.py +354 -0
  43. napistu/network/neighborhoods.py +40 -40
  44. napistu/network/net_create.py +373 -309
  45. napistu/network/net_propagation.py +47 -19
  46. napistu/network/{net_utils.py → ng_utils.py} +124 -272
  47. napistu/network/paths.py +67 -51
  48. napistu/network/precompute.py +11 -11
  49. napistu/ontologies/__init__.py +10 -0
  50. napistu/ontologies/constants.py +129 -0
  51. napistu/ontologies/dogma.py +243 -0
  52. napistu/ontologies/genodexito.py +649 -0
  53. napistu/ontologies/mygene.py +369 -0
  54. napistu/ontologies/renaming.py +198 -0
  55. napistu/rpy2/__init__.py +229 -86
  56. napistu/rpy2/callr.py +47 -77
  57. napistu/rpy2/constants.py +24 -23
  58. napistu/rpy2/rids.py +61 -648
  59. napistu/sbml_dfs_core.py +587 -222
  60. napistu/scverse/__init__.py +15 -0
  61. napistu/scverse/constants.py +28 -0
  62. napistu/scverse/loading.py +727 -0
  63. napistu/utils.py +118 -10
  64. {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dev1.dist-info}/METADATA +8 -3
  65. napistu-0.3.1.dev1.dist-info/RECORD +133 -0
  66. tests/conftest.py +22 -0
  67. tests/test_context_discretize.py +56 -0
  68. tests/test_context_filtering.py +267 -0
  69. tests/test_identifiers.py +100 -0
  70. tests/test_indices.py +65 -0
  71. tests/{test_edgelist.py → test_ingestion_napistu_edgelist.py} +2 -2
  72. tests/test_matching_interactions.py +108 -0
  73. tests/test_matching_mount.py +305 -0
  74. tests/test_matching_species.py +394 -0
  75. tests/test_mcp_config.py +193 -0
  76. tests/test_mcp_documentation_utils.py +12 -3
  77. tests/test_mcp_server.py +156 -19
  78. tests/test_network_data_handling.py +397 -0
  79. tests/test_network_ig_utils.py +23 -0
  80. tests/test_network_neighborhoods.py +19 -0
  81. tests/test_network_net_create.py +459 -0
  82. tests/test_network_ng_utils.py +30 -0
  83. tests/test_network_paths.py +56 -0
  84. tests/{test_precomputed_distances.py → test_network_precompute.py} +8 -6
  85. tests/test_ontologies_genodexito.py +58 -0
  86. tests/test_ontologies_mygene.py +39 -0
  87. tests/test_ontologies_renaming.py +110 -0
  88. tests/test_rpy2_callr.py +79 -0
  89. tests/test_rpy2_init.py +151 -0
  90. tests/test_sbml.py +0 -31
  91. tests/test_sbml_dfs_core.py +134 -10
  92. tests/test_scverse_loading.py +778 -0
  93. tests/test_set_coverage.py +2 -2
  94. tests/test_utils.py +121 -1
  95. napistu/mechanism_matching.py +0 -1353
  96. napistu/rpy2/netcontextr.py +0 -467
  97. napistu-0.2.5.dev7.dist-info/RECORD +0 -98
  98. tests/test_igraph.py +0 -367
  99. tests/test_mechanism_matching.py +0 -784
  100. tests/test_net_utils.py +0 -149
  101. tests/test_netcontextr.py +0 -105
  102. tests/test_rpy2.py +0 -61
  103. /napistu/ingestion/{cpr_edgelist.py → napistu_edgelist.py} +0 -0
  104. {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dev1.dist-info}/WHEEL +0 -0
  105. {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dev1.dist-info}/entry_points.txt +0 -0
  106. {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dev1.dist-info}/licenses/LICENSE +0 -0
  107. {napistu-0.2.5.dev7.dist-info → napistu-0.3.1.dev1.dist-info}/top_level.txt +0 -0
  108. /tests/{test_obo.py → test_ingestion_obo.py} +0 -0
@@ -0,0 +1,267 @@
1
+ from __future__ import annotations
2
+
3
+ import copy
4
+ import pytest
5
+ import pandas as pd
6
+ from napistu import sbml_dfs_core
7
+ from napistu.constants import SBML_DFS
8
+ from napistu.context.filtering import (
9
+ filter_species_by_attribute,
10
+ find_species_with_attribute,
11
+ _binarize_species_data,
12
+ filter_reactions_with_disconnected_cspecies,
13
+ )
14
+
15
+
16
@pytest.fixture
def sbml_dfs_with_test_data(sbml_dfs):
    """Add mock location and expression annotations for filtering tests.

    Only the first five species receive annotations; all other species in the
    model remain unannotated.
    """
    first_five = sbml_dfs.species.index[:5]

    # Per-species localization annotations.
    loc_df = pd.DataFrame(
        {
            "compartment": ["nucleus", "cytoplasm", "nucleus", "membrane", "cytoplasm"],
            "confidence": [0.9, 0.8, 0.7, 0.95, 0.85],
        },
        index=first_five,
    )
    sbml_dfs.add_species_data("location", loc_df)

    # Per-species expression annotations.
    expr_df = pd.DataFrame(
        {
            "is_expressed": [True, True, False, True, False],
            "expression_level": [100, 50, 0, 75, 0],
        },
        index=first_five,
    )
    sbml_dfs.add_species_data("expression", expr_df)

    return sbml_dfs
40
+
41
+
42
def test_find_species_to_filter_by_attribute(sbml_dfs_with_test_data):
    """Test the find_species_to_filter_by_attribute function."""
    location_data = sbml_dfs_with_test_data.species_data["location"]
    expression_data = sbml_dfs_with_test_data.species_data["expression"]
    # Only the first five species carry the mock annotations from the fixture,
    # so every hit must come from this set.
    annotated = set(sbml_dfs_with_test_data.species.index[:5])

    # Single categorical value.
    hits = find_species_with_attribute(location_data, "compartment", "nucleus")
    assert len(hits) == 2
    assert set(hits) <= annotated

    # List of categorical values.
    hits = find_species_with_attribute(
        location_data, "compartment", ["nucleus", "cytoplasm"]
    )
    assert len(hits) == 4
    assert set(hits) <= annotated

    # Negated match: everything annotated that is NOT in the nucleus.
    hits = find_species_with_attribute(
        location_data, "compartment", "nucleus", negate=True
    )
    assert len(hits) == 3
    assert set(hits) <= annotated

    # Boolean attribute.
    hits = find_species_with_attribute(expression_data, "is_expressed", True)
    assert len(hits) == 3
    assert set(hits) <= annotated

    # Numeric attribute.
    hits = find_species_with_attribute(location_data, "confidence", 0.9)
    assert len(hits) == 1
    assert set(hits) <= annotated
86
+
87
+
88
def test_filter_species_by_attribute(sbml_dfs_with_test_data):
    """Test filter_species_by_attribute in-place and copy modes, plus validation.

    Covers: in-place removal of matching species, ``inplace=False`` returning a
    new object while leaving the input untouched, error handling for unknown
    tables/attributes, and list-valued filters combined with ``negate=True``.
    """
    test_species = list(sbml_dfs_with_test_data.species.index[:5])
    original_species_count = len(sbml_dfs_with_test_data.species)

    # In-place filtering removes the two species annotated as "nucleus".
    result = filter_species_by_attribute(
        sbml_dfs_with_test_data, "location", "compartment", "nucleus"
    )
    assert result is None  # in-place mutators return None
    assert len(sbml_dfs_with_test_data.species) == original_species_count - 2
    remaining_test_species = [
        s for s in test_species if s in sbml_dfs_with_test_data.species.index
    ]
    # 3 annotated species left (cytoplasm, membrane, cytoplasm).
    assert len(remaining_test_species) == 3

    # inplace=False: the input object is unchanged and a filtered copy returns.
    sbml_dfs_copy = copy.deepcopy(sbml_dfs_with_test_data)
    expressed_count = sum(
        sbml_dfs_copy.species_data["expression"]["is_expressed"].iloc[:5]
    )
    filtered_sbml_dfs = filter_species_by_attribute(
        sbml_dfs_copy, "expression", "is_expressed", True, inplace=False
    )
    assert len(sbml_dfs_copy.species) == len(sbml_dfs_with_test_data.species)
    assert (
        len(filtered_sbml_dfs.species)
        == len(sbml_dfs_with_test_data.species) - expressed_count
    )

    # Unknown species_data table raises.
    with pytest.raises(ValueError, match="species_data_table .* not found"):
        filter_species_by_attribute(
            sbml_dfs_with_test_data, "nonexistent_table", "compartment", "nucleus"
        )

    # Unknown attribute raises.
    with pytest.raises(ValueError, match="attribute_name .* not found"):
        filter_species_by_attribute(
            sbml_dfs_with_test_data, "location", "nonexistent_attribute", "nucleus"
        )

    # List of values + negate=True: removes species whose compartment is NOT in
    # VALID_COMPARTMENTS, i.e. drops the "membrane" species (matching the
    # removal semantics demonstrated by the in-place case above).
    VALID_COMPARTMENTS = ["nucleus", "cytoplasm"]
    filtered_sbml_dfs = filter_species_by_attribute(
        sbml_dfs_with_test_data,
        "location",
        "compartment",
        VALID_COMPARTMENTS,
        negate=True,
        inplace=False,
    )

    # The membrane-annotated species must be gone from the filtered object.
    membrane_ids = find_species_with_attribute(
        sbml_dfs_with_test_data.species_data["location"], "compartment", "membrane"
    )
    assert not any(s in filtered_sbml_dfs.species.index for s in membrane_ids)

    # BUGFIX: the original asserted `all(DataFrame.isin(...))`, which iterates
    # over column *labels* (always-truthy strings) and therefore could never
    # fail. Check the actual compartment column instead.
    # NOTE(review): assumes species_data is trimmed alongside species when
    # filtering — confirm against filter_species_by_attribute's implementation.
    assert (
        filtered_sbml_dfs.species_data["location"]["compartment"]
        .isin(VALID_COMPARTMENTS)
        .all()
    )
159
+
160
+
161
def test_binarize_species_data():
    """_binarize_species_data keeps only boolean and 0/1-valued integer columns."""
    frame = pd.DataFrame(
        {
            "bool_col": [True, False, True],
            "binary_int": [1, 0, 1],
            "non_binary_int": [1, 2, 3],
            "float_col": [1.5, 2.5, 3.5],
            "str_col": ["a", "b", "c"],
        }
    )

    binarized = _binarize_species_data(frame)

    # Only the boolean and the 0/1 integer columns survive.
    assert set(binarized.columns) == {"bool_col", "binary_int"}

    # Booleans are cast to an integer dtype (platform-dependent width).
    assert binarized["bool_col"].dtype in ("int32", "int64")
    assert binarized["bool_col"].tolist() == [1, 0, 1]

    # Already-binary integers pass through unchanged.
    assert binarized["binary_int"].tolist() == [1, 0, 1]

    # A frame with no binary columns is rejected ...
    non_binary = pd.DataFrame(
        {
            "non_binary_int": [1, 2, 3],
            "float_col": [1.5, 2.5, 3.5],
        }
    )
    with pytest.raises(ValueError, match="No binary or boolean columns found"):
        _binarize_species_data(non_binary)

    # ... as is an empty frame.
    with pytest.raises(ValueError, match="No binary or boolean columns found"):
        _binarize_species_data(pd.DataFrame())
204
+
205
+
206
def test_filter_reactions_with_disconnected_cspecies(sbml_dfs):
    """Reactions whose species occupy disjoint compartments are filtered out."""
    # 1. Work with a handful of reactions.
    first_reactions = list(sbml_dfs.reactions.index[:5])

    # 2. Identify up to two "defining" species per reaction.
    annotated_rspecies = sbml_dfs_core.add_sbo_role(sbml_dfs.reaction_species)
    defining_species = (
        annotated_rspecies[annotated_rspecies[SBML_DFS.R_ID].isin(first_reactions)]
        .query("sbo_role == 'DEFINING'")
        # at most 1 record for an sc_id in a reaction (generally true anyways)
        .groupby([SBML_DFS.R_ID, SBML_DFS.SC_ID])
        .first()
        .reset_index(drop=False)
        .groupby(SBML_DFS.R_ID)
        .head(2)  # Take 2 defining species per reaction
    )

    # 3. Map compartmentalized species back to their underlying species IDs.
    species_info = defining_species.merge(
        sbml_dfs.compartmentalized_species[[SBML_DFS.S_ID]],
        left_on=SBML_DFS.SC_ID,
        right_index=True,
    )

    # Drop reactions with fewer than 2 distinct s_ids (transport reactions).
    valid_reactions = (
        species_info.groupby(SBML_DFS.R_ID)[SBML_DFS.S_ID]
        .nunique()
        .pipe(lambda counts: counts[counts >= 2])
        .index
    )
    species_info = species_info[species_info[SBML_DFS.R_ID].isin(valid_reactions)]

    # 4. Put the most common s_id in a compartment of its own so that every
    # reaction involving it becomes "disconnected" and should be removed.
    DISJOINT_S_ID = species_info.value_counts("s_id").index[0]
    disconnected_reactions = set(
        species_info["r_id"][species_info["s_id"] == DISJOINT_S_ID].tolist()
    )

    # Mock binary occurrence data: DISJOINT_S_ID lives in compartment_A,
    # everything else in compartment_B.
    mock_species_data = pd.DataFrame({SBML_DFS.S_ID: species_info["s_id"].unique()})
    mock_species_data["compartment_A"] = [
        1 if s_id == DISJOINT_S_ID else 0 for s_id in mock_species_data[SBML_DFS.S_ID]
    ]
    mock_species_data["compartment_B"] = [
        0 if s_id == DISJOINT_S_ID else 1 for s_id in mock_species_data[SBML_DFS.S_ID]
    ]
    mock_species_data.set_index(SBML_DFS.S_ID, inplace=True)

    sbml_dfs.add_species_data("test_data", mock_species_data)

    # Run the filter and confirm exactly the disconnected reactions are gone.
    filtered_sbml_dfs = filter_reactions_with_disconnected_cspecies(
        sbml_dfs, "test_data", inplace=False
    )

    removed_reactions = [
        r for r in first_reactions if r not in filtered_sbml_dfs.reactions.index
    ]

    assert set(removed_reactions) == disconnected_reactions
tests/test_identifiers.py CHANGED
@@ -5,6 +5,8 @@ import os
5
5
  import numpy as np
6
6
  import pandas as pd
7
7
  from napistu import identifiers
8
+ from napistu.constants import IDENTIFIERS, SBML_DFS
9
+ import pytest
8
10
 
9
11
  # logger = logging.getLogger()
10
12
  # logger.setLevel("DEBUG")
@@ -139,6 +141,100 @@ def test_reciprocal_ensembl_dicts():
139
141
  )
140
142
 
141
143
 
144
def test_df_to_identifiers_basic():
    """df_to_identifiers builds one Identifiers object per unique s_id."""
    df = pd.DataFrame(
        {
            "s_id": ["s1", "s1", "s2"],
            IDENTIFIERS.ONTOLOGY: ["ncbi_entrez_gene", "uniprot", "ncbi_entrez_gene"],
            IDENTIFIERS.IDENTIFIER: ["123", "P12345", "456"],
            IDENTIFIERS.URL: [
                "http://ncbi/123",
                "http://uniprot/P12345",
                "http://ncbi/456",
            ],
            IDENTIFIERS.BQB: ["is", "is", "is"],
        }
    )

    result = identifiers.df_to_identifiers(df, SBML_DFS.SPECIES)

    # A Series keyed by s_id, one Identifiers object per unique key.
    assert isinstance(result, pd.Series)
    assert len(result) == 2
    assert all(isinstance(entry, identifiers.Identifiers) for entry in result)

    # s1 carries both of its identifiers.
    s1_ids = result["s1"].ids
    assert len(s1_ids) == 2
    s1_values = {entry[IDENTIFIERS.IDENTIFIER] for entry in s1_ids}
    assert "123" in s1_values
    assert "P12345" in s1_values

    # s2 carries exactly its single identifier.
    s2_ids = result["s2"].ids
    assert len(s2_ids) == 1
    assert s2_ids[0][IDENTIFIERS.IDENTIFIER] == "456"
178
+
179
+
180
def test_df_to_identifiers_duplicates():
    """Repeated identical rows collapse to a single identifier entry."""
    row = {
        "s_id": "s1",
        IDENTIFIERS.ONTOLOGY: "ncbi_entrez_gene",
        IDENTIFIERS.IDENTIFIER: "123",
        IDENTIFIERS.URL: "http://ncbi/123",
        IDENTIFIERS.BQB: "is",
    }
    # Three copies of the same record for the same s_id.
    df = pd.DataFrame([row] * 3)

    result = identifiers.df_to_identifiers(df, SBML_DFS.SPECIES)

    # One s_id in the output, and the duplicate identifier is deduplicated.
    assert len(result) == 1
    assert len(result["s1"].ids) == 1
202
+
203
+
204
def test_df_to_identifiers_missing_columns():
    """Omitting required columns (URL and BQB here) raises a ValueError."""
    incomplete = pd.DataFrame(
        {
            "s_id": ["s1"],
            IDENTIFIERS.ONTOLOGY: ["ncbi_entrez_gene"],
            IDENTIFIERS.IDENTIFIER: ["123"],
        }
    )

    with pytest.raises(
        ValueError, match="The DataFrame does not contain the required columns"
    ):
        identifiers.df_to_identifiers(incomplete, SBML_DFS.SPECIES)
220
+
221
+
222
def test_df_to_identifiers_invalid_entity_type():
    """An unrecognized entity type raises a ValueError."""
    df = pd.DataFrame(
        {
            "s_id": ["s1"],
            IDENTIFIERS.ONTOLOGY: ["ncbi_entrez_gene"],
            IDENTIFIERS.IDENTIFIER: ["123"],
            IDENTIFIERS.URL: ["http://ncbi/123"],
            IDENTIFIERS.BQB: ["is"],
        }
    )

    with pytest.raises(ValueError, match="Invalid entity type"):
        identifiers.df_to_identifiers(df, "invalid_type")
236
+
237
+
142
238
  ################################################
143
239
  # __main__
144
240
  ################################################
@@ -149,3 +245,7 @@ if __name__ == "__main__":
149
245
  test_url_from_identifiers()
150
246
  test_parsing_ensembl_ids()
151
247
  test_reciprocal_ensembl_dicts()
248
+ test_df_to_identifiers_basic()
249
+ test_df_to_identifiers_duplicates()
250
+ test_df_to_identifiers_missing_columns()
251
+ test_df_to_identifiers_invalid_entity_type()
tests/test_indices.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import os
4
+ import datetime
4
5
 
5
6
  import pandas as pd
6
7
  import pytest
@@ -10,6 +11,70 @@ test_path = os.path.abspath(os.path.join(__file__, os.pardir))
10
11
  test_data = os.path.join(test_path, "test_data")
11
12
 
12
13
 
14
def test_create_pathway_index_df():
    """create_pathway_index_df assembles one row per model with derived fields."""
    model_keys = {"human": "model1", "mouse": "model2"}
    model_urls = {
        "human": "http://example.com/model1.xml",
        "mouse": "http://example.com/model2.xml",
    }
    model_species = {"human": "Homo sapiens", "mouse": "Mus musculus"}
    base_path = "/test/path"
    source_name = "TestSource"

    result = indices.create_pathway_index_df(
        model_keys=model_keys,
        model_urls=model_urls,
        model_species=model_species,
        base_path=base_path,
        source_name=source_name,
    )

    # The index is stamped with today's date in YYYYMMDD form.
    expected_date = datetime.date.today().strftime("%Y%m%d")

    assert isinstance(result, pd.DataFrame), "Result should be a pandas DataFrame"
    assert len(result) == 2, "Should have 2 rows for 2 models"

    expected_columns = {
        "url",
        "species",
        "sbml_path",
        "file",
        "date",
        "pathway_id",
        "name",
        "source",
    }
    assert set(result.columns) == expected_columns, "Missing required columns"

    # Spot-check the human model's row.
    human_row = result[result["pathway_id"] == "model1"].iloc[0]
    assert human_row["url"] == "http://example.com/model1.xml"
    assert human_row["species"] == "Homo sapiens"
    assert human_row["file"] == "model1.sbml"
    assert human_row["date"] == expected_date
    assert human_row["source"] == "TestSource"
    assert human_row["sbml_path"] == os.path.join(base_path, "model1.sbml")

    # A custom file extension propagates into the generated file name.
    result_custom_ext = indices.create_pathway_index_df(
        model_keys=model_keys,
        model_urls=model_urls,
        model_species=model_species,
        base_path=base_path,
        source_name=source_name,
        file_extension=".xml",
    )
    assert result_custom_ext.iloc[0]["file"].endswith(
        ".xml"
    ), "Custom extension not applied"
76
+
77
+
13
78
  def test_pwindex_from_file():
14
79
  pw_index_path = os.path.join(test_data, "pw_index.tsv")
15
80
  pw_index = indices.PWIndex(pw_index_path)
@@ -1,13 +1,13 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import pandas as pd
4
- from napistu.ingestion import cpr_edgelist
4
+ from napistu.ingestion import napistu_edgelist
5
5
 
6
6
 
7
7
  def test_edgelist_remove_reciprocal_reactions():
8
8
  edgelist = pd.DataFrame({"from": ["A", "B", "C", "D"], "to": ["B", "A", "D", "C"]})
9
9
 
10
- nondegenerate_edgelist = cpr_edgelist.remove_reciprocal_interactions(edgelist)
10
+ nondegenerate_edgelist = napistu_edgelist.remove_reciprocal_interactions(edgelist)
11
11
 
12
12
  assert nondegenerate_edgelist.shape == (2, 2)
13
13
 
@@ -0,0 +1,108 @@
1
+ import pandas as pd
2
+ from napistu.network import net_create
3
+
4
+ from napistu.network import precompute
5
+ from napistu.matching.interactions import (
6
+ edgelist_to_pathway_species,
7
+ edgelist_to_scids,
8
+ filter_to_direct_mechanistic_interactions,
9
+ filter_to_indirect_mechanistic_interactions,
10
+ )
11
+
12
+
13
+ def test_edgelist_to_pathway_species(sbml_dfs):
14
+
15
+ edgelist = pd.DataFrame(
16
+ [
17
+ {"identifier_upstream": "17996", "identifier_downstream": "16526"},
18
+ {"identifier_upstream": "15377", "identifier_downstream": "17544"},
19
+ {"identifier_upstream": "15378", "identifier_downstream": "57945"},
20
+ {"identifier_upstream": "57540", "identifier_downstream": "17996"},
21
+ ]
22
+ )
23
+ species_identifiers = sbml_dfs.get_identifiers("species").query("bqb == 'BQB_IS'")
24
+
25
+ edgelist_w_sids = edgelist_to_pathway_species(
26
+ edgelist, species_identifiers, ontologies={"chebi", "uniprot"}
27
+ )
28
+ assert edgelist_w_sids.shape == (4, 4)
29
+
30
+ egelist_w_scids = edgelist_to_scids(
31
+ edgelist, sbml_dfs, species_identifiers, ontologies={"chebi"}
32
+ )
33
+
34
+ assert egelist_w_scids.shape == (12, 6)
35
+
36
+ direct_interactions = filter_to_direct_mechanistic_interactions(
37
+ edgelist, sbml_dfs, species_identifiers, ontologies={"chebi"}
38
+ )
39
+
40
+ assert direct_interactions.shape == (2, 10)
41
+
42
+
43
+ def test_direct_and_indirect_mechanism_matching(sbml_dfs_glucose_metabolism):
44
+
45
+ napistu_graph = net_create.process_napistu_graph(sbml_dfs_glucose_metabolism)
46
+
47
+ edgelist = pd.DataFrame(
48
+ [
49
+ {
50
+ "identifier_upstream": "17925",
51
+ "identifier_downstream": "32966",
52
+ }, # glu, fbp
53
+ {
54
+ "identifier_upstream": "57634",
55
+ "identifier_downstream": "32966",
56
+ }, # f6p, fbp
57
+ {
58
+ "identifier_upstream": "32966",
59
+ "identifier_downstream": "57642",
60
+ }, # fbp, dhap
61
+ {
62
+ "identifier_upstream": "17925",
63
+ "identifier_downstream": "15361",
64
+ }, # glu, pyr
65
+ ]
66
+ )
67
+
68
+ species_identifiers = sbml_dfs_glucose_metabolism.get_identifiers("species")
69
+
70
+ direct_interactions = filter_to_direct_mechanistic_interactions(
71
+ formatted_edgelist=edgelist,
72
+ sbml_dfs=sbml_dfs_glucose_metabolism,
73
+ species_identifiers=species_identifiers,
74
+ ontologies={"chebi"},
75
+ )
76
+
77
+ assert direct_interactions.shape == (2, 10)
78
+
79
+ indirect_interactions = filter_to_indirect_mechanistic_interactions(
80
+ formatted_edgelist=edgelist,
81
+ sbml_dfs=sbml_dfs_glucose_metabolism,
82
+ species_identifiers=species_identifiers,
83
+ napistu_graph=napistu_graph,
84
+ ontologies={"chebi"},
85
+ precomputed_distances=None,
86
+ max_path_length=10,
87
+ )
88
+
89
+ assert indirect_interactions.shape == (6, 12)
90
+
91
+ # confirm that we get the same thing even when using precomputed distances
92
+ precomputed_distances = precompute.precompute_distances(
93
+ napistu_graph, weights_vars=["weights"]
94
+ )
95
+
96
+ indirect_interactions_w_precompute = filter_to_indirect_mechanistic_interactions(
97
+ formatted_edgelist=edgelist,
98
+ sbml_dfs=sbml_dfs_glucose_metabolism,
99
+ species_identifiers=species_identifiers,
100
+ napistu_graph=napistu_graph,
101
+ ontologies={"chebi"},
102
+ precomputed_distances=precomputed_distances,
103
+ max_path_length=10,
104
+ )
105
+
106
+ assert all(
107
+ indirect_interactions["weight"] == indirect_interactions_w_precompute["weight"]
108
+ )