napistu 0.2.5.dev6__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__main__.py +126 -96
- napistu/constants.py +35 -41
- napistu/context/__init__.py +10 -0
- napistu/context/discretize.py +462 -0
- napistu/context/filtering.py +387 -0
- napistu/gcs/__init__.py +1 -1
- napistu/identifiers.py +74 -15
- napistu/indices.py +68 -0
- napistu/ingestion/__init__.py +1 -1
- napistu/ingestion/bigg.py +47 -62
- napistu/ingestion/constants.py +18 -133
- napistu/ingestion/gtex.py +113 -0
- napistu/ingestion/hpa.py +147 -0
- napistu/ingestion/sbml.py +0 -97
- napistu/ingestion/string.py +2 -2
- napistu/matching/__init__.py +10 -0
- napistu/matching/constants.py +18 -0
- napistu/matching/interactions.py +518 -0
- napistu/matching/mount.py +529 -0
- napistu/matching/species.py +510 -0
- napistu/mcp/__init__.py +7 -4
- napistu/mcp/__main__.py +128 -72
- napistu/mcp/client.py +16 -25
- napistu/mcp/codebase.py +201 -153
- napistu/mcp/component_base.py +170 -0
- napistu/mcp/config.py +223 -0
- napistu/mcp/constants.py +45 -2
- napistu/mcp/documentation.py +253 -136
- napistu/mcp/documentation_utils.py +13 -48
- napistu/mcp/execution.py +372 -305
- napistu/mcp/health.py +49 -67
- napistu/mcp/profiles.py +10 -6
- napistu/mcp/server.py +161 -80
- napistu/mcp/tutorials.py +139 -87
- napistu/modify/__init__.py +1 -1
- napistu/modify/gaps.py +1 -1
- napistu/network/__init__.py +1 -1
- napistu/network/constants.py +101 -34
- napistu/network/data_handling.py +388 -0
- napistu/network/ig_utils.py +351 -0
- napistu/network/napistu_graph_core.py +354 -0
- napistu/network/neighborhoods.py +40 -40
- napistu/network/net_create.py +373 -309
- napistu/network/net_propagation.py +47 -19
- napistu/network/{net_utils.py → ng_utils.py} +124 -272
- napistu/network/paths.py +67 -51
- napistu/network/precompute.py +11 -11
- napistu/ontologies/__init__.py +10 -0
- napistu/ontologies/constants.py +129 -0
- napistu/ontologies/dogma.py +243 -0
- napistu/ontologies/genodexito.py +649 -0
- napistu/ontologies/mygene.py +369 -0
- napistu/ontologies/renaming.py +198 -0
- napistu/rpy2/__init__.py +229 -86
- napistu/rpy2/callr.py +47 -77
- napistu/rpy2/constants.py +24 -23
- napistu/rpy2/rids.py +61 -648
- napistu/sbml_dfs_core.py +587 -222
- napistu/scverse/__init__.py +15 -0
- napistu/scverse/constants.py +28 -0
- napistu/scverse/loading.py +727 -0
- napistu/utils.py +118 -10
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/METADATA +8 -3
- napistu-0.3.1.dist-info/RECORD +133 -0
- tests/conftest.py +22 -0
- tests/test_context_discretize.py +56 -0
- tests/test_context_filtering.py +267 -0
- tests/test_identifiers.py +100 -0
- tests/test_indices.py +65 -0
- tests/{test_edgelist.py → test_ingestion_napistu_edgelist.py} +2 -2
- tests/test_matching_interactions.py +108 -0
- tests/test_matching_mount.py +305 -0
- tests/test_matching_species.py +394 -0
- tests/test_mcp_config.py +193 -0
- tests/test_mcp_documentation_utils.py +12 -3
- tests/test_mcp_server.py +356 -0
- tests/test_network_data_handling.py +397 -0
- tests/test_network_ig_utils.py +23 -0
- tests/test_network_neighborhoods.py +19 -0
- tests/test_network_net_create.py +459 -0
- tests/test_network_ng_utils.py +30 -0
- tests/test_network_paths.py +56 -0
- tests/{test_precomputed_distances.py → test_network_precompute.py} +8 -6
- tests/test_ontologies_genodexito.py +58 -0
- tests/test_ontologies_mygene.py +39 -0
- tests/test_ontologies_renaming.py +110 -0
- tests/test_rpy2_callr.py +79 -0
- tests/test_rpy2_init.py +151 -0
- tests/test_sbml.py +0 -31
- tests/test_sbml_dfs_core.py +134 -10
- tests/test_scverse_loading.py +778 -0
- tests/test_set_coverage.py +2 -2
- tests/test_utils.py +121 -1
- napistu/mechanism_matching.py +0 -1353
- napistu/rpy2/netcontextr.py +0 -467
- napistu-0.2.5.dev6.dist-info/RECORD +0 -97
- tests/test_igraph.py +0 -367
- tests/test_mechanism_matching.py +0 -784
- tests/test_net_utils.py +0 -149
- tests/test_netcontextr.py +0 -105
- tests/test_rpy2.py +0 -61
- /napistu/ingestion/{cpr_edgelist.py → napistu_edgelist.py} +0 -0
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/WHEEL +0 -0
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/entry_points.txt +0 -0
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/top_level.txt +0 -0
- /tests/{test_obo.py → test_ingestion_obo.py} +0 -0
@@ -0,0 +1,267 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import copy
|
4
|
+
import pytest
|
5
|
+
import pandas as pd
|
6
|
+
from napistu import sbml_dfs_core
|
7
|
+
from napistu.constants import SBML_DFS
|
8
|
+
from napistu.context.filtering import (
|
9
|
+
filter_species_by_attribute,
|
10
|
+
find_species_with_attribute,
|
11
|
+
_binarize_species_data,
|
12
|
+
filter_reactions_with_disconnected_cspecies,
|
13
|
+
)
|
14
|
+
|
15
|
+
|
16
|
+
@pytest.fixture
def sbml_dfs_with_test_data(sbml_dfs):
    """Return the ``sbml_dfs`` fixture with two species-data tables attached.

    Both tables are indexed by the first five species IDs:

    * ``"location"`` with ``compartment`` and ``confidence`` columns
    * ``"expression"`` with ``is_expressed`` and ``expression_level`` columns

    The same ``sbml_dfs`` object that was passed in is returned.
    """
    annotated_ids = sbml_dfs.species.index[:5]

    # Location annotations: two nucleus, two cytoplasm and one membrane species
    location_columns = {
        "compartment": ["nucleus", "cytoplasm", "nucleus", "membrane", "cytoplasm"],
        "confidence": [0.9, 0.8, 0.7, 0.95, 0.85],
    }
    location_data = pd.DataFrame(index=annotated_ids, data=location_columns)
    sbml_dfs.add_species_data("location", location_data)

    # Expression annotations: three expressed and two non-expressed species
    expression_columns = {
        "is_expressed": [True, True, False, True, False],
        "expression_level": [100, 50, 0, 75, 0],
    }
    expression_data = pd.DataFrame(index=annotated_ids, data=expression_columns)
    sbml_dfs.add_species_data("expression", expression_data)

    return sbml_dfs
|
40
|
+
|
41
|
+
|
42
|
+
def test_find_species_to_filter_by_attribute(sbml_dfs_with_test_data):
    """Exercise find_species_with_attribute on the fixture's species data.

    Covers a single string value, a list of values, a negated match, a
    boolean value and a numeric value. Each query must return the expected
    number of hits, all drawn from the five annotated species.
    """
    # The five species that carry annotations in the fixture
    test_species = list(sbml_dfs_with_test_data.species.index[:5])
    species_data = sbml_dfs_with_test_data.species_data

    def _check(matches, expected_count):
        assert len(matches) == expected_count
        assert all(s_id in test_species for s_id in matches)

    # Single value
    nucleus_hits = find_species_with_attribute(
        species_data["location"], "compartment", "nucleus"
    )
    _check(nucleus_hits, 2)

    # List of values
    nucleus_or_cytoplasm_hits = find_species_with_attribute(
        species_data["location"],
        "compartment",
        ["nucleus", "cytoplasm"],
    )
    _check(nucleus_or_cytoplasm_hits, 4)

    # Negated match: everything that is not in the nucleus
    non_nucleus_hits = find_species_with_attribute(
        species_data["location"],
        "compartment",
        "nucleus",
        negate=True,
    )
    _check(non_nucleus_hits, 3)

    # Boolean value
    expressed_hits = find_species_with_attribute(
        species_data["expression"], "is_expressed", True
    )
    _check(expressed_hits, 3)

    # Numeric value
    confident_hits = find_species_with_attribute(
        species_data["location"], "confidence", 0.9
    )
    _check(confident_hits, 1)
|
86
|
+
|
87
|
+
|
88
|
+
def test_filter_species_by_attribute(sbml_dfs_with_test_data):
    """Test the filter_species_by_attribute function.

    Covers in-place filtering (returns ``None`` and shrinks the species
    table), filtering into a new object (``inplace=False``), the errors raised
    for an unknown species-data table or attribute, and negated filtering
    with a list of values.
    """
    # Get the first 5 species IDs for reference
    test_species = list(sbml_dfs_with_test_data.species.index[:5])
    original_species_count = len(sbml_dfs_with_test_data.species)

    # Test filtering in place - should remove species in nucleus
    result = filter_species_by_attribute(
        sbml_dfs_with_test_data, "location", "compartment", "nucleus"
    )
    assert result is None
    # Should have removed the nucleus species from the test set
    assert len(sbml_dfs_with_test_data.species) == original_species_count - 2
    # Check that species in nucleus were removed
    remaining_test_species = [
        s for s in test_species if s in sbml_dfs_with_test_data.species.index
    ]
    assert (
        len(remaining_test_species) == 3
    )  # Should have 3 test species left (cytoplasm, membrane, cytoplasm)

    # Test filtering with new object - should remove expressed species
    sbml_dfs_copy = copy.deepcopy(sbml_dfs_with_test_data)

    # Count how many species are expressed in our test data
    expressed_count = sum(
        sbml_dfs_copy.species_data["expression"]["is_expressed"].iloc[:5]
    )

    filtered_sbml_dfs = filter_species_by_attribute(
        sbml_dfs_copy, "expression", "is_expressed", True, inplace=False
    )
    # Original should be unchanged
    assert len(sbml_dfs_copy.species) == len(sbml_dfs_with_test_data.species)
    # New object should have removed expressed species from our test set
    assert (
        len(filtered_sbml_dfs.species)
        == len(sbml_dfs_with_test_data.species) - expressed_count
    )

    # Test filtering with invalid table name
    with pytest.raises(ValueError, match="species_data_table .* not found"):
        filter_species_by_attribute(
            sbml_dfs_with_test_data, "nonexistent_table", "compartment", "nucleus"
        )

    # Test filtering with invalid attribute name
    with pytest.raises(ValueError, match="attribute_name .* not found"):
        filter_species_by_attribute(
            sbml_dfs_with_test_data, "location", "nonexistent_attribute", "nucleus"
        )

    # Test filtering with list of values and negation.
    # The non-negated call above removes matching species, so negate=True
    # removes annotated species that are NOT in nucleus or cytoplasm (the
    # membrane species) and keeps the ones that are.
    VALID_COMPARTMENTS = ["nucleus", "cytoplasm"]
    filtered_sbml_dfs = filter_species_by_attribute(
        sbml_dfs_with_test_data,
        "location",
        "compartment",
        VALID_COMPARTMENTS,
        negate=True,
        inplace=False,
    )

    # Get remaining species from our test set
    remaining_test_species = [
        s for s in test_species if s in filtered_sbml_dfs.species.index
    ]
    # Guard against a vacuous pass if every annotated species was dropped
    assert remaining_test_species

    # Check the "compartment" values themselves. Calling all() on a DataFrame
    # iterates its column labels, which are always truthy, so the check has
    # to be made on the column with Series.all().
    remaining_compartments = filtered_sbml_dfs.species_data["location"].loc[
        remaining_test_species, "compartment"
    ]
    assert remaining_compartments.isin(VALID_COMPARTMENTS).all()
|
159
|
+
|
160
|
+
|
161
|
+
def test_binarize_species_data():
    """Check which columns _binarize_species_data keeps and how it fails.

    Boolean and 0/1 integer columns are expected to survive (booleans as
    integers). Inputs without any such column are expected to raise
    ``ValueError``.
    """
    # One column of each flavour: bool, 0/1 int, general int, float, string
    mixed_columns = pd.DataFrame(
        {
            "bool_col": [True, False, True],
            "binary_int": [1, 0, 1],
            "non_binary_int": [1, 2, 3],
            "float_col": [1.5, 2.5, 3.5],
            "str_col": ["a", "b", "c"],
        }
    )

    binarized = _binarize_species_data(mixed_columns)

    # Only the boolean and binary columns are retained
    assert set(binarized.columns) == {"bool_col", "binary_int"}

    # The boolean column comes back as an integer column with 0/1 values
    bool_col_dtype = binarized["bool_col"].dtype
    assert bool_col_dtype == "int32" or bool_col_dtype == "int64"
    assert binarized["bool_col"].tolist() == [1, 0, 1]

    # The binary integer column is passed through unchanged
    assert binarized["binary_int"].tolist() == [1, 0, 1]

    # Neither a table with only non-binary columns nor an empty table has
    # anything to binarize, so both must raise.
    only_non_binary = pd.DataFrame(
        {
            "non_binary_int": [1, 2, 3],
            "float_col": [1.5, 2.5, 3.5],
        }
    )
    no_columns = pd.DataFrame()

    for unusable_data in (only_non_binary, no_columns):
        with pytest.raises(ValueError, match="No binary or boolean columns found"):
            _binarize_species_data(unusable_data)
|
204
|
+
|
205
|
+
|
206
|
+
def test_filter_reactions_with_disconnected_cspecies(sbml_dfs):
    """Test filter_reactions_with_disconnected_cspecies on mock occurrence data.

    Builds a binary species-data table in which one species (``DISJOINT_S_ID``)
    is flagged only in ``compartment_A`` while every other selected species is
    flagged only in ``compartment_B``. Then checks that, among the first five
    reactions, exactly the reactions involving ``DISJOINT_S_ID`` are dropped.
    """
    # 1. Select first few reactions
    first_reactions = list(sbml_dfs.reactions.index[:5])

    # 2. Find defining species in these reactions
    reaction_species = sbml_dfs_core.add_sbo_role(sbml_dfs.reaction_species)
    defining_species = (
        reaction_species[reaction_species[SBML_DFS.R_ID].isin(first_reactions)]
        .query("sbo_role == 'DEFINING'")
        # at most 1 record for an sc_id in a reaction (generally true anyways)
        .groupby([SBML_DFS.R_ID, SBML_DFS.SC_ID])
        .first()
        .reset_index(drop=False)
        .groupby(SBML_DFS.R_ID)
        .head(2)  # Take 2 defining species per reaction
    )

    # 3. Get species IDs for these compartmentalized species
    species_info = defining_species.merge(
        sbml_dfs.compartmentalized_species[[SBML_DFS.S_ID]],
        left_on=SBML_DFS.SC_ID,
        right_index=True,
    )

    # Filter out reactions that have less than 2 distinct s_ids (transport reactions)
    valid_reactions = (
        species_info.groupby(SBML_DFS.R_ID)[SBML_DFS.S_ID]
        .nunique()
        .pipe(lambda x: x[x >= 2])
        .index
    )
    species_info = species_info[species_info[SBML_DFS.R_ID].isin(valid_reactions)]

    # 4. Create binary occurrence data where DISJOINT_S_ID is in a different
    # compartment from the other top species; this should result in removing
    # disconnected_reactions from the sbml_dfs.
    # Column names go through the SBML_DFS constants throughout, matching the
    # merge/groupby above that created these columns.
    DISJOINT_S_ID = species_info.value_counts(SBML_DFS.S_ID).index[0]
    disconnected_reactions = set(
        species_info[SBML_DFS.R_ID][
            species_info[SBML_DFS.S_ID] == DISJOINT_S_ID
        ].tolist()
    )

    # mock data
    mock_species_data = pd.DataFrame(
        {SBML_DFS.S_ID: species_info[SBML_DFS.S_ID].unique()}
    )
    mock_species_data["compartment_A"] = [
        1 if s_id == DISJOINT_S_ID else 0 for s_id in mock_species_data[SBML_DFS.S_ID]
    ]
    mock_species_data["compartment_B"] = [
        0 if s_id == DISJOINT_S_ID else 1 for s_id in mock_species_data[SBML_DFS.S_ID]
    ]
    mock_species_data.set_index(SBML_DFS.S_ID, inplace=True)

    sbml_dfs.add_species_data("test_data", mock_species_data)

    # Run the filter function
    filtered_sbml_dfs = filter_reactions_with_disconnected_cspecies(
        sbml_dfs, "test_data", inplace=False
    )

    # Reactions from the initial selection that are gone after filtering
    filtered_first_reactions = [
        r for r in first_reactions if r not in filtered_sbml_dfs.reactions.index
    ]

    assert set(filtered_first_reactions) == disconnected_reactions
|
tests/test_identifiers.py
CHANGED
@@ -5,6 +5,8 @@ import os
|
|
5
5
|
import numpy as np
|
6
6
|
import pandas as pd
|
7
7
|
from napistu import identifiers
|
8
|
+
from napistu.constants import IDENTIFIERS, SBML_DFS
|
9
|
+
import pytest
|
8
10
|
|
9
11
|
# logger = logging.getLogger()
|
10
12
|
# logger.setLevel("DEBUG")
|
@@ -139,6 +141,100 @@ def test_reciprocal_ensembl_dicts():
|
|
139
141
|
)
|
140
142
|
|
141
143
|
|
144
|
+
def test_df_to_identifiers_basic():
    """Convert a three-row identifier table and inspect the resulting Series.

    ``s1`` carries two identifiers and ``s2`` carries one, so the result
    should hold two ``Identifiers`` objects with matching contents.
    """
    # One record per (species, identifier) pair
    records = [
        {
            "s_id": "s1",
            IDENTIFIERS.ONTOLOGY: "ncbi_entrez_gene",
            IDENTIFIERS.IDENTIFIER: "123",
            IDENTIFIERS.URL: "http://ncbi/123",
            IDENTIFIERS.BQB: "is",
        },
        {
            "s_id": "s1",
            IDENTIFIERS.ONTOLOGY: "uniprot",
            IDENTIFIERS.IDENTIFIER: "P12345",
            IDENTIFIERS.URL: "http://uniprot/P12345",
            IDENTIFIERS.BQB: "is",
        },
        {
            "s_id": "s2",
            IDENTIFIERS.ONTOLOGY: "ncbi_entrez_gene",
            IDENTIFIERS.IDENTIFIER: "456",
            IDENTIFIERS.URL: "http://ncbi/456",
            IDENTIFIERS.BQB: "is",
        },
    ]
    df = pd.DataFrame(records)

    result = identifiers.df_to_identifiers(df, SBML_DFS.SPECIES)

    # Shape and element type of the result
    assert isinstance(result, pd.Series)
    assert len(result) == 2  # Two unique s_ids
    assert all(isinstance(entry, identifiers.Identifiers) for entry in result)

    # s1 keeps both of its identifiers
    s1_ids = result["s1"].ids
    assert len(s1_ids) == 2
    for expected_identifier in ("123", "P12345"):
        assert any(x[IDENTIFIERS.IDENTIFIER] == expected_identifier for x in s1_ids)

    # s2 keeps its single identifier
    s2_ids = result["s2"].ids
    assert len(s2_ids) == 1
    assert s2_ids[0][IDENTIFIERS.IDENTIFIER] == "456"
|
178
|
+
|
179
|
+
|
180
|
+
def test_df_to_identifiers_duplicates():
    """Identical identifier rows for one species collapse to a single entry."""
    # The same record repeated three times
    repeated_record = {
        "s_id": "s1",
        IDENTIFIERS.ONTOLOGY: "ncbi_entrez_gene",
        IDENTIFIERS.IDENTIFIER: "123",
        IDENTIFIERS.URL: "http://ncbi/123",
        IDENTIFIERS.BQB: "is",
    }
    df = pd.DataFrame([repeated_record] * 3)

    result = identifiers.df_to_identifiers(df, SBML_DFS.SPECIES)

    # One species, and within it one de-duplicated identifier
    assert len(result) == 1
    assert len(result["s1"].ids) == 1
|
202
|
+
|
203
|
+
|
204
|
+
def test_df_to_identifiers_missing_columns():
    """A table lacking the URL and BQB columns is rejected with ValueError."""
    # Only the id, ontology and identifier columns are supplied
    incomplete_record = {
        "s_id": "s1",
        IDENTIFIERS.ONTOLOGY: "ncbi_entrez_gene",
        IDENTIFIERS.IDENTIFIER: "123",
    }
    df = pd.DataFrame([incomplete_record])

    expected_message = "The DataFrame does not contain the required columns"
    with pytest.raises(ValueError, match=expected_message):
        identifiers.df_to_identifiers(df, SBML_DFS.SPECIES)
|
220
|
+
|
221
|
+
|
222
|
+
def test_df_to_identifiers_invalid_entity_type():
    """An unrecognised entity type is rejected with ValueError."""
    # A complete, well-formed record; only the entity type passed below is bad
    complete_record = {
        "s_id": "s1",
        IDENTIFIERS.ONTOLOGY: "ncbi_entrez_gene",
        IDENTIFIERS.IDENTIFIER: "123",
        IDENTIFIERS.URL: "http://ncbi/123",
        IDENTIFIERS.BQB: "is",
    }
    df = pd.DataFrame([complete_record])

    with pytest.raises(ValueError, match="Invalid entity type"):
        identifiers.df_to_identifiers(df, "invalid_type")
|
236
|
+
|
237
|
+
|
142
238
|
################################################
|
143
239
|
# __main__
|
144
240
|
################################################
|
@@ -149,3 +245,7 @@ if __name__ == "__main__":
|
|
149
245
|
test_url_from_identifiers()
|
150
246
|
test_parsing_ensembl_ids()
|
151
247
|
test_reciprocal_ensembl_dicts()
|
248
|
+
test_df_to_identifiers_basic()
|
249
|
+
test_df_to_identifiers_duplicates()
|
250
|
+
test_df_to_identifiers_missing_columns()
|
251
|
+
test_df_to_identifiers_invalid_entity_type()
|
tests/test_indices.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import os
|
4
|
+
import datetime
|
4
5
|
|
5
6
|
import pandas as pd
|
6
7
|
import pytest
|
@@ -10,6 +11,70 @@ test_path = os.path.abspath(os.path.join(__file__, os.pardir))
|
|
10
11
|
test_data = os.path.join(test_path, "test_data")
|
11
12
|
|
12
13
|
|
14
|
+
def test_create_pathway_index_df():
    """Test the creation of pathway index DataFrame.

    Checks the row count, the exact column set, the contents of the row for
    the ``"human"`` model, and that a custom ``file_extension`` is reflected
    in the ``file`` column.
    """
    # Test input data
    model_keys = {"human": "model1", "mouse": "model2"}
    model_urls = {
        "human": "http://example.com/model1.xml",
        "mouse": "http://example.com/model2.xml",
    }
    model_species = {"human": "Homo sapiens", "mouse": "Mus musculus"}
    base_path = "/test/path"
    source_name = "TestSource"

    # Sample the date (YYYYMMDD) on both sides of the call so the test cannot
    # fail just because it happened to run across midnight.
    date_before = datetime.date.today().strftime("%Y%m%d")

    # Create pathway index
    result = indices.create_pathway_index_df(
        model_keys=model_keys,
        model_urls=model_urls,
        model_species=model_species,
        base_path=base_path,
        source_name=source_name,
    )

    date_after = datetime.date.today().strftime("%Y%m%d")
    expected_dates = {date_before, date_after}

    # Assertions
    assert isinstance(result, pd.DataFrame), "Result should be a pandas DataFrame"
    assert len(result) == 2, "Should have 2 rows for 2 models"

    # Check required columns exist
    required_columns = {
        "url",
        "species",
        "sbml_path",
        "file",
        "date",
        "pathway_id",
        "name",
        "source",
    }
    assert set(result.columns) == required_columns, "Missing required columns"

    # Check content for first model (human)
    human_row = result[result["pathway_id"] == "model1"].iloc[0]
    assert human_row["url"] == "http://example.com/model1.xml"
    assert human_row["species"] == "Homo sapiens"
    assert human_row["file"] == "model1.sbml"
    assert human_row["date"] in expected_dates
    assert human_row["source"] == "TestSource"
    assert human_row["sbml_path"] == os.path.join(base_path, "model1.sbml")

    # Test with custom file extension
    result_custom_ext = indices.create_pathway_index_df(
        model_keys=model_keys,
        model_urls=model_urls,
        model_species=model_species,
        base_path=base_path,
        source_name=source_name,
        file_extension=".xml",
    )
    assert result_custom_ext.iloc[0]["file"].endswith(
        ".xml"
    ), "Custom extension not applied"
|
76
|
+
|
77
|
+
|
13
78
|
def test_pwindex_from_file():
|
14
79
|
pw_index_path = os.path.join(test_data, "pw_index.tsv")
|
15
80
|
pw_index = indices.PWIndex(pw_index_path)
|
@@ -1,13 +1,13 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import pandas as pd
|
4
|
-
from napistu.ingestion import
|
4
|
+
from napistu.ingestion import napistu_edgelist
|
5
5
|
|
6
6
|
|
7
7
|
def test_edgelist_remove_reciprocal_reactions():
|
8
8
|
edgelist = pd.DataFrame({"from": ["A", "B", "C", "D"], "to": ["B", "A", "D", "C"]})
|
9
9
|
|
10
|
-
nondegenerate_edgelist =
|
10
|
+
nondegenerate_edgelist = napistu_edgelist.remove_reciprocal_interactions(edgelist)
|
11
11
|
|
12
12
|
assert nondegenerate_edgelist.shape == (2, 2)
|
13
13
|
|
@@ -0,0 +1,108 @@
|
|
1
|
+
import pandas as pd
|
2
|
+
from napistu.network import net_create
|
3
|
+
|
4
|
+
from napistu.network import precompute
|
5
|
+
from napistu.matching.interactions import (
|
6
|
+
edgelist_to_pathway_species,
|
7
|
+
edgelist_to_scids,
|
8
|
+
filter_to_direct_mechanistic_interactions,
|
9
|
+
filter_to_indirect_mechanistic_interactions,
|
10
|
+
)
|
11
|
+
|
12
|
+
|
13
|
+
def test_edgelist_to_pathway_species(sbml_dfs):
    """Match an identifier edgelist to species, to compartmentalized species,
    and to direct mechanistic interactions, checking each result's shape.
    """
    # Four upstream/downstream identifier pairs
    identifier_pairs = [
        ("17996", "16526"),
        ("15377", "17544"),
        ("15378", "57945"),
        ("57540", "17996"),
    ]
    edgelist = pd.DataFrame(
        [
            {"identifier_upstream": upstream, "identifier_downstream": downstream}
            for upstream, downstream in identifier_pairs
        ]
    )

    all_species_identifiers = sbml_dfs.get_identifiers("species")
    species_identifiers = all_species_identifiers.query("bqb == 'BQB_IS'")

    # Edgelist annotated with species IDs
    edgelist_w_sids = edgelist_to_pathway_species(
        edgelist, species_identifiers, ontologies={"chebi", "uniprot"}
    )
    assert edgelist_w_sids.shape == (4, 4)

    # Edgelist annotated with compartmentalized species IDs
    edgelist_w_scids = edgelist_to_scids(
        edgelist, sbml_dfs, species_identifiers, ontologies={"chebi"}
    )
    assert edgelist_w_scids.shape == (12, 6)

    # Pairs that correspond to direct interactions in the model
    direct_interactions = filter_to_direct_mechanistic_interactions(
        edgelist, sbml_dfs, species_identifiers, ontologies={"chebi"}
    )
    assert direct_interactions.shape == (2, 10)
|
41
|
+
|
42
|
+
|
43
|
+
def test_direct_and_indirect_mechanism_matching(sbml_dfs_glucose_metabolism):
    """Check direct and indirect mechanistic matching on the glucose model.

    Verifies the shapes of the direct and indirect results and that the
    indirect weights are the same with and without precomputed distances.
    """
    napistu_graph = net_create.process_napistu_graph(sbml_dfs_glucose_metabolism)

    # (upstream, downstream) identifier pairs
    identifier_pairs = [
        ("17925", "32966"),  # glu, fbp
        ("57634", "32966"),  # f6p, fbp
        ("32966", "57642"),  # fbp, dhap
        ("17925", "15361"),  # glu, pyr
    ]
    edgelist = pd.DataFrame(
        [
            {"identifier_upstream": upstream, "identifier_downstream": downstream}
            for upstream, downstream in identifier_pairs
        ]
    )

    species_identifiers = sbml_dfs_glucose_metabolism.get_identifiers("species")

    direct_interactions = filter_to_direct_mechanistic_interactions(
        formatted_edgelist=edgelist,
        sbml_dfs=sbml_dfs_glucose_metabolism,
        species_identifiers=species_identifiers,
        ontologies={"chebi"},
    )
    assert direct_interactions.shape == (2, 10)

    def _find_indirect(distances):
        # Identical call each time; only the precomputed distances differ.
        return filter_to_indirect_mechanistic_interactions(
            formatted_edgelist=edgelist,
            sbml_dfs=sbml_dfs_glucose_metabolism,
            species_identifiers=species_identifiers,
            napistu_graph=napistu_graph,
            ontologies={"chebi"},
            precomputed_distances=distances,
            max_path_length=10,
        )

    indirect_interactions = _find_indirect(None)
    assert indirect_interactions.shape == (6, 12)

    # confirm that we get the same thing even when using precomputed distances
    precomputed_distances = precompute.precompute_distances(
        napistu_graph, weights_vars=["weights"]
    )
    indirect_interactions_w_precompute = _find_indirect(precomputed_distances)

    weights_without = indirect_interactions["weight"]
    weights_with = indirect_interactions_w_precompute["weight"]
    assert all(weights_without == weights_with)
|