napistu 0.3.7__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__main__.py +8 -4
- napistu/constants.py +30 -35
- napistu/gcs/constants.py +11 -11
- napistu/ingestion/napistu_edgelist.py +4 -4
- napistu/matching/interactions.py +41 -39
- napistu/modify/gaps.py +2 -1
- napistu/network/constants.py +61 -45
- napistu/network/data_handling.py +1 -1
- napistu/network/neighborhoods.py +3 -3
- napistu/network/net_create.py +440 -616
- napistu/network/net_create_utils.py +734 -0
- napistu/network/net_propagation.py +1 -1
- napistu/network/{napistu_graph_core.py → ng_core.py} +57 -15
- napistu/network/ng_utils.py +28 -21
- napistu/network/paths.py +4 -4
- napistu/network/precompute.py +35 -74
- napistu/ontologies/id_tables.py +282 -0
- napistu/sbml_dfs_core.py +53 -63
- napistu/sbml_dfs_utils.py +126 -16
- napistu/utils.py +80 -5
- {napistu-0.3.7.dist-info → napistu-0.4.1.dist-info}/METADATA +7 -2
- {napistu-0.3.7.dist-info → napistu-0.4.1.dist-info}/RECORD +39 -34
- tests/conftest.py +102 -1
- tests/test_network_data_handling.py +5 -2
- tests/test_network_net_create.py +92 -201
- tests/test_network_net_create_utils.py +538 -0
- tests/test_network_ng_core.py +19 -0
- tests/test_network_ng_utils.py +1 -1
- tests/test_network_precompute.py +4 -3
- tests/test_ontologies_id_tables.py +198 -0
- tests/test_rpy2_callr.py +0 -1
- tests/test_rpy2_init.py +0 -1
- tests/test_sbml_dfs_core.py +30 -19
- tests/test_sbml_dfs_utils.py +115 -0
- tests/test_utils.py +26 -2
- {napistu-0.3.7.dist-info → napistu-0.4.1.dist-info}/WHEEL +0 -0
- {napistu-0.3.7.dist-info → napistu-0.4.1.dist-info}/entry_points.txt +0 -0
- {napistu-0.3.7.dist-info → napistu-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.3.7.dist-info → napistu-0.4.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,198 @@
|
|
1
|
+
import pytest
|
2
|
+
import pandas as pd
|
3
|
+
from unittest.mock import patch
|
4
|
+
|
5
|
+
from napistu.ontologies import id_tables
|
6
|
+
from napistu.constants import (
|
7
|
+
BQB,
|
8
|
+
IDENTIFIERS,
|
9
|
+
ONTOLOGIES,
|
10
|
+
SBML_DFS,
|
11
|
+
VALID_BQB_TERMS,
|
12
|
+
)
|
13
|
+
|
14
|
+
|
15
|
+
@pytest.fixture
|
16
|
+
def sample_id_table():
|
17
|
+
"""Create a sample DataFrame for testing."""
|
18
|
+
return pd.DataFrame(
|
19
|
+
{
|
20
|
+
SBML_DFS.S_ID: ["s1", "s2", "s3", "s4"],
|
21
|
+
IDENTIFIERS.ONTOLOGY: [
|
22
|
+
ONTOLOGIES.GO,
|
23
|
+
ONTOLOGIES.KEGG,
|
24
|
+
ONTOLOGIES.REACTOME,
|
25
|
+
ONTOLOGIES.WIKIPATHWAYS,
|
26
|
+
],
|
27
|
+
IDENTIFIERS.IDENTIFIER: ["GO:0001", "hsa00010", "R-HSA-123", "WP123"],
|
28
|
+
IDENTIFIERS.BQB: [BQB.IS, BQB.HAS_PART, BQB.IS_PART_OF, BQB.IS_VERSION_OF],
|
29
|
+
IDENTIFIERS.URL: ["foo", "bar", "baz", "qux"],
|
30
|
+
"other_col": ["a", "b", "c", "d"],
|
31
|
+
}
|
32
|
+
)
|
33
|
+
|
34
|
+
|
35
|
+
def test_sanitize_id_table_values_valid_cases(sample_id_table):
|
36
|
+
"""Test all valid use cases for _sanitize_id_table_values function."""
|
37
|
+
|
38
|
+
# Test string input conversion
|
39
|
+
result = id_tables._sanitize_id_table_values(
|
40
|
+
ONTOLOGIES.GO, sample_id_table, IDENTIFIERS.ONTOLOGY
|
41
|
+
)
|
42
|
+
assert result == {ONTOLOGIES.GO}
|
43
|
+
assert isinstance(result, set)
|
44
|
+
|
45
|
+
# Test list input conversion
|
46
|
+
result = id_tables._sanitize_id_table_values(
|
47
|
+
[ONTOLOGIES.GO, ONTOLOGIES.KEGG], sample_id_table, IDENTIFIERS.ONTOLOGY
|
48
|
+
)
|
49
|
+
assert result == {ONTOLOGIES.GO, ONTOLOGIES.KEGG}
|
50
|
+
assert isinstance(result, set)
|
51
|
+
|
52
|
+
# Test set input unchanged
|
53
|
+
input_set = {ONTOLOGIES.GO, ONTOLOGIES.KEGG}
|
54
|
+
result = id_tables._sanitize_id_table_values(
|
55
|
+
input_set, sample_id_table, IDENTIFIERS.ONTOLOGY
|
56
|
+
)
|
57
|
+
assert result == input_set
|
58
|
+
assert isinstance(result, set)
|
59
|
+
|
60
|
+
# Test successful validation against valid_values
|
61
|
+
result = id_tables._sanitize_id_table_values(
|
62
|
+
BQB.IS, sample_id_table, IDENTIFIERS.BQB, set(VALID_BQB_TERMS)
|
63
|
+
)
|
64
|
+
assert result == {BQB.IS}
|
65
|
+
|
66
|
+
# Test duplicate values in input list are handled correctly
|
67
|
+
result = id_tables._sanitize_id_table_values(
|
68
|
+
[ONTOLOGIES.GO, ONTOLOGIES.GO, ONTOLOGIES.KEGG],
|
69
|
+
sample_id_table,
|
70
|
+
IDENTIFIERS.ONTOLOGY,
|
71
|
+
)
|
72
|
+
assert result == {
|
73
|
+
ONTOLOGIES.GO,
|
74
|
+
ONTOLOGIES.KEGG,
|
75
|
+
} # Duplicates removed by set conversion
|
76
|
+
|
77
|
+
# Test all values present in table
|
78
|
+
result = id_tables._sanitize_id_table_values(
|
79
|
+
[ONTOLOGIES.GO, ONTOLOGIES.KEGG, ONTOLOGIES.REACTOME],
|
80
|
+
sample_id_table,
|
81
|
+
IDENTIFIERS.ONTOLOGY,
|
82
|
+
)
|
83
|
+
assert result == {ONTOLOGIES.GO, ONTOLOGIES.KEGG, ONTOLOGIES.REACTOME}
|
84
|
+
|
85
|
+
# Test single value present in table
|
86
|
+
result = id_tables._sanitize_id_table_values(
|
87
|
+
ONTOLOGIES.WIKIPATHWAYS, sample_id_table, IDENTIFIERS.ONTOLOGY
|
88
|
+
)
|
89
|
+
assert result == {ONTOLOGIES.WIKIPATHWAYS}
|
90
|
+
|
91
|
+
# Test with different column (BQB)
|
92
|
+
result = id_tables._sanitize_id_table_values(
|
93
|
+
BQB.HAS_PART, sample_id_table, IDENTIFIERS.BQB
|
94
|
+
)
|
95
|
+
assert result == {BQB.HAS_PART}
|
96
|
+
|
97
|
+
|
98
|
+
@patch("napistu.ontologies.id_tables.logger")
|
99
|
+
def test_sanitize_id_table_values_error_cases(mock_logger, sample_id_table):
|
100
|
+
"""Test error cases and edge cases for _sanitize_id_table_values function."""
|
101
|
+
|
102
|
+
# Test invalid input types raise ValueError
|
103
|
+
with pytest.raises(ValueError, match="ontology must be a string, a set, or list"):
|
104
|
+
id_tables._sanitize_id_table_values(123, sample_id_table, IDENTIFIERS.ONTOLOGY)
|
105
|
+
|
106
|
+
with pytest.raises(ValueError, match="ontology must be a string, a set, or list"):
|
107
|
+
id_tables._sanitize_id_table_values(
|
108
|
+
{"key": "value"}, sample_id_table, IDENTIFIERS.ONTOLOGY
|
109
|
+
)
|
110
|
+
|
111
|
+
# Test validation failure against valid_values
|
112
|
+
with pytest.raises(
|
113
|
+
ValueError, match="The following bqb are not valid: INVALID_BQB"
|
114
|
+
):
|
115
|
+
id_tables._sanitize_id_table_values(
|
116
|
+
"INVALID_BQB", sample_id_table, IDENTIFIERS.BQB, set(VALID_BQB_TERMS), "bqb"
|
117
|
+
)
|
118
|
+
|
119
|
+
# Test multiple invalid values against valid_values
|
120
|
+
with pytest.raises(ValueError, match="The following bqb are not valid"):
|
121
|
+
id_tables._sanitize_id_table_values(
|
122
|
+
["INVALID1", "INVALID2"],
|
123
|
+
sample_id_table,
|
124
|
+
IDENTIFIERS.BQB,
|
125
|
+
set(VALID_BQB_TERMS),
|
126
|
+
"bqb",
|
127
|
+
)
|
128
|
+
|
129
|
+
# Test all values missing from table raises error
|
130
|
+
missing_values = {"MISSING1", "MISSING2"}
|
131
|
+
with pytest.raises(ValueError, match="None of the requested ontology are present"):
|
132
|
+
id_tables._sanitize_id_table_values(
|
133
|
+
missing_values, sample_id_table, IDENTIFIERS.ONTOLOGY
|
134
|
+
)
|
135
|
+
|
136
|
+
# Test case-sensitive matching (lowercase 'go' should fail)
|
137
|
+
with pytest.raises(ValueError, match="None of the requested ontology are present"):
|
138
|
+
id_tables._sanitize_id_table_values(
|
139
|
+
"INVALID_ONTOLOGY", sample_id_table, IDENTIFIERS.ONTOLOGY
|
140
|
+
)
|
141
|
+
|
142
|
+
# Test custom value_type_name in error messages
|
143
|
+
with pytest.raises(ValueError, match="custom_type must be a string"):
|
144
|
+
id_tables._sanitize_id_table_values(
|
145
|
+
123, sample_id_table, IDENTIFIERS.ONTOLOGY, value_type_name="custom_type"
|
146
|
+
)
|
147
|
+
|
148
|
+
# Test default value_type_name uses column_name
|
149
|
+
with pytest.raises(ValueError, match="test_column must be a string"):
|
150
|
+
id_tables._sanitize_id_table_values(123, sample_id_table, "test_column")
|
151
|
+
|
152
|
+
# Test empty dataframe column
|
153
|
+
empty_df = pd.DataFrame({"ontology": []})
|
154
|
+
with pytest.raises(ValueError, match="None of the requested ontology are present"):
|
155
|
+
id_tables._sanitize_id_table_values("GO", empty_df, IDENTIFIERS.ONTOLOGY)
|
156
|
+
|
157
|
+
# Test partial values missing logs warning but doesn't raise error
|
158
|
+
mixed_values = {ONTOLOGIES.GO, "MISSING"} # GO exists, MISSING doesn't
|
159
|
+
result = id_tables._sanitize_id_table_values(
|
160
|
+
mixed_values, sample_id_table, IDENTIFIERS.ONTOLOGY
|
161
|
+
)
|
162
|
+
|
163
|
+
assert result == mixed_values
|
164
|
+
mock_logger.warning.assert_called_once()
|
165
|
+
warning_call = mock_logger.warning.call_args[0][0]
|
166
|
+
assert "MISSING" in warning_call
|
167
|
+
assert "not present in the id_table" in warning_call
|
168
|
+
|
169
|
+
# Test multiple partial missing values
|
170
|
+
mock_logger.reset_mock()
|
171
|
+
mixed_values = {ONTOLOGIES.GO, ONTOLOGIES.KEGG, "MISSING1", "MISSING2"}
|
172
|
+
result = id_tables._sanitize_id_table_values(
|
173
|
+
mixed_values, sample_id_table, IDENTIFIERS.ONTOLOGY
|
174
|
+
)
|
175
|
+
|
176
|
+
assert result == mixed_values
|
177
|
+
mock_logger.warning.assert_called_once()
|
178
|
+
warning_call = mock_logger.warning.call_args[0][0]
|
179
|
+
assert "MISSING1" in warning_call and "MISSING2" in warning_call
|
180
|
+
|
181
|
+
|
182
|
+
def test_filter_id_table_basic(sample_id_table):
|
183
|
+
"""Basic test for filter_id_table filtering by identifier, ontology, and bqb."""
|
184
|
+
|
185
|
+
# Use a known identifier, ontology, and bqb from the fixture
|
186
|
+
filtered = id_tables.filter_id_table(
|
187
|
+
id_table=sample_id_table,
|
188
|
+
identifiers=["GO:0001"],
|
189
|
+
ontologies=[ONTOLOGIES.GO],
|
190
|
+
bqbs=[BQB.IS],
|
191
|
+
)
|
192
|
+
# Should return a DataFrame with only the matching row
|
193
|
+
assert isinstance(filtered, pd.DataFrame)
|
194
|
+
assert len(filtered) == 1
|
195
|
+
row = filtered.iloc[0]
|
196
|
+
assert row[IDENTIFIERS.ONTOLOGY] == ONTOLOGIES.GO
|
197
|
+
assert row[IDENTIFIERS.IDENTIFIER] == "GO:0001"
|
198
|
+
assert row[IDENTIFIERS.BQB] == BQB.IS
|
tests/test_rpy2_callr.py
CHANGED
tests/test_rpy2_init.py
CHANGED
tests/test_sbml_dfs_core.py
CHANGED
@@ -13,10 +13,12 @@ from napistu.modify import pathwayannot
|
|
13
13
|
|
14
14
|
from napistu import identifiers as napistu_identifiers
|
15
15
|
from napistu.constants import (
|
16
|
-
|
16
|
+
BQB,
|
17
17
|
BQB_DEFINING_ATTRS,
|
18
18
|
BQB_DEFINING_ATTRS_LOOSE,
|
19
|
-
|
19
|
+
SBML_DFS,
|
20
|
+
SCHEMA_DEFS,
|
21
|
+
ONTOLOGIES,
|
20
22
|
)
|
21
23
|
from napistu.sbml_dfs_core import SBML_dfs
|
22
24
|
from unittest.mock import patch
|
@@ -291,53 +293,62 @@ def test_read_sbml_with_invalid_ids():
|
|
291
293
|
|
292
294
|
|
293
295
|
def test_get_table(sbml_dfs):
|
294
|
-
assert isinstance(sbml_dfs.get_table(
|
295
|
-
assert isinstance(
|
296
|
+
assert isinstance(sbml_dfs.get_table(SBML_DFS.SPECIES), pd.DataFrame)
|
297
|
+
assert isinstance(
|
298
|
+
sbml_dfs.get_table(SBML_DFS.SPECIES, {SCHEMA_DEFS.ID}), pd.DataFrame
|
299
|
+
)
|
296
300
|
|
297
301
|
# invalid table
|
298
302
|
with pytest.raises(ValueError):
|
299
|
-
sbml_dfs.get_table("foo", {
|
303
|
+
sbml_dfs.get_table("foo", {SCHEMA_DEFS.ID})
|
300
304
|
|
301
305
|
# bad type
|
302
306
|
with pytest.raises(TypeError):
|
303
|
-
sbml_dfs.get_table(
|
307
|
+
sbml_dfs.get_table(SBML_DFS.REACTION_SPECIES, SCHEMA_DEFS.ID)
|
304
308
|
|
305
309
|
# reaction species don't have ids
|
306
310
|
with pytest.raises(ValueError):
|
307
|
-
sbml_dfs.get_table(
|
311
|
+
sbml_dfs.get_table(SBML_DFS.REACTION_SPECIES, {SCHEMA_DEFS.ID})
|
308
312
|
|
309
313
|
|
310
314
|
def test_search_by_name(sbml_dfs_metabolism):
|
311
|
-
assert
|
312
|
-
|
313
|
-
|
315
|
+
assert (
|
316
|
+
sbml_dfs_metabolism.search_by_name("atp", SBML_DFS.SPECIES, False).shape[0] == 1
|
317
|
+
)
|
318
|
+
assert sbml_dfs_metabolism.search_by_name("pyr", SBML_DFS.SPECIES).shape[0] == 3
|
319
|
+
assert (
|
320
|
+
sbml_dfs_metabolism.search_by_name("kinase", SBML_DFS.REACTIONS).shape[0] == 4
|
321
|
+
)
|
314
322
|
|
315
323
|
|
316
324
|
def test_search_by_id(sbml_dfs_metabolism):
|
317
|
-
identifiers_tbl = sbml_dfs_metabolism.get_identifiers(
|
325
|
+
identifiers_tbl = sbml_dfs_metabolism.get_identifiers(SBML_DFS.SPECIES)
|
318
326
|
ids, species = sbml_dfs_metabolism.search_by_ids(
|
319
|
-
["P40926"]
|
327
|
+
identifiers_tbl, identifiers=["P40926"]
|
320
328
|
)
|
321
329
|
assert ids.shape[0] == 1
|
322
330
|
assert species.shape[0] == 1
|
323
331
|
|
324
332
|
ids, species = sbml_dfs_metabolism.search_by_ids(
|
325
|
-
|
333
|
+
identifiers_tbl,
|
334
|
+
identifiers=["57540", "30744"],
|
335
|
+
ontologies={ONTOLOGIES.CHEBI},
|
326
336
|
)
|
327
337
|
assert ids.shape[0] == 2
|
328
338
|
assert species.shape[0] == 2
|
329
339
|
|
330
|
-
|
331
|
-
|
332
|
-
)
|
333
|
-
|
334
|
-
|
340
|
+
with pytest.raises(
|
341
|
+
ValueError, match="None of the requested identifiers are present"
|
342
|
+
):
|
343
|
+
ids, species = sbml_dfs_metabolism.search_by_ids(
|
344
|
+
identifiers_tbl, identifiers=["baz"] # Non-existent identifier
|
345
|
+
)
|
335
346
|
|
336
347
|
|
337
348
|
def test_species_status(sbml_dfs):
|
338
349
|
|
339
350
|
species = sbml_dfs.species
|
340
|
-
select_species = species[species[
|
351
|
+
select_species = species[species[SBML_DFS.S_NAME] == "OxyHbA"]
|
341
352
|
assert select_species.shape[0] == 1
|
342
353
|
|
343
354
|
status = sbml_dfs.species_status(select_species.index[0])
|
tests/test_sbml_dfs_utils.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import pandas as pd
|
4
|
+
import pytest
|
4
5
|
|
5
6
|
from napistu import sbml_dfs_utils
|
6
7
|
from napistu.constants import (
|
@@ -10,6 +11,10 @@ from napistu.constants import (
|
|
10
11
|
SBML_DFS,
|
11
12
|
IDENTIFIERS,
|
12
13
|
SBOTERM_NAMES,
|
14
|
+
VALID_SBO_TERMS,
|
15
|
+
VALID_SBO_TERM_NAMES,
|
16
|
+
MINI_SBO_FROM_NAME,
|
17
|
+
MINI_SBO_TO_NAME,
|
13
18
|
)
|
14
19
|
|
15
20
|
|
@@ -219,3 +224,113 @@ def test_stubbed_compartment():
|
|
219
224
|
"url": "https://www.ebi.ac.uk/QuickGO/term/GO:0005575",
|
220
225
|
"bqb": "BQB_IS",
|
221
226
|
}
|
227
|
+
|
228
|
+
|
229
|
+
def test_validate_sbo_values_success():
|
230
|
+
# Should not raise
|
231
|
+
sbml_dfs_utils._validate_sbo_values(pd.Series(VALID_SBO_TERMS), validate="terms")
|
232
|
+
sbml_dfs_utils._validate_sbo_values(
|
233
|
+
pd.Series(VALID_SBO_TERM_NAMES), validate="names"
|
234
|
+
)
|
235
|
+
|
236
|
+
|
237
|
+
def test_validate_sbo_values_invalid_type():
|
238
|
+
with pytest.raises(ValueError, match="Invalid validation type"):
|
239
|
+
sbml_dfs_utils._validate_sbo_values(
|
240
|
+
pd.Series(VALID_SBO_TERMS), validate="badtype"
|
241
|
+
)
|
242
|
+
|
243
|
+
|
244
|
+
def test_validate_sbo_values_invalid_value():
|
245
|
+
# Add an invalid term
|
246
|
+
s = pd.Series(VALID_SBO_TERMS + ["SBO:9999999"])
|
247
|
+
with pytest.raises(ValueError, match="unusable SBO terms"):
|
248
|
+
sbml_dfs_utils._validate_sbo_values(s, validate="terms")
|
249
|
+
# Add an invalid name
|
250
|
+
s = pd.Series(VALID_SBO_TERM_NAMES + ["not_a_name"])
|
251
|
+
with pytest.raises(ValueError, match="unusable SBO terms"):
|
252
|
+
sbml_dfs_utils._validate_sbo_values(s, validate="names")
|
253
|
+
|
254
|
+
|
255
|
+
def test_sbo_constants_internal_consistency():
|
256
|
+
# Every term should have a name and vice versa
|
257
|
+
# MINI_SBO_FROM_NAME: name -> term, MINI_SBO_TO_NAME: term -> name
|
258
|
+
terms_from_names = set(MINI_SBO_FROM_NAME.values())
|
259
|
+
names_from_terms = set(MINI_SBO_TO_NAME.values())
|
260
|
+
assert terms_from_names == set(VALID_SBO_TERMS)
|
261
|
+
assert names_from_terms == set(VALID_SBO_TERM_NAMES)
|
262
|
+
# Bijective mapping
|
263
|
+
for name, term in MINI_SBO_FROM_NAME.items():
|
264
|
+
assert MINI_SBO_TO_NAME[term] == name
|
265
|
+
for term, name in MINI_SBO_TO_NAME.items():
|
266
|
+
assert MINI_SBO_FROM_NAME[name] == term
|
267
|
+
|
268
|
+
|
269
|
+
def test_infer_entity_type():
|
270
|
+
"""Test entity type inference with valid keys"""
|
271
|
+
# when index matches primary key.
|
272
|
+
# Test compartments with index as primary key
|
273
|
+
df = pd.DataFrame(
|
274
|
+
{SBML_DFS.C_NAME: ["cytoplasm"], SBML_DFS.C_IDENTIFIERS: ["GO:0005737"]}
|
275
|
+
)
|
276
|
+
df.index.name = SBML_DFS.C_ID
|
277
|
+
result = sbml_dfs_utils.infer_entity_type(df)
|
278
|
+
assert result == SBML_DFS.COMPARTMENTS
|
279
|
+
|
280
|
+
# Test species with index as primary key
|
281
|
+
df = pd.DataFrame(
|
282
|
+
{SBML_DFS.S_NAME: ["glucose"], SBML_DFS.S_IDENTIFIERS: ["CHEBI:17234"]}
|
283
|
+
)
|
284
|
+
df.index.name = SBML_DFS.S_ID
|
285
|
+
result = sbml_dfs_utils.infer_entity_type(df)
|
286
|
+
assert result == SBML_DFS.SPECIES
|
287
|
+
|
288
|
+
# Test entity type inference by exact column matching.
|
289
|
+
# Test compartmentalized_species (has foreign keys)
|
290
|
+
df = pd.DataFrame(
|
291
|
+
{
|
292
|
+
SBML_DFS.SC_ID: ["glucose_c"],
|
293
|
+
SBML_DFS.S_ID: ["glucose"],
|
294
|
+
SBML_DFS.C_ID: ["cytoplasm"],
|
295
|
+
}
|
296
|
+
)
|
297
|
+
result = sbml_dfs_utils.infer_entity_type(df)
|
298
|
+
assert result == "compartmentalized_species"
|
299
|
+
|
300
|
+
# Test reaction_species (has foreign keys)
|
301
|
+
df = pd.DataFrame(
|
302
|
+
{
|
303
|
+
SBML_DFS.RSC_ID: ["rxn1_glc"],
|
304
|
+
SBML_DFS.R_ID: ["rxn1"],
|
305
|
+
SBML_DFS.SC_ID: ["glucose_c"],
|
306
|
+
}
|
307
|
+
)
|
308
|
+
result = sbml_dfs_utils.infer_entity_type(df)
|
309
|
+
assert result == SBML_DFS.REACTION_SPECIES
|
310
|
+
|
311
|
+
# Test reactions (only primary key)
|
312
|
+
df = pd.DataFrame({SBML_DFS.R_ID: ["rxn1"]})
|
313
|
+
result = sbml_dfs_utils.infer_entity_type(df)
|
314
|
+
assert result == SBML_DFS.REACTIONS
|
315
|
+
|
316
|
+
|
317
|
+
def test_infer_entity_type_errors():
|
318
|
+
"""Test error cases for entity type inference."""
|
319
|
+
# Test no matching entity type
|
320
|
+
df = pd.DataFrame({"random_column": ["value"], "another_col": ["data"]})
|
321
|
+
with pytest.raises(ValueError, match="No entity type matches DataFrame"):
|
322
|
+
sbml_dfs_utils.infer_entity_type(df)
|
323
|
+
|
324
|
+
# Test partial match (missing required foreign key)
|
325
|
+
df = pd.DataFrame(
|
326
|
+
{SBML_DFS.SC_ID: ["glucose_c"], SBML_DFS.S_ID: ["glucose"]}
|
327
|
+
) # Missing c_id
|
328
|
+
with pytest.raises(ValueError):
|
329
|
+
sbml_dfs_utils.infer_entity_type(df)
|
330
|
+
|
331
|
+
# Test extra primary keys that shouldn't be there
|
332
|
+
df = pd.DataFrame(
|
333
|
+
{SBML_DFS.R_ID: ["rxn1"], SBML_DFS.S_ID: ["glucose"]}
|
334
|
+
) # Two primary keys
|
335
|
+
with pytest.raises(ValueError):
|
336
|
+
sbml_dfs_utils.infer_entity_type(df)
|
tests/test_utils.py
CHANGED
@@ -2,9 +2,10 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import gzip
|
4
4
|
import os
|
5
|
+
import tempfile
|
5
6
|
from datetime import datetime
|
6
|
-
from
|
7
|
-
from unittest.mock import patch
|
7
|
+
from pathlib import Path
|
8
|
+
from unittest.mock import Mock, patch
|
8
9
|
|
9
10
|
import numpy as np
|
10
11
|
import pandas as pd
|
@@ -705,3 +706,26 @@ def test_update_pathological_names():
|
|
705
706
|
s3 = pd.Series(["foo", "bar", "baz"])
|
706
707
|
out3 = utils.update_pathological_names(s3, "prefix_")
|
707
708
|
assert list(out3) == ["foo", "bar", "baz"]
|
709
|
+
|
710
|
+
|
711
|
+
def test_parquet_save_load():
|
712
|
+
"""Test that write_parquet and read_parquet work correctly."""
|
713
|
+
# Create test DataFrame
|
714
|
+
original_df = pd.DataFrame(
|
715
|
+
{
|
716
|
+
"sc_id_origin": ["A", "B", "C"],
|
717
|
+
"sc_id_dest": ["B", "C", "A"],
|
718
|
+
"path_length": [1, 2, 3],
|
719
|
+
"path_weights": [0.1, 0.5, 0.8],
|
720
|
+
"has_connection": [True, False, True],
|
721
|
+
}
|
722
|
+
)
|
723
|
+
|
724
|
+
# Write and read using temporary file
|
725
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
726
|
+
file_path = Path(temp_dir) / "test.parquet"
|
727
|
+
utils.save_parquet(original_df, file_path)
|
728
|
+
result_df = utils.load_parquet(file_path)
|
729
|
+
|
730
|
+
# Verify they're identical
|
731
|
+
pd.testing.assert_frame_equal(original_df, result_df)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|