napistu 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- napistu/constants.py +2 -0
- napistu/gcs/constants.py +15 -15
- napistu/network/constants.py +23 -1
- napistu/network/ig_utils.py +161 -1
- napistu/network/net_create.py +3 -3
- napistu/network/net_propagation.py +646 -96
- napistu/ontologies/id_tables.py +282 -0
- napistu/sbml_dfs_core.py +53 -63
- napistu/sbml_dfs_utils.py +82 -18
- napistu/statistics/__init__.py +10 -0
- napistu/statistics/quantiles.py +82 -0
- {napistu-0.4.0.dist-info → napistu-0.4.2.dist-info}/METADATA +6 -1
- {napistu-0.4.0.dist-info → napistu-0.4.2.dist-info}/RECORD +23 -18
- tests/test_network_ig_utils.py +133 -0
- tests/test_network_net_propagation.py +365 -74
- tests/test_ontologies_id_tables.py +198 -0
- tests/test_sbml_dfs_core.py +30 -19
- tests/test_sbml_dfs_utils.py +70 -0
- tests/test_statistics_quantiles.py +133 -0
- {napistu-0.4.0.dist-info → napistu-0.4.2.dist-info}/WHEEL +0 -0
- {napistu-0.4.0.dist-info → napistu-0.4.2.dist-info}/entry_points.txt +0 -0
- {napistu-0.4.0.dist-info → napistu-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.4.0.dist-info → napistu-0.4.2.dist-info}/top_level.txt +0 -0
tests/test_sbml_dfs_core.py
CHANGED
@@ -13,10 +13,12 @@ from napistu.modify import pathwayannot
|
|
13
13
|
|
14
14
|
from napistu import identifiers as napistu_identifiers
|
15
15
|
from napistu.constants import (
|
16
|
-
|
16
|
+
BQB,
|
17
17
|
BQB_DEFINING_ATTRS,
|
18
18
|
BQB_DEFINING_ATTRS_LOOSE,
|
19
|
-
|
19
|
+
SBML_DFS,
|
20
|
+
SCHEMA_DEFS,
|
21
|
+
ONTOLOGIES,
|
20
22
|
)
|
21
23
|
from napistu.sbml_dfs_core import SBML_dfs
|
22
24
|
from unittest.mock import patch
|
@@ -291,53 +293,62 @@ def test_read_sbml_with_invalid_ids():
|
|
291
293
|
|
292
294
|
|
293
295
|
def test_get_table(sbml_dfs):
|
294
|
-
assert isinstance(sbml_dfs.get_table(
|
295
|
-
assert isinstance(
|
296
|
+
assert isinstance(sbml_dfs.get_table(SBML_DFS.SPECIES), pd.DataFrame)
|
297
|
+
assert isinstance(
|
298
|
+
sbml_dfs.get_table(SBML_DFS.SPECIES, {SCHEMA_DEFS.ID}), pd.DataFrame
|
299
|
+
)
|
296
300
|
|
297
301
|
# invalid table
|
298
302
|
with pytest.raises(ValueError):
|
299
|
-
sbml_dfs.get_table("foo", {
|
303
|
+
sbml_dfs.get_table("foo", {SCHEMA_DEFS.ID})
|
300
304
|
|
301
305
|
# bad type
|
302
306
|
with pytest.raises(TypeError):
|
303
|
-
sbml_dfs.get_table(
|
307
|
+
sbml_dfs.get_table(SBML_DFS.REACTION_SPECIES, SCHEMA_DEFS.ID)
|
304
308
|
|
305
309
|
# reaction species don't have ids
|
306
310
|
with pytest.raises(ValueError):
|
307
|
-
sbml_dfs.get_table(
|
311
|
+
sbml_dfs.get_table(SBML_DFS.REACTION_SPECIES, {SCHEMA_DEFS.ID})
|
308
312
|
|
309
313
|
|
310
314
|
def test_search_by_name(sbml_dfs_metabolism):
|
311
|
-
assert
|
312
|
-
|
313
|
-
|
315
|
+
assert (
|
316
|
+
sbml_dfs_metabolism.search_by_name("atp", SBML_DFS.SPECIES, False).shape[0] == 1
|
317
|
+
)
|
318
|
+
assert sbml_dfs_metabolism.search_by_name("pyr", SBML_DFS.SPECIES).shape[0] == 3
|
319
|
+
assert (
|
320
|
+
sbml_dfs_metabolism.search_by_name("kinase", SBML_DFS.REACTIONS).shape[0] == 4
|
321
|
+
)
|
314
322
|
|
315
323
|
|
316
324
|
def test_search_by_id(sbml_dfs_metabolism):
|
317
|
-
identifiers_tbl = sbml_dfs_metabolism.get_identifiers(
|
325
|
+
identifiers_tbl = sbml_dfs_metabolism.get_identifiers(SBML_DFS.SPECIES)
|
318
326
|
ids, species = sbml_dfs_metabolism.search_by_ids(
|
319
|
-
["P40926"]
|
327
|
+
identifiers_tbl, identifiers=["P40926"]
|
320
328
|
)
|
321
329
|
assert ids.shape[0] == 1
|
322
330
|
assert species.shape[0] == 1
|
323
331
|
|
324
332
|
ids, species = sbml_dfs_metabolism.search_by_ids(
|
325
|
-
|
333
|
+
identifiers_tbl,
|
334
|
+
identifiers=["57540", "30744"],
|
335
|
+
ontologies={ONTOLOGIES.CHEBI},
|
326
336
|
)
|
327
337
|
assert ids.shape[0] == 2
|
328
338
|
assert species.shape[0] == 2
|
329
339
|
|
330
|
-
|
331
|
-
|
332
|
-
)
|
333
|
-
|
334
|
-
|
340
|
+
with pytest.raises(
|
341
|
+
ValueError, match="None of the requested identifiers are present"
|
342
|
+
):
|
343
|
+
ids, species = sbml_dfs_metabolism.search_by_ids(
|
344
|
+
identifiers_tbl, identifiers=["baz"] # Non-existent identifier
|
345
|
+
)
|
335
346
|
|
336
347
|
|
337
348
|
def test_species_status(sbml_dfs):
|
338
349
|
|
339
350
|
species = sbml_dfs.species
|
340
|
-
select_species = species[species[
|
351
|
+
select_species = species[species[SBML_DFS.S_NAME] == "OxyHbA"]
|
341
352
|
assert select_species.shape[0] == 1
|
342
353
|
|
343
354
|
status = sbml_dfs.species_status(select_species.index[0])
|
tests/test_sbml_dfs_utils.py
CHANGED
@@ -264,3 +264,73 @@ def test_sbo_constants_internal_consistency():
|
|
264
264
|
assert MINI_SBO_TO_NAME[term] == name
|
265
265
|
for term, name in MINI_SBO_TO_NAME.items():
|
266
266
|
assert MINI_SBO_FROM_NAME[name] == term
|
267
|
+
|
268
|
+
|
269
|
+
def test_infer_entity_type():
|
270
|
+
"""Test entity type inference with valid keys"""
|
271
|
+
# when index matches primary key.
|
272
|
+
# Test compartments with index as primary key
|
273
|
+
df = pd.DataFrame(
|
274
|
+
{SBML_DFS.C_NAME: ["cytoplasm"], SBML_DFS.C_IDENTIFIERS: ["GO:0005737"]}
|
275
|
+
)
|
276
|
+
df.index.name = SBML_DFS.C_ID
|
277
|
+
result = sbml_dfs_utils.infer_entity_type(df)
|
278
|
+
assert result == SBML_DFS.COMPARTMENTS
|
279
|
+
|
280
|
+
# Test species with index as primary key
|
281
|
+
df = pd.DataFrame(
|
282
|
+
{SBML_DFS.S_NAME: ["glucose"], SBML_DFS.S_IDENTIFIERS: ["CHEBI:17234"]}
|
283
|
+
)
|
284
|
+
df.index.name = SBML_DFS.S_ID
|
285
|
+
result = sbml_dfs_utils.infer_entity_type(df)
|
286
|
+
assert result == SBML_DFS.SPECIES
|
287
|
+
|
288
|
+
# Test entity type inference by exact column matching.
|
289
|
+
# Test compartmentalized_species (has foreign keys)
|
290
|
+
df = pd.DataFrame(
|
291
|
+
{
|
292
|
+
SBML_DFS.SC_ID: ["glucose_c"],
|
293
|
+
SBML_DFS.S_ID: ["glucose"],
|
294
|
+
SBML_DFS.C_ID: ["cytoplasm"],
|
295
|
+
}
|
296
|
+
)
|
297
|
+
result = sbml_dfs_utils.infer_entity_type(df)
|
298
|
+
assert result == "compartmentalized_species"
|
299
|
+
|
300
|
+
# Test reaction_species (has foreign keys)
|
301
|
+
df = pd.DataFrame(
|
302
|
+
{
|
303
|
+
SBML_DFS.RSC_ID: ["rxn1_glc"],
|
304
|
+
SBML_DFS.R_ID: ["rxn1"],
|
305
|
+
SBML_DFS.SC_ID: ["glucose_c"],
|
306
|
+
}
|
307
|
+
)
|
308
|
+
result = sbml_dfs_utils.infer_entity_type(df)
|
309
|
+
assert result == SBML_DFS.REACTION_SPECIES
|
310
|
+
|
311
|
+
# Test reactions (only primary key)
|
312
|
+
df = pd.DataFrame({SBML_DFS.R_ID: ["rxn1"]})
|
313
|
+
result = sbml_dfs_utils.infer_entity_type(df)
|
314
|
+
assert result == SBML_DFS.REACTIONS
|
315
|
+
|
316
|
+
|
317
|
+
def test_infer_entity_type_errors():
|
318
|
+
"""Test error cases for entity type inference."""
|
319
|
+
# Test no matching entity type
|
320
|
+
df = pd.DataFrame({"random_column": ["value"], "another_col": ["data"]})
|
321
|
+
with pytest.raises(ValueError, match="No entity type matches DataFrame"):
|
322
|
+
sbml_dfs_utils.infer_entity_type(df)
|
323
|
+
|
324
|
+
# Test partial match (missing required foreign key)
|
325
|
+
df = pd.DataFrame(
|
326
|
+
{SBML_DFS.SC_ID: ["glucose_c"], SBML_DFS.S_ID: ["glucose"]}
|
327
|
+
) # Missing c_id
|
328
|
+
with pytest.raises(ValueError):
|
329
|
+
sbml_dfs_utils.infer_entity_type(df)
|
330
|
+
|
331
|
+
# Test extra primary keys that shouldn't be there
|
332
|
+
df = pd.DataFrame(
|
333
|
+
{SBML_DFS.R_ID: ["rxn1"], SBML_DFS.S_ID: ["glucose"]}
|
334
|
+
) # Two primary keys
|
335
|
+
with pytest.raises(ValueError):
|
336
|
+
sbml_dfs_utils.infer_entity_type(df)
|
@@ -0,0 +1,133 @@
|
|
1
|
+
import pytest
|
2
|
+
import numpy as np
|
3
|
+
import pandas as pd
|
4
|
+
from napistu.statistics import quantiles
|
5
|
+
|
6
|
+
|
7
|
+
def test_calculate_quantiles_valid_inputs():
|
8
|
+
"""Test calculate_quantiles with valid, well-formed inputs."""
|
9
|
+
# Create observed data: 4 features x 3 attributes
|
10
|
+
observed = pd.DataFrame(
|
11
|
+
[[0.8, 0.3, 0.9], [0.2, 0.7, 0.1], [0.5, 0.5, 0.5], [0.1, 0.9, 0.2]],
|
12
|
+
index=["gene1", "gene2", "gene3", "gene4"],
|
13
|
+
columns=["attr1", "attr2", "attr3"],
|
14
|
+
)
|
15
|
+
|
16
|
+
# Create null data: 2 samples per feature (8 rows total)
|
17
|
+
null_index = ["gene1", "gene2", "gene3", "gene4"] * 2
|
18
|
+
null_data = pd.DataFrame(
|
19
|
+
[
|
20
|
+
[0.1, 0.2, 0.3], # gene1 sample 1
|
21
|
+
[0.4, 0.5, 0.6], # gene2 sample 1
|
22
|
+
[0.7, 0.8, 0.9], # gene3 sample 1
|
23
|
+
[0.0, 0.1, 0.2], # gene4 sample 1
|
24
|
+
[0.2, 0.3, 0.4], # gene1 sample 2
|
25
|
+
[0.5, 0.6, 0.7], # gene2 sample 2
|
26
|
+
[0.8, 0.9, 1.0], # gene3 sample 2
|
27
|
+
[0.1, 0.2, 0.3], # gene4 sample 2
|
28
|
+
],
|
29
|
+
index=null_index,
|
30
|
+
columns=["attr1", "attr2", "attr3"],
|
31
|
+
)
|
32
|
+
|
33
|
+
# Calculate quantiles
|
34
|
+
result = quantiles.calculate_quantiles(observed, null_data)
|
35
|
+
|
36
|
+
# Verify output structure
|
37
|
+
assert result.shape == observed.shape
|
38
|
+
assert list(result.index) == list(observed.index)
|
39
|
+
assert list(result.columns) == list(observed.columns)
|
40
|
+
|
41
|
+
# Check specific quantile calculations
|
42
|
+
# gene1, attr1: observed=0.8, nulls=[0.1, 0.2] -> quantile = 1.0 (100%)
|
43
|
+
assert result.loc["gene1", "attr1"] == 1.0
|
44
|
+
|
45
|
+
# gene2, attr2: observed=0.7, nulls=[0.5, 0.6] -> quantile = 1.0 (100%)
|
46
|
+
assert result.loc["gene2", "attr2"] == 1.0
|
47
|
+
|
48
|
+
# gene3, attr3: observed=0.5, nulls=[0.9, 1.0] -> quantile = 0.0 (0%)
|
49
|
+
assert result.loc["gene3", "attr3"] == 0.0
|
50
|
+
|
51
|
+
# gene4, attr1: observed=0.1, nulls=[0.0, 0.1]
|
52
|
+
# With ≤: 0.0 ≤ 0.1 (True), 0.1 ≤ 0.1 (True) → 2/2 = 1.0
|
53
|
+
assert result.loc["gene4", "attr1"] == 1.0
|
54
|
+
|
55
|
+
|
56
|
+
def test_calculate_quantiles_error_cases():
|
57
|
+
"""Test calculate_quantiles with invalid inputs that should raise errors or warnings."""
|
58
|
+
# Base observed data
|
59
|
+
observed = pd.DataFrame(
|
60
|
+
[[0.8, 0.3], [0.2, 0.7]], index=["gene1", "gene2"], columns=["attr1", "attr2"]
|
61
|
+
)
|
62
|
+
|
63
|
+
# Test 1: Mismatched columns
|
64
|
+
null_wrong_cols = pd.DataFrame(
|
65
|
+
[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
|
66
|
+
index=["gene1", "gene2"],
|
67
|
+
columns=["attr1", "attr2", "attr3"], # Extra column
|
68
|
+
)
|
69
|
+
|
70
|
+
with pytest.raises((KeyError, ValueError)):
|
71
|
+
quantiles.calculate_quantiles(observed, null_wrong_cols)
|
72
|
+
|
73
|
+
# Test 2: Missing features in null data
|
74
|
+
null_missing_feature = pd.DataFrame(
|
75
|
+
[[0.1, 0.2]], index=["gene1"], columns=["attr1", "attr2"] # Missing gene2
|
76
|
+
)
|
77
|
+
|
78
|
+
# Current implementation doesn't validate - it will likely fail in groupby or indexing
|
79
|
+
# This test verifies current behavior (may change if validation added)
|
80
|
+
try:
|
81
|
+
result = quantiles.calculate_quantiles(observed, null_missing_feature)
|
82
|
+
# If it succeeds, gene2 quantiles will be invalid/error
|
83
|
+
assert True # Just check it doesn't crash for now
|
84
|
+
except (KeyError, ValueError, IndexError):
|
85
|
+
assert True # Expected behavior
|
86
|
+
|
87
|
+
# Test 3: Unequal null samples per feature
|
88
|
+
null_unequal_samples = pd.DataFrame(
|
89
|
+
[
|
90
|
+
[0.1, 0.2], # gene1 sample 1
|
91
|
+
[0.3, 0.4], # gene1 sample 2
|
92
|
+
[0.5, 0.6], # gene2 sample 1 (only 1 sample)
|
93
|
+
],
|
94
|
+
index=["gene1", "gene1", "gene2"],
|
95
|
+
columns=["attr1", "attr2"],
|
96
|
+
)
|
97
|
+
|
98
|
+
# This should still work but may give different results
|
99
|
+
result = quantiles.calculate_quantiles(observed, null_unequal_samples)
|
100
|
+
assert result.shape == observed.shape
|
101
|
+
|
102
|
+
# Test 4: Empty null data
|
103
|
+
null_empty = pd.DataFrame(columns=["attr1", "attr2"])
|
104
|
+
|
105
|
+
with pytest.raises((ValueError, IndexError)):
|
106
|
+
quantiles.calculate_quantiles(observed, null_empty)
|
107
|
+
|
108
|
+
# Test 5: Single null sample (edge case)
|
109
|
+
null_single = pd.DataFrame(
|
110
|
+
[[0.1, 0.2], [0.5, 0.6]], index=["gene1", "gene2"], columns=["attr1", "attr2"]
|
111
|
+
)
|
112
|
+
|
113
|
+
result = quantiles.calculate_quantiles(observed, null_single)
|
114
|
+
assert result.shape == observed.shape
|
115
|
+
# With single sample, results should be binary (0 or 1)
|
116
|
+
assert all(val in [0.0, 1.0] for val in result.values.flatten())
|
117
|
+
|
118
|
+
# Test 6: NaN values in data
|
119
|
+
observed_with_nan = observed.copy()
|
120
|
+
observed_with_nan.loc["gene1", "attr1"] = np.nan
|
121
|
+
|
122
|
+
null_with_nan = pd.DataFrame(
|
123
|
+
[[np.nan, 0.2], [0.4, 0.5], [0.1, 0.3], [0.6, 0.7]],
|
124
|
+
index=["gene1", "gene2", "gene1", "gene2"],
|
125
|
+
columns=["attr1", "attr2"],
|
126
|
+
)
|
127
|
+
|
128
|
+
# Should raise ValueError for NaN values
|
129
|
+
with pytest.raises(ValueError, match="NaN values found in observed data"):
|
130
|
+
quantiles.calculate_quantiles(observed_with_nan, null_single)
|
131
|
+
|
132
|
+
with pytest.raises(ValueError, match="NaN values found in null data"):
|
133
|
+
quantiles.calculate_quantiles(observed, null_with_nan)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|