napistu 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,10 +13,12 @@ from napistu.modify import pathwayannot
13
13
 
14
14
  from napistu import identifiers as napistu_identifiers
15
15
  from napistu.constants import (
16
- SBML_DFS,
16
+ BQB,
17
17
  BQB_DEFINING_ATTRS,
18
18
  BQB_DEFINING_ATTRS_LOOSE,
19
- BQB,
19
+ SBML_DFS,
20
+ SCHEMA_DEFS,
21
+ ONTOLOGIES,
20
22
  )
21
23
  from napistu.sbml_dfs_core import SBML_dfs
22
24
  from unittest.mock import patch
@@ -291,53 +293,62 @@ def test_read_sbml_with_invalid_ids():
291
293
 
292
294
 
293
295
  def test_get_table(sbml_dfs):
294
- assert isinstance(sbml_dfs.get_table("species"), pd.DataFrame)
295
- assert isinstance(sbml_dfs.get_table("species", {"id"}), pd.DataFrame)
296
+ assert isinstance(sbml_dfs.get_table(SBML_DFS.SPECIES), pd.DataFrame)
297
+ assert isinstance(
298
+ sbml_dfs.get_table(SBML_DFS.SPECIES, {SCHEMA_DEFS.ID}), pd.DataFrame
299
+ )
296
300
 
297
301
  # invalid table
298
302
  with pytest.raises(ValueError):
299
- sbml_dfs.get_table("foo", {"id"})
303
+ sbml_dfs.get_table("foo", {SCHEMA_DEFS.ID})
300
304
 
301
305
  # bad type
302
306
  with pytest.raises(TypeError):
303
- sbml_dfs.get_table("reaction_species", "id")
307
+ sbml_dfs.get_table(SBML_DFS.REACTION_SPECIES, SCHEMA_DEFS.ID)
304
308
 
305
309
  # reaction species don't have ids
306
310
  with pytest.raises(ValueError):
307
- sbml_dfs.get_table("reaction_species", {"id"})
311
+ sbml_dfs.get_table(SBML_DFS.REACTION_SPECIES, {SCHEMA_DEFS.ID})
308
312
 
309
313
 
310
314
  def test_search_by_name(sbml_dfs_metabolism):
311
- assert sbml_dfs_metabolism.search_by_name("atp", "species", False).shape[0] == 1
312
- assert sbml_dfs_metabolism.search_by_name("pyr", "species").shape[0] == 3
313
- assert sbml_dfs_metabolism.search_by_name("kinase", "reactions").shape[0] == 4
315
+ assert (
316
+ sbml_dfs_metabolism.search_by_name("atp", SBML_DFS.SPECIES, False).shape[0] == 1
317
+ )
318
+ assert sbml_dfs_metabolism.search_by_name("pyr", SBML_DFS.SPECIES).shape[0] == 3
319
+ assert (
320
+ sbml_dfs_metabolism.search_by_name("kinase", SBML_DFS.REACTIONS).shape[0] == 4
321
+ )
314
322
 
315
323
 
316
324
  def test_search_by_id(sbml_dfs_metabolism):
317
- identifiers_tbl = sbml_dfs_metabolism.get_identifiers("species")
325
+ identifiers_tbl = sbml_dfs_metabolism.get_identifiers(SBML_DFS.SPECIES)
318
326
  ids, species = sbml_dfs_metabolism.search_by_ids(
319
- ["P40926"], "species", identifiers_tbl
327
+ identifiers_tbl, identifiers=["P40926"]
320
328
  )
321
329
  assert ids.shape[0] == 1
322
330
  assert species.shape[0] == 1
323
331
 
324
332
  ids, species = sbml_dfs_metabolism.search_by_ids(
325
- ["57540", "30744"], "species", identifiers_tbl, {"chebi"}
333
+ identifiers_tbl,
334
+ identifiers=["57540", "30744"],
335
+ ontologies={ONTOLOGIES.CHEBI},
326
336
  )
327
337
  assert ids.shape[0] == 2
328
338
  assert species.shape[0] == 2
329
339
 
330
- ids, species = sbml_dfs_metabolism.search_by_ids(
331
- ["baz"], "species", identifiers_tbl
332
- )
333
- assert ids.shape[0] == 0
334
- assert species.shape[0] == 0
340
+ with pytest.raises(
341
+ ValueError, match="None of the requested identifiers are present"
342
+ ):
343
+ ids, species = sbml_dfs_metabolism.search_by_ids(
344
+ identifiers_tbl, identifiers=["baz"] # Non-existent identifier
345
+ )
335
346
 
336
347
 
337
348
  def test_species_status(sbml_dfs):
338
349
 
339
350
  species = sbml_dfs.species
340
- select_species = species[species["s_name"] == "OxyHbA"]
351
+ select_species = species[species[SBML_DFS.S_NAME] == "OxyHbA"]
341
352
  assert select_species.shape[0] == 1
342
353
 
343
354
  status = sbml_dfs.species_status(select_species.index[0])
@@ -264,3 +264,73 @@ def test_sbo_constants_internal_consistency():
264
264
  assert MINI_SBO_TO_NAME[term] == name
265
265
  for term, name in MINI_SBO_TO_NAME.items():
266
266
  assert MINI_SBO_FROM_NAME[name] == term
267
+
268
+
269
+ def test_infer_entity_type():
270
+ """Test entity type inference with valid keys"""
271
+ # when index matches primary key.
272
+ # Test compartments with index as primary key
273
+ df = pd.DataFrame(
274
+ {SBML_DFS.C_NAME: ["cytoplasm"], SBML_DFS.C_IDENTIFIERS: ["GO:0005737"]}
275
+ )
276
+ df.index.name = SBML_DFS.C_ID
277
+ result = sbml_dfs_utils.infer_entity_type(df)
278
+ assert result == SBML_DFS.COMPARTMENTS
279
+
280
+ # Test species with index as primary key
281
+ df = pd.DataFrame(
282
+ {SBML_DFS.S_NAME: ["glucose"], SBML_DFS.S_IDENTIFIERS: ["CHEBI:17234"]}
283
+ )
284
+ df.index.name = SBML_DFS.S_ID
285
+ result = sbml_dfs_utils.infer_entity_type(df)
286
+ assert result == SBML_DFS.SPECIES
287
+
288
+ # Test entity type inference by exact column matching.
289
+ # Test compartmentalized_species (has foreign keys)
290
+ df = pd.DataFrame(
291
+ {
292
+ SBML_DFS.SC_ID: ["glucose_c"],
293
+ SBML_DFS.S_ID: ["glucose"],
294
+ SBML_DFS.C_ID: ["cytoplasm"],
295
+ }
296
+ )
297
+ result = sbml_dfs_utils.infer_entity_type(df)
298
+ assert result == "compartmentalized_species"
299
+
300
+ # Test reaction_species (has foreign keys)
301
+ df = pd.DataFrame(
302
+ {
303
+ SBML_DFS.RSC_ID: ["rxn1_glc"],
304
+ SBML_DFS.R_ID: ["rxn1"],
305
+ SBML_DFS.SC_ID: ["glucose_c"],
306
+ }
307
+ )
308
+ result = sbml_dfs_utils.infer_entity_type(df)
309
+ assert result == SBML_DFS.REACTION_SPECIES
310
+
311
+ # Test reactions (only primary key)
312
+ df = pd.DataFrame({SBML_DFS.R_ID: ["rxn1"]})
313
+ result = sbml_dfs_utils.infer_entity_type(df)
314
+ assert result == SBML_DFS.REACTIONS
315
+
316
+
317
+ def test_infer_entity_type_errors():
318
+ """Test error cases for entity type inference."""
319
+ # Test no matching entity type
320
+ df = pd.DataFrame({"random_column": ["value"], "another_col": ["data"]})
321
+ with pytest.raises(ValueError, match="No entity type matches DataFrame"):
322
+ sbml_dfs_utils.infer_entity_type(df)
323
+
324
+ # Test partial match (missing required foreign key)
325
+ df = pd.DataFrame(
326
+ {SBML_DFS.SC_ID: ["glucose_c"], SBML_DFS.S_ID: ["glucose"]}
327
+ ) # Missing c_id
328
+ with pytest.raises(ValueError):
329
+ sbml_dfs_utils.infer_entity_type(df)
330
+
331
+ # Test extra primary keys that shouldn't be there
332
+ df = pd.DataFrame(
333
+ {SBML_DFS.R_ID: ["rxn1"], SBML_DFS.S_ID: ["glucose"]}
334
+ ) # Two primary keys
335
+ with pytest.raises(ValueError):
336
+ sbml_dfs_utils.infer_entity_type(df)
@@ -0,0 +1,133 @@
1
+ import pytest
2
+ import numpy as np
3
+ import pandas as pd
4
+ from napistu.statistics import quantiles
5
+
6
+
7
+ def test_calculate_quantiles_valid_inputs():
8
+ """Test calculate_quantiles with valid, well-formed inputs."""
9
+ # Create observed data: 4 features x 3 attributes
10
+ observed = pd.DataFrame(
11
+ [[0.8, 0.3, 0.9], [0.2, 0.7, 0.1], [0.5, 0.5, 0.5], [0.1, 0.9, 0.2]],
12
+ index=["gene1", "gene2", "gene3", "gene4"],
13
+ columns=["attr1", "attr2", "attr3"],
14
+ )
15
+
16
+ # Create null data: 2 samples per feature (8 rows total)
17
+ null_index = ["gene1", "gene2", "gene3", "gene4"] * 2
18
+ null_data = pd.DataFrame(
19
+ [
20
+ [0.1, 0.2, 0.3], # gene1 sample 1
21
+ [0.4, 0.5, 0.6], # gene2 sample 1
22
+ [0.7, 0.8, 0.9], # gene3 sample 1
23
+ [0.0, 0.1, 0.2], # gene4 sample 1
24
+ [0.2, 0.3, 0.4], # gene1 sample 2
25
+ [0.5, 0.6, 0.7], # gene2 sample 2
26
+ [0.8, 0.9, 1.0], # gene3 sample 2
27
+ [0.1, 0.2, 0.3], # gene4 sample 2
28
+ ],
29
+ index=null_index,
30
+ columns=["attr1", "attr2", "attr3"],
31
+ )
32
+
33
+ # Calculate quantiles
34
+ result = quantiles.calculate_quantiles(observed, null_data)
35
+
36
+ # Verify output structure
37
+ assert result.shape == observed.shape
38
+ assert list(result.index) == list(observed.index)
39
+ assert list(result.columns) == list(observed.columns)
40
+
41
+ # Check specific quantile calculations
42
+ # gene1, attr1: observed=0.8, nulls=[0.1, 0.2] -> quantile = 1.0 (100%)
43
+ assert result.loc["gene1", "attr1"] == 1.0
44
+
45
+ # gene2, attr2: observed=0.7, nulls=[0.5, 0.6] -> quantile = 1.0 (100%)
46
+ assert result.loc["gene2", "attr2"] == 1.0
47
+
48
+ # gene3, attr3: observed=0.5, nulls=[0.9, 1.0] -> quantile = 0.0 (0%)
49
+ assert result.loc["gene3", "attr3"] == 0.0
50
+
51
+ # gene4, attr1: observed=0.1, nulls=[0.0, 0.1]
52
+ # With ≤: 0.0 ≤ 0.1 (True), 0.1 ≤ 0.1 (True) → 2/2 = 1.0
53
+ assert result.loc["gene4", "attr1"] == 1.0
54
+
55
+
56
+ def test_calculate_quantiles_error_cases():
57
+ """Test calculate_quantiles with invalid inputs that should raise errors or warnings."""
58
+ # Base observed data
59
+ observed = pd.DataFrame(
60
+ [[0.8, 0.3], [0.2, 0.7]], index=["gene1", "gene2"], columns=["attr1", "attr2"]
61
+ )
62
+
63
+ # Test 1: Mismatched columns
64
+ null_wrong_cols = pd.DataFrame(
65
+ [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
66
+ index=["gene1", "gene2"],
67
+ columns=["attr1", "attr2", "attr3"], # Extra column
68
+ )
69
+
70
+ with pytest.raises((KeyError, ValueError)):
71
+ quantiles.calculate_quantiles(observed, null_wrong_cols)
72
+
73
+ # Test 2: Missing features in null data
74
+ null_missing_feature = pd.DataFrame(
75
+ [[0.1, 0.2]], index=["gene1"], columns=["attr1", "attr2"] # Missing gene2
76
+ )
77
+
78
+ # Current implementation doesn't validate - it will likely fail in groupby or indexing
79
+ # This test verifies current behavior (may change if validation added)
80
+ try:
81
+ result = quantiles.calculate_quantiles(observed, null_missing_feature)
82
+ # If it succeeds, gene2 quantiles will be invalid/error
83
+ assert True # Just check it doesn't crash for now
84
+ except (KeyError, ValueError, IndexError):
85
+ assert True # Expected behavior
86
+
87
+ # Test 3: Unequal null samples per feature
88
+ null_unequal_samples = pd.DataFrame(
89
+ [
90
+ [0.1, 0.2], # gene1 sample 1
91
+ [0.3, 0.4], # gene1 sample 2
92
+ [0.5, 0.6], # gene2 sample 1 (only 1 sample)
93
+ ],
94
+ index=["gene1", "gene1", "gene2"],
95
+ columns=["attr1", "attr2"],
96
+ )
97
+
98
+ # This should still work but may give different results
99
+ result = quantiles.calculate_quantiles(observed, null_unequal_samples)
100
+ assert result.shape == observed.shape
101
+
102
+ # Test 4: Empty null data
103
+ null_empty = pd.DataFrame(columns=["attr1", "attr2"])
104
+
105
+ with pytest.raises((ValueError, IndexError)):
106
+ quantiles.calculate_quantiles(observed, null_empty)
107
+
108
+ # Test 5: Single null sample (edge case)
109
+ null_single = pd.DataFrame(
110
+ [[0.1, 0.2], [0.5, 0.6]], index=["gene1", "gene2"], columns=["attr1", "attr2"]
111
+ )
112
+
113
+ result = quantiles.calculate_quantiles(observed, null_single)
114
+ assert result.shape == observed.shape
115
+ # With single sample, results should be binary (0 or 1)
116
+ assert all(val in [0.0, 1.0] for val in result.values.flatten())
117
+
118
+ # Test 6: NaN values in data
119
+ observed_with_nan = observed.copy()
120
+ observed_with_nan.loc["gene1", "attr1"] = np.nan
121
+
122
+ null_with_nan = pd.DataFrame(
123
+ [[np.nan, 0.2], [0.4, 0.5], [0.1, 0.3], [0.6, 0.7]],
124
+ index=["gene1", "gene2", "gene1", "gene2"],
125
+ columns=["attr1", "attr2"],
126
+ )
127
+
128
+ # Should raise ValueError for NaN values
129
+ with pytest.raises(ValueError, match="NaN values found in observed data"):
130
+ quantiles.calculate_quantiles(observed_with_nan, null_single)
131
+
132
+ with pytest.raises(ValueError, match="NaN values found in null data"):
133
+ quantiles.calculate_quantiles(observed, null_with_nan)