napistu 0.3.6__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__main__.py +28 -13
- napistu/consensus.py +19 -25
- napistu/constants.py +102 -83
- napistu/indices.py +3 -1
- napistu/ingestion/napistu_edgelist.py +4 -4
- napistu/ingestion/sbml.py +298 -295
- napistu/ingestion/string.py +14 -18
- napistu/ingestion/trrust.py +22 -27
- napistu/matching/interactions.py +41 -39
- napistu/matching/species.py +1 -1
- napistu/modify/gaps.py +2 -1
- napistu/network/constants.py +61 -45
- napistu/network/data_handling.py +1 -1
- napistu/network/neighborhoods.py +3 -3
- napistu/network/net_create.py +440 -616
- napistu/network/net_create_utils.py +734 -0
- napistu/network/net_propagation.py +1 -1
- napistu/network/{napistu_graph_core.py → ng_core.py} +57 -15
- napistu/network/ng_utils.py +28 -21
- napistu/network/paths.py +4 -4
- napistu/network/precompute.py +35 -74
- napistu/ontologies/genodexito.py +5 -1
- napistu/ontologies/renaming.py +4 -0
- napistu/sbml_dfs_core.py +127 -64
- napistu/sbml_dfs_utils.py +50 -0
- napistu/utils.py +132 -46
- {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/METADATA +2 -2
- {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/RECORD +47 -44
- tests/conftest.py +171 -13
- tests/test_consensus.py +74 -5
- tests/test_gaps.py +26 -15
- tests/test_network_data_handling.py +5 -2
- tests/test_network_net_create.py +93 -202
- tests/test_network_net_create_utils.py +538 -0
- tests/test_network_ng_core.py +19 -0
- tests/test_network_ng_utils.py +1 -1
- tests/test_network_precompute.py +5 -4
- tests/test_ontologies_renaming.py +28 -24
- tests/test_rpy2_callr.py +0 -1
- tests/test_rpy2_init.py +0 -1
- tests/test_sbml_dfs_core.py +165 -15
- tests/test_sbml_dfs_utils.py +45 -0
- tests/test_utils.py +45 -2
- {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/WHEEL +0 -0
- {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/entry_points.txt +0 -0
- {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/top_level.txt +0 -0
@@ -1,14 +1,16 @@
|
|
1
1
|
"""Tests for the ontology aliases module."""
|
2
2
|
|
3
|
+
from unittest.mock import patch
|
4
|
+
|
3
5
|
import pytest
|
4
6
|
import pandas as pd
|
5
7
|
from napistu import identifiers
|
6
|
-
from napistu.constants import IDENTIFIERS, SBML_DFS
|
8
|
+
from napistu.constants import IDENTIFIERS, SBML_DFS, ONTOLOGIES
|
7
9
|
from napistu.ontologies import renaming
|
8
10
|
|
9
11
|
|
10
12
|
@pytest.fixture
|
11
|
-
def mock_sbml_dfs():
|
13
|
+
def mock_sbml_dfs(sbml_dfs):
|
12
14
|
"""Create a mock SBML_dfs object for testing."""
|
13
15
|
# Create a simple species DataFrame with identifiers
|
14
16
|
s1_ids = identifiers.Identifiers(
|
@@ -39,32 +41,27 @@ def mock_sbml_dfs():
|
|
39
41
|
]
|
40
42
|
)
|
41
43
|
|
44
|
+
s3_ids = identifiers.Identifiers([])
|
45
|
+
|
42
46
|
species_df = pd.DataFrame(
|
43
|
-
{
|
47
|
+
{
|
48
|
+
SBML_DFS.S_NAME: ["gene1", "gene2", "gene3"],
|
49
|
+
SBML_DFS.S_IDENTIFIERS: [s1_ids, s2_ids, s3_ids],
|
50
|
+
}
|
44
51
|
)
|
45
52
|
|
46
|
-
#
|
47
|
-
|
48
|
-
|
49
|
-
self.species = species_df
|
50
|
-
self.schema = {"species": {"pk": "s_id", "id": SBML_DFS.S_IDENTIFIERS}}
|
51
|
-
|
52
|
-
def get_identifiers(self, table_name):
|
53
|
-
if table_name == SBML_DFS.SPECIES:
|
54
|
-
all_ids = []
|
55
|
-
for idx, row in self.species.iterrows():
|
56
|
-
for id_dict in row[SBML_DFS.S_IDENTIFIERS].ids:
|
57
|
-
all_ids.append({"s_id": idx, **id_dict})
|
58
|
-
return pd.DataFrame(all_ids)
|
59
|
-
return pd.DataFrame()
|
60
|
-
|
61
|
-
return MockSBMLDfs()
|
53
|
+
# Patch the species attribute only for the duration of the test
|
54
|
+
with patch.object(sbml_dfs, "species", new=species_df):
|
55
|
+
yield sbml_dfs # All methods are real, only .species is patched
|
62
56
|
|
63
57
|
|
64
58
|
def test_rename_species_ontologies_basic(mock_sbml_dfs):
|
65
59
|
"""Test basic alias updating functionality."""
|
66
60
|
# Define test aliases
|
67
|
-
test_aliases = {
|
61
|
+
test_aliases = {
|
62
|
+
ONTOLOGIES.NCBI_ENTREZ_GENE: {"ncbigene"},
|
63
|
+
ONTOLOGIES.UNIPROT: {"uniprot_id"},
|
64
|
+
}
|
68
65
|
|
69
66
|
# Update aliases
|
70
67
|
renaming.rename_species_ontologies(mock_sbml_dfs, test_aliases)
|
@@ -73,11 +70,18 @@ def test_rename_species_ontologies_basic(mock_sbml_dfs):
|
|
73
70
|
updated_ids = mock_sbml_dfs.get_identifiers(SBML_DFS.SPECIES)
|
74
71
|
|
75
72
|
# Check that ontologies were updated correctly
|
76
|
-
assert
|
77
|
-
assert
|
73
|
+
assert ONTOLOGIES.NCBI_ENTREZ_GENE in set(updated_ids[IDENTIFIERS.ONTOLOGY])
|
74
|
+
assert ONTOLOGIES.UNIPROT in set(updated_ids[IDENTIFIERS.ONTOLOGY])
|
78
75
|
assert "ncbigene" not in set(updated_ids[IDENTIFIERS.ONTOLOGY])
|
79
76
|
assert "uniprot_id" not in set(updated_ids[IDENTIFIERS.ONTOLOGY])
|
80
77
|
|
78
|
+
# verify that all the species have Identifiers object
|
79
|
+
for row in mock_sbml_dfs.species.itertuples():
|
80
|
+
val = getattr(row, SBML_DFS.S_IDENTIFIERS)
|
81
|
+
assert val is not None and isinstance(
|
82
|
+
val, identifiers.Identifiers
|
83
|
+
), f"Bad value: {val} in row {row}"
|
84
|
+
|
81
85
|
|
82
86
|
def test_rename_species_ontologies_no_overlap(mock_sbml_dfs):
|
83
87
|
"""Test that error is raised when no aliases overlap with data."""
|
@@ -93,7 +97,7 @@ def test_rename_species_ontologies_partial_update(mock_sbml_dfs):
|
|
93
97
|
"""Test that partial updates work correctly."""
|
94
98
|
# Define aliases that only update some ontologies
|
95
99
|
test_aliases = {
|
96
|
-
|
100
|
+
ONTOLOGIES.NCBI_ENTREZ_GENE: {"ncbigene"}
|
97
101
|
# Don't include uniprot_id mapping
|
98
102
|
}
|
99
103
|
|
@@ -104,7 +108,7 @@ def test_rename_species_ontologies_partial_update(mock_sbml_dfs):
|
|
104
108
|
updated_ids = mock_sbml_dfs.get_identifiers(SBML_DFS.SPECIES)
|
105
109
|
|
106
110
|
# Check that only ncbigene was updated
|
107
|
-
assert
|
111
|
+
assert ONTOLOGIES.NCBI_ENTREZ_GENE in set(updated_ids[IDENTIFIERS.ONTOLOGY])
|
108
112
|
assert "uniprot_id" in set(
|
109
113
|
updated_ids[IDENTIFIERS.ONTOLOGY]
|
110
114
|
) # Should remain unchanged
|
tests/test_rpy2_callr.py
CHANGED
tests/test_rpy2_init.py
CHANGED
tests/test_sbml_dfs_core.py
CHANGED
@@ -5,6 +5,7 @@ import os
|
|
5
5
|
import numpy as np
|
6
6
|
import pandas as pd
|
7
7
|
import pytest
|
8
|
+
from napistu import identifiers
|
8
9
|
from napistu import sbml_dfs_core
|
9
10
|
from napistu.source import Source
|
10
11
|
from napistu.ingestion import sbml
|
@@ -25,11 +26,13 @@ from unittest.mock import patch
|
|
25
26
|
def test_data():
|
26
27
|
"""Create test data for SBML integration tests."""
|
27
28
|
|
29
|
+
blank_id = identifiers.Identifiers([])
|
30
|
+
|
28
31
|
# Test compartments
|
29
32
|
compartments_df = pd.DataFrame(
|
30
33
|
[
|
31
|
-
{
|
32
|
-
{
|
34
|
+
{SBML_DFS.C_NAME: "nucleus", SBML_DFS.C_IDENTIFIERS: blank_id},
|
35
|
+
{SBML_DFS.C_NAME: "cytoplasm", SBML_DFS.C_IDENTIFIERS: blank_id},
|
33
36
|
]
|
34
37
|
)
|
35
38
|
|
@@ -37,14 +40,18 @@ def test_data():
|
|
37
40
|
species_df = pd.DataFrame(
|
38
41
|
[
|
39
42
|
{
|
40
|
-
|
41
|
-
|
43
|
+
SBML_DFS.S_NAME: "TP53",
|
44
|
+
SBML_DFS.S_IDENTIFIERS: blank_id,
|
42
45
|
"gene_type": "tumor_suppressor",
|
43
46
|
},
|
44
|
-
{"s_name": "MDM2", "s_Identifiers": None, "gene_type": "oncogene"},
|
45
47
|
{
|
46
|
-
|
47
|
-
|
48
|
+
SBML_DFS.S_NAME: "MDM2",
|
49
|
+
SBML_DFS.S_IDENTIFIERS: blank_id,
|
50
|
+
"gene_type": "oncogene",
|
51
|
+
},
|
52
|
+
{
|
53
|
+
SBML_DFS.S_NAME: "CDKN1A",
|
54
|
+
SBML_DFS.S_IDENTIFIERS: blank_id,
|
48
55
|
"gene_type": "cell_cycle",
|
49
56
|
},
|
50
57
|
]
|
@@ -58,10 +65,10 @@ def test_data():
|
|
58
65
|
"downstream_name": "CDKN1A",
|
59
66
|
"upstream_compartment": "nucleus",
|
60
67
|
"downstream_compartment": "nucleus",
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
68
|
+
SBML_DFS.R_NAME: "TP53_activates_CDKN1A",
|
69
|
+
SBML_DFS.SBO_TERM: "SBO:0000459",
|
70
|
+
SBML_DFS.R_IDENTIFIERS: blank_id,
|
71
|
+
SBML_DFS.R_ISREVERSIBLE: False,
|
65
72
|
"confidence": 0.95,
|
66
73
|
},
|
67
74
|
{
|
@@ -69,10 +76,10 @@ def test_data():
|
|
69
76
|
"downstream_name": "TP53",
|
70
77
|
"upstream_compartment": "cytoplasm",
|
71
78
|
"downstream_compartment": "nucleus",
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
79
|
+
SBML_DFS.R_NAME: "MDM2_inhibits_TP53",
|
80
|
+
SBML_DFS.SBO_TERM: "SBO:0000020",
|
81
|
+
SBML_DFS.R_IDENTIFIERS: blank_id,
|
82
|
+
SBML_DFS.R_ISREVERSIBLE: False,
|
76
83
|
"confidence": 0.87,
|
77
84
|
},
|
78
85
|
]
|
@@ -611,3 +618,146 @@ def test_sbml_custom_stoichiometry(test_data):
|
|
611
618
|
stoichiometries = result.reaction_species["stoichiometry"].unique()
|
612
619
|
assert 2 in stoichiometries # upstream
|
613
620
|
assert 3 in stoichiometries # downstream
|
621
|
+
|
622
|
+
|
623
|
+
def test_validate_schema_missing(minimal_valid_sbml_dfs):
|
624
|
+
"""Test validation fails when schema is missing."""
|
625
|
+
delattr(minimal_valid_sbml_dfs, "schema")
|
626
|
+
with pytest.raises(ValueError, match="No schema found"):
|
627
|
+
minimal_valid_sbml_dfs.validate()
|
628
|
+
|
629
|
+
|
630
|
+
def test_validate_table(minimal_valid_sbml_dfs):
|
631
|
+
"""Test _validate_table fails for various table structure issues."""
|
632
|
+
# Wrong index name
|
633
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
634
|
+
sbml_dfs.species.index.name = "wrong_name"
|
635
|
+
with pytest.raises(ValueError, match="the index name for species was not the pk"):
|
636
|
+
sbml_dfs.validate()
|
637
|
+
|
638
|
+
# Duplicate primary keys
|
639
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
640
|
+
duplicate_species = pd.DataFrame(
|
641
|
+
{
|
642
|
+
SBML_DFS.S_NAME: ["ATP", "ADP"],
|
643
|
+
SBML_DFS.S_IDENTIFIERS: [
|
644
|
+
identifiers.Identifiers([]),
|
645
|
+
identifiers.Identifiers([]),
|
646
|
+
],
|
647
|
+
SBML_DFS.S_SOURCE: [Source(init=True), Source(init=True)],
|
648
|
+
},
|
649
|
+
index=pd.Index(["S00001", "S00001"], name=SBML_DFS.S_ID),
|
650
|
+
)
|
651
|
+
sbml_dfs.species = duplicate_species
|
652
|
+
with pytest.raises(ValueError, match="primary keys were duplicated"):
|
653
|
+
sbml_dfs.validate()
|
654
|
+
|
655
|
+
# Missing required variables
|
656
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
657
|
+
sbml_dfs.species = sbml_dfs.species.drop(columns=[SBML_DFS.S_NAME])
|
658
|
+
with pytest.raises(ValueError, match="Missing .+ required variables for species"):
|
659
|
+
sbml_dfs.validate()
|
660
|
+
|
661
|
+
# Empty table
|
662
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
663
|
+
sbml_dfs.species = pd.DataFrame(
|
664
|
+
{
|
665
|
+
SBML_DFS.S_NAME: [],
|
666
|
+
SBML_DFS.S_IDENTIFIERS: [],
|
667
|
+
SBML_DFS.S_SOURCE: [],
|
668
|
+
},
|
669
|
+
index=pd.Index([], name=SBML_DFS.S_ID),
|
670
|
+
)
|
671
|
+
with pytest.raises(ValueError, match="species contained no entries"):
|
672
|
+
sbml_dfs.validate()
|
673
|
+
|
674
|
+
|
675
|
+
def test_check_pk_fk_correspondence(minimal_valid_sbml_dfs):
|
676
|
+
"""Test _check_pk_fk_correspondence fails for various foreign key issues."""
|
677
|
+
# Missing species reference
|
678
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
679
|
+
sbml_dfs.compartmentalized_species[SBML_DFS.S_ID] = ["S99999"]
|
680
|
+
with pytest.raises(
|
681
|
+
ValueError,
|
682
|
+
match="s_id values were found in compartmentalized_species but missing from species",
|
683
|
+
):
|
684
|
+
sbml_dfs.validate()
|
685
|
+
|
686
|
+
# Missing compartment reference
|
687
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
688
|
+
sbml_dfs.compartmentalized_species[SBML_DFS.C_ID] = ["C99999"]
|
689
|
+
with pytest.raises(
|
690
|
+
ValueError,
|
691
|
+
match="c_id values were found in compartmentalized_species but missing from compartments",
|
692
|
+
):
|
693
|
+
sbml_dfs.validate()
|
694
|
+
|
695
|
+
# Null foreign keys
|
696
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
697
|
+
sbml_dfs.compartmentalized_species[SBML_DFS.S_ID] = [None]
|
698
|
+
with pytest.raises(
|
699
|
+
ValueError, match="compartmentalized_species included missing s_id values"
|
700
|
+
):
|
701
|
+
sbml_dfs.validate()
|
702
|
+
|
703
|
+
|
704
|
+
def test_validate_reaction_species(minimal_valid_sbml_dfs):
|
705
|
+
"""Test _validate_reaction_species fails for various reaction species issues."""
|
706
|
+
# Null stoichiometry
|
707
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
708
|
+
sbml_dfs.reaction_species[SBML_DFS.STOICHIOMETRY] = [None]
|
709
|
+
with pytest.raises(ValueError, match="All reaction_species.* must be not null"):
|
710
|
+
sbml_dfs.validate()
|
711
|
+
|
712
|
+
# Null SBO terms
|
713
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
714
|
+
sbml_dfs.reaction_species[SBML_DFS.SBO_TERM] = [None]
|
715
|
+
with pytest.raises(
|
716
|
+
ValueError, match="sbo_terms were None; all terms should be defined"
|
717
|
+
):
|
718
|
+
sbml_dfs.validate()
|
719
|
+
|
720
|
+
# Invalid SBO terms
|
721
|
+
sbml_dfs = minimal_valid_sbml_dfs.copy()
|
722
|
+
sbml_dfs.reaction_species[SBML_DFS.SBO_TERM] = ["INVALID_SBO_TERM"]
|
723
|
+
with pytest.raises(ValueError, match="sbo_terms were not defined"):
|
724
|
+
sbml_dfs.validate()
|
725
|
+
|
726
|
+
|
727
|
+
def test_validate_identifiers(minimal_valid_sbml_dfs):
|
728
|
+
"""Test _validate_identifiers fails when identifiers are missing."""
|
729
|
+
minimal_valid_sbml_dfs.species[SBML_DFS.S_IDENTIFIERS] = [None]
|
730
|
+
with pytest.raises(ValueError, match="species has .+ missing ids"):
|
731
|
+
minimal_valid_sbml_dfs.validate()
|
732
|
+
|
733
|
+
|
734
|
+
def test_validate_sources(minimal_valid_sbml_dfs):
|
735
|
+
"""Test _validate_sources fails when sources are missing."""
|
736
|
+
minimal_valid_sbml_dfs.species[SBML_DFS.S_SOURCE] = [None]
|
737
|
+
with pytest.raises(ValueError, match="species has .+ missing sources"):
|
738
|
+
minimal_valid_sbml_dfs.validate()
|
739
|
+
|
740
|
+
|
741
|
+
def test_validate_species_data(minimal_valid_sbml_dfs):
|
742
|
+
"""Test _validate_species_data fails when species_data has invalid structure."""
|
743
|
+
invalid_data = pd.DataFrame(
|
744
|
+
{"extra_info": ["test"]}, index=pd.Index(["S99999"], name=SBML_DFS.S_ID)
|
745
|
+
) # Non-existent species
|
746
|
+
minimal_valid_sbml_dfs.species_data["invalid"] = invalid_data
|
747
|
+
with pytest.raises(ValueError, match="species data invalid was invalid"):
|
748
|
+
minimal_valid_sbml_dfs.validate()
|
749
|
+
|
750
|
+
|
751
|
+
def test_validate_reactions_data(minimal_valid_sbml_dfs):
|
752
|
+
"""Test _validate_reactions_data fails when reactions_data has invalid structure."""
|
753
|
+
invalid_data = pd.DataFrame(
|
754
|
+
{"extra_info": ["test"]}, index=pd.Index(["R99999"], name=SBML_DFS.R_ID)
|
755
|
+
) # Non-existent reaction
|
756
|
+
minimal_valid_sbml_dfs.reactions_data["invalid"] = invalid_data
|
757
|
+
with pytest.raises(ValueError, match="reactions data invalid was invalid"):
|
758
|
+
minimal_valid_sbml_dfs.validate()
|
759
|
+
|
760
|
+
|
761
|
+
def test_validate_passes_with_valid_data(minimal_valid_sbml_dfs):
|
762
|
+
"""Test that validation passes with completely valid data."""
|
763
|
+
minimal_valid_sbml_dfs.validate() # Should not raise any exceptions
|
tests/test_sbml_dfs_utils.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import pandas as pd
|
4
|
+
import pytest
|
4
5
|
|
5
6
|
from napistu import sbml_dfs_utils
|
6
7
|
from napistu.constants import (
|
@@ -10,6 +11,10 @@ from napistu.constants import (
|
|
10
11
|
SBML_DFS,
|
11
12
|
IDENTIFIERS,
|
12
13
|
SBOTERM_NAMES,
|
14
|
+
VALID_SBO_TERMS,
|
15
|
+
VALID_SBO_TERM_NAMES,
|
16
|
+
MINI_SBO_FROM_NAME,
|
17
|
+
MINI_SBO_TO_NAME,
|
13
18
|
)
|
14
19
|
|
15
20
|
|
@@ -219,3 +224,43 @@ def test_stubbed_compartment():
|
|
219
224
|
"url": "https://www.ebi.ac.uk/QuickGO/term/GO:0005575",
|
220
225
|
"bqb": "BQB_IS",
|
221
226
|
}
|
227
|
+
|
228
|
+
|
229
|
+
def test_validate_sbo_values_success():
|
230
|
+
# Should not raise
|
231
|
+
sbml_dfs_utils._validate_sbo_values(pd.Series(VALID_SBO_TERMS), validate="terms")
|
232
|
+
sbml_dfs_utils._validate_sbo_values(
|
233
|
+
pd.Series(VALID_SBO_TERM_NAMES), validate="names"
|
234
|
+
)
|
235
|
+
|
236
|
+
|
237
|
+
def test_validate_sbo_values_invalid_type():
|
238
|
+
with pytest.raises(ValueError, match="Invalid validation type"):
|
239
|
+
sbml_dfs_utils._validate_sbo_values(
|
240
|
+
pd.Series(VALID_SBO_TERMS), validate="badtype"
|
241
|
+
)
|
242
|
+
|
243
|
+
|
244
|
+
def test_validate_sbo_values_invalid_value():
|
245
|
+
# Add an invalid term
|
246
|
+
s = pd.Series(VALID_SBO_TERMS + ["SBO:9999999"])
|
247
|
+
with pytest.raises(ValueError, match="unusable SBO terms"):
|
248
|
+
sbml_dfs_utils._validate_sbo_values(s, validate="terms")
|
249
|
+
# Add an invalid name
|
250
|
+
s = pd.Series(VALID_SBO_TERM_NAMES + ["not_a_name"])
|
251
|
+
with pytest.raises(ValueError, match="unusable SBO terms"):
|
252
|
+
sbml_dfs_utils._validate_sbo_values(s, validate="names")
|
253
|
+
|
254
|
+
|
255
|
+
def test_sbo_constants_internal_consistency():
|
256
|
+
# Every term should have a name and vice versa
|
257
|
+
# MINI_SBO_FROM_NAME: name -> term, MINI_SBO_TO_NAME: term -> name
|
258
|
+
terms_from_names = set(MINI_SBO_FROM_NAME.values())
|
259
|
+
names_from_terms = set(MINI_SBO_TO_NAME.values())
|
260
|
+
assert terms_from_names == set(VALID_SBO_TERMS)
|
261
|
+
assert names_from_terms == set(VALID_SBO_TERM_NAMES)
|
262
|
+
# Bijective mapping
|
263
|
+
for name, term in MINI_SBO_FROM_NAME.items():
|
264
|
+
assert MINI_SBO_TO_NAME[term] == name
|
265
|
+
for term, name in MINI_SBO_TO_NAME.items():
|
266
|
+
assert MINI_SBO_FROM_NAME[name] == term
|
tests/test_utils.py
CHANGED
@@ -2,9 +2,10 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import gzip
|
4
4
|
import os
|
5
|
+
import tempfile
|
5
6
|
from datetime import datetime
|
6
|
-
from
|
7
|
-
from unittest.mock import patch
|
7
|
+
from pathlib import Path
|
8
|
+
from unittest.mock import Mock, patch
|
8
9
|
|
9
10
|
import numpy as np
|
10
11
|
import pandas as pd
|
@@ -686,3 +687,45 @@ def test_safe_fill():
|
|
686
687
|
"a_very_long\nstringggg",
|
687
688
|
"",
|
688
689
|
]
|
690
|
+
|
691
|
+
|
692
|
+
def test_update_pathological_names():
|
693
|
+
|
694
|
+
# All numeric
|
695
|
+
s = pd.Series(["1", "2", "3"])
|
696
|
+
out = utils.update_pathological_names(s, "prefix_")
|
697
|
+
assert all(x.startswith("prefix_") for x in out)
|
698
|
+
assert list(out) == ["prefix_1", "prefix_2", "prefix_3"]
|
699
|
+
|
700
|
+
# Mixed numeric and non-numeric
|
701
|
+
s2 = pd.Series(["1", "foo", "3"])
|
702
|
+
out2 = utils.update_pathological_names(s2, "prefix_")
|
703
|
+
assert list(out2) == ["1", "foo", "3"]
|
704
|
+
|
705
|
+
# All non-numeric
|
706
|
+
s3 = pd.Series(["foo", "bar", "baz"])
|
707
|
+
out3 = utils.update_pathological_names(s3, "prefix_")
|
708
|
+
assert list(out3) == ["foo", "bar", "baz"]
|
709
|
+
|
710
|
+
|
711
|
+
def test_parquet_save_load():
|
712
|
+
"""Test that write_parquet and read_parquet work correctly."""
|
713
|
+
# Create test DataFrame
|
714
|
+
original_df = pd.DataFrame(
|
715
|
+
{
|
716
|
+
"sc_id_origin": ["A", "B", "C"],
|
717
|
+
"sc_id_dest": ["B", "C", "A"],
|
718
|
+
"path_length": [1, 2, 3],
|
719
|
+
"path_weights": [0.1, 0.5, 0.8],
|
720
|
+
"has_connection": [True, False, True],
|
721
|
+
}
|
722
|
+
)
|
723
|
+
|
724
|
+
# Write and read using temporary file
|
725
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
726
|
+
file_path = Path(temp_dir) / "test.parquet"
|
727
|
+
utils.save_parquet(original_df, file_path)
|
728
|
+
result_df = utils.load_parquet(file_path)
|
729
|
+
|
730
|
+
# Verify they're identical
|
731
|
+
pd.testing.assert_frame_equal(original_df, result_df)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|