napistu 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/constants.py +4 -10
- napistu/network/ig_utils.py +35 -0
- napistu/network/precompute.py +11 -4
- napistu/sbml_dfs_core.py +748 -543
- napistu/source.py +1 -1
- {napistu-0.3.3.dist-info → napistu-0.3.5.dist-info}/METADATA +2 -2
- {napistu-0.3.3.dist-info → napistu-0.3.5.dist-info}/RECORD +15 -15
- tests/test_network_ig_utils.py +36 -0
- tests/test_network_precompute.py +4 -1
- tests/test_sbml_dfs_core.py +220 -1
- tests/test_sbml_dfs_utils.py +47 -6
- {napistu-0.3.3.dist-info → napistu-0.3.5.dist-info}/WHEEL +0 -0
- {napistu-0.3.3.dist-info → napistu-0.3.5.dist-info}/entry_points.txt +0 -0
- {napistu-0.3.3.dist-info → napistu-0.3.5.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.3.3.dist-info → napistu-0.3.5.dist-info}/top_level.txt +0 -0
napistu/source.py
CHANGED
@@ -223,7 +223,7 @@ def greedy_set_coverge_of_sources(
|
|
223
223
|
Greedy Set Coverage of Sources
|
224
224
|
|
225
225
|
Apply the greedy set coverge algorithm to find the minimal set of
|
226
|
-
|
226
|
+
sources which cover all entries
|
227
227
|
|
228
228
|
Parameters:
|
229
229
|
source_df: pd.DataFrame
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: napistu
|
3
|
-
Version: 0.3.3
|
3
|
+
Version: 0.3.5
|
4
4
|
Summary: Connecting high-dimensional data to curated pathways
|
5
5
|
Home-page: https://github.com/napistu/napistu-py
|
6
6
|
Author: Sean Hackett
|
@@ -19,7 +19,7 @@ Requires-Dist: Jinja2
|
|
19
19
|
Requires-Dist: PyYAML<7.0.0,>=6.0.0
|
20
20
|
Requires-Dist: click<9.0.0,>=8.0.0
|
21
21
|
Requires-Dist: click-logging
|
22
|
-
Requires-Dist: fs<3.0.0,>=2.4.
|
22
|
+
Requires-Dist: fs<3.0.0,>=2.4.16
|
23
23
|
Requires-Dist: fs-gcsfs<2.0.0,>=1.5.0
|
24
24
|
Requires-Dist: igraph
|
25
25
|
Requires-Dist: matplotlib<4.0.0,>=3.5.0
|
@@ -1,12 +1,12 @@
|
|
1
1
|
napistu/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
|
2
2
|
napistu/__main__.py,sha256=PbzIsFAoFHNQuSyi-ql-D7tQLEOuqgmTcgk0PY-OGeU,28636
|
3
3
|
napistu/consensus.py,sha256=UbKKSLP1O46e3Rk8d_aqNlhRHeR3sZRztAgIm7-XK6Y,69960
|
4
|
-
napistu/constants.py,sha256=
|
4
|
+
napistu/constants.py,sha256=XptXQa3OKmH5hPb6PK_3cLSNcETj6jqejJYY60GV9Zc,12384
|
5
5
|
napistu/identifiers.py,sha256=wsVriQdvPllA5uvh5CiREklA2tYW2MIB14dV7CPaMVU,34003
|
6
6
|
napistu/indices.py,sha256=E_djN1XWc6l1lrFw_QnQXfZTKYTaUv8-jFPP7cHkY5A,9780
|
7
|
-
napistu/sbml_dfs_core.py,sha256=
|
7
|
+
napistu/sbml_dfs_core.py,sha256=us0Yz67ilav5dc2LY2TtXypaSdAQ7Kk55ZyGuyougzo,96605
|
8
8
|
napistu/sbml_dfs_utils.py,sha256=LJo6WWTrmnE58ZLDuibeeHk88uCdfunWdja7XxdZpps,11525
|
9
|
-
napistu/source.py,sha256=
|
9
|
+
napistu/source.py,sha256=UGpN70bqbC9gnKmM0ivSdQYim9hfzgABeXoQKzRr9oU,13646
|
10
10
|
napistu/utils.py,sha256=TcholWrFbRSu_sn9ODMA8y2YyAhekEKZjwf4S0WQNzI,33241
|
11
11
|
napistu/context/__init__.py,sha256=LQBEqipcHKK0E5UlDEg1ct-ymCs93IlUrUaH8BCevf0,242
|
12
12
|
napistu/context/discretize.py,sha256=Qq7zg46F_I-PvQIT2_pEDQV7YEtUQCxKoRvT5Gu9QsE,15052
|
@@ -60,14 +60,14 @@ napistu/modify/uncompartmentalize.py,sha256=U5X4Q7Z-YIkC8_711x3sU21vTVdv9rKfauwz
|
|
60
60
|
napistu/network/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
|
61
61
|
napistu/network/constants.py,sha256=fC1njZDu6in1JiaZ1-T1_fhmmkcc2HKSUUomDVyQ7Dw,5789
|
62
62
|
napistu/network/data_handling.py,sha256=mxplWwyXNrjZRN-jjWWUI9IZOqX69k8qSMDIrL9h0Og,14736
|
63
|
-
napistu/network/ig_utils.py,sha256=
|
63
|
+
napistu/network/ig_utils.py,sha256=JSlf_sZtw3DiiSIiYJ2YqJFEP4hVJMwNRox2qYTA4zY,11470
|
64
64
|
napistu/network/napistu_graph_core.py,sha256=2NbjiLcDcFWFyX1MuN17pobPDgoQFtcYWOwuPSFTT4g,10429
|
65
65
|
napistu/network/neighborhoods.py,sha256=Q9HWUvf_J4a_4RQDKd7ywEy4cp3Wq2OPOfVsotDbEe0,56098
|
66
66
|
napistu/network/net_create.py,sha256=2N5ocGmibdBxIUVtv3H36iFWwkbys9ECCERFRlByhLc,69407
|
67
67
|
napistu/network/net_propagation.py,sha256=89ZR4p2mGpkCCIemofZ53XbUjQsuNABxIc6UmF8A5n8,4935
|
68
68
|
napistu/network/ng_utils.py,sha256=ijWDa5MTuULJpdV6dcVFGmLmtB_xy87jaUG7F5nvC_k,15240
|
69
69
|
napistu/network/paths.py,sha256=S4ZaV0yVmI-o8sXfom5eXA3yy2IEbleYUyXEvnmVw98,17468
|
70
|
-
napistu/network/precompute.py,sha256=
|
70
|
+
napistu/network/precompute.py,sha256=pIXCCE6Mf6HY8o-fiwUaOxvQ_9_mevK0vaC8fND4RZk,9141
|
71
71
|
napistu/ontologies/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
|
72
72
|
napistu/ontologies/constants.py,sha256=GyOFvezSxDK1VigATcruTKtNhjcYaid1ggulEf_HEtQ,4345
|
73
73
|
napistu/ontologies/dogma.py,sha256=jGZS-J3d29AoUOow-HVjfVZQJ87lnqO5L1aozieN1ec,8825
|
@@ -81,7 +81,7 @@ napistu/rpy2/rids.py,sha256=AfXLTfTdonfspgAHYO0Ph7jSUWv8YuyT8x3fyLfAqc8,3413
|
|
81
81
|
napistu/scverse/__init__.py,sha256=Lgxr3iMQAkTzXE9BNz93CndNP5djzerLvmHM-D0PU3I,357
|
82
82
|
napistu/scverse/constants.py,sha256=0iAkhyJUIeFGHdLLU3fCaEU1O3Oix4qAsxr3CxGTjVs,653
|
83
83
|
napistu/scverse/loading.py,sha256=jqiE71XB-wdV50GyZrauFNY0Lai4bX9Fm2Gv80VR8t8,27016
|
84
|
-
napistu-0.3.3.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
|
84
|
+
napistu-0.3.5.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
|
85
85
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
86
86
|
tests/conftest.py,sha256=XVkd0tQywhnf2cgab7fIjBo3NlaTVX3cO8HaRS2jIwM,3190
|
87
87
|
tests/test_consensus.py,sha256=3dJvvPsPG7bHbw_FY4Pm647N_Gt_Ud9157OKYfPCUd4,9502
|
@@ -102,13 +102,13 @@ tests/test_mcp_config.py,sha256=GTu9vywqAHTYkolywdYS_BEIW3gBzs4A4qcneMSPpRk,7007
|
|
102
102
|
tests/test_mcp_documentation_utils.py,sha256=OW0N2N_2IOktbYTcCWhhWz4bANi8IB60l1q3DJi8Ra4,810
|
103
103
|
tests/test_mcp_server.py,sha256=bP3PWVQsEfX6-lAgXKP32njdg__o65n2WuLvkxTTHkQ,11215
|
104
104
|
tests/test_network_data_handling.py,sha256=oBSZuB3IRG9bwmD6n8FY-UZLe2UqGzXpNSxVtkHRSvE,12605
|
105
|
-
tests/test_network_ig_utils.py,sha256=
|
105
|
+
tests/test_network_ig_utils.py,sha256=Buoh570mNm5pcac3Hf6f3pevCjWfBwPfKuD8IkDLg58,2120
|
106
106
|
tests/test_network_neighborhoods.py,sha256=8BV17m5X1OUd5FwasTTYUOkNYUHDPUkxOKH_VZCsyBE,631
|
107
107
|
tests/test_network_net_create.py,sha256=VNFZTwQawAZQPDnVk_qFevgZErx5KyQZ24bMoZF4T4w,16462
|
108
108
|
tests/test_network_net_propagation.py,sha256=9pKkUdduWejH4iKNCJXKFzAkdNpCfrMbiUWySgI_LH4,3244
|
109
109
|
tests/test_network_ng_utils.py,sha256=CwDw4MKTPhVZXz2HA2XU2QjjBv8CXc1_yQ0drvkBkFw,724
|
110
110
|
tests/test_network_paths.py,sha256=TWZnxY5bF3m6gahcxcYJGrBIawh2-_vUcec1LyPmXV8,1686
|
111
|
-
tests/test_network_precompute.py,sha256=
|
111
|
+
tests/test_network_precompute.py,sha256=xMGmZI9DxcWhJxuP7GCZEqtmcOvDRNK2LSia0x94v0U,9018
|
112
112
|
tests/test_ontologies_genodexito.py,sha256=hBlunyEPiKskqagjWKW5Z6DJwKvpueYHJLwbfyeeAdo,2256
|
113
113
|
tests/test_ontologies_mygene.py,sha256=BuBLm8VatzpK39-Ew_fFTK9ueLE4eqmKIDS5UKE59n8,1541
|
114
114
|
tests/test_ontologies_renaming.py,sha256=k7bQzP24zG7W3fpULwk1me2sOWEWlxylr4Mhx1_gJJY,3740
|
@@ -116,8 +116,8 @@ tests/test_pathwayannot.py,sha256=bceosccNy9tgxQei_7j7ATBSSvBSxOngJvK-mAzR_K0,33
|
|
116
116
|
tests/test_rpy2_callr.py,sha256=UVzXMvYN3wcc-ikDIjH2sA4BqkbwiNbMm561BcbnbD4,2936
|
117
117
|
tests/test_rpy2_init.py,sha256=APrNt9GEQV9va3vU5k250TxFplAoWFc-FJRFhM2GcDk,5927
|
118
118
|
tests/test_sbml.py,sha256=f25zj1NogYrmLluvBDboLameTuCiQ309433Qn3iPvhg,1483
|
119
|
-
tests/test_sbml_dfs_core.py,sha256=
|
120
|
-
tests/test_sbml_dfs_utils.py,sha256=
|
119
|
+
tests/test_sbml_dfs_core.py,sha256=MtmQp9EZD4OFmk-5tWUSb9hUjcGfRKTFbSBymiE_Qzo,23628
|
120
|
+
tests/test_sbml_dfs_utils.py,sha256=5lNzZ1NLOnFb_sZ0YWTgLzXy28yGNCtS_H8Q-W-T6Bw,2022
|
121
121
|
tests/test_sbo.py,sha256=x_PENFaXYsrZIzOZu9cj_Wrej7i7SNGxgBYYvcigLs0,308
|
122
122
|
tests/test_scverse_loading.py,sha256=bnU1lQSYYWhOAs0IIBoi4ZohqPokDQJ0n_rtkAfEyMU,29948
|
123
123
|
tests/test_set_coverage.py,sha256=J-6m6LuOjcQa9pxRuWglSfJk4Ltm7kt_eOrn_Q-7P6Q,1604
|
@@ -126,8 +126,8 @@ tests/test_uncompartmentalize.py,sha256=nAk5kfAVLU9a2VWe2x2HYVcKqj-EnwmwddERIPRa
|
|
126
126
|
tests/test_utils.py,sha256=JRJFmjDNZpjG59a-73JkTyGqa_a7Z8d0fE2cZt0CRII,22580
|
127
127
|
tests/utils.py,sha256=SoWQ_5roJteFGcMaOeEiQ5ucwq3Z2Fa3AAs9iXHTsJY,749
|
128
128
|
tests/test_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
129
|
-
napistu-0.3.
|
130
|
-
napistu-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
131
|
-
napistu-0.3.3.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
|
132
|
-
napistu-0.3.3.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
|
133
|
-
napistu-0.3.3.dist-info/RECORD,,
|
129
|
+
napistu-0.3.5.dist-info/METADATA,sha256=Dla6z2cQaQt3eR7HeUfSNOcqOn4uWY3vYtYp3gKEBl4,3414
|
130
|
+
napistu-0.3.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
131
|
+
napistu-0.3.5.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
|
132
|
+
napistu-0.3.5.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
|
133
|
+
napistu-0.3.5.dist-info/RECORD,,
|
tests/test_network_ig_utils.py
CHANGED
@@ -6,6 +6,15 @@ from napistu.network import ig_utils
|
|
6
6
|
from napistu.network import net_create
|
7
7
|
|
8
8
|
|
9
|
+
@pytest.fixture
|
10
|
+
def multi_component_graph() -> ig_utils.ig.Graph:
|
11
|
+
"""Creates a graph with multiple disconnected components of different sizes."""
|
12
|
+
g1 = ig_utils.ig.Graph.Ring(5) # 5 vertices, 5 edges
|
13
|
+
g2 = ig_utils.ig.Graph.Tree(3, 2) # 3 vertices, 2 edges
|
14
|
+
g3 = ig_utils.ig.Graph.Full(2) # 2 vertices, 1 edge
|
15
|
+
return ig_utils.ig.disjoint_union([g1, g2, g3])
|
16
|
+
|
17
|
+
|
9
18
|
def test_validate_graph_attributes(sbml_dfs):
|
10
19
|
|
11
20
|
napistu_graph = net_create.process_napistu_graph(
|
@@ -21,3 +30,30 @@ def test_validate_graph_attributes(sbml_dfs):
|
|
21
30
|
assert ig_utils.validate_vertex_attributes(napistu_graph, "node_type") is None
|
22
31
|
with pytest.raises(ValueError):
|
23
32
|
ig_utils.validate_vertex_attributes(napistu_graph, "baz")
|
33
|
+
|
34
|
+
|
35
|
+
def test_filter_to_largest_subgraph(multi_component_graph):
|
36
|
+
"""Tests that the function returns only the single largest component."""
|
37
|
+
largest = ig_utils.filter_to_largest_subgraph(multi_component_graph)
|
38
|
+
assert isinstance(largest, ig_utils.ig.Graph)
|
39
|
+
assert largest.vcount() == 5
|
40
|
+
assert largest.ecount() == 5
|
41
|
+
|
42
|
+
|
43
|
+
def test_filter_to_largest_subgraphs(multi_component_graph):
|
44
|
+
"""Tests that the function returns the top K largest components."""
|
45
|
+
# Test getting the top 2
|
46
|
+
top_2 = ig_utils.filter_to_largest_subgraphs(multi_component_graph, top_k=2)
|
47
|
+
assert isinstance(top_2, list)
|
48
|
+
assert len(top_2) == 2
|
49
|
+
assert all(isinstance(g, ig_utils.ig.Graph) for g in top_2)
|
50
|
+
assert [g.vcount() for g in top_2] == [5, 3]
|
51
|
+
|
52
|
+
# Test getting more than the total number of components
|
53
|
+
top_5 = ig_utils.filter_to_largest_subgraphs(multi_component_graph, top_k=5)
|
54
|
+
assert len(top_5) == 3
|
55
|
+
assert [g.vcount() for g in top_5] == [5, 3, 2]
|
56
|
+
|
57
|
+
# Test invalid top_k
|
58
|
+
with pytest.raises(ValueError):
|
59
|
+
ig_utils.filter_to_largest_subgraphs(multi_component_graph, top_k=0)
|
tests/test_network_precompute.py
CHANGED
@@ -179,8 +179,11 @@ def test_precomputed_distances_neighborhoods():
|
|
179
179
|
right_on=join_key,
|
180
180
|
how="outer",
|
181
181
|
)
|
182
|
-
.fillna(False)
|
183
182
|
)
|
183
|
+
for col in ["in_precompute", "in_otf"]:
|
184
|
+
neighbor_comparison[col] = (
|
185
|
+
neighbor_comparison[col].astype("boolean").fillna(False)
|
186
|
+
)
|
184
187
|
comparison_l.append(neighbor_comparison.assign(focal_sc_id=key))
|
185
188
|
|
186
189
|
comparison_df = pd.concat(comparison_l)
|
tests/test_sbml_dfs_core.py
CHANGED
@@ -6,14 +6,83 @@ import numpy as np
|
|
6
6
|
import pandas as pd
|
7
7
|
import pytest
|
8
8
|
from napistu import sbml_dfs_core
|
9
|
+
from napistu.source import Source
|
9
10
|
from napistu.ingestion import sbml
|
10
11
|
from napistu.modify import pathwayannot
|
12
|
+
from napistu.sbml_dfs_utils import _stub_ids
|
11
13
|
|
12
14
|
from napistu import identifiers as napistu_identifiers
|
13
|
-
from napistu.constants import
|
15
|
+
from napistu.constants import (
|
16
|
+
SBML_DFS,
|
17
|
+
SBOTERM_NAMES,
|
18
|
+
BQB_DEFINING_ATTRS,
|
19
|
+
BQB_DEFINING_ATTRS_LOOSE,
|
20
|
+
BQB,
|
21
|
+
IDENTIFIERS,
|
22
|
+
)
|
14
23
|
from napistu.sbml_dfs_core import SBML_dfs
|
15
24
|
|
16
25
|
|
26
|
+
@pytest.fixture
|
27
|
+
def test_data():
|
28
|
+
"""Create test data for SBML integration tests."""
|
29
|
+
|
30
|
+
# Test compartments
|
31
|
+
compartments_df = pd.DataFrame(
|
32
|
+
[
|
33
|
+
{"c_name": "nucleus", "c_Identifiers": _stub_ids([])},
|
34
|
+
{"c_name": "cytoplasm", "c_Identifiers": _stub_ids([])},
|
35
|
+
]
|
36
|
+
)
|
37
|
+
|
38
|
+
# Test species with extra data
|
39
|
+
species_df = pd.DataFrame(
|
40
|
+
[
|
41
|
+
{
|
42
|
+
"s_name": "TP53",
|
43
|
+
"s_Identifiers": _stub_ids([]),
|
44
|
+
"gene_type": "tumor_suppressor",
|
45
|
+
},
|
46
|
+
{"s_name": "MDM2", "s_Identifiers": _stub_ids([]), "gene_type": "oncogene"},
|
47
|
+
{
|
48
|
+
"s_name": "CDKN1A",
|
49
|
+
"s_Identifiers": _stub_ids([]),
|
50
|
+
"gene_type": "cell_cycle",
|
51
|
+
},
|
52
|
+
]
|
53
|
+
)
|
54
|
+
|
55
|
+
# Test interactions with extra data
|
56
|
+
interaction_edgelist = pd.DataFrame(
|
57
|
+
[
|
58
|
+
{
|
59
|
+
"upstream_name": "TP53",
|
60
|
+
"downstream_name": "CDKN1A",
|
61
|
+
"upstream_compartment": "nucleus",
|
62
|
+
"downstream_compartment": "nucleus",
|
63
|
+
"r_name": "TP53_activates_CDKN1A",
|
64
|
+
"sbo_term": "SBO:0000459",
|
65
|
+
"r_Identifiers": _stub_ids([]),
|
66
|
+
"r_isreversible": False,
|
67
|
+
"confidence": 0.95,
|
68
|
+
},
|
69
|
+
{
|
70
|
+
"upstream_name": "MDM2",
|
71
|
+
"downstream_name": "TP53",
|
72
|
+
"upstream_compartment": "cytoplasm",
|
73
|
+
"downstream_compartment": "nucleus",
|
74
|
+
"r_name": "MDM2_inhibits_TP53",
|
75
|
+
"sbo_term": "SBO:0000020",
|
76
|
+
"r_Identifiers": _stub_ids([]),
|
77
|
+
"r_isreversible": False,
|
78
|
+
"confidence": 0.87,
|
79
|
+
},
|
80
|
+
]
|
81
|
+
)
|
82
|
+
|
83
|
+
return [interaction_edgelist, species_df, compartments_df, Source(init=True)]
|
84
|
+
|
85
|
+
|
17
86
|
def test_drop_cofactors(sbml_dfs):
|
18
87
|
starting_rscs = sbml_dfs.reaction_species.shape[0]
|
19
88
|
reduced_dfs = pathwayannot.drop_cofactors(sbml_dfs)
|
@@ -493,3 +562,153 @@ def test_remove_entity_data_nonexistent(sbml_dfs_w_data, caplog):
|
|
493
562
|
|
494
563
|
# Validate the model is still valid
|
495
564
|
sbml_dfs_w_data.validate()
|
565
|
+
|
566
|
+
|
567
|
+
def test_filter_to_characteristic_species_ids():
|
568
|
+
|
569
|
+
species_ids_dict = {
|
570
|
+
SBML_DFS.S_ID: ["large_complex"] * 6
|
571
|
+
+ ["small_complex"] * 2
|
572
|
+
+ ["proteinA", "proteinB"]
|
573
|
+
+ ["proteinC"] * 3
|
574
|
+
+ [
|
575
|
+
"promiscuous_complexA",
|
576
|
+
"promiscuous_complexB",
|
577
|
+
"promiscuous_complexC",
|
578
|
+
"promiscuous_complexD",
|
579
|
+
"promiscuous_complexE",
|
580
|
+
],
|
581
|
+
IDENTIFIERS.ONTOLOGY: ["complexportal"]
|
582
|
+
+ ["HGNC"] * 7
|
583
|
+
+ ["GO"] * 2
|
584
|
+
+ ["ENSG", "ENSP", "pubmed"]
|
585
|
+
+ ["HGNC"] * 5,
|
586
|
+
IDENTIFIERS.IDENTIFIER: [
|
587
|
+
"CPX-BIG",
|
588
|
+
"mem1",
|
589
|
+
"mem2",
|
590
|
+
"mem3",
|
591
|
+
"mem4",
|
592
|
+
"mem5",
|
593
|
+
"part1",
|
594
|
+
"part2",
|
595
|
+
"GO:1",
|
596
|
+
"GO:2",
|
597
|
+
"dna_seq",
|
598
|
+
"protein_seq",
|
599
|
+
"my_cool_pub",
|
600
|
+
]
|
601
|
+
+ ["promiscuous_complex"] * 5,
|
602
|
+
IDENTIFIERS.BQB: [BQB.IS]
|
603
|
+
+ [BQB.HAS_PART] * 7
|
604
|
+
+ [BQB.IS] * 2
|
605
|
+
+ [
|
606
|
+
# these are retained if BQB_DEFINING_ATTRS_LOOSE is used
|
607
|
+
BQB.ENCODES,
|
608
|
+
BQB.IS_ENCODED_BY,
|
609
|
+
# this should always be removed
|
610
|
+
BQB.IS_DESCRIBED_BY,
|
611
|
+
]
|
612
|
+
+ [BQB.HAS_PART] * 5,
|
613
|
+
}
|
614
|
+
|
615
|
+
species_ids = pd.DataFrame(species_ids_dict)
|
616
|
+
|
617
|
+
characteristic_ids_narrow = sbml_dfs_core.filter_to_characteristic_species_ids(
|
618
|
+
species_ids,
|
619
|
+
defining_biological_qualifiers=BQB_DEFINING_ATTRS,
|
620
|
+
max_complex_size=4,
|
621
|
+
max_promiscuity=4,
|
622
|
+
)
|
623
|
+
|
624
|
+
EXPECTED_IDS = ["CPX-BIG", "GO:1", "GO:2", "part1", "part2"]
|
625
|
+
assert characteristic_ids_narrow[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
|
626
|
+
|
627
|
+
characteristic_ids_loose = sbml_dfs_core.filter_to_characteristic_species_ids(
|
628
|
+
species_ids,
|
629
|
+
# include encodes and is_encoded_by as equivalent to is
|
630
|
+
defining_biological_qualifiers=BQB_DEFINING_ATTRS_LOOSE,
|
631
|
+
max_complex_size=4,
|
632
|
+
# expand promiscuity to default value
|
633
|
+
max_promiscuity=20,
|
634
|
+
)
|
635
|
+
|
636
|
+
EXPECTED_IDS = [
|
637
|
+
"CPX-BIG",
|
638
|
+
"GO:1",
|
639
|
+
"GO:2",
|
640
|
+
"dna_seq",
|
641
|
+
"protein_seq",
|
642
|
+
"part1",
|
643
|
+
"part2",
|
644
|
+
] + ["promiscuous_complex"] * 5
|
645
|
+
assert characteristic_ids_loose[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
|
646
|
+
|
647
|
+
|
648
|
+
def test_sbml_basic_functionality(test_data):
|
649
|
+
"""Test basic SBML_dfs creation from edgelist."""
|
650
|
+
interaction_edgelist, species_df, compartments_df, interaction_source = test_data
|
651
|
+
|
652
|
+
result = sbml_dfs_core.sbml_dfs_from_edgelist(
|
653
|
+
interaction_edgelist, species_df, compartments_df, interaction_source
|
654
|
+
)
|
655
|
+
|
656
|
+
assert isinstance(result, SBML_dfs)
|
657
|
+
assert len(result.species) == 3
|
658
|
+
assert len(result.compartments) == 2
|
659
|
+
assert len(result.reactions) == 2
|
660
|
+
assert (
|
661
|
+
len(result.compartmentalized_species) == 3
|
662
|
+
) # TP53[nucleus], CDKN1A[nucleus], MDM2[cytoplasm]
|
663
|
+
assert len(result.reaction_species) == 4 # 2 reactions * 2 species each
|
664
|
+
|
665
|
+
|
666
|
+
def test_sbml_extra_data_preservation(test_data):
|
667
|
+
"""Test that extra columns are preserved when requested."""
|
668
|
+
interaction_edgelist, species_df, compartments_df, interaction_source = test_data
|
669
|
+
|
670
|
+
result = sbml_dfs_core.sbml_dfs_from_edgelist(
|
671
|
+
interaction_edgelist,
|
672
|
+
species_df,
|
673
|
+
compartments_df,
|
674
|
+
interaction_source,
|
675
|
+
keep_species_data=True,
|
676
|
+
keep_reactions_data="experiment",
|
677
|
+
)
|
678
|
+
|
679
|
+
assert hasattr(result, "species_data")
|
680
|
+
assert hasattr(result, "reactions_data")
|
681
|
+
assert "gene_type" in result.species_data["source"].columns
|
682
|
+
assert "confidence" in result.reactions_data["experiment"].columns
|
683
|
+
|
684
|
+
|
685
|
+
def test_sbml_compartmentalized_naming(test_data):
|
686
|
+
"""Test compartmentalized species naming convention."""
|
687
|
+
interaction_edgelist, species_df, compartments_df, interaction_source = test_data
|
688
|
+
|
689
|
+
result = sbml_dfs_core.sbml_dfs_from_edgelist(
|
690
|
+
interaction_edgelist, species_df, compartments_df, interaction_source
|
691
|
+
)
|
692
|
+
|
693
|
+
comp_names = result.compartmentalized_species["sc_name"].tolist()
|
694
|
+
assert "TP53 [nucleus]" in comp_names
|
695
|
+
assert "MDM2 [cytoplasm]" in comp_names
|
696
|
+
assert "CDKN1A [nucleus]" in comp_names
|
697
|
+
|
698
|
+
|
699
|
+
def test_sbml_custom_stoichiometry(test_data):
|
700
|
+
"""Test custom stoichiometry parameters."""
|
701
|
+
interaction_edgelist, species_df, compartments_df, interaction_source = test_data
|
702
|
+
|
703
|
+
result = sbml_dfs_core.sbml_dfs_from_edgelist(
|
704
|
+
interaction_edgelist,
|
705
|
+
species_df,
|
706
|
+
compartments_df,
|
707
|
+
interaction_source,
|
708
|
+
upstream_stoichiometry=2,
|
709
|
+
downstream_stoichiometry=3,
|
710
|
+
)
|
711
|
+
|
712
|
+
stoichiometries = result.reaction_species["stoichiometry"].unique()
|
713
|
+
assert 2 in stoichiometries # upstream
|
714
|
+
assert 3 in stoichiometries # downstream
|
tests/test_sbml_dfs_utils.py
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import pandas as pd
|
4
|
+
|
3
5
|
from napistu import sbml_dfs_utils
|
6
|
+
from napistu.constants import BQB, BQB_DEFINING_ATTRS, BQB_DEFINING_ATTRS_LOOSE
|
4
7
|
|
5
8
|
|
6
9
|
def test_id_formatter():
|
@@ -14,9 +17,47 @@ def test_id_formatter():
|
|
14
17
|
assert list(input_vals) == inv_ids
|
15
18
|
|
16
19
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
20
|
+
def test_get_characteristic_species_ids():
|
21
|
+
"""
|
22
|
+
Test get_characteristic_species_ids function with both dogmatic and non-dogmatic cases.
|
23
|
+
"""
|
24
|
+
# Create mock species identifiers data
|
25
|
+
mock_species_ids = pd.DataFrame(
|
26
|
+
{
|
27
|
+
"s_id": ["s1", "s2", "s3", "s4", "s5"],
|
28
|
+
"identifier": ["P12345", "CHEBI:15377", "GO:12345", "P67890", "P67890"],
|
29
|
+
"ontology": ["uniprot", "chebi", "go", "uniprot", "chebi"],
|
30
|
+
"bqb": [
|
31
|
+
"BQB_IS",
|
32
|
+
"BQB_IS",
|
33
|
+
"BQB_HAS_PART",
|
34
|
+
"BQB_HAS_VERSION",
|
35
|
+
"BQB_ENCODES",
|
36
|
+
],
|
37
|
+
}
|
38
|
+
)
|
39
|
+
|
40
|
+
# Create mock SBML_dfs object
|
41
|
+
class MockSBML_dfs:
|
42
|
+
def get_identifiers(self, entity_type):
|
43
|
+
return mock_species_ids
|
44
|
+
|
45
|
+
mock_sbml = MockSBML_dfs()
|
46
|
+
|
47
|
+
# Test dogmatic case (default)
|
48
|
+
expected_bqbs = BQB_DEFINING_ATTRS + [BQB.HAS_PART] # noqa: F841
|
49
|
+
dogmatic_result = sbml_dfs_utils.get_characteristic_species_ids(mock_sbml)
|
50
|
+
expected_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
|
51
|
+
|
52
|
+
pd.testing.assert_frame_equal(dogmatic_result, expected_dogmatic, check_like=True)
|
53
|
+
|
54
|
+
# Test non-dogmatic case
|
55
|
+
expected_bqbs = BQB_DEFINING_ATTRS_LOOSE + [BQB.HAS_PART] # noqa: F841
|
56
|
+
non_dogmatic_result = sbml_dfs_utils.get_characteristic_species_ids(
|
57
|
+
mock_sbml, dogmatic=False
|
58
|
+
)
|
59
|
+
expected_non_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
|
60
|
+
|
61
|
+
pd.testing.assert_frame_equal(
|
62
|
+
non_dogmatic_result, expected_non_dogmatic, check_like=True
|
63
|
+
)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|