napistu 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/constants.py +0 -6
- napistu/network/precompute.py +10 -4
- napistu/sbml_dfs_core.py +41 -37
- {napistu-0.3.3.dist-info → napistu-0.3.4.dist-info}/METADATA +2 -2
- {napistu-0.3.3.dist-info → napistu-0.3.4.dist-info}/RECORD +12 -12
- tests/test_network_precompute.py +4 -1
- tests/test_sbml_dfs_core.py +89 -1
- tests/test_sbml_dfs_utils.py +47 -6
- {napistu-0.3.3.dist-info → napistu-0.3.4.dist-info}/WHEEL +0 -0
- {napistu-0.3.3.dist-info → napistu-0.3.4.dist-info}/entry_points.txt +0 -0
- {napistu-0.3.3.dist-info → napistu-0.3.4.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.3.3.dist-info → napistu-0.3.4.dist-info}/top_level.txt +0 -0
napistu/constants.py
CHANGED
@@ -403,12 +403,6 @@ ONTOLOGIES = SimpleNamespace(
|
|
403
403
|
|
404
404
|
ONTOLOGIES_LIST = list(ONTOLOGIES.__dict__.values())
|
405
405
|
|
406
|
-
CHARACTERISTIC_COMPLEX_ONTOLOGIES = [
|
407
|
-
ONTOLOGIES.ENSEMBL_GENE,
|
408
|
-
ONTOLOGIES.NCBI_ENTREZ_GENE,
|
409
|
-
ONTOLOGIES.MIRBASE,
|
410
|
-
]
|
411
|
-
|
412
406
|
ONTOLOGY_SPECIES_ALIASES = {
|
413
407
|
ONTOLOGIES.NCBI_ENTREZ_GENE: {"ncbigene", "ncbi_gene"},
|
414
408
|
ONTOLOGIES.ENSEMBL_GENE: {"ensembl_gene_id"},
|
napistu/network/precompute.py
CHANGED
@@ -120,7 +120,7 @@ def save_precomputed_distances(
|
|
120
120
|
OSError
|
121
121
|
If the file cannot be written to (permission issues, etc.)
|
122
122
|
"""
|
123
|
-
save_json(str(uri), precomputed_distances.
|
123
|
+
save_json(str(uri), precomputed_distances.to_json())
|
124
124
|
|
125
125
|
|
126
126
|
def load_precomputed_distances(uri: Union[str, Path]) -> pd.DataFrame:
|
@@ -143,12 +143,18 @@ def load_precomputed_distances(uri: Union[str, Path]) -> pd.DataFrame:
|
|
143
143
|
If the specified file does not exist
|
144
144
|
"""
|
145
145
|
try:
|
146
|
-
|
146
|
+
json_string = load_json(str(uri))
|
147
|
+
df = pd.read_json(json_string)
|
148
|
+
|
149
|
+
# Convert integer columns to float
|
150
|
+
for col in df.columns:
|
151
|
+
if df[col].dtype in ["int64", "int32", "int16", "int8"]:
|
152
|
+
df[col] = df[col].astype(float)
|
153
|
+
|
154
|
+
return df
|
147
155
|
except ResourceNotFound as e:
|
148
156
|
raise FileNotFoundError(f"File not found: {uri}") from e
|
149
157
|
|
150
|
-
return pd.DataFrame.from_dict(data_dict, orient="index").rename(index=int)
|
151
|
-
|
152
158
|
|
153
159
|
def _calculate_distances_subset(
|
154
160
|
napistu_graph: NapistuGraph,
|
napistu/sbml_dfs_core.py
CHANGED
@@ -32,7 +32,6 @@ from napistu.constants import SBOTERM_NAMES
|
|
32
32
|
from napistu.constants import SBO_ROLES_DEFS
|
33
33
|
from napistu.constants import ENTITIES_W_DATA
|
34
34
|
from napistu.constants import ENTITIES_TO_ENTITY_DATA
|
35
|
-
from napistu.constants import CHARACTERISTIC_COMPLEX_ONTOLOGIES
|
36
35
|
from napistu.ingestion.constants import GENERIC_COMPARTMENT
|
37
36
|
from napistu.ingestion.constants import COMPARTMENT_ALIASES
|
38
37
|
from napistu.ingestion.constants import COMPARTMENTS_GO_TERMS
|
@@ -1471,12 +1470,6 @@ def filter_to_characteristic_species_ids(
|
|
1471
1470
|
# add components within modestly sized protein complexes
|
1472
1471
|
# look at HAS_PART IDs
|
1473
1472
|
bqb_has_parts_species = species_ids[species_ids[IDENTIFIERS.BQB] == BQB.HAS_PART]
|
1474
|
-
# filter to genes
|
1475
|
-
bqb_has_parts_species = bqb_has_parts_species[
|
1476
|
-
bqb_has_parts_species[IDENTIFIERS.ONTOLOGY].isin(
|
1477
|
-
CHARACTERISTIC_COMPLEX_ONTOLOGIES
|
1478
|
-
)
|
1479
|
-
]
|
1480
1473
|
|
1481
1474
|
# number of species in a complex
|
1482
1475
|
n_species_components = bqb_has_parts_species.value_counts(
|
@@ -1488,38 +1481,10 @@ def filter_to_characteristic_species_ids(
|
|
1488
1481
|
].index.get_level_values(SBML_DFS.S_ID)
|
1489
1482
|
)
|
1490
1483
|
|
1491
|
-
|
1492
|
-
|
1493
|
-
[IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER]
|
1494
|
-
)
|
1495
|
-
promiscuous_component_identifiers_index = n_complexes_involvedin[
|
1496
|
-
n_complexes_involvedin > max_promiscuity
|
1497
|
-
].index
|
1498
|
-
promiscuous_component_identifiers = pd.Series(
|
1499
|
-
data=[True] * len(promiscuous_component_identifiers_index),
|
1500
|
-
index=promiscuous_component_identifiers_index,
|
1501
|
-
name="is_shared_component",
|
1502
|
-
dtype=bool,
|
1503
|
-
)
|
1504
|
-
|
1505
|
-
if len(promiscuous_component_identifiers) == 0:
|
1506
|
-
# no complexes to filter
|
1507
|
-
return species_ids
|
1508
|
-
|
1509
|
-
filtered_bqb_has_parts = bqb_has_parts_species.merge(
|
1510
|
-
promiscuous_component_identifiers,
|
1511
|
-
left_on=[IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER],
|
1512
|
-
right_index=True,
|
1513
|
-
how="left",
|
1484
|
+
filtered_bqb_has_parts = _filter_promiscuous_components(
|
1485
|
+
bqb_has_parts_species, max_promiscuity
|
1514
1486
|
)
|
1515
1487
|
|
1516
|
-
filtered_bqb_has_parts["is_shared_component"] = filtered_bqb_has_parts[
|
1517
|
-
"is_shared_component"
|
1518
|
-
].fillna(False)
|
1519
|
-
# drop identifiers shared as components across many species
|
1520
|
-
filtered_bqb_has_parts = filtered_bqb_has_parts[
|
1521
|
-
~filtered_bqb_has_parts["is_shared_component"]
|
1522
|
-
].drop(["is_shared_component"], axis=1)
|
1523
1488
|
# drop species parts if there are many components
|
1524
1489
|
filtered_bqb_has_parts = filtered_bqb_has_parts[
|
1525
1490
|
~filtered_bqb_has_parts[SBML_DFS.S_ID].isin(big_complex_sids)
|
@@ -2595,3 +2560,42 @@ def _perform_sbml_dfs_table_validation(
|
|
2595
2560
|
# check for empty table
|
2596
2561
|
if table_data.shape[0] == 0:
|
2597
2562
|
raise ValueError(f"{table_name} contained no entries")
|
2563
|
+
|
2564
|
+
|
2565
|
+
def _filter_promiscuous_components(
|
2566
|
+
bqb_has_parts_species: pd.DataFrame, max_promiscuity: int
|
2567
|
+
) -> pd.DataFrame:
|
2568
|
+
|
2569
|
+
# number of complexes a species is part of
|
2570
|
+
n_complexes_involvedin = bqb_has_parts_species.value_counts(
|
2571
|
+
[IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER]
|
2572
|
+
)
|
2573
|
+
promiscuous_component_identifiers_index = n_complexes_involvedin[
|
2574
|
+
n_complexes_involvedin > max_promiscuity
|
2575
|
+
].index
|
2576
|
+
promiscuous_component_identifiers = pd.Series(
|
2577
|
+
data=[True] * len(promiscuous_component_identifiers_index),
|
2578
|
+
index=promiscuous_component_identifiers_index,
|
2579
|
+
name="is_shared_component",
|
2580
|
+
dtype=bool,
|
2581
|
+
)
|
2582
|
+
|
2583
|
+
if len(promiscuous_component_identifiers) == 0:
|
2584
|
+
return bqb_has_parts_species
|
2585
|
+
|
2586
|
+
filtered_bqb_has_parts = bqb_has_parts_species.merge(
|
2587
|
+
promiscuous_component_identifiers,
|
2588
|
+
left_on=[IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER],
|
2589
|
+
right_index=True,
|
2590
|
+
how="left",
|
2591
|
+
)
|
2592
|
+
|
2593
|
+
filtered_bqb_has_parts["is_shared_component"] = (
|
2594
|
+
filtered_bqb_has_parts["is_shared_component"].astype("boolean").fillna(False)
|
2595
|
+
)
|
2596
|
+
# drop identifiers shared as components across many species
|
2597
|
+
filtered_bqb_has_parts = filtered_bqb_has_parts[
|
2598
|
+
~filtered_bqb_has_parts["is_shared_component"]
|
2599
|
+
].drop(["is_shared_component"], axis=1)
|
2600
|
+
|
2601
|
+
return filtered_bqb_has_parts
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: napistu
|
3
|
-
Version: 0.3.3
|
3
|
+
Version: 0.3.4
|
4
4
|
Summary: Connecting high-dimensional data to curated pathways
|
5
5
|
Home-page: https://github.com/napistu/napistu-py
|
6
6
|
Author: Sean Hackett
|
@@ -19,7 +19,7 @@ Requires-Dist: Jinja2
|
|
19
19
|
Requires-Dist: PyYAML<7.0.0,>=6.0.0
|
20
20
|
Requires-Dist: click<9.0.0,>=8.0.0
|
21
21
|
Requires-Dist: click-logging
|
22
|
-
Requires-Dist: fs<3.0.0,>=2.4.
|
22
|
+
Requires-Dist: fs<3.0.0,>=2.4.16
|
23
23
|
Requires-Dist: fs-gcsfs<2.0.0,>=1.5.0
|
24
24
|
Requires-Dist: igraph
|
25
25
|
Requires-Dist: matplotlib<4.0.0,>=3.5.0
|
@@ -1,10 +1,10 @@
|
|
1
1
|
napistu/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
|
2
2
|
napistu/__main__.py,sha256=PbzIsFAoFHNQuSyi-ql-D7tQLEOuqgmTcgk0PY-OGeU,28636
|
3
3
|
napistu/consensus.py,sha256=UbKKSLP1O46e3Rk8d_aqNlhRHeR3sZRztAgIm7-XK6Y,69960
|
4
|
-
napistu/constants.py,sha256=
|
4
|
+
napistu/constants.py,sha256=10SuKl19koWvCaeM2k-5GDQxvJpKY3GaGFY_4VZivXw,12356
|
5
5
|
napistu/identifiers.py,sha256=wsVriQdvPllA5uvh5CiREklA2tYW2MIB14dV7CPaMVU,34003
|
6
6
|
napistu/indices.py,sha256=E_djN1XWc6l1lrFw_QnQXfZTKYTaUv8-jFPP7cHkY5A,9780
|
7
|
-
napistu/sbml_dfs_core.py,sha256=
|
7
|
+
napistu/sbml_dfs_core.py,sha256=w4hoggMAXJ4Np45_2j-l92vOZg134eYa5Sg7SIdCaFo,91804
|
8
8
|
napistu/sbml_dfs_utils.py,sha256=LJo6WWTrmnE58ZLDuibeeHk88uCdfunWdja7XxdZpps,11525
|
9
9
|
napistu/source.py,sha256=9uUJrkY4jHaKlzz5nNcQQ8wUAep2pfqhlHxHw1hmEkI,13648
|
10
10
|
napistu/utils.py,sha256=TcholWrFbRSu_sn9ODMA8y2YyAhekEKZjwf4S0WQNzI,33241
|
@@ -67,7 +67,7 @@ napistu/network/net_create.py,sha256=2N5ocGmibdBxIUVtv3H36iFWwkbys9ECCERFRlByhLc
|
|
67
67
|
napistu/network/net_propagation.py,sha256=89ZR4p2mGpkCCIemofZ53XbUjQsuNABxIc6UmF8A5n8,4935
|
68
68
|
napistu/network/ng_utils.py,sha256=ijWDa5MTuULJpdV6dcVFGmLmtB_xy87jaUG7F5nvC_k,15240
|
69
69
|
napistu/network/paths.py,sha256=S4ZaV0yVmI-o8sXfom5eXA3yy2IEbleYUyXEvnmVw98,17468
|
70
|
-
napistu/network/precompute.py,sha256=
|
70
|
+
napistu/network/precompute.py,sha256=_TyztdHucczZg1JacWuXfNp5NGRKBFMGfp8Imx7OBMM,9118
|
71
71
|
napistu/ontologies/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
|
72
72
|
napistu/ontologies/constants.py,sha256=GyOFvezSxDK1VigATcruTKtNhjcYaid1ggulEf_HEtQ,4345
|
73
73
|
napistu/ontologies/dogma.py,sha256=jGZS-J3d29AoUOow-HVjfVZQJ87lnqO5L1aozieN1ec,8825
|
@@ -81,7 +81,7 @@ napistu/rpy2/rids.py,sha256=AfXLTfTdonfspgAHYO0Ph7jSUWv8YuyT8x3fyLfAqc8,3413
|
|
81
81
|
napistu/scverse/__init__.py,sha256=Lgxr3iMQAkTzXE9BNz93CndNP5djzerLvmHM-D0PU3I,357
|
82
82
|
napistu/scverse/constants.py,sha256=0iAkhyJUIeFGHdLLU3fCaEU1O3Oix4qAsxr3CxGTjVs,653
|
83
83
|
napistu/scverse/loading.py,sha256=jqiE71XB-wdV50GyZrauFNY0Lai4bX9Fm2Gv80VR8t8,27016
|
84
|
-
napistu-0.3.3.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
|
84
|
+
napistu-0.3.4.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
|
85
85
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
86
86
|
tests/conftest.py,sha256=XVkd0tQywhnf2cgab7fIjBo3NlaTVX3cO8HaRS2jIwM,3190
|
87
87
|
tests/test_consensus.py,sha256=3dJvvPsPG7bHbw_FY4Pm647N_Gt_Ud9157OKYfPCUd4,9502
|
@@ -108,7 +108,7 @@ tests/test_network_net_create.py,sha256=VNFZTwQawAZQPDnVk_qFevgZErx5KyQZ24bMoZF4
|
|
108
108
|
tests/test_network_net_propagation.py,sha256=9pKkUdduWejH4iKNCJXKFzAkdNpCfrMbiUWySgI_LH4,3244
|
109
109
|
tests/test_network_ng_utils.py,sha256=CwDw4MKTPhVZXz2HA2XU2QjjBv8CXc1_yQ0drvkBkFw,724
|
110
110
|
tests/test_network_paths.py,sha256=TWZnxY5bF3m6gahcxcYJGrBIawh2-_vUcec1LyPmXV8,1686
|
111
|
-
tests/test_network_precompute.py,sha256=
|
111
|
+
tests/test_network_precompute.py,sha256=xMGmZI9DxcWhJxuP7GCZEqtmcOvDRNK2LSia0x94v0U,9018
|
112
112
|
tests/test_ontologies_genodexito.py,sha256=hBlunyEPiKskqagjWKW5Z6DJwKvpueYHJLwbfyeeAdo,2256
|
113
113
|
tests/test_ontologies_mygene.py,sha256=BuBLm8VatzpK39-Ew_fFTK9ueLE4eqmKIDS5UKE59n8,1541
|
114
114
|
tests/test_ontologies_renaming.py,sha256=k7bQzP24zG7W3fpULwk1me2sOWEWlxylr4Mhx1_gJJY,3740
|
@@ -116,8 +116,8 @@ tests/test_pathwayannot.py,sha256=bceosccNy9tgxQei_7j7ATBSSvBSxOngJvK-mAzR_K0,33
|
|
116
116
|
tests/test_rpy2_callr.py,sha256=UVzXMvYN3wcc-ikDIjH2sA4BqkbwiNbMm561BcbnbD4,2936
|
117
117
|
tests/test_rpy2_init.py,sha256=APrNt9GEQV9va3vU5k250TxFplAoWFc-FJRFhM2GcDk,5927
|
118
118
|
tests/test_sbml.py,sha256=f25zj1NogYrmLluvBDboLameTuCiQ309433Qn3iPvhg,1483
|
119
|
-
tests/test_sbml_dfs_core.py,sha256=
|
120
|
-
tests/test_sbml_dfs_utils.py,sha256=
|
119
|
+
tests/test_sbml_dfs_core.py,sha256=tFaLMMuVjTLuhL-wimvcBbodEp59dhyHvXZ-IlUGGeU,19222
|
120
|
+
tests/test_sbml_dfs_utils.py,sha256=5lNzZ1NLOnFb_sZ0YWTgLzXy28yGNCtS_H8Q-W-T6Bw,2022
|
121
121
|
tests/test_sbo.py,sha256=x_PENFaXYsrZIzOZu9cj_Wrej7i7SNGxgBYYvcigLs0,308
|
122
122
|
tests/test_scverse_loading.py,sha256=bnU1lQSYYWhOAs0IIBoi4ZohqPokDQJ0n_rtkAfEyMU,29948
|
123
123
|
tests/test_set_coverage.py,sha256=J-6m6LuOjcQa9pxRuWglSfJk4Ltm7kt_eOrn_Q-7P6Q,1604
|
@@ -126,8 +126,8 @@ tests/test_uncompartmentalize.py,sha256=nAk5kfAVLU9a2VWe2x2HYVcKqj-EnwmwddERIPRa
|
|
126
126
|
tests/test_utils.py,sha256=JRJFmjDNZpjG59a-73JkTyGqa_a7Z8d0fE2cZt0CRII,22580
|
127
127
|
tests/utils.py,sha256=SoWQ_5roJteFGcMaOeEiQ5ucwq3Z2Fa3AAs9iXHTsJY,749
|
128
128
|
tests/test_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
129
|
-
napistu-0.3.
|
130
|
-
napistu-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
131
|
-
napistu-0.3.3.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
|
132
|
-
napistu-0.3.3.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
|
133
|
-
napistu-0.3.3.dist-info/RECORD,,
|
129
|
+
napistu-0.3.4.dist-info/METADATA,sha256=u0M0PcjZJds7ds-cV2MYar8maae9_ld_9FO--0G3xwA,3414
|
130
|
+
napistu-0.3.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
131
|
+
napistu-0.3.4.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
|
132
|
+
napistu-0.3.4.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
|
133
|
+
napistu-0.3.4.dist-info/RECORD,,
|
tests/test_network_precompute.py
CHANGED
@@ -179,8 +179,11 @@ def test_precomputed_distances_neighborhoods():
|
|
179
179
|
right_on=join_key,
|
180
180
|
how="outer",
|
181
181
|
)
|
182
|
-
.fillna(False)
|
183
182
|
)
|
183
|
+
for col in ["in_precompute", "in_otf"]:
|
184
|
+
neighbor_comparison[col] = (
|
185
|
+
neighbor_comparison[col].astype("boolean").fillna(False)
|
186
|
+
)
|
184
187
|
comparison_l.append(neighbor_comparison.assign(focal_sc_id=key))
|
185
188
|
|
186
189
|
comparison_df = pd.concat(comparison_l)
|
tests/test_sbml_dfs_core.py
CHANGED
@@ -10,7 +10,14 @@ from napistu.ingestion import sbml
|
|
10
10
|
from napistu.modify import pathwayannot
|
11
11
|
|
12
12
|
from napistu import identifiers as napistu_identifiers
|
13
|
-
from napistu.constants import
|
13
|
+
from napistu.constants import (
|
14
|
+
SBML_DFS,
|
15
|
+
SBOTERM_NAMES,
|
16
|
+
BQB_DEFINING_ATTRS,
|
17
|
+
BQB_DEFINING_ATTRS_LOOSE,
|
18
|
+
BQB,
|
19
|
+
IDENTIFIERS,
|
20
|
+
)
|
14
21
|
from napistu.sbml_dfs_core import SBML_dfs
|
15
22
|
|
16
23
|
|
@@ -493,3 +500,84 @@ def test_remove_entity_data_nonexistent(sbml_dfs_w_data, caplog):
|
|
493
500
|
|
494
501
|
# Validate the model is still valid
|
495
502
|
sbml_dfs_w_data.validate()
|
503
|
+
|
504
|
+
|
505
|
+
def test_filter_to_characteristic_species_ids():
|
506
|
+
|
507
|
+
species_ids_dict = {
|
508
|
+
SBML_DFS.S_ID: ["large_complex"] * 6
|
509
|
+
+ ["small_complex"] * 2
|
510
|
+
+ ["proteinA", "proteinB"]
|
511
|
+
+ ["proteinC"] * 3
|
512
|
+
+ [
|
513
|
+
"promiscuous_complexA",
|
514
|
+
"promiscuous_complexB",
|
515
|
+
"promiscuous_complexC",
|
516
|
+
"promiscuous_complexD",
|
517
|
+
"promiscuous_complexE",
|
518
|
+
],
|
519
|
+
IDENTIFIERS.ONTOLOGY: ["complexportal"]
|
520
|
+
+ ["HGNC"] * 7
|
521
|
+
+ ["GO"] * 2
|
522
|
+
+ ["ENSG", "ENSP", "pubmed"]
|
523
|
+
+ ["HGNC"] * 5,
|
524
|
+
IDENTIFIERS.IDENTIFIER: [
|
525
|
+
"CPX-BIG",
|
526
|
+
"mem1",
|
527
|
+
"mem2",
|
528
|
+
"mem3",
|
529
|
+
"mem4",
|
530
|
+
"mem5",
|
531
|
+
"part1",
|
532
|
+
"part2",
|
533
|
+
"GO:1",
|
534
|
+
"GO:2",
|
535
|
+
"dna_seq",
|
536
|
+
"protein_seq",
|
537
|
+
"my_cool_pub",
|
538
|
+
]
|
539
|
+
+ ["promiscuous_complex"] * 5,
|
540
|
+
IDENTIFIERS.BQB: [BQB.IS]
|
541
|
+
+ [BQB.HAS_PART] * 7
|
542
|
+
+ [BQB.IS] * 2
|
543
|
+
+ [
|
544
|
+
# these are retained if BQB_DEFINING_ATTRS_LOOSE is used
|
545
|
+
BQB.ENCODES,
|
546
|
+
BQB.IS_ENCODED_BY,
|
547
|
+
# this should always be removed
|
548
|
+
BQB.IS_DESCRIBED_BY,
|
549
|
+
]
|
550
|
+
+ [BQB.HAS_PART] * 5,
|
551
|
+
}
|
552
|
+
|
553
|
+
species_ids = pd.DataFrame(species_ids_dict)
|
554
|
+
|
555
|
+
characteristic_ids_narrow = sbml_dfs_core.filter_to_characteristic_species_ids(
|
556
|
+
species_ids,
|
557
|
+
defining_biological_qualifiers=BQB_DEFINING_ATTRS,
|
558
|
+
max_complex_size=4,
|
559
|
+
max_promiscuity=4,
|
560
|
+
)
|
561
|
+
|
562
|
+
EXPECTED_IDS = ["CPX-BIG", "GO:1", "GO:2", "part1", "part2"]
|
563
|
+
assert characteristic_ids_narrow[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
|
564
|
+
|
565
|
+
characteristic_ids_loose = sbml_dfs_core.filter_to_characteristic_species_ids(
|
566
|
+
species_ids,
|
567
|
+
# include encodes and is_encoded_by as equivalent to is
|
568
|
+
defining_biological_qualifiers=BQB_DEFINING_ATTRS_LOOSE,
|
569
|
+
max_complex_size=4,
|
570
|
+
# expand promiscuity to default value
|
571
|
+
max_promiscuity=20,
|
572
|
+
)
|
573
|
+
|
574
|
+
EXPECTED_IDS = [
|
575
|
+
"CPX-BIG",
|
576
|
+
"GO:1",
|
577
|
+
"GO:2",
|
578
|
+
"dna_seq",
|
579
|
+
"protein_seq",
|
580
|
+
"part1",
|
581
|
+
"part2",
|
582
|
+
] + ["promiscuous_complex"] * 5
|
583
|
+
assert characteristic_ids_loose[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
|
tests/test_sbml_dfs_utils.py
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import pandas as pd
|
4
|
+
|
3
5
|
from napistu import sbml_dfs_utils
|
6
|
+
from napistu.constants import BQB, BQB_DEFINING_ATTRS, BQB_DEFINING_ATTRS_LOOSE
|
4
7
|
|
5
8
|
|
6
9
|
def test_id_formatter():
|
@@ -14,9 +17,47 @@ def test_id_formatter():
|
|
14
17
|
assert list(input_vals) == inv_ids
|
15
18
|
|
16
19
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
20
|
+
def test_get_characteristic_species_ids():
|
21
|
+
"""
|
22
|
+
Test get_characteristic_species_ids function with both dogmatic and non-dogmatic cases.
|
23
|
+
"""
|
24
|
+
# Create mock species identifiers data
|
25
|
+
mock_species_ids = pd.DataFrame(
|
26
|
+
{
|
27
|
+
"s_id": ["s1", "s2", "s3", "s4", "s5"],
|
28
|
+
"identifier": ["P12345", "CHEBI:15377", "GO:12345", "P67890", "P67890"],
|
29
|
+
"ontology": ["uniprot", "chebi", "go", "uniprot", "chebi"],
|
30
|
+
"bqb": [
|
31
|
+
"BQB_IS",
|
32
|
+
"BQB_IS",
|
33
|
+
"BQB_HAS_PART",
|
34
|
+
"BQB_HAS_VERSION",
|
35
|
+
"BQB_ENCODES",
|
36
|
+
],
|
37
|
+
}
|
38
|
+
)
|
39
|
+
|
40
|
+
# Create mock SBML_dfs object
|
41
|
+
class MockSBML_dfs:
|
42
|
+
def get_identifiers(self, entity_type):
|
43
|
+
return mock_species_ids
|
44
|
+
|
45
|
+
mock_sbml = MockSBML_dfs()
|
46
|
+
|
47
|
+
# Test dogmatic case (default)
|
48
|
+
expected_bqbs = BQB_DEFINING_ATTRS + [BQB.HAS_PART] # noqa: F841
|
49
|
+
dogmatic_result = sbml_dfs_utils.get_characteristic_species_ids(mock_sbml)
|
50
|
+
expected_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
|
51
|
+
|
52
|
+
pd.testing.assert_frame_equal(dogmatic_result, expected_dogmatic, check_like=True)
|
53
|
+
|
54
|
+
# Test non-dogmatic case
|
55
|
+
expected_bqbs = BQB_DEFINING_ATTRS_LOOSE + [BQB.HAS_PART] # noqa: F841
|
56
|
+
non_dogmatic_result = sbml_dfs_utils.get_characteristic_species_ids(
|
57
|
+
mock_sbml, dogmatic=False
|
58
|
+
)
|
59
|
+
expected_non_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
|
60
|
+
|
61
|
+
pd.testing.assert_frame_equal(
|
62
|
+
non_dogmatic_result, expected_non_dogmatic, check_like=True
|
63
|
+
)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|