napistu 0.3.2.dev1__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/constants.py +0 -86
- napistu/ingestion/constants.py +106 -37
- napistu/ingestion/sbml.py +392 -221
- napistu/ingestion/string.py +2 -2
- napistu/modify/gaps.py +3 -3
- napistu/network/precompute.py +10 -4
- napistu/sbml_dfs_core.py +60 -57
- {napistu-0.3.2.dev1.dist-info → napistu-0.3.4.dist-info}/METADATA +2 -2
- {napistu-0.3.2.dev1.dist-info → napistu-0.3.4.dist-info}/RECORD +17 -17
- tests/test_network_precompute.py +4 -1
- tests/test_sbml.py +38 -7
- tests/test_sbml_dfs_core.py +89 -1
- tests/test_sbml_dfs_utils.py +47 -6
- {napistu-0.3.2.dev1.dist-info → napistu-0.3.4.dist-info}/WHEEL +0 -0
- {napistu-0.3.2.dev1.dist-info → napistu-0.3.4.dist-info}/entry_points.txt +0 -0
- {napistu-0.3.2.dev1.dist-info → napistu-0.3.4.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.3.2.dev1.dist-info → napistu-0.3.4.dist-info}/top_level.txt +0 -0
napistu/ingestion/string.py
CHANGED
@@ -8,7 +8,6 @@ from napistu import sbml_dfs_core
|
|
8
8
|
from napistu import source
|
9
9
|
from napistu import utils
|
10
10
|
from napistu.constants import BQB
|
11
|
-
from napistu.constants import COMPARTMENTS
|
12
11
|
from napistu.constants import MINI_SBO_FROM_NAME
|
13
12
|
from napistu.ingestion import napistu_edgelist
|
14
13
|
from napistu.ingestion.constants import SBML_SPECIES_DICT_IDENTIFIERS
|
@@ -28,6 +27,7 @@ from napistu.ingestion.constants import STRING_UPSTREAM_COMPARTMENT
|
|
28
27
|
from napistu.ingestion.constants import STRING_UPSTREAM_NAME
|
29
28
|
from napistu.ingestion.constants import STRING_URL_EXPRESSIONS
|
30
29
|
from napistu.ingestion.constants import STRING_VERSION
|
30
|
+
from napistu.ingestion.constants import GENERIC_COMPARTMENT
|
31
31
|
from fs import open_fs
|
32
32
|
|
33
33
|
logger = logging.getLogger(__name__)
|
@@ -297,7 +297,7 @@ def _build_interactor_edgelist(
|
|
297
297
|
downstream_col_name: str = STRING_TARGET,
|
298
298
|
add_reverse_interactions: bool = False,
|
299
299
|
sbo_term: str = "interactor",
|
300
|
-
compartment: str =
|
300
|
+
compartment: str = GENERIC_COMPARTMENT,
|
301
301
|
) -> pd.DataFrame:
|
302
302
|
"""Format STRING interactions as reactions."""
|
303
303
|
|
napistu/modify/gaps.py
CHANGED
@@ -15,18 +15,18 @@ from napistu import utils
|
|
15
15
|
from napistu.network import net_create
|
16
16
|
|
17
17
|
from napistu.constants import SBML_DFS
|
18
|
-
from napistu.constants import COMPARTMENTS
|
19
18
|
from napistu.constants import IDENTIFIERS
|
20
19
|
from napistu.constants import MINI_SBO_FROM_NAME
|
21
20
|
from napistu.constants import SBOTERM_NAMES
|
22
21
|
from napistu.constants import SOURCE_SPEC
|
22
|
+
from napistu.ingestion.constants import EXCHANGE_COMPARTMENT
|
23
23
|
|
24
24
|
logger = logging.getLogger(__name__)
|
25
25
|
|
26
26
|
|
27
27
|
def add_transportation_reactions(
|
28
28
|
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
29
|
-
exchange_compartment: str =
|
29
|
+
exchange_compartment: str = EXCHANGE_COMPARTMENT,
|
30
30
|
) -> sbml_dfs_core.SBML_dfs:
|
31
31
|
"""
|
32
32
|
Add transportation reactions to connect all forms of a protein across compartments.
|
@@ -73,7 +73,7 @@ def add_transportation_reactions(
|
|
73
73
|
def update_sbml_df_with_exchange(
|
74
74
|
species_needing_transport_rxns: np.ndarray,
|
75
75
|
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
76
|
-
exchange_compartment: str =
|
76
|
+
exchange_compartment: str = EXCHANGE_COMPARTMENT,
|
77
77
|
) -> sbml_dfs_core.SBML_dfs:
|
78
78
|
"""
|
79
79
|
Add transportation reactions between all locations of a set of molecular species by
|
napistu/network/precompute.py
CHANGED
@@ -120,7 +120,7 @@ def save_precomputed_distances(
|
|
120
120
|
OSError
|
121
121
|
If the file cannot be written to (permission issues, etc.)
|
122
122
|
"""
|
123
|
-
save_json(str(uri), precomputed_distances.
|
123
|
+
save_json(str(uri), precomputed_distances.to_json())
|
124
124
|
|
125
125
|
|
126
126
|
def load_precomputed_distances(uri: Union[str, Path]) -> pd.DataFrame:
|
@@ -143,12 +143,18 @@ def load_precomputed_distances(uri: Union[str, Path]) -> pd.DataFrame:
|
|
143
143
|
If the specified file does not exist
|
144
144
|
"""
|
145
145
|
try:
|
146
|
-
|
146
|
+
json_string = load_json(str(uri))
|
147
|
+
df = pd.read_json(json_string)
|
148
|
+
|
149
|
+
# Convert integer columns to float
|
150
|
+
for col in df.columns:
|
151
|
+
if df[col].dtype in ["int64", "int32", "int16", "int8"]:
|
152
|
+
df[col] = df[col].astype(float)
|
153
|
+
|
154
|
+
return df
|
147
155
|
except ResourceNotFound as e:
|
148
156
|
raise FileNotFoundError(f"File not found: {uri}") from e
|
149
157
|
|
150
|
-
return pd.DataFrame.from_dict(data_dict, orient="index").rename(index=int)
|
151
|
-
|
152
158
|
|
153
159
|
def _calculate_distances_subset(
|
154
160
|
napistu_graph: NapistuGraph,
|
napistu/sbml_dfs_core.py
CHANGED
@@ -13,6 +13,7 @@ from napistu import identifiers
|
|
13
13
|
from napistu import sbml_dfs_utils
|
14
14
|
from napistu import source
|
15
15
|
from napistu import utils
|
16
|
+
from napistu.ingestion import sbml
|
16
17
|
from napistu.constants import SBML_DFS
|
17
18
|
from napistu.constants import SBML_DFS_SCHEMA
|
18
19
|
from napistu.constants import IDENTIFIERS
|
@@ -23,9 +24,6 @@ from napistu.constants import BQB_PRIORITIES
|
|
23
24
|
from napistu.constants import ONTOLOGY_PRIORITIES
|
24
25
|
from napistu.constants import BQB
|
25
26
|
from napistu.constants import BQB_DEFINING_ATTRS
|
26
|
-
from napistu.constants import COMPARTMENTS
|
27
|
-
from napistu.constants import COMPARTMENT_ALIASES
|
28
|
-
from napistu.constants import COMPARTMENTS_GO_TERMS
|
29
27
|
from napistu.constants import MINI_SBO_FROM_NAME
|
30
28
|
from napistu.constants import MINI_SBO_TO_NAME
|
31
29
|
from napistu.constants import ONTOLOGIES
|
@@ -34,8 +32,9 @@ from napistu.constants import SBOTERM_NAMES
|
|
34
32
|
from napistu.constants import SBO_ROLES_DEFS
|
35
33
|
from napistu.constants import ENTITIES_W_DATA
|
36
34
|
from napistu.constants import ENTITIES_TO_ENTITY_DATA
|
37
|
-
from napistu.constants import
|
38
|
-
from napistu.ingestion import
|
35
|
+
from napistu.ingestion.constants import GENERIC_COMPARTMENT
|
36
|
+
from napistu.ingestion.constants import COMPARTMENT_ALIASES
|
37
|
+
from napistu.ingestion.constants import COMPARTMENTS_GO_TERMS
|
39
38
|
from fs import open_fs
|
40
39
|
|
41
40
|
logger = logging.getLogger(__name__)
|
@@ -145,7 +144,7 @@ class SBML_dfs:
|
|
145
144
|
if ent in sbml_model:
|
146
145
|
setattr(self, ent, sbml_model[ent])
|
147
146
|
else:
|
148
|
-
self = sbml.
|
147
|
+
self = sbml.sbml_dfs_from_sbml(self, sbml_model)
|
149
148
|
|
150
149
|
for ent in SBML_DFS_SCHEMA.OPTIONAL_ENTITIES:
|
151
150
|
# Initialize optional entities if not set
|
@@ -1421,8 +1420,8 @@ def filter_to_characteristic_species_ids(
|
|
1421
1420
|
complexes and non-characteristic annotations such as pubmed references and
|
1422
1421
|
homologues.
|
1423
1422
|
|
1424
|
-
|
1425
|
-
|
1423
|
+
Parameters
|
1424
|
+
----------
|
1426
1425
|
species_ids: pd.DataFrame
|
1427
1426
|
A table of identifiers produced by sdbml_dfs.get_identifiers("species")
|
1428
1427
|
max_complex_size: int
|
@@ -1471,12 +1470,6 @@ def filter_to_characteristic_species_ids(
|
|
1471
1470
|
# add components within modestly sized protein complexes
|
1472
1471
|
# look at HAS_PART IDs
|
1473
1472
|
bqb_has_parts_species = species_ids[species_ids[IDENTIFIERS.BQB] == BQB.HAS_PART]
|
1474
|
-
# filter to genes
|
1475
|
-
bqb_has_parts_species = bqb_has_parts_species[
|
1476
|
-
bqb_has_parts_species[IDENTIFIERS.ONTOLOGY].isin(
|
1477
|
-
CHARACTERISTIC_COMPLEX_ONTOLOGIES
|
1478
|
-
)
|
1479
|
-
]
|
1480
1473
|
|
1481
1474
|
# number of species in a complex
|
1482
1475
|
n_species_components = bqb_has_parts_species.value_counts(
|
@@ -1488,38 +1481,10 @@ def filter_to_characteristic_species_ids(
|
|
1488
1481
|
].index.get_level_values(SBML_DFS.S_ID)
|
1489
1482
|
)
|
1490
1483
|
|
1491
|
-
|
1492
|
-
|
1493
|
-
[IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER]
|
1494
|
-
)
|
1495
|
-
promiscuous_component_identifiers_index = n_complexes_involvedin[
|
1496
|
-
n_complexes_involvedin > max_promiscuity
|
1497
|
-
].index
|
1498
|
-
promiscuous_component_identifiers = pd.Series(
|
1499
|
-
data=[True] * len(promiscuous_component_identifiers_index),
|
1500
|
-
index=promiscuous_component_identifiers_index,
|
1501
|
-
name="is_shared_component",
|
1502
|
-
dtype=bool,
|
1484
|
+
filtered_bqb_has_parts = _filter_promiscuous_components(
|
1485
|
+
bqb_has_parts_species, max_promiscuity
|
1503
1486
|
)
|
1504
1487
|
|
1505
|
-
if len(promiscuous_component_identifiers) == 0:
|
1506
|
-
# no complexes to filter
|
1507
|
-
return species_ids
|
1508
|
-
|
1509
|
-
filtered_bqb_has_parts = bqb_has_parts_species.merge(
|
1510
|
-
promiscuous_component_identifiers,
|
1511
|
-
left_on=[IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER],
|
1512
|
-
right_index=True,
|
1513
|
-
how="left",
|
1514
|
-
)
|
1515
|
-
|
1516
|
-
filtered_bqb_has_parts["is_shared_component"] = filtered_bqb_has_parts[
|
1517
|
-
"is_shared_component"
|
1518
|
-
].fillna(False)
|
1519
|
-
# drop identifiers shared as components across many species
|
1520
|
-
filtered_bqb_has_parts = filtered_bqb_has_parts[
|
1521
|
-
~filtered_bqb_has_parts["is_shared_component"]
|
1522
|
-
].drop(["is_shared_component"], axis=1)
|
1523
1488
|
# drop species parts if there are many components
|
1524
1489
|
filtered_bqb_has_parts = filtered_bqb_has_parts[
|
1525
1490
|
~filtered_bqb_has_parts[SBML_DFS.S_ID].isin(big_complex_sids)
|
@@ -1812,8 +1777,8 @@ def export_sbml_dfs(
|
|
1812
1777
|
If True then treat genes, transcript, and proteins as separate species. If False
|
1813
1778
|
then treat them interchangeably.
|
1814
1779
|
|
1815
|
-
|
1816
|
-
|
1780
|
+
Returns
|
1781
|
+
-------
|
1817
1782
|
None
|
1818
1783
|
|
1819
1784
|
"""
|
@@ -2257,7 +2222,7 @@ def _sbml_dfs_from_edgelist_check_cspecies_merge(
|
|
2257
2222
|
|
2258
2223
|
|
2259
2224
|
def _stub_compartments(
|
2260
|
-
stubbed_compartment: str =
|
2225
|
+
stubbed_compartment: str = GENERIC_COMPARTMENT,
|
2261
2226
|
) -> pd.DataFrame:
|
2262
2227
|
"""Stub Compartments
|
2263
2228
|
|
@@ -2281,7 +2246,6 @@ def _stub_compartments(
|
|
2281
2246
|
f"{stubbed_compartment} is not defined in constants.COMPARTMENTS_GO_TERMS"
|
2282
2247
|
)
|
2283
2248
|
|
2284
|
-
stubbed_compartment_name = COMPARTMENTS[stubbed_compartment]
|
2285
2249
|
stubbed_compartment_id = COMPARTMENTS_GO_TERMS[stubbed_compartment]
|
2286
2250
|
|
2287
2251
|
formatted_uri = identifiers.format_uri(
|
@@ -2294,7 +2258,7 @@ def _stub_compartments(
|
|
2294
2258
|
|
2295
2259
|
compartments_df = pd.DataFrame(
|
2296
2260
|
{
|
2297
|
-
SBML_DFS.C_NAME: [
|
2261
|
+
SBML_DFS.C_NAME: [stubbed_compartment],
|
2298
2262
|
SBML_DFS.C_IDENTIFIERS: [identifiers.Identifiers([formatted_uri])],
|
2299
2263
|
}
|
2300
2264
|
)
|
@@ -2507,9 +2471,9 @@ def validate_sbml_dfs_table(table_data: pd.DataFrame, table_name: str) -> None:
|
|
2507
2471
|
table_name : str
|
2508
2472
|
Name of the table in the SBML_dfs schema
|
2509
2473
|
|
2510
|
-
|
2511
|
-
|
2512
|
-
|
2474
|
+
Raises
|
2475
|
+
------
|
2476
|
+
ValueError
|
2513
2477
|
If table_name is not in schema or validation fails
|
2514
2478
|
"""
|
2515
2479
|
if table_name not in SBML_DFS_SCHEMA.SCHEMA:
|
@@ -2533,8 +2497,8 @@ def _perform_sbml_dfs_table_validation(
|
|
2533
2497
|
This function performs the actual validation checks for any table against its schema,
|
2534
2498
|
regardless of whether it's part of an SBML_dfs object or standalone.
|
2535
2499
|
|
2536
|
-
|
2537
|
-
|
2500
|
+
Parameters
|
2501
|
+
----------
|
2538
2502
|
table_data : pd.DataFrame
|
2539
2503
|
The table data to validate
|
2540
2504
|
table_schema : dict
|
@@ -2542,9 +2506,9 @@ def _perform_sbml_dfs_table_validation(
|
|
2542
2506
|
table_name : str
|
2543
2507
|
Name of the table (for error messages)
|
2544
2508
|
|
2545
|
-
|
2546
|
-
|
2547
|
-
|
2509
|
+
Raises
|
2510
|
+
------
|
2511
|
+
ValueError
|
2548
2512
|
If the table does not conform to its schema:
|
2549
2513
|
- Not a DataFrame
|
2550
2514
|
- Wrong index name
|
@@ -2596,3 +2560,42 @@ def _perform_sbml_dfs_table_validation(
|
|
2596
2560
|
# check for empty table
|
2597
2561
|
if table_data.shape[0] == 0:
|
2598
2562
|
raise ValueError(f"{table_name} contained no entries")
|
2563
|
+
|
2564
|
+
|
2565
|
+
def _filter_promiscuous_components(
|
2566
|
+
bqb_has_parts_species: pd.DataFrame, max_promiscuity: int
|
2567
|
+
) -> pd.DataFrame:
|
2568
|
+
|
2569
|
+
# number of complexes a species is part of
|
2570
|
+
n_complexes_involvedin = bqb_has_parts_species.value_counts(
|
2571
|
+
[IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER]
|
2572
|
+
)
|
2573
|
+
promiscuous_component_identifiers_index = n_complexes_involvedin[
|
2574
|
+
n_complexes_involvedin > max_promiscuity
|
2575
|
+
].index
|
2576
|
+
promiscuous_component_identifiers = pd.Series(
|
2577
|
+
data=[True] * len(promiscuous_component_identifiers_index),
|
2578
|
+
index=promiscuous_component_identifiers_index,
|
2579
|
+
name="is_shared_component",
|
2580
|
+
dtype=bool,
|
2581
|
+
)
|
2582
|
+
|
2583
|
+
if len(promiscuous_component_identifiers) == 0:
|
2584
|
+
return bqb_has_parts_species
|
2585
|
+
|
2586
|
+
filtered_bqb_has_parts = bqb_has_parts_species.merge(
|
2587
|
+
promiscuous_component_identifiers,
|
2588
|
+
left_on=[IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER],
|
2589
|
+
right_index=True,
|
2590
|
+
how="left",
|
2591
|
+
)
|
2592
|
+
|
2593
|
+
filtered_bqb_has_parts["is_shared_component"] = (
|
2594
|
+
filtered_bqb_has_parts["is_shared_component"].astype("boolean").fillna(False)
|
2595
|
+
)
|
2596
|
+
# drop identifiers shared as components across many species
|
2597
|
+
filtered_bqb_has_parts = filtered_bqb_has_parts[
|
2598
|
+
~filtered_bqb_has_parts["is_shared_component"]
|
2599
|
+
].drop(["is_shared_component"], axis=1)
|
2600
|
+
|
2601
|
+
return filtered_bqb_has_parts
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: napistu
|
3
|
-
Version: 0.3.
|
3
|
+
Version: 0.3.4
|
4
4
|
Summary: Connecting high-dimensional data to curated pathways
|
5
5
|
Home-page: https://github.com/napistu/napistu-py
|
6
6
|
Author: Sean Hackett
|
@@ -19,7 +19,7 @@ Requires-Dist: Jinja2
|
|
19
19
|
Requires-Dist: PyYAML<7.0.0,>=6.0.0
|
20
20
|
Requires-Dist: click<9.0.0,>=8.0.0
|
21
21
|
Requires-Dist: click-logging
|
22
|
-
Requires-Dist: fs<3.0.0,>=2.4.
|
22
|
+
Requires-Dist: fs<3.0.0,>=2.4.16
|
23
23
|
Requires-Dist: fs-gcsfs<2.0.0,>=1.5.0
|
24
24
|
Requires-Dist: igraph
|
25
25
|
Requires-Dist: matplotlib<4.0.0,>=3.5.0
|
@@ -1,10 +1,10 @@
|
|
1
1
|
napistu/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
|
2
2
|
napistu/__main__.py,sha256=PbzIsFAoFHNQuSyi-ql-D7tQLEOuqgmTcgk0PY-OGeU,28636
|
3
3
|
napistu/consensus.py,sha256=UbKKSLP1O46e3Rk8d_aqNlhRHeR3sZRztAgIm7-XK6Y,69960
|
4
|
-
napistu/constants.py,sha256=
|
4
|
+
napistu/constants.py,sha256=10SuKl19koWvCaeM2k-5GDQxvJpKY3GaGFY_4VZivXw,12356
|
5
5
|
napistu/identifiers.py,sha256=wsVriQdvPllA5uvh5CiREklA2tYW2MIB14dV7CPaMVU,34003
|
6
6
|
napistu/indices.py,sha256=E_djN1XWc6l1lrFw_QnQXfZTKYTaUv8-jFPP7cHkY5A,9780
|
7
|
-
napistu/sbml_dfs_core.py,sha256=
|
7
|
+
napistu/sbml_dfs_core.py,sha256=w4hoggMAXJ4Np45_2j-l92vOZg134eYa5Sg7SIdCaFo,91804
|
8
8
|
napistu/sbml_dfs_utils.py,sha256=LJo6WWTrmnE58ZLDuibeeHk88uCdfunWdja7XxdZpps,11525
|
9
9
|
napistu/source.py,sha256=9uUJrkY4jHaKlzz5nNcQQ8wUAep2pfqhlHxHw1hmEkI,13648
|
10
10
|
napistu/utils.py,sha256=TcholWrFbRSu_sn9ODMA8y2YyAhekEKZjwf4S0WQNzI,33241
|
@@ -17,7 +17,7 @@ napistu/gcs/downloads.py,sha256=SvGv9WYr_Vt3guzyz1QiAuBndeKPTBtWSFLj1-QbLf4,6348
|
|
17
17
|
napistu/gcs/utils.py,sha256=eLSsvewWJdCguyj2k0ozUGP5BTemaE1PZg41Z3aY5kM,571
|
18
18
|
napistu/ingestion/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
|
19
19
|
napistu/ingestion/bigg.py,sha256=q0HeVSO6pFftbrxxVfFGUtMvCoak9Wi9ngMggRfjFjo,4364
|
20
|
-
napistu/ingestion/constants.py,sha256=
|
20
|
+
napistu/ingestion/constants.py,sha256=9UP47VImZ11q0kz17N3EJg2155USqLewwNWyKpA-cbA,8089
|
21
21
|
napistu/ingestion/gtex.py,sha256=X0hSC1yrpf4xSJWFhpeNcnHwJzKDII2MvjfUqYA0JN8,3720
|
22
22
|
napistu/ingestion/hpa.py,sha256=R27ExrryKQ4Crxv9ATXmBJCa-yd01TMOrDjkeBhIQac,5054
|
23
23
|
napistu/ingestion/identifiers_etl.py,sha256=6ppDUA6lEZurdmVbiFLOUzphYbr-hndMhtqsQnq_yAc,5009
|
@@ -25,8 +25,8 @@ napistu/ingestion/napistu_edgelist.py,sha256=eVT9M7gmdBuGHcAYlvkD_zzvTtyzXufKWjw
|
|
25
25
|
napistu/ingestion/obo.py,sha256=AQkIPWbjA464Lma0tx91JucWkIwLjC7Jgv5VHGRTDkE,9601
|
26
26
|
napistu/ingestion/psi_mi.py,sha256=5eJjm7XWogL9oTyGqR52kntHClLwLsTePKqCvUGyi-w,10111
|
27
27
|
napistu/ingestion/reactome.py,sha256=Hn9X-vDp4o_HK-OtaQvel3vJeZ8_TC1-4N2rruK9Oks,7099
|
28
|
-
napistu/ingestion/sbml.py,sha256=
|
29
|
-
napistu/ingestion/string.py,sha256=
|
28
|
+
napistu/ingestion/sbml.py,sha256=N7neMwjTEF7OMhAcNvQJ29V_d3PqMLjLOZqvJTlK9q0,24743
|
29
|
+
napistu/ingestion/string.py,sha256=YSWqaKm3I8bOixzvSA8fU4yfR2izddPYs4qJiqwjbxk,11678
|
30
30
|
napistu/ingestion/trrust.py,sha256=ccjZc_eF3PdxxurnukiEo_e0-aKc_3z22NYbaJBtHdY,9774
|
31
31
|
napistu/ingestion/yeast.py,sha256=bwFBNxRq-dLDaddgBL1hpfZj0eQ56nBXyR_9n0NZT9Y,5233
|
32
32
|
napistu/matching/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
|
@@ -54,7 +54,7 @@ napistu/mcp/utils.py,sha256=WB4c6s8aPZLgi_Wvhhq0DE8Cnz2QGff0V8hrF1feVRg,1296
|
|
54
54
|
napistu/modify/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
|
55
55
|
napistu/modify/constants.py,sha256=H6K6twzPlxt0yp6QLAxIx0Tp8YzYhtKKXPdmXi5V_QQ,3689
|
56
56
|
napistu/modify/curation.py,sha256=sQeSO53ZLdn14ww2GSKkoP0vJnDpAoSWb-YDjUf5hDQ,21743
|
57
|
-
napistu/modify/gaps.py,sha256=
|
57
|
+
napistu/modify/gaps.py,sha256=qprylC2BbSk_vPWayYPVT8lwURXDMOlW5zNLV_wMFZ4,26755
|
58
58
|
napistu/modify/pathwayannot.py,sha256=xuBSMDFWbg_d6-Gzv0Td3Q5nnFTa-Qzic48g1b1AZtQ,48081
|
59
59
|
napistu/modify/uncompartmentalize.py,sha256=U5X4Q7Z-YIkC8_711x3sU21vTVdv9rKfauwz4JNzl6c,9690
|
60
60
|
napistu/network/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
|
@@ -67,7 +67,7 @@ napistu/network/net_create.py,sha256=2N5ocGmibdBxIUVtv3H36iFWwkbys9ECCERFRlByhLc
|
|
67
67
|
napistu/network/net_propagation.py,sha256=89ZR4p2mGpkCCIemofZ53XbUjQsuNABxIc6UmF8A5n8,4935
|
68
68
|
napistu/network/ng_utils.py,sha256=ijWDa5MTuULJpdV6dcVFGmLmtB_xy87jaUG7F5nvC_k,15240
|
69
69
|
napistu/network/paths.py,sha256=S4ZaV0yVmI-o8sXfom5eXA3yy2IEbleYUyXEvnmVw98,17468
|
70
|
-
napistu/network/precompute.py,sha256=
|
70
|
+
napistu/network/precompute.py,sha256=_TyztdHucczZg1JacWuXfNp5NGRKBFMGfp8Imx7OBMM,9118
|
71
71
|
napistu/ontologies/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
|
72
72
|
napistu/ontologies/constants.py,sha256=GyOFvezSxDK1VigATcruTKtNhjcYaid1ggulEf_HEtQ,4345
|
73
73
|
napistu/ontologies/dogma.py,sha256=jGZS-J3d29AoUOow-HVjfVZQJ87lnqO5L1aozieN1ec,8825
|
@@ -81,7 +81,7 @@ napistu/rpy2/rids.py,sha256=AfXLTfTdonfspgAHYO0Ph7jSUWv8YuyT8x3fyLfAqc8,3413
|
|
81
81
|
napistu/scverse/__init__.py,sha256=Lgxr3iMQAkTzXE9BNz93CndNP5djzerLvmHM-D0PU3I,357
|
82
82
|
napistu/scverse/constants.py,sha256=0iAkhyJUIeFGHdLLU3fCaEU1O3Oix4qAsxr3CxGTjVs,653
|
83
83
|
napistu/scverse/loading.py,sha256=jqiE71XB-wdV50GyZrauFNY0Lai4bX9Fm2Gv80VR8t8,27016
|
84
|
-
napistu-0.3.
|
84
|
+
napistu-0.3.4.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
|
85
85
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
86
86
|
tests/conftest.py,sha256=XVkd0tQywhnf2cgab7fIjBo3NlaTVX3cO8HaRS2jIwM,3190
|
87
87
|
tests/test_consensus.py,sha256=3dJvvPsPG7bHbw_FY4Pm647N_Gt_Ud9157OKYfPCUd4,9502
|
@@ -108,16 +108,16 @@ tests/test_network_net_create.py,sha256=VNFZTwQawAZQPDnVk_qFevgZErx5KyQZ24bMoZF4
|
|
108
108
|
tests/test_network_net_propagation.py,sha256=9pKkUdduWejH4iKNCJXKFzAkdNpCfrMbiUWySgI_LH4,3244
|
109
109
|
tests/test_network_ng_utils.py,sha256=CwDw4MKTPhVZXz2HA2XU2QjjBv8CXc1_yQ0drvkBkFw,724
|
110
110
|
tests/test_network_paths.py,sha256=TWZnxY5bF3m6gahcxcYJGrBIawh2-_vUcec1LyPmXV8,1686
|
111
|
-
tests/test_network_precompute.py,sha256=
|
111
|
+
tests/test_network_precompute.py,sha256=xMGmZI9DxcWhJxuP7GCZEqtmcOvDRNK2LSia0x94v0U,9018
|
112
112
|
tests/test_ontologies_genodexito.py,sha256=hBlunyEPiKskqagjWKW5Z6DJwKvpueYHJLwbfyeeAdo,2256
|
113
113
|
tests/test_ontologies_mygene.py,sha256=BuBLm8VatzpK39-Ew_fFTK9ueLE4eqmKIDS5UKE59n8,1541
|
114
114
|
tests/test_ontologies_renaming.py,sha256=k7bQzP24zG7W3fpULwk1me2sOWEWlxylr4Mhx1_gJJY,3740
|
115
115
|
tests/test_pathwayannot.py,sha256=bceosccNy9tgxQei_7j7ATBSSvBSxOngJvK-mAzR_K0,3312
|
116
116
|
tests/test_rpy2_callr.py,sha256=UVzXMvYN3wcc-ikDIjH2sA4BqkbwiNbMm561BcbnbD4,2936
|
117
117
|
tests/test_rpy2_init.py,sha256=APrNt9GEQV9va3vU5k250TxFplAoWFc-FJRFhM2GcDk,5927
|
118
|
-
tests/test_sbml.py,sha256
|
119
|
-
tests/test_sbml_dfs_core.py,sha256=
|
120
|
-
tests/test_sbml_dfs_utils.py,sha256=
|
118
|
+
tests/test_sbml.py,sha256=f25zj1NogYrmLluvBDboLameTuCiQ309433Qn3iPvhg,1483
|
119
|
+
tests/test_sbml_dfs_core.py,sha256=tFaLMMuVjTLuhL-wimvcBbodEp59dhyHvXZ-IlUGGeU,19222
|
120
|
+
tests/test_sbml_dfs_utils.py,sha256=5lNzZ1NLOnFb_sZ0YWTgLzXy28yGNCtS_H8Q-W-T6Bw,2022
|
121
121
|
tests/test_sbo.py,sha256=x_PENFaXYsrZIzOZu9cj_Wrej7i7SNGxgBYYvcigLs0,308
|
122
122
|
tests/test_scverse_loading.py,sha256=bnU1lQSYYWhOAs0IIBoi4ZohqPokDQJ0n_rtkAfEyMU,29948
|
123
123
|
tests/test_set_coverage.py,sha256=J-6m6LuOjcQa9pxRuWglSfJk4Ltm7kt_eOrn_Q-7P6Q,1604
|
@@ -126,8 +126,8 @@ tests/test_uncompartmentalize.py,sha256=nAk5kfAVLU9a2VWe2x2HYVcKqj-EnwmwddERIPRa
|
|
126
126
|
tests/test_utils.py,sha256=JRJFmjDNZpjG59a-73JkTyGqa_a7Z8d0fE2cZt0CRII,22580
|
127
127
|
tests/utils.py,sha256=SoWQ_5roJteFGcMaOeEiQ5ucwq3Z2Fa3AAs9iXHTsJY,749
|
128
128
|
tests/test_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
129
|
-
napistu-0.3.
|
130
|
-
napistu-0.3.
|
131
|
-
napistu-0.3.
|
132
|
-
napistu-0.3.
|
133
|
-
napistu-0.3.
|
129
|
+
napistu-0.3.4.dist-info/METADATA,sha256=u0M0PcjZJds7ds-cV2MYar8maae9_ld_9FO--0G3xwA,3414
|
130
|
+
napistu-0.3.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
131
|
+
napistu-0.3.4.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
|
132
|
+
napistu-0.3.4.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
|
133
|
+
napistu-0.3.4.dist-info/RECORD,,
|
tests/test_network_precompute.py
CHANGED
@@ -179,8 +179,11 @@ def test_precomputed_distances_neighborhoods():
|
|
179
179
|
right_on=join_key,
|
180
180
|
how="outer",
|
181
181
|
)
|
182
|
-
.fillna(False)
|
183
182
|
)
|
183
|
+
for col in ["in_precompute", "in_otf"]:
|
184
|
+
neighbor_comparison[col] = (
|
185
|
+
neighbor_comparison[col].astype("boolean").fillna(False)
|
186
|
+
)
|
184
187
|
comparison_l.append(neighbor_comparison.assign(focal_sc_id=key))
|
185
188
|
|
186
189
|
comparison_df = pd.concat(comparison_l)
|
tests/test_sbml.py
CHANGED
@@ -1,17 +1,48 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
import pandas as pd
|
4
3
|
from napistu import sbml_dfs_core
|
5
4
|
from napistu.ingestion import sbml
|
5
|
+
import pytest
|
6
|
+
from pydantic import ValidationError
|
6
7
|
|
7
8
|
|
8
9
|
def test_sbml_dfs(sbml_path):
|
9
10
|
sbml_model = sbml.SBML(sbml_path)
|
10
|
-
_ = sbml_model
|
11
|
+
_ = sbml_dfs_core.SBML_dfs(sbml_model)
|
11
12
|
|
12
|
-
dfs = sbml_dfs_core.SBML_dfs(sbml_model)
|
13
|
-
dfs.validate()
|
14
13
|
|
15
|
-
|
16
|
-
|
17
|
-
|
14
|
+
def test_compartment_aliases_validation_positive():
|
15
|
+
"""
|
16
|
+
Tests that a valid compartment aliases dictionary passes validation.
|
17
|
+
"""
|
18
|
+
valid_aliases = {
|
19
|
+
"extracellular": ["ECM", "extracellular space"],
|
20
|
+
"cytosol": ["cytoplasm"],
|
21
|
+
}
|
22
|
+
# This should not raise an exception
|
23
|
+
sbml.CompartmentAliasesValidator.model_validate(valid_aliases)
|
24
|
+
|
25
|
+
|
26
|
+
def test_compartment_aliases_validation_negative():
|
27
|
+
"""
|
28
|
+
Tests that an invalid compartment aliases dictionary raises a ValidationError.
|
29
|
+
"""
|
30
|
+
invalid_aliases = {
|
31
|
+
"extracellular": ["ECM"],
|
32
|
+
"not_a_real_compartment": ["fake"],
|
33
|
+
}
|
34
|
+
with pytest.raises(ValidationError):
|
35
|
+
sbml.CompartmentAliasesValidator.model_validate(invalid_aliases)
|
36
|
+
|
37
|
+
|
38
|
+
def test_compartment_aliases_validation_bad_type():
|
39
|
+
"""
|
40
|
+
Tests that a validation error is raised for incorrect data types.
|
41
|
+
"""
|
42
|
+
# Test with a non-dict input
|
43
|
+
with pytest.raises(ValidationError):
|
44
|
+
sbml.CompartmentAliasesValidator.model_validate(["extracellular"])
|
45
|
+
|
46
|
+
# Test with incorrect value types in the dictionary
|
47
|
+
with pytest.raises(ValidationError):
|
48
|
+
sbml.CompartmentAliasesValidator.model_validate({"extracellular": "ECM"})
|
tests/test_sbml_dfs_core.py
CHANGED
@@ -10,7 +10,14 @@ from napistu.ingestion import sbml
|
|
10
10
|
from napistu.modify import pathwayannot
|
11
11
|
|
12
12
|
from napistu import identifiers as napistu_identifiers
|
13
|
-
from napistu.constants import
|
13
|
+
from napistu.constants import (
|
14
|
+
SBML_DFS,
|
15
|
+
SBOTERM_NAMES,
|
16
|
+
BQB_DEFINING_ATTRS,
|
17
|
+
BQB_DEFINING_ATTRS_LOOSE,
|
18
|
+
BQB,
|
19
|
+
IDENTIFIERS,
|
20
|
+
)
|
14
21
|
from napistu.sbml_dfs_core import SBML_dfs
|
15
22
|
|
16
23
|
|
@@ -493,3 +500,84 @@ def test_remove_entity_data_nonexistent(sbml_dfs_w_data, caplog):
|
|
493
500
|
|
494
501
|
# Validate the model is still valid
|
495
502
|
sbml_dfs_w_data.validate()
|
503
|
+
|
504
|
+
|
505
|
+
def test_filter_to_characteristic_species_ids():
|
506
|
+
|
507
|
+
species_ids_dict = {
|
508
|
+
SBML_DFS.S_ID: ["large_complex"] * 6
|
509
|
+
+ ["small_complex"] * 2
|
510
|
+
+ ["proteinA", "proteinB"]
|
511
|
+
+ ["proteinC"] * 3
|
512
|
+
+ [
|
513
|
+
"promiscuous_complexA",
|
514
|
+
"promiscuous_complexB",
|
515
|
+
"promiscuous_complexC",
|
516
|
+
"promiscuous_complexD",
|
517
|
+
"promiscuous_complexE",
|
518
|
+
],
|
519
|
+
IDENTIFIERS.ONTOLOGY: ["complexportal"]
|
520
|
+
+ ["HGNC"] * 7
|
521
|
+
+ ["GO"] * 2
|
522
|
+
+ ["ENSG", "ENSP", "pubmed"]
|
523
|
+
+ ["HGNC"] * 5,
|
524
|
+
IDENTIFIERS.IDENTIFIER: [
|
525
|
+
"CPX-BIG",
|
526
|
+
"mem1",
|
527
|
+
"mem2",
|
528
|
+
"mem3",
|
529
|
+
"mem4",
|
530
|
+
"mem5",
|
531
|
+
"part1",
|
532
|
+
"part2",
|
533
|
+
"GO:1",
|
534
|
+
"GO:2",
|
535
|
+
"dna_seq",
|
536
|
+
"protein_seq",
|
537
|
+
"my_cool_pub",
|
538
|
+
]
|
539
|
+
+ ["promiscuous_complex"] * 5,
|
540
|
+
IDENTIFIERS.BQB: [BQB.IS]
|
541
|
+
+ [BQB.HAS_PART] * 7
|
542
|
+
+ [BQB.IS] * 2
|
543
|
+
+ [
|
544
|
+
# these are retained if BQB_DEFINING_ATTRS_LOOSE is used
|
545
|
+
BQB.ENCODES,
|
546
|
+
BQB.IS_ENCODED_BY,
|
547
|
+
# this should always be removed
|
548
|
+
BQB.IS_DESCRIBED_BY,
|
549
|
+
]
|
550
|
+
+ [BQB.HAS_PART] * 5,
|
551
|
+
}
|
552
|
+
|
553
|
+
species_ids = pd.DataFrame(species_ids_dict)
|
554
|
+
|
555
|
+
characteristic_ids_narrow = sbml_dfs_core.filter_to_characteristic_species_ids(
|
556
|
+
species_ids,
|
557
|
+
defining_biological_qualifiers=BQB_DEFINING_ATTRS,
|
558
|
+
max_complex_size=4,
|
559
|
+
max_promiscuity=4,
|
560
|
+
)
|
561
|
+
|
562
|
+
EXPECTED_IDS = ["CPX-BIG", "GO:1", "GO:2", "part1", "part2"]
|
563
|
+
assert characteristic_ids_narrow[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
|
564
|
+
|
565
|
+
characteristic_ids_loose = sbml_dfs_core.filter_to_characteristic_species_ids(
|
566
|
+
species_ids,
|
567
|
+
# include encodes and is_encoded_by as equivalent to is
|
568
|
+
defining_biological_qualifiers=BQB_DEFINING_ATTRS_LOOSE,
|
569
|
+
max_complex_size=4,
|
570
|
+
# expand promiscuity to default value
|
571
|
+
max_promiscuity=20,
|
572
|
+
)
|
573
|
+
|
574
|
+
EXPECTED_IDS = [
|
575
|
+
"CPX-BIG",
|
576
|
+
"GO:1",
|
577
|
+
"GO:2",
|
578
|
+
"dna_seq",
|
579
|
+
"protein_seq",
|
580
|
+
"part1",
|
581
|
+
"part2",
|
582
|
+
] + ["promiscuous_complex"] * 5
|
583
|
+
assert characteristic_ids_loose[IDENTIFIERS.IDENTIFIER].tolist() == EXPECTED_IDS
|
tests/test_sbml_dfs_utils.py
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import pandas as pd
|
4
|
+
|
3
5
|
from napistu import sbml_dfs_utils
|
6
|
+
from napistu.constants import BQB, BQB_DEFINING_ATTRS, BQB_DEFINING_ATTRS_LOOSE
|
4
7
|
|
5
8
|
|
6
9
|
def test_id_formatter():
|
@@ -14,9 +17,47 @@ def test_id_formatter():
|
|
14
17
|
assert list(input_vals) == inv_ids
|
15
18
|
|
16
19
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
20
|
+
def test_get_characteristic_species_ids():
|
21
|
+
"""
|
22
|
+
Test get_characteristic_species_ids function with both dogmatic and non-dogmatic cases.
|
23
|
+
"""
|
24
|
+
# Create mock species identifiers data
|
25
|
+
mock_species_ids = pd.DataFrame(
|
26
|
+
{
|
27
|
+
"s_id": ["s1", "s2", "s3", "s4", "s5"],
|
28
|
+
"identifier": ["P12345", "CHEBI:15377", "GO:12345", "P67890", "P67890"],
|
29
|
+
"ontology": ["uniprot", "chebi", "go", "uniprot", "chebi"],
|
30
|
+
"bqb": [
|
31
|
+
"BQB_IS",
|
32
|
+
"BQB_IS",
|
33
|
+
"BQB_HAS_PART",
|
34
|
+
"BQB_HAS_VERSION",
|
35
|
+
"BQB_ENCODES",
|
36
|
+
],
|
37
|
+
}
|
38
|
+
)
|
39
|
+
|
40
|
+
# Create mock SBML_dfs object
|
41
|
+
class MockSBML_dfs:
|
42
|
+
def get_identifiers(self, entity_type):
|
43
|
+
return mock_species_ids
|
44
|
+
|
45
|
+
mock_sbml = MockSBML_dfs()
|
46
|
+
|
47
|
+
# Test dogmatic case (default)
|
48
|
+
expected_bqbs = BQB_DEFINING_ATTRS + [BQB.HAS_PART] # noqa: F841
|
49
|
+
dogmatic_result = sbml_dfs_utils.get_characteristic_species_ids(mock_sbml)
|
50
|
+
expected_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
|
51
|
+
|
52
|
+
pd.testing.assert_frame_equal(dogmatic_result, expected_dogmatic, check_like=True)
|
53
|
+
|
54
|
+
# Test non-dogmatic case
|
55
|
+
expected_bqbs = BQB_DEFINING_ATTRS_LOOSE + [BQB.HAS_PART] # noqa: F841
|
56
|
+
non_dogmatic_result = sbml_dfs_utils.get_characteristic_species_ids(
|
57
|
+
mock_sbml, dogmatic=False
|
58
|
+
)
|
59
|
+
expected_non_dogmatic = mock_species_ids.query("bqb in @expected_bqbs")
|
60
|
+
|
61
|
+
pd.testing.assert_frame_equal(
|
62
|
+
non_dogmatic_result, expected_non_dogmatic, check_like=True
|
63
|
+
)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|