napistu 0.4.1__tar.gz → 0.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {napistu-0.4.1/src/napistu.egg-info → napistu-0.4.3}/PKG-INFO +1 -1
- {napistu-0.4.1 → napistu-0.4.3}/setup.cfg +1 -1
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/consensus.py +3 -4
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/gcs/constants.py +5 -5
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ingestion/constants.py +51 -0
- napistu-0.4.3/src/napistu/ingestion/reactom_fi.py +208 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/network/constants.py +23 -1
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/network/ig_utils.py +161 -1
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/network/net_create.py +3 -3
- napistu-0.4.3/src/napistu/network/net_propagation.py +696 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/network/ng_utils.py +26 -6
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/network/precompute.py +56 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/sbml_dfs_utils.py +8 -2
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/source.py +243 -40
- napistu-0.4.3/src/napistu/statistics/__init__.py +10 -0
- napistu-0.4.3/src/napistu/statistics/hypothesis_testing.py +66 -0
- napistu-0.4.3/src/napistu/statistics/quantiles.py +82 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/utils.py +23 -1
- {napistu-0.4.1 → napistu-0.4.3/src/napistu.egg-info}/PKG-INFO +1 -1
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu.egg-info/SOURCES.txt +6 -1
- napistu-0.4.3/src/tests/test_network_ig_utils.py +192 -0
- napistu-0.4.3/src/tests/test_network_net_propagation.py +380 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_network_precompute.py +30 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_sbml_dfs_utils.py +13 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_source.py +38 -6
- napistu-0.4.3/src/tests/test_statistics_hypothesis_testing.py +62 -0
- napistu-0.4.3/src/tests/test_statistics_quantiles.py +133 -0
- napistu-0.4.1/src/napistu/network/net_propagation.py +0 -146
- napistu-0.4.1/src/tests/test_network_ig_utils.py +0 -59
- napistu-0.4.1/src/tests/test_network_net_propagation.py +0 -89
- napistu-0.4.1/src/tests/test_set_coverage.py +0 -50
- {napistu-0.4.1 → napistu-0.4.3}/LICENSE +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/README.md +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/pyproject.toml +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/setup.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/__init__.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/__main__.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/constants.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/context/__init__.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/context/discretize.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/context/filtering.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/gcs/__init__.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/gcs/downloads.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/gcs/utils.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/identifiers.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/indices.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ingestion/__init__.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ingestion/bigg.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ingestion/gtex.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ingestion/hpa.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ingestion/identifiers_etl.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ingestion/napistu_edgelist.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ingestion/obo.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ingestion/psi_mi.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ingestion/reactome.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ingestion/sbml.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ingestion/string.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ingestion/trrust.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ingestion/yeast.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/matching/__init__.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/matching/constants.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/matching/interactions.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/matching/mount.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/matching/species.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/mcp/__init__.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/mcp/__main__.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/mcp/client.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/mcp/codebase.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/mcp/codebase_utils.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/mcp/component_base.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/mcp/config.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/mcp/constants.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/mcp/documentation.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/mcp/documentation_utils.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/mcp/execution.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/mcp/health.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/mcp/profiles.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/mcp/server.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/mcp/tutorials.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/mcp/tutorials_utils.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/mcp/utils.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/modify/__init__.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/modify/constants.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/modify/curation.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/modify/gaps.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/modify/pathwayannot.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/modify/uncompartmentalize.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/network/__init__.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/network/data_handling.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/network/neighborhoods.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/network/net_create_utils.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/network/ng_core.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/network/paths.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ontologies/__init__.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ontologies/constants.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ontologies/dogma.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ontologies/genodexito.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ontologies/id_tables.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ontologies/mygene.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/ontologies/renaming.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/rpy2/__init__.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/rpy2/callr.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/rpy2/constants.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/rpy2/rids.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/sbml_dfs_core.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/scverse/__init__.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/scverse/constants.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu/scverse/loading.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu.egg-info/dependency_links.txt +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu.egg-info/entry_points.txt +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu.egg-info/requires.txt +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/napistu.egg-info/top_level.txt +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/__init__.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/conftest.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_consensus.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_constants.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_context_discretize.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_context_filtering.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_curation.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_data/__init__.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_gaps.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_gcs.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_identifiers.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_indices.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_ingestion_napistu_edgelist.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_ingestion_obo.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_matching_interactions.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_matching_mount.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_matching_species.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_mcp_config.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_mcp_documentation_utils.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_mcp_server.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_network_data_handling.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_network_neighborhoods.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_network_net_create.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_network_net_create_utils.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_network_ng_core.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_network_ng_utils.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_network_paths.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_ontologies_genodexito.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_ontologies_id_tables.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_ontologies_mygene.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_ontologies_renaming.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_pathwayannot.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_rpy2_callr.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_rpy2_init.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_sbml.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_sbml_dfs_core.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_sbo.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_scverse_loading.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_uncompartmentalize.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/test_utils.py +0 -0
- {napistu-0.4.1 → napistu-0.4.3}/src/tests/utils.py +0 -0
@@ -426,7 +426,7 @@ def post_consensus_species_ontology_check(sbml_dfs: sbml_dfs_core.SBML_dfs) -> s
|
|
426
426
|
|
427
427
|
# get the sources of species in the consensus model
|
428
428
|
consensus_sbmldf_tbl_var_sc = (
|
429
|
-
source.unnest_sources(sbml_dfs.species,
|
429
|
+
source.unnest_sources(sbml_dfs.species, verbose=False)
|
430
430
|
.reset_index()
|
431
431
|
.sort_values([SOURCE_SPEC.NAME])
|
432
432
|
)
|
@@ -504,12 +504,11 @@ def post_consensus_source_check(
|
|
504
504
|
) -> pd.DataFrame:
|
505
505
|
"""Provide sources of tables in a consensus model; the output df will be used to determine whether models are merged."""
|
506
506
|
|
507
|
-
|
508
|
-
table_pk = sbml_dfs.schema[table_name]["pk"]
|
507
|
+
table_pk = sbml_dfs.schema[table_name][SCHEMA_DEFS.PK]
|
509
508
|
|
510
509
|
sbml_dfs_tbl = getattr(sbml_dfs, table_name)
|
511
510
|
sbml_dfs_tbl_pathway_source = (
|
512
|
-
source.unnest_sources(sbml_dfs_tbl,
|
511
|
+
source.unnest_sources(sbml_dfs_tbl, verbose=False)
|
513
512
|
.reset_index()
|
514
513
|
.sort_values(["name"])
|
515
514
|
)
|
@@ -7,7 +7,7 @@ GCS_SUBASSET_NAMES = SimpleNamespace(
|
|
7
7
|
SBML_DFS="sbml_dfs",
|
8
8
|
NAPISTU_GRAPH="napistu_graph",
|
9
9
|
SPECIES_IDENTIFIERS="species_identifiers",
|
10
|
-
|
10
|
+
PRECOMPUTED_DISTANCES="precomputed_distances",
|
11
11
|
)
|
12
12
|
|
13
13
|
|
@@ -15,7 +15,7 @@ GCS_FILETYPES = SimpleNamespace(
|
|
15
15
|
SBML_DFS="sbml_dfs.pkl",
|
16
16
|
NAPISTU_GRAPH="napistu_graph.pkl",
|
17
17
|
SPECIES_IDENTIFIERS="species_identifiers.tsv",
|
18
|
-
|
18
|
+
PRECOMPUTED_DISTANCES="precomputed_distances.parquet",
|
19
19
|
)
|
20
20
|
|
21
21
|
|
@@ -29,7 +29,7 @@ GCS_ASSETS = SimpleNamespace(
|
|
29
29
|
GCS_SUBASSET_NAMES.SBML_DFS: GCS_FILETYPES.SBML_DFS,
|
30
30
|
GCS_SUBASSET_NAMES.NAPISTU_GRAPH: GCS_FILETYPES.NAPISTU_GRAPH,
|
31
31
|
GCS_SUBASSET_NAMES.SPECIES_IDENTIFIERS: GCS_FILETYPES.SPECIES_IDENTIFIERS,
|
32
|
-
GCS_SUBASSET_NAMES.
|
32
|
+
GCS_SUBASSET_NAMES.PRECOMPUTED_DISTANCES: GCS_FILETYPES.PRECOMPUTED_DISTANCES,
|
33
33
|
},
|
34
34
|
"public_url": "https://storage.googleapis.com/shackett-napistu-public/test_pathway.tar.gz",
|
35
35
|
},
|
@@ -48,9 +48,9 @@ GCS_ASSETS = SimpleNamespace(
|
|
48
48
|
GCS_SUBASSET_NAMES.SBML_DFS: GCS_FILETYPES.SBML_DFS,
|
49
49
|
GCS_SUBASSET_NAMES.NAPISTU_GRAPH: GCS_FILETYPES.NAPISTU_GRAPH,
|
50
50
|
GCS_SUBASSET_NAMES.SPECIES_IDENTIFIERS: GCS_FILETYPES.SPECIES_IDENTIFIERS,
|
51
|
-
GCS_SUBASSET_NAMES.
|
51
|
+
GCS_SUBASSET_NAMES.PRECOMPUTED_DISTANCES: GCS_FILETYPES.PRECOMPUTED_DISTANCES,
|
52
52
|
},
|
53
|
-
"public_url": "https://storage.googleapis.com/
|
53
|
+
"public_url": "https://storage.googleapis.com/shackett-napistu-public/human_consensus_w_distances.tar.gz",
|
54
54
|
},
|
55
55
|
"reactome_members": {
|
56
56
|
"file": "external_pathways/external_pathways_reactome_neo4j_members.csv",
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
3
3
|
|
4
4
|
from types import SimpleNamespace
|
5
5
|
|
6
|
+
from napistu.constants import SBOTERM_NAMES
|
6
7
|
|
7
8
|
SPECIES_FULL_NAME_HUMAN = "Homo sapiens"
|
8
9
|
SPECIES_FULL_NAME_MOUSE = "Mus musculus"
|
@@ -90,6 +91,56 @@ REACTOME_PATHWAYS_URL = "https://reactome.org/download/current/ReactomePathways.
|
|
90
91
|
REACTOME_PATHWAY_INDEX_COLUMNS = ["file", "source", "species", "pathway_id", "name"]
|
91
92
|
REACTOME_PATHWAY_LIST_COLUMNS = ["pathway_id", "name", "species"]
|
92
93
|
|
94
|
+
# REACTOME FI
# Location of the zipped, annotated Reactome functional-interaction (FI) table.
# The date in the filename suggests a pinned release — TODO confirm whether it
# needs bumping when Reactome publishes a new FI build.
REACTOME_FI_URL = "http://cpws.reactome.org/caBigR3WebApp2025/FIsInGene_04142025_with_annotations.txt.zip"

# Column names of the Reactome FI interactions table.
REACTOME_FI = SimpleNamespace(
    GENE1="Gene1",
    GENE2="Gene2",
    ANNOTATION="Annotation",
    DIRECTION="Direction",
    SCORE="Score",
)

# Symbols expected in the Direction column. The attribute names read as the
# relationship of Gene1 towards Gene2 (e.g. "->" = Gene1 stimulates Gene2) —
# presumably; verify against the Reactome FI documentation.
REACTOME_FI_DIRECTIONS = SimpleNamespace(
    UNDIRECTED="-",
    STIMULATED_BY="<-",
    STIMULATES="->",
    STIMULATES_AND_STIMULATED_BY="<->",
    INHIBITED_BY="|-",
    INHIBITS="-|",
    INHIBITS_AND_INHIBITED_BY="|-|",
    STIMULATES_AND_INHIBITED_BY="|->",
    INHIBITS_AND_STIMULATED_BY="<-|",
)

# All direction symbols defined above. NOTE: this is a live `dict_values` view
# of the namespace, not a list or set.
VALID_REACTOME_FI_DIRECTIONS = REACTOME_FI_DIRECTIONS.__dict__.values()
|
118
|
+
|
119
|
+
# Rules describing the "reverse" half of an interaction, i.e. what Gene2 does to
# Gene1. NAME_RULES maps a regex applied to the annotation text to an SBO term
# name; DIRECTION_RULES maps a direction symbol to an SBO term name and is used
# when no name rule matches.
REACTOME_FI_RULES_REVERSE = SimpleNamespace(
    NAME_RULES={"catalyzed by": SBOTERM_NAMES.CATALYST},
    DIRECTION_RULES={
        REACTOME_FI_DIRECTIONS.STIMULATED_BY: SBOTERM_NAMES.STIMULATOR,
        REACTOME_FI_DIRECTIONS.STIMULATES_AND_STIMULATED_BY: SBOTERM_NAMES.STIMULATOR,
        # "<-|": the reverse half of "inhibits and stimulated by" is a
        # stimulation. This mirrors STIMULATES_AND_INHIBITED_BY below and the
        # forward rules, which cover both mixed directions; without it the
        # reverse edge of "<-|" interactions was silently dropped.
        REACTOME_FI_DIRECTIONS.INHIBITS_AND_STIMULATED_BY: SBOTERM_NAMES.STIMULATOR,
        REACTOME_FI_DIRECTIONS.INHIBITED_BY: SBOTERM_NAMES.INHIBITOR,
        REACTOME_FI_DIRECTIONS.INHIBITS_AND_INHIBITED_BY: SBOTERM_NAMES.INHIBITOR,
        REACTOME_FI_DIRECTIONS.STIMULATES_AND_INHIBITED_BY: SBOTERM_NAMES.INHIBITOR,
        REACTOME_FI_DIRECTIONS.UNDIRECTED: SBOTERM_NAMES.INTERACTOR,
    },
)
|
130
|
+
|
131
|
+
# Rules describing the "forward" half of an interaction, i.e. what Gene1 does to
# Gene2. NAME_RULES maps a regex applied to the annotation text to an SBO term
# name; DIRECTION_RULES maps a direction symbol to an SBO term name and is used
# when no name rule matches.
REACTOME_FI_RULES_FORWARD = SimpleNamespace(
    # "catalyze" followed by ";" OR the end of the annotation, so that
    # "catalyze; ..." and a bare "catalyze" both match while "catalyzed by"
    # (a reverse rule) does not. The previous pattern "catalyze(;$)" required
    # the literal "catalyze;" to be the last characters of the annotation.
    # NOTE(review): assumes annotations are ";"-separated terms — confirm
    # against the Reactome FI file.
    NAME_RULES={r"catalyze(;|$)": SBOTERM_NAMES.CATALYST},
    DIRECTION_RULES={
        REACTOME_FI_DIRECTIONS.STIMULATES: SBOTERM_NAMES.STIMULATOR,
        REACTOME_FI_DIRECTIONS.STIMULATES_AND_STIMULATED_BY: SBOTERM_NAMES.STIMULATOR,
        REACTOME_FI_DIRECTIONS.STIMULATES_AND_INHIBITED_BY: SBOTERM_NAMES.STIMULATOR,
        REACTOME_FI_DIRECTIONS.INHIBITS: SBOTERM_NAMES.INHIBITOR,
        REACTOME_FI_DIRECTIONS.INHIBITS_AND_INHIBITED_BY: SBOTERM_NAMES.INHIBITOR,
        REACTOME_FI_DIRECTIONS.INHIBITS_AND_STIMULATED_BY: SBOTERM_NAMES.INHIBITOR,
        REACTOME_FI_DIRECTIONS.UNDIRECTED: SBOTERM_NAMES.INTERACTOR,
    },
)
|
143
|
+
|
93
144
|
# SBML
|
94
145
|
SBML_DEFS = SimpleNamespace(
|
95
146
|
ERROR_NUMBER="error_number",
|
@@ -0,0 +1,208 @@
|
|
1
|
+
import logging
|
2
|
+
import pandas as pd
|
3
|
+
|
4
|
+
from napistu.identifiers import Identifiers
|
5
|
+
from napistu import utils
|
6
|
+
from napistu.ingestion.constants import (
|
7
|
+
REACTOME_FI,
|
8
|
+
REACTOME_FI_RULES_FORWARD,
|
9
|
+
REACTOME_FI_RULES_REVERSE,
|
10
|
+
REACTOME_FI_URL,
|
11
|
+
VALID_REACTOME_FI_DIRECTIONS,
|
12
|
+
)
|
13
|
+
|
14
|
+
|
15
|
+
logger = logging.getLogger(__name__)
|
16
|
+
|
17
|
+
|
18
|
+
def download_reactome_fi(target_uri: str, url: str = REACTOME_FI_URL) -> None:
    """
    Download the Reactome Functional Interactions (FI) dataset as a TSV file.

    Parameters
    ----------
    target_uri : str
        The URI where the Reactome FI data should be saved. Should end with .tsv
    url : str, optional
        URL to download the zipped Reactome functional interactions TSV from.
        Defaults to REACTOME_FI_URL.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If target_uri does not end with .tsv
    """

    if not target_uri.endswith(".tsv"):
        raise ValueError(f"Target URI must end with .tsv, got {target_uri}")

    # Strip the archive extension from the URL's basename to get the name of
    # the member inside the archive ("x.txt.zip" -> "x.txt").
    file_ext = url.split(".")[-1]
    target_filename = url.split("/")[-1].split(f".{file_ext}")[0]

    # The message previously said "proteinatlas", which was misleading in logs.
    logger.info("Start downloading Reactome FI %s to %s", url, target_uri)
    # target_filename is the name of the file in the zip file which will be renamed to target_uri
    utils.download_wget(url, target_uri, target_filename=target_filename)

    return None
|
50
|
+
|
51
|
+
|
52
|
+
def format_reactome_fi_edgelist(interactions: pd.DataFrame):
    """
    Format the Reactome FI interactions DataFrame as an edgelist for network analysis.

    This function is a stub: it raises NotImplementedError on its first
    statement, so everything after the `raise` is unreachable draft code.

    Parameters
    ----------
    interactions : pd.DataFrame
        DataFrame containing Reactome FI interactions.

    Returns
    -------
    Dictionary of:

    interaction_edgelist : pd.DataFrame
        Table containing molecular interactions with columns:
        - upstream_name : str, matches "s_name" from species_df
        - downstream_name : str, matches "s_name" from species_df
        - upstream_compartment : str, matches "c_name" from compartments_df
        - downstream_compartment : str, matches "c_name" from compartments_df
        - r_name : str, name for the interaction
        - sbo_term : str, SBO term defining interaction type
        - r_Identifiers : identifiers.Identifiers, supporting identifiers
        - r_isreversible : bool, whether reaction is reversible
    species_df : pd.DataFrame
        Table defining molecular species with columns:
        - s_name : str, name of molecular species
        - s_Identifiers : identifiers.Identifiers, species identifiers
    compartments_df : pd.DataFrame
        Table defining compartments with columns:
        - c_name : str, name of compartment
        - c_Identifiers : identifiers.Identifiers, compartment identifiers

    NOTE(review): the above is the intended output. The draft code below only
    builds and returns a single edgelist DataFrame (upstream_name,
    downstream_name, sbo_term_name, Score, r_Identifiers), not a dictionary —
    reconcile when the function is completed.

    Raises
    ------
    NotImplementedError
        Always, until the implementation is finished.

    Notes
    -----
    This function is not yet implemented and will raise NotImplementedError.
    """

    raise NotImplementedError("TO DO - This function is incomplete")

    # --- unreachable draft implementation below ---

    # one row per (annotation, direction, polarity) with its SBO term name
    formatted_annotations = _parse_reactome_fi_annotations(interactions)

    # this join will expand some rows to 2 since the bidirectional relationships are captured as separate edges in Napistu
    annotated_interactions = interactions.merge(
        formatted_annotations,
        on=[REACTOME_FI.ANNOTATION, REACTOME_FI.DIRECTION],
        how="left",
    )

    # flip reverse entries so all relationships are forward or undirected
    formatted_interactions = (
        pd.concat(
            [
                annotated_interactions.query("polarity == 'forward'"),
                (
                    # swapping the two gene columns turns a reverse edge into a forward one
                    annotated_interactions.query("polarity == 'reverse'").rename(
                        columns={
                            REACTOME_FI.GENE1: REACTOME_FI.GENE2,
                            REACTOME_FI.GENE2: REACTOME_FI.GENE1,
                        }
                    )
                ),
            ]
        )[[REACTOME_FI.GENE1, REACTOME_FI.GENE2, "sbo_term_name", "Score"]]
        # looks like they were already unique edges
        # keep the highest-scoring row per (Gene1, Gene2) pair
        .sort_values("Score", ascending=False)
        .groupby([REACTOME_FI.GENE1, REACTOME_FI.GENE2])
        .first()
    )

    fi_edgelist = (
        formatted_interactions.reset_index()
        .rename(
            columns={
                REACTOME_FI.GENE1: "upstream_name",
                REACTOME_FI.GENE2: "downstream_name",
            }
        )
        # every edge gets the same empty Identifiers object
        .assign(r_Identifiers=Identifiers([]))
    )

    return fi_edgelist
|
133
|
+
|
134
|
+
|
135
|
+
def _parse_reactome_fi_annotations(interactions: pd.DataFrame) -> pd.DataFrame:
    """
    Parse and annotate Reactome FI interaction types and directions using regex-based rules.

    Each distinct annotation/direction pair is checked against the forward and
    the reverse rule sets. Within a rule set the annotation text is matched
    against the regex NAME_RULES first; if none matches, the direction symbol
    is looked up in DIRECTION_RULES. A pair can yield a forward row, a reverse
    row, or both.

    Parameters
    ----------
    interactions : pd.DataFrame
        DataFrame containing Reactome FI interactions, with annotation and direction columns.

    Returns
    -------
    pd.DataFrame
        DataFrame with annotation, direction, SBO term name ("sbo_term_name"), and
        polarity ("forward" or "reverse") for each unique annotation/direction pair.
        The four columns are always present, even when `interactions` is empty.

    Raises
    ------
    ValueError
        If an annotation/direction pair cannot be matched to a rule or if invalid directions are found.
    """

    distinct_annotations = (
        interactions[[REACTOME_FI.ANNOTATION, REACTOME_FI.DIRECTION]]
        .drop_duplicates()
        .reset_index(drop=True)
    )

    # list() because VALID_REACTOME_FI_DIRECTIONS may be a dict view
    is_valid_direction = distinct_annotations[REACTOME_FI.DIRECTION].isin(
        list(VALID_REACTOME_FI_DIRECTIONS)
    )
    invalid_directions = distinct_annotations.loc[
        ~is_valid_direction, REACTOME_FI.DIRECTION
    ]
    if len(invalid_directions) > 0:
        raise ValueError(f"Invalid directions: {invalid_directions}")

    # forward is evaluated (and emitted) before reverse
    rule_sets = (
        ("forward", REACTOME_FI_RULES_FORWARD),
        ("reverse", REACTOME_FI_RULES_REVERSE),
    )

    annotations = []
    for annot, direction in distinct_annotations.itertuples(index=False, name=None):
        matches = []
        for polarity, rules in rule_sets:
            # name-based rules take precedence over direction-based rules
            sbo_term_name = utils.match_regex_dict(
                annot, rules.NAME_RULES
            ) or rules.DIRECTION_RULES.get(direction)
            if sbo_term_name:
                matches.append((polarity, sbo_term_name))

        if not matches:
            raise ValueError(f"No match found for {annot} with direction {direction}")

        annotations.extend(
            {
                REACTOME_FI.ANNOTATION: annot,
                REACTOME_FI.DIRECTION: direction,
                "sbo_term_name": sbo_term_name,
                "polarity": polarity,
            }
            for polarity, sbo_term_name in matches
        )

    # explicit columns so an empty input still yields a frame callers can merge on
    return pd.DataFrame(
        annotations,
        columns=[
            REACTOME_FI.ANNOTATION,
            REACTOME_FI.DIRECTION,
            "sbo_term_name",
            "polarity",
        ],
    )
|
@@ -4,6 +4,7 @@ from __future__ import annotations
|
|
4
4
|
|
5
5
|
from types import SimpleNamespace
|
6
6
|
|
7
|
+
|
7
8
|
from napistu.constants import SBML_DFS
|
8
9
|
from napistu.constants import SBOTERM_NAMES
|
9
10
|
|
@@ -13,7 +14,7 @@ NAPISTU_GRAPH_DIRECTEDNESS = SimpleNamespace(
|
|
13
14
|
DIRECTED="directed", UNDIRECTED="undirected"
|
14
15
|
)
|
15
16
|
|
16
|
-
|
17
|
+
NAPISTU_GRAPH_VERTICES = SimpleNamespace(NAME="name")
|
17
18
|
|
18
19
|
NAPISTU_GRAPH_EDGES = SimpleNamespace(
|
19
20
|
DIRECTED="directed",
|
@@ -198,3 +199,24 @@ SCORE_CALIBRATION_POINTS_DICT = {
|
|
198
199
|
}
|
199
200
|
|
200
201
|
SOURCE_VARS_DICT = {"string_wt": 10}
|
202
|
+
|
203
|
+
# network propagation
# Names of the supported propagation methods.
NET_PROPAGATION_DEFS = SimpleNamespace(PERSONALIZED_PAGERANK="personalized_pagerank")

# null distributions
# Names of the strategies for generating null distributions.
NULL_STRATEGIES = SimpleNamespace(
    UNIFORM="uniform",
    PARAMETRIC="parametric",
    NODE_PERMUTATION="node_permutation",
    EDGE_PERMUTATION="edge_permutation",
)

# All strategy names defined above. NOTE: this is a live `dict_values` view of
# the namespace, not a list or set.
VALID_NULL_STRATEGIES = NULL_STRATEGIES.__dict__.values()

# Distribution used by the parametric null unless overridden — presumably a
# scipy.stats distribution name; verify against the consumer.
PARAMETRIC_NULL_DEFAULT_DISTRIBUTION = "norm"

# masks

# Reserved mask keywords. "attr" is the string that mask parsing treats as
# "use each attribute as its own mask".
MASK_KEYWORDS = SimpleNamespace(
    ATTR="attr",
)
|
@@ -9,7 +9,7 @@ from __future__ import annotations
|
|
9
9
|
|
10
10
|
import logging
|
11
11
|
import random
|
12
|
-
from typing import Any, Optional, Sequence
|
12
|
+
from typing import Any, Optional, Sequence, List, Dict, Union
|
13
13
|
|
14
14
|
import igraph as ig
|
15
15
|
import numpy as np
|
@@ -384,3 +384,163 @@ def _get_top_n_nodes(
|
|
384
384
|
top_node_attrs = [graph.vs[idx].attributes() for idx in top_idxs]
|
385
385
|
top_vals = [vals[idx] for idx in top_idxs]
|
386
386
|
return [{val_name: val, **node} for val, node in zip(top_vals, top_node_attrs)]
|
387
|
+
|
388
|
+
|
389
|
+
def _parse_mask_input(
    mask_input: Optional[Union[str, np.ndarray, List, Dict]], attributes: List[str]
) -> Dict[str, Union[str, np.ndarray, List, None]]:
    """
    Expand a user-supplied mask into one mask specification per attribute.

    Parameters
    ----------
    mask_input : str, np.ndarray, List, Dict, or None
        How nodes should be masked:
        - None: no masking; every attribute maps to None
        - "attr": every attribute is masked by itself
        - any other str: that one attribute name masks every attribute
        - np.ndarray/List: the same mask object is shared by every attribute
        - Dict: already attribute-specific; must contain every attribute
    attributes : List[str]
        Attribute names that need a mask specification.

    Returns
    -------
    Dict[str, Union[str, np.ndarray, List, None]]
        Mapping of attribute name to its mask specification. A dict input is
        returned as-is (same object) after validation.

    Raises
    ------
    ValueError
        If a dict input lacks one of `attributes`, or `mask_input` has an
        unsupported type.
    """
    if isinstance(mask_input, dict):
        # a per-attribute mapping only needs to be checked for completeness
        for required in attributes:
            if required not in mask_input:
                raise ValueError(f"Attribute '{required}' not found in mask dictionary")
        return mask_input

    if isinstance(mask_input, str):
        if mask_input == "attr":
            # each attribute doubles as its own mask
            return {name: name for name in attributes}
        # one attribute name shared across all attributes
        return dict.fromkeys(attributes, mask_input)

    if mask_input is None or isinstance(mask_input, (np.ndarray, list)):
        # None or a single array/list applies uniformly
        return dict.fromkeys(attributes, mask_input)

    raise ValueError(f"Invalid mask input type: {type(mask_input)}")
|
430
|
+
|
431
|
+
|
432
|
+
def _get_attribute_masks(
    graph: ig.Graph,
    mask_specs: Dict[str, Union[str, np.ndarray, List, None]],
) -> Dict[str, np.ndarray]:
    """
    Generate boolean masks for each attribute based on specifications.

    Parameters
    ----------
    graph : ig.Graph
        Input graph.
    mask_specs : Dict[str, Union[str, np.ndarray, List, None]]
        Dictionary mapping each attribute to its mask specification:
        - None: all nodes are included
        - str: name of a vertex attribute; nodes where it is > 0 are included
        - np.ndarray: cast to bool and used as-is
        - List[str]: vertex names to include (first occurrence of each name)
        - List[int]: vertex indices to include
        - empty list: no nodes are included

    Returns
    -------
    Dict[str, np.ndarray]
        Dictionary mapping each attribute to its boolean mask array.

    Raises
    ------
    ValueError
        If a key of `mask_specs` is not a vertex attribute, a string list is
        used on a graph without a "name" attribute, a listed name is not in
        the graph, or a specification has an unsupported type.
    """
    n_nodes = graph.vcount()
    vertex_attrs = graph.vs.attributes()

    invalid_attrs = set(mask_specs).difference(vertex_attrs)
    if invalid_attrs:
        raise ValueError(f"Attributes {invalid_attrs} not found in graph")

    # name -> first vertex index; built at most once, and only if a name list is seen
    name_to_idx = None

    masks = {}
    for attr, mask_spec in mask_specs.items():

        if mask_spec is None:
            masks[attr] = np.ones(n_nodes, dtype=bool)
        elif isinstance(mask_spec, str):
            attr_values = np.array(graph.vs[mask_spec])
            masks[attr] = attr_values > 0
        elif isinstance(mask_spec, np.ndarray):
            masks[attr] = mask_spec.astype(bool)
        elif isinstance(mask_spec, list):
            mask_array = np.zeros(n_nodes, dtype=bool)
            if not mask_spec:
                # empty selection: nothing to mark (previously an IndexError)
                pass
            elif isinstance(mask_spec[0], str):
                # Node names
                if name_to_idx is None:
                    if "name" not in vertex_attrs:
                        raise ValueError(
                            "Graph has no 'name' attribute for string mask"
                        )
                    name_to_idx = {}
                    for idx, node_name in enumerate(graph.vs["name"]):
                        # setdefault keeps the first index, like list.index did
                        name_to_idx.setdefault(node_name, idx)
                for name in mask_spec:
                    try:
                        idx = name_to_idx[name]
                    except (KeyError, TypeError):
                        raise ValueError(
                            f"Node name {name!r} in mask for attribute '{attr}' "
                            "not found in graph"
                        ) from None
                    mask_array[idx] = True
            else:
                # Node indices
                mask_array[mask_spec] = True
            masks[attr] = mask_array
        else:
            raise ValueError(
                f"Invalid mask specification for attribute '{attr}': {type(mask_spec)}"
            )

    return masks
|
493
|
+
|
494
|
+
|
495
|
+
def _ensure_valid_attribute(graph: ig.Graph, attribute: str, non_negative: bool = True):
    """
    Validate a vertex attribute and return its values as a float array.

    The attribute is read from every vertex; vertices that lack it or hold
    None contribute 0. The result is rejected if no vertex carries a value,
    if every value is zero, or (when `non_negative` is set) if any value is
    below zero.

    Parameters
    ----------
    graph : NapistuGraph or ig.Graph
        Graph whose vertices are inspected.
    attribute : str
        Name of the vertex attribute to validate.
    non_negative : bool, default True
        When True, negative values are an error.

    Returns
    -------
    np.ndarray
        Float array with one entry per vertex (missing/None replaced by 0).

    Raises
    ------
    ValueError
        If the attribute is missing/None on all vertices, all values are zero,
        or a negative value is present while `non_negative` is True.
    """

    def _value_or_none(vertex):
        # None stands in for both "attribute absent" and "attribute is None"
        if attribute in vertex.attributes():
            return vertex[attribute]
        return None

    raw_values = [_value_or_none(vertex) for vertex in graph.vs]

    if all(raw is None for raw in raw_values):
        raise ValueError(f"Vertex attribute '{attribute}' is missing for all vertices.")

    arr = np.array(
        [0.0 if raw is None else raw for raw in raw_values],
        dtype=float,
    )

    has_nonzero = np.any(arr != 0)
    if not has_nonzero:
        raise ValueError(
            f"Vertex attribute '{attribute}' is zero for all vertices; cannot use as reset vector."
        )

    if non_negative:
        if np.any(arr < 0):
            raise ValueError(f"Attribute '{attribute}' contains negative values.")

    return arr
|
@@ -26,7 +26,7 @@ from napistu.constants import (
|
|
26
26
|
)
|
27
27
|
|
28
28
|
from napistu.network.constants import (
|
29
|
-
|
29
|
+
NAPISTU_GRAPH_VERTICES,
|
30
30
|
NAPISTU_GRAPH_EDGES,
|
31
31
|
NAPISTU_GRAPH_EDGE_DIRECTIONS,
|
32
32
|
NAPISTU_GRAPH_NODE_TYPES,
|
@@ -152,7 +152,7 @@ def create_napistu_graph(
|
|
152
152
|
|
153
153
|
# rename nodes to name since it is treated specially
|
154
154
|
network_nodes_df = pd.concat(network_nodes).rename(
|
155
|
-
columns={"node_id":
|
155
|
+
columns={"node_id": NAPISTU_GRAPH_VERTICES.NAME}
|
156
156
|
)
|
157
157
|
|
158
158
|
logger.info(f"Formatting edges as a {wiring_approach} graph")
|
@@ -234,7 +234,7 @@ def create_napistu_graph(
|
|
234
234
|
vertices=network_nodes_df.to_dict("records"),
|
235
235
|
edges=unique_edges.to_dict("records"),
|
236
236
|
directed=directed,
|
237
|
-
vertex_name_attr=
|
237
|
+
vertex_name_attr=NAPISTU_GRAPH_VERTICES.NAME,
|
238
238
|
edge_foreign_keys=(NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO),
|
239
239
|
)
|
240
240
|
|