napistu 0.4.2__tar.gz → 0.4.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {napistu-0.4.2/src/napistu.egg-info → napistu-0.4.4}/PKG-INFO +1 -1
- {napistu-0.4.2 → napistu-0.4.4}/setup.cfg +1 -1
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/consensus.py +3 -4
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/constants.py +51 -0
- napistu-0.4.4/src/napistu/ingestion/reactom_fi.py +208 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/neighborhoods.py +28 -7
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/ng_utils.py +26 -6
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/precompute.py +56 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/sbml_dfs_utils.py +8 -2
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/source.py +243 -40
- napistu-0.4.4/src/napistu/statistics/hypothesis_testing.py +66 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/utils.py +23 -1
- {napistu-0.4.2 → napistu-0.4.4/src/napistu.egg-info}/PKG-INFO +1 -1
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu.egg-info/SOURCES.txt +3 -1
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_precompute.py +30 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_sbml_dfs_utils.py +13 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_source.py +38 -6
- napistu-0.4.4/src/tests/test_statistics_hypothesis_testing.py +62 -0
- napistu-0.4.2/src/tests/test_set_coverage.py +0 -50
- {napistu-0.4.2 → napistu-0.4.4}/LICENSE +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/README.md +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/pyproject.toml +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/setup.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/__init__.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/__main__.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/constants.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/context/__init__.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/context/discretize.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/context/filtering.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/gcs/__init__.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/gcs/constants.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/gcs/downloads.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/gcs/utils.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/identifiers.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/indices.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/__init__.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/bigg.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/gtex.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/hpa.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/identifiers_etl.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/napistu_edgelist.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/obo.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/psi_mi.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/reactome.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/sbml.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/string.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/trrust.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/yeast.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/matching/__init__.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/matching/constants.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/matching/interactions.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/matching/mount.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/matching/species.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/__init__.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/__main__.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/client.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/codebase.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/codebase_utils.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/component_base.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/config.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/constants.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/documentation.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/documentation_utils.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/execution.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/health.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/profiles.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/server.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/tutorials.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/tutorials_utils.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/utils.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/modify/__init__.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/modify/constants.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/modify/curation.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/modify/gaps.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/modify/pathwayannot.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/modify/uncompartmentalize.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/__init__.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/constants.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/data_handling.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/ig_utils.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/net_create.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/net_create_utils.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/net_propagation.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/ng_core.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/paths.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ontologies/__init__.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ontologies/constants.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ontologies/dogma.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ontologies/genodexito.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ontologies/id_tables.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ontologies/mygene.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ontologies/renaming.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/rpy2/__init__.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/rpy2/callr.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/rpy2/constants.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/rpy2/rids.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/sbml_dfs_core.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/scverse/__init__.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/scverse/constants.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/scverse/loading.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/statistics/__init__.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu/statistics/quantiles.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu.egg-info/dependency_links.txt +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu.egg-info/entry_points.txt +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu.egg-info/requires.txt +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/napistu.egg-info/top_level.txt +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/__init__.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/conftest.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_consensus.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_constants.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_context_discretize.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_context_filtering.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_curation.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_data/__init__.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_gaps.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_gcs.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_identifiers.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_indices.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_ingestion_napistu_edgelist.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_ingestion_obo.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_matching_interactions.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_matching_mount.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_matching_species.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_mcp_config.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_mcp_documentation_utils.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_mcp_server.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_data_handling.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_ig_utils.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_neighborhoods.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_net_create.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_net_create_utils.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_net_propagation.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_ng_core.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_ng_utils.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_paths.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_ontologies_genodexito.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_ontologies_id_tables.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_ontologies_mygene.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_ontologies_renaming.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_pathwayannot.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_rpy2_callr.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_rpy2_init.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_sbml.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_sbml_dfs_core.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_sbo.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_scverse_loading.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_statistics_quantiles.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_uncompartmentalize.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_utils.py +0 -0
- {napistu-0.4.2 → napistu-0.4.4}/src/tests/utils.py +0 -0
@@ -426,7 +426,7 @@ def post_consensus_species_ontology_check(sbml_dfs: sbml_dfs_core.SBML_dfs) -> s
|
|
426
426
|
|
427
427
|
# get the sources of species in the consensus model
|
428
428
|
consensus_sbmldf_tbl_var_sc = (
|
429
|
-
source.unnest_sources(sbml_dfs.species,
|
429
|
+
source.unnest_sources(sbml_dfs.species, verbose=False)
|
430
430
|
.reset_index()
|
431
431
|
.sort_values([SOURCE_SPEC.NAME])
|
432
432
|
)
|
@@ -504,12 +504,11 @@ def post_consensus_source_check(
|
|
504
504
|
) -> pd.DataFrame:
|
505
505
|
"""Provide sources of tables in a consensus model; the output df will be used to determine whether models are merged."""
|
506
506
|
|
507
|
-
|
508
|
-
table_pk = sbml_dfs.schema[table_name]["pk"]
|
507
|
+
table_pk = sbml_dfs.schema[table_name][SCHEMA_DEFS.PK]
|
509
508
|
|
510
509
|
sbml_dfs_tbl = getattr(sbml_dfs, table_name)
|
511
510
|
sbml_dfs_tbl_pathway_source = (
|
512
|
-
source.unnest_sources(sbml_dfs_tbl,
|
511
|
+
source.unnest_sources(sbml_dfs_tbl, verbose=False)
|
513
512
|
.reset_index()
|
514
513
|
.sort_values(["name"])
|
515
514
|
)
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
3
3
|
|
4
4
|
from types import SimpleNamespace
|
5
5
|
|
6
|
+
from napistu.constants import SBOTERM_NAMES
|
6
7
|
|
7
8
|
SPECIES_FULL_NAME_HUMAN = "Homo sapiens"
|
8
9
|
SPECIES_FULL_NAME_MOUSE = "Mus musculus"
|
@@ -90,6 +91,56 @@ REACTOME_PATHWAYS_URL = "https://reactome.org/download/current/ReactomePathways.
|
|
90
91
|
REACTOME_PATHWAY_INDEX_COLUMNS = ["file", "source", "species", "pathway_id", "name"]
|
91
92
|
REACTOME_PATHWAY_LIST_COLUMNS = ["pathway_id", "name", "species"]
|
92
93
|
|
94
|
+
# REACTOME FI
# Zipped, annotated Reactome functional interaction (FI) table.
REACTOME_FI_URL = "http://cpws.reactome.org/caBigR3WebApp2025/FIsInGene_04142025_with_annotations.txt.zip"

# Column names used to address the Reactome FI table.
REACTOME_FI = SimpleNamespace(
    GENE1="Gene1",
    GENE2="Gene2",
    ANNOTATION="Annotation",
    DIRECTION="Direction",
    SCORE="Score",
)

# Symbols accepted in the REACTOME_FI.DIRECTION column. Attribute names describe
# the relationship from Gene1's point of view.
REACTOME_FI_DIRECTIONS = SimpleNamespace(
    UNDIRECTED="-",
    STIMULATED_BY="<-",
    STIMULATES="->",
    STIMULATES_AND_STIMULATED_BY="<->",
    INHIBITED_BY="|-",
    INHIBITS="-|",
    INHIBITS_AND_INHIBITED_BY="|-|",
    STIMULATES_AND_INHIBITED_BY="|->",
    INHIBITS_AND_STIMULATED_BY="<-|",
)

# Materialized as a tuple (rather than a live dict view) so the set of valid
# symbols is a stable snapshot that still supports `in` and `Series.isin()`.
VALID_REACTOME_FI_DIRECTIONS = tuple(vars(REACTOME_FI_DIRECTIONS).values())

# Rules for the reverse (Gene2 -> Gene1) reading of an interaction.
# NAME_RULES keys are regexes applied to the annotation text; DIRECTION_RULES
# are the fallback keyed on the direction symbol.
REACTOME_FI_RULES_REVERSE = SimpleNamespace(
    NAME_RULES={"catalyzed by": SBOTERM_NAMES.CATALYST},
    DIRECTION_RULES={
        REACTOME_FI_DIRECTIONS.STIMULATED_BY: SBOTERM_NAMES.STIMULATOR,
        REACTOME_FI_DIRECTIONS.STIMULATES_AND_STIMULATED_BY: SBOTERM_NAMES.STIMULATOR,
        # mirror of STIMULATES_AND_INHIBITED_BY below: the "stimulated by" half
        # of "<-|" is the reverse edge
        REACTOME_FI_DIRECTIONS.INHIBITS_AND_STIMULATED_BY: SBOTERM_NAMES.STIMULATOR,
        REACTOME_FI_DIRECTIONS.INHIBITED_BY: SBOTERM_NAMES.INHIBITOR,
        REACTOME_FI_DIRECTIONS.INHIBITS_AND_INHIBITED_BY: SBOTERM_NAMES.INHIBITOR,
        REACTOME_FI_DIRECTIONS.STIMULATES_AND_INHIBITED_BY: SBOTERM_NAMES.INHIBITOR,
        REACTOME_FI_DIRECTIONS.UNDIRECTED: SBOTERM_NAMES.INTERACTOR,
    },
)

# Rules for the forward (Gene1 -> Gene2) reading of an interaction.
REACTOME_FI_RULES_FORWARD = SimpleNamespace(
    # "catalyze" followed by ";" or the end of the annotation, so that
    # "catalyzed by" (a reverse rule) is not picked up here. The previous
    # pattern "catalyze(;$)" only matched a ";" that was the final character.
    NAME_RULES={r"catalyze(;|$)": SBOTERM_NAMES.CATALYST},
    DIRECTION_RULES={
        REACTOME_FI_DIRECTIONS.STIMULATES: SBOTERM_NAMES.STIMULATOR,
        REACTOME_FI_DIRECTIONS.STIMULATES_AND_STIMULATED_BY: SBOTERM_NAMES.STIMULATOR,
        REACTOME_FI_DIRECTIONS.STIMULATES_AND_INHIBITED_BY: SBOTERM_NAMES.STIMULATOR,
        REACTOME_FI_DIRECTIONS.INHIBITS: SBOTERM_NAMES.INHIBITOR,
        REACTOME_FI_DIRECTIONS.INHIBITS_AND_INHIBITED_BY: SBOTERM_NAMES.INHIBITOR,
        REACTOME_FI_DIRECTIONS.INHIBITS_AND_STIMULATED_BY: SBOTERM_NAMES.INHIBITOR,
        REACTOME_FI_DIRECTIONS.UNDIRECTED: SBOTERM_NAMES.INTERACTOR,
    },
)
|
143
|
+
|
93
144
|
# SBML
|
94
145
|
SBML_DEFS = SimpleNamespace(
|
95
146
|
ERROR_NUMBER="error_number",
|
@@ -0,0 +1,208 @@
|
|
1
|
+
import logging
|
2
|
+
import pandas as pd
|
3
|
+
|
4
|
+
from napistu.identifiers import Identifiers
|
5
|
+
from napistu import utils
|
6
|
+
from napistu.ingestion.constants import (
|
7
|
+
REACTOME_FI,
|
8
|
+
REACTOME_FI_RULES_FORWARD,
|
9
|
+
REACTOME_FI_RULES_REVERSE,
|
10
|
+
REACTOME_FI_URL,
|
11
|
+
VALID_REACTOME_FI_DIRECTIONS,
|
12
|
+
)
|
13
|
+
|
14
|
+
|
15
|
+
logger = logging.getLogger(__name__)
|
16
|
+
|
17
|
+
|
18
|
+
def download_reactome_fi(target_uri: str, url: str = REACTOME_FI_URL) -> None:
    """
    Download the Reactome Functional Interactions (FI) dataset as a TSV file.

    Parameters
    ----------
    target_uri : str
        The URI where the Reactome FI data should be saved. Should end with .tsv
    url : str, optional
        URL to download the zipped Reactome functional interactions TSV from.
        Defaults to REACTOME_FI_URL.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If target_uri does not end with .tsv
    """

    if not target_uri.endswith(".tsv"):
        raise ValueError(f"Target URI must end with .tsv, got {target_uri}")

    # The file inside the archive is taken to be the archive's own name minus
    # its final extension (e.g. "x.txt.zip" -> "x.txt"). Only the last extension
    # is stripped so names containing it earlier are not truncated.
    archive_name = url.split("/")[-1]
    target_filename = archive_name.rsplit(".", 1)[0]

    logger.info("Start downloading Reactome FI %s to %s", url, target_uri)
    # target_filename is the name of the file in the zip file which will be renamed to target_uri
    utils.download_wget(url, target_uri, target_filename=target_filename)

    return None
|
50
|
+
|
51
|
+
|
52
|
+
def format_reactome_fi_edgelist(interactions: pd.DataFrame):
    """
    Format the Reactome FI interactions DataFrame as an edgelist for network analysis.

    This function is not yet implemented: it always raises NotImplementedError.
    The code below the raise is an unreachable draft kept for whoever finishes it.

    Parameters
    ----------
    interactions : pd.DataFrame
        DataFrame containing Reactome FI interactions.

    Returns
    -------
    Nothing at present. The planned output is a dictionary of:

    interaction_edgelist : pd.DataFrame
        Table containing molecular interactions with columns:
        - upstream_name : str, matches "s_name" from species_df
        - downstream_name : str, matches "s_name" from species_df
        - upstream_compartment : str, matches "c_name" from compartments_df
        - downstream_compartment : str, matches "c_name" from compartments_df
        - r_name : str, name for the interaction
        - sbo_term : str, SBO term defining interaction type
        - r_Identifiers : identifiers.Identifiers, supporting identifiers
        - r_isreversible : bool, whether reaction is reversible
    species_df : pd.DataFrame
        Table defining molecular species with columns:
        - s_name : str, name of molecular species
        - s_Identifiers : identifiers.Identifiers, species identifiers
    compartments_df : pd.DataFrame
        Table defining compartments with columns:
        - c_name : str, name of compartment
        - c_Identifiers : identifiers.Identifiers, compartment identifiers

    Note that the draft below only builds a single edgelist DataFrame, not the
    dictionary described above.

    Raises
    ------
    NotImplementedError
        Always.
    """

    raise NotImplementedError("TO DO - This function is incomplete")

    # ---- unreachable draft implementation ----

    formatted_annotations = _parse_reactome_fi_annotations(interactions)

    # this join will expand some rows to 2 since the bidirectional relationships are captured as separate edges in Napistu
    annotated_interactions = interactions.merge(
        formatted_annotations,
        on=[REACTOME_FI.ANNOTATION, REACTOME_FI.DIRECTION],
        how="left",
    )

    # flip reverse entries so all relationships are forward or undirected
    formatted_interactions = (
        pd.concat(
            [
                annotated_interactions.query("polarity == 'forward'"),
                (
                    annotated_interactions.query("polarity == 'reverse'").rename(
                        columns={
                            REACTOME_FI.GENE1: REACTOME_FI.GENE2,
                            REACTOME_FI.GENE2: REACTOME_FI.GENE1,
                        }
                    )
                ),
            ]
        )[
            [
                REACTOME_FI.GENE1,
                REACTOME_FI.GENE2,
                "sbo_term_name",
                REACTOME_FI.SCORE,
            ]
        ]
        # looks like they were already unique edges
        .sort_values(REACTOME_FI.SCORE, ascending=False)
        .groupby([REACTOME_FI.GENE1, REACTOME_FI.GENE2])
        .first()
    )

    fi_edgelist = (
        formatted_interactions.reset_index()
        .rename(
            columns={
                REACTOME_FI.GENE1: "upstream_name",
                REACTOME_FI.GENE2: "downstream_name",
            }
        )
        .assign(r_Identifiers=Identifiers([]))
    )

    return fi_edgelist
|
133
|
+
|
134
|
+
|
135
|
+
def _parse_reactome_fi_annotations(interactions: pd.DataFrame) -> pd.DataFrame:
    """
    Parse and annotate Reactome FI interaction types and directions using regex-based rules.

    Each distinct annotation/direction pair is checked against the forward and
    the reverse rule sets. Within a rule set, the annotation text is matched
    against the name rules first and the direction symbol is used as a fallback.
    A pair may yield a forward row, a reverse row, or both.

    Parameters
    ----------
    interactions : pd.DataFrame
        DataFrame containing Reactome FI interactions, with annotation and direction columns.

    Returns
    -------
    pd.DataFrame
        DataFrame with annotation, direction, SBO term name ("sbo_term_name"), and
        polarity ("forward" or "reverse") for each matched annotation/direction pair.
        These columns are present even when there are no rows.

    Raises
    ------
    ValueError
        If an annotation/direction pair cannot be matched to a rule or if invalid directions are found.
    """

    distinct_annotations = (
        interactions[[REACTOME_FI.ANNOTATION, REACTOME_FI.DIRECTION]]
        .drop_duplicates()
        .reset_index(drop=True)
    )

    invalid_directions = distinct_annotations.loc[
        ~distinct_annotations[REACTOME_FI.DIRECTION].isin(
            list(VALID_REACTOME_FI_DIRECTIONS)
        ),
        REACTOME_FI.DIRECTION,
    ]
    if len(invalid_directions) > 0:
        # report the offending symbols themselves rather than a Series repr
        raise ValueError(
            f"Invalid directions: {invalid_directions.unique().tolist()}"
        )

    # forward is evaluated before reverse so output rows keep that order
    rule_sets = (
        ("forward", REACTOME_FI_RULES_FORWARD),
        ("reverse", REACTOME_FI_RULES_REVERSE),
    )

    annotations = []
    for annot, direction in distinct_annotations.itertuples(index=False, name=None):
        matched = False
        for polarity, rules in rule_sets:
            sbo_term_name = utils.match_regex_dict(annot, rules.NAME_RULES)
            if not sbo_term_name:
                # no name-based rule applied; fall back to the direction symbol
                sbo_term_name = rules.DIRECTION_RULES.get(direction)

            if sbo_term_name:
                matched = True
                annotations.append(
                    {
                        REACTOME_FI.ANNOTATION: annot,
                        REACTOME_FI.DIRECTION: direction,
                        "sbo_term_name": sbo_term_name,
                        "polarity": polarity,
                    }
                )

        if not matched:
            raise ValueError(f"No match found for {annot} with direction {direction}")

    # explicit columns so an empty input still yields a frame callers can merge on
    return pd.DataFrame(
        annotations,
        columns=[
            REACTOME_FI.ANNOTATION,
            REACTOME_FI.DIRECTION,
            "sbo_term_name",
            "polarity",
        ],
    )
|
@@ -34,6 +34,7 @@ def find_and_prune_neighborhoods(
|
|
34
34
|
napistu_graph: ig.Graph,
|
35
35
|
compartmentalized_species: str | list[str],
|
36
36
|
precomputed_distances: pd.DataFrame | None = None,
|
37
|
+
source_total_counts: pd.Series | None = None,
|
37
38
|
network_type: str = NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM,
|
38
39
|
order: int = 3,
|
39
40
|
verbose: bool = True,
|
@@ -44,7 +45,7 @@ def find_and_prune_neighborhoods(
|
|
44
45
|
|
45
46
|
Wrapper which combines find_neighborhoods() and prune_neighborhoods()
|
46
47
|
|
47
|
-
|
48
|
+
Parameters
|
48
49
|
----------
|
49
50
|
sbml_dfs: sbml_dfs_core.SBML_dfs
|
50
51
|
A mechanistic molecular model
|
@@ -54,6 +55,9 @@ def find_and_prune_neighborhoods(
|
|
54
55
|
Compartmentalized species IDs for neighborhood centers
|
55
56
|
precomputed_distances : pd.DataFrame or None
|
56
57
|
If provided, an edgelist of origin->destination path weights and lengths
|
58
|
+
source_total_counts: pd.Series | None
|
59
|
+
Optional, A series of the total counts of each source. As produced by
|
60
|
+
source.get_source_total_counts()
|
57
61
|
network_type: str
|
58
62
|
If the network is directed should neighbors be located "downstream",
|
59
63
|
or "upstream" of each compartmentalized species. The "hourglass" option
|
@@ -109,6 +113,7 @@ def find_and_prune_neighborhoods(
|
|
109
113
|
order=order,
|
110
114
|
verbose=verbose,
|
111
115
|
precomputed_neighbors=precomputed_neighbors,
|
116
|
+
source_total_counts=source_total_counts,
|
112
117
|
)
|
113
118
|
|
114
119
|
pruned_neighborhoods = prune_neighborhoods(neighborhoods, top_n=top_n)
|
@@ -132,7 +137,7 @@ def load_neighborhoods(
|
|
132
137
|
|
133
138
|
Load existing neighborhoods if they exist
|
134
139
|
(and overwrite = False) and otherwise construct
|
135
|
-
|
140
|
+
neighborhoods using the provided settings
|
136
141
|
|
137
142
|
Parameters
|
138
143
|
----------
|
@@ -509,12 +514,13 @@ def find_neighborhoods(
|
|
509
514
|
order: int = 3,
|
510
515
|
verbose: bool = True,
|
511
516
|
precomputed_neighbors: pd.DataFrame | None = None,
|
517
|
+
source_total_counts: pd.Series | None = None,
|
512
518
|
) -> dict:
|
513
519
|
"""
|
514
520
|
Find Neighborhood
|
515
521
|
|
516
522
|
Create a network composed of all species and reactions within N steps of
|
517
|
-
|
523
|
+
each of a set of compartmentalized species.
|
518
524
|
|
519
525
|
Parameters
|
520
526
|
----------
|
@@ -535,11 +541,14 @@ def find_neighborhoods(
|
|
535
541
|
precomputed_neighbors: pd.DataFrame or None
|
536
542
|
If provided, a pre-filtered table of nodes nearby the compartmentalized species
|
537
543
|
which will be used to skip on-the-fly neighborhood generation.
|
544
|
+
source_total_counts: pd.Series | None
|
545
|
+
Optional, A series of the total counts of each source. As produced by
|
546
|
+
source.get_source_total_counts()
|
538
547
|
|
539
548
|
Returns:
|
540
549
|
----------
|
541
550
|
A dict containing the neighborhood of each compartmentalized species.
|
542
|
-
|
551
|
+
Each entry in the dict is a dict of the subgraph, vertices, and edges.
|
543
552
|
"""
|
544
553
|
|
545
554
|
if not isinstance(network_type, str):
|
@@ -567,7 +576,12 @@ def find_neighborhoods(
|
|
567
576
|
# format the vertices and edges in each compartmentalized species' network
|
568
577
|
neighborhood_dict = {
|
569
578
|
sc_id: create_neighborhood_dict_entry(
|
570
|
-
sc_id,
|
579
|
+
sc_id,
|
580
|
+
neighborhood_df=neighborhood_df,
|
581
|
+
sbml_dfs=sbml_dfs,
|
582
|
+
napistu_graph=napistu_graph,
|
583
|
+
source_total_counts=source_total_counts,
|
584
|
+
verbose=verbose,
|
571
585
|
)
|
572
586
|
for sc_id in compartmentalized_species
|
573
587
|
}
|
@@ -580,6 +594,7 @@ def create_neighborhood_dict_entry(
|
|
580
594
|
neighborhood_df: pd.DataFrame,
|
581
595
|
sbml_dfs: sbml_dfs_core.SBML_dfs,
|
582
596
|
napistu_graph: ig.Graph,
|
597
|
+
source_total_counts: pd.Series | None = None,
|
583
598
|
verbose: bool = False,
|
584
599
|
) -> dict[str, Any]:
|
585
600
|
"""
|
@@ -597,6 +612,9 @@ def create_neighborhood_dict_entry(
|
|
597
612
|
A mechanistic molecular model
|
598
613
|
napistu_graph: igraph.Graph
|
599
614
|
A network connecting molecular species and reactions
|
615
|
+
source_total_counts: pd.Series
|
616
|
+
Optional, A series of the total counts of each source. As produced by
|
617
|
+
source.get_source_total_counts()
|
600
618
|
verbose: bool
|
601
619
|
Extra reporting?
|
602
620
|
|
@@ -645,7 +663,10 @@ def create_neighborhood_dict_entry(
|
|
645
663
|
|
646
664
|
try:
|
647
665
|
edge_sources = ng_utils.get_minimal_sources_edges(
|
648
|
-
vertices.rename(columns={"name": "node"}),
|
666
|
+
vertices.rename(columns={"name": "node"}),
|
667
|
+
sbml_dfs,
|
668
|
+
# optional, counts of sources across the whole model
|
669
|
+
source_total_counts,
|
649
670
|
)
|
650
671
|
except Exception:
|
651
672
|
edge_sources = None
|
@@ -1441,7 +1462,7 @@ def _prune_vertex_set(one_neighborhood: dict, top_n: int) -> pd.DataFrame:
|
|
1441
1462
|
----------
|
1442
1463
|
one_neighborhood: dict
|
1443
1464
|
The neighborhood around a single compartmentalized species - one of the values
|
1444
|
-
|
1465
|
+
in dict created by find_neighborhoods().
|
1445
1466
|
top_n: int
|
1446
1467
|
How many neighboring molecular species should be retained?
|
1447
1468
|
If the neighborhood includes both upstream and downstream connections
|
@@ -66,7 +66,7 @@ def compartmentalize_species_pairs(
|
|
66
66
|
Compartmentalize Shortest Paths
|
67
67
|
|
68
68
|
For a set of origin and destination species pairs, consider each species in every
|
69
|
-
|
69
|
+
compartment it operates in, separately.
|
70
70
|
|
71
71
|
Parameters
|
72
72
|
----------
|
@@ -112,22 +112,42 @@ def compartmentalize_species_pairs(
|
|
112
112
|
|
113
113
|
|
114
114
|
def get_minimal_sources_edges(
    vertices: pd.DataFrame,
    sbml_dfs: sbml_dfs_core.SBML_dfs,
    source_total_counts: Optional[pd.Series] = None,
) -> pd.DataFrame | None:
    """
    Assign edges to a set of sources.

    Parameters
    ----------
    vertices: pd.DataFrame
        A table of vertices; its "node" column is compared against the
        index of the model's reactions table.
    sbml_dfs: sbml_dfs_core.SBML_dfs
        A pathway model
    source_total_counts: pd.Series
        Optional, a series of the total counts of each source. Passed through
        to source.source_set_coverage().

    Returns
    -------
    edge_sources: pd.DataFrame or None
        A table of edges and the sources they are assigned to. None if no
        vertex is a reaction in the model, or if unnesting the reactions'
        sources yields None.
    """

    vertex_names = vertices["node"].tolist()
    is_present = sbml_dfs.reactions.index.isin(vertex_names)
    present_reactions = sbml_dfs.reactions[is_present]

    # nothing to annotate when no vertex is a reaction
    if len(present_reactions) == 0:
        return None

    source_df = source.unnest_sources(present_reactions)
    if source_df is None:
        return None

    edge_sources = source.source_set_coverage(
        source_df, source_total_counts, sbml_dfs
    )

    output_columns = [SBML_DFS.R_ID, SOURCE_SPEC.PATHWAY_ID, SOURCE_SPEC.NAME]
    return edge_sources.reset_index()[output_columns]
|
@@ -110,6 +110,62 @@ def precompute_distances(
|
|
110
110
|
return filtered_precomputed_distances
|
111
111
|
|
112
112
|
|
113
|
+
def filter_precomputed_distances_top_n(
    precomputed_distances: pd.DataFrame, top_n: int = 50
) -> pd.DataFrame:
    """
    Filter precomputed distances to only include the top-n pairs for each distance measure.

    Every column other than the origin and destination identifiers is treated
    as a distance measure. For each measure, rows are ranked in descending
    order and the first `top_n` rows per origin and per destination are kept.
    The result is the union of those pairs across all measures.

    Parameters
    ----------
    precomputed_distances : pd.DataFrame
        Precomputed distances.
    top_n : int, optional
        Top-n pairs to include for each distance measure.

    Returns
    -------
    pd.DataFrame
        Filtered precomputed distances (all original columns, only retained pairs).

    Raises
    ------
    ValueError
        If `precomputed_distances` has no columns besides the origin and
        destination identifiers.
    """

    pair_vars = [NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST]

    # take the union of top-n for each distance measure; and from origin -> dest and dest -> origin
    distance_vars = [
        col for col in precomputed_distances.columns if col not in pair_vars
    ]
    if not distance_vars:
        raise ValueError(
            "precomputed_distances contains no distance measures to rank by; "
            f"only found columns: {list(precomputed_distances.columns)}"
        )

    valid_pairs = []
    for distance_var in distance_vars:
        # NOTE(review): descending order keeps the LARGEST values of each measure.
        # If smaller values mean "closer" this should likely be ascending - confirm.
        ranked = precomputed_distances.sort_values(by=distance_var, ascending=False)

        # the same ranking serves both the per-origin and per-destination cut
        for anchor in pair_vars:
            valid_pairs.append(ranked.groupby(anchor).head(top_n)[pair_vars])

    all_valid_pairs = pd.concat(valid_pairs).drop_duplicates()

    return precomputed_distances.merge(
        all_valid_pairs,
        on=pair_vars,
        how="inner",
    )
|
167
|
+
|
168
|
+
|
113
169
|
def _calculate_distances_subset(
|
114
170
|
napistu_graph: NapistuGraph,
|
115
171
|
vs_to_partition: pd.DataFrame,
|
@@ -456,8 +456,14 @@ def infer_entity_type(df: pd.DataFrame) -> str:
|
|
456
456
|
if entity_schema.get(SCHEMA_DEFS.PK) == df.index.name:
|
457
457
|
return entity_type
|
458
458
|
|
459
|
-
# Get DataFrame columns that are also primary keys
|
460
|
-
|
459
|
+
# Get DataFrame columns that are also primary keys, including index or MultiIndex names
|
460
|
+
index_names = []
|
461
|
+
if isinstance(df.index, pd.MultiIndex):
|
462
|
+
index_names = [name for name in df.index.names if name is not None]
|
463
|
+
elif df.index.name is not None:
|
464
|
+
index_names = [df.index.name]
|
465
|
+
|
466
|
+
df_columns = set(df.columns).union(index_names).intersection(primary_keys)
|
461
467
|
|
462
468
|
# Check for exact match with primary key + foreign keys
|
463
469
|
for entity_type, entity_schema in schema.items():
|