napistu 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__init__.py +12 -0
- napistu/__main__.py +867 -0
- napistu/consensus.py +1557 -0
- napistu/constants.py +500 -0
- napistu/gcs/__init__.py +10 -0
- napistu/gcs/constants.py +69 -0
- napistu/gcs/downloads.py +180 -0
- napistu/identifiers.py +805 -0
- napistu/indices.py +227 -0
- napistu/ingestion/__init__.py +10 -0
- napistu/ingestion/bigg.py +146 -0
- napistu/ingestion/constants.py +296 -0
- napistu/ingestion/cpr_edgelist.py +106 -0
- napistu/ingestion/identifiers_etl.py +148 -0
- napistu/ingestion/obo.py +268 -0
- napistu/ingestion/psi_mi.py +276 -0
- napistu/ingestion/reactome.py +218 -0
- napistu/ingestion/sbml.py +621 -0
- napistu/ingestion/string.py +356 -0
- napistu/ingestion/trrust.py +285 -0
- napistu/ingestion/yeast.py +147 -0
- napistu/mechanism_matching.py +597 -0
- napistu/modify/__init__.py +10 -0
- napistu/modify/constants.py +86 -0
- napistu/modify/curation.py +628 -0
- napistu/modify/gaps.py +635 -0
- napistu/modify/pathwayannot.py +1381 -0
- napistu/modify/uncompartmentalize.py +264 -0
- napistu/network/__init__.py +10 -0
- napistu/network/constants.py +117 -0
- napistu/network/neighborhoods.py +1594 -0
- napistu/network/net_create.py +1647 -0
- napistu/network/net_utils.py +652 -0
- napistu/network/paths.py +500 -0
- napistu/network/precompute.py +221 -0
- napistu/rpy2/__init__.py +127 -0
- napistu/rpy2/callr.py +168 -0
- napistu/rpy2/constants.py +101 -0
- napistu/rpy2/netcontextr.py +464 -0
- napistu/rpy2/rids.py +697 -0
- napistu/sbml_dfs_core.py +2216 -0
- napistu/sbml_dfs_utils.py +304 -0
- napistu/source.py +394 -0
- napistu/utils.py +943 -0
- napistu-0.1.0.dist-info/METADATA +56 -0
- napistu-0.1.0.dist-info/RECORD +77 -0
- napistu-0.1.0.dist-info/WHEEL +5 -0
- napistu-0.1.0.dist-info/entry_points.txt +2 -0
- napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
- napistu-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +0 -0
- tests/conftest.py +83 -0
- tests/test_consensus.py +255 -0
- tests/test_constants.py +20 -0
- tests/test_curation.py +134 -0
- tests/test_data/__init__.py +0 -0
- tests/test_edgelist.py +20 -0
- tests/test_gcs.py +23 -0
- tests/test_identifiers.py +151 -0
- tests/test_igraph.py +353 -0
- tests/test_indices.py +88 -0
- tests/test_mechanism_matching.py +126 -0
- tests/test_net_utils.py +66 -0
- tests/test_netcontextr.py +105 -0
- tests/test_obo.py +34 -0
- tests/test_pathwayannot.py +95 -0
- tests/test_precomputed_distances.py +222 -0
- tests/test_rpy2.py +61 -0
- tests/test_sbml.py +46 -0
- tests/test_sbml_dfs_create.py +307 -0
- tests/test_sbml_dfs_utils.py +22 -0
- tests/test_sbo.py +11 -0
- tests/test_set_coverage.py +50 -0
- tests/test_source.py +67 -0
- tests/test_uncompartmentalize.py +40 -0
- tests/test_utils.py +487 -0
- tests/utils.py +30 -0
@@ -0,0 +1,147 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
from napistu import identifiers
|
5
|
+
from napistu import sbml_dfs_core
|
6
|
+
from napistu import source
|
7
|
+
from napistu import utils
|
8
|
+
from napistu.constants import BQB
|
9
|
+
from napistu.constants import MINI_SBO_FROM_NAME
|
10
|
+
from napistu.ingestion.constants import YEAST_IDEA_KINETICS_URL
|
11
|
+
from napistu.ingestion.constants import YEAST_IDEA_PUBMED_ID
|
12
|
+
from napistu.ingestion.constants import YEAST_IDEA_SOURCE
|
13
|
+
from napistu.ingestion.constants import YEAST_IDEA_TARGET
|
14
|
+
|
15
|
+
|
16
|
+
def download_idea(output_dir: str) -> None:
    """Download the IDEA kinetics archive.

    Hands ``YEAST_IDEA_KINETICS_URL`` and ``output_dir`` to
    ``utils.download_and_extract``.

    Args:
        output_dir: second argument passed to ``utils.download_and_extract``
            (presumably the directory the archive is extracted into; confirm
            against that helper).

    Returns:
        None
    """
    # TODO: since only a single file is outputted, it makes sense to download
    #   and extract the data, then copy it to a target URI
    # TODO: add GCS support
    utils.download_and_extract(YEAST_IDEA_KINETICS_URL, output_dir)
    return None
|
23
|
+
|
24
|
+
|
25
|
+
def convert_idea_kinetics_to_sbml_dfs(
    idea_path: str,
) -> sbml_dfs_core.SBML_dfs:
    """
    Convert IDEA Kinetics to SBML DFs

    Format yeast induction regulator->target relationships as a directed graph.

    Args:
        idea_path: Path to the IDEA Kinetics file (read as a tab-separated table).

    Returns:
        SBML_dfs: an SBML_dfs object containing molecular species and their interactions.
        Kinetic attributes are included as reactions_data.

    """

    # TODO: replace with GCS support (currently this just reads a local .tsv)
    idea_kinetics_df = pd.read_csv(idea_path, sep="\t")

    # separate based on whether the change is probably direct or indirect;
    # a missing (NaN) t_rise compares False and is therefore labelled "indirect"
    idea_kinetics_df["directness"] = [
        "direct" if t_rise < 15 else "indirect" for t_rise in idea_kinetics_df["t_rise"]
    ]

    # reduce cases of multiple TF-target pairs to a single entry
    distinct_edges = (
        idea_kinetics_df.groupby([YEAST_IDEA_SOURCE, YEAST_IDEA_TARGET], as_index=True)
        .apply(_summarize_idea_pairs)
        .reset_index()
    )

    # add some more fields and reformat
    formatted_distinct_edges = distinct_edges.rename(
        {YEAST_IDEA_SOURCE: "upstream_name", YEAST_IDEA_TARGET: "downstream_name"},
        axis=1,
    ).assign(
        upstream_compartment="cellular_component",
        downstream_compartment="cellular_component",
        # tag reactions with the IDEA publication
        r_Identifiers=identifiers._format_Identifiers_pubmed(YEAST_IDEA_PUBMED_ID),
        r_isreversible=False,
    )

    # create some nice interaction names before we rename the roles as SBO terms
    formatted_distinct_edges["r_name"] = [
        f"{u} {d} {r} of {t}"
        for u, d, r, t in zip(
            formatted_distinct_edges["upstream_name"],
            formatted_distinct_edges["directness"],
            formatted_distinct_edges["role"],
            formatted_distinct_edges["downstream_name"],
        )
    ]

    # final interaction output
    # replace readable roles with entries in the SBO ontology
    interaction_edgelist = formatted_distinct_edges.replace(
        {"role": MINI_SBO_FROM_NAME}
    ).rename({"role": "sbo_term"}, axis=1)

    # Every distinct regulator or target name becomes one species.
    # dict.fromkeys de-duplicates while keeping first-appearance order; a set
    # would yield a different row order from one interpreter run to the next
    # because string hashing is randomized per process.
    species_names = list(
        dict.fromkeys(
            [
                *idea_kinetics_df[YEAST_IDEA_SOURCE],
                *idea_kinetics_df[YEAST_IDEA_TARGET],
            ]
        )
    )
    species_df = pd.DataFrame({"s_name": species_names})

    # create Identifiers objects for each species
    species_df["s_Identifiers"] = [
        identifiers.Identifiers(
            [{"ontology": "gene_name", "identifier": x, "bqb": BQB.IS}]
        )
        for x in species_df["s_name"]
    ]

    # Constant fields (for this data source)

    # setup compartments (just treat this as uncompartmentalized for now)
    compartments_df = sbml_dfs_core._stub_compartments()

    # Per convention unaggregated models receive an empty source
    interaction_source = source.Source(init=True)

    sbml_dfs = sbml_dfs_core.sbml_dfs_from_edgelist(
        interaction_edgelist=interaction_edgelist,
        species_df=species_df,
        compartments_df=compartments_df,
        interaction_source=interaction_source,
        # additional attributes (directness) are added to reactions_data
        keep_reactions_data="idea",
    )
    # run the model's own validation before handing it back to the caller
    sbml_dfs.validate()

    return sbml_dfs
|
124
|
+
|
125
|
+
|
126
|
+
def _summarize_idea_pairs(pairs_data: pd.DataFrame) -> pd.Series:
    """Rollup multiple records of a TF->target pair into a single summary.

    Args:
        pairs_data: all rows for one TF-target pair. Must contain the columns
            "v_inter", "v_final", "t_rise", "t_fall", "rate" and "directness".

    Returns:
        pd.Series: the six columns above taken from the row with the smallest
        "t_rise", plus a "role" entry: "stimulator" when every v_inter is
        positive, "inhibitor" when every v_inter is negative, and "modifier"
        when both signs are present.

    Raises:
        ValueError: if the v_inter values fit none of the cases above
            (e.g. zeros or missing values without both signs being present).
    """

    # specify how to aggregate results if there are more than one entry for a TF-target pair
    # pull most attributes from the earliest change
    # this will favor direct over indirect naturally
    earliest_change = pairs_data.sort_values("t_rise").iloc[0].to_dict()

    keys_summarized = ("v_inter", "v_final", "t_rise", "t_fall", "rate", "directness")
    kinetic_timing_dict = {k: earliest_change[k] for k in keys_summarized}

    # map v_inter (log2 fold-change change following perturbation) onto SBO terms for interactions
    v_inter = pairs_data["v_inter"]
    is_positive = v_inter > 0
    is_negative = v_inter < 0

    if is_positive.any() and is_negative.any():
        kinetic_timing_dict["role"] = "modifier"
    elif is_positive.all():
        kinetic_timing_dict["role"] = "stimulator"
    elif is_negative.all():
        kinetic_timing_dict["role"] = "inhibitor"
    else:
        # the exception must be raised, not merely constructed; otherwise the
        # summary is silently returned without a "role" entry
        raise ValueError("Unexpected v_inter values")

    return pd.Series(kinetic_timing_dict)
|