napistu 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__init__.py +12 -0
- napistu/__main__.py +867 -0
- napistu/consensus.py +1557 -0
- napistu/constants.py +500 -0
- napistu/gcs/__init__.py +10 -0
- napistu/gcs/constants.py +69 -0
- napistu/gcs/downloads.py +180 -0
- napistu/identifiers.py +805 -0
- napistu/indices.py +227 -0
- napistu/ingestion/__init__.py +10 -0
- napistu/ingestion/bigg.py +146 -0
- napistu/ingestion/constants.py +296 -0
- napistu/ingestion/cpr_edgelist.py +106 -0
- napistu/ingestion/identifiers_etl.py +148 -0
- napistu/ingestion/obo.py +268 -0
- napistu/ingestion/psi_mi.py +276 -0
- napistu/ingestion/reactome.py +218 -0
- napistu/ingestion/sbml.py +621 -0
- napistu/ingestion/string.py +356 -0
- napistu/ingestion/trrust.py +285 -0
- napistu/ingestion/yeast.py +147 -0
- napistu/mechanism_matching.py +597 -0
- napistu/modify/__init__.py +10 -0
- napistu/modify/constants.py +86 -0
- napistu/modify/curation.py +628 -0
- napistu/modify/gaps.py +635 -0
- napistu/modify/pathwayannot.py +1381 -0
- napistu/modify/uncompartmentalize.py +264 -0
- napistu/network/__init__.py +10 -0
- napistu/network/constants.py +117 -0
- napistu/network/neighborhoods.py +1594 -0
- napistu/network/net_create.py +1647 -0
- napistu/network/net_utils.py +652 -0
- napistu/network/paths.py +500 -0
- napistu/network/precompute.py +221 -0
- napistu/rpy2/__init__.py +127 -0
- napistu/rpy2/callr.py +168 -0
- napistu/rpy2/constants.py +101 -0
- napistu/rpy2/netcontextr.py +464 -0
- napistu/rpy2/rids.py +697 -0
- napistu/sbml_dfs_core.py +2216 -0
- napistu/sbml_dfs_utils.py +304 -0
- napistu/source.py +394 -0
- napistu/utils.py +943 -0
- napistu-0.1.0.dist-info/METADATA +56 -0
- napistu-0.1.0.dist-info/RECORD +77 -0
- napistu-0.1.0.dist-info/WHEEL +5 -0
- napistu-0.1.0.dist-info/entry_points.txt +2 -0
- napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
- napistu-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +0 -0
- tests/conftest.py +83 -0
- tests/test_consensus.py +255 -0
- tests/test_constants.py +20 -0
- tests/test_curation.py +134 -0
- tests/test_data/__init__.py +0 -0
- tests/test_edgelist.py +20 -0
- tests/test_gcs.py +23 -0
- tests/test_identifiers.py +151 -0
- tests/test_igraph.py +353 -0
- tests/test_indices.py +88 -0
- tests/test_mechanism_matching.py +126 -0
- tests/test_net_utils.py +66 -0
- tests/test_netcontextr.py +105 -0
- tests/test_obo.py +34 -0
- tests/test_pathwayannot.py +95 -0
- tests/test_precomputed_distances.py +222 -0
- tests/test_rpy2.py +61 -0
- tests/test_sbml.py +46 -0
- tests/test_sbml_dfs_create.py +307 -0
- tests/test_sbml_dfs_utils.py +22 -0
- tests/test_sbo.py +11 -0
- tests/test_set_coverage.py +50 -0
- tests/test_source.py +67 -0
- tests/test_uncompartmentalize.py +40 -0
- tests/test_utils.py +487 -0
- tests/utils.py +30 -0
napistu/__main__.py
ADDED
@@ -0,0 +1,867 @@
|
|
1
|
+
"""The CLI for cpr"""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
import logging
|
6
|
+
import os
|
7
|
+
import pickle
|
8
|
+
from typing import Sequence
|
9
|
+
|
10
|
+
import click
|
11
|
+
import click_logging
|
12
|
+
import napistu
|
13
|
+
import igraph as ig
|
14
|
+
import pandas as pd
|
15
|
+
from napistu import consensus as cpr_consensus
|
16
|
+
from napistu import constants
|
17
|
+
from napistu import indices
|
18
|
+
from napistu import sbml_dfs_core
|
19
|
+
from napistu import utils
|
20
|
+
from napistu.ingestion import bigg
|
21
|
+
from napistu.ingestion import reactome
|
22
|
+
from napistu.ingestion import sbml
|
23
|
+
from napistu.ingestion import string
|
24
|
+
from napistu.ingestion import trrust
|
25
|
+
from napistu.modify import curation
|
26
|
+
from napistu.modify import gaps
|
27
|
+
from napistu.modify import pathwayannot
|
28
|
+
from napistu.modify import uncompartmentalize
|
29
|
+
from napistu.network import net_create
|
30
|
+
from napistu.network import net_utils
|
31
|
+
from napistu.network import precompute
|
32
|
+
from napistu.rpy2 import has_rpy2
|
33
|
+
from fs import open_fs
|
34
|
+
|
35
|
+
if has_rpy2:
|
36
|
+
from napistu.rpy2 import rids
|
37
|
+
from napistu.rpy2 import netcontextr, callr
|
38
|
+
|
39
|
+
logger = logging.getLogger(napistu.__name__)
|
40
|
+
click_logging.basic_config(logger)
|
41
|
+
|
42
|
+
ALL = "all"
|
43
|
+
|
44
|
+
|
45
|
+
@click.group()
def cli():
    """The Calico Pathway Resources CLI"""
    # Root command group; subgroups (load, integrate, ...) attach below.
|
49
|
+
|
50
|
+
|
51
|
+
@click.group()
def load():
    """Command line tools to retrieve raw data."""
    # Container group only; the real work lives in the @load.command functions.
|
55
|
+
|
56
|
+
|
57
|
+
@load.command(name="reactome")
@click.argument("base_folder", type=str)
@click.option(
    "--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
)
@click_logging.simple_verbosity_option(logger)
def load_reactome(base_folder: str, overwrite: bool = False):
    """Download the Reactome SBML release into BASE_FOLDER/sbml."""
    # FIX: the signature default was `overwrite=True`, contradicting the CLI
    # flag default of False. Click always passes the flag so CLI behavior is
    # unchanged, but direct callers now get the documented default.
    logger.info("Start downloading Reactome to %s", base_folder)
    reactome.reactome_sbml_download(f"{base_folder}/sbml", overwrite=overwrite)
|
66
|
+
|
67
|
+
|
68
|
+
@load.command(name="bigg")
@click.argument("base_folder", type=str)
@click.option(
    "--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
)
@click_logging.simple_verbosity_option(logger)
def load_bigg(base_folder: str, overwrite: bool):
    # Fetch the BiGG SBML model collection into base_folder.
    logger.info("Start downloading Bigg to %s", base_folder)
    bigg.bigg_sbml_download(base_folder, overwrite)
|
77
|
+
|
78
|
+
|
79
|
+
@load.command(name="trrust")
@click.argument("target_uri", type=str)
@click_logging.simple_verbosity_option(logger)
def load_ttrust(target_uri: str):
    # Fetch the TRRUST regulatory-interaction table to target_uri.
    # NOTE(review): the function name drops an "r" ("ttrust"); kept for
    # backward compatibility — the registered CLI name is the correct "trrust".
    logger.info("Start downloading TRRUST to %s", target_uri)
    trrust.download_trrust(target_uri)
|
85
|
+
|
86
|
+
|
87
|
+
@load.command(name="proteinatlas-subcell")
@click.argument("target_uri", type=str)
@click.option(
    "--url",
    type=str,
    default=constants.PROTEINATLAS_SUBCELL_LOC_URL,
    help="URL to download the zipped protein atlas subcellular localization tsv from.",
)
@click_logging.simple_verbosity_option(logger)
def load_proteinatlas_subcell(target_uri: str, url: str):
    """Download the Human Protein Atlas subcellular-localization archive."""
    # FIX: derive the extension from the *requested* url. The original read it
    # from the default-URL constant, producing a wrong target_filename whenever
    # a custom --url with a different extension was supplied.
    file_ext = url.split(".")[-1]
    # Target file name = final path component with the extension stripped.
    target_filename = url.split("/")[-1].split(f".{file_ext}")[0]
    logger.info("Start downloading proteinatlas %s to %s", url, target_uri)
    utils.download_wget(url, target_uri, target_filename=target_filename)
|
101
|
+
|
102
|
+
|
103
|
+
@load.command(name="gtex-rnaseq-expression")
@click.argument("target_uri", type=str)
@click.option(
    "--url",
    type=str,
    default=constants.GTEX_RNASEQ_EXPRESSION_URL,
    help="URL to download the gtex file from.",
)
@click_logging.simple_verbosity_option(logger)
def load_gtex_rnaseq(target_uri: str, url: str):
    # Fetch the GTEx RNA-seq expression file to target_uri.
    logger.info("Start downloading gtex %s to %s", url, target_uri)
    utils.download_wget(url, target_uri)
|
115
|
+
|
116
|
+
|
117
|
+
@load.command(name="string-db")
@click.argument("target_uri", type=str)
@click.option(
    "--species",
    type=str,
    default="Homo sapiens",
    help="Species name (e.g., Homo sapiens).",
)
@click_logging.simple_verbosity_option(logger)
def load_string_db(target_uri: str, species: str):
    # Fetch the STRING interaction database for one species.
    string.download_string(target_uri, species)
|
128
|
+
|
129
|
+
|
130
|
+
@load.command(name="string-aliases")
@click.argument("target_uri", type=str)
@click.option(
    "--species",
    type=str,
    default="Homo sapiens",
    help="Species name (e.g., Homo sapiens).",
)
@click_logging.simple_verbosity_option(logger)
def load_string_aliases(target_uri: str, species: str):
    # Fetch the STRING identifier-alias table for one species.
    string.download_string_aliases(target_uri, species)
|
141
|
+
|
142
|
+
|
143
|
+
@click.group()
def integrate():
    """Command line tools to integrate raw models into a single SBML_dfs model"""
    # Container group only.
|
147
|
+
|
148
|
+
|
149
|
+
@integrate.command(name="reactome")
@click.argument("pw_index_uri", type=str)
@click.argument("output_model_uri", type=str)
@click.option("--species", "-s", multiple=True, default=(ALL,))
@click.option(
    "--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
)
@click.option(
    "--permissive",
    "-p",
    is_flag=True,
    default=False,
    help="Can parsing failures in submodels throw warnings instead of exceptions?",
)
@click_logging.simple_verbosity_option(logger)
def integrate_reactome(
    pw_index_uri: str,
    output_model_uri: str,
    species: Sequence[str] | None,
    overwrite=False,
    permissive=False,
):
    """Integrates reactome models based on a pw_index"""
    # Refuse to clobber an existing output unless --overwrite was given.
    if not overwrite and utils.path_exists(output_model_uri):
        raise FileExistsError("'output_model_uri' exists but overwrite set False.")
    # A lone "all" sentinel means "no species filter".
    if species is not None and list(species) == [ALL]:
        species = None

    strict = not permissive
    logger.debug(f"permissive = {permissive}; strict = {strict}")

    consensus_model = reactome.construct_reactome_consensus(
        pw_index_uri, species=species, strict=strict
    )
    utils.save_pickle(output_model_uri, consensus_model)
|
184
|
+
|
185
|
+
|
186
|
+
@integrate.command(name="bigg")
@click.argument("pw_index_uri", type=str)
@click.argument("output_model_uri", type=str)
@click.option("--species", "-s", multiple=True, default=(ALL,))
@click.option(
    "--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
)
@click_logging.simple_verbosity_option(logger)
def integrate_bigg(
    pw_index_uri: str,
    output_model_uri: str,
    species: Sequence[str] | None,
    overwrite=False,
):
    """Integrates bigg models based on a pw_index"""
    # Refuse to clobber an existing output unless --overwrite was given.
    if not overwrite and utils.path_exists(output_model_uri):
        raise FileExistsError("'output_model_uri' exists but overwrite set False.")
    # A lone "all" sentinel means "no species filter".
    if species is not None and list(species) == [ALL]:
        species = None
    utils.save_pickle(
        output_model_uri, bigg.construct_bigg_consensus(pw_index_uri, species=species)
    )
|
207
|
+
|
208
|
+
|
209
|
+
@integrate.command(name="trrust")
@click.argument("trrust_csv_uri", type=str)
@click.argument("output_model_uri", type=str)
@click.option(
    "--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
)
@click_logging.simple_verbosity_option(logger)
def integrate_trrust(
    trrust_csv_uri: str,
    output_model_uri: str,
    overwrite=False,
):
    """Converts TRRUST csv to SBML_dfs model"""
    # Refuse to clobber an existing output unless --overwrite was given.
    if not overwrite and utils.path_exists(output_model_uri):
        raise FileExistsError("'output_model_uri' exists but overwrite set False.")
    logger.info("Start converting TRRUST csv to SBML_dfs")
    sbml_dfs = trrust.convert_trrust_to_sbml_dfs(trrust_csv_uri)
    logger.info("Save SBML_dfs model to %s", output_model_uri)
    utils.save_pickle(output_model_uri, sbml_dfs)
|
228
|
+
|
229
|
+
|
230
|
+
@integrate.command(name="string-db")
@click.argument("string_db_uri", type=str)
@click.argument("string_aliases_uri", type=str)
@click.argument("output_model_uri", type=str)
@click.option(
    "--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
)
@click_logging.simple_verbosity_option(logger)
def integrate_string_db(
    string_db_uri: str, string_aliases_uri: str, output_model_uri: str, overwrite=False
):
    """Converts string-db to the sbml_dfs format"""
    # Refuse to clobber an existing output unless --overwrite was given.
    if not overwrite and utils.path_exists(output_model_uri):
        raise FileExistsError("'output_model_uri' exists but overwrite set False.")
    logger.info("Start converting string-db to SBML_dfs")
    sbml_dfs = string.convert_string_to_sbml_dfs(string_db_uri, string_aliases_uri)
    logger.info("Save SBML_dfs model to %s", output_model_uri)
    utils.save_pickle(output_model_uri, sbml_dfs)
|
248
|
+
|
249
|
+
|
250
|
+
@click.group()
def consensus():
    """Command line tools to create a consensus model from SBML_dfs"""
    # Container group only.
|
254
|
+
|
255
|
+
|
256
|
+
@consensus.command(name="create")
@click.argument("sbml_dfs_uris", type=str, nargs=-1)
@click.argument("output_model_uri", type=str, nargs=1)
@click.option(
    "--nondogmatic",
    "-n",
    is_flag=True,
    default=False,
    help="Run in non-dogmatic mode (trying to merge genes and proteins)?",
)
@click_logging.simple_verbosity_option(logger)
def create_consensus(
    sbml_dfs_uris: Sequence[str], output_model_uri: str, nondogmatic: bool
):
    """Create a consensus model from a list of SBML_dfs"""
    dogmatic = not nondogmatic
    logger.debug(f"nondogmatic = {nondogmatic}; dogmatic = {dogmatic}")
    logger.info(
        f"Creating a consensus from {len(sbml_dfs_uris)} sbml_dfs where dogmatic = {dogmatic}"
    )

    # Load each input model keyed by its uri; the uris double as the
    # pathway_id / source / name columns of a synthetic pathway index.
    sbml_dfs_dict = {uri: utils.load_pickle(uri) for uri in sbml_dfs_uris}
    uris = list(sbml_dfs_dict)
    pw_index_df = pd.DataFrame(
        {
            "file": sbml_dfs_uris,
            "pathway_id": uris,
            "source": uris,
            "name": uris,
            # TODO: Discuss with Sean how to deal with date in pw_index
            "date": "1900-01-01",
        }
    )
    pw_index_df["species"] = "unknown"
    pw_index = indices.PWIndex(pw_index=pw_index_df, validate_paths=False)

    consensus_model = cpr_consensus.construct_consensus_model(
        sbml_dfs_dict, pw_index, dogmatic
    )
    utils.save_pickle(output_model_uri, consensus_model)
|
295
|
+
|
296
|
+
|
297
|
+
@click.group()
def refine():
    """Command line tools to refine a consensus model"""
    # Container group only.
|
301
|
+
|
302
|
+
|
303
|
+
@refine.command(name="add_reactome_entity_sets")
@click.argument("model_uri", type=str)
@click.argument("entity_set_csv", type=str)
@click.argument("output_model_uri", type=str)
def add_reactome_entity_sets(
    model_uri: str, entity_set_csv: str, output_model_uri: str
):
    """Add reactome entity sets to a consensus model

    The entity set csv is classically exported from the neo4j reactome
    database.
    """
    # load -> annotate -> save
    sbml_dfs = utils.load_pickle(model_uri)
    sbml_dfs = pathwayannot.add_entity_sets(sbml_dfs, entity_set_csv)
    utils.save_pickle(output_model_uri, sbml_dfs)
|
318
|
+
|
319
|
+
|
320
|
+
@refine.command(name="add_reactome_identifiers")
@click.argument("model_uri", type=str)
@click.argument("crossref_csv", type=str)
@click.argument("output_model_uri", type=str)
def add_reactome_identifiers(model_uri: str, crossref_csv: str, output_model_uri: str):
    """Add reactome identifiers to a consensus model

    The crossref csv is classically exported from the neo4j reactome
    database.
    """
    # load -> annotate -> save
    sbml_dfs = utils.load_pickle(model_uri)
    sbml_dfs = pathwayannot.add_reactome_identifiers(sbml_dfs, crossref_csv)
    utils.save_pickle(output_model_uri, sbml_dfs)
|
333
|
+
|
334
|
+
|
335
|
+
@refine.command(name="infer_uncompartmentalized_species_location")
@click.argument("model_uri", type=str)
@click.argument("output_model_uri", type=str)
def infer_uncompartmentalized_species_location(model_uri: str, output_model_uri: str):
    """
    Infer Uncompartmentalized Species Location

    If the compartment of a subset of compartmentalized species was
    not specified, infer an appropriate compartment from other members of reactions they particpate in
    """
    # load -> transform -> save
    sbml_dfs = utils.load_pickle(model_uri)
    sbml_dfs = sbml_dfs_core.infer_uncompartmentalized_species_location(sbml_dfs)
    utils.save_pickle(output_model_uri, sbml_dfs)
|
348
|
+
|
349
|
+
|
350
|
+
@refine.command(name="name_compartmentalized_species")
@click.argument("model_uri", type=str)
@click.argument("output_model_uri", type=str)
def name_compartmentalized_species(model_uri: str, output_model_uri: str):
    """
    Name Compartmentalized Species

    Rename compartmentalized species if they have the same name as their species
    """
    # load -> transform -> save
    sbml_dfs = utils.load_pickle(model_uri)
    sbml_dfs = sbml_dfs_core.name_compartmentalized_species(sbml_dfs)
    utils.save_pickle(output_model_uri, sbml_dfs)
|
362
|
+
|
363
|
+
|
364
|
+
@refine.command(name="merge_model_compartments")
@click.argument("model_uri", type=str)
@click.argument("output_model_uri", type=str)
def merge_model_compartments(model_uri: str, output_model_uri: str):
    """Take a compartmentalized mechanistic model and merge all of the compartments."""
    # load -> collapse compartments -> save
    sbml_dfs = utils.load_pickle(model_uri)
    sbml_dfs = uncompartmentalize.uncompartmentalize_sbml_dfs(sbml_dfs)
    utils.save_pickle(output_model_uri, sbml_dfs)
|
372
|
+
|
373
|
+
|
374
|
+
@refine.command(name="drop_cofactors")
@click.argument("model_uri", type=str)
@click.argument("output_model_uri", type=str)
def drop_cofactors(model_uri: str, output_model_uri: str):
    """Remove reaction species acting as cofactors"""
    # load -> strip cofactor species -> save
    sbml_dfs = utils.load_pickle(model_uri)
    sbml_dfs = pathwayannot.drop_cofactors(sbml_dfs)
    utils.save_pickle(output_model_uri, sbml_dfs)
|
382
|
+
|
383
|
+
|
384
|
+
@refine.command(name="add_transportation_reactions")
@click.argument("model_uri", type=str)
@click.argument("output_model_uri", type=str)
@click.option(
    "--exchange-compartment",
    "-e",
    default="cytosol",
    help="Exchange compartment for new transport reactions.",
)
@click_logging.simple_verbosity_option(logger)
def add_transportation_reaction(
    model_uri, output_model_uri, exchange_compartment="cytosol"
):
    """Add transportation reactions to a consensus model"""
    # load -> add transport reactions through the exchange compartment -> save
    sbml_dfs = utils.load_pickle(model_uri)
    sbml_dfs = gaps.add_transportation_reactions(
        sbml_dfs, exchange_compartment=exchange_compartment
    )
    utils.save_pickle(output_model_uri, sbml_dfs)
|
404
|
+
|
405
|
+
|
406
|
+
@refine.command(name="apply_manual_curations")
@click.argument("model_uri", type=str)
@click.argument("curation_dir", type=str)
@click.argument("output_model_uri", type=str)
def apply_manual_curations(model_uri: str, curation_dir: str, output_model_uri: str):
    """Apply manual curations to a consensus model

    The curation dir is a directory containing the manual curations
    Check cpr.curation.curate_sbml_dfs for more information.
    """
    # load -> curate -> save
    sbml_dfs = utils.load_pickle(model_uri)
    sbml_dfs = curation.curate_sbml_dfs(curation_dir=curation_dir, sbml_dfs=sbml_dfs)
    utils.save_pickle(output_model_uri, sbml_dfs)
|
419
|
+
|
420
|
+
|
421
|
+
@refine.command(name="expand_identifiers")
@click.argument("model_uri", type=str)
@click.argument("output_model_uri", type=str)
@click.option(
    "--id-type",
    "-u",
    type=click.Choice(["species", "compartments", "reactions"]),
    default="species",
)
@click.option("--species", "-s", default="Homo sapiens", type=str)
@click.option(
    "--ontologies", "-o", multiple=True, type=str, help="Ontologies to add or complete"
)
def expand_identifiers(
    model_uri: str,
    output_model_uri: str,
    id_type: str,
    species: str,
    ontologies: list[str],
):
    """Expand identifiers of a model

    Args:
        model_uri (str): uri of model in sbml dfs format
        output_model_uri (str): output uri of model in sbml dfs format
        id_type (str): identifier type, one of: species, compartments, reactions
        species (str): Species to use
        ontologies (list[str]): ontologies to add or update

    Example call:
    > cpr refine expand_identifiers gs://<uri> ./test.pickle -o ensembl_gene
    """
    # FIX: validate arguments *before* loading the (potentially large/remote)
    # model pickle, so a missing -o fails fast instead of after a long load.
    if len(ontologies) == 0:
        raise ValueError("No ontologies to expand specified.")

    model: sbml.SBML_dfs = utils.load_pickle(model_uri)  # type: ignore
    expanded_ids = rids.expand_identifiers(model, id_type, species, ontologies)
    # NOTE(review): update_expanded_identifiers appears to mutate `model`
    # in place (its return value was never used) — confirm against rids.
    rids.update_expanded_identifiers(model, id_type, expanded_ids)
    utils.save_pickle(output_model_uri, model)
|
460
|
+
|
461
|
+
|
462
|
+
@integrate.command(name="dogmatic_scaffold")
@click.argument("output_model_uri", type=str)
@click.option("--species", "-s", default="Homo sapiens", type=str)
def dogmatic_scaffold(
    output_model_uri: str,
    species: str,
):
    """Dogmatic Scaffold

    Args:
        output_model_uri (str): output uri of model in sbml dfs format
        species (str): Species to use

    Example call:
    > cpr integrate dogmatic_scaffold ./test.pickle
    """
    # Build the species-specific dogmatic scaffold and persist it.
    utils.save_pickle(output_model_uri, rids.create_dogmatic_sbml_dfs(species))
|
481
|
+
|
482
|
+
|
483
|
+
@refine.command(name="filter_gtex_tissue")
@click.argument("model_uri", type=str)
@click.argument("gtex_file_uri", type=str)
@click.argument("output_model_uri", type=str)
@click.argument("tissue", type=str)
@click.option(
    "--filter-non-genic-reactions",
    "-f",
    default=False,
    type=bool,
    help="Filter reactions not involving genes?",
)
@click_logging.simple_verbosity_option(logger)
def filter_gtex_tissue(
    model_uri: str,
    gtex_file_uri: str,
    output_model_uri: str,
    tissue: str,
    filter_non_genic_reactions: bool,
):
    """Filter model by the gtex tissue expression

    This uses zfpkm values derived from gtex to filter the model.
    """
    logger.info("Get rcpr from R")
    rcpr = callr.get_rcpr()
    logger.info("Load sbml_dfs model")
    model: sbml.SBML_dfs = utils.load_pickle(model_uri)  # type: ignore
    logger.info("Load and clean gtex tissue expression")
    gtex_dat = netcontextr.load_and_clean_gtex_data(
        rcpr, gtex_file_uri, by_tissue_zfpkm=True
    )
    logger.info("Convert sbml_dfs to rcpr reaction graph")
    reactions_r = netcontextr.sbml_dfs_to_rcpr_reactions(model)
    logger.info("Annotate genes with gtex tissue expression")
    annotated = netcontextr.annotate_genes(rcpr, reactions_r, gtex_dat, "tissue")
    logger.info("Trim network by gene attribute")
    trimmed = netcontextr.trim_reactions_by_gene_attribute(
        rcpr, annotated, "tissue", tissue
    )
    logger.info("Apply trimmed network")

    # considered_reactions=None lets the context apply to every reaction
    # (dropping non-genic ones); otherwise only the genic reactions present
    # in the rcpr graph are touched.
    if filter_non_genic_reactions:
        logger.info("Filter non genic reactions")
        considered_reactions = None
    else:
        logger.info("Keep genic reactions")
        considered_reactions = rcpr._get_rids_from_rcpr_reactions(reactions_r)
    netcontextr.apply_reactions_context_to_sbml_dfs(
        model, trimmed, considered_reactions=considered_reactions
    )
    logger.info("Save model to %s", output_model_uri)
    utils.save_pickle(output_model_uri, model)
|
536
|
+
|
537
|
+
|
538
|
+
@refine.command(name="filter_hpa_compartments")
@click.argument("model_uri", type=str)
@click.argument("hpa_file_uri", type=str)
@click.argument("output_model_uri", type=str)
@click_logging.simple_verbosity_option(logger)
def filter_hpa_gene_compartments(
    model_uri: str, hpa_file_uri: str, output_model_uri: str
):
    """Filter an interaction network using the human protein atlas

    This uses R `rcpr` to filter an interaction network based on the
    compartment information from the human protein atlas.

    Only interactions between genes in the same compartment are kept.
    """
    logger.info("Get rcpr from R")
    rcpr = callr.get_rcpr()
    logger.info("Load sbml_dfs model")
    model: sbml.SBML_dfs = utils.load_pickle(model_uri)  # type: ignore
    logger.info("Load and clean hpa data")
    hpa_dat = netcontextr.load_and_clean_hpa_data(rcpr, hpa_file_uri)
    logger.info("Convert sbml_dfs to rcpr string graph")
    graph_r = netcontextr.sbml_dfs_to_rcpr_string_graph(model)
    logger.info("Annotate genes with HPA compartments")
    annotated = netcontextr.annotate_genes(rcpr, graph_r, hpa_dat, "compartment")
    logger.info("Trim network by gene attribute")
    trimmed = netcontextr.trim_network_by_gene_attribute(
        rcpr, annotated, "compartment"
    )
    logger.info("Apply trimmed network")
    # apply_context_to_sbml_dfs mutates `model` in place.
    netcontextr.apply_context_to_sbml_dfs(model, trimmed)
    logger.info("Save model to %s", output_model_uri)
    utils.save_pickle(output_model_uri, model)
|
571
|
+
|
572
|
+
|
573
|
+
@click.group()
def exporter():
    """Command line tools to export a consensus model
    to various formats
    """
    # Container group only.
|
579
|
+
|
580
|
+
|
581
|
+
@exporter.command(name="export_igraph")
@click.argument("model_uri", type=str)
@click.argument("output_uri", type=str)
@click.option(
    "--graph_attrs_spec_uri",
    "-a",
    default=None,
    help="File specifying reaction and/or species attributes to add to the graph",
)
@click.option(
    "--format", "-f", default="pickle", help="Output format: gml, edgelist, pickle"
)
@click.option(
    "--graph_type", "-g", type=str, default="bipartite", help="bipartite or regulatory"
)
@click.option(
    "--weighting_strategy",
    "-w",
    type=str,
    default="unweighted",
    help="Approach to adding weights to the network",
)
@click.option(
    "--directed", "-d", type=bool, default=True, help="Directed or undirected graph?"
)
@click.option(
    "--reverse",
    "-r",
    type=bool,
    default=False,
    help="Reverse edges so they flow from effects to causes?",
)
def export_igraph(
    model_uri: str,
    output_uri: str,
    graph_attrs_spec_uri: str | None,
    format: str,
    graph_type: str,
    weighting_strategy: str,
    directed: bool,
    reverse: bool,
):
    """Export the consensus model as an igraph object"""
    model = utils.load_pickle(model_uri)

    # Optional attribute spec: absent by default.
    graph_attrs_spec = (
        None
        if graph_attrs_spec_uri is None
        else net_utils.read_graph_attrs_spec(graph_attrs_spec_uri)
    )

    cpr_graph = net_create.process_cpr_graph(
        model,
        reaction_graph_attrs=graph_attrs_spec,
        directed=directed,
        edge_reversed=reverse,
        graph_type=graph_type,
        weighting_strategy=weighting_strategy,
        verbose=True,
    )

    # Split the uri into a filesystem root and a file name within it.
    base, path = os.path.split(output_uri)
    with open_fs(base, create=True, writeable=True) as fs, fs.openbin(
        path, "wb"
    ) as f:
        if format == "gml":
            cpr_graph.write_gml(f)
        elif format == "edgelist":
            cpr_graph.write_edgelist(f)
        elif format == "pickle":
            pickle.dump(cpr_graph, f)
        else:
            raise ValueError("Unknown format: %s" % format)
|
652
|
+
|
653
|
+
|
654
|
+
@exporter.command(name="export_precomputed_distances")
@click.argument("graph_uri", type=str)
@click.argument("output_uri", type=str)
@click.option(
    "--format",
    "-f",
    type=str,
    default="pickle",
    help="Input igraph format: gml, edgelist, pickle",
)
@click.option(
    "--max_steps",
    "-s",
    type=int,
    default=-1,
    help="The max number of steps between pairs of species to save a distance",
)
@click.option(
    "--max_score_q",
    "-q",
    type=float,
    default=1,
    help='Retain up to the "max_score_q" quantiles of all scores (small scores are better)',
)
@click.option(
    "--partition_size",
    "-p",
    type=int,
    default=5000,
    help="The number of species to process together when computing distances",
)
@click.option(
    "--weights_vars",
    "-w",
    type=str,
    default=["weights", "upstream_weights"],
    help="One or more variables defining edge weights to use when calculating weighted shortest paths.",
)
def export_precomputed_distances(
    graph_uri: str,
    output_uri: str,
    format: str,
    max_steps: int,
    max_score_q: float,
    partition_size: int,
    weights_vars: str,
):
    """Export precomputed distances for the igraph object"""
    # Read the serialized igraph back in the format it was written with.
    base, path = os.path.split(graph_uri)
    with open_fs(base) as fs, fs.openbin(path) as f:
        if format == "gml":
            cpr_graph = ig.Graph.Read_GML(f)
        elif format == "edgelist":
            cpr_graph = ig.Graph.Read_Edgelist(f)
        elif format == "pickle":
            cpr_graph = ig.Graph.Read_Pickle(f)
        else:
            raise ValueError("Unknown format: %s" % format)

    # convert weight vars from a str to list
    weights_vars_list = utils.click_str_to_list(weights_vars)

    precomputed_distances = precompute.precompute_distances(
        cpr_graph,
        max_steps=max_steps,
        max_score_q=max_score_q,
        partition_size=partition_size,
        weights_vars=weights_vars_list,
    )

    utils.save_json(output_uri, precomputed_distances.to_json())
|
727
|
+
|
728
|
+
|
729
|
+
@exporter.command(name="export_smbl_dfs_tables")
@click.argument("model_uri", type=str)
@click.argument("output_uri", type=str)
@click.option(
    "--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
)
@click.option(
    "--model-prefix", "-m", type=str, default="", help="Model prefix for files?"
)
@click.option(
    "--nondogmatic",
    "-n",
    is_flag=True,
    default=False,
    help="Run in non-dogmatic mode (trying to merge genes and proteins)?",
)
@click_logging.simple_verbosity_option(logger)
def export_sbml_dfs_tables(
    model_uri: str,
    output_uri: str,
    overwrite: bool = False,
    model_prefix: str = "",
    # FIX: the Python default was True while the click flag defaults to False;
    # the CLI always passes the flag so click invocations were unaffected, but a
    # direct caller omitting `nondogmatic` silently got non-dogmatic mode.
    # Align the signature default with the flag default.
    nondogmatic: bool = False,
):
    """Export the consensus model as a collection of table"""

    # the flag expresses the negation of the mode the exporter consumes
    dogmatic = not nondogmatic
    logger.debug(f"nondogmatic = {nondogmatic}; dogmatic = {dogmatic}")
    logger.info(f"Exporting tables with dogmatic = {dogmatic}")

    # unpickle the consensus model and write its tables to output_uri
    model = utils.load_pickle(model_uri)
    sbml_dfs_core.export_sbml_dfs(
        model_prefix, model, output_uri, overwrite=overwrite, dogmatic=dogmatic
    )
|
763
|
+
|
764
|
+
|
765
|
+
@click.group()
def importer():
    # click group: subcommands are attached via @importer.command below
    """Tools to import sbml_dfs directly form other sources"""
|
769
|
+
|
770
|
+
|
771
|
+
@importer.command(name="sbml_dfs")
@click.argument("input_uri", type=str)
@click.argument("output_uri", type=str)
@click_logging.simple_verbosity_option(logger)
def import_sbml_dfs_from_sbml_dfs_uri(input_uri, output_uri):
    """Import sbml_dfs from an uri, eg another GCS bucket"""
    logger.info("Load sbml_dfs from %s", input_uri)
    # We could also just copy the file, but I think validating
    # the filetype is a good idea to prevent downstream errors.
    sbml_dfs = utils.load_pickle(input_uri)
    # FIX: SBML_dfs is the class in sbml_dfs_core (constructed and annotated as
    # sbml_dfs_core.SBML_dfs elsewhere in this file); `sbml.SBML_dfs` would raise
    # AttributeError rather than the intended ValueError. NOTE(review): confirm
    # the sbml module does not re-export SBML_dfs.
    if not isinstance(sbml_dfs, sbml_dfs_core.SBML_dfs):
        raise ValueError(
            f"Pickled input is not an SBML_dfs object but {type(sbml_dfs)}: {input_uri}"
        )
    logger.info("Save file to %s", output_uri)
    utils.save_pickle(output_uri, sbml_dfs)
|
787
|
+
|
788
|
+
|
789
|
+
@importer.command(name="sbml")
@click.argument("input_uri", type=str)
@click.argument("output_uri", type=str)
@click_logging.simple_verbosity_option(logger)
def import_sbml_dfs_from_sbml(input_uri, output_uri):
    """Import sbml_dfs from a sbml file"""
    logger.info("Load sbml from %s", input_uri)
    # parsing the SBML file up front validates the filetype and
    # prevents downstream errors
    parsed_sbml = sbml.SBML(input_uri)
    logger.info("Convert file to sbml_dfs")
    model = sbml_dfs_core.SBML_dfs(parsed_sbml)
    logger.info("Save file to %s", output_uri)
    utils.save_pickle(output_uri, model)
|
803
|
+
|
804
|
+
|
805
|
+
@click.group()
def contextualizer():
    # click group placeholder; no subcommands are attached in this file
    """Command line tools to contextualize a pathway model"""
|
809
|
+
|
810
|
+
|
811
|
+
@click.group()
def helpers():
    # click group: subcommands are attached via @helpers.command below
    """Various helper functions"""
|
815
|
+
|
816
|
+
|
817
|
+
@helpers.command(name="copy_uri")
@click.argument("input_uri", type=str)
@click.argument("output_uri", type=str)
@click.option("--is-file", type=bool, default=True, help="Is the input a file?")
@click_logging.simple_verbosity_option(logger)
def copy_uri(input_uri: str, output_uri: str, is_file: bool = True):
    """Copy a uri representing a file or folder from one location to another"""
    logger.info("Copy uri from %s to %s", input_uri, output_uri)
    # delegate to the shared utility so file vs. folder handling stays in one place
    utils.copy_uri(input_uri, output_uri, is_file=is_file)
|
826
|
+
|
827
|
+
|
828
|
+
@click.group()
def stats():
    # click group: subcommands are attached via @stats.command below
    """Various functions to calculate network statistics

    The statistics are saved as json files
    """
|
835
|
+
|
836
|
+
|
837
|
+
@stats.command(name="sbml_dfs_network")
@click.argument("input_uri", type=str)
@click.argument("output_uri", type=str)
def calculate_sbml_dfs_stats(input_uri, output_uri):
    """Calculate statistics for a sbml_dfs object"""
    # unpickle the model, summarize it, and persist the summary as JSON
    # (local renamed from `stats`, which shadowed the click group above)
    model: sbml_dfs_core.SBML_dfs = utils.load_pickle(input_uri)  # type: ignore
    summary = model.get_network_summary()
    utils.save_json(output_uri, summary)
|
845
|
+
|
846
|
+
|
847
|
+
@stats.command(name="igraph_network")
@click.argument("input_uri", type=str)
@click.argument("output_uri", type=str)
def calculate_igraph_stats(input_uri, output_uri):
    """Calculate statistics for an igraph object"""
    # unpickle the graph, summarize it, and persist the summary as JSON
    # (local renamed from `stats`, which shadowed the click group above)
    graph: ig.Graph = utils.load_pickle(input_uri)  # type: ignore
    summary = net_utils.get_graph_summary(graph)
    utils.save_json(output_uri, summary)
|
855
|
+
|
856
|
+
|
857
|
+
# Register all top-level command groups on the CLI entry point, in the
# original declaration order.
# NOTE(review): the `contextualizer` group defined above is never registered
# here — confirm whether that omission is intentional.
for _group in (
    load,
    integrate,
    consensus,
    refine,
    exporter,
    importer,
    helpers,
    stats,
):
    cli.add_command(_group)

if __name__ == "__main__":
    cli()
|