napistu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. napistu/__init__.py +12 -0
  2. napistu/__main__.py +867 -0
  3. napistu/consensus.py +1557 -0
  4. napistu/constants.py +500 -0
  5. napistu/gcs/__init__.py +10 -0
  6. napistu/gcs/constants.py +69 -0
  7. napistu/gcs/downloads.py +180 -0
  8. napistu/identifiers.py +805 -0
  9. napistu/indices.py +227 -0
  10. napistu/ingestion/__init__.py +10 -0
  11. napistu/ingestion/bigg.py +146 -0
  12. napistu/ingestion/constants.py +296 -0
  13. napistu/ingestion/cpr_edgelist.py +106 -0
  14. napistu/ingestion/identifiers_etl.py +148 -0
  15. napistu/ingestion/obo.py +268 -0
  16. napistu/ingestion/psi_mi.py +276 -0
  17. napistu/ingestion/reactome.py +218 -0
  18. napistu/ingestion/sbml.py +621 -0
  19. napistu/ingestion/string.py +356 -0
  20. napistu/ingestion/trrust.py +285 -0
  21. napistu/ingestion/yeast.py +147 -0
  22. napistu/mechanism_matching.py +597 -0
  23. napistu/modify/__init__.py +10 -0
  24. napistu/modify/constants.py +86 -0
  25. napistu/modify/curation.py +628 -0
  26. napistu/modify/gaps.py +635 -0
  27. napistu/modify/pathwayannot.py +1381 -0
  28. napistu/modify/uncompartmentalize.py +264 -0
  29. napistu/network/__init__.py +10 -0
  30. napistu/network/constants.py +117 -0
  31. napistu/network/neighborhoods.py +1594 -0
  32. napistu/network/net_create.py +1647 -0
  33. napistu/network/net_utils.py +652 -0
  34. napistu/network/paths.py +500 -0
  35. napistu/network/precompute.py +221 -0
  36. napistu/rpy2/__init__.py +127 -0
  37. napistu/rpy2/callr.py +168 -0
  38. napistu/rpy2/constants.py +101 -0
  39. napistu/rpy2/netcontextr.py +464 -0
  40. napistu/rpy2/rids.py +697 -0
  41. napistu/sbml_dfs_core.py +2216 -0
  42. napistu/sbml_dfs_utils.py +304 -0
  43. napistu/source.py +394 -0
  44. napistu/utils.py +943 -0
  45. napistu-0.1.0.dist-info/METADATA +56 -0
  46. napistu-0.1.0.dist-info/RECORD +77 -0
  47. napistu-0.1.0.dist-info/WHEEL +5 -0
  48. napistu-0.1.0.dist-info/entry_points.txt +2 -0
  49. napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
  50. napistu-0.1.0.dist-info/top_level.txt +2 -0
  51. tests/__init__.py +0 -0
  52. tests/conftest.py +83 -0
  53. tests/test_consensus.py +255 -0
  54. tests/test_constants.py +20 -0
  55. tests/test_curation.py +134 -0
  56. tests/test_data/__init__.py +0 -0
  57. tests/test_edgelist.py +20 -0
  58. tests/test_gcs.py +23 -0
  59. tests/test_identifiers.py +151 -0
  60. tests/test_igraph.py +353 -0
  61. tests/test_indices.py +88 -0
  62. tests/test_mechanism_matching.py +126 -0
  63. tests/test_net_utils.py +66 -0
  64. tests/test_netcontextr.py +105 -0
  65. tests/test_obo.py +34 -0
  66. tests/test_pathwayannot.py +95 -0
  67. tests/test_precomputed_distances.py +222 -0
  68. tests/test_rpy2.py +61 -0
  69. tests/test_sbml.py +46 -0
  70. tests/test_sbml_dfs_create.py +307 -0
  71. tests/test_sbml_dfs_utils.py +22 -0
  72. tests/test_sbo.py +11 -0
  73. tests/test_set_coverage.py +50 -0
  74. tests/test_source.py +67 -0
  75. tests/test_uncompartmentalize.py +40 -0
  76. tests/test_utils.py +487 -0
  77. tests/utils.py +30 -0
napistu/__main__.py ADDED
@@ -0,0 +1,867 @@
1
+ """The CLI for cpr"""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ import pickle
8
+ from typing import Sequence
9
+
10
+ import click
11
+ import click_logging
12
+ import napistu
13
+ import igraph as ig
14
+ import pandas as pd
15
+ from napistu import consensus as cpr_consensus
16
+ from napistu import constants
17
+ from napistu import indices
18
+ from napistu import sbml_dfs_core
19
+ from napistu import utils
20
+ from napistu.ingestion import bigg
21
+ from napistu.ingestion import reactome
22
+ from napistu.ingestion import sbml
23
+ from napistu.ingestion import string
24
+ from napistu.ingestion import trrust
25
+ from napistu.modify import curation
26
+ from napistu.modify import gaps
27
+ from napistu.modify import pathwayannot
28
+ from napistu.modify import uncompartmentalize
29
+ from napistu.network import net_create
30
+ from napistu.network import net_utils
31
+ from napistu.network import precompute
32
+ from napistu.rpy2 import has_rpy2
33
+ from fs import open_fs
34
+
35
+ if has_rpy2:
36
+ from napistu.rpy2 import rids
37
+ from napistu.rpy2 import netcontextr, callr
38
+
39
+ logger = logging.getLogger(napistu.__name__)
40
+ click_logging.basic_config(logger)
41
+
42
+ ALL = "all"
43
+
44
+
45
@click.group()
def cli():
    """The Calico Pathway Resources CLI"""
    # Top-level command group; sub-groups are attached via cli.add_command().
49
+
50
+
51
@click.group()
def load():
    """Command line tools to retrieve raw data."""
    # Group container only; the download commands below register onto it.
55
+
56
+
57
@load.command(name="reactome")
@click.argument("base_folder", type=str)
@click.option(
    "--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
)
@click_logging.simple_verbosity_option(logger)
def load_reactome(base_folder: str, overwrite: bool = False):
    """Download the Reactome SBML archive into `base_folder`/sbml."""
    # Fix: the signature previously defaulted overwrite=True, contradicting the
    # click option default of False. Click always passes the option value, so
    # the flag's default governs in practice; the signature now agrees with it.
    logger.info("Start downloading Reactome to %s", base_folder)
    reactome.reactome_sbml_download(f"{base_folder}/sbml", overwrite=overwrite)
66
+
67
+
68
@load.command(name="bigg")
@click.argument("base_folder", type=str)
@click.option(
    "--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
)
@click_logging.simple_verbosity_option(logger)
def load_bigg(base_folder: str, overwrite: bool):
    # Fetch the BiGG model collection; existing files are kept unless -o is set.
    logger.info("Start downloading Bigg to %s", base_folder)
    bigg.bigg_sbml_download(base_folder, overwrite)
77
+
78
+
79
@load.command(name="trrust")
@click.argument("target_uri", type=str)
@click_logging.simple_verbosity_option(logger)
def load_ttrust(target_uri: str):
    # NOTE(review): function name spells "ttrust" while the command is "trrust";
    # kept as-is since the registered command name is what users see.
    logger.info("Start downloading TRRUST to %s", target_uri)
    trrust.download_trrust(target_uri)
85
+
86
+
87
@load.command(name="proteinatlas-subcell")
@click.argument("target_uri", type=str)
@click.option(
    "--url",
    type=str,
    default=constants.PROTEINATLAS_SUBCELL_LOC_URL,
    help="URL to download the zipped protein atlas subcellular localization tsv from.",
)
@click_logging.simple_verbosity_option(logger)
def load_proteinatlas_subcell(target_uri: str, url: str):
    """Download the Protein Atlas subcellular-localization table to `target_uri`."""
    # Fix: the file extension was previously derived from the *default* URL
    # constant, so a custom --url with a different extension produced a wrong
    # target filename. Derive both the extension and filename from `url`.
    file_ext = url.split(".")[-1]
    target_filename = url.split("/")[-1].split(f".{file_ext}")[0]
    logger.info("Start downloading proteinatlas %s to %s", url, target_uri)
    utils.download_wget(url, target_uri, target_filename=target_filename)
101
+
102
+
103
@load.command(name="gtex-rnaseq-expression")
@click.argument("target_uri", type=str)
@click.option(
    "--url",
    type=str,
    default=constants.GTEX_RNASEQ_EXPRESSION_URL,
    help="URL to download the gtex file from.",
)
@click_logging.simple_verbosity_option(logger)
def load_gtex_rnaseq(target_uri: str, url: str):
    # Straight download: no filename rewriting, unlike the proteinatlas command.
    logger.info("Start downloading gtex %s to %s", url, target_uri)
    utils.download_wget(url, target_uri)
115
+
116
+
117
@load.command(name="string-db")
@click.argument("target_uri", type=str)
@click.option(
    "--species",
    type=str,
    default="Homo sapiens",
    help="Species name (e.g., Homo sapiens).",
)
@click_logging.simple_verbosity_option(logger)
def load_string_db(target_uri: str, species: str):
    """Download the STRING interaction database for `species` to `target_uri`."""
    # Fix: this command had no docstring, so `cpr load string-db --help`
    # showed no description; the docstring above is surfaced by click.
    string.download_string(target_uri, species)
128
+
129
+
130
@load.command(name="string-aliases")
@click.argument("target_uri", type=str)
@click.option(
    "--species",
    type=str,
    default="Homo sapiens",
    help="Species name (e.g., Homo sapiens).",
)
@click_logging.simple_verbosity_option(logger)
def load_string_aliases(target_uri: str, species: str):
    """Download the STRING alias (identifier mapping) file for `species`."""
    # Fix: missing docstring meant an empty help text for this command.
    string.download_string_aliases(target_uri, species)
141
+
142
+
143
@click.group()
def integrate():
    """Command line tools to integrate raw models into a single SBML_dfs model"""
    # Group container; integration commands register onto it below.
147
+
148
+
149
@integrate.command(name="reactome")
@click.argument("pw_index_uri", type=str)
@click.argument("output_model_uri", type=str)
@click.option("--species", "-s", multiple=True, default=(ALL,))
@click.option(
    "--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
)
@click.option(
    "--permissive",
    "-p",
    is_flag=True,
    default=False,
    help="Can parsing failures in submodels throw warnings instead of exceptions?",
)
@click_logging.simple_verbosity_option(logger)
def integrate_reactome(
    pw_index_uri: str,
    output_model_uri: str,
    species: Sequence[str] | None,
    overwrite=False,
    permissive=False,
):
    """Integrates reactome models based on a pw_index"""
    # Refuse to clobber an existing output unless -o was given.
    if not overwrite and utils.path_exists(output_model_uri):
        raise FileExistsError("'output_model_uri' exists but overwrite set False.")

    # The sentinel ("all",) means "no species filter" downstream.
    if species is not None and len(species) == 1 and species[0] == ALL:
        species = None

    strict = not permissive
    logger.debug(f"permissive = {permissive}; strict = {strict}")

    consensus_model = reactome.construct_reactome_consensus(
        pw_index_uri, species=species, strict=strict
    )
    utils.save_pickle(output_model_uri, consensus_model)
184
+
185
+
186
@integrate.command(name="bigg")
@click.argument("pw_index_uri", type=str)
@click.argument("output_model_uri", type=str)
@click.option("--species", "-s", multiple=True, default=(ALL,))
@click.option(
    "--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
)
@click_logging.simple_verbosity_option(logger)
def integrate_bigg(
    pw_index_uri: str,
    output_model_uri: str,
    species: Sequence[str] | None,
    overwrite=False,
):
    """Integrates bigg models based on a pw_index"""
    # Guard against overwriting an existing model unless explicitly requested.
    if not overwrite and utils.path_exists(output_model_uri):
        raise FileExistsError("'output_model_uri' exists but overwrite set False.")
    # ("all",) is the "no filter" sentinel shared across integrate commands.
    if species is not None and len(species) == 1 and species[0] == ALL:
        species = None
    consensus_model = bigg.construct_bigg_consensus(pw_index_uri, species=species)
    utils.save_pickle(output_model_uri, consensus_model)
207
+
208
+
209
@integrate.command(name="trrust")
@click.argument("trrust_csv_uri", type=str)
@click.argument("output_model_uri", type=str)
@click.option(
    "--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
)
@click_logging.simple_verbosity_option(logger)
def integrate_trrust(
    trrust_csv_uri: str,
    output_model_uri: str,
    overwrite=False,
):
    """Converts TRRUST csv to SBML_dfs model"""
    # Same overwrite guard as the other integrate commands.
    if not overwrite and utils.path_exists(output_model_uri):
        raise FileExistsError("'output_model_uri' exists but overwrite set False.")
    logger.info("Start converting TRRUST csv to SBML_dfs")
    sbmldfs_model = trrust.convert_trrust_to_sbml_dfs(trrust_csv_uri)
    logger.info("Save SBML_dfs model to %s", output_model_uri)
    utils.save_pickle(output_model_uri, sbmldfs_model)
228
+
229
+
230
@integrate.command(name="string-db")
@click.argument("string_db_uri", type=str)
@click.argument("string_aliases_uri", type=str)
@click.argument("output_model_uri", type=str)
@click.option(
    "--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
)
@click_logging.simple_verbosity_option(logger)
def integrate_string_db(
    string_db_uri: str, string_aliases_uri: str, output_model_uri: str, overwrite=False
):
    """Converts string-db to the sbml_dfs format"""
    # Bail out early rather than overwrite an existing model by accident.
    if not overwrite and utils.path_exists(output_model_uri):
        raise FileExistsError("'output_model_uri' exists but overwrite set False.")
    logger.info("Start converting string-db to SBML_dfs")
    sbmldfs_model = string.convert_string_to_sbml_dfs(string_db_uri, string_aliases_uri)
    logger.info("Save SBML_dfs model to %s", output_model_uri)
    utils.save_pickle(output_model_uri, sbmldfs_model)
248
+
249
+
250
@click.group()
def consensus():
    """Command line tools to create a consensus model from SBML_dfs"""
    # Group container only.
254
+
255
+
256
@consensus.command(name="create")
@click.argument("sbml_dfs_uris", type=str, nargs=-1)
@click.argument("output_model_uri", type=str, nargs=1)
@click.option(
    "--nondogmatic",
    "-n",
    is_flag=True,
    default=False,
    help="Run in non-dogmatic mode (trying to merge genes and proteins)?",
)
@click_logging.simple_verbosity_option(logger)
def create_consensus(
    sbml_dfs_uris: Sequence[str], output_model_uri: str, nondogmatic: bool
):
    """Create a consensus model from a list of SBML_dfs"""

    dogmatic = not nondogmatic
    logger.debug(f"nondogmatic = {nondogmatic}; dogmatic = {dogmatic}")
    logger.info(
        f"Creating a consensus from {len(sbml_dfs_uris)} sbml_dfs where dogmatic = {dogmatic}"
    )

    # Load every input model; the uris double as pathway ids/sources/names
    # in the synthetic pathway index constructed below.
    sbml_dfs_dict = {uri: utils.load_pickle(uri) for uri in sbml_dfs_uris}
    uris = list(sbml_dfs_dict)
    pw_index_df = pd.DataFrame(
        {
            "file": sbml_dfs_uris,
            "pathway_id": uris,
            "source": uris,
            "name": uris,
            # TODO: Discuss with Sean how to deal with date in pw_index
            "date": "1900-01-01",
        }
    )
    pw_index_df["species"] = "unknown"
    # validate_paths=False: the "file" column holds arbitrary uris, not local paths.
    pw_index = indices.PWIndex(pw_index=pw_index_df, validate_paths=False)
    consensus_model = cpr_consensus.construct_consensus_model(
        sbml_dfs_dict, pw_index, dogmatic
    )
    utils.save_pickle(output_model_uri, consensus_model)
295
+
296
+
297
@click.group()
def refine():
    """Command line tools to refine a consensus model"""
    # Group container only.
301
+
302
+
303
@refine.command(name="add_reactome_entity_sets")
@click.argument("model_uri", type=str)
@click.argument("entity_set_csv", type=str)
@click.argument("output_model_uri", type=str)
def add_reactome_entity_sets(
    model_uri: str, entity_set_csv: str, output_model_uri: str
):
    """Add reactome entity sets to a consensus model

    The entity set csv is classically exported from the neo4j reactome
    database.
    """
    # load -> annotate -> save
    loaded = utils.load_pickle(model_uri)
    annotated = pathwayannot.add_entity_sets(loaded, entity_set_csv)
    utils.save_pickle(output_model_uri, annotated)
318
+
319
+
320
@refine.command(name="add_reactome_identifiers")
@click.argument("model_uri", type=str)
@click.argument("crossref_csv", type=str)
@click.argument("output_model_uri", type=str)
def add_reactome_identifiers(model_uri: str, crossref_csv: str, output_model_uri: str):
    """Add reactome identifiers to a consensus model

    The crossref csv is classically exported from the neo4j reactome
    database.
    """
    # load -> annotate -> save
    loaded = utils.load_pickle(model_uri)
    annotated = pathwayannot.add_reactome_identifiers(loaded, crossref_csv)
    utils.save_pickle(output_model_uri, annotated)
333
+
334
+
335
@refine.command(name="infer_uncompartmentalized_species_location")
@click.argument("model_uri", type=str)
@click.argument("output_model_uri", type=str)
def infer_uncompartmentalized_species_location(model_uri: str, output_model_uri: str):
    """
    Infer Uncompartmentalized Species Location

    If the compartment of a subset of compartmentalized species was
    not specified, infer an appropriate compartment from other members of reactions they particpate in
    """
    # load -> infer -> save
    loaded = utils.load_pickle(model_uri)
    inferred = sbml_dfs_core.infer_uncompartmentalized_species_location(loaded)
    utils.save_pickle(output_model_uri, inferred)
348
+
349
+
350
@refine.command(name="name_compartmentalized_species")
@click.argument("model_uri", type=str)
@click.argument("output_model_uri", type=str)
def name_compartmentalized_species(model_uri: str, output_model_uri: str):
    """
    Name Compartmentalized Species

    Rename compartmentalized species if they have the same name as their species
    """
    # load -> rename -> save
    loaded = utils.load_pickle(model_uri)
    renamed = sbml_dfs_core.name_compartmentalized_species(loaded)
    utils.save_pickle(output_model_uri, renamed)
362
+
363
+
364
@refine.command(name="merge_model_compartments")
@click.argument("model_uri", type=str)
@click.argument("output_model_uri", type=str)
def merge_model_compartments(model_uri: str, output_model_uri: str):
    """Take a compartmentalized mechanistic model and merge all of the compartments."""
    # load -> flatten compartments -> save
    loaded = utils.load_pickle(model_uri)
    flattened = uncompartmentalize.uncompartmentalize_sbml_dfs(loaded)
    utils.save_pickle(output_model_uri, flattened)
372
+
373
+
374
@refine.command(name="drop_cofactors")
@click.argument("model_uri", type=str)
@click.argument("output_model_uri", type=str)
def drop_cofactors(model_uri: str, output_model_uri: str):
    """Remove reaction species acting as cofactors"""
    # load -> strip cofactors -> save
    loaded = utils.load_pickle(model_uri)
    stripped = pathwayannot.drop_cofactors(loaded)
    utils.save_pickle(output_model_uri, stripped)
382
+
383
+
384
@refine.command(name="add_transportation_reactions")
@click.argument("model_uri", type=str)
@click.argument("output_model_uri", type=str)
@click.option(
    "--exchange-compartment",
    "-e",
    default="cytosol",
    help="Exchange compartment for new transport reactions.",
)
@click_logging.simple_verbosity_option(logger)
def add_transportation_reaction(
    model_uri, output_model_uri, exchange_compartment="cytosol"
):
    """Add transportation reactions to a consensus model"""

    # load -> add transport reactions via the chosen exchange compartment -> save
    loaded = utils.load_pickle(model_uri)
    augmented = gaps.add_transportation_reactions(
        loaded, exchange_compartment=exchange_compartment
    )
    utils.save_pickle(output_model_uri, augmented)
404
+
405
+
406
@refine.command(name="apply_manual_curations")
@click.argument("model_uri", type=str)
@click.argument("curation_dir", type=str)
@click.argument("output_model_uri", type=str)
def apply_manual_curations(model_uri: str, curation_dir: str, output_model_uri: str):
    """Apply manual curations to a consensus model

    The curation dir is a directory containing the manual curations
    Check cpr.curation.curate_sbml_dfs for more information.
    """
    # load -> curate -> save
    loaded = utils.load_pickle(model_uri)
    curated = curation.curate_sbml_dfs(curation_dir=curation_dir, sbml_dfs=loaded)
    utils.save_pickle(output_model_uri, curated)
419
+
420
+
421
@refine.command(name="expand_identifiers")
@click.argument("model_uri", type=str)
@click.argument("output_model_uri", type=str)
@click.option(
    "--id-type",
    "-u",
    type=click.Choice(["species", "compartments", "reactions"]),
    default="species",
)
@click.option("--species", "-s", default="Homo sapiens", type=str)
@click.option(
    "--ontologies", "-o", multiple=True, type=str, help="Ontologies to add or complete"
)
def expand_identifiers(
    model_uri: str,
    output_model_uri: str,
    id_type: str,
    species: str,
    ontologies: list[str],
):
    """Expand identifiers of a model

    Args:
        model_uri (str): uri of model in sbml dfs format
        output_model_uri (str): output uri of model in sbml dfs format
        id_type (str): identifier type, one of: species, compartments, reactions
        species (str): Species to use
        ontologies (list[str]): ontologies to add or update

    Example call:
        > cpr refine expand_identifiers gs://<uri> ./test.pickle -o ensembl_gene
    """

    # Fail fast when no ontologies were requested.
    if not ontologies:
        raise ValueError("No ontologies to expand specified.")

    model: sbml.SBML_dfs = utils.load_pickle(model_uri)  # type: ignore
    expanded_ids = rids.expand_identifiers(model, id_type, species, ontologies)
    # update_expanded_identifiers mutates `model` in place (its return value
    # is not used) — presumably intentional; the mutated model is saved below.
    rids.update_expanded_identifiers(model, id_type, expanded_ids)
    utils.save_pickle(output_model_uri, model)
460
+
461
+
462
@integrate.command(name="dogmatic_scaffold")
@click.argument("output_model_uri", type=str)
@click.option("--species", "-s", default="Homo sapiens", type=str)
def dogmatic_scaffold(
    output_model_uri: str,
    species: str,
):
    """Dogmatic Scaffold

    Args:
        output_model_uri (str): output uri of model in sbml dfs format
        species (str): Species to use

    Example call:
        > cpr integrate dogmatic_scaffold ./test.pickle
    """

    # Build the scaffold for the requested species, then persist it.
    scaffold = rids.create_dogmatic_sbml_dfs(species)
    utils.save_pickle(output_model_uri, scaffold)
481
+
482
+
483
@refine.command(name="filter_gtex_tissue")
@click.argument("model_uri", type=str)
@click.argument("gtex_file_uri", type=str)
@click.argument("output_model_uri", type=str)
@click.argument("tissue", type=str)
@click.option(
    "--filter-non-genic-reactions",
    "-f",
    default=False,
    type=bool,
    help="Filter reactions not involving genes?",
)
@click_logging.simple_verbosity_option(logger)
def filter_gtex_tissue(
    model_uri: str,
    gtex_file_uri: str,
    output_model_uri: str,
    tissue: str,
    filter_non_genic_reactions: bool,
):
    """Filter model by the gtex tissue expression

    This uses zfpkm values derived from gtex to filter the model.
    """
    # Pipeline: pickle model -> R "rcpr" reaction graph -> annotate with GTEx
    # expression -> trim by tissue -> apply the trimmed context back onto the
    # pickled model in place -> save.
    logger.info("Get rcpr from R")
    rcpr = callr.get_rcpr()
    logger.info("Load sbml_dfs model")
    model: sbml.SBML_dfs = utils.load_pickle(model_uri)  # type: ignore
    logger.info("Load and clean gtex tissue expression")
    dat_gtex = netcontextr.load_and_clean_gtex_data(
        rcpr, gtex_file_uri, by_tissue_zfpkm=True
    )
    logger.info("Convert sbml_dfs to rcpr reaction graph")
    model_r = netcontextr.sbml_dfs_to_rcpr_reactions(model)
    logger.info("Annotate genes with gtex tissue expression")
    model_r_annot = netcontextr.annotate_genes(rcpr, model_r, dat_gtex, "tissue")
    logger.info("Trim network by gene attribute")
    model_r_trim = netcontextr.trim_reactions_by_gene_attribute(
        rcpr, model_r_annot, "tissue", tissue
    )
    logger.info("Apply trimmed network")

    if filter_non_genic_reactions:
        # Passing None downstream drops reactions without gene products.
        logger.info("Filter non genic reactions")
        considered_reactions = None
    else:
        logger.info("Keep genic reactions")
        # NOTE(review): `_get_rids_from_rcpr_reactions` is a private attribute
        # accessed on the R wrapper object `rcpr`, while every sibling helper
        # here lives in `netcontextr` — confirm this attribute exists on rcpr
        # and is not meant to be `netcontextr._get_rids_from_rcpr_reactions`.
        considered_reactions = rcpr._get_rids_from_rcpr_reactions(model_r)
    # Mutates `model` in place; its return value is not used.
    netcontextr.apply_reactions_context_to_sbml_dfs(
        model, model_r_trim, considered_reactions=considered_reactions
    )
    logger.info("Save model to %s", output_model_uri)
    utils.save_pickle(output_model_uri, model)
536
+
537
+
538
@refine.command(name="filter_hpa_compartments")
@click.argument("model_uri", type=str)
@click.argument("hpa_file_uri", type=str)
@click.argument("output_model_uri", type=str)
@click_logging.simple_verbosity_option(logger)
def filter_hpa_gene_compartments(
    model_uri: str, hpa_file_uri: str, output_model_uri: str
):
    """Filter an interaction network using the human protein atlas

    This uses R `rcpr` to filter an interaction network based on the
    compartment information from the human protein atlas.

    Only interactions between genes in the same compartment are kept.
    """
    # Pipeline mirrors filter_gtex_tissue but for HPA compartment annotations.
    logger.info("Get rcpr from R")
    rcpr = callr.get_rcpr()
    logger.info("Load sbml_dfs model")
    model: sbml.SBML_dfs = utils.load_pickle(model_uri)  # type: ignore
    logger.info("Load and clean hpa data")
    dat_hpa = netcontextr.load_and_clean_hpa_data(rcpr, hpa_file_uri)
    logger.info("Convert sbml_dfs to rcpr string graph")
    model_r = netcontextr.sbml_dfs_to_rcpr_string_graph(model)
    logger.info("Annotate genes with HPA compartments")
    model_r_annot = netcontextr.annotate_genes(rcpr, model_r, dat_hpa, "compartment")
    logger.info("Trim network by gene attribute")
    model_r_trim = netcontextr.trim_network_by_gene_attribute(
        rcpr, model_r_annot, "compartment"
    )
    logger.info("Apply trimmed network")
    # Mutates `model` in place before saving.
    netcontextr.apply_context_to_sbml_dfs(model, model_r_trim)
    logger.info("Save model to %s", output_model_uri)
    utils.save_pickle(output_model_uri, model)
571
+
572
+
573
@click.group()
def exporter():
    """Command line tools to export a consensus model
    to various formats
    """
    # Group container only.
579
+
580
+
581
@exporter.command(name="export_igraph")
@click.argument("model_uri", type=str)
@click.argument("output_uri", type=str)
@click.option(
    "--graph_attrs_spec_uri",
    "-a",
    default=None,
    help="File specifying reaction and/or species attributes to add to the graph",
)
@click.option(
    "--format", "-f", default="pickle", help="Output format: gml, edgelist, pickle"
)
@click.option(
    "--graph_type", "-g", type=str, default="bipartite", help="bipartite or regulatory"
)
@click.option(
    "--weighting_strategy",
    "-w",
    type=str,
    default="unweighted",
    help="Approach to adding weights to the network",
)
@click.option(
    "--directed", "-d", type=bool, default=True, help="Directed or undirected graph?"
)
@click.option(
    "--reverse",
    "-r",
    type=bool,
    default=False,
    help="Reverse edges so they flow from effects to causes?",
)
def export_igraph(
    model_uri: str,
    output_uri: str,
    graph_attrs_spec_uri: str | None,
    format: str,
    graph_type: str,
    weighting_strategy: str,
    directed: bool,
    reverse: bool,
):
    """Export the consensus model as an igraph object"""
    model = utils.load_pickle(model_uri)

    # Optional attribute spec: only read when a uri was supplied.
    graph_attrs_spec = (
        net_utils.read_graph_attrs_spec(graph_attrs_spec_uri)
        if graph_attrs_spec_uri is not None
        else None
    )

    cpr_graph = net_create.process_cpr_graph(
        model,
        reaction_graph_attrs=graph_attrs_spec,
        directed=directed,
        edge_reversed=reverse,
        graph_type=graph_type,
        weighting_strategy=weighting_strategy,
        verbose=True,
    )

    # Dispatch the serializer by format; the unknown-format error is raised
    # after the output is opened, matching the original statement order.
    writers = {
        "gml": cpr_graph.write_gml,
        "edgelist": cpr_graph.write_edgelist,
        "pickle": lambda fh: pickle.dump(cpr_graph, fh),
    }
    base, path = os.path.split(output_uri)
    with open_fs(base, create=True, writeable=True) as fs:
        with fs.openbin(path, "wb") as f:
            writer = writers.get(format)
            if writer is None:
                raise ValueError("Unknown format: %s" % format)
            writer(f)
652
+
653
+
654
@exporter.command(name="export_precomputed_distances")
@click.argument("graph_uri", type=str)
@click.argument("output_uri", type=str)
@click.option(
    "--format",
    "-f",
    type=str,
    default="pickle",
    help="Input igraph format: gml, edgelist, pickle",
)
@click.option(
    "--max_steps",
    "-s",
    type=int,
    default=-1,
    help="The max number of steps between pairs of species to save a distance",
)
@click.option(
    "--max_score_q",
    "-q",
    type=float,
    default=1,
    help='Retain up to the "max_score_q" quantiles of all scores (small scores are better)',
)
@click.option(
    "--partition_size",
    "-p",
    type=int,
    default=5000,
    help="The number of species to process together when computing distances",
)
@click.option(
    "--weights_vars",
    "-w",
    type=str,
    default=["weights", "upstream_weights"],
    help="One or more variables defining edge weights to use when calculating weighted shortest paths.",
)
def export_precomputed_distances(
    graph_uri: str,
    output_uri: str,
    format: str,
    max_steps: int,
    max_score_q: float,
    partition_size: int,
    weights_vars: str,
):
    """Export precomputed distances for the igraph object"""

    # Reader dispatch by input format; raised inside the open context to
    # preserve the original statement order on unknown formats.
    readers = {
        "gml": ig.Graph.Read_GML,
        "edgelist": ig.Graph.Read_Edgelist,
        "pickle": ig.Graph.Read_Pickle,
    }
    base, path = os.path.split(graph_uri)
    with open_fs(base) as fs:
        with fs.openbin(path) as f:
            if format not in readers:
                raise ValueError("Unknown format: %s" % format)
            cpr_graph = readers[format](f)

    # --weights_vars arrives as a string (click stringifies the list default);
    # convert it back to a list of variable names.
    weights_vars_list = utils.click_str_to_list(weights_vars)

    precomputed_distances = precompute.precompute_distances(
        cpr_graph,
        max_steps=max_steps,
        max_score_q=max_score_q,
        partition_size=partition_size,
        weights_vars=weights_vars_list,
    )

    utils.save_json(output_uri, precomputed_distances.to_json())
727
+
728
+
729
@exporter.command(name="export_smbl_dfs_tables")
@click.argument("model_uri", type=str)
@click.argument("output_uri", type=str)
@click.option(
    "--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
)
@click.option(
    "--model-prefix", "-m", type=str, default="", help="Model prefix for files?"
)
@click.option(
    "--nondogmatic",
    "-n",
    is_flag=True,
    default=False,
    help="Run in non-dogmatic mode (trying to merge genes and proteins)?",
)
@click_logging.simple_verbosity_option(logger)
def export_sbml_dfs_tables(
    model_uri: str,
    output_uri: str,
    overwrite=False,
    model_prefix="",
    nondogmatic: bool = False,
):
    """Export the consensus model as a collection of tables"""
    # Fix: the signature previously defaulted nondogmatic=True while the click
    # flag defaults to False; click always passes the flag value, so the flag
    # governed in practice — the signature now agrees with it.
    # NOTE(review): the registered command name "export_smbl_dfs_tables" is
    # misspelled but kept, since renaming would break existing invocations.

    dogmatic = not nondogmatic
    logger.debug(f"nondogmatic = {nondogmatic}; dogmatic = {dogmatic}")
    logger.info(f"Exporting tables with dogmatic = {dogmatic}")

    model = utils.load_pickle(model_uri)
    sbml_dfs_core.export_sbml_dfs(
        model_prefix, model, output_uri, overwrite=overwrite, dogmatic=dogmatic
    )
763
+
764
+
765
@click.group()
def importer():
    """Tools to import sbml_dfs directly form other sources"""
    # Group container only.
769
+
770
+
771
@importer.command(name="sbml_dfs")
@click.argument("input_uri", type=str)
@click.argument("output_uri", type=str)
@click_logging.simple_verbosity_option(logger)
def import_sbml_dfs_from_sbml_dfs_uri(input_uri, output_uri):
    """Import sbml_dfs from an uri, eg another GCS bucket"""
    logger.info("Load sbml_dfs from %s", input_uri)
    # We could also just copy the file, but I think validating
    # the filetype is a good idea to prevent downstream errors.
    sbml_dfs = utils.load_pickle(input_uri)
    # Fix: the isinstance check previously referenced `sbml.SBML_dfs`, but the
    # class is constructed as `sbml_dfs_core.SBML_dfs` elsewhere in this file
    # (see import_sbml_dfs_from_sbml), so valid pickles could fail validation.
    if not isinstance(sbml_dfs, sbml_dfs_core.SBML_dfs):
        raise ValueError(
            f"Pickled input is not an SBML_dfs object but {type(sbml_dfs)}: {input_uri}"
        )
    logger.info("Save file to %s", output_uri)
    utils.save_pickle(output_uri, sbml_dfs)
787
+
788
+
789
@importer.command(name="sbml")
@click.argument("input_uri", type=str)
@click.argument("output_uri", type=str)
@click_logging.simple_verbosity_option(logger)
def import_sbml_dfs_from_sbml(input_uri, output_uri):
    """Import sbml_dfs from a sbml file"""
    logger.info("Load sbml from %s", input_uri)
    # We could also just copy the file, but I think validating
    # the filetype is a good idea to prevent downstream errors.
    parsed_sbml = sbml.SBML(input_uri)
    logger.info("Convert file to sbml_dfs")
    converted = sbml_dfs_core.SBML_dfs(parsed_sbml)
    logger.info("Save file to %s", output_uri)
    utils.save_pickle(output_uri, converted)
803
+
804
+
805
# NOTE(review): this group has no sub-commands and is never registered on the
# top-level `cli` below — confirm whether it is a placeholder or dead code.
@click.group()
def contextualizer():
    """Command line tools to contextualize a pathway model"""
    pass
809
+
810
+
811
@click.group()
def helpers():
    """Various helper functions"""
    # Group container only.
815
+
816
+
817
@helpers.command(name="copy_uri")
@click.argument("input_uri", type=str)
@click.argument("output_uri", type=str)
@click.option("--is-file", type=bool, default=True, help="Is the input a file?")
@click_logging.simple_verbosity_option(logger)
def copy_uri(input_uri, output_uri, is_file=True):
    """Copy a uri representing a file or folder from one location to another"""
    # Thin wrapper: delegate directly to the shared uri-copy helper.
    logger.info("Copy uri from %s to %s", input_uri, output_uri)
    utils.copy_uri(input_uri, output_uri, is_file=is_file)
826
+
827
+
828
@click.group()
def stats():
    """Various functions to calculate network statistics

    The statistics are saved as json files
    """
    # Group container only.
835
+
836
+
837
@stats.command(name="sbml_dfs_network")
@click.argument("input_uri", type=str)
@click.argument("output_uri", type=str)
def calculate_sbml_dfs_stats(input_uri, output_uri):
    """Calculate statistics for a sbml_dfs object"""
    # Load the pickled model, summarize it, and persist the summary as json.
    model: sbml_dfs_core.SBML_dfs = utils.load_pickle(input_uri)  # type: ignore
    summary = model.get_network_summary()
    utils.save_json(output_uri, summary)
845
+
846
+
847
@stats.command(name="igraph_network")
@click.argument("input_uri", type=str)
@click.argument("output_uri", type=str)
def calculate_igraph_stats(input_uri, output_uri):
    """Calculate statistics for an igraph object"""
    # Load the pickled graph, summarize it, and persist the summary as json.
    graph: ig.Graph = utils.load_pickle(input_uri)  # type: ignore
    summary = net_utils.get_graph_summary(graph)
    utils.save_json(output_uri, summary)
855
+
856
+
857
# Register all sub-command groups on the top-level CLI.
# NOTE(review): the `contextualizer` group defined above is not registered
# here — confirm whether that omission is intentional.
cli.add_command(load)
cli.add_command(integrate)
cli.add_command(consensus)
cli.add_command(refine)
cli.add_command(exporter)
cli.add_command(importer)
cli.add_command(helpers)
cli.add_command(stats)

if __name__ == "__main__":
    cli()