napistu 0.2.5.dev6__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__main__.py +126 -96
- napistu/constants.py +35 -41
- napistu/context/__init__.py +10 -0
- napistu/context/discretize.py +462 -0
- napistu/context/filtering.py +387 -0
- napistu/gcs/__init__.py +1 -1
- napistu/identifiers.py +74 -15
- napistu/indices.py +68 -0
- napistu/ingestion/__init__.py +1 -1
- napistu/ingestion/bigg.py +47 -62
- napistu/ingestion/constants.py +18 -133
- napistu/ingestion/gtex.py +113 -0
- napistu/ingestion/hpa.py +147 -0
- napistu/ingestion/sbml.py +0 -97
- napistu/ingestion/string.py +2 -2
- napistu/matching/__init__.py +10 -0
- napistu/matching/constants.py +18 -0
- napistu/matching/interactions.py +518 -0
- napistu/matching/mount.py +529 -0
- napistu/matching/species.py +510 -0
- napistu/mcp/__init__.py +7 -4
- napistu/mcp/__main__.py +128 -72
- napistu/mcp/client.py +16 -25
- napistu/mcp/codebase.py +201 -153
- napistu/mcp/component_base.py +170 -0
- napistu/mcp/config.py +223 -0
- napistu/mcp/constants.py +45 -2
- napistu/mcp/documentation.py +253 -136
- napistu/mcp/documentation_utils.py +13 -48
- napistu/mcp/execution.py +372 -305
- napistu/mcp/health.py +49 -67
- napistu/mcp/profiles.py +10 -6
- napistu/mcp/server.py +161 -80
- napistu/mcp/tutorials.py +139 -87
- napistu/modify/__init__.py +1 -1
- napistu/modify/gaps.py +1 -1
- napistu/network/__init__.py +1 -1
- napistu/network/constants.py +101 -34
- napistu/network/data_handling.py +388 -0
- napistu/network/ig_utils.py +351 -0
- napistu/network/napistu_graph_core.py +354 -0
- napistu/network/neighborhoods.py +40 -40
- napistu/network/net_create.py +373 -309
- napistu/network/net_propagation.py +47 -19
- napistu/network/{net_utils.py → ng_utils.py} +124 -272
- napistu/network/paths.py +67 -51
- napistu/network/precompute.py +11 -11
- napistu/ontologies/__init__.py +10 -0
- napistu/ontologies/constants.py +129 -0
- napistu/ontologies/dogma.py +243 -0
- napistu/ontologies/genodexito.py +649 -0
- napistu/ontologies/mygene.py +369 -0
- napistu/ontologies/renaming.py +198 -0
- napistu/rpy2/__init__.py +229 -86
- napistu/rpy2/callr.py +47 -77
- napistu/rpy2/constants.py +24 -23
- napistu/rpy2/rids.py +61 -648
- napistu/sbml_dfs_core.py +587 -222
- napistu/scverse/__init__.py +15 -0
- napistu/scverse/constants.py +28 -0
- napistu/scverse/loading.py +727 -0
- napistu/utils.py +118 -10
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/METADATA +8 -3
- napistu-0.3.1.dist-info/RECORD +133 -0
- tests/conftest.py +22 -0
- tests/test_context_discretize.py +56 -0
- tests/test_context_filtering.py +267 -0
- tests/test_identifiers.py +100 -0
- tests/test_indices.py +65 -0
- tests/{test_edgelist.py → test_ingestion_napistu_edgelist.py} +2 -2
- tests/test_matching_interactions.py +108 -0
- tests/test_matching_mount.py +305 -0
- tests/test_matching_species.py +394 -0
- tests/test_mcp_config.py +193 -0
- tests/test_mcp_documentation_utils.py +12 -3
- tests/test_mcp_server.py +356 -0
- tests/test_network_data_handling.py +397 -0
- tests/test_network_ig_utils.py +23 -0
- tests/test_network_neighborhoods.py +19 -0
- tests/test_network_net_create.py +459 -0
- tests/test_network_ng_utils.py +30 -0
- tests/test_network_paths.py +56 -0
- tests/{test_precomputed_distances.py → test_network_precompute.py} +8 -6
- tests/test_ontologies_genodexito.py +58 -0
- tests/test_ontologies_mygene.py +39 -0
- tests/test_ontologies_renaming.py +110 -0
- tests/test_rpy2_callr.py +79 -0
- tests/test_rpy2_init.py +151 -0
- tests/test_sbml.py +0 -31
- tests/test_sbml_dfs_core.py +134 -10
- tests/test_scverse_loading.py +778 -0
- tests/test_set_coverage.py +2 -2
- tests/test_utils.py +121 -1
- napistu/mechanism_matching.py +0 -1353
- napistu/rpy2/netcontextr.py +0 -467
- napistu-0.2.5.dev6.dist-info/RECORD +0 -97
- tests/test_igraph.py +0 -367
- tests/test_mechanism_matching.py +0 -784
- tests/test_net_utils.py +0 -149
- tests/test_netcontextr.py +0 -105
- tests/test_rpy2.py +0 -61
- /napistu/ingestion/{cpr_edgelist.py → napistu_edgelist.py} +0 -0
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/WHEEL +0 -0
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/entry_points.txt +0 -0
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.2.5.dev6.dist-info → napistu-0.3.1.dist-info}/top_level.txt +0 -0
- /tests/{test_obo.py → test_ingestion_obo.py} +0 -0
napistu/__main__.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
"""The CLI for
|
1
|
+
"""The CLI for Napistu"""
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
|
@@ -13,11 +13,14 @@ import napistu
|
|
13
13
|
import igraph as ig
|
14
14
|
import pandas as pd
|
15
15
|
from napistu import consensus as cpr_consensus
|
16
|
-
from napistu import constants
|
17
16
|
from napistu import indices
|
18
17
|
from napistu import sbml_dfs_core
|
19
18
|
from napistu import utils
|
19
|
+
from napistu.context import filtering
|
20
|
+
from napistu.matching import mount
|
20
21
|
from napistu.ingestion import bigg
|
22
|
+
from napistu.ingestion import gtex
|
23
|
+
from napistu.ingestion import hpa
|
21
24
|
from napistu.ingestion import reactome
|
22
25
|
from napistu.ingestion import sbml
|
23
26
|
from napistu.ingestion import string
|
@@ -27,15 +30,17 @@ from napistu.modify import gaps
|
|
27
30
|
from napistu.modify import pathwayannot
|
28
31
|
from napistu.modify import uncompartmentalize
|
29
32
|
from napistu.network import net_create
|
30
|
-
from napistu.network import
|
33
|
+
from napistu.network.ig_utils import get_graph_summary
|
34
|
+
from napistu.network.ng_utils import read_graph_attrs_spec
|
31
35
|
from napistu.network import precompute
|
32
|
-
from napistu.
|
36
|
+
from napistu.ontologies.genodexito import Genodexito
|
37
|
+
from napistu.ontologies import dogma
|
38
|
+
from napistu.constants import ONTOLOGIES
|
39
|
+
from napistu.constants import RESOLVE_MATCHES_AGGREGATORS
|
40
|
+
from napistu.ingestion.constants import PROTEINATLAS_SUBCELL_LOC_URL
|
41
|
+
from napistu.ingestion.constants import GTEX_RNASEQ_EXPRESSION_URL
|
33
42
|
from fs import open_fs
|
34
43
|
|
35
|
-
if has_rpy2:
|
36
|
-
from napistu.rpy2 import rids
|
37
|
-
from napistu.rpy2 import netcontextr, callr
|
38
|
-
|
39
44
|
logger = logging.getLogger(napistu.__name__)
|
40
45
|
click_logging.basic_config(logger)
|
41
46
|
|
@@ -89,15 +94,12 @@ def load_ttrust(target_uri: str):
|
|
89
94
|
@click.option(
|
90
95
|
"--url",
|
91
96
|
type=str,
|
92
|
-
default=
|
97
|
+
default=PROTEINATLAS_SUBCELL_LOC_URL,
|
93
98
|
help="URL to download the zipped protein atlas subcellular localization tsv from.",
|
94
99
|
)
|
95
100
|
@click_logging.simple_verbosity_option(logger)
|
96
101
|
def load_proteinatlas_subcell(target_uri: str, url: str):
|
97
|
-
|
98
|
-
target_filename = url.split("/")[-1].split(f".{file_ext}")[0]
|
99
|
-
logger.info("Start downloading proteinatlas %s to %s", url, target_uri)
|
100
|
-
utils.download_wget(url, target_uri, target_filename=target_filename)
|
102
|
+
hpa.download_hpa_data(target_uri, url)
|
101
103
|
|
102
104
|
|
103
105
|
@load.command(name="gtex-rnaseq-expression")
|
@@ -105,13 +107,12 @@ def load_proteinatlas_subcell(target_uri: str, url: str):
|
|
105
107
|
@click.option(
|
106
108
|
"--url",
|
107
109
|
type=str,
|
108
|
-
default=
|
110
|
+
default=GTEX_RNASEQ_EXPRESSION_URL,
|
109
111
|
help="URL to download the gtex file from.",
|
110
112
|
)
|
111
113
|
@click_logging.simple_verbosity_option(logger)
|
112
114
|
def load_gtex_rnaseq(target_uri: str, url: str):
|
113
|
-
|
114
|
-
utils.download_wget(url, target_uri)
|
115
|
+
gtex.download_gtex_rnaseq(target_uri, url)
|
115
116
|
|
116
117
|
|
117
118
|
@load.command(name="string-db")
|
@@ -411,7 +412,7 @@ def apply_manual_curations(model_uri: str, curation_dir: str, output_model_uri:
|
|
411
412
|
"""Apply manual curations to a consensus model
|
412
413
|
|
413
414
|
The curation dir is a directory containing the manual curations
|
414
|
-
Check
|
415
|
+
Check napistu.modify.curation.curate_sbml_dfs for more information.
|
415
416
|
"""
|
416
417
|
model = utils.load_pickle(model_uri)
|
417
418
|
model = curation.curate_sbml_dfs(curation_dir=curation_dir, sbml_dfs=model)
|
@@ -419,52 +420,80 @@ def apply_manual_curations(model_uri: str, curation_dir: str, output_model_uri:
|
|
419
420
|
|
420
421
|
|
421
422
|
@refine.command(name="expand_identifiers")
|
422
|
-
@click.argument("
|
423
|
+
@click.argument("sbml_dfs_uri", type=str)
|
423
424
|
@click.argument("output_model_uri", type=str)
|
424
|
-
@click.option(
|
425
|
-
"--id-type",
|
426
|
-
"-u",
|
427
|
-
type=click.Choice(["species", "compartments", "reactions"]),
|
428
|
-
default="species",
|
429
|
-
)
|
430
425
|
@click.option("--species", "-s", default="Homo sapiens", type=str)
|
431
426
|
@click.option(
|
432
427
|
"--ontologies", "-o", multiple=True, type=str, help="Ontologies to add or complete"
|
433
428
|
)
|
429
|
+
@click.option(
|
430
|
+
"--preferred_method",
|
431
|
+
"-p",
|
432
|
+
default="bioconductor",
|
433
|
+
type=str,
|
434
|
+
help="Preferred method to use for identifier expansion",
|
435
|
+
)
|
436
|
+
@click.option(
|
437
|
+
"--allow_fallback",
|
438
|
+
"-a",
|
439
|
+
default=True,
|
440
|
+
type=bool,
|
441
|
+
help="Allow fallback to other methods if preferred method fails",
|
442
|
+
)
|
434
443
|
def expand_identifiers(
|
435
|
-
|
444
|
+
sbml_dfs_uri: str,
|
436
445
|
output_model_uri: str,
|
437
|
-
id_type: str,
|
438
446
|
species: str,
|
439
|
-
ontologies:
|
447
|
+
ontologies: set[str],
|
448
|
+
preferred_method: str,
|
449
|
+
allow_fallback: bool,
|
440
450
|
):
|
441
451
|
"""Expand identifiers of a model
|
442
452
|
|
443
453
|
Args:
|
444
|
-
|
454
|
+
sbml_dfs_uri (str): uri of model in sbml dfs format
|
445
455
|
output_model_uri (str): output uri of model in sbml dfs format
|
446
|
-
id_type (str): identifier type, one of: species, compartments, reactions
|
447
456
|
species (str): Species to use
|
448
|
-
ontologies (
|
457
|
+
ontologies (set[str]): ontologies to add or update
|
449
458
|
|
450
459
|
Example call:
|
451
460
|
> cpr refine expand_identifiers gs://<uri> ./test.pickle -o ensembl_gene
|
452
461
|
"""
|
453
|
-
|
454
|
-
model: sbml.SBML_dfs = utils.load_pickle(model_uri) # type: ignore
|
462
|
+
sbml_dfs: sbml.SBML_dfs = utils.load_pickle(sbml_dfs_uri) # type: ignore
|
455
463
|
if len(ontologies) == 0:
|
456
464
|
raise ValueError("No ontologies to expand specified.")
|
457
|
-
|
458
|
-
|
459
|
-
|
465
|
+
|
466
|
+
Genodexito(
|
467
|
+
species=species,
|
468
|
+
preferred_method=preferred_method,
|
469
|
+
allow_fallback=allow_fallback,
|
470
|
+
).expand_sbml_dfs_ids(sbml_dfs, ontologies=ontologies)
|
471
|
+
|
472
|
+
utils.save_pickle(output_model_uri, sbml_dfs)
|
460
473
|
|
461
474
|
|
462
475
|
@integrate.command(name="dogmatic_scaffold")
|
463
476
|
@click.argument("output_model_uri", type=str)
|
464
477
|
@click.option("--species", "-s", default="Homo sapiens", type=str)
|
478
|
+
@click.option(
|
479
|
+
"--preferred_method",
|
480
|
+
"-p",
|
481
|
+
default="bioconductor",
|
482
|
+
type=str,
|
483
|
+
help="Preferred method to use for identifier expansion",
|
484
|
+
)
|
485
|
+
@click.option(
|
486
|
+
"--allow_fallback",
|
487
|
+
"-a",
|
488
|
+
default=True,
|
489
|
+
type=bool,
|
490
|
+
help="Allow fallback to other methods if preferred method fails",
|
491
|
+
)
|
465
492
|
def dogmatic_scaffold(
|
466
493
|
output_model_uri: str,
|
467
494
|
species: str,
|
495
|
+
preferred_method: str,
|
496
|
+
allow_fallback: bool,
|
468
497
|
):
|
469
498
|
"""Dogmatic Scaffold
|
470
499
|
|
@@ -476,25 +505,23 @@ def dogmatic_scaffold(
|
|
476
505
|
> cpr integrate dogmatic_scaffold ./test.pickle
|
477
506
|
"""
|
478
507
|
|
479
|
-
dogmatic_sbml_dfs =
|
508
|
+
dogmatic_sbml_dfs = dogma.create_dogmatic_sbml_dfs(
|
509
|
+
species=species,
|
510
|
+
preferred_method=preferred_method,
|
511
|
+
allow_fallback=allow_fallback,
|
512
|
+
)
|
513
|
+
|
480
514
|
utils.save_pickle(output_model_uri, dogmatic_sbml_dfs)
|
481
515
|
|
482
516
|
|
483
517
|
@refine.command(name="filter_gtex_tissue")
|
484
|
-
@click.argument("
|
518
|
+
@click.argument("sbml_dfs_uri", type=str)
|
485
519
|
@click.argument("gtex_file_uri", type=str)
|
486
520
|
@click.argument("output_model_uri", type=str)
|
487
521
|
@click.argument("tissue", type=str)
|
488
|
-
@click.option(
|
489
|
-
"--filter-non-genic-reactions",
|
490
|
-
"-f",
|
491
|
-
default=False,
|
492
|
-
type=bool,
|
493
|
-
help="Filter reactions not involving genes?",
|
494
|
-
)
|
495
522
|
@click_logging.simple_verbosity_option(logger)
|
496
523
|
def filter_gtex_tissue(
|
497
|
-
|
524
|
+
sbml_dfs_uri: str,
|
498
525
|
gtex_file_uri: str,
|
499
526
|
output_model_uri: str,
|
500
527
|
tissue: str,
|
@@ -504,70 +531,73 @@ def filter_gtex_tissue(
|
|
504
531
|
|
505
532
|
This uses zfpkm values derived from gtex to filter the model.
|
506
533
|
"""
|
507
|
-
|
508
|
-
rcpr = callr.get_rcpr()
|
534
|
+
|
509
535
|
logger.info("Load sbml_dfs model")
|
510
|
-
|
536
|
+
sbml_dfs: sbml.SBML_dfs = utils.load_pickle(sbml_dfs_uri) # type: ignore
|
511
537
|
logger.info("Load and clean gtex tissue expression")
|
512
|
-
dat_gtex =
|
513
|
-
rcpr, gtex_file_uri, by_tissue_zfpkm=True
|
514
|
-
)
|
515
|
-
logger.info("Convert sbml_dfs to rcpr reaction graph")
|
516
|
-
model_r = netcontextr.sbml_dfs_to_rcpr_reactions(model)
|
538
|
+
dat_gtex = gtex.load_and_clean_gtex_data(gtex_file_uri)
|
517
539
|
logger.info("Annotate genes with gtex tissue expression")
|
518
|
-
|
540
|
+
mount.bind_wide_results(
|
541
|
+
sbml_dfs=sbml_dfs,
|
542
|
+
results_df=dat_gtex.reset_index(drop=False),
|
543
|
+
results_name="gtex",
|
544
|
+
ontologies={ONTOLOGIES.ENSEMBL_GENE},
|
545
|
+
numeric_agg=RESOLVE_MATCHES_AGGREGATORS.MAX,
|
546
|
+
)
|
519
547
|
logger.info("Trim network by gene attribute")
|
520
|
-
|
521
|
-
|
548
|
+
filtering.filter_species_by_attribute(
|
549
|
+
sbml_dfs,
|
550
|
+
"gtex",
|
551
|
+
attribute_name=tissue,
|
552
|
+
# remove entries which are NOT expressed in the selected tissue
|
553
|
+
attribute_value=0,
|
554
|
+
inplace=True,
|
522
555
|
)
|
523
|
-
|
556
|
+
# remove the gtex species data from the sbml_dfs
|
557
|
+
sbml_dfs.remove_species_data("gtex")
|
524
558
|
|
525
|
-
|
526
|
-
|
527
|
-
considered_reactions = None
|
528
|
-
else:
|
529
|
-
logger.info("Keep genic reactions")
|
530
|
-
considered_reactions = rcpr._get_rids_from_rcpr_reactions(model_r)
|
531
|
-
netcontextr.apply_reactions_context_to_sbml_dfs(
|
532
|
-
model, model_r_trim, considered_reactions=considered_reactions
|
533
|
-
)
|
534
|
-
logger.info("Save model to %s", output_model_uri)
|
535
|
-
utils.save_pickle(output_model_uri, model)
|
559
|
+
logger.info("Save sbml_dfs to %s", output_model_uri)
|
560
|
+
utils.save_pickle(output_model_uri, sbml_dfs)
|
536
561
|
|
537
562
|
|
538
563
|
@refine.command(name="filter_hpa_compartments")
|
539
|
-
@click.argument("
|
564
|
+
@click.argument("sbml_dfs_uri", type=str)
|
540
565
|
@click.argument("hpa_file_uri", type=str)
|
541
566
|
@click.argument("output_model_uri", type=str)
|
542
567
|
@click_logging.simple_verbosity_option(logger)
|
543
568
|
def filter_hpa_gene_compartments(
|
544
|
-
|
569
|
+
sbml_dfs_uri: str, hpa_file_uri: str, output_model_uri: str
|
545
570
|
):
|
546
571
|
"""Filter an interaction network using the human protein atlas
|
547
572
|
|
548
|
-
This uses
|
549
|
-
|
573
|
+
This loads the human protein atlas and removes reactions (including interactions)
|
574
|
+
containing genes which are not colocalized.
|
550
575
|
|
551
576
|
Only interactions between genes in the same compartment are kept.
|
552
577
|
"""
|
553
|
-
|
554
|
-
rcpr = callr.get_rcpr()
|
578
|
+
|
555
579
|
logger.info("Load sbml_dfs model")
|
556
|
-
|
580
|
+
sbml_dfs: sbml.SBML_dfs = utils.load_pickle(sbml_dfs_uri) # type: ignore
|
557
581
|
logger.info("Load and clean hpa data")
|
558
|
-
dat_hpa =
|
559
|
-
logger.info("Convert sbml_dfs to rcpr string graph")
|
560
|
-
model_r = netcontextr.sbml_dfs_to_rcpr_string_graph(model)
|
582
|
+
dat_hpa = hpa.load_and_clean_hpa_data(hpa_file_uri)
|
561
583
|
logger.info("Annotate genes with HPA compartments")
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
584
|
+
mount.bind_wide_results(
|
585
|
+
sbml_dfs=sbml_dfs,
|
586
|
+
results_df=dat_hpa.reset_index(drop=False),
|
587
|
+
results_name="hpa",
|
588
|
+
ontologies={ONTOLOGIES.ENSEMBL_GENE},
|
589
|
+
numeric_agg=RESOLVE_MATCHES_AGGREGATORS.MAX,
|
566
590
|
)
|
567
|
-
logger.info(
|
568
|
-
|
569
|
-
|
570
|
-
|
591
|
+
logger.info(
|
592
|
+
"Trim network removing reactions with species in different compartments"
|
593
|
+
)
|
594
|
+
filtering.filter_reactions_with_disconnected_cspecies(
|
595
|
+
sbml_dfs, "hpa", inplace=False
|
596
|
+
)
|
597
|
+
sbml_dfs.remove_species_data("hpa")
|
598
|
+
|
599
|
+
logger.info("Save sbml_dfs to %s", output_model_uri)
|
600
|
+
utils.save_pickle(output_model_uri, sbml_dfs)
|
571
601
|
|
572
602
|
|
573
603
|
@click.group()
|
@@ -626,9 +656,9 @@ def export_igraph(
|
|
626
656
|
if graph_attrs_spec_uri is None:
|
627
657
|
graph_attrs_spec = None
|
628
658
|
else:
|
629
|
-
graph_attrs_spec =
|
659
|
+
graph_attrs_spec = read_graph_attrs_spec(graph_attrs_spec_uri)
|
630
660
|
|
631
|
-
|
661
|
+
napistu_graph = net_create.process_napistu_graph(
|
632
662
|
model,
|
633
663
|
reaction_graph_attrs=graph_attrs_spec,
|
634
664
|
directed=directed,
|
@@ -642,11 +672,11 @@ def export_igraph(
|
|
642
672
|
with open_fs(base, create=True, writeable=True) as fs:
|
643
673
|
with fs.openbin(path, "wb") as f:
|
644
674
|
if format == "gml":
|
645
|
-
|
675
|
+
napistu_graph.write_gml(f)
|
646
676
|
elif format == "edgelist":
|
647
|
-
|
677
|
+
napistu_graph.write_edgelist(f)
|
648
678
|
elif format == "pickle":
|
649
|
-
pickle.dump(
|
679
|
+
pickle.dump(napistu_graph, f)
|
650
680
|
else:
|
651
681
|
raise ValueError("Unknown format: %s" % format)
|
652
682
|
|
@@ -704,11 +734,11 @@ def export_precomputed_distances(
|
|
704
734
|
with open_fs(base) as fs:
|
705
735
|
with fs.openbin(path) as f:
|
706
736
|
if format == "gml":
|
707
|
-
|
737
|
+
napistu_graph = ig.Graph.Read_GML(f)
|
708
738
|
elif format == "edgelist":
|
709
|
-
|
739
|
+
napistu_graph = ig.Graph.Read_Edgelist(f)
|
710
740
|
elif format == "pickle":
|
711
|
-
|
741
|
+
napistu_graph = ig.Graph.Read_Pickle(f)
|
712
742
|
else:
|
713
743
|
raise ValueError("Unknown format: %s" % format)
|
714
744
|
|
@@ -716,7 +746,7 @@ def export_precomputed_distances(
|
|
716
746
|
weights_vars_list = utils.click_str_to_list(weights_vars)
|
717
747
|
|
718
748
|
precomputed_distances = precompute.precompute_distances(
|
719
|
-
|
749
|
+
napistu_graph,
|
720
750
|
max_steps=max_steps,
|
721
751
|
max_score_q=max_score_q,
|
722
752
|
partition_size=partition_size,
|
@@ -850,7 +880,7 @@ def calculate_sbml_dfs_stats(input_uri, output_uri):
|
|
850
880
|
def calculate_igraph_stats(input_uri, output_uri):
|
851
881
|
"""Calculate statistics for an igraph object"""
|
852
882
|
graph: ig.Graph = utils.load_pickle(input_uri) # type: ignore
|
853
|
-
stats =
|
883
|
+
stats = get_graph_summary(graph)
|
854
884
|
utils.save_json(output_uri, stats)
|
855
885
|
|
856
886
|
|
napistu/constants.py
CHANGED
@@ -21,16 +21,6 @@ PACKAGE_DEFS = SimpleNamespace(
|
|
21
21
|
CACHE_DIR="napistu_data",
|
22
22
|
)
|
23
23
|
|
24
|
-
PROTEINATLAS_SUBCELL_LOC_URL = (
|
25
|
-
"https://www.proteinatlas.org/download/tsv/subcellular_location.tsv.zip"
|
26
|
-
)
|
27
|
-
|
28
|
-
# GTEx
|
29
|
-
GTEX_RNASEQ_EXPRESSION_URL = "https://storage.googleapis.com/adult-gtex/bulk-gex/v8/rna-seq/GTEx_Analysis_2017-06-05_v8_RNASeQCv1.1.9_gene_median_tpm.gct.gz"
|
30
|
-
|
31
|
-
# Gencode
|
32
|
-
GENCODE_URL = "https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_26/gencode.v26.transcripts.fa.gz"
|
33
|
-
|
34
24
|
FILE_EXT_ZIP = "zip"
|
35
25
|
FILE_EXT_GZ = "gz"
|
36
26
|
|
@@ -131,6 +121,11 @@ SBML_DFS_SCHEMA = SimpleNamespace(
|
|
131
121
|
|
132
122
|
ENTITIES_W_DATA = {SBML_DFS.SPECIES, SBML_DFS.REACTIONS}
|
133
123
|
|
124
|
+
ENTITIES_TO_ENTITY_DATA = {
|
125
|
+
SBML_DFS.SPECIES: SBML_DFS.SPECIES_DATA,
|
126
|
+
SBML_DFS.REACTIONS: SBML_DFS.REACTIONS_DATA,
|
127
|
+
}
|
128
|
+
|
134
129
|
REQUIRED_REACTION_FROMEDGELIST_COLUMNS = [
|
135
130
|
"sc_id_up",
|
136
131
|
"sc_id_down",
|
@@ -211,11 +206,14 @@ IDENTIFIERS = SimpleNamespace(
|
|
211
206
|
ONTOLOGY="ontology", IDENTIFIER="identifier", BQB="bqb", URL="url"
|
212
207
|
)
|
213
208
|
|
214
|
-
|
215
|
-
SBML_DFS.S_ID,
|
209
|
+
IDENTIFIERS_REQUIRED_VARS = {
|
216
210
|
IDENTIFIERS.ONTOLOGY,
|
217
211
|
IDENTIFIERS.IDENTIFIER,
|
218
212
|
IDENTIFIERS.BQB,
|
213
|
+
}
|
214
|
+
|
215
|
+
SPECIES_IDENTIFIERS_REQUIRED_VARS = IDENTIFIERS_REQUIRED_VARS | {
|
216
|
+
SBML_DFS.S_ID,
|
219
217
|
SBML_DFS.S_NAME,
|
220
218
|
}
|
221
219
|
|
@@ -296,29 +294,34 @@ MINI_SBO_NAME_TO_POLARITY = {
|
|
296
294
|
# affect whether a reaction can occur
|
297
295
|
# for example, if I remove any substrate a reaction won't occur
|
298
296
|
# but I would have to remove all catalysts for it to not occur
|
297
|
+
SBO_ROLES_DEFS = SimpleNamespace(
|
298
|
+
DEFINING="DEFINING", REQUIRED="REQUIRED", OPTIONAL="OPTIONAL", SBO_ROLE="sbo_role"
|
299
|
+
)
|
300
|
+
|
299
301
|
SBO_NAME_TO_ROLE = {
|
300
|
-
SBOTERM_NAMES.REACTANT:
|
301
|
-
SBOTERM_NAMES.PRODUCT:
|
302
|
-
SBOTERM_NAMES.INTERACTOR:
|
303
|
-
SBOTERM_NAMES.CATALYST:
|
304
|
-
SBOTERM_NAMES.INHIBITOR:
|
305
|
-
SBOTERM_NAMES.STIMULATOR:
|
306
|
-
SBOTERM_NAMES.MODIFIER:
|
302
|
+
SBOTERM_NAMES.REACTANT: SBO_ROLES_DEFS.DEFINING,
|
303
|
+
SBOTERM_NAMES.PRODUCT: SBO_ROLES_DEFS.DEFINING,
|
304
|
+
SBOTERM_NAMES.INTERACTOR: SBO_ROLES_DEFS.DEFINING,
|
305
|
+
SBOTERM_NAMES.CATALYST: SBO_ROLES_DEFS.REQUIRED,
|
306
|
+
SBOTERM_NAMES.INHIBITOR: SBO_ROLES_DEFS.OPTIONAL,
|
307
|
+
SBOTERM_NAMES.STIMULATOR: SBO_ROLES_DEFS.OPTIONAL,
|
308
|
+
SBOTERM_NAMES.MODIFIER: SBO_ROLES_DEFS.OPTIONAL,
|
307
309
|
}
|
308
310
|
|
309
311
|
# see also https://github.com/calico/netcontextr/blob/main/R/reactionTrimmingFunctions.R
|
310
312
|
VALID_SBO_ROLES = (
|
311
313
|
# there is a direct correspondence between the set of defining entries and the identity of a reaction
|
312
314
|
# e.g., the stoichiometry of a metabolic reaction or the members of a protein-protein interaction
|
313
|
-
|
315
|
+
SBO_ROLES_DEFS.DEFINING,
|
314
316
|
# 1+ entries are needed if entries were initially defined. i.e., reactions which require a catalyst
|
315
317
|
# would no longer exist if the catalyst was removed, but many reactions do not require a catalyst.
|
316
|
-
|
318
|
+
SBO_ROLES_DEFS.REQUIRED,
|
317
319
|
# 0+ entries. optional species can be added to or removed from a reaction without changing its identity
|
318
|
-
|
320
|
+
SBO_ROLES_DEFS.OPTIONAL,
|
319
321
|
)
|
320
322
|
|
321
|
-
# required variables for the edgelist formats used by
|
323
|
+
# required variables for the edgelist formats used by the matching subpackage
|
324
|
+
# also used in some network modules
|
322
325
|
CPR_EDGELIST = SimpleNamespace(
|
323
326
|
S_ID_UPSTREAM="s_id_upstream",
|
324
327
|
S_ID_DOWNSTREAM="s_id_downstream",
|
@@ -354,24 +357,8 @@ RESOLVE_MATCHES_AGGREGATORS = SimpleNamespace(
|
|
354
357
|
|
355
358
|
RESOLVE_MATCHES_TMP_WEIGHT_COL = "__tmp_weight_for_aggregation__"
|
356
359
|
|
357
|
-
#
|
358
|
-
|
359
|
-
DEFAULT_WT_TRANS = "identity"
|
360
|
-
|
361
|
-
DEFINED_WEIGHT_TRANSFORMATION = {
|
362
|
-
DEFAULT_WT_TRANS: "_wt_transformation_identity",
|
363
|
-
"string": "_wt_transformation_string",
|
364
|
-
"string_inv": "_wt_transformation_string_inv",
|
365
|
-
}
|
360
|
+
# source information
|
366
361
|
|
367
|
-
SCORE_CALIBRATION_POINTS_DICT = {
|
368
|
-
"weights": {"strong": 3, "good": 7, "okay": 20, "weak": 40},
|
369
|
-
"string_wt": {"strong": 950, "good": 400, "okay": 230, "weak": 150},
|
370
|
-
}
|
371
|
-
|
372
|
-
SOURCE_VARS_DICT = {"string_wt": 10}
|
373
|
-
|
374
|
-
# source
|
375
362
|
SOURCE_SPEC = SimpleNamespace(
|
376
363
|
PATHWAY_ID="pathway_id",
|
377
364
|
MODEL="model",
|
@@ -399,8 +386,11 @@ EXPECTED_PW_INDEX_COLUMNS = {
|
|
399
386
|
ONTOLOGIES = SimpleNamespace(
|
400
387
|
CHEBI="chebi",
|
401
388
|
ENSEMBL_GENE="ensembl_gene",
|
389
|
+
ENSEMBL_GENE_VERSION="ensembl_gene_version",
|
402
390
|
ENSEMBL_TRANSCRIPT="ensembl_transcript",
|
391
|
+
ENSEMBL_TRANSCRIPT_VERSION="ensembl_transcript_version",
|
403
392
|
ENSEMBL_PROTEIN="ensembl_protein",
|
393
|
+
ENSEMBL_PROTEIN_VERSION="ensembl_protein_version",
|
404
394
|
GENE_NAME="gene_name",
|
405
395
|
GO="go",
|
406
396
|
MIRBASE="mirbase",
|
@@ -419,7 +409,11 @@ CHARACTERISTIC_COMPLEX_ONTOLOGIES = [
|
|
419
409
|
ONTOLOGIES.MIRBASE,
|
420
410
|
]
|
421
411
|
|
422
|
-
|
412
|
+
ONTOLOGY_SPECIES_ALIASES = {
|
413
|
+
ONTOLOGIES.NCBI_ENTREZ_GENE: {"ncbigene", "ncbi_gene"},
|
414
|
+
ONTOLOGIES.ENSEMBL_GENE: {"ensembl_gene_id"},
|
415
|
+
ONTOLOGIES.UNIPROT: {"Uniprot"},
|
416
|
+
}
|
423
417
|
|
424
418
|
ENSEMBL_MOLECULE_TYPES_TO_ONTOLOGY = {
|
425
419
|
"G": ONTOLOGIES.ENSEMBL_GENE,
|