TSUMUGI 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- TSUMUGI/annotator.py +103 -0
- TSUMUGI/argparser.py +599 -0
- TSUMUGI/core.py +185 -0
- TSUMUGI/data/impc_phenodigm.csv +3406 -0
- TSUMUGI/data/mp.obo +143993 -0
- TSUMUGI/filterer.py +36 -0
- TSUMUGI/formatter.py +122 -0
- TSUMUGI/genewise_annotation_builder.py +94 -0
- TSUMUGI/io_handler.py +189 -0
- TSUMUGI/main.py +300 -0
- TSUMUGI/network_constructor.py +603 -0
- TSUMUGI/ontology_handler.py +62 -0
- TSUMUGI/pairwise_similarity_builder.py +66 -0
- TSUMUGI/report_generator.py +122 -0
- TSUMUGI/similarity_calculator.py +498 -0
- TSUMUGI/subcommands/count_filterer.py +47 -0
- TSUMUGI/subcommands/genes_filterer.py +89 -0
- TSUMUGI/subcommands/graphml_builder.py +158 -0
- TSUMUGI/subcommands/life_stage_filterer.py +48 -0
- TSUMUGI/subcommands/mp_filterer.py +142 -0
- TSUMUGI/subcommands/score_filterer.py +22 -0
- TSUMUGI/subcommands/sex_filterer.py +48 -0
- TSUMUGI/subcommands/webapp_builder.py +358 -0
- TSUMUGI/subcommands/zygosity_filterer.py +48 -0
- TSUMUGI/validator.py +65 -0
- TSUMUGI/web/app/css/app.css +1129 -0
- TSUMUGI/web/app/genelist/network_genelist.html +339 -0
- TSUMUGI/web/app/genelist/network_genelist.js +421 -0
- TSUMUGI/web/app/js/data/dataLoader.js +41 -0
- TSUMUGI/web/app/js/export/graphExporter.js +214 -0
- TSUMUGI/web/app/js/graph/centrality.js +495 -0
- TSUMUGI/web/app/js/graph/components.js +30 -0
- TSUMUGI/web/app/js/graph/filters.js +158 -0
- TSUMUGI/web/app/js/graph/highlighter.js +52 -0
- TSUMUGI/web/app/js/graph/layoutController.js +454 -0
- TSUMUGI/web/app/js/graph/valueScaler.js +43 -0
- TSUMUGI/web/app/js/search/geneSearcher.js +93 -0
- TSUMUGI/web/app/js/search/phenotypeSearcher.js +292 -0
- TSUMUGI/web/app/js/ui/dynamicFontSize.js +30 -0
- TSUMUGI/web/app/js/ui/mobilePanel.js +77 -0
- TSUMUGI/web/app/js/ui/slider.js +22 -0
- TSUMUGI/web/app/js/ui/tooltips.js +514 -0
- TSUMUGI/web/app/js/viewer/pageSetup.js +217 -0
- TSUMUGI/web/app/viewer.html +515 -0
- TSUMUGI/web/app/viewer.js +1593 -0
- TSUMUGI/web/css/sanitize.css +363 -0
- TSUMUGI/web/css/top.css +391 -0
- TSUMUGI/web/image/tsumugi-favicon.ico +0 -0
- TSUMUGI/web/image/tsumugi-icon.png +0 -0
- TSUMUGI/web/image/tsumugi-logo.png +0 -0
- TSUMUGI/web/image/tsumugi-logo.svg +69 -0
- TSUMUGI/web/js/genelist_formatter.js +123 -0
- TSUMUGI/web/js/top.js +338 -0
- TSUMUGI/web/open_webapp_linux.sh +25 -0
- TSUMUGI/web/open_webapp_mac.command +25 -0
- TSUMUGI/web/open_webapp_windows.bat +37 -0
- TSUMUGI/web/serve_index.py +110 -0
- TSUMUGI/web/template/template_index.html +197 -0
- TSUMUGI/web_deployer.py +150 -0
- tsumugi-1.0.1.dist-info/METADATA +504 -0
- tsumugi-1.0.1.dist-info/RECORD +64 -0
- tsumugi-1.0.1.dist-info/WHEEL +4 -0
- tsumugi-1.0.1.dist-info/entry_points.txt +3 -0
- tsumugi-1.0.1.dist-info/licenses/LICENSE +21 -0
TSUMUGI/core.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import pickle
|
|
5
|
+
import shutil
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
from collections.abc import Iterator
|
|
8
|
+
from datetime import date
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from TSUMUGI import (
|
|
12
|
+
genewise_annotation_builder,
|
|
13
|
+
io_handler,
|
|
14
|
+
network_constructor,
|
|
15
|
+
pairwise_similarity_builder,
|
|
16
|
+
report_generator,
|
|
17
|
+
web_deployer,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def run_pipeline(args) -> None:
    """Run the TSUMUGI pipeline end to end and write all outputs.

    Stages, in order:

    1. Load the statistical results, the MP ontology and the IMPC Phenodigm
       disease annotations.
    2. Unless ``args.debug_web`` is set: build gene-wise phenotype
       annotations, compute pairwise phenotype similarity and emit the
       phenotype / gene network JSON files into the temporary directory.
    3. Write the lookup files used by the web application.
    4. Assemble the deployable web application under ``TSUMUGI-webapp``.
    5. Remove the temporary directory unless a debug flag is set.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``output_dir``, ``statistical_results``, ``mp_obo``,
            ``impc_phenodigm``, ``version``, ``debug`` and ``debug_web``.
            The whole namespace is also forwarded to
            ``pairwise_similarity_builder.build_pairwise_similarity``.

    Raises:
        FileNotFoundError: If ``args.debug_web`` is set but the cached
            ``genewise_phenotype_significants.pkl`` is missing. That cache
            is only written by a run with ``args.debug`` set.
    """
    root_dir = Path(args.output_dir)
    temp_dir = root_dir / ".tempdir"

    records: Iterator[dict[str, str | float]] = io_handler.load_csv_as_dicts(Path(args.statistical_results))

    ontology_terms: dict[str, dict[str, str | list[str]]] = io_handler.parse_obo_file(Path(args.mp_obo))

    disease_annotations_by_gene: dict[str, list[dict[str, str]]] = io_handler.parse_impc_phenodigm(
        Path(args.impc_phenodigm)
    )

    # The README is the first file written into the output directory, so make
    # sure the directory exists instead of failing with FileNotFoundError.
    root_dir.mkdir(parents=True, exist_ok=True)
    (root_dir / "README.md").write_text(
        f"TSUMUGI version: {args.version}\n Running Date: {date.today().isoformat()}",
        encoding="utf-8",
    )

    if not args.debug_web:
        ###########################################################
        # Build gene-wise phenotype annotations
        ###########################################################

        logging.info("Preprocessing statistical results...")

        genewise_phenotype_annotations = genewise_annotation_builder.build_genewise_phenotype_annotations(
            records, ontology_terms, disease_annotations_by_gene
        )

        path_genewise_phenotype_annotations = root_dir / "genewise_phenotype_annotations.jsonl.gz"
        io_handler.write_jsonl(genewise_phenotype_annotations, path_genewise_phenotype_annotations)

        # Re-read from disk and keep only the significant records in memory.
        genewise_phenotype_significants = [
            record for record in io_handler.read_jsonl(path_genewise_phenotype_annotations) if record["significant"]
        ]

        # --------------------------------------------------------
        # Cache results
        # --------------------------------------------------------
        if args.debug:
            output_dir = temp_dir / "preprocessed"
            output_dir.mkdir(parents=True, exist_ok=True)

            with open(output_dir / "genewise_phenotype_annotations.pkl", "wb") as f:
                pickle.dump(list(io_handler.read_jsonl(path_genewise_phenotype_annotations)), f)
            with open(output_dir / "genewise_phenotype_significants.pkl", "wb") as f:
                pickle.dump(genewise_phenotype_significants, f)

        ###########################################################
        # Calculate phenotype similarity
        ###########################################################

        pairwise_similarity_annotations = pairwise_similarity_builder.build_pairwise_similarity(
            genewise_phenotype_significants,
            ontology_terms,
            args=args,
        )

        path_pairwise_similarity_annotations = root_dir / "pairwise_similarity_annotations.jsonl.gz"
        io_handler.write_jsonl(pairwise_similarity_annotations, path_pairwise_similarity_annotations)

        ###########################################################
        # Generate network
        ###########################################################
        logging.info("Generating phenotype and gene networks...")

        # Gene pairs sharing fewer phenotypes than this are left out of the networks.
        MIN_NUM_PHENOTYPES = 3

        pairwise_similarity_annotations = io_handler.read_jsonl(path_pairwise_similarity_annotations)

        pairwise_similarity_annotations_with_shared_phenotype = [
            r for r in pairwise_similarity_annotations if len(r["phenotype_shared_annotations"]) >= MIN_NUM_PHENOTYPES
        ]

        logging.info("Building phenotype network JSON files...")

        binary_phenotypes = _collect_binary_phenotypes(genewise_phenotype_significants)

        output_dir = temp_dir / "network" / "phenotype"
        output_dir.mkdir(parents=True, exist_ok=True)
        network_constructor.build_phenotype_network_json(
            genewise_phenotype_significants,
            pairwise_similarity_annotations_with_shared_phenotype,
            disease_annotations_by_gene,
            output_dir,
            binary_phenotypes=binary_phenotypes,
        )

        logging.info("Building gene network JSON files...")
        output_dir = temp_dir / "network" / "genesymbol"
        output_dir.mkdir(parents=True, exist_ok=True)

        network_constructor.build_gene_network_json(
            genewise_phenotype_significants,
            pairwise_similarity_annotations_with_shared_phenotype,
            disease_annotations_by_gene,
            output_dir,
        )

        # Drop the large intermediates before the web-application stage.
        del pairwise_similarity_annotations_with_shared_phenotype
        del disease_annotations_by_gene
    else:
        # Web-only debug run: reuse the significant records cached by an
        # earlier run that had ``args.debug`` set.
        path_cached_significants = temp_dir / "preprocessed" / "genewise_phenotype_significants.pkl"
        if not path_cached_significants.is_file():
            raise FileNotFoundError(f"genewise_phenotype_significants.pkl not found in {temp_dir}")

        # NOTE(security): pickle can execute arbitrary code on load. This file
        # is a local cache written by this function; never point the output
        # directory at content from an untrusted source.
        with open(path_cached_significants, "rb") as f:
            genewise_phenotype_significants = pickle.load(f)

    ###########################################################
    # Output data for web application
    ###########################################################

    output_dir = temp_dir / "webapp"
    output_dir.mkdir(parents=True, exist_ok=True)

    # available mp terms
    report_generator.write_available_mp_terms_txt(temp_dir, output_dir / "available_mp_terms.txt")
    available_mp_terms_json = output_dir / "available_mp_terms.json"
    report_generator.write_available_mp_terms_json(temp_dir, available_mp_terms_json)
    report_generator.write_mp_term_id_lookup(
        genewise_phenotype_significants, available_mp_terms_json, output_dir / "mp_term_id_lookup.json"
    )

    # binary phenotypes
    report_generator.write_binary_phenotypes_txt(
        genewise_phenotype_significants, temp_dir, output_dir / "binary_phenotypes.txt"
    )

    # available gene symbols
    report_generator.write_available_gene_symbols_txt(temp_dir, output_dir / "available_gene_symbols.txt")

    # marker symbol to accession id
    report_generator.write_marker_symbol_accession_id_json(
        genewise_phenotype_significants, temp_dir, output_dir / "marker_symbol_accession_id.json"
    )

    ###########################################################
    # Deploy to web application
    ###########################################################

    logging.info("Building web application...")

    output_dir = root_dir / "TSUMUGI-webapp"

    # Start from a clean directory so files from a previous run cannot linger.
    if output_dir.exists():
        shutil.rmtree(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    targetted_phenotypes = web_deployer.select_targetted_phenotypes(temp_dir)
    targetted_genes = web_deployer.select_targetted_genes(temp_dir)

    web_deployer.prepare_files(targetted_phenotypes, targetted_genes, temp_dir, output_dir, args.version)

    if args.debug or args.debug_web:
        # Keep the intermediates so a later debug run can reuse them.
        logging.debug("DEBUG: retain temporary directory: %s", temp_dir.resolve())
    else:
        shutil.rmtree(temp_dir, ignore_errors=True)

    logging.info("Finished!🎊 Results are saved in %s", root_dir.resolve())


def _collect_binary_phenotypes(genewise_phenotype_significants) -> set[str]:
    """Return the MP term names whose observed effect sizes are all 0 or 1.

    Records without an ``effect_size`` key are counted as effect size 0.
    """
    phenotype_effects = defaultdict(set)
    for rec in genewise_phenotype_significants:
        phenotype_effects[rec["mp_term_name"]].add(rec.get("effect_size", 0))

    return {
        mp_term_name
        for mp_term_name, effects in phenotype_effects.items()
        if effects and all(es in (0, 1) for es in effects)
    }
|