TSUMUGI 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- TSUMUGI/annotator.py +103 -0
- TSUMUGI/argparser.py +599 -0
- TSUMUGI/core.py +185 -0
- TSUMUGI/data/impc_phenodigm.csv +3406 -0
- TSUMUGI/data/mp.obo +143993 -0
- TSUMUGI/filterer.py +36 -0
- TSUMUGI/formatter.py +122 -0
- TSUMUGI/genewise_annotation_builder.py +94 -0
- TSUMUGI/io_handler.py +189 -0
- TSUMUGI/main.py +300 -0
- TSUMUGI/network_constructor.py +603 -0
- TSUMUGI/ontology_handler.py +62 -0
- TSUMUGI/pairwise_similarity_builder.py +66 -0
- TSUMUGI/report_generator.py +122 -0
- TSUMUGI/similarity_calculator.py +498 -0
- TSUMUGI/subcommands/count_filterer.py +47 -0
- TSUMUGI/subcommands/genes_filterer.py +89 -0
- TSUMUGI/subcommands/graphml_builder.py +158 -0
- TSUMUGI/subcommands/life_stage_filterer.py +48 -0
- TSUMUGI/subcommands/mp_filterer.py +142 -0
- TSUMUGI/subcommands/score_filterer.py +22 -0
- TSUMUGI/subcommands/sex_filterer.py +48 -0
- TSUMUGI/subcommands/webapp_builder.py +358 -0
- TSUMUGI/subcommands/zygosity_filterer.py +48 -0
- TSUMUGI/validator.py +65 -0
- TSUMUGI/web/app/css/app.css +1129 -0
- TSUMUGI/web/app/genelist/network_genelist.html +339 -0
- TSUMUGI/web/app/genelist/network_genelist.js +421 -0
- TSUMUGI/web/app/js/data/dataLoader.js +41 -0
- TSUMUGI/web/app/js/export/graphExporter.js +214 -0
- TSUMUGI/web/app/js/graph/centrality.js +495 -0
- TSUMUGI/web/app/js/graph/components.js +30 -0
- TSUMUGI/web/app/js/graph/filters.js +158 -0
- TSUMUGI/web/app/js/graph/highlighter.js +52 -0
- TSUMUGI/web/app/js/graph/layoutController.js +454 -0
- TSUMUGI/web/app/js/graph/valueScaler.js +43 -0
- TSUMUGI/web/app/js/search/geneSearcher.js +93 -0
- TSUMUGI/web/app/js/search/phenotypeSearcher.js +292 -0
- TSUMUGI/web/app/js/ui/dynamicFontSize.js +30 -0
- TSUMUGI/web/app/js/ui/mobilePanel.js +77 -0
- TSUMUGI/web/app/js/ui/slider.js +22 -0
- TSUMUGI/web/app/js/ui/tooltips.js +514 -0
- TSUMUGI/web/app/js/viewer/pageSetup.js +217 -0
- TSUMUGI/web/app/viewer.html +515 -0
- TSUMUGI/web/app/viewer.js +1593 -0
- TSUMUGI/web/css/sanitize.css +363 -0
- TSUMUGI/web/css/top.css +391 -0
- TSUMUGI/web/image/tsumugi-favicon.ico +0 -0
- TSUMUGI/web/image/tsumugi-icon.png +0 -0
- TSUMUGI/web/image/tsumugi-logo.png +0 -0
- TSUMUGI/web/image/tsumugi-logo.svg +69 -0
- TSUMUGI/web/js/genelist_formatter.js +123 -0
- TSUMUGI/web/js/top.js +338 -0
- TSUMUGI/web/open_webapp_linux.sh +25 -0
- TSUMUGI/web/open_webapp_mac.command +25 -0
- TSUMUGI/web/open_webapp_windows.bat +37 -0
- TSUMUGI/web/serve_index.py +110 -0
- TSUMUGI/web/template/template_index.html +197 -0
- TSUMUGI/web_deployer.py +150 -0
- tsumugi-1.0.1.dist-info/METADATA +504 -0
- tsumugi-1.0.1.dist-info/RECORD +64 -0
- tsumugi-1.0.1.dist-info/WHEEL +4 -0
- tsumugi-1.0.1.dist-info/entry_points.txt +3 -0
- tsumugi-1.0.1.dist-info/licenses/LICENSE +21 -0
TSUMUGI/main.py
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from TSUMUGI import argparser, core, validator
|
|
8
|
+
from TSUMUGI.subcommands import (
|
|
9
|
+
count_filterer,
|
|
10
|
+
genes_filterer,
|
|
11
|
+
graphml_builder,
|
|
12
|
+
life_stage_filterer,
|
|
13
|
+
mp_filterer,
|
|
14
|
+
score_filterer,
|
|
15
|
+
sex_filterer,
|
|
16
|
+
webapp_builder,
|
|
17
|
+
zygosity_filterer,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# Configure the root logger when this module is loaded: DEBUG level, with each
# record rendered as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _read_gene_list(spec: str) -> set[str]:
    """Return the gene symbols named by *spec*.

    *spec* is either the path of an existing file holding one gene symbol per
    line, or a comma-separated string of gene symbols. Surrounding whitespace
    is stripped and blank entries are ignored, so trailing newlines, empty
    lines and ``"A, B"``-style input do not produce bogus symbols.

    Raises:
        ValueError: If no gene symbol remains after cleaning.
    """
    spec_path = Path(spec)
    if spec_path.is_file():
        candidates = spec_path.read_text().splitlines()
    else:
        candidates = spec.split(",")

    gene_list = {symbol.strip() for symbol in candidates}
    gene_list.discard("")
    if not gene_list:
        raise ValueError("Gene list is empty. Please provide at least one gene symbol.")
    return gene_list


def _parse_gene_pairs(records, source: str) -> set[frozenset[str]]:
    """Parse two-column TSV/CSV *records* into a set of unordered gene pairs.

    Args:
        records: Iterable of text lines; each non-blank line must hold exactly
            two gene symbols separated by a tab or, failing that, a comma.
        source: Name of the input (used only in error messages).

    Returns:
        A set of ``frozenset`` pairs, so ``A-B`` and ``B-A`` are the same pair.

    Raises:
        ValueError: If a non-blank record is not a two-symbol pair, or if no
            pair is found at all.
    """
    gene_pairs = set()
    for line_number, record in enumerate(records, start=1):
        # Blank lines (e.g. a trailing newline) carry no pair; skip them
        # instead of reusing the symbols parsed from the previous record.
        if not record.strip():
            continue

        # A tab takes precedence over a comma, as in the TSV-then-CSV check.
        delimiter = "\t" if "\t" in record else ","
        symbols = [symbol.strip() for symbol in record.split(delimiter)]
        if len(symbols) != 2 or not all(symbols):
            raise ValueError(
                f"Invalid gene pair at line {line_number} of {source}: {record!r}. "
                "Expected two gene symbols separated by a tab or a comma."
            )
        gene_pairs.add(frozenset(symbols))

    if not gene_pairs:
        raise ValueError(f"Gene list is empty. Please provide at least one gene pair in {source}.")
    return gene_pairs


def main() -> None:
    """Parse the command line, validate the supplied inputs and run the chosen command.

    The command is selected by ``args.cmd``: ``run`` executes the pipeline,
    while the other commands filter annotations or build GraphML / webapp
    output. Commands that consume pairwise similarity annotations read them
    from ``args.path_pairwise`` or, when that is not set, from ``sys.stdin``.

    Raises:
        ValueError: If a gene list or gene-pair list given to the ``genes``
            command is empty or malformed.
    """
    args = argparser.parse_args()

    logging.info(f"TSUMUGI version: {args.version}")

    ###########################################################
    # Load and validate data
    ###########################################################

    # Not every subcommand defines every option, hence the getattr() lookups.
    if getattr(args, "statistical_results", None):
        validator.validate_statistical_results(args.statistical_results)

    if getattr(args, "mp_obo", None):
        validator.validate_obo_file(args.mp_obo)

    if getattr(args, "impc_phenodigm", None):
        validator.validate_phenodigm_file(args.impc_phenodigm)

    # The excluded term is preferred over the included one when both are set.
    mp_term_id = getattr(args, "exclude", None) or getattr(args, "include", None)
    if getattr(args, "mp_obo", None) and mp_term_id:
        validator.validate_mp_term_id(mp_term_id, args.mp_obo)

    ###########################################################
    # Run commands
    ###########################################################

    if args.cmd == "run":
        logging.info("Running TSUMUGI pipeline")
        Path(args.output_dir).mkdir(parents=True, exist_ok=True)
        core.run_pipeline(args)

    # ===========================================================
    # Subcommands for filtering pairwise similarity annotations
    # ===========================================================

    # -----------------------------------------------------
    # MP term inclusion/exclusion
    # -----------------------------------------------------
    if args.cmd == "mp":
        if args.include:
            if args.pairwise:
                logging.info(f"Including gene pairs with phenotypes related to MP term: {args.include}")
                mp_filterer.include_specific_phenotype(
                    path_pairwise_similarity_annotations=args.path_pairwise or sys.stdin,
                    path_genewise_phenotype_annotations=None,
                    path_obo=args.mp_obo,
                    mp_term_id=args.include,
                    life_stage=args.life_stage,
                    sex=args.sex,
                    zygosity=args.zygosity,
                    is_pairwise=True,
                )
            else:
                logging.info(f"Including genes with phenotypes related to MP term: {args.include}")
                mp_filterer.include_specific_phenotype(
                    path_pairwise_similarity_annotations=None,
                    path_genewise_phenotype_annotations=args.path_genewise,
                    path_obo=args.mp_obo,
                    mp_term_id=args.include,
                    life_stage=args.life_stage,
                    sex=args.sex,
                    zygosity=args.zygosity,
                    is_pairwise=False,
                )
        if args.exclude:
            if args.pairwise:
                logging.info(f"Excluding gene pairs with phenotypes related to MP term: {args.exclude}")
                mp_filterer.exclude_specific_phenotype(
                    path_pairwise_similarity_annotations=args.path_pairwise or sys.stdin,
                    path_genewise_phenotype_annotations=args.path_genewise,
                    path_obo=args.mp_obo,
                    mp_term_id=args.exclude,
                    life_stage=args.life_stage,
                    sex=args.sex,
                    zygosity=args.zygosity,
                    is_pairwise=True,
                )
            else:
                logging.info(f"Excluding genes with phenotypes related to MP term: {args.exclude}")
                mp_filterer.exclude_specific_phenotype(
                    path_pairwise_similarity_annotations=None,
                    path_genewise_phenotype_annotations=args.path_genewise,
                    path_obo=args.mp_obo,
                    mp_term_id=args.exclude,
                    life_stage=args.life_stage,
                    sex=args.sex,
                    zygosity=args.zygosity,
                    is_pairwise=False,
                )

    # -----------------------------------------------------
    # Number of phenotypes per gene/pair
    # -----------------------------------------------------
    if args.cmd == "count":
        logging.info("Filtering gene pairs based on number of phenotypes per gene")
        if args.genewise:
            count_filterer.filter_by_number_of_phenotypes_per_gene(
                path_pairwise_similarity_annotations=args.path_pairwise or sys.stdin,
                path_genewise_phenotype_annotations=args.path_genewise,
                min_phenotypes=args.min,
                max_phenotypes=args.max,
            )
        elif args.pairwise:
            count_filterer.filter_by_number_of_phenotypes_per_pair(
                path_pairwise_similarity_annotations=args.path_pairwise or sys.stdin,
                min_phenotypes=args.min,
                max_phenotypes=args.max,
            )

    # -----------------------------------------------------
    # Score of phenotype similarity per gene/pair
    # -----------------------------------------------------
    if args.cmd == "score":
        logging.info("Filtering gene pairs based on the score of phenotype similarity per gene")
        score_filterer.filter_by_score_of_phenotypes_per_pair(
            path_pairwise_similarity_annotations=args.path_pairwise or sys.stdin,
            min_phenotypes=args.min,
            max_phenotypes=args.max,
        )

    # -----------------------------------------------------
    # gene lists filterer
    # -----------------------------------------------------
    if args.cmd == "genes":
        if args.genewise:
            # --keep / --drop name either a file of symbols or a comma-separated list.
            if args.keep:
                gene_list = _read_gene_list(args.keep)

                logging.info(f"Keeping phenotype annotations matching {len(gene_list)} genes")
                genes_filterer.filter_annotations_by_genes(
                    path_pairwise_similarity_annotations=args.path_pairwise or sys.stdin,
                    gene_list=gene_list,
                    keep=True,
                )
            elif args.drop:
                gene_list = _read_gene_list(args.drop)

                logging.info(f"Dropping phenotype annotations matching {len(gene_list)} genes")
                genes_filterer.filter_annotations_by_genes(
                    path_pairwise_similarity_annotations=args.path_pairwise or sys.stdin,
                    gene_list=gene_list,
                    drop=True,
                )
        else:
            # --keep / --drop name a two-column TSV or CSV file of gene pairs.
            if args.keep:
                gene_pairs = _parse_gene_pairs(Path(args.keep).read_text().splitlines(), args.keep)

                logging.info(f"Keeping phenotype annotations matching {len(gene_pairs)} gene pairs")
                genes_filterer.filter_annotations_by_gene_pairs(
                    path_pairwise_similarity_annotations=args.path_pairwise or sys.stdin,
                    gene_pairs=gene_pairs,
                    keep=True,
                )
            elif args.drop:
                gene_pairs = _parse_gene_pairs(Path(args.drop).read_text().splitlines(), args.drop)

                logging.info(f"Dropping phenotype annotations matching {len(gene_pairs)} gene pairs")
                genes_filterer.filter_annotations_by_gene_pairs(
                    path_pairwise_similarity_annotations=args.path_pairwise or sys.stdin,
                    gene_pairs=gene_pairs,
                    drop=True,
                )

    # -----------------------------------------------------
    # Life stage filterer
    # -----------------------------------------------------
    if args.cmd == "life-stage":
        if args.keep:
            logging.info(f"Keeping phenotype annotations matching life stage: {args.keep}")
            life_stage_filterer.filter_annotations_by_life_stage(
                path_pairwise_similarity_annotations=args.path_pairwise or sys.stdin,
                life_stage=args.keep,
                keep=True,
            )
        elif args.drop:
            logging.info(f"Dropping phenotype annotations matching life stage: {args.drop}")
            life_stage_filterer.filter_annotations_by_life_stage(
                path_pairwise_similarity_annotations=args.path_pairwise or sys.stdin,
                life_stage=args.drop,
                drop=True,
            )

    # -----------------------------------------------------
    # Sex filterer
    # -----------------------------------------------------
    if args.cmd == "sex":
        if args.keep:
            logging.info(f"Keeping phenotype annotations matching sex: {args.keep}")
            sex_filterer.filter_annotations_by_sex(
                path_pairwise_similarity_annotations=args.path_pairwise or sys.stdin,
                sex=args.keep,
                keep=True,
            )
        elif args.drop:
            logging.info(f"Dropping phenotype annotations matching sex: {args.drop}")
            sex_filterer.filter_annotations_by_sex(
                path_pairwise_similarity_annotations=args.path_pairwise or sys.stdin,
                sex=args.drop,
                drop=True,
            )

    # -----------------------------------------------------
    # Zygosity filterer
    # -----------------------------------------------------
    if args.cmd == "zygosity":
        if args.keep:
            logging.info(f"Keeping phenotype annotations matching zygosity: {args.keep}")
            zygosity_filterer.filter_annotations_by_zygosity(
                path_pairwise_similarity_annotations=args.path_pairwise or sys.stdin,
                zygosity=args.keep,
                keep=True,
            )
        elif args.drop:
            logging.info(f"Dropping phenotype annotations matching zygosity: {args.drop}")
            zygosity_filterer.filter_annotations_by_zygosity(
                path_pairwise_similarity_annotations=args.path_pairwise or sys.stdin,
                zygosity=args.drop,
                drop=True,
            )

    # -----------------------------------------------------
    # Build GraphML
    # -----------------------------------------------------
    if args.cmd == "build-graphml":
        logging.info("Building GraphML from pairwise similarity annotations")

        graphml_builder.write_graphml_to_stdout(
            pairwise_path=args.path_pairwise or sys.stdin,
            genewise_path=args.path_genewise,
        )

    # -----------------------------------------------------
    # Build Webapp
    # -----------------------------------------------------
    if args.cmd == "build-webapp":
        logging.info("Building webapp network from pairwise similarity annotations")

        webapp_builder.build_and_save_webapp_network(
            genewise_path=args.path_genewise,
            pairwise_path=args.path_pairwise or sys.stdin,
            output_dir=args.output_dir,
        )
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
if __name__ == "__main__":
    # Invoke the command-line entry point only when this file is executed as a
    # script; importing the module does not call main().
    main()
|