TSUMUGI 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. TSUMUGI/annotator.py +103 -0
  2. TSUMUGI/argparser.py +599 -0
  3. TSUMUGI/core.py +185 -0
  4. TSUMUGI/data/impc_phenodigm.csv +3406 -0
  5. TSUMUGI/data/mp.obo +143993 -0
  6. TSUMUGI/filterer.py +36 -0
  7. TSUMUGI/formatter.py +122 -0
  8. TSUMUGI/genewise_annotation_builder.py +94 -0
  9. TSUMUGI/io_handler.py +189 -0
  10. TSUMUGI/main.py +300 -0
  11. TSUMUGI/network_constructor.py +603 -0
  12. TSUMUGI/ontology_handler.py +62 -0
  13. TSUMUGI/pairwise_similarity_builder.py +66 -0
  14. TSUMUGI/report_generator.py +122 -0
  15. TSUMUGI/similarity_calculator.py +498 -0
  16. TSUMUGI/subcommands/count_filterer.py +47 -0
  17. TSUMUGI/subcommands/genes_filterer.py +89 -0
  18. TSUMUGI/subcommands/graphml_builder.py +158 -0
  19. TSUMUGI/subcommands/life_stage_filterer.py +48 -0
  20. TSUMUGI/subcommands/mp_filterer.py +142 -0
  21. TSUMUGI/subcommands/score_filterer.py +22 -0
  22. TSUMUGI/subcommands/sex_filterer.py +48 -0
  23. TSUMUGI/subcommands/webapp_builder.py +358 -0
  24. TSUMUGI/subcommands/zygosity_filterer.py +48 -0
  25. TSUMUGI/validator.py +65 -0
  26. TSUMUGI/web/app/css/app.css +1129 -0
  27. TSUMUGI/web/app/genelist/network_genelist.html +339 -0
  28. TSUMUGI/web/app/genelist/network_genelist.js +421 -0
  29. TSUMUGI/web/app/js/data/dataLoader.js +41 -0
  30. TSUMUGI/web/app/js/export/graphExporter.js +214 -0
  31. TSUMUGI/web/app/js/graph/centrality.js +495 -0
  32. TSUMUGI/web/app/js/graph/components.js +30 -0
  33. TSUMUGI/web/app/js/graph/filters.js +158 -0
  34. TSUMUGI/web/app/js/graph/highlighter.js +52 -0
  35. TSUMUGI/web/app/js/graph/layoutController.js +454 -0
  36. TSUMUGI/web/app/js/graph/valueScaler.js +43 -0
  37. TSUMUGI/web/app/js/search/geneSearcher.js +93 -0
  38. TSUMUGI/web/app/js/search/phenotypeSearcher.js +292 -0
  39. TSUMUGI/web/app/js/ui/dynamicFontSize.js +30 -0
  40. TSUMUGI/web/app/js/ui/mobilePanel.js +77 -0
  41. TSUMUGI/web/app/js/ui/slider.js +22 -0
  42. TSUMUGI/web/app/js/ui/tooltips.js +514 -0
  43. TSUMUGI/web/app/js/viewer/pageSetup.js +217 -0
  44. TSUMUGI/web/app/viewer.html +515 -0
  45. TSUMUGI/web/app/viewer.js +1593 -0
  46. TSUMUGI/web/css/sanitize.css +363 -0
  47. TSUMUGI/web/css/top.css +391 -0
  48. TSUMUGI/web/image/tsumugi-favicon.ico +0 -0
  49. TSUMUGI/web/image/tsumugi-icon.png +0 -0
  50. TSUMUGI/web/image/tsumugi-logo.png +0 -0
  51. TSUMUGI/web/image/tsumugi-logo.svg +69 -0
  52. TSUMUGI/web/js/genelist_formatter.js +123 -0
  53. TSUMUGI/web/js/top.js +338 -0
  54. TSUMUGI/web/open_webapp_linux.sh +25 -0
  55. TSUMUGI/web/open_webapp_mac.command +25 -0
  56. TSUMUGI/web/open_webapp_windows.bat +37 -0
  57. TSUMUGI/web/serve_index.py +110 -0
  58. TSUMUGI/web/template/template_index.html +197 -0
  59. TSUMUGI/web_deployer.py +150 -0
  60. tsumugi-1.0.1.dist-info/METADATA +504 -0
  61. tsumugi-1.0.1.dist-info/RECORD +64 -0
  62. tsumugi-1.0.1.dist-info/WHEEL +4 -0
  63. tsumugi-1.0.1.dist-info/entry_points.txt +3 -0
  64. tsumugi-1.0.1.dist-info/licenses/LICENSE +21 -0
TSUMUGI/annotator.py ADDED
@@ -0,0 +1,103 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from collections.abc import Generator, Iterable, Iterator
5
+
6
+ ###########################################################
7
+ # annotate_life_stage
8
+ ###########################################################
9
+
10
+
11
def _annotate_life_stage(procedure_name: str, pipeline_name: str, embryo_pattern: re.Pattern) -> str:
    """Classify a single record into a life stage.

    Args:
        procedure_name: Procedure name of the record; searched with ``embryo_pattern``.
        pipeline_name: Pipeline name of the record; inspected for the
            substrings "Interval"/"interval" and "Late"/"late".
        embryo_pattern: Compiled pattern that identifies embryo procedures.

    Returns:
        "Embryo" when the pattern matches the procedure name; otherwise
        "Interval" or "Late" depending on the pipeline name (checked in that
        order); otherwise "Early".
    """
    if embryo_pattern.search(procedure_name):
        return "Embryo"
    if "Interval" in pipeline_name or "interval" in pipeline_name:
        return "Interval"
    if "Late" in pipeline_name or "late" in pipeline_name:
        return "Late"
    return "Early"


def annotate_life_stage(records_annotated, embryo_assays: set[str]) -> Iterator[dict]:
    """Add a ``life_stage`` key to every record and yield the records lazily.

    Args:
        records_annotated: Iterable of dict records providing
            ``procedure_name`` and ``pipeline_name``.
        embryo_assays: Assay names; a record whose procedure name contains
            any of them is labelled "Embryo".

    Yields:
        The same record objects, mutated in place with ``life_stage`` set.
    """
    # Empty names are discarded: an empty alternative (or an entirely empty
    # pattern) matches every string and would label every record "Embryo".
    escaped_names = [re.escape(name) for name in embryo_assays if name]
    if escaped_names:
        embryo_pattern = re.compile("|".join(escaped_names))
    else:
        # "(?!)" is an empty negative lookahead: it can never match.
        embryo_pattern = re.compile(r"(?!)")

    for record in records_annotated:
        record["life_stage"] = _annotate_life_stage(
            record["procedure_name"], record["pipeline_name"], embryo_pattern
        )
        yield record
28
+
29
+
30
+ ###########################################################
31
+ # annotate_sexual_dimorphism
32
+ ###########################################################
33
+
34
+
35
+ def _annotate_sexual_dimorphism(
36
+ female_ko_effect_p_value: float, male_ko_effect_p_value: float, threshold: float = 1e-4
37
+ ) -> str:
38
+ if female_ko_effect_p_value <= threshold and male_ko_effect_p_value > threshold:
39
+ return "Female"
40
+ elif male_ko_effect_p_value <= threshold and female_ko_effect_p_value > threshold:
41
+ return "Male"
42
+ else:
43
+ return "None"
44
+
45
+
46
def annotate_sexual_dimorphism(records_annotated, threshold: float = 1e-4) -> Generator[dict]:
    """Tag each record with its sexual dimorphism and yield it.

    Args:
        records_annotated: Iterable of dict records providing
            ``female_ko_effect_p_value`` and ``male_ko_effect_p_value`` (and
            the matching ``*_ko_parameter_estimate`` for a dimorphic record).
        threshold: P-value threshold forwarded to ``_annotate_sexual_dimorphism``.

    Yields:
        The same record objects with ``sexual_dimorphism`` set. For "Female"
        or "Male" records, ``effect_size`` is replaced by that sex's
        parameter estimate; otherwise ``effect_size`` is left untouched.
    """
    estimate_key_by_sex = {
        "Female": "female_ko_parameter_estimate",
        "Male": "male_ko_parameter_estimate",
    }

    for record in records_annotated:
        dimorphism = _annotate_sexual_dimorphism(
            record["female_ko_effect_p_value"], record["male_ko_effect_p_value"], threshold
        )
        record["sexual_dimorphism"] = dimorphism

        # Only sex-specific records take their effect size from one sex.
        estimate_key = estimate_key_by_sex.get(dimorphism)
        if estimate_key is not None:
            record["effect_size"] = record[estimate_key]

        yield record
60
+
61
+
62
+ ###########################################################
63
+ # annotate_diseases
64
+ ###########################################################
65
+
66
+
67
def annotate_diseases(records_annotated, disease_annotations_by_gene: dict) -> Generator[dict]:
    """Attach matching disorder names to each record and yield it.

    Args:
        records_annotated: Iterable of dict records providing ``significant``
            and, for significant records, ``marker_symbol``, ``zygosity`` and
            ``life_stage``.
        disease_annotations_by_gene: Maps a marker symbol to an iterable of
            dicts with ``zygosity``, ``life_stage`` and ``disorder_name``.

    Yields:
        Every input record with ``disease_annotation`` set to a sorted list of
        unique disorder names whose zygosity and life stage both equal the
        record's. Non-significant records and records without a match get an
        empty list.
    """
    for record in records_annotated:
        if not record["significant"]:
            # Non-significant records carry no disease annotation but must
            # still be passed downstream; a bare `continue` here used to drop
            # them from the stream right after annotating them.
            record["disease_annotation"] = []
            yield record
            continue

        zygosity = record["zygosity"]
        life_stage = record["life_stage"]
        candidates = disease_annotations_by_gene.get(record["marker_symbol"], ())

        # A set removes duplicate disorder names; sorting makes the output deterministic.
        record["disease_annotation"] = sorted(
            {
                candidate["disorder_name"]
                for candidate in candidates
                if candidate["zygosity"] == zygosity and candidate["life_stage"] == life_stage
            }
        )

        yield record
89
+
90
+
91
def annotate_significant(records_annotated: Iterable[dict]) -> Generator[dict]:
    """Flag each record as significant or not and yield it exactly once.

    Args:
        records_annotated: Iterable of dict records providing ``mp_term_id``
            and, for records without one, comma-separated
            ``intermediate_mp_term_id`` and ``intermediate_mp_term_name``.

    Yields:
        Every input record. A record with a truthy ``mp_term_id`` gets
        ``significant = True`` and is otherwise unchanged. Any other record
        gets ``significant = False``, ``effect_size = 0.0``, ``p_value = 1.0``
        and takes its MP term id/name from the last comma-separated entry of
        the intermediate fields.
    """
    for record in records_annotated:
        if record["mp_term_id"]:
            record["significant"] = True
        else:
            # The two cases are exclusive. Without the `else`, a significant
            # record fell through after its yield, was overwritten with the
            # non-significant defaults and was yielded a second time.
            record["effect_size"] = 0.0
            record["p_value"] = 1.0
            record["significant"] = False
            record["mp_term_id"] = record["intermediate_mp_term_id"].split(",")[-1]
            record["mp_term_name"] = record["intermediate_mp_term_name"].split(",")[-1]

        yield record
TSUMUGI/argparser.py ADDED
@@ -0,0 +1,599 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from importlib.metadata import PackageNotFoundError, version as pkg_version
5
+ from importlib.resources import files
6
+ from pathlib import Path
7
+
8
+
9
+ def _get_version() -> str:
10
+ """
11
+ Get TSUMUGI version defined in pyproject.toml.
12
+ The argument must match [project.name] (distribution name).
13
+ """
14
+ try:
15
+ return pkg_version("tsumugi")
16
+ except PackageNotFoundError:
17
+ return "not-available"
18
+
19
+
20
def build_parser() -> argparse.ArgumentParser:
    """Assemble the TSUMUGI command-line parser and all of its subcommands.

    The chosen subcommand is stored in ``cmd`` and is mandatory. Subcommands:
    ``run``, ``mp``, ``count``, ``score``, ``genes``, ``life-stage``, ``sex``,
    ``zygosity``, ``build-graphml`` and ``build-webapp``.

    Returns:
        The fully configured ``argparse.ArgumentParser``.
    """
    raw_text = argparse.RawTextHelpFormatter

    # ---------------------------------------------------------
    # Help texts and option definitions shared by subcommands
    # ---------------------------------------------------------
    pairwise_input_help = (
        "Path to 'pairwise_similarity_annotations' file (JSONL or JSONL.gz).\n"
        "If omitted, data are read from STDIN.\n"
    )
    mp_obo_help = (
        "Path to Mammalian Phenotype ontology file (mp.obo).\n"
        "Used to map and infer hierarchical relationships among MP terms.\n"
        "If not available, download 'mp.obo' manually from:\n"
        "https://obofoundry.org/ontology/mp.html"
    )

    def add_pairwise_input(target) -> None:
        # --in: pairwise similarity annotations (STDIN when omitted)
        target.add_argument("--in", dest="path_pairwise", type=str, required=False, help=pairwise_input_help)

    def add_mp_obo(target) -> None:
        # -m / --mp_obo: optional ontology file
        target.add_argument("-m", "--mp_obo", type=str, required=False, help=mp_obo_help)

    def add_genewise_file(target, required: bool, help_text: str) -> None:
        # -a / --genewise_annotations: stored as `path_genewise`
        target.add_argument(
            "-a",
            "--genewise_annotations",
            dest="path_genewise",
            type=str,
            required=required,
            help=help_text,
        )

    def add_level_flags(group, genewise_help: str, pairwise_help: str) -> None:
        # -g / -p: granularity switches
        group.add_argument("-g", "--genewise", action="store_true", help=genewise_help)
        group.add_argument("-p", "--pairwise", action="store_true", help=pairwise_help)

    def add_min_max(target) -> None:
        # --min / --max: integer thresholds
        target.add_argument("--min", type=int, help="Minimum number threshold")
        target.add_argument("--max", type=int, help="Maximum number threshold")

    def add_choice_filter(name, summary, choices, metavar, keep_help, drop_help) -> None:
        # Subcommand with a required, mutually exclusive --keep / --drop
        # restricted to `choices`, plus the common --in option.
        sub = subparsers.add_parser(name, help=summary, formatter_class=raw_text)
        keep_or_drop = sub.add_mutually_exclusive_group(required=True)
        keep_or_drop.add_argument("-k", "--keep", choices=choices, metavar=metavar, help=keep_help)
        keep_or_drop.add_argument("-d", "--drop", choices=choices, metavar=metavar, help=drop_help)
        add_pairwise_input(sub)

    # ---------------------------------------------------------
    # Top-level parser: tsumugi -v / --version
    # ---------------------------------------------------------
    parser = argparse.ArgumentParser(
        description="Run TSUMUGI pipeline and subcommands",
        formatter_class=raw_text,
    )
    parser.add_argument(
        "-v",
        "--version",
        action="version",
        version=f"%(prog)s {_get_version()}",
        help="Show TSUMUGI version and exit.",
    )
    subparsers = parser.add_subparsers(dest="cmd", required=True)

    # ---------------------------------------------------------
    # run: Run TSUMUGI pipeline
    # ---------------------------------------------------------
    run_parser = subparsers.add_parser("run", help="Run TSUMUGI pipeline", formatter_class=raw_text)
    run_parser.add_argument(
        "-o",
        "--output_dir",
        type=str,
        required=True,
        help=(
            "Output directory for TSUMUGI results.\n"
            "All generated files (intermediate and final results) will be saved here.\n"
        ),
    )
    run_parser.add_argument(
        "-s",
        "--statistical_results",
        type=str,
        required=True,
        help=(
            "Path to IMPC statistical_results_ALL.csv file.\n"
            "This file contains statistical test results (effect sizes, p-values, etc.) "
            "for all IMPC phenotyping experiments.\n"
            "If not available, download 'statistical-results-ALL.csv.gz' manually from:\n"
            "https://ftp.ebi.ac.uk/pub/databases/impc/all-data-releases/latest/TSUMUGI-results/"
        ),
    )
    add_mp_obo(run_parser)
    run_parser.add_argument(
        "-i",
        "--impc_phenodigm",
        type=str,
        required=False,
        help=(
            "Path to IMPC Phenodigm annotation file (impc_phenodigm.csv).\n"
            "This file links mouse phenotypes to human diseases based on Phenodigm similarity.\n"
            "If not available, download manually from:\n"
            "https://diseasemodels.research.its.qmul.ac.uk/\n"
        ),
    )
    run_parser.add_argument(
        "-t",
        "--threads",
        type=int,
        default=1,
        help=("Number of threads to use for TSUMUGI pipeline.\nIf not specified, defaults to 1.\n"),
    )
    # Hidden debug switches: --debug retains temporary files; --debug_web
    # additionally skips preprocessing.
    for hidden_flag in ("--debug", "--debug_web"):
        run_parser.add_argument(hidden_flag, action="store_true", help=argparse.SUPPRESS)

    # ---------------------------------------------------------
    # mp: Filter gene pairs by a specific MP term and its descendants
    # ---------------------------------------------------------
    mp_parser = subparsers.add_parser(
        "mp",
        help="Filter gene pairs by a specific MP term and its descendants",
        formatter_class=raw_text,
    )

    # Group A: exactly one of include / exclude
    mp_term_group = mp_parser.add_mutually_exclusive_group(required=True)
    mp_term_group.add_argument(
        "-i",
        "--include",
        dest="include",
        metavar="MP_ID",
        help=("Include gene pairs that share the specified MP term (descendants included).\nExample: -i MP:0001146"),
    )
    mp_term_group.add_argument(
        "-e",
        "--exclude",
        dest="exclude",
        metavar="MP_ID",
        help=(
            "Exclude gene pairs that (when measured) lack the specified MP term "
            "(descendants included).\n"
            "Example: -e MP:0001146"
        ),
    )

    # Group B: optional granularity (genewise / pairwise)
    add_level_flags(
        mp_parser.add_mutually_exclusive_group(required=False),
        "Filter by number of phenotypes per KO mouse",
        "Filter by number of shared phenotypes between KO pairs",
    )

    add_mp_obo(mp_parser)
    add_genewise_file(
        mp_parser,
        False,
        (
            "Path to the 'genewise_phenotype_annotations' file (JSONL or JSONL.gz).\n"
            "Required when using '-e/--exclude' to determine genes that were measured\n"
            "and showed no phenotype for the target MP term.\n"
        ),
    )
    add_pairwise_input(mp_parser)

    # Optional annotation filters
    mp_parser.add_argument(
        "--life_stage",
        type=str,
        required=False,
        help=("Filter by life stage. 'Embryo', 'Early', 'Interval', and 'Late'."),
    )
    mp_parser.add_argument(
        "--sex",
        type=str,
        required=False,
        help=("Filter by sexual dimorphism. 'Male' or 'Female'."),
    )
    mp_parser.add_argument(
        "--zygosity",
        type=str,
        required=False,
        help=("Filter by zygosity. 'Homo', 'Hetero' or 'Hemi'."),
    )

    # ---------------------------------------------------------
    # count: Filter by the number of phenotypes
    # ---------------------------------------------------------
    count_parser = subparsers.add_parser(
        "count",
        help="Filter genes or gene pairs by the number of phenotypes",
        description="Filter genes based on the number of detected phenotypes per KO or shared between KO pairs.",
    )
    add_level_flags(
        count_parser.add_mutually_exclusive_group(required=True),
        "Filter by number of phenotypes per KO mouse",
        "Filter by number of shared phenotypes between KO pairs",
    )
    add_min_max(count_parser)
    add_pairwise_input(count_parser)
    add_genewise_file(
        count_parser,
        False,
        (
            "Path to the 'genewise_phenotype_annotations' file (JSONL or JSONL.gz).\n"
            "Required when using '-g/--genewise' to determine genes that were measured.\n"
        ),
    )

    # ---------------------------------------------------------
    # score: Filter by the similarity score of gene pairs
    # ---------------------------------------------------------
    score_parser = subparsers.add_parser(
        "score",
        help="Filter genes or gene pairs by the similarity score",
        description="Filter genes based on the similarity score per KO or shared between KO pairs.",
    )
    add_min_max(score_parser)
    add_pairwise_input(score_parser)

    # ---------------------------------------------------------
    # genes: Filter by gene symbols or gene pairs
    # ---------------------------------------------------------
    genes_parser = subparsers.add_parser(
        "genes",
        help="Filter gene pairs by gene symbols or gene pairs of phenotype annotations",
        formatter_class=raw_text,
    )
    genes_keep_or_drop = genes_parser.add_mutually_exclusive_group(required=True)
    genes_keep_or_drop.add_argument(
        "-k",
        "--keep",
        metavar="GENE_SYMBOL",
        help="Keep ONLY annotations with the specified gene symbols (comma-separated or path of text file)",
    )
    genes_keep_or_drop.add_argument(
        "-d",
        "--drop",
        metavar="GENE_SYMBOL",
        help="Drop annotations with the specified gene symbols (comma-separated or path of text file)",
    )
    add_level_flags(
        genes_parser.add_mutually_exclusive_group(required=False),
        "Filter by user-provided gene symbols",
        "Filter by user-provided gene pairs",
    )
    add_pairwise_input(genes_parser)

    # ---------------------------------------------------------
    # life-stage / sex / zygosity: keep-or-drop filters
    # ---------------------------------------------------------
    add_choice_filter(
        "life-stage",
        "Filter gene pairs by life stage of phenotype annotations",
        ("Embryo", "Early", "Interval", "Late"),
        "LIFE_STAGE",
        "Keep ONLY annotations with the specified life stage",
        "Drop annotations with the specified life stage",
    )
    add_choice_filter(
        "sex",
        "Filter gene pairs by sexual dimorphism of phenotype annotations",
        ("Male", "Female", "None"),
        "SEX",
        "Keep ONLY annotations with the specified sexual dimorphism",
        "Drop annotations with the specified sexual dimorphism",
    )
    add_choice_filter(
        "zygosity",
        "Filter gene pairs by zygosity of phenotype annotations",
        ("Homo", "Hetero", "Hemi"),
        "ZYGOSITY",
        "Keep ONLY annotations with the specified zygosity",
        "Drop annotations with the specified zygosity",
    )

    # ---------------------------------------------------------
    # build-graphml
    # ---------------------------------------------------------
    graphml_parser = subparsers.add_parser(
        "build-graphml",
        help="Build a GraphML file from gene pair similarity annotations",
        formatter_class=raw_text,
    )
    add_pairwise_input(graphml_parser)
    add_genewise_file(
        graphml_parser,
        True,
        ("Path to the 'genewise_phenotype_annotations' file (JSONL or JSONL.gz).\n"),
    )

    # ---------------------------------------------------------
    # build-webapp
    # ---------------------------------------------------------
    webapp_parser = subparsers.add_parser(
        "build-webapp",
        help="Build a webapp from gene pair similarity annotations",
        formatter_class=raw_text,
    )
    add_pairwise_input(webapp_parser)
    add_genewise_file(
        webapp_parser,
        True,
        ("Path to the 'genewise_phenotype_annotations' file (JSONL or JSONL.gz).\n"),
    )
    webapp_parser.add_argument(
        "-o",
        "--out",
        dest="output_dir",
        type=str,
        required=True,
    )

    return parser
506
+
507
+
508
+ ###############################################################################
509
+ # main
510
+ ###############################################################################
511
+
512
+
513
def parse_args(argv=None):
    """Parse the command line and apply cross-option defaults and checks.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        The parsed ``argparse.Namespace`` with:
        - ``mp_obo`` / ``impc_phenodigm`` defaulting to the files bundled in
          the ``TSUMUGI/data`` package directory (``run``; ``mp_obo`` for ``mp``),
        - ``pairwise`` defaulting to True for ``mp`` and ``genes`` when
          neither ``-g`` nor ``-p`` was given,
        - ``version`` set to the result of ``_get_version()``.

    Invalid option combinations are reported through ``parser.error``, which
    prints a usage message and exits.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    ########################################################################
    # run
    ########################################################################
    if args.cmd == "run":
        # If args.mp_obo or args.impc_phenodigm are not provided,
        # use the built-in files inside the TSUMUGI/data directory.
        if not args.mp_obo:
            args.mp_obo = str(files("TSUMUGI") / "data" / "mp.obo")

        if not args.impc_phenodigm:
            args.impc_phenodigm = str(files("TSUMUGI") / "data" / "impc_phenodigm.csv")

    ########################################################################
    # mp
    ########################################################################
    if args.cmd == "mp":
        # If args.mp_obo is not provided,
        # use the built-in file inside the TSUMUGI/data directory.
        if not args.mp_obo:
            args.mp_obo = str(files("TSUMUGI") / "data" / "mp.obo")

        if args.exclude and not args.path_genewise:
            # The message names the flag as the user types it
            # (`--genewise_annotations`), not its internal dest.
            parser.error(
                "mp: '-a/--genewise_annotations' is required when using '-e/--exclude'.\n"
                "Path to the 'genewise_phenotype_annotations' file (JSONL or JSONL.gz).\n"
            )

        # Default to pairwise if neither -g / --genewise nor -p / --pairwise is specified.
        # The two flags are mutually exclusive store_true options, so an
        # explicit choice is already reflected correctly in the namespace;
        # an explicit -p must not be reset to False.
        if not args.genewise and not args.pairwise:
            args.pairwise = True

    ########################################################################
    # count / score
    ########################################################################
    # When using the count/score subcommand, at least one of --min or --max must be specified.
    if args.cmd == "count" and args.min is None and args.max is None:
        parser.error("count: At least one of '--min' or '--max' must be specified.")

    if args.cmd == "score" and args.min is None and args.max is None:
        parser.error("score: At least one of '--min' or '--max' must be specified.")

    # When using -g / --genewise with the count subcommand,
    # the --genewise_annotations option is required.
    if args.cmd == "count" and args.genewise and not args.path_genewise:
        parser.error(
            "count: '-a/--genewise_annotations' is required when using '-g/--genewise'.\n"
            "Provide the gene phenotype annotations JSONL(.gz) file to identify genes that were measured."
        )

    ########################################################################
    # genes
    ########################################################################
    if args.cmd == "genes":
        path_arg = args.keep or args.drop

        # Default to pairwise if neither -g / --genewise nor -p / --pairwise is specified.
        if not args.genewise and not args.pairwise:
            args.pairwise = True

        # In pairwise mode, the gene list must be provided as a text file.
        # `path_arg` is None when an empty string was passed to -k/-d; treat
        # that as an invalid path instead of letting Path(None) raise TypeError.
        if args.pairwise and not (path_arg and Path(path_arg).is_file()):
            parser.error(
                "genes --pairwise: Please provide a valid path to a text file containing gene symbols or gene pairs."
            )

    ########################################################################
    # build-webapp
    ########################################################################
    # For build-webapp, check that output_dir is a directory (not a file)
    if args.cmd == "build-webapp" and Path(args.output_dir).suffix:
        parser.error(
            f"build-webapp: {args.output_dir} looks like a file name (has extension). Please specify a directory."
        )

    args.version = _get_version()

    return args