TSUMUGI 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. TSUMUGI/annotator.py +103 -0
  2. TSUMUGI/argparser.py +599 -0
  3. TSUMUGI/core.py +185 -0
  4. TSUMUGI/data/impc_phenodigm.csv +3406 -0
  5. TSUMUGI/data/mp.obo +143993 -0
  6. TSUMUGI/filterer.py +36 -0
  7. TSUMUGI/formatter.py +122 -0
  8. TSUMUGI/genewise_annotation_builder.py +94 -0
  9. TSUMUGI/io_handler.py +189 -0
  10. TSUMUGI/main.py +300 -0
  11. TSUMUGI/network_constructor.py +603 -0
  12. TSUMUGI/ontology_handler.py +62 -0
  13. TSUMUGI/pairwise_similarity_builder.py +66 -0
  14. TSUMUGI/report_generator.py +122 -0
  15. TSUMUGI/similarity_calculator.py +498 -0
  16. TSUMUGI/subcommands/count_filterer.py +47 -0
  17. TSUMUGI/subcommands/genes_filterer.py +89 -0
  18. TSUMUGI/subcommands/graphml_builder.py +158 -0
  19. TSUMUGI/subcommands/life_stage_filterer.py +48 -0
  20. TSUMUGI/subcommands/mp_filterer.py +142 -0
  21. TSUMUGI/subcommands/score_filterer.py +22 -0
  22. TSUMUGI/subcommands/sex_filterer.py +48 -0
  23. TSUMUGI/subcommands/webapp_builder.py +358 -0
  24. TSUMUGI/subcommands/zygosity_filterer.py +48 -0
  25. TSUMUGI/validator.py +65 -0
  26. TSUMUGI/web/app/css/app.css +1129 -0
  27. TSUMUGI/web/app/genelist/network_genelist.html +339 -0
  28. TSUMUGI/web/app/genelist/network_genelist.js +421 -0
  29. TSUMUGI/web/app/js/data/dataLoader.js +41 -0
  30. TSUMUGI/web/app/js/export/graphExporter.js +214 -0
  31. TSUMUGI/web/app/js/graph/centrality.js +495 -0
  32. TSUMUGI/web/app/js/graph/components.js +30 -0
  33. TSUMUGI/web/app/js/graph/filters.js +158 -0
  34. TSUMUGI/web/app/js/graph/highlighter.js +52 -0
  35. TSUMUGI/web/app/js/graph/layoutController.js +454 -0
  36. TSUMUGI/web/app/js/graph/valueScaler.js +43 -0
  37. TSUMUGI/web/app/js/search/geneSearcher.js +93 -0
  38. TSUMUGI/web/app/js/search/phenotypeSearcher.js +292 -0
  39. TSUMUGI/web/app/js/ui/dynamicFontSize.js +30 -0
  40. TSUMUGI/web/app/js/ui/mobilePanel.js +77 -0
  41. TSUMUGI/web/app/js/ui/slider.js +22 -0
  42. TSUMUGI/web/app/js/ui/tooltips.js +514 -0
  43. TSUMUGI/web/app/js/viewer/pageSetup.js +217 -0
  44. TSUMUGI/web/app/viewer.html +515 -0
  45. TSUMUGI/web/app/viewer.js +1593 -0
  46. TSUMUGI/web/css/sanitize.css +363 -0
  47. TSUMUGI/web/css/top.css +391 -0
  48. TSUMUGI/web/image/tsumugi-favicon.ico +0 -0
  49. TSUMUGI/web/image/tsumugi-icon.png +0 -0
  50. TSUMUGI/web/image/tsumugi-logo.png +0 -0
  51. TSUMUGI/web/image/tsumugi-logo.svg +69 -0
  52. TSUMUGI/web/js/genelist_formatter.js +123 -0
  53. TSUMUGI/web/js/top.js +338 -0
  54. TSUMUGI/web/open_webapp_linux.sh +25 -0
  55. TSUMUGI/web/open_webapp_mac.command +25 -0
  56. TSUMUGI/web/open_webapp_windows.bat +37 -0
  57. TSUMUGI/web/serve_index.py +110 -0
  58. TSUMUGI/web/template/template_index.html +197 -0
  59. TSUMUGI/web_deployer.py +150 -0
  60. tsumugi-1.0.1.dist-info/METADATA +504 -0
  61. tsumugi-1.0.1.dist-info/RECORD +64 -0
  62. tsumugi-1.0.1.dist-info/WHEEL +4 -0
  63. tsumugi-1.0.1.dist-info/entry_points.txt +3 -0
  64. tsumugi-1.0.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,504 @@
1
+ Metadata-Version: 2.4
2
+ Name: TSUMUGI
3
+ Version: 1.0.1
4
+ Summary: TSUMUGI: Phenotype-Driven Gene Network Identifier
5
+ License: MIT
6
+ License-File: LICENSE
7
+ Author: Akihiro Kuno
8
+ Author-email: akuno@md.tsukuba.ac.jp
9
+ Requires-Python: >=3.10
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Programming Language :: Python :: 3.14
18
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
19
+ Requires-Dist: networkx (>=3.3)
20
+ Requires-Dist: numpy (>=1.21.0)
21
+ Requires-Dist: tqdm (>=4.64.0)
22
+ Project-URL: Homepage, https://github.com/akikuno/TSUMUGI
23
+ Description-Content-Type: text/markdown
24
+
25
+ <p align="center">
26
+ <a href="https://larc-tsukuba.github.io/tsumugi/" target="_blank">
27
+ <img src="https://raw.githubusercontent.com/akikuno/TSUMUGI-dev/refs/heads/main/image/tsumugi-logo.jpg" alt="Tsumugi Logo" width="80%">
28
+ </a>
29
+ </p>
30
+
31
+ [![License](https://img.shields.io/badge/License-MIT-9cf.svg)](https://choosealicense.com/licenses/mit/)
32
+ [![Test](https://img.shields.io/github/actions/workflow/status/akikuno/tsumugi-dev/pytest.yml?branch=main&label=Test&color=brightgreen)](https://github.com/akikuno/tsumugi-dev/actions)
33
+ [![PyPI](https://img.shields.io/pypi/v/tsumugi.svg?label=PyPI&color=orange)](https://pypi.org/project/tsumugi/)
34
+ [![Bioconda](https://img.shields.io/conda/v/bioconda/tsumugi?label=Bioconda&color=orange)](https://anaconda.org/bioconda/tsumugi)
35
+ [![DOI](https://zenodo.org/badge/441025227.svg)](https://doi.org/10.5281/zenodo.14957711)
36
+ [![Contact](https://img.shields.io/badge/Contact-923DE2)](https://forms.gle/ME8EJZZHaRNgKZ979)
37
+
38
+ Translations: [日本語](https://github.com/akikuno/TSUMUGI-dev/blob/main/doc/README_JP.md) | [한국어](https://github.com/akikuno/TSUMUGI-dev/blob/main/doc/README_KR.md) | [简体中文](https://github.com/akikuno/TSUMUGI-dev/blob/main/doc/README_ZH_CN.md) | [繁體中文](https://github.com/akikuno/TSUMUGI-dev/blob/main/doc/README_ZH_TW.md) | [हिन्दी](https://github.com/akikuno/TSUMUGI-dev/blob/main/doc/README_HI.md) | [Bahasa Indonesia](https://github.com/akikuno/TSUMUGI-dev/blob/main/doc/README_ID.md) | [Tiếng Việt](https://github.com/akikuno/TSUMUGI-dev/blob/main/doc/README_VN.md) | [Español](https://github.com/akikuno/TSUMUGI-dev/blob/main/doc/README_ES.md) | [Français](https://github.com/akikuno/TSUMUGI-dev/blob/main/doc/README_FR.md) | [Deutsch](https://github.com/akikuno/TSUMUGI-dev/blob/main/doc/README_DE.md) | [Português](https://github.com/akikuno/TSUMUGI-dev/blob/main/doc/README_PT.md)
39
+
40
+ **TSUMUGI (Trait-driven Surveillance for Mutation-based Gene module Identification)** is a web tool that uses knockout (KO) mouse phenotype data from the [International Mouse Phenotyping Consortium (IMPC)](https://www.mousephenotype.org/) to **extract and visualize gene modules based on phenotypic similarity**.
41
+
42
+ **TSUMUGI (紡ぎ)** comes from the idea of “weaving together gene groups that form phenotypes.”
43
+
44
+ This web app is available to everyone online👇️
45
+
46
+ 🔗https://larc-tsukuba.github.io/tsumugi/
47
+
48
+ # 📖 How to Use TSUMUGI
49
+
50
+ TSUMUGI supports three kinds of input.
51
+
52
+ ### Phenotype
53
+ Enter a phenotype of interest to search for **genes whose KO mice have similar overall phenotype profiles**.
54
+ Phenotype names follow [Mammalian Phenotype Ontology (MPO)](https://www.informatics.jax.org/vocab/mp_ontology).
55
+
56
+ 👉 [Phenotype list](https://github.com/larc-tsukuba/tsumugi/blob/main/data/available_mp_terms.txt)
57
+
58
+ ### Gene
59
+ Specify one gene to search for **other genes whose KO mice show similar phenotypes**.
60
+ Gene symbols follow [MGI](http://www.informatics.jax.org/).
61
+
62
+ 👉 [Gene list](https://github.com/larc-tsukuba/tsumugi/blob/main/data/available_gene_symbols.txt)
63
+
64
+ ### Gene List
65
+ Paste multiple genes (one per line). This extracts phenotypically similar genes **among the genes in the list**.
66
+
67
+ > [!CAUTION]
68
+ > If no similar genes are found: `No similar phenotypes were found among the entered genes.`
69
+ > If more than 200 similar genes are found: `Too many genes submitted. Please limit the number to 200 or fewer.`
70
+
71
+ ### 📥 Download data
72
+
73
+ TSUMUGI reports gzipped JSONL files.
74
+
75
+ #### `genewise_phenotype_annotations.jsonl.gz`
76
+ - Gene symbol (e.g., "1110059G10Rik")
77
+ - Marker accession ID (e.g., "MGI:1913452")
78
+ - Phenotype term name/ID (e.g., "fused joints", "MP:0000137")
79
+ - Effect size (e.g., 0.0, 1.324)
80
+ - Significance flag (true/false)
81
+ - Zygosity ("Homo", "Hetero", "Hemi")
82
+ - Life stage ("Embryo", "Early", "Interval", "Late")
83
+ - Sexual dimorphism ("None", "Male", "Female")
84
+ - Disease annotation (e.g., [] or "Premature Ovarian Failure 18")
85
+
86
+ Example:
87
+ ```
88
+ {"life_stage": "Early", "marker_symbol": "1110059G10Rik", "marker_accession_id": "MGI:1913452", "effect_size": 0.0, "mp_term_name": "fused joints", "disease_annotation": [], "significant": false, "zygosity": "Homo", "sexual_dimorphism": "None", "mp_term_id": "MP:0000137"}
89
+ ```
90
+
91
+ #### `pairwise_similarity_annotations.jsonl.gz`
92
+ - Gene pair (`gene1_symbol`, `gene2_symbol`)
93
+ - `phenotype_shared_annotations` (per-phenotype metadata: life stage, zygosity, sexual dimorphism)
94
+ - `phenotype_similarity_score` (Phenodigm score, 0–100)
95
+
96
+ Example:
97
+ ```
98
+ {"gene1_symbol": "1110059G10Rik", "gene2_symbol": "Cog6", "phenotype_shared_annotations": {"vertebral transformation": {"zygosity": "Homo", "life_stage": "Early", "sexual_dimorphism": "Male"}}, "phenotype_similarity_score": 42}
99
+ ```
100
+
101
+ # 🌐 Network
102
+
103
+ The page transitions and draws the network automatically.
104
+
105
+ > [!IMPORTANT]
106
+ > Gene pairs with **3 or more shared abnormal phenotypes** and **phenotypic similarity > 0.0** are visualized.
107
+
108
+ ### Network panel
109
+ **Nodes** represent genes. Click to see the list of abnormal phenotypes observed in that KO mouse; drag to rearrange positions.
110
+ **Edges** show shared phenotypes; click to view details.
111
+ **Modules** outline subnetworks of genes. Click a module to list phenotypes involving its member genes; drag modules to reposition them and avoid overlap.
112
+
113
+ ### Control panel
114
+ Adjust network display from the left panel.
115
+
116
+ #### Filter by phenotypic similarity
117
+ The `Phenotypes similarity` slider thresholds edges by the Resnik-based Phenodigm similarity score.
118
+
119
+ > [!NOTE]
120
+ > For how we compute similarity, see: 👉 [🔍 How We Calculate Phenotypically Similar Genes](#-how-we-calculate-phenotypically-similar-genes)
121
+
122
+ #### Filter by phenotype severity
123
+ `Phenotype severity` slider filters nodes by effect size (severity in KO mice). Higher values mean stronger impact.
124
+
125
+ > [!NOTE]
126
+ > This slider is hidden for binary phenotypes (e.g., [abnormal embryo development](https://larc-tsukuba.github.io/tsumugi/app/phenotype/abnormal_embryo_development.html); binary list: 👉 [here](https://github.com/larc-tsukuba/tsumugi/blob/main/data/binary_phenotypes.txt)) and when the input is a gene or a gene list.
127
+
128
+ #### Specify genotype
129
+ Choose the genotype in which phenotypes appear:
130
+ - `Homo`: homozygous
131
+ - `Hetero`: heterozygous
132
+ - `Hemi`: hemizygous
133
+
134
+ #### Specify sex
135
+ Extract sex-specific phenotypes:
136
+ - `Female`
137
+ - `Male`
138
+
139
+ #### Specify life stage
140
+ Filter by life stage in which phenotypes appear:
141
+ - `Embryo`
142
+ - `Early` (0–16 weeks)
143
+ - `Interval` (17–48 weeks)
144
+ - `Late` (49+ weeks)
145
+
146
+ ### Markup panel
147
+
148
+ #### Highlight: Human Disease
149
+ Highlight genes linked to human disease (IMPC Disease Models Portal data).
150
+
151
+ #### Search: Specific Gene
152
+ Search gene names within the network.
153
+
154
+ #### Layout & Display
155
+ Adjust layout, font size, edge width, and node repulsion (Cose layout).
156
+
157
+ #### Export
158
+ Export the current network as PNG/CSV/GraphML.
159
+ CSV includes connected-component (module) IDs and phenotype lists per gene; GraphML is Cytoscape-compatible.
160
+
161
+ # 🛠 Command-Line Interface (CLI)
162
+
163
+ The TSUMUGI CLI allows you to use the latest IMPC data downloaded locally, and provides more fine-grained filtering and output options than the web tool.
164
+
165
+ ## Features
166
+
167
+ - Recompute with IMPC `statistical-results-ALL.csv.gz` (optionally `mp.obo`, `impc_phenodigm.csv`).
168
+ - Filter by presence/absence of MP terms.
169
+ - Filter by gene list (comma-separated or text file).
170
+ - Outputs: GraphML (`tsumugi build-graphml`), offline webapp bundle (`tsumugi build-webapp`).
171
+
172
+ ## Installation
173
+
174
+ BioConda:
175
+ ```bash
176
+ conda install -c conda-forge -c bioconda tsumugi
177
+ ```
178
+
179
+ PyPI:
180
+ ```bash
181
+ pip install tsumugi
182
+ ```
183
+
184
+ You are ready if `tsumugi --version` prints the version.
185
+
186
+ ## Available commands
187
+
188
+ - `tsumugi run`: Recompute the network from IMPC data
189
+ - `tsumugi mp --include/--exclude (--pairwise/--genewise)`: Filter gene pairs or genes that contain / do not show an MP term
190
+ - `tsumugi count --pairwise/--genewise (--min/--max)`: Filter by phenotype counts (pairwise or per gene)
191
+ - `tsumugi score (--min/--max)`: Filter by phenotype similarity score (pairwise)
192
+ - `tsumugi genes --keep/--drop`: Keep/drop by gene list (comma-separated or text file)
193
+ - `tsumugi life-stage --keep/--drop`: Filter by life stage (Embryo/Early/Interval/Late)
194
+ - `tsumugi sex --keep/--drop`: Filter by sex (Male/Female/None)
195
+ - `tsumugi zygosity --keep/--drop`: Filter by zygosity (Homo/Hetero/Hemi)
196
+ - `tsumugi build-graphml`: Generate GraphML (Cytoscape, etc.)
197
+ - `tsumugi build-webapp`: Generate TSUMUGI webapp assets (local HTML/CSS/JS)
198
+
199
+ > [!NOTE]
200
+ > All filtering subcommands stream JSONL to STDOUT.
201
+ > Redirect with `>` if you want to save results to a file.
202
+
203
+
204
+ ## Usage
205
+
206
+ ### Recompute from IMPC data (`tsumugi run`)
207
+ If `--mp_obo` is omitted, TSUMUGI uses the bundled `mp.obo` (`data-version: releases/2025-08-27`).
208
+ If `--impc_phenodigm` is omitted, it uses the file fetched on 2025-10-01 from the [IMPC Disease Models Portal](https://diseasemodels.research.its.qmul.ac.uk/).
209
+ ```bash
210
+ tsumugi run \
211
+ --output_dir ./tsumugi-output \
212
+ --statistical_results ./statistical-results-ALL.csv.gz \
213
+ --threads 8
214
+ ```
215
+ Outputs: `./tsumugi-output` contains genewise annotations (`genewise_phenotype_annotations.jsonl.gz`), pairwise similarity data (`pairwise_similarity_annotations.jsonl.gz`), and visualization assets (`TSUMUGI-webapp`).
216
+
217
+ > [!IMPORTANT]
218
+ > The `TSUMUGI-webapp` directory includes OS-specific launch scripts; double-click to open the local web app:
219
+ > - Windows: `open_webapp_windows.bat`
220
+ > - macOS: `open_webapp_mac.command`
221
+ > - Linux: `open_webapp_linux.sh`
222
+
223
+ ### Filter by MP term (`tsumugi mp --include/--exclude`)
224
+ Extract gene pairs (or genes) that include phenotypes of interest, or pairs whose relevant phenotypes were measured but did not show significant abnormalities.
225
+
226
+ ```txt
227
+ tsumugi mp [-h] (-i MP_ID | -e MP_ID) [-g | -p] [-m MP_OBO] [-a GENEWISE_ANNOTATIONS] [--in IN] [--life_stage LIFE_STAGE] [--sex SEX] [--zygosity ZYGOSITY]
228
+ ```
229
+
230
+ #### `-i MP_ID`, `--include MP_ID`
231
+ Include genes/gene pairs that have the specified MP term (descendants included).
232
+
233
+ #### `-e MP_ID`, `--exclude MP_ID`
234
+ Return genes/gene pairs that were measured for the specified MP term (descendants included) and did **not** show a significant phenotype. Requires `-a/--genewise_annotations`.
235
+
236
+ #### `-g`, `--genewise`
237
+ Filter at gene level. Reads `genewise_phenotype_annotations.jsonl(.gz)`. When using `--genewise`, specify `-a/--genewise_annotations`.
238
+
239
+ #### `-p`, `--pairwise`
240
+ Filter at gene-pair level. Targets `pairwise_similarity_annotations.jsonl(.gz)`. If `--in` is omitted, reads from STDIN.
241
+
242
+ #### `-m MP_OBO`, `--mp_obo MP_OBO`
243
+ Path to Mammalian Phenotype ontology (mp.obo). If omitted, uses the bundled `data/mp.obo`.
244
+
245
+ #### `-a GENEWISE_ANNOTATIONS`, `--genewise_annotations GENEWISE_ANNOTATIONS`
246
+ Path to the genewise annotation file (JSONL/.gz). Required for `--exclude`; also specify when using `--genewise`.
247
+
248
+ #### `--in IN`
249
+ Path to the pairwise annotation file (JSONL/.gz). If omitted, reads from STDIN.
250
+
251
+ #### `--life_stage LIFE_STAGE`
252
+ Additional filter by life stage. Available values: `Embryo`, `Early`, `Interval`, `Late`.
253
+
254
+ #### `--sex SEX`
255
+ Additional filter by sexual dimorphism. Use the values present in annotations (e.g., `Male`, `Female`, `None`).
256
+
257
+ #### `--zygosity ZYGOSITY`
258
+ Additional filter by zygosity. Available values: `Homo`, `Hetero`, `Hemi`.
259
+
260
+ ```bash
261
+ # Extract only gene pairs that include MP:0001146 (abnormal testis morphology) or descendant terms (e.g., MP:0004849 abnormal testis size)
262
+ tsumugi mp --include MP:0001146 \
263
+ --in pairwise_similarity_annotations.jsonl.gz \
264
+ > pairwise_filtered.jsonl
265
+
266
+ # Extract gene pairs whose genes were measured for MP:0001146 (descendants included) and did not show a significant abnormality
267
+ tsumugi mp --exclude MP:0001146 \
268
+ --genewise_annotations genewise_phenotype_annotations.jsonl.gz \
269
+ --in pairwise_similarity_annotations.jsonl.gz \
270
+ > pairwise_filtered.jsonl
271
+
272
+ # Extract significant gene-level annotations containing MP:0001146 (descendants included)
273
+ tsumugi mp --include MP:0001146 \
274
+ --genewise \
275
+ --genewise_annotations genewise_phenotype_annotations.jsonl.gz \
276
+ > genewise_filtered.jsonl
277
+
278
+ # Extract genes measured for MP:0001146 (descendants included) that did not show a significant abnormality
279
+ tsumugi mp --exclude MP:0001146 \
280
+ --genewise \
281
+ --genewise_annotations genewise_phenotype_annotations.jsonl.gz \
282
+ > genewise_no_phenotype.jsonl
283
+ ```
284
+
285
+ > [!IMPORTANT]
286
+ > **Descendant MP terms of the specified ID are also handled.**
287
+ > For example, if you specify `MP:0001146 (abnormal testis morphology)`, descendant terms such as `MP:0004849 (abnormal testis size)` are considered as well.
288
+
289
+ ### Filter by phenotype counts (`tsumugi count`)
290
+ ```txt
291
+ tsumugi count [-h] (-g | -p) [--min MIN] [--max MAX] [--in IN] [-a GENEWISE_ANNOTATIONS]
292
+ ```
293
+
294
+ Filter genes or gene pairs by the number of phenotypes. At least one of `--min` or `--max` is required.
295
+
296
+ #### `-g`, `--genewise`
297
+ Filter by the number of significant phenotypes per gene. Requires `-a/--genewise_annotations` with `genewise_phenotype_annotations.jsonl(.gz)`.
298
+
299
+ #### `-p`, `--pairwise`
300
+ Filter by the number of shared phenotypes per gene pair. If `--in` is omitted, reads `pairwise_similarity_annotations.jsonl(.gz)` from STDIN.
301
+
302
+ #### `--min MIN`, `--max MAX`
303
+ Lower/upper bounds for phenotype counts. Use either flag alone for one-sided filtering.
304
+
305
+ #### `--in IN`
306
+ Path to the pairwise annotation file (JSONL/.gz). If omitted, reads from STDIN.
307
+
308
+ #### `-a GENEWISE_ANNOTATIONS`, `--genewise_annotations GENEWISE_ANNOTATIONS`
309
+ Path to the genewise annotation file (JSONL/.gz). Required with `--genewise`.
310
+
311
+ - Shared phenotypes per pair:
312
+ ```bash
313
+ tsumugi count --pairwise --min 3 --max 20 \
314
+ --in pairwise_similarity_annotations.jsonl.gz \
315
+ > pairwise_min3_max20.jsonl
316
+ ```
317
+ - Phenotypes per gene (genewise required):
318
+ ```bash
319
+ tsumugi count --genewise --min 5 --max 50 \
320
+ --genewise_annotations genewise_phenotype_annotations.jsonl.gz \
321
+ --in pairwise_similarity_annotations.jsonl.gz \
322
+ > genewise_min5_max50.jsonl
323
+ ```
324
+ `--min` or `--max` alone is fine.
325
+
326
+ ### Filter by similarity score (`tsumugi score`)
327
+ ```txt
328
+ tsumugi score [-h] [--min MIN] [--max MAX] [--in IN]
329
+ ```
330
+
331
+ Filter gene pairs by `phenotype_similarity_score` (0–100). At least one of `--min` or `--max` is required.
332
+
333
+ #### `--min MIN`, `--max MAX`
334
+ Lower/upper bounds for phenotype similarity score. Use either flag alone for one-sided filtering.
335
+
336
+ #### `--in IN`
337
+ Path to the pairwise annotation file (JSONL/.gz). If omitted, reads from STDIN.
338
+
339
+ ```bash
340
+ tsumugi score --min 50 --max 80 \
341
+ --in pairwise_similarity_annotations.jsonl.gz \
342
+ > pairwise_score50_80.jsonl
343
+ ```
344
+
345
+ `--min` or `--max` alone is fine.
346
+
347
+ ### Filter by gene list (`tsumugi genes --keep/--drop`)
348
+ ```txt
349
+ tsumugi genes [-h] (-k GENE_SYMBOL | -d GENE_SYMBOL) [--in IN]
350
+ ```
351
+
352
+ #### `-k GENE_SYMBOL`, `--keep GENE_SYMBOL`
353
+ Keep only pairs containing specified genes (comma-separated list or text file).
354
+
355
+ #### `-d GENE_SYMBOL`, `--drop GENE_SYMBOL`
356
+ Drop pairs containing specified genes (comma-separated list or text file).
357
+
358
+ #### `--in IN`
359
+ Path to the pairwise annotation file (JSONL/.gz). If omitted, reads from STDIN.
360
+
361
+ ```bash
362
+ tsumugi genes --keep genes.txt \
363
+ --in pairwise_similarity_annotations.jsonl.gz \
364
+ > pairwise_keep_genes.jsonl
365
+
366
+ tsumugi genes --drop geneA,geneB \
367
+ --in pairwise_similarity_annotations.jsonl.gz \
368
+ > pairwise_drop_genes.jsonl
369
+ ```
370
+
371
+ ### Filter by life stage (`tsumugi life-stage --keep/--drop`)
372
+ ```txt
373
+ tsumugi life-stage [-h] (-k LIFE_STAGE | -d LIFE_STAGE) [--in IN]
374
+ ```
375
+
376
+ #### `-k LIFE_STAGE`, `--keep LIFE_STAGE`
377
+ Keep only annotations with the specified life stage (`Embryo`, `Early`, `Interval`, `Late`).
378
+
379
+ #### `-d LIFE_STAGE`, `--drop LIFE_STAGE`
380
+ Drop annotations with the specified life stage.
381
+
382
+ #### `--in IN`
383
+ Path to the pairwise annotation file (JSONL/.gz). If omitted, reads from STDIN.
384
+
385
+ ```bash
386
+ tsumugi life-stage --keep Early \
387
+ --in pairwise_similarity_annotations.jsonl.gz \
388
+ > pairwise_lifestage_early.jsonl
389
+ ```
390
+
391
+ ### Filter by sex (`tsumugi sex --keep/--drop`)
392
+ ```txt
393
+ tsumugi sex [-h] (-k SEX | -d SEX) [--in IN]
394
+ ```
395
+
396
+ #### `-k SEX`, `--keep SEX`
397
+ Keep only annotations with the specified sexual dimorphism (`Male`, `Female`, `None`).
398
+
399
+ #### `-d SEX`, `--drop SEX`
400
+ Drop annotations with the specified sexual dimorphism.
401
+
402
+ #### `--in IN`
403
+ Path to the pairwise annotation file (JSONL/.gz). If omitted, reads from STDIN.
404
+
405
+ ```bash
406
+ tsumugi sex --drop Male \
407
+ --in pairwise_similarity_annotations.jsonl.gz \
408
+ > pairwise_no_male.jsonl
409
+ ```
410
+
411
+ ### Filter by zygosity (`tsumugi zygosity --keep/--drop`)
412
+ ```txt
413
+ tsumugi zygosity [-h] (-k ZYGOSITY | -d ZYGOSITY) [--in IN]
414
+ ```
415
+
416
+ #### `-k ZYGOSITY`, `--keep ZYGOSITY`
417
+ Keep only annotations with the specified zygosity (`Homo`, `Hetero`, `Hemi`).
418
+
419
+ #### `-d ZYGOSITY`, `--drop ZYGOSITY`
420
+ Drop annotations with the specified zygosity.
421
+
422
+ #### `--in IN`
423
+ Path to the pairwise annotation file (JSONL/.gz). If omitted, reads from STDIN.
424
+
425
+ ```bash
426
+ tsumugi zygosity --keep Homo \
427
+ --in pairwise_similarity_annotations.jsonl.gz \
428
+ > pairwise_homo.jsonl
429
+ ```
430
+
431
+ ### Export GraphML / webapp
432
+ ```txt
433
+ tsumugi build-graphml [-h] [--in IN] -a GENEWISE_ANNOTATIONS
434
+ ```
435
+
436
+ #### `--in IN`
437
+ Path to the pairwise annotation file (JSONL/.gz). If omitted, reads from STDIN.
438
+
439
+ #### `-a GENEWISE_ANNOTATIONS`, `--genewise_annotations GENEWISE_ANNOTATIONS`
440
+ Path to the genewise annotation file (JSONL/.gz). Required.
441
+
442
+ ```bash
443
+ tsumugi build-graphml \
444
+ --in pairwise_similarity_annotations.jsonl.gz \
445
+ --genewise_annotations genewise_phenotype_annotations.jsonl.gz \
446
+ > network.graphml
447
+ ```
448
+
449
+ ```txt
450
+ tsumugi build-webapp [-h] [--in IN] -a GENEWISE_ANNOTATIONS -o OUT
451
+ ```
452
+
453
+ #### `--in IN`
454
+ Path to the pairwise annotation file (JSONL/.gz). If omitted, reads from STDIN.
455
+
456
+ #### `-a GENEWISE_ANNOTATIONS`, `--genewise_annotations GENEWISE_ANNOTATIONS`
457
+ Path to the genewise annotation file (JSONL/.gz). Required.
458
+
459
+ #### `-o OUT`, `--out OUT`
460
+ Output directory for the webapp bundle (HTML/CSS/JS + network data). Do not specify a filename with an extension.
461
+
462
+ ```bash
463
+ tsumugi build-webapp \
464
+ --in pairwise_similarity_annotations.jsonl.gz \
465
+ --genewise_annotations genewise_phenotype_annotations.jsonl.gz \
466
+ --out ./webapp_output
467
+ ```
468
+
469
+ CLI supports STDIN/STDOUT, so you can chain commands:
470
+ `zcat pairwise_similarity_annotations.jsonl.gz | tsumugi mp ... | tsumugi genes ... > out.jsonl`
471
+
472
+ # 🔍 How We Calculate Phenotypically Similar Genes
473
+
474
+ ## Data source
475
+ [IMPC Release-23.0](https://ftp.ebi.ac.uk/pub/databases/impc/all-data-releases/release-23.0/results) `statistical-results-ALL.csv.gz`
476
+ Columns: [Data fields](https://www.mousephenotype.org/help/programmatic-data-access/data-fields/)
477
+
478
+ ## Preprocessing
479
+ Extract gene–phenotype pairs with KO mouse P-value (`p_value`, `female_ko_effect_p_value`, or `male_ko_effect_p_value`) ≤ 0.0001.
480
+ - Annotate genotype-specific phenotypes: `homo`, `hetero`, `hemi`
481
+ - Annotate sex-specific phenotypes: `female`, `male`
482
+
483
+ ## Phenotypic similarity
484
+ TSUMUGI currently follows a **Phenodigm-like** approach ([Smedley D, et al. (2013)](https://doi.org/10.1093/database/bat025)). We compute **Resnik similarity** between MP terms and **Jaccard similarity** between term sets, then combine them by the **geometric mean**. The key difference from the original Phenodigm is that TSUMUGI adds **metadata weighting** (zygosity, life stage, sexual dimorphism) when aggregating similarities.
485
+
486
+ 1. Build the MP ontology and compute Information Content (IC) for each term:
487
+ `IC(term) = -log((|Descendants(term)| + 1) / |All MP terms|)`
488
+ Terms below the 5th percentile of IC are set to 0.
489
+ 2. For each MP term pair, find the most specific common ancestor and compute Resnik similarity as its IC.
490
+ Compute Jaccard index over the ancestor sets.
491
+ Pairwise term similarity = `sqrt(Resnik * Jaccard)`.
492
+ 3. For each gene pair, build a term-by-term similarity matrix and apply metadata weighting.
493
+ Zygosity, life stage, and sexual dimorphism matches contribute weights of 0.25/0.5/0.75/1.0 for 0/1/2/3 matches.
494
+ 4. Apply Phenodigm-style scaling to 0–100:
495
+ Use row/column maxima to get actual max and mean similarity.
496
+ Normalize by theoretical max/mean based on IC, then compute
497
+ `Score = 100 * (normalized_max + normalized_mean) / 2`.
498
+ If a theoretical denominator is 0, that term is set to 0.
499
+
500
+
501
+ # ✉️ Contact
502
+ - Google Form: https://forms.gle/ME8EJZZHaRNgKZ979
503
+ - GitHub Issues: https://github.com/akikuno/TSUMUGI-dev/issues/new/choose
504
+
@@ -0,0 +1,64 @@
1
+ TSUMUGI/annotator.py,sha256=-t-gneApB-TywJ50OJtiSHP4DBOcdNEnL9Bo-NozbQo,3759
2
+ TSUMUGI/argparser.py,sha256=IbhawNMDJem6ZLpKIamlCiDZI8p2TrI_G0BUfi_hEd8,20181
3
+ TSUMUGI/core.py,sha256=Fk3T2eHFQRMcjOcyvPDd2DZuUwzYCjYZaFeo5He5fRM,7611
4
+ TSUMUGI/data/impc_phenodigm.csv,sha256=bPXkj6dFWmfXYORMKS6uLoM3y7Er_MmPm__UOYhjNrE,589835
5
+ TSUMUGI/data/mp.obo,sha256=DMzR5QOOha41HdB2G8XtcDC5JfbrsUQ1qxXD5b0y2-0,7369563
6
+ TSUMUGI/filterer.py,sha256=yla2SCosdkh8HlV-FXG_6CAgMi0X2-zfDjuUWyg1TuU,1497
7
+ TSUMUGI/formatter.py,sha256=8TR9z-SgZGPfwac6Wycl0VGTqs8BHzdaMf6VE6so3qU,4875
8
+ TSUMUGI/genewise_annotation_builder.py,sha256=tVn4qK1pDYredRn9zbC3AqxLkKAmBBQpjmERLRwisKM,3323
9
+ TSUMUGI/io_handler.py,sha256=P9fJn3x_bLk-QE5TYbpaljXzA9josz40tjwN3m5wvyI,6082
10
+ TSUMUGI/main.py,sha256=phQxSwsl81zNkjbR_SJRKschkyPDYsry1xtHc-tPJ5I,12828
11
+ TSUMUGI/network_constructor.py,sha256=f7N6avj28z8dXLLMuc73N8217AMPq1GX3sd6aII_9s8,23432
12
+ TSUMUGI/ontology_handler.py,sha256=DkTOvBN7joS9b5hSxJlRWy4ajrZwlcdU434fw4dMGAA,2342
13
+ TSUMUGI/pairwise_similarity_builder.py,sha256=lRek5xtjQOeYuKhd9_GFTp04u1m_FlC6Qop3pwTwo5s,2405
14
+ TSUMUGI/report_generator.py,sha256=M4YnMiWVfxXWzAvj_3VtwlGGBsqi2g5tIWCUh0VD4fE,5112
15
+ TSUMUGI/similarity_calculator.py,sha256=sEmzlyKnQxM9ryiF7mtmFPa0CuNsX2_gf8g38uOMOoo,19564
16
+ TSUMUGI/subcommands/count_filterer.py,sha256=drifh9V6pICYSWLxyU4onxwnwhDxh3CqtrgwIrBP8Hg,1933
17
+ TSUMUGI/subcommands/genes_filterer.py,sha256=lyyf23Va-QExZAFuDH7Yo3480ED3kKTYkrXU3d_DkMs,3479
18
+ TSUMUGI/subcommands/graphml_builder.py,sha256=5UDhyGyK7sa462l53uwO0wXUzlFgtJsoXgDpizp96AY,5369
19
+ TSUMUGI/subcommands/life_stage_filterer.py,sha256=sh5_0MeORtXOktO-7BFv7CzqPyS2M-UMihpz2v82QWU,1819
20
+ TSUMUGI/subcommands/mp_filterer.py,sha256=x_-zaI2ug4A-9CVKAL9GyCaH6DoHn9k10cMui-U7lUc,6623
21
+ TSUMUGI/subcommands/score_filterer.py,sha256=k6krEqcAPqskC8BEfMsmuDALOIBS1ZuNWKqVjWkBzJg,813
22
+ TSUMUGI/subcommands/sex_filterer.py,sha256=XvRmy1-hQ9g29hkXZw2Ex5dWdiEfzVZkYFatm3MiQ7c,1774
23
+ TSUMUGI/subcommands/webapp_builder.py,sha256=l_5lNP6RY8lz7X9DxM2EzHoAO-lD4laNXkYbaXkCaaw,11961
24
+ TSUMUGI/subcommands/zygosity_filterer.py,sha256=xK6Ay2B3MJIbr9uoSF1JAk_ZhAIJmsX1Emi94fzkjJs,1801
25
+ TSUMUGI/validator.py,sha256=L3vIduJypDMnMFj7KDauoIpQfjb5-JzPAlyx9h7I3Yc,2289
26
+ TSUMUGI/web/app/css/app.css,sha256=ZLefsRoVasGLkVDkG64m8vZuhfqI_yPjuonZFW8vnQY,22915
27
+ TSUMUGI/web/app/genelist/network_genelist.html,sha256=AJeIbaj27BG_c6fxDgAzyZeh1ZjBiWkBOWjN0rW4hco,16348
28
+ TSUMUGI/web/app/genelist/network_genelist.js,sha256=R0muENtsn_T8ylPQi1_bu78B6WTCFbHhd38I8TUc5ZM,15784
29
+ TSUMUGI/web/app/js/data/dataLoader.js,sha256=_rl3nsTsLRNOwunit6ooI4cuqoT2UXntHgDtfuMgDiw,1161
30
+ TSUMUGI/web/app/js/export/graphExporter.js,sha256=2zpEhkD4VexmMW_xhuR0zkJIdp7Kj1J7Hvcwg3yA-6E,7066
31
+ TSUMUGI/web/app/js/graph/centrality.js,sha256=VGmgzev1V-8z29XYExis_VraJXNvQ_r6OOnJCkhuBYo,18091
32
+ TSUMUGI/web/app/js/graph/components.js,sha256=FyGzB6Cinf1UEPRSxPeEWjcHTyaC3UhaHFLQe75FVhI,1072
33
+ TSUMUGI/web/app/js/graph/filters.js,sha256=5MbQoo9oSAbKVET9vCUtWl07S73VJ9KnI4M9tdNMzdI,5810
34
+ TSUMUGI/web/app/js/graph/highlighter.js,sha256=86kupr1TTiZVFRtOpZn9k8WikSNoqAZFZbjZ97Uu4Nk,1804
35
+ TSUMUGI/web/app/js/graph/layoutController.js,sha256=Dx-dprmwkEBozzXVY2BFhrhfe6NSZ5SMfXr644k6cQg,16347
36
+ TSUMUGI/web/app/js/graph/valueScaler.js,sha256=w0lkzMntUcIlxMPMrirHfs3n5Xq6UcF_hBssxwCeKHc,1546
37
+ TSUMUGI/web/app/js/search/geneSearcher.js,sha256=USLUdTtPBoEq9OulMTOYvYV4iyD4vkanaCmqAgjhsOA,2772
38
+ TSUMUGI/web/app/js/search/phenotypeSearcher.js,sha256=KVt9TYA7BBDLTSX4m7dMmDKIlTl8w9uf1HH7bElErLw,9679
39
+ TSUMUGI/web/app/js/ui/dynamicFontSize.js,sha256=gUUDKYsybHhDmN7r47z9KITwxsZR04d0umsp3ursXJ8,945
40
+ TSUMUGI/web/app/js/ui/mobilePanel.js,sha256=4etkE93sVN8F5CVl91j7pbOO6Z1Lzo2SOj6xOm-BB54,2471
41
+ TSUMUGI/web/app/js/ui/slider.js,sha256=gzz0pR6LduLgJAs7Yg9ITdy72upayQf6avPIwf1vSVI,735
42
+ TSUMUGI/web/app/js/ui/tooltips.js,sha256=Mgie9Kt5PqBmxV-BquGsOFJFZRBVCLG-Ed2m3R1Sfr0,19044
43
+ TSUMUGI/web/app/js/viewer/pageSetup.js,sha256=4w70NJxTASXdvfh970xfuQoSCL2xquXP1R57jmfHklY,7278
44
+ TSUMUGI/web/app/viewer.html,sha256=5VFwyI9VrWI5Wx25f1tNlHvcUD_eP6MxqJfIKpqcrsg,24861
45
+ TSUMUGI/web/app/viewer.js,sha256=D8Us8_ZH1bNq54Zfaz6CGB7AwQFg5jJEzxDUlUI-vrE,54227
46
+ TSUMUGI/web/css/sanitize.css,sha256=-TDNmUgHd7MxBBh2xvqREEA8YK2rBRbEeDc7Sjm3w04,7381
47
+ TSUMUGI/web/css/top.css,sha256=qQoPhzWxSckARsag4rLvrh58wlskLUDkjW2TXBWegXI,7817
48
+ TSUMUGI/web/image/tsumugi-favicon.ico,sha256=-Ng70-e5SSlcJN1uObeWl7hBK3oDXj9sUJizf-y4UeA,5430
49
+ TSUMUGI/web/image/tsumugi-icon.png,sha256=DKmuXDoK0_Cip9XdYAeASduqP7fi1utT1wFkbmFbz3c,260520
50
+ TSUMUGI/web/image/tsumugi-logo.png,sha256=YDFZCf520jiAeUekniHKawOtu99n7435GVoTw2NpHIg,173346
51
+ TSUMUGI/web/image/tsumugi-logo.svg,sha256=mRPy1WE40UliXAh0oR6_VKiA_l1QtMBuz8bk7JYrLVc,4155
52
+ TSUMUGI/web/js/genelist_formatter.js,sha256=4GwWBQ6vphGUYHu1L7QUqnDBDiQe9EYdGZXp5Kggcgs,4147
53
+ TSUMUGI/web/js/top.js,sha256=BBeMoR1O6YqKqIygAdd1MiJdCc8tvIIASXVUF-MonZE,11985
54
+ TSUMUGI/web/open_webapp_linux.sh,sha256=ldhe0QlcoVyYMe0PjKLBJLzYbKZfCFnlauTQOlErOuo,664
55
+ TSUMUGI/web/open_webapp_mac.command,sha256=YXr-8eqVus9Euv2uazJkVNMOF94PrDBQ75b1OjuwUX0,696
56
+ TSUMUGI/web/open_webapp_windows.bat,sha256=WgPD1bTI5vfgWAWuy-grjnc7eU7egNIDJndAgcavvL0,763
57
+ TSUMUGI/web/serve_index.py,sha256=lwTOLQXUfgkQ884V0QgXu3efwoDxEZ3OkM4-aT6Zp5U,3183
58
+ TSUMUGI/web/template/template_index.html,sha256=oSWWEBESAH-WRRYN1K4MKwHxj8pgzc09q8EmDZ62k9Q,8907
59
+ TSUMUGI/web_deployer.py,sha256=iEjURyoBWuXv1L7MVBVutl6mmNElO2vRCvIfEU4kNeM,6203
60
+ tsumugi-1.0.1.dist-info/METADATA,sha256=83FZflkBThEvh-fu26uuHtJNZosVKY8Y6hhzqoOPA3A,21250
61
+ tsumugi-1.0.1.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
62
+ tsumugi-1.0.1.dist-info/entry_points.txt,sha256=ZXlsE2eOQY9ffM0zodx-6gEKT_ZT8rqbsONeq6LR5Zw,45
63
+ tsumugi-1.0.1.dist-info/licenses/LICENSE,sha256=mNuqjmHG0acqpWyJLuKPYarB6amPTkcfLjgHcWJ7Kko,1069
64
+ tsumugi-1.0.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 2.3.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ tsumugi=TSUMUGI.main:main
3
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2021 Akihiro Kuno
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.