TSUMUGI 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. TSUMUGI/annotator.py +103 -0
  2. TSUMUGI/argparser.py +599 -0
  3. TSUMUGI/core.py +185 -0
  4. TSUMUGI/data/impc_phenodigm.csv +3406 -0
  5. TSUMUGI/data/mp.obo +143993 -0
  6. TSUMUGI/filterer.py +36 -0
  7. TSUMUGI/formatter.py +122 -0
  8. TSUMUGI/genewise_annotation_builder.py +94 -0
  9. TSUMUGI/io_handler.py +189 -0
  10. TSUMUGI/main.py +300 -0
  11. TSUMUGI/network_constructor.py +603 -0
  12. TSUMUGI/ontology_handler.py +62 -0
  13. TSUMUGI/pairwise_similarity_builder.py +66 -0
  14. TSUMUGI/report_generator.py +122 -0
  15. TSUMUGI/similarity_calculator.py +498 -0
  16. TSUMUGI/subcommands/count_filterer.py +47 -0
  17. TSUMUGI/subcommands/genes_filterer.py +89 -0
  18. TSUMUGI/subcommands/graphml_builder.py +158 -0
  19. TSUMUGI/subcommands/life_stage_filterer.py +48 -0
  20. TSUMUGI/subcommands/mp_filterer.py +142 -0
  21. TSUMUGI/subcommands/score_filterer.py +22 -0
  22. TSUMUGI/subcommands/sex_filterer.py +48 -0
  23. TSUMUGI/subcommands/webapp_builder.py +358 -0
  24. TSUMUGI/subcommands/zygosity_filterer.py +48 -0
  25. TSUMUGI/validator.py +65 -0
  26. TSUMUGI/web/app/css/app.css +1129 -0
  27. TSUMUGI/web/app/genelist/network_genelist.html +339 -0
  28. TSUMUGI/web/app/genelist/network_genelist.js +421 -0
  29. TSUMUGI/web/app/js/data/dataLoader.js +41 -0
  30. TSUMUGI/web/app/js/export/graphExporter.js +214 -0
  31. TSUMUGI/web/app/js/graph/centrality.js +495 -0
  32. TSUMUGI/web/app/js/graph/components.js +30 -0
  33. TSUMUGI/web/app/js/graph/filters.js +158 -0
  34. TSUMUGI/web/app/js/graph/highlighter.js +52 -0
  35. TSUMUGI/web/app/js/graph/layoutController.js +454 -0
  36. TSUMUGI/web/app/js/graph/valueScaler.js +43 -0
  37. TSUMUGI/web/app/js/search/geneSearcher.js +93 -0
  38. TSUMUGI/web/app/js/search/phenotypeSearcher.js +292 -0
  39. TSUMUGI/web/app/js/ui/dynamicFontSize.js +30 -0
  40. TSUMUGI/web/app/js/ui/mobilePanel.js +77 -0
  41. TSUMUGI/web/app/js/ui/slider.js +22 -0
  42. TSUMUGI/web/app/js/ui/tooltips.js +514 -0
  43. TSUMUGI/web/app/js/viewer/pageSetup.js +217 -0
  44. TSUMUGI/web/app/viewer.html +515 -0
  45. TSUMUGI/web/app/viewer.js +1593 -0
  46. TSUMUGI/web/css/sanitize.css +363 -0
  47. TSUMUGI/web/css/top.css +391 -0
  48. TSUMUGI/web/image/tsumugi-favicon.ico +0 -0
  49. TSUMUGI/web/image/tsumugi-icon.png +0 -0
  50. TSUMUGI/web/image/tsumugi-logo.png +0 -0
  51. TSUMUGI/web/image/tsumugi-logo.svg +69 -0
  52. TSUMUGI/web/js/genelist_formatter.js +123 -0
  53. TSUMUGI/web/js/top.js +338 -0
  54. TSUMUGI/web/open_webapp_linux.sh +25 -0
  55. TSUMUGI/web/open_webapp_mac.command +25 -0
  56. TSUMUGI/web/open_webapp_windows.bat +37 -0
  57. TSUMUGI/web/serve_index.py +110 -0
  58. TSUMUGI/web/template/template_index.html +197 -0
  59. TSUMUGI/web_deployer.py +150 -0
  60. tsumugi-1.0.1.dist-info/METADATA +504 -0
  61. tsumugi-1.0.1.dist-info/RECORD +64 -0
  62. tsumugi-1.0.1.dist-info/WHEEL +4 -0
  63. tsumugi-1.0.1.dist-info/entry_points.txt +3 -0
  64. tsumugi-1.0.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,358 @@
1
+ from __future__ import annotations
2
+
3
+ import gzip
4
+ import json
5
+ import shutil
6
+ from collections import defaultdict
7
+ from collections.abc import Iterator
8
+ from pathlib import Path
9
+
10
+ from TSUMUGI import io_handler
11
+
12
# Upper bound on the number of nodes accepted by build_webapp_network; larger
# networks are rejected with a pointer to `tsumugi build-graphml`.
MAX_NODE_COUNT = 150

from importlib.resources import as_file, files

# Web assets are resolved relative to the installed TSUMUGI package.
WEB_DIR = files("TSUMUGI") / "web"
WEB_APP_DIR = WEB_DIR / "app"
# NOTE(review): the wheel's file list shows only web/template/template_index.html;
# confirm that these two template directories are shipped as package data.
TEMPLATE_HTML_DIR = WEB_DIR / "template" / "template-app-html"
TEMPLATE_JS_DIR = WEB_DIR / "template" / "template-app-js"
# Files looked up directly under WEB_DIR and copied into each generated bundle.
LAUNCHER_FILES = [
    "open_webapp_linux.sh",
    "open_webapp_mac.command",
    "open_webapp_windows.bat",
    "serve_index.py",
]
26
+
27
+
28
+ def _create_annotation_string(*parts):
29
+ """Join non-empty parts with commas."""
30
+ return ", ".join([p for p in parts if p])
31
+
32
+
33
+ ###############################################################################
34
+ # Node building
35
+ ###############################################################################
36
+
37
+
38
+ def _format_suffix(zygosity: str, life_stage: str, sexual_dimorphism: str) -> str:
39
+ """Return a suffix like '(Homo, Early, Male)'; omit 'None'."""
40
+ parts = [zygosity, life_stage]
41
+ if sexual_dimorphism and sexual_dimorphism != "None":
42
+ parts.append(sexual_dimorphism)
43
+ return f"({', '.join(parts)})"
44
+
45
+
46
def build_nodes(gene_to_records, all_genes, hide_severity: bool = False):
    """
    Build one Cytoscape.js node per gene in ``all_genes``.

    Each node is ``{"data": {...}}`` where ``data`` holds:
        id, label     : the gene symbol
        phenotype     : unique "{mp_term_name} (zygosity, life_stage[, sexual_dimorphism])" strings
        disease       : unique "{disease_name} (zygosity, life_stage[, sexual_dimorphism])" strings,
                        or "" when the gene has no disease annotation
        node_color    : constant 1
        hide_severity : True; the key is present only when ``hide_severity`` is set

    Args:
        gene_to_records: Mapping of gene symbol to a list of record dicts. Genes
            missing from the mapping get a node with an empty phenotype list.
        all_genes: Iterable of gene symbols to emit nodes for.
        hide_severity: When True, tag every node with ``hide_severity=True``.

    Returns:
        List of node dicts ordered by sorted gene symbol. Phenotype and disease
        entries keep first-seen record order, so the output is reproducible
        from run to run.
    """
    nodes = []

    for gene in sorted(all_genes):
        # Insertion-ordered dicts serve as ordered sets: duplicates collapse
        # while the first-seen order is retained (a plain set would make the
        # emitted order depend on string hashing).
        phenotype_seen: dict[str, None] = {}
        disease_seen: dict[str, None] = {}

        for r in gene_to_records.get(gene, []):
            suffix = _format_suffix(
                zygosity=r.get("zygosity", ""),
                life_stage=r.get("life_stage", ""),
                sexual_dimorphism=r.get("sexual_dimorphism", "None"),
            )

            mp = r.get("mp_term_name", "")
            if mp:
                phenotype_seen[f"{mp} {suffix}"] = None

            # disease_annotation may be absent or None; both are treated as empty
            for dis in r.get("disease_annotation", []) or []:
                disease_seen[f"{dis} {suffix}"] = None

        phenotype_lines = list(phenotype_seen)
        disease_lines = list(disease_seen)

        node = {
            "data": {
                "id": gene,
                "label": gene,
                "phenotype": phenotype_lines,
                # An empty string (not an empty list) marks "no diseases"
                "disease": disease_lines if disease_lines else "",
                "node_color": 1,
            }
        }
        if hide_severity:
            node["data"]["hide_severity"] = True

        nodes.append(node)

    return nodes
104
+
105
+
106
+ ###############################################################################
107
+ # Edge building
108
+ ###############################################################################
109
+
110
+
111
def _build_edges(pairwise_similarity_annotations: Iterator[dict]):
    """
    Convert pairwise similarity records into Cytoscape.js edge dicts.

    Every record yields one edge from ``gene1_symbol`` to ``gene2_symbol``.
    The edge's ``phenotype`` list holds one label per shared phenotype term,
    formatted as "term (zygosity, life_stage, sexual_dimorphism)" with empty
    parts and a "None" sexual dimorphism left out; ``edge_size`` is the
    record's ``phenotype_similarity_score`` (0 when absent).
    """
    edges = []

    for record in pairwise_similarity_annotations:
        source_gene = record["gene1_symbol"]
        target_gene = record["gene2_symbol"]

        # A missing or None mapping is treated as "no shared phenotypes"
        shared_terms = record.get("phenotype_shared_annotations", {}) or {}

        labels = []
        for term_name, annotation in shared_terms.items():
            zygosity = annotation.get("zygosity", "")
            life_stage = annotation.get("life_stage", "")
            sex = annotation.get("sexual_dimorphism", "")
            details = _create_annotation_string(
                zygosity,
                life_stage,
                "" if sex == "None" else sex,
            )

            if not term_name:
                continue
            labels.append(f"{term_name} ({details})" if details else term_name)

        edge_data = {
            "source": source_gene,
            "target": target_gene,
            "phenotype": labels,
            "edge_size": record.get("phenotype_similarity_score", 0),
        }
        edges.append({"data": edge_data})

    return edges
151
+
152
+
153
+ ###############################################################################
154
+ # Main builder
155
+ ###############################################################################
156
+
157
+
158
+ def _build_symbol_to_id_map(gene_to_records: dict[str, list[dict]]) -> dict[str, str]:
159
+ symbol_to_id: dict[str, str] = {}
160
+ for symbol, recs in gene_to_records.items():
161
+ for r in recs:
162
+ acc = r.get("marker_accession_id")
163
+ if isinstance(acc, str) and acc and symbol not in symbol_to_id:
164
+ symbol_to_id[symbol] = acc
165
+ break
166
+ return symbol_to_id
167
+
168
+
169
def build_webapp_network(genewise_path, pairwise_path, hide_severity: bool = False):
    """
    Build the node and edge lists for the web viewer from two JSONL files.

    Args:
        genewise_path: Path to the gene-wise phenotype annotations (JSONL);
            each record is grouped by its ``marker_symbol``.
        pairwise_path: Path to the pairwise similarity annotations (JSONL);
            the genes named by ``gene1_symbol``/``gene2_symbol`` define the nodes.
        hide_severity: Forwarded to ``build_nodes``.

    Returns:
        A 3-tuple ``(nodes, edges, symbol_to_id)``.

    Raises:
        ValueError: If the network would contain more than ``MAX_NODE_COUNT`` nodes.
    """
    # The pairwise records are iterated twice (gene collection, then edges),
    # so they are materialised once.
    pairwise_similarity_annotations: list[dict] = list(io_handler.read_jsonl(pairwise_path))

    all_genes = set()
    for record in pairwise_similarity_annotations:
        all_genes.add(record["gene1_symbol"])
        all_genes.add(record["gene2_symbol"])

    # build_nodes emits exactly one node per gene, so the limit can be enforced
    # here, before the gene-wise file is read and any node is built.
    if len(all_genes) > MAX_NODE_COUNT:
        raise ValueError(
            f"Number of nodes ({len(all_genes)}) exceeds the maximum allowed ({MAX_NODE_COUNT}). "
            "For large networks, please generate a GraphML file using the `tsumugi build-graphml` "
            "command and visualize it with Cytoscape or another network visualization tool."
        )

    # Read genewise annotations and group them by marker_symbol
    genewise_phenotype_annotations: Iterator[dict] = io_handler.read_jsonl(genewise_path)
    gene_to_records = defaultdict(list)
    for rec in genewise_phenotype_annotations:
        gene_to_records[rec["marker_symbol"]].append(rec)
    gene_to_records = dict(gene_to_records)

    nodes = build_nodes(gene_to_records, all_genes, hide_severity=hide_severity)
    edges = _build_edges(pairwise_similarity_annotations)
    symbol_to_id = _build_symbol_to_id_map(gene_to_records)

    return nodes, edges, symbol_to_id
200
+
201
+
202
def build_and_save_webapp_network(genewise_path, pairwise_path, output_dir):
    """
    Build the gene-list network and write a web bundle into ``output_dir``.

    Writes ``network.json.gz`` (nodes followed by edges) and
    ``marker_symbol_accession_id.json`` into ``output_dir`` (created if
    needed), then calls ``_create_webapp_bundle`` for the static assets and
    generated entry files.
    """
    bundle_dir = Path(output_dir)
    bundle_dir.mkdir(parents=True, exist_ok=True)

    # This entry point always builds the network with severity hidden.
    nodes, edges, symbol_to_id = build_webapp_network(genewise_path, pairwise_path, hide_severity=True)

    network_file = bundle_dir / "network.json.gz"
    with gzip.open(network_file, "wt", encoding="utf-8") as gz_out:
        json.dump(nodes + edges, gz_out, indent=4)

    symbol_map_file = bundle_dir / "marker_symbol_accession_id.json"
    with open(symbol_map_file, "w", encoding="utf-8") as map_out:
        json.dump(symbol_to_id, map_out, ensure_ascii=False, indent=2)

    _create_webapp_bundle(
        output_dir=bundle_dir,
        data_filename=network_file.name,
        network_label="Gene List",
    )
223
+
224
+
225
+ ###############################################################################
226
+ # Helpers for HTML/JS generation
227
+ ###############################################################################
228
+
229
+
230
+ def _safe_filename(name: str) -> str:
231
+ if not name:
232
+ return "gene_list"
233
+ safe = "".join(ch if ch.isalnum() or ch in ("-", "_") else "_" for ch in name)
234
+ return safe or "gene_list"
235
+
236
+
237
+ def _copy_asset_tree(src: Path, dst: Path) -> None:
238
+ with as_file(src) as src_on_fs:
239
+ src = Path(src_on_fs)
240
+ if src.exists():
241
+ shutil.copytree(src, dst, dirs_exist_ok=True)
242
+
243
+
244
def _copy_launchers(output_dir: Path) -> None:
    """
    Copy the packaged launcher files into ``output_dir``.

    Each name in ``LAUNCHER_FILES`` is looked up under ``WEB_DIR``; names that
    do not resolve to a file are skipped.
    """
    for filename in LAUNCHER_FILES:
        src = WEB_DIR / filename
        # is_file() is part of the importlib.resources Traversable interface,
        # whereas exists() is only available on some concrete implementations.
        if not src.is_file():
            continue
        with as_file(src) as src_on_fs:
            shutil.copy(src_on_fs, output_dir / filename)
250
+
251
+
252
def _create_webapp_bundle(
    output_dir: Path,
    data_filename: str,
    network_label: str,
) -> None:
    """
    Assemble the web bundle in ``output_dir``.

    Copies the css, js and image asset trees and the launcher files, then
    generates the entry script and ``index.html``. The entry script's name and
    export label are the filename-safe form of ``network_label``.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    asset_sources = (
        (WEB_APP_DIR, "css"),
        (WEB_APP_DIR, "js"),
        (WEB_DIR, "image"),
    )
    for source_root, folder in asset_sources:
        _copy_asset_tree(source_root / folder, output_dir / folder)
    _copy_launchers(output_dir)

    entry_name = _safe_filename(network_label)
    _generate_genelist_entry_script(
        output_dir=output_dir,
        entry_js_name=entry_name,
        data_filename=data_filename,
        export_label=entry_name,
    )
    _generate_index_html(
        output_dir=output_dir,
        entry_js_name=entry_name,
        network_label=network_label,
    )
276
+
277
+
278
+ def _read_template(path: Path) -> str:
279
+ with open(path, encoding="utf-8") as fh:
280
+ return fh.read()
281
+
282
+
283
def _generate_genelist_entry_script(
    output_dir: Path,
    entry_js_name: str,
    data_filename: str,
    export_label: str,
) -> None:
    """
    Render the gene-list entry script and write it to ``output_dir/js``.

    Args:
        output_dir: Bundle root; the script is written to
            ``output_dir / "js" / f"{entry_js_name}.js"``.
        entry_js_name: Base name (without extension) of the generated script.
        data_filename: Name of the gzipped network JSON, loaded relative to
            the bundle root via ``loadJSONGz``.
        export_label: Text substituted for the ``XXX_NAME`` placeholder.
    """
    template_lines = _read_template(TEMPLATE_JS_DIR / "template_app.js").splitlines()
    # Template lines carrying the node-colour placeholders are removed entirely.
    filtered_lines = [
        line
        for line in template_lines
        if "XXX_NODE_COLOR_INITIALIZATION" not in line and "XXX_NODE_COLOR_UPDATE" not in line
    ]
    template = "\n".join(filtered_lines)
    # This statement contains the XXX_ELEMENTS placeholder itself, so it has to
    # be rewritten before XXX_ELEMENTS is substituted below.
    template = template.replace(
        'const isGeneSymbolPage = "XXX_ELEMENTS".includes("genesymbol");',
        "const isGeneSymbolPage = false;",
    )

    filter_js = _read_template(TEMPLATE_JS_DIR / "filterByNodeColorAndEdgeSize_genelist.js")

    final_js = (
        template.replace("XXX_FILTER_BY_NODE_COLOR_AND_EDGE_SIZE", filter_js)
        .replace("XXX_NODE_MIN_MAX", "")
        .replace(
            "XXX_EDGE_MIN_MAX",
            "const edgeMin = Math.min(...edgeSizes); const edgeMax = Math.max(...edgeSizes);",
        )
        .replace("XXX_ELEMENTS", f"loadJSONGz('./{data_filename}')")
        .replace("XXX_PHENOTYPE", "")
        .replace("XXX_NAME", export_label)
    )
    # Point the symbol -> accession lookup at the copy stored in the bundle root.
    final_js = final_js.replace(
        'const map_symbol_to_id = loadJSON("../../data/marker_symbol_accession_id.json");',
        'const map_symbol_to_id = loadJSON("./marker_symbol_accession_id.json");',
    )

    js_path = output_dir / "js" / f"{entry_js_name}.js"
    # The js directory normally comes from the asset copy, but that copy is
    # skipped when the packaged directory is missing; create it if needed.
    js_path.parent.mkdir(parents=True, exist_ok=True)
    with open(js_path, "w", encoding="utf-8") as fh:
        fh.write(final_js)
322
+
323
+
324
def _generate_index_html(
    output_dir: Path,
    entry_js_name: str,
    network_label: str,
) -> None:
    """
    Render ``index.html`` for the bundle from the HTML template fragments.

    The head, header, body-container and cy-container fragments are filled in
    and spliced into ``template_app.html``; the result is written to
    ``output_dir / "index.html"``. Asset paths in the head fragment are
    rewritten to be relative to the bundle root.
    """
    body_html = _read_template(TEMPLATE_HTML_DIR / "body-container.html")
    body_html = body_html.replace("XXX_PHENOTYPE_SEVERITY", "")
    cy_html = _read_template(TEMPLATE_HTML_DIR / "cy-container.html")
    cy_html = cy_html.replace("XXX_PHENOTYPE_SEVERITY", "")

    page_title = network_label or "Gene List"

    # Substitutions are applied in this exact order: the full script-tag form
    # must be handled before the bare XXX_JS_FILE_NAME placeholder.
    head_rewrites = (
        ("XXX_TITLE", page_title),
        ('src="./XXX_JS_FILE_NAME.js"', f'src="./js/{entry_js_name}.js"'),
        ("XXX_JS_FILE_NAME", entry_js_name),
        ("../js/", "./js/"),
        ("../css/", "./css/"),
        ("../../image", "./image"),
    )
    head_html = _read_template(TEMPLATE_HTML_DIR / "head.html")
    for old, new in head_rewrites:
        head_html = head_html.replace(old, new)

    is_custom_label = bool(network_label) and network_label.lower() != "gene list"
    header_insert = f"Gene List: {network_label}" if is_custom_label else "Gene List"
    header_html = _read_template(TEMPLATE_HTML_DIR / "header.html").replace("XXX_TITLE", header_insert)

    final_html = _read_template(TEMPLATE_HTML_DIR / "template_app.html")
    fragments = (
        ("XXX_HEAD", head_html),
        ("XXX_H1", header_html),
        ("XXX_BODY_CONTAINER", body_html),
        ("XXX_CY_CONTAINER", cy_html),
    )
    for placeholder, fragment in fragments:
        final_html = final_html.replace(placeholder, fragment)

    with open(output_dir / "index.html", "w", encoding="utf-8") as fh:
        fh.write(final_html)
@@ -0,0 +1,48 @@
1
+ from collections.abc import Generator
2
+ from pathlib import Path
3
+
4
+ from TSUMUGI import io_handler
5
+
6
+
7
+ def _filter_annotations_by_zygosity(
8
+ pairwise_similarity_annotations: list[dict[str, str | dict[str, dict] | dict[str | int]]],
9
+ zygosity: str = "Homo",
10
+ keep: bool = False,
11
+ drop: bool = False,
12
+ ) -> Generator[frozenset[str], dict[str, dict, int]]:
13
+ for pairwise_similarity_annotation in pairwise_similarity_annotations:
14
+ phenotype_shared_annotations = pairwise_similarity_annotation["phenotype_shared_annotations"]
15
+
16
+ if len(phenotype_shared_annotations) == 0:
17
+ continue
18
+
19
+ phenotype_shared_annotations_filtered = {}
20
+ for term_name, annotation in phenotype_shared_annotations.items():
21
+ if annotation["zygosity"] == zygosity and keep:
22
+ phenotype_shared_annotations_filtered[term_name] = annotation
23
+ if annotation["zygosity"] != zygosity and drop:
24
+ phenotype_shared_annotations_filtered[term_name] = annotation
25
+
26
+ if len(phenotype_shared_annotations_filtered) == 0:
27
+ continue
28
+
29
+ pairwise_similarity_annotation["phenotype_shared_annotations"] = phenotype_shared_annotations_filtered
30
+
31
+ yield pairwise_similarity_annotation
32
+
33
+
34
def filter_annotations_by_zygosity(
    path_pairwise_similarity_annotations: str | Path,
    zygosity: str,
    keep: bool = False,
    drop: bool = False,
) -> None:
    """
    Read pairwise similarity annotations from a JSONL file, filter them by
    zygosity, and write every surviving record to stdout as JSONL.
    """
    records = io_handler.read_jsonl(path_pairwise_similarity_annotations)
    filtered = _filter_annotations_by_zygosity(
        pairwise_similarity_annotations=records,
        zygosity=zygosity,
        keep=keep,
        drop=drop,
    )
    for record in filtered:
        io_handler.write_jsonl_to_stdout(record)
TSUMUGI/validator.py ADDED
@@ -0,0 +1,65 @@
1
+ from __future__ import annotations
2
+
3
+ from TSUMUGI import io_handler
4
+
5
+
6
def validate_statistical_results(file_path: str) -> None:
    """
    Check that the statistical results file has every required column.

    Only the first record is inspected.

    Raises:
        ValueError: If the file yields no records, or if any required column
            is missing from the first record.
    """
    columns = {
        "marker_symbol",
        "marker_accession_id",
        "mp_term_name",
        "mp_term_id",
        "p_value",
        "effect_size",
        "female_ko_effect_p_value",  # sex differences
        "male_ko_effect_p_value",  # sex differences
        "zygosity",  # zygosity
        "pipeline_name",  # life-stage
        "procedure_name",  # life-stage
        "allele_symbol",  # map to Phenodigm
    }
    records = io_handler.load_csv_as_dicts(file_path)
    # A default avoids leaking a bare StopIteration when there are no rows.
    first_record = next(records, None)
    if first_record is None:
        raise ValueError(f"Invalid file: no records found in {file_path}")

    missing_columns = columns - first_record.keys()
    if missing_columns:
        raise ValueError(f"Invalid file: Missing columns {missing_columns} in {file_path}")
27
+
28
+
29
def validate_obo_file(file_path: str) -> None:
    """
    Quick structural check of an OBO file.

    Scans line by line, ignoring blank lines and lines starting with "!",
    and stops at the first "[Term]" stanza.

    Raises:
        ValueError: If no "format-version:" line was seen before scanning
            stopped, or if the file contains no "[Term]" stanza.
    """
    seen_format_version = False
    seen_term_stanza = False

    with open(file_path, encoding="utf-8") as handle:
        for raw_line in handle:
            text = raw_line.strip()
            if text == "" or text.startswith("!"):
                continue
            if text.startswith("format-version:"):
                seen_format_version = True
                continue
            if text.startswith("[Term]"):
                seen_term_stanza = True
                break  # the first stanza is enough for a quick validation

    if not seen_format_version:
        raise ValueError("Invalid OBO file: missing 'format-version:' in header.")
    if not seen_term_stanza:
        raise ValueError("Invalid OBO file: missing '[Term]' stanza.")
50
+
51
+
52
def validate_mp_term_id(term_id: str, mp_obo_path: str) -> None:
    """
    Ensure ``term_id`` is among the terms parsed from the OBO file.

    Raises:
        ValueError: If ``term_id`` is not found in the parsed ontology terms.
    """
    known_terms = io_handler.parse_obo_file(mp_obo_path)
    if term_id in known_terms:
        return
    raise ValueError(f"MP term ID '{term_id}' not found in OBO file '{mp_obo_path}'.")
57
+
58
+
59
def validate_phenodigm_file(file_path: str) -> None:
    """
    Check that the Phenodigm file has the required columns.

    Only the first record is inspected.

    Raises:
        ValueError: If the file yields no records, or if "Disorder name" or
            "Mouse model description" is missing from the first record.
    """
    columns = {"Disorder name", "Mouse model description"}
    # A default avoids leaking a bare StopIteration when there are no rows.
    first_record = next(io_handler.load_csv_as_dicts(file_path), None)
    if first_record is None:
        raise ValueError(f"Invalid file: no records found in {file_path}")

    missing_columns = columns - first_record.keys()
    if missing_columns:
        raise ValueError(f"Invalid file: Missing {missing_columns} in {file_path}")