TSUMUGI 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- TSUMUGI/annotator.py +103 -0
- TSUMUGI/argparser.py +599 -0
- TSUMUGI/core.py +185 -0
- TSUMUGI/data/impc_phenodigm.csv +3406 -0
- TSUMUGI/data/mp.obo +143993 -0
- TSUMUGI/filterer.py +36 -0
- TSUMUGI/formatter.py +122 -0
- TSUMUGI/genewise_annotation_builder.py +94 -0
- TSUMUGI/io_handler.py +189 -0
- TSUMUGI/main.py +300 -0
- TSUMUGI/network_constructor.py +603 -0
- TSUMUGI/ontology_handler.py +62 -0
- TSUMUGI/pairwise_similarity_builder.py +66 -0
- TSUMUGI/report_generator.py +122 -0
- TSUMUGI/similarity_calculator.py +498 -0
- TSUMUGI/subcommands/count_filterer.py +47 -0
- TSUMUGI/subcommands/genes_filterer.py +89 -0
- TSUMUGI/subcommands/graphml_builder.py +158 -0
- TSUMUGI/subcommands/life_stage_filterer.py +48 -0
- TSUMUGI/subcommands/mp_filterer.py +142 -0
- TSUMUGI/subcommands/score_filterer.py +22 -0
- TSUMUGI/subcommands/sex_filterer.py +48 -0
- TSUMUGI/subcommands/webapp_builder.py +358 -0
- TSUMUGI/subcommands/zygosity_filterer.py +48 -0
- TSUMUGI/validator.py +65 -0
- TSUMUGI/web/app/css/app.css +1129 -0
- TSUMUGI/web/app/genelist/network_genelist.html +339 -0
- TSUMUGI/web/app/genelist/network_genelist.js +421 -0
- TSUMUGI/web/app/js/data/dataLoader.js +41 -0
- TSUMUGI/web/app/js/export/graphExporter.js +214 -0
- TSUMUGI/web/app/js/graph/centrality.js +495 -0
- TSUMUGI/web/app/js/graph/components.js +30 -0
- TSUMUGI/web/app/js/graph/filters.js +158 -0
- TSUMUGI/web/app/js/graph/highlighter.js +52 -0
- TSUMUGI/web/app/js/graph/layoutController.js +454 -0
- TSUMUGI/web/app/js/graph/valueScaler.js +43 -0
- TSUMUGI/web/app/js/search/geneSearcher.js +93 -0
- TSUMUGI/web/app/js/search/phenotypeSearcher.js +292 -0
- TSUMUGI/web/app/js/ui/dynamicFontSize.js +30 -0
- TSUMUGI/web/app/js/ui/mobilePanel.js +77 -0
- TSUMUGI/web/app/js/ui/slider.js +22 -0
- TSUMUGI/web/app/js/ui/tooltips.js +514 -0
- TSUMUGI/web/app/js/viewer/pageSetup.js +217 -0
- TSUMUGI/web/app/viewer.html +515 -0
- TSUMUGI/web/app/viewer.js +1593 -0
- TSUMUGI/web/css/sanitize.css +363 -0
- TSUMUGI/web/css/top.css +391 -0
- TSUMUGI/web/image/tsumugi-favicon.ico +0 -0
- TSUMUGI/web/image/tsumugi-icon.png +0 -0
- TSUMUGI/web/image/tsumugi-logo.png +0 -0
- TSUMUGI/web/image/tsumugi-logo.svg +69 -0
- TSUMUGI/web/js/genelist_formatter.js +123 -0
- TSUMUGI/web/js/top.js +338 -0
- TSUMUGI/web/open_webapp_linux.sh +25 -0
- TSUMUGI/web/open_webapp_mac.command +25 -0
- TSUMUGI/web/open_webapp_windows.bat +37 -0
- TSUMUGI/web/serve_index.py +110 -0
- TSUMUGI/web/template/template_index.html +197 -0
- TSUMUGI/web_deployer.py +150 -0
- tsumugi-1.0.1.dist-info/METADATA +504 -0
- tsumugi-1.0.1.dist-info/RECORD +64 -0
- tsumugi-1.0.1.dist-info/WHEEL +4 -0
- tsumugi-1.0.1.dist-info/entry_points.txt +3 -0
- tsumugi-1.0.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import Counter
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from tqdm import tqdm
|
|
7
|
+
|
|
8
|
+
from TSUMUGI import io_handler
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def filter_by_number_of_phenotypes_per_gene(
    path_pairwise_similarity_annotations: str | Path | None,
    path_genewise_phenotype_annotations: str | Path,
    min_phenotypes: int | None = None,
    max_phenotypes: int | None = None,
) -> None:
    """Emit gene pairs whose two genes both have a phenotype count within bounds.

    The count of a gene is the number of gene-wise records flagged
    ``significant`` for its ``marker_symbol``. Genes without any significant
    record never enter the tally and therefore never qualify.

    Args:
        path_pairwise_similarity_annotations: Source of the pairwise records,
            handed to ``io_handler.read_jsonl`` (presumably ``None`` selects
            stdin -- verify against ``io_handler``).
        path_genewise_phenotype_annotations: Source of the gene-wise records.
        min_phenotypes: Inclusive lower bound; ``None`` disables it.
        max_phenotypes: Inclusive upper bound; ``None`` disables it.
    """
    significant_counts = Counter(
        entry["marker_symbol"]
        for entry in io_handler.read_jsonl(path_genewise_phenotype_annotations)
        if entry["significant"]
    )

    def _within_bounds(count: int) -> bool:
        # Each bound is only enforced when it was supplied.
        lower_ok = min_phenotypes is None or count >= min_phenotypes
        upper_ok = max_phenotypes is None or count <= max_phenotypes
        return lower_ok and upper_ok

    eligible_genes = {gene for gene, count in significant_counts.items() if _within_bounds(count)}

    pair_records = io_handler.read_jsonl(path_pairwise_similarity_annotations)
    for pair in tqdm(pair_records, desc="Filtering gene pairs"):
        # A pair is dropped as soon as one of its genes is not eligible.
        if pair["gene1_symbol"] not in eligible_genes or pair["gene2_symbol"] not in eligible_genes:
            continue
        io_handler.write_jsonl_to_stdout(pair)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def filter_by_number_of_phenotypes_per_pair(
    path_pairwise_similarity_annotations: str | Path | None,
    min_phenotypes: int | None = None,
    max_phenotypes: int | None = None,
) -> None:
    """Emit gene pairs whose number of shared phenotypes lies within bounds.

    The number of shared phenotypes is the length of the record's
    ``phenotype_shared_annotations`` entry.

    Args:
        path_pairwise_similarity_annotations: Source of the pairwise records,
            handed to ``io_handler.read_jsonl``.
        min_phenotypes: Inclusive lower bound; ``None`` disables it.
        max_phenotypes: Inclusive upper bound; ``None`` disables it.
    """
    for pair in io_handler.read_jsonl(path_pairwise_similarity_annotations):
        shared_count = len(pair["phenotype_shared_annotations"])
        out_of_range = (min_phenotypes is not None and shared_count < min_phenotypes) or (
            max_phenotypes is not None and shared_count > max_phenotypes
        )
        if not out_of_range:
            # Written to stdout as one JSON line per surviving pair.
            io_handler.write_jsonl_to_stdout(pair)
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
from collections.abc import Iterator
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from TSUMUGI import io_handler
|
|
5
|
+
|
|
6
|
+
###############################################################################
|
|
7
|
+
# filter_annotations_by_genes
|
|
8
|
+
###############################################################################
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _filter_annotations_by_genes(
|
|
12
|
+
pairwise_similarity_annotations: Iterator[dict[str, str | dict[str, dict] | dict[str | int]]],
|
|
13
|
+
gene_list: set[str],
|
|
14
|
+
keep: bool = False,
|
|
15
|
+
drop: bool = False,
|
|
16
|
+
) -> Iterator[dict[str, str | dict[str, dict] | dict[str | int]]]:
|
|
17
|
+
for pairwise_similarity_annotation in pairwise_similarity_annotations:
|
|
18
|
+
gene1 = pairwise_similarity_annotation["gene1_symbol"]
|
|
19
|
+
gene2 = pairwise_similarity_annotation["gene2_symbol"]
|
|
20
|
+
|
|
21
|
+
# Keep if either gene is in the list
|
|
22
|
+
# - gene1: A, gene2: B, gene_list: {A, C} -> Keep
|
|
23
|
+
# - gene1: D, gene2: E, gene_list: {A, C} -> Drop
|
|
24
|
+
if (gene1 in gene_list or gene2 in gene_list) and keep:
|
|
25
|
+
yield pairwise_similarity_annotation
|
|
26
|
+
|
|
27
|
+
# Drop only when both genes are not in the list
|
|
28
|
+
# - gene1: A, gene2: B, gene_list: {A, C} -> Drop
|
|
29
|
+
# - gene1: D, gene2: E, gene_list: {A, C} -> Keep
|
|
30
|
+
if (gene1 not in gene_list and gene2 not in gene_list) and drop:
|
|
31
|
+
yield pairwise_similarity_annotation
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def filter_annotations_by_genes(
    path_pairwise_similarity_annotations: str | Path,
    gene_list: set[str],
    keep: bool = False,
    drop: bool = False,
) -> None:
    """Read pairwise records, filter them by gene membership and print the result.

    Records come from ``io_handler.read_jsonl``; the selection is delegated to
    ``_filter_annotations_by_genes`` and every selected record is written to
    stdout as JSONL.
    """
    selected = _filter_annotations_by_genes(
        pairwise_similarity_annotations=io_handler.read_jsonl(path_pairwise_similarity_annotations),
        gene_list=gene_list,
        keep=keep,
        drop=drop,
    )
    for entry in selected:
        io_handler.write_jsonl_to_stdout(entry)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
###############################################################################
|
|
52
|
+
# filter_annotations_by_gene_pairs
|
|
53
|
+
###############################################################################
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _filter_annotations_by_gene_pairs(
|
|
57
|
+
pairwise_similarity_annotations: Iterator[dict[str, str | dict[str, dict] | dict[str | int]]],
|
|
58
|
+
gene_pairs: set[frozenset[str]],
|
|
59
|
+
keep: bool = False,
|
|
60
|
+
drop: bool = False,
|
|
61
|
+
) -> Iterator[dict[str, str | dict[str, dict] | dict[str | int]]]:
|
|
62
|
+
for pairwise_similarity_annotation in pairwise_similarity_annotations:
|
|
63
|
+
gene1 = pairwise_similarity_annotation["gene1_symbol"]
|
|
64
|
+
gene2 = pairwise_similarity_annotation["gene2_symbol"]
|
|
65
|
+
gene_pair = frozenset({gene1, gene2})
|
|
66
|
+
|
|
67
|
+
# Keep if either gene is in the list
|
|
68
|
+
if gene_pair in gene_pairs and keep:
|
|
69
|
+
yield pairwise_similarity_annotation
|
|
70
|
+
# Drop only when both genes are not in the list
|
|
71
|
+
if gene_pair not in gene_pairs and drop:
|
|
72
|
+
yield pairwise_similarity_annotation
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def filter_annotations_by_gene_pairs(
    path_pairwise_similarity_annotations: str | Path,
    gene_pairs: set[frozenset[str]],
    keep: bool = False,
    drop: bool = False,
) -> None:
    """Read pairwise records, filter them by gene pair and print the result.

    Records come from ``io_handler.read_jsonl``; the selection is delegated to
    ``_filter_annotations_by_gene_pairs`` and every selected record is written
    to stdout as JSONL.

    Args:
        path_pairwise_similarity_annotations: Source of the pairwise records.
        gene_pairs: Unordered gene pairs, each a ``frozenset`` of two symbols.
        keep: Emit the records whose pair is in ``gene_pairs``.
        drop: Emit the records whose pair is not in ``gene_pairs``.
    """
    pairwise_similarity_annotations = io_handler.read_jsonl(path_pairwise_similarity_annotations)
    for record in _filter_annotations_by_gene_pairs(
        pairwise_similarity_annotations=pairwise_similarity_annotations,
        gene_pairs=gene_pairs,
        keep=keep,
        drop=drop,
    ):
        io_handler.write_jsonl_to_stdout(record)
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import sys
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
|
|
5
|
+
import networkx as nx
|
|
6
|
+
|
|
7
|
+
from TSUMUGI import io_handler
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def format_suffix(zygosity: str, life_stage: str, sexual_dimorphism: str) -> str:
    """Return a parenthesised, comma-separated qualifier such as "(Homo, Early, Male)".

    ``sexual_dimorphism`` is left out when it is empty or equals the string
    'None', giving e.g. "(Homo, Early)".
    """
    fields = (zygosity, life_stage, sexual_dimorphism)
    if not sexual_dimorphism or sexual_dimorphism == "None":
        # No sex-specific qualifier: keep only zygosity and life stage.
        fields = fields[:2]
    return "(" + ", ".join(fields) + ")"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def build_nodes(genewise_phenotype_annotations: list[dict]) -> dict:
    """
    Aggregate node attributes per marker_symbol from gene-wise phenotype records.

    Every record must provide marker_symbol, mp_term_name, zygosity and life_stage;
    sexual_dimorphism and disease_annotation are optional (a missing or null
    disease_annotation is treated as "no diseases").

    The returned dict is keyed by marker_symbol, in order of first appearance in
    the input, and each value holds:
    - label: marker_symbol
    - effect_size: always 1.0
    - node_annotations:
        Phenotypes of GeneA KO mice
        - vertebral transformation (Homo, Early, Male)
        ...
        Associated Human Diseases
        - Male infertility (Homo, Early)
        ...
      The disease section is only present when the gene has at least one disease.

    NOTE(review): every record contributes a phenotype line regardless of any
    "significant" flag it may carry -- confirm the input is pre-filtered.
    """
    phenotypes_per_gene = defaultdict(list)
    diseases_per_gene = defaultdict(list)

    for record in genewise_phenotype_annotations:
        marker_symbol = record["marker_symbol"]
        mp_term_name = record["mp_term_name"]
        suffix = format_suffix(
            record["zygosity"],
            record["life_stage"],
            record.get("sexual_dimorphism", "None"),
        )

        # KO mouse phenotypes
        phenotypes_per_gene[marker_symbol].append(f"{mp_term_name} {suffix}")

        # Human diseases; `or []` also covers an explicit null in the record.
        for disease in record.get("disease_annotation") or []:
            diseases_per_gene[marker_symbol].append(f"{disease} {suffix}")

    nodes = {}
    # Every record adds a phenotype entry, so phenotypes_per_gene already holds
    # all genes; iterating it (instead of a set union of keys) keeps the output
    # order stable across runs.
    for marker_symbol, phenotypes in phenotypes_per_gene.items():
        lines = [f"Phenotypes of {marker_symbol} KO mice"]
        lines.extend(f"- {pheno}" for pheno in phenotypes)

        diseases = diseases_per_gene.get(marker_symbol)
        if diseases:
            lines.append("Associated Human Diseases")
            lines.extend(f"- {dis}" for dis in diseases)

        nodes[marker_symbol] = {
            "label": marker_symbol,
            "effect_size": 1.0,
            "node_annotations": "\n".join(lines),
        }

    return nodes
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def build_graph(pairwise_similarity_annotations: list[dict], nodes: dict) -> nx.Graph:
    """
    Assemble an undirected graph from pairwise similarity records and node attributes.

    - Nodes: each gene seen in an edge is first added with placeholder attributes
      (label = symbol, effect_size = 1.0, empty node_annotations); afterwards the
      attributes from `nodes` are applied to the genes that occur in an edge.
      Entries of `nodes` whose gene has no edge are not added.
    - Edges: gene1_symbol -- gene2_symbol
        - id: "e<N>", where N is the position of the record in the input
        - weight: phenotype_similarity_score
        - edge_annotations:
            Shared phenotypes of GeneA and GeneB KOs (Similarity: 59)
            - vertebral transformation (Homo, Early, Male)
            ...
    """
    graph = nx.Graph()
    genes_in_edges = set()

    for edge_index, pair in enumerate(pairwise_similarity_annotations):
        source = pair["gene1_symbol"]
        target = pair["gene2_symbol"]
        similarity = pair["phenotype_similarity_score"]
        shared_annotations = pair.get("phenotype_shared_annotations", {})

        # Placeholder nodes cover genes that are absent from the gene-wise data.
        for gene in (source, target):
            if gene not in graph:
                graph.add_node(gene, label=gene, effect_size=1.0, node_annotations="")

        annotation_lines = [f"Shared phenotypes of {source} and {target} KOs (Similarity: {similarity})"]
        for term_name, details in shared_annotations.items():
            qualifier = format_suffix(
                details.get("zygosity", ""),
                details.get("life_stage", ""),
                details.get("sexual_dimorphism", "None"),
            )
            annotation_lines.append(f"- {term_name} {qualifier}")

        graph.add_edge(
            source,
            target,
            id=f"e{edge_index}",
            weight=similarity,
            edge_annotations="\n".join(annotation_lines),
        )
        genes_in_edges.update((source, target))

    # Overwrite the placeholders with the real attributes where available.
    for gene, attributes in nodes.items():
        if gene in genes_in_edges:
            graph.add_node(gene, **attributes)

    return graph
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def write_graphml_to_stdout(pairwise_path: str, genewise_path: str) -> None:
    """
    Build the gene network and print it to stdout as GraphML.

    Pairwise records are read from `pairwise_path` and gene-wise records from
    `genewise_path` (both via io_handler.read_jsonl). The graph is serialized
    into an in-memory text buffer first and then written to sys.stdout in one call.
    """
    pair_records = io_handler.read_jsonl(pairwise_path)
    gene_records = io_handler.read_jsonl(genewise_path)

    node_attributes = build_nodes(gene_records)
    graph = build_graph(pair_records, node_attributes)

    graphml_buffer = io.StringIO()
    nx.write_graphml(graph, graphml_buffer, encoding="unicode")
    graphml_text = graphml_buffer.getvalue()

    sys.stdout.write(graphml_text)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from collections.abc import Generator
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from TSUMUGI import io_handler
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _filter_annotations_by_life_stage(
|
|
8
|
+
pairwise_similarity_annotations: list[dict[str, str | dict[str, dict] | dict[str | int]]],
|
|
9
|
+
life_stage: str = "",
|
|
10
|
+
keep: bool = False,
|
|
11
|
+
drop: bool = False,
|
|
12
|
+
) -> Generator[frozenset[str], dict[str, dict, int]]:
|
|
13
|
+
for pairwise_similarity_annotation in pairwise_similarity_annotations:
|
|
14
|
+
phenotype_shared_annotations = pairwise_similarity_annotation["phenotype_shared_annotations"]
|
|
15
|
+
|
|
16
|
+
if len(phenotype_shared_annotations) == 0:
|
|
17
|
+
continue
|
|
18
|
+
|
|
19
|
+
phenotype_shared_annotations_filtered = {}
|
|
20
|
+
for term_name, annotation in phenotype_shared_annotations.items():
|
|
21
|
+
if annotation["life_stage"] == life_stage and keep:
|
|
22
|
+
phenotype_shared_annotations_filtered[term_name] = annotation
|
|
23
|
+
if annotation["life_stage"] != life_stage and drop:
|
|
24
|
+
phenotype_shared_annotations_filtered[term_name] = annotation
|
|
25
|
+
|
|
26
|
+
if len(phenotype_shared_annotations_filtered) == 0:
|
|
27
|
+
continue
|
|
28
|
+
|
|
29
|
+
pairwise_similarity_annotation["phenotype_shared_annotations"] = phenotype_shared_annotations_filtered
|
|
30
|
+
|
|
31
|
+
yield pairwise_similarity_annotation
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def filter_annotations_by_life_stage(
    path_pairwise_similarity_annotations: str | Path,
    life_stage: str,
    keep: bool = False,
    drop: bool = False,
) -> None:
    """Read pairwise records, filter their shared phenotypes by life stage and print them.

    Records come from ``io_handler.read_jsonl``; the filtering is delegated to
    ``_filter_annotations_by_life_stage`` and every resulting record is written
    to stdout as JSONL.
    """
    filtered = _filter_annotations_by_life_stage(
        pairwise_similarity_annotations=io_handler.read_jsonl(path_pairwise_similarity_annotations),
        life_stage=life_stage,
        keep=keep,
        drop=drop,
    )
    for entry in filtered:
        io_handler.write_jsonl_to_stdout(entry)
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from TSUMUGI import io_handler, ontology_handler
|
|
6
|
+
|
|
7
|
+
###########################################################
|
|
8
|
+
# Include gene pairs with target_mp_term_id and its descendants
|
|
9
|
+
###########################################################
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def include_specific_phenotype(
    path_pairwise_similarity_annotations: str | Path | None,
    path_genewise_phenotype_annotations: str | Path | None,
    path_obo: str | Path,
    mp_term_id: str,
    life_stage: str | None = None,
    sex: str | None = None,
    zygosity: str | None = None,
    is_pairwise: bool = True,
) -> None:
    """Emit the records annotated with ``mp_term_id`` or one of its descendants.

    The target terms are ``mp_term_id`` plus everything returned by
    ``ontology_handler.find_all_descendant_terms`` for it.

    - Pairwise mode (``is_pairwise`` true): a pair is written when at least one
      of its shared phenotypes is named after a target term and satisfies the
      optional conditions. Matching is done on term names, because
      ``phenotype_shared_annotations`` is keyed by name.
    - Gene-wise mode: a record is written when its ``mp_term_id`` is a target
      term, its ``significant`` field is not ``False``, and it satisfies the
      optional conditions.

    Optional conditions (each ignored when ``None``): ``life_stage``,
    ``sex`` (compared with ``sexual_dimorphism``) and ``zygosity``.
    """
    ontology_terms = io_handler.parse_obo_file(path_obo)
    _, children_by_term = ontology_handler.build_term_hierarchy(ontology_terms)
    target_term_ids = ontology_handler.find_all_descendant_terms(mp_term_id, children_by_term)
    target_term_ids.add(mp_term_id)
    target_term_names = {
        term["name"] for term_id, term in ontology_terms.items() if term_id in target_term_ids
    }

    def _meets_conditions(entry: dict) -> bool:
        # Conditions are checked in the order life stage, sex, zygosity.
        if life_stage is not None and entry["life_stage"] != life_stage:
            return False
        if sex is not None and entry["sexual_dimorphism"] != sex:
            return False
        if zygosity is not None and entry["zygosity"] != zygosity:
            return False
        return True

    if not is_pairwise:
        for gene_record in io_handler.read_jsonl(path_genewise_phenotype_annotations):
            if gene_record["mp_term_id"] not in target_term_ids:
                continue
            if gene_record.get("significant") is False:
                continue
            if _meets_conditions(gene_record):
                io_handler.write_jsonl_to_stdout(gene_record)
        return

    for pair_record in io_handler.read_jsonl(path_pairwise_similarity_annotations):
        shared = pair_record["phenotype_shared_annotations"]
        candidate_names = set(shared.keys()).intersection(target_term_names)

        # Every candidate is evaluated; one satisfying entry is enough to emit the pair.
        satisfying = [name for name in candidate_names if _meets_conditions(shared[name])]
        if satisfying:
            io_handler.write_jsonl_to_stdout(pair_record)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
###########################################################
|
|
74
|
+
# Exclude gene pairs with target_mp_term_id and its descendants
|
|
75
|
+
###########################################################
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def exclude_specific_phenotype(
    path_pairwise_similarity_annotations: str | Path | None,
    path_genewise_phenotype_annotations: str | Path,
    path_obo: str | Path,
    mp_term_id: str,
    life_stage: str | None = None,
    sex: str | None = None,
    zygosity: str | None = None,
    is_pairwise: bool = True,
) -> None:
    """Emit the records of genes confirmed NOT to show the target phenotype.

    A term is "related" when it is ``mp_term_id`` itself, one of its ancestors
    or one of its descendants. Genes are classified in two steps:

    1. A gene "has the phenotype" when any of its records is related,
       has ``significant is True`` and satisfies the optional conditions.
    2. Among the remaining genes, a gene is "confirmed without the phenotype"
       when any of its records is related, has ``significant is False`` and
       satisfies the optional conditions.

    Optional conditions (each ignored when ``None``): ``life_stage``,
    ``sex`` (compared with ``sexual_dimorphism``) and ``zygosity``.

    In pairwise mode, the pairs whose two genes are both confirmed without the
    phenotype are written; otherwise all gene-wise records of such genes are.
    """
    ontology_terms = io_handler.parse_obo_file(path_obo)
    parents_by_term, children_by_term = ontology_handler.build_term_hierarchy(ontology_terms)
    descendant_ids = ontology_handler.find_all_descendant_terms(mp_term_id, children_by_term)
    ancestor_ids = ontology_handler.find_all_ancestor_terms(mp_term_id, parents_by_term)

    def _is_related(term_id: str) -> bool:
        return term_id == mp_term_id or term_id in ancestor_ids or term_id in descendant_ids

    def _meets_conditions(entry: dict) -> bool:
        if life_stage is not None and entry["life_stage"] != life_stage:
            return False
        if sex is not None and entry["sexual_dimorphism"] != sex:
            return False
        if zygosity is not None and entry["zygosity"] != zygosity:
            return False
        return True

    # Materialised because the records are scanned more than once.
    gene_records = list(io_handler.read_jsonl(Path(path_genewise_phenotype_annotations)))

    # Step 1: genes with a significant related phenotype.
    genes_with_phenotype = {
        entry["marker_symbol"]
        for entry in gene_records
        if _is_related(entry["mp_term_id"]) and entry["significant"] is True and _meets_conditions(entry)
    }

    # Step 2: genes not caught in step 1 that have a non-significant related record.
    genes_without_phenotype = {
        entry["marker_symbol"]
        for entry in gene_records
        if entry["marker_symbol"] not in genes_with_phenotype
        and _is_related(entry["mp_term_id"])
        and entry["significant"] is False
        and _meets_conditions(entry)
    }

    if not is_pairwise:
        for entry in gene_records:
            if entry["marker_symbol"] in genes_without_phenotype:
                io_handler.write_jsonl_to_stdout(entry)
        return

    for pair in io_handler.read_jsonl(path_pairwise_similarity_annotations):
        if pair["gene1_symbol"] in genes_without_phenotype and pair["gene2_symbol"] in genes_without_phenotype:
            io_handler.write_jsonl_to_stdout(pair)
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from TSUMUGI import io_handler
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def filter_by_score_of_phenotypes_per_pair(
    path_pairwise_similarity_annotations: str | Path | None,
    min_phenotypes: int | None = None,
    max_phenotypes: int | None = None,
) -> None:
    """Emit gene pairs whose ``phenotype_similarity_score`` lies within bounds.

    Args:
        path_pairwise_similarity_annotations: Source of the pairwise records,
            handed to ``io_handler.read_jsonl``.
        min_phenotypes: Lower bound on the score (despite the name); pairs
            scoring below it are skipped. ``None`` disables it.
        max_phenotypes: Upper bound on the score (despite the name); pairs
            scoring above it are skipped. ``None`` disables it.
    """
    lower_bound, upper_bound = min_phenotypes, max_phenotypes
    for pair in io_handler.read_jsonl(path_pairwise_similarity_annotations):
        score = pair["phenotype_similarity_score"]
        out_of_range = (lower_bound is not None and score < lower_bound) or (
            upper_bound is not None and score > upper_bound
        )
        if not out_of_range:
            # Written to stdout as one JSON line per surviving pair.
            io_handler.write_jsonl_to_stdout(pair)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from collections.abc import Generator
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from TSUMUGI import io_handler
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _filter_annotations_by_sex(
|
|
8
|
+
pairwise_similarity_annotations: list[dict[str, str | dict[str, dict] | dict[str | int]]],
|
|
9
|
+
sex: str = "None",
|
|
10
|
+
keep: bool = False,
|
|
11
|
+
drop: bool = False,
|
|
12
|
+
) -> Generator[frozenset[str], dict[str, dict, int]]:
|
|
13
|
+
for pairwise_similarity_annotation in pairwise_similarity_annotations:
|
|
14
|
+
phenotype_shared_annotations = pairwise_similarity_annotation["phenotype_shared_annotations"]
|
|
15
|
+
|
|
16
|
+
if len(phenotype_shared_annotations) == 0:
|
|
17
|
+
continue
|
|
18
|
+
|
|
19
|
+
phenotype_shared_annotations_filtered = {}
|
|
20
|
+
for term_name, annotation in phenotype_shared_annotations.items():
|
|
21
|
+
if annotation["sexual_dimorphism"] == sex and keep:
|
|
22
|
+
phenotype_shared_annotations_filtered[term_name] = annotation
|
|
23
|
+
if annotation["sexual_dimorphism"] != sex and drop:
|
|
24
|
+
phenotype_shared_annotations_filtered[term_name] = annotation
|
|
25
|
+
|
|
26
|
+
if len(phenotype_shared_annotations_filtered) == 0:
|
|
27
|
+
continue
|
|
28
|
+
|
|
29
|
+
pairwise_similarity_annotation["phenotype_shared_annotations"] = phenotype_shared_annotations_filtered
|
|
30
|
+
|
|
31
|
+
yield pairwise_similarity_annotation
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def filter_annotations_by_sex(
    path_pairwise_similarity_annotations: str | Path,
    sex: str,
    keep: bool = False,
    drop: bool = False,
) -> None:
    """Read pairwise records, filter their shared phenotypes by sex and print them.

    Records come from ``io_handler.read_jsonl``; the filtering is delegated to
    ``_filter_annotations_by_sex`` and every resulting record is written to
    stdout as JSONL.
    """
    filtered = _filter_annotations_by_sex(
        pairwise_similarity_annotations=io_handler.read_jsonl(path_pairwise_similarity_annotations),
        sex=sex,
        keep=keep,
        drop=drop,
    )
    for entry in filtered:
        io_handler.write_jsonl_to_stdout(entry)
|