TSUMUGI 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. TSUMUGI/annotator.py +103 -0
  2. TSUMUGI/argparser.py +599 -0
  3. TSUMUGI/core.py +185 -0
  4. TSUMUGI/data/impc_phenodigm.csv +3406 -0
  5. TSUMUGI/data/mp.obo +143993 -0
  6. TSUMUGI/filterer.py +36 -0
  7. TSUMUGI/formatter.py +122 -0
  8. TSUMUGI/genewise_annotation_builder.py +94 -0
  9. TSUMUGI/io_handler.py +189 -0
  10. TSUMUGI/main.py +300 -0
  11. TSUMUGI/network_constructor.py +603 -0
  12. TSUMUGI/ontology_handler.py +62 -0
  13. TSUMUGI/pairwise_similarity_builder.py +66 -0
  14. TSUMUGI/report_generator.py +122 -0
  15. TSUMUGI/similarity_calculator.py +498 -0
  16. TSUMUGI/subcommands/count_filterer.py +47 -0
  17. TSUMUGI/subcommands/genes_filterer.py +89 -0
  18. TSUMUGI/subcommands/graphml_builder.py +158 -0
  19. TSUMUGI/subcommands/life_stage_filterer.py +48 -0
  20. TSUMUGI/subcommands/mp_filterer.py +142 -0
  21. TSUMUGI/subcommands/score_filterer.py +22 -0
  22. TSUMUGI/subcommands/sex_filterer.py +48 -0
  23. TSUMUGI/subcommands/webapp_builder.py +358 -0
  24. TSUMUGI/subcommands/zygosity_filterer.py +48 -0
  25. TSUMUGI/validator.py +65 -0
  26. TSUMUGI/web/app/css/app.css +1129 -0
  27. TSUMUGI/web/app/genelist/network_genelist.html +339 -0
  28. TSUMUGI/web/app/genelist/network_genelist.js +421 -0
  29. TSUMUGI/web/app/js/data/dataLoader.js +41 -0
  30. TSUMUGI/web/app/js/export/graphExporter.js +214 -0
  31. TSUMUGI/web/app/js/graph/centrality.js +495 -0
  32. TSUMUGI/web/app/js/graph/components.js +30 -0
  33. TSUMUGI/web/app/js/graph/filters.js +158 -0
  34. TSUMUGI/web/app/js/graph/highlighter.js +52 -0
  35. TSUMUGI/web/app/js/graph/layoutController.js +454 -0
  36. TSUMUGI/web/app/js/graph/valueScaler.js +43 -0
  37. TSUMUGI/web/app/js/search/geneSearcher.js +93 -0
  38. TSUMUGI/web/app/js/search/phenotypeSearcher.js +292 -0
  39. TSUMUGI/web/app/js/ui/dynamicFontSize.js +30 -0
  40. TSUMUGI/web/app/js/ui/mobilePanel.js +77 -0
  41. TSUMUGI/web/app/js/ui/slider.js +22 -0
  42. TSUMUGI/web/app/js/ui/tooltips.js +514 -0
  43. TSUMUGI/web/app/js/viewer/pageSetup.js +217 -0
  44. TSUMUGI/web/app/viewer.html +515 -0
  45. TSUMUGI/web/app/viewer.js +1593 -0
  46. TSUMUGI/web/css/sanitize.css +363 -0
  47. TSUMUGI/web/css/top.css +391 -0
  48. TSUMUGI/web/image/tsumugi-favicon.ico +0 -0
  49. TSUMUGI/web/image/tsumugi-icon.png +0 -0
  50. TSUMUGI/web/image/tsumugi-logo.png +0 -0
  51. TSUMUGI/web/image/tsumugi-logo.svg +69 -0
  52. TSUMUGI/web/js/genelist_formatter.js +123 -0
  53. TSUMUGI/web/js/top.js +338 -0
  54. TSUMUGI/web/open_webapp_linux.sh +25 -0
  55. TSUMUGI/web/open_webapp_mac.command +25 -0
  56. TSUMUGI/web/open_webapp_windows.bat +37 -0
  57. TSUMUGI/web/serve_index.py +110 -0
  58. TSUMUGI/web/template/template_index.html +197 -0
  59. TSUMUGI/web_deployer.py +150 -0
  60. tsumugi-1.0.1.dist-info/METADATA +504 -0
  61. tsumugi-1.0.1.dist-info/RECORD +64 -0
  62. tsumugi-1.0.1.dist-info/WHEEL +4 -0
  63. tsumugi-1.0.1.dist-info/entry_points.txt +3 -0
  64. tsumugi-1.0.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,47 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import Counter
4
+ from pathlib import Path
5
+
6
+ from tqdm import tqdm
7
+
8
+ from TSUMUGI import io_handler
9
+
10
+
11
def filter_by_number_of_phenotypes_per_gene(
    path_pairwise_similarity_annotations: str | Path | None,
    path_genewise_phenotype_annotations: str | Path,
    min_phenotypes: int | None = None,
    max_phenotypes: int | None = None,
) -> None:
    """Emit gene pairs whose two genes both have an acceptable phenotype count.

    The count for a gene is the number of gene-wise records flagged
    ``significant`` for its ``marker_symbol``. A pair is written to stdout
    only when both ``gene1_symbol`` and ``gene2_symbol`` fall inside the
    requested bounds.

    Args:
        path_pairwise_similarity_annotations: Source handed to
            ``io_handler.read_jsonl`` for the pairwise records.
        path_genewise_phenotype_annotations: Source handed to
            ``io_handler.read_jsonl`` for the gene-wise records.
        min_phenotypes: Inclusive lower bound; ``None`` disables it.
        max_phenotypes: Inclusive upper bound; ``None`` disables it.
    """
    significant_counts = Counter()
    for annotation in io_handler.read_jsonl(path_genewise_phenotype_annotations):
        if annotation["significant"]:
            significant_counts[annotation["marker_symbol"]] += 1

    def _within_bounds(count: int) -> bool:
        if min_phenotypes is not None and count < min_phenotypes:
            return False
        if max_phenotypes is not None and count > max_phenotypes:
            return False
        return True

    # Genes without any significant record never enter the counter, so they
    # can never be part of an emitted pair.
    eligible_genes = {symbol for symbol, count in significant_counts.items() if _within_bounds(count)}

    pairs = io_handler.read_jsonl(path_pairwise_similarity_annotations)
    for pair in tqdm(pairs, desc="Filtering gene pairs"):
        if pair["gene1_symbol"] not in eligible_genes:
            continue
        if pair["gene2_symbol"] not in eligible_genes:
            continue
        # output to stdout as JSON
        io_handler.write_jsonl_to_stdout(pair)
31
+
32
+
33
def filter_by_number_of_phenotypes_per_pair(
    path_pairwise_similarity_annotations: str | Path | None,
    min_phenotypes: int | None = None,
    max_phenotypes: int | None = None,
) -> None:
    """Emit gene pairs whose number of shared phenotypes lies within bounds.

    Args:
        path_pairwise_similarity_annotations: Source handed to
            ``io_handler.read_jsonl`` for the pairwise records.
        min_phenotypes: Inclusive lower bound on the number of entries in
            ``phenotype_shared_annotations``; ``None`` disables it.
        max_phenotypes: Inclusive upper bound; ``None`` disables it.
    """
    for pair in io_handler.read_jsonl(path_pairwise_similarity_annotations):
        shared_count = len(pair["phenotype_shared_annotations"])
        too_few = min_phenotypes is not None and shared_count < min_phenotypes
        if too_few or (max_phenotypes is not None and shared_count > max_phenotypes):
            continue

        # output to stdout as JSON
        io_handler.write_jsonl_to_stdout(pair)
@@ -0,0 +1,89 @@
1
+ from collections.abc import Iterator
2
+ from pathlib import Path
3
+
4
+ from TSUMUGI import io_handler
5
+
6
+ ###############################################################################
7
+ # filter_annotations_by_genes
8
+ ###############################################################################
9
+
10
+
11
+ def _filter_annotations_by_genes(
12
+ pairwise_similarity_annotations: Iterator[dict[str, str | dict[str, dict] | dict[str | int]]],
13
+ gene_list: set[str],
14
+ keep: bool = False,
15
+ drop: bool = False,
16
+ ) -> Iterator[dict[str, str | dict[str, dict] | dict[str | int]]]:
17
+ for pairwise_similarity_annotation in pairwise_similarity_annotations:
18
+ gene1 = pairwise_similarity_annotation["gene1_symbol"]
19
+ gene2 = pairwise_similarity_annotation["gene2_symbol"]
20
+
21
+ # Keep if either gene is in the list
22
+ # - gene1: A, gene2: B, gene_list: {A, C} -> Keep
23
+ # - gene1: D, gene2: E, gene_list: {A, C} -> Drop
24
+ if (gene1 in gene_list or gene2 in gene_list) and keep:
25
+ yield pairwise_similarity_annotation
26
+
27
+ # Drop only when both genes are not in the list
28
+ # - gene1: A, gene2: B, gene_list: {A, C} -> Drop
29
+ # - gene1: D, gene2: E, gene_list: {A, C} -> Keep
30
+ if (gene1 not in gene_list and gene2 not in gene_list) and drop:
31
+ yield pairwise_similarity_annotation
32
+
33
+
34
def filter_annotations_by_genes(
    path_pairwise_similarity_annotations: str | Path,
    gene_list: set[str],
    keep: bool = False,
    drop: bool = False,
) -> None:
    """Filter pairwise records by a gene list and write the survivors to stdout.

    Args:
        path_pairwise_similarity_annotations: Source handed to
            ``io_handler.read_jsonl`` for the pairwise records.
        gene_list: Gene symbols to test each pair against.
        keep: Emit pairs in which at least one gene is in ``gene_list``.
        drop: Emit pairs in which neither gene is in ``gene_list``.
    """
    records = io_handler.read_jsonl(path_pairwise_similarity_annotations)
    selected = _filter_annotations_by_genes(
        pairwise_similarity_annotations=records,
        gene_list=gene_list,
        keep=keep,
        drop=drop,
    )
    for selected_record in selected:
        # output to stdout as JSONL
        io_handler.write_jsonl_to_stdout(selected_record)
49
+
50
+
51
+ ###############################################################################
52
+ # filter_annotations_by_gene_pairs
53
+ ###############################################################################
54
+
55
+
56
+ def _filter_annotations_by_gene_pairs(
57
+ pairwise_similarity_annotations: Iterator[dict[str, str | dict[str, dict] | dict[str | int]]],
58
+ gene_pairs: set[frozenset[str]],
59
+ keep: bool = False,
60
+ drop: bool = False,
61
+ ) -> Iterator[dict[str, str | dict[str, dict] | dict[str | int]]]:
62
+ for pairwise_similarity_annotation in pairwise_similarity_annotations:
63
+ gene1 = pairwise_similarity_annotation["gene1_symbol"]
64
+ gene2 = pairwise_similarity_annotation["gene2_symbol"]
65
+ gene_pair = frozenset({gene1, gene2})
66
+
67
+ # Keep if either gene is in the list
68
+ if gene_pair in gene_pairs and keep:
69
+ yield pairwise_similarity_annotation
70
+ # Drop only when both genes are not in the list
71
+ if gene_pair not in gene_pairs and drop:
72
+ yield pairwise_similarity_annotation
73
+
74
+
75
def filter_annotations_by_gene_pairs(
    path_pairwise_similarity_annotations: str | Path,
    gene_pairs: set[frozenset[str]],
    keep: bool = False,
    drop: bool = False,
) -> None:
    """Filter pairwise records by a set of gene pairs and write the survivors to stdout.

    Args:
        path_pairwise_similarity_annotations: Source handed to
            ``io_handler.read_jsonl`` for the pairwise records.
        gene_pairs: Unordered gene pairs to test each record against.
        keep: Emit records whose pair is in ``gene_pairs``.
        drop: Emit records whose pair is not in ``gene_pairs``.
    """
    records = io_handler.read_jsonl(path_pairwise_similarity_annotations)
    selected = _filter_annotations_by_gene_pairs(
        pairwise_similarity_annotations=records,
        gene_pairs=gene_pairs,
        keep=keep,
        drop=drop,
    )
    for selected_record in selected:
        # output to stdout as JSONL
        io_handler.write_jsonl_to_stdout(selected_record)
@@ -0,0 +1,158 @@
1
+ import io
2
+ import sys
3
+ from collections import defaultdict
4
+
5
+ import networkx as nx
6
+
7
+ from TSUMUGI import io_handler
8
+
9
+
10
def format_suffix(zygosity: str, life_stage: str, sexual_dimorphism: str) -> str:
    """Return a parenthesised suffix such as ``(Homo, Early, Male)``.

    The sexual dimorphism field is left out when it is empty/falsy or equals
    the string ``'None'``, giving ``(Homo, Early)``.
    """
    show_sex = bool(sexual_dimorphism) and sexual_dimorphism != "None"
    fields = (zygosity, life_stage, sexual_dimorphism) if show_sex else (zygosity, life_stage)
    return "(" + ", ".join(fields) + ")"
16
+
17
+
18
def build_nodes(genewise_phenotype_annotations: list[dict]) -> dict:
    """
    Aggregate node attributes per marker_symbol from gene-wise phenotype records.

    Each returned entry is keyed by marker_symbol and holds:
    - label: marker_symbol
    - effect_size: always 1.0
    - node_annotations: newline-joined text of the form
        Phenotypes of GeneA KO mice
        - vertebral transformation (Homo, Early, Male)
        ...
        Associated Human Diseases
        - Male infertility (Homo, Early)
        ...
      The disease section is only present when the gene has at least one
      disease annotation.

    Genes appear in the result in the order they are first seen in the input,
    so the output is reproducible between runs.

    NOTE(review): every record contributes a phenotype line regardless of any
    ``significant`` flag it may carry - confirm that this is intended.
    """
    phenotypes_per_gene = defaultdict(list)
    diseases_per_gene = defaultdict(list)

    for record in genewise_phenotype_annotations:
        marker_symbol = record["marker_symbol"]
        mp_term_name = record["mp_term_name"]
        suffix = format_suffix(
            record["zygosity"],
            record["life_stage"],
            record.get("sexual_dimorphism", "None"),
        )

        # KO mouse phenotypes
        phenotypes_per_gene[marker_symbol].append(f"{mp_term_name} {suffix}")

        # Human diseases; `or []` also tolerates an explicit null value.
        for disease in record.get("disease_annotation") or []:
            diseases_per_gene[marker_symbol].append(f"{disease} {suffix}")

    nodes = {}
    # Every gene with a disease entry also has a phenotype entry (both are
    # filled from the same record), so iterating the phenotype mapping covers
    # all genes and keeps insertion order instead of an unordered set union.
    for marker_symbol, phenotypes in phenotypes_per_gene.items():
        lines = [f"Phenotypes of {marker_symbol} KO mice"]
        lines.extend(f"- {phenotype}" for phenotype in phenotypes)

        # Diseases section (if available)
        diseases = diseases_per_gene.get(marker_symbol, [])
        if diseases:
            lines.append("Associated Human Diseases")
            lines.extend(f"- {disease}" for disease in diseases)

        nodes[marker_symbol] = {
            "label": marker_symbol,
            "effect_size": 1.0,
            "node_annotations": "\n".join(lines),
        }

    return nodes
79
+
80
+
81
def build_graph(pairwise_similarity_annotations: list[dict], nodes: dict) -> nx.Graph:
    """
    Assemble an undirected graph from pairwise records and prepared node attributes.

    - Nodes: each gene seen in a pair is first added with empty annotations;
      afterwards the attributes from `nodes` are applied to those genes that
      take part in at least one edge. Entries of `nodes` without an edge are
      not added.
    - Edges: gene1_symbol -- gene2_symbol
        - id: "e<N>", numbered sequentially in input order
        - weight: phenotype_similarity_score
        - edge_annotations:
            Shared phenotypes of GeneA and GeneB KOs (Similarity: 59)
            - vertebral transformation (Homo, Early, Male)
            ...
    """
    graph = nx.Graph()
    genes_with_edges = set()

    for edge_index, pair in enumerate(pairwise_similarity_annotations):
        source = pair["gene1_symbol"]
        target = pair["gene2_symbol"]
        similarity = pair["phenotype_similarity_score"]
        shared_terms = pair.get("phenotype_shared_annotations", {})

        # Placeholder nodes for genes that may be absent from the gene-wise data
        for gene in (source, target):
            if gene not in graph:
                graph.add_node(gene, label=gene, effect_size=1.0, node_annotations="")

        # Header line followed by one bullet per shared phenotype
        annotation_lines = [f"Shared phenotypes of {source} and {target} KOs (Similarity: {similarity})"]
        for term_name, meta in shared_terms.items():
            suffix = format_suffix(
                meta.get("zygosity", ""),
                meta.get("life_stage", ""),
                meta.get("sexual_dimorphism", "None"),
            )
            annotation_lines.append(f"- {term_name} {suffix}")

        graph.add_edge(
            source,
            target,
            id=f"e{edge_index}",
            weight=similarity,
            edge_annotations="\n".join(annotation_lines),
        )
        genes_with_edges.update((source, target))

    # Overwrite the placeholder attributes with the real ones where available
    for gene, attributes in nodes.items():
        if gene in genes_with_edges:
            graph.add_node(gene, **attributes)

    return graph
142
+
143
+
144
def write_graphml_to_stdout(pairwise_path: str, genewise_path: str) -> None:
    """
    Build the gene network and print it to stdout as GraphML.

    - pairwise_path: pairwise_similarity_annotations.jsonl.gz (edges)
    - genewise_path: genewise_phenotype_annotations.jsonl.gz (node annotations)
    """
    pairwise_records = io_handler.read_jsonl(pairwise_path)
    genewise_records = io_handler.read_jsonl(genewise_path)

    graph = build_graph(pairwise_records, build_nodes(genewise_records))

    # Serialise into an in-memory text buffer first, then emit it in one write.
    with io.StringIO() as graphml_buffer:
        nx.write_graphml(graph, graphml_buffer, encoding="unicode")
        sys.stdout.write(graphml_buffer.getvalue())
@@ -0,0 +1,48 @@
1
+ from collections.abc import Generator
2
+ from pathlib import Path
3
+
4
+ from TSUMUGI import io_handler
5
+
6
+
7
+ def _filter_annotations_by_life_stage(
8
+ pairwise_similarity_annotations: list[dict[str, str | dict[str, dict] | dict[str | int]]],
9
+ life_stage: str = "",
10
+ keep: bool = False,
11
+ drop: bool = False,
12
+ ) -> Generator[frozenset[str], dict[str, dict, int]]:
13
+ for pairwise_similarity_annotation in pairwise_similarity_annotations:
14
+ phenotype_shared_annotations = pairwise_similarity_annotation["phenotype_shared_annotations"]
15
+
16
+ if len(phenotype_shared_annotations) == 0:
17
+ continue
18
+
19
+ phenotype_shared_annotations_filtered = {}
20
+ for term_name, annotation in phenotype_shared_annotations.items():
21
+ if annotation["life_stage"] == life_stage and keep:
22
+ phenotype_shared_annotations_filtered[term_name] = annotation
23
+ if annotation["life_stage"] != life_stage and drop:
24
+ phenotype_shared_annotations_filtered[term_name] = annotation
25
+
26
+ if len(phenotype_shared_annotations_filtered) == 0:
27
+ continue
28
+
29
+ pairwise_similarity_annotation["phenotype_shared_annotations"] = phenotype_shared_annotations_filtered
30
+
31
+ yield pairwise_similarity_annotation
32
+
33
+
34
def filter_annotations_by_life_stage(
    path_pairwise_similarity_annotations: str | Path,
    life_stage: str,
    keep: bool = False,
    drop: bool = False,
) -> None:
    """Filter shared phenotypes by life stage and write the resulting records to stdout.

    Args:
        path_pairwise_similarity_annotations: Source handed to
            ``io_handler.read_jsonl`` for the pairwise records.
        life_stage: Life stage value to compare each shared annotation against.
        keep: Retain shared annotations whose life stage matches.
        drop: Retain shared annotations whose life stage does not match.
    """
    records = io_handler.read_jsonl(path_pairwise_similarity_annotations)
    filtered = _filter_annotations_by_life_stage(
        pairwise_similarity_annotations=records,
        life_stage=life_stage,
        keep=keep,
        drop=drop,
    )
    for filtered_record in filtered:
        # output to stdout as JSONL
        io_handler.write_jsonl_to_stdout(filtered_record)
@@ -0,0 +1,142 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from TSUMUGI import io_handler, ontology_handler
6
+
7
+ ###########################################################
8
+ # Include gene pairs with target_mp_term_id and its descendants
9
+ ###########################################################
10
+
11
+
12
def include_specific_phenotype(
    path_pairwise_similarity_annotations: str | Path | None,
    path_genewise_phenotype_annotations: str | Path | None,
    path_obo: str | Path,
    mp_term_id: str,
    life_stage: str | None = None,
    sex: str | None = None,
    zygosity: str | None = None,
    is_pairwise: bool = True,
) -> None:
    """Emit records annotated with ``mp_term_id`` or any of its descendant terms.

    In pairwise mode a gene pair is written to stdout when at least one of its
    shared phenotypes (matched by term name) belongs to the target term set and
    satisfies the optional life stage / sex / zygosity conditions. In gene-wise
    mode each record is tested on its ``mp_term_id``; records explicitly marked
    ``significant: false`` are skipped.

    Args:
        path_pairwise_similarity_annotations: Pairwise records source (used when
            ``is_pairwise`` is true).
        path_genewise_phenotype_annotations: Gene-wise records source (used when
            ``is_pairwise`` is false).
        path_obo: Ontology file handed to ``io_handler.parse_obo_file``.
        mp_term_id: Target ontology term ID.
        life_stage: Required ``life_stage`` value, or ``None`` for any.
        sex: Required ``sexual_dimorphism`` value, or ``None`` for any.
        zygosity: Required ``zygosity`` value, or ``None`` for any.
        is_pairwise: Select pairwise (True) or gene-wise (False) mode.
    """
    ontology_terms = io_handler.parse_obo_file(path_obo)
    _, children_by_term = ontology_handler.build_term_hierarchy(ontology_terms)
    target_term_ids = ontology_handler.find_all_descendant_terms(mp_term_id, children_by_term)
    target_term_ids.add(mp_term_id)
    # Pairwise records key their shared phenotypes by term name, not by ID.
    target_term_names_all = {
        term_data["name"] for term_id, term_data in ontology_terms.items() if term_id in target_term_ids
    }

    def _satisfies_conditions(entry: dict) -> bool:
        # A condition left as None places no restriction on the entry.
        return (
            (life_stage is None or entry["life_stage"] == life_stage)
            and (sex is None or entry["sexual_dimorphism"] == sex)
            and (zygosity is None or entry["zygosity"] == zygosity)
        )

    if not is_pairwise:
        for record in io_handler.read_jsonl(path_genewise_phenotype_annotations):
            if record["mp_term_id"] not in target_term_ids:
                continue
            if record.get("significant") is False:
                continue
            if _satisfies_conditions(record):
                # output to stdout as JSONL
                io_handler.write_jsonl_to_stdout(record)
        return

    for record in io_handler.read_jsonl(path_pairwise_similarity_annotations):
        matched_names = set(record["phenotype_shared_annotations"].keys()).intersection(target_term_names_all)

        # If none of the target terms are present, skip
        if not matched_names:
            continue

        # Every matched term is inspected; one satisfying term is enough.
        found = False
        for term_name in matched_names:
            if _satisfies_conditions(record["phenotype_shared_annotations"][term_name]):
                found = True

        if found:
            # output to stdout as JSONL
            io_handler.write_jsonl_to_stdout(record)
71
+
72
+
73
+ ###########################################################
74
+ # Exclude gene pairs with target_mp_term_id and its descendants
75
+ ###########################################################
76
+
77
+
78
def exclude_specific_phenotype(
    path_pairwise_similarity_annotations: str | Path | None,
    path_genewise_phenotype_annotations: str | Path,
    path_obo: str | Path,
    mp_term_id: str,
    life_stage: str | None = None,
    sex: str | None = None,
    zygosity: str | None = None,
    is_pairwise: bool = True,
) -> None:
    """Emit records for genes confirmed NOT to show the target phenotype.

    The "related" terms are ``mp_term_id`` itself plus all of its ancestor and
    descendant terms. Genes are classified in two passes over the gene-wise
    records (each pass honouring the optional life stage / sex / zygosity
    conditions):

    1. A gene with a significant annotation on a related term is "having the
       phenotype".
    2. Among the remaining genes, a gene with a non-significant annotation on
       a related term is "confirmed as having no phenotype".

    In pairwise mode, pairs whose two genes are both in the second group are
    written to stdout. In gene-wise mode, every gene-wise record of a gene in
    the second group is written. Genes with no annotation on a related term
    belong to neither group and are therefore never emitted.

    Args:
        path_pairwise_similarity_annotations: Pairwise records source (used when
            ``is_pairwise`` is true).
        path_genewise_phenotype_annotations: Gene-wise records source.
        path_obo: Ontology file handed to ``io_handler.parse_obo_file``.
        mp_term_id: Target ontology term ID.
        life_stage: Required ``life_stage`` value, or ``None`` for any.
        sex: Required ``sexual_dimorphism`` value, or ``None`` for any.
        zygosity: Required ``zygosity`` value, or ``None`` for any.
        is_pairwise: Select pairwise (True) or gene-wise (False) output.
    """
    ontology_terms = io_handler.parse_obo_file(path_obo)
    parents_by_term, children_by_term = ontology_handler.build_term_hierarchy(ontology_terms)
    descendant_ids = ontology_handler.find_all_descendant_terms(mp_term_id, children_by_term)
    ancestor_ids = ontology_handler.find_all_ancestor_terms(mp_term_id, parents_by_term)

    # Target term together with its whole lineage in both directions.
    related_term_ids = {mp_term_id}
    related_term_ids.update(ancestor_ids)
    related_term_ids.update(descendant_ids)

    def _satisfies_conditions(entry: dict) -> bool:
        # A condition left as None places no restriction on the entry.
        return (
            (life_stage is None or entry["life_stage"] == life_stage)
            and (sex is None or entry["sexual_dimorphism"] == sex)
            and (zygosity is None or entry["zygosity"] == zygosity)
        )

    # Materialised because the records are scanned more than once.
    genewise_records = list(io_handler.read_jsonl(Path(path_genewise_phenotype_annotations)))

    # Pass 1: genes with a significant annotation on a related term.
    genes_with_phenotype = {
        entry["marker_symbol"]
        for entry in genewise_records
        if entry["mp_term_id"] in related_term_ids and entry["significant"] is True and _satisfies_conditions(entry)
    }

    # Pass 2: still-undetermined genes with a non-significant annotation on a
    # related term are confirmed as having no phenotype.
    genes_without_phenotype = {
        entry["marker_symbol"]
        for entry in genewise_records
        if entry["marker_symbol"] not in genes_with_phenotype
        and entry["mp_term_id"] in related_term_ids
        and entry["significant"] is False
        and _satisfies_conditions(entry)
    }

    if not is_pairwise:
        for entry in genewise_records:
            if entry["marker_symbol"] in genes_without_phenotype:
                # output to stdout as JSONL
                io_handler.write_jsonl_to_stdout(entry)
        return

    for pair in io_handler.read_jsonl(path_pairwise_similarity_annotations):
        if pair["gene1_symbol"] in genes_without_phenotype and pair["gene2_symbol"] in genes_without_phenotype:
            # output to stdout as JSONL
            io_handler.write_jsonl_to_stdout(pair)
@@ -0,0 +1,22 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from TSUMUGI import io_handler
6
+
7
+
8
def filter_by_score_of_phenotypes_per_pair(
    path_pairwise_similarity_annotations: str | Path | None,
    min_phenotypes: int | None = None,
    max_phenotypes: int | None = None,
) -> None:
    """Emit gene pairs whose ``phenotype_similarity_score`` lies within bounds.

    Despite their names, ``min_phenotypes`` and ``max_phenotypes`` are compared
    against the similarity score, not against a phenotype count.

    Args:
        path_pairwise_similarity_annotations: Source handed to
            ``io_handler.read_jsonl`` for the pairwise records.
        min_phenotypes: Inclusive lower bound on the score; ``None`` disables it.
        max_phenotypes: Inclusive upper bound on the score; ``None`` disables it.
    """
    for pair in io_handler.read_jsonl(path_pairwise_similarity_annotations):
        score = pair["phenotype_similarity_score"]
        too_low = min_phenotypes is not None and score < min_phenotypes
        if too_low or (max_phenotypes is not None and score > max_phenotypes):
            continue

        # output to stdout as JSON
        io_handler.write_jsonl_to_stdout(pair)
@@ -0,0 +1,48 @@
1
+ from collections.abc import Generator
2
+ from pathlib import Path
3
+
4
+ from TSUMUGI import io_handler
5
+
6
+
7
+ def _filter_annotations_by_sex(
8
+ pairwise_similarity_annotations: list[dict[str, str | dict[str, dict] | dict[str | int]]],
9
+ sex: str = "None",
10
+ keep: bool = False,
11
+ drop: bool = False,
12
+ ) -> Generator[frozenset[str], dict[str, dict, int]]:
13
+ for pairwise_similarity_annotation in pairwise_similarity_annotations:
14
+ phenotype_shared_annotations = pairwise_similarity_annotation["phenotype_shared_annotations"]
15
+
16
+ if len(phenotype_shared_annotations) == 0:
17
+ continue
18
+
19
+ phenotype_shared_annotations_filtered = {}
20
+ for term_name, annotation in phenotype_shared_annotations.items():
21
+ if annotation["sexual_dimorphism"] == sex and keep:
22
+ phenotype_shared_annotations_filtered[term_name] = annotation
23
+ if annotation["sexual_dimorphism"] != sex and drop:
24
+ phenotype_shared_annotations_filtered[term_name] = annotation
25
+
26
+ if len(phenotype_shared_annotations_filtered) == 0:
27
+ continue
28
+
29
+ pairwise_similarity_annotation["phenotype_shared_annotations"] = phenotype_shared_annotations_filtered
30
+
31
+ yield pairwise_similarity_annotation
32
+
33
+
34
def filter_annotations_by_sex(
    path_pairwise_similarity_annotations: str | Path,
    sex: str,
    keep: bool = False,
    drop: bool = False,
) -> None:
    """Filter shared phenotypes by sex and write the resulting records to stdout.

    Args:
        path_pairwise_similarity_annotations: Source handed to
            ``io_handler.read_jsonl`` for the pairwise records.
        sex: ``sexual_dimorphism`` value to compare each shared annotation against.
        keep: Retain shared annotations whose value matches.
        drop: Retain shared annotations whose value does not match.
    """
    records = io_handler.read_jsonl(path_pairwise_similarity_annotations)
    filtered = _filter_annotations_by_sex(
        pairwise_similarity_annotations=records,
        sex=sex,
        keep=keep,
        drop=drop,
    )
    for filtered_record in filtered:
        # output to stdout as JSONL
        io_handler.write_jsonl_to_stdout(filtered_record)