@nahisaho/satori 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. package/README.md +111 -45
  2. package/package.json +1 -1
  3. package/src/.github/skills/scientific-admet-pharmacokinetics/SKILL.md +1 -0
  4. package/src/.github/skills/scientific-cancer-genomics/SKILL.md +287 -0
  5. package/src/.github/skills/scientific-clinical-decision-support/SKILL.md +2 -0
  6. package/src/.github/skills/scientific-clinical-reporting/SKILL.md +324 -0
  7. package/src/.github/skills/scientific-computational-materials/SKILL.md +4 -4
  8. package/src/.github/skills/scientific-deep-learning/SKILL.md +1 -0
  9. package/src/.github/skills/scientific-epidemiology-public-health/SKILL.md +1 -0
  10. package/src/.github/skills/scientific-grant-writing/SKILL.md +2 -0
  11. package/src/.github/skills/scientific-lab-data-management/SKILL.md +2 -2
  12. package/src/.github/skills/scientific-literature-search/SKILL.md +443 -0
  13. package/src/.github/skills/scientific-meta-analysis/SKILL.md +10 -0
  14. package/src/.github/skills/scientific-metabolomics-databases/SKILL.md +288 -0
  15. package/src/.github/skills/scientific-molecular-docking/SKILL.md +303 -0
  16. package/src/.github/skills/scientific-pathway-enrichment/SKILL.md +449 -0
  17. package/src/.github/skills/scientific-pharmacovigilance/SKILL.md +3 -0
  18. package/src/.github/skills/scientific-population-genetics/SKILL.md +2 -0
  19. package/src/.github/skills/scientific-precision-oncology/SKILL.md +1 -0
  20. package/src/.github/skills/scientific-protein-domain-family/SKILL.md +369 -0
  21. package/src/.github/skills/scientific-protein-interaction-network/SKILL.md +352 -0
  22. package/src/.github/skills/scientific-scientific-schematics/SKILL.md +2 -2
  23. package/src/.github/skills/scientific-single-cell-genomics/SKILL.md +2 -0
  24. package/src/.github/skills/scientific-survival-clinical/SKILL.md +11 -0
  25. package/src/.github/skills/scientific-systematic-review/SKILL.md +361 -0
  26. package/src/.github/skills/scientific-variant-effect-prediction/SKILL.md +325 -0
  27. package/src/.github/skills/scientific-variant-interpretation/SKILL.md +1 -0
@@ -0,0 +1,361 @@
1
+ ---
2
+ name: scientific-systematic-review
3
+ description: |
4
+ PRISMA 2020 準拠系統的レビュースキル。マルチ DB 検索戦略立案
5
+ (PubMed/Embase/Cochrane/Web of Science)、スクリーニングワークフロー
6
+ (タイトル/抄録→全文)、品質評価 (RoB 2/ROBINS-I/NOS)、データ抽出
7
+ テンプレート、PRISMA フロー図自動生成パイプライン。
8
+ ---
9
+
10
+ # Scientific Systematic Review
11
+
12
+ PRISMA 2020 ガイドラインに準拠した
13
+ 系統的レビュー・メタアナリシスの方法論パイプラインを提供する。
14
+
15
+ ## When to Use
16
+
17
+ - 系統的レビューの検索戦略を設計するとき
18
+ - タイトル/抄録スクリーニングのワークフローが必要なとき
19
+ - バイアスリスク (RoB 2, ROBINS-I, NOS) 評価を行うとき
20
+ - PRISMA フロー図を生成するとき
21
+ - 系統的レビューのデータ抽出テーブルを作成するとき
22
+
23
+ ---
24
+
25
+ ## Quick Start
26
+
27
+ ## 1. 検索戦略設計 (PICO → クエリ)
28
+
29
+ ```python
30
+ import pandas as pd
31
+ import json
32
+
33
+
34
def design_search_strategy(pico, databases=None):
    """
    Design a block-based search strategy from a PICO question.

    Each PICO element is a comma-separated string of free-text terms.
    Terms inside one concept are combined with OR; concepts are combined
    with AND to form the PubMed query.

    Parameters:
        pico: dict — {"P": "...", "I": "...", "C": "...", "O": "..."}
        databases: list — e.g. ["PubMed", "Embase", "Cochrane", "Web of Science"];
            defaults to ["PubMed", "Embase", "Cochrane"]

    Returns:
        dict with keys "pico", "databases", "search_blocks" and
        "pubmed_query". The query is built inside this function, so MeSH
        terms added to a block afterwards are not reflected in
        "pubmed_query" until the query is rebuilt.
    """
    if databases is None:
        databases = ["PubMed", "Embase", "Cochrane"]

    def _split_terms(raw):
        # Blank fragments ("", " ", trailing commas) must never become a
        # quoted empty term such as ("") in the final query.
        return [part.strip() for part in (raw or "").split(",") if part.strip()]

    def _block(concept, key):
        return {
            "concept": concept,
            "terms": _split_terms(pico.get(key, "")),
            "mesh_terms": [],  # add MeSH headings manually
            "boolean": "OR",
        }

    # Population and Intervention are always reported as blocks;
    # Comparison and Outcome only when they actually contain terms.
    search_blocks = [_block("Population", "P"), _block("Intervention", "I")]
    for concept, key in (("Comparison", "C"), ("Outcome", "O")):
        optional_block = _block(concept, key)
        if optional_block["terms"]:
            search_blocks.append(optional_block)

    strategy = {
        "pico": pico,
        "databases": databases,
        "search_blocks": search_blocks,
    }

    # PubMed query generation: skip blocks with nothing to search for so
    # the query never contains an empty "()" / ("") clause.
    pubmed_parts = []
    for block in search_blocks:
        terms = [f'"{t}"' for t in block["terms"]]
        mesh = [f'"{m}"[MeSH]' for m in block.get("mesh_terms", [])]
        all_terms = terms + mesh
        if all_terms:
            pubmed_parts.append(f"({' OR '.join(all_terms)})")

    strategy["pubmed_query"] = " AND ".join(pubmed_parts)

    print(f"Search strategy: {len(strategy['search_blocks'])} blocks, "
          f"{len(databases)} databases")
    print(f"PubMed query: {strategy['pubmed_query'][:200]}...")
    return strategy
105
+ ```
106
+
107
+ ## 2. スクリーニングワークフロー
108
+
109
+ ```python
110
def screening_workflow(records_df, stage="title_abstract",
                       inclusion_criteria=None,
                       exclusion_criteria=None):
    """
    Prepare a de-duplicated screening sheet for one screening stage.

    Parameters:
        records_df: DataFrame — columns: [id, title, abstract, source]
        stage: "title_abstract" or "fulltext"
        inclusion_criteria: list — eligibility criteria
        exclusion_criteria: list — exclusion criteria

    Returns:
        (screening_df, summary): a copy of the unique records with empty
        "decision" / "excluded_reason" / "screener" columns added, and a
        dict of counts plus the criteria in force. The input frame is not
        modified.

    Raises:
        KeyError: if records_df has no "title" column.
    """
    if inclusion_criteria is None:
        inclusion_criteria = [
            "Published in English or Japanese",
            "Human subjects",
            "Original research (not review/editorial)",
        ]
    if exclusion_criteria is None:
        exclusion_criteria = [
            "Case reports (n < 5)",
            "Conference abstracts only",
            "Animal studies only",
        ]

    def _title_key(title):
        # Comparison key: case-folded with whitespace collapsed, so the same
        # article exported by two databases with different capitalisation or
        # spacing is recognised. Missing/blank titles get no key at all.
        if pd.isna(title):
            return None
        return " ".join(str(title).casefold().split()) or None

    # De-duplication. Records without a usable title are never treated as
    # duplicates of each other: they stay in the sheet for manual review
    # instead of being silently collapsed into a single row.
    initial_count = len(records_df)
    title_keys = records_df["title"].map(_title_key)
    is_duplicate = (title_keys.notna()
                    & title_keys.duplicated(keep="first")).to_numpy()
    # Explicit copy: the columns added below must not touch the caller's
    # frame or trigger chained-assignment warnings.
    records_df = records_df.loc[~is_duplicate].copy()
    duplicates_removed = initial_count - len(records_df)

    # Screening result template
    records_df["decision"] = "pending"
    records_df["excluded_reason"] = ""
    records_df["screener"] = ""

    result = {
        "stage": stage,
        "total_records": initial_count,
        "duplicates_removed": duplicates_removed,
        "unique_records": len(records_df),
        "inclusion_criteria": inclusion_criteria,
        "exclusion_criteria": exclusion_criteria,
    }

    print(f"Screening ({stage}): {initial_count} records → "
          f"{duplicates_removed} duplicates removed → "
          f"{len(records_df)} to screen")
    return records_df, result
158
+ ```
159
+
160
+ ## 3. バイアスリスク評価
161
+
162
+ ```python
163
def risk_of_bias_assessment(studies_df, tool="RoB2"):
    """
    Build an empty risk-of-bias assessment sheet, one row per study.

    Parameters:
        studies_df: DataFrame — columns: [study_id, study_type, ...]
        tool: "RoB2" (RCT), "ROBINS-I" (non-randomized), "NOS" (observational)

    Returns:
        DataFrame with columns [study_id, tool, <one column per domain>,
        overall]; every judgement cell is initialised to "pending". The
        columns are present even when studies_df is empty.

    Raises:
        ValueError: if tool is not one of the supported instruments.
    """
    # Domains per instrument. Judgement levels, for reference:
    #   RoB2:     Low / Some concerns / High
    #   ROBINS-I: Low / Moderate / Serious / Critical / NI
    #   NOS:      0-3 (low quality) / 4-6 (moderate) / 7-9 (high quality)
    domains_by_tool = {
        # Cochrane RoB 2 — 5 domains
        "RoB2": [
            "D1: Randomization process",
            "D2: Deviations from interventions",
            "D3: Missing outcome data",
            "D4: Measurement of the outcome",
            "D5: Selection of the reported result",
        ],
        "ROBINS-I": [
            "D1: Confounding",
            "D2: Selection of participants",
            "D3: Classification of interventions",
            "D4: Deviations from intended interventions",
            "D5: Missing data",
            "D6: Measurement of outcomes",
            "D7: Selection of the reported result",
        ],
        "NOS": [
            "Selection (0-4 stars)",
            "Comparability (0-2 stars)",
            "Outcome/Exposure (0-3 stars)",
        ],
    }
    if tool not in domains_by_tool:
        raise ValueError(f"Unknown tool: {tool}")
    domains = domains_by_tool[tool]

    # Read the id column directly rather than via iterrows(): iterrows()
    # builds one Series per row and upcasts integer ids to float whenever
    # the frame also holds float columns.
    if "study_id" in studies_df.columns:
        study_ids = studies_df["study_id"].tolist()
    else:
        study_ids = [""] * len(studies_df)

    # Assessment template
    assessments = []
    for study_id in study_ids:
        assessment = {"study_id": study_id, "tool": tool}
        assessment.update(dict.fromkeys(domains, "pending"))
        assessment["overall"] = "pending"
        assessments.append(assessment)

    # Explicit columns keep the sheet's schema stable for empty input.
    df = pd.DataFrame(assessments,
                      columns=["study_id", "tool", *domains, "overall"])
    print(f"RoB assessment ({tool}): {len(df)} studies, "
          f"{len(domains)} domains")
    return df
218
+ ```
219
+
220
+ ## 4. PRISMA フロー図生成
221
+
222
+ ```python
223
def generate_prisma_flowchart(counts, output="figures/prisma_flow.svg"):
    """
    Generate a PRISMA 2020 flow diagram as Mermaid source.

    Only the Mermaid (.mmd) source is written; rendering it to the image
    named by `output` (e.g. with the Mermaid CLI) is a separate step.

    Parameters:
        counts: dict — {
            "databases": {"PubMed": 500, "Embase": 300, "Cochrane": 100},
            "other_sources": 20,
            "duplicates_removed": 150,
            "title_abstract_screened": 770,
            "title_abstract_excluded": 650,
            "fulltext_assessed": 120,
            "fulltext_excluded": {"not_relevant": 30, "wrong_design": 20, ...},
            "included_qualitative": 70,
            "included_quantitative": 50,
        }
            Missing keys are derived from the preceding stage's numbers.
        output: str — intended image path; the Mermaid file is written
            next to it with the extension replaced by ".mmd".

    Returns:
        (mermaid_file, counts): path of the written .mmd file and the
        unchanged input dict.
    """
    import os

    # dirname is "" for a bare file name; os.makedirs("") would raise.
    out_dir = os.path.dirname(output)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Derive every stage count, falling back to the previous stage
    db_counts = counts.get("databases", {})
    total_db = sum(db_counts.values())
    other = counts.get("other_sources", 0)
    total = total_db + other
    dedup = counts.get("duplicates_removed", 0)
    screened = counts.get("title_abstract_screened", total - dedup)
    ta_excluded = counts.get("title_abstract_excluded", 0)
    ft_assessed = counts.get("fulltext_assessed", screened - ta_excluded)
    ft_excluded = counts.get("fulltext_excluded", {})
    ft_excluded_total = (sum(ft_excluded.values())
                         if isinstance(ft_excluded, dict) else ft_excluded)
    qualitative = counts.get("included_qualitative",
                             ft_assessed - ft_excluded_total)
    quantitative = counts.get("included_quantitative", qualitative)

    # Node labels are double-quoted: Mermaid treats unquoted parentheses
    # inside [...] as shape syntax, which breaks the last node's label.
    mermaid_lines = [
        "flowchart TD",
        f'    A["Database検索<br>n={total_db}"] --> C["重複除去後<br>n={total - dedup}"]',
        f'    B["その他ソース<br>n={other}"] --> C',
        f'    C --> D["タイトル/抄録スクリーニング<br>n={screened}"]',
        f'    D --> E["除外<br>n={ta_excluded}"]',
        f'    D --> F["全文評価<br>n={ft_assessed}"]',
        f'    F --> G["除外<br>n={ft_excluded_total}"]',
        f'    F --> H["質的統合<br>n={qualitative}"]',
        f'    H --> I["量的統合 (メタアナリシス)<br>n={quantitative}"]',
    ]
    mermaid = "\n".join(mermaid_lines) + "\n"

    # Swap only the final extension. A plain str.replace(".svg", ".mmd")
    # leaves non-.svg outputs unchanged (so the Mermaid text would be
    # written into the image path) and can also hit ".svg" mid-path.
    mermaid_file = os.path.splitext(output)[0] + ".mmd"
    # The labels are Japanese, so do not rely on the platform encoding.
    with open(mermaid_file, "w", encoding="utf-8") as f:
        f.write(mermaid)

    print(f"PRISMA flow: {total} identified → {qualitative} included")
    print(f"  Mermaid source: {mermaid_file}")
    return mermaid_file, counts
276
+ ```
277
+
278
+ ## 5. データ抽出テンプレート
279
+
280
+ ```python
281
def create_extraction_template(study_type="RCT",
                               custom_fields=None):
    """
    Create an empty data-extraction sheet for a systematic review.

    Parameters:
        study_type: "RCT", "cohort", "cross-sectional", "case-control"
            (anything other than "RCT" / "cohort" gets the generic
            exposure/outcome field set)
        custom_fields: list — extra columns appended after the standard ones

    Returns:
        DataFrame with zero rows and one column per extraction field.
    """
    # Fields recorded for every study regardless of design
    common = [
        "study_id", "first_author", "year", "country",
        "study_design", "sample_size", "population",
        "setting",
    ]

    # Design-specific fields, looked up by study type
    design_specific = (
        ("RCT", [
            "intervention", "comparator", "randomization_method",
            "blinding", "follow_up_duration",
            "primary_outcome", "primary_result",
            "secondary_outcomes", "adverse_events",
            "attrition_rate", "itt_analysis",
        ]),
        ("cohort", [
            "exposure", "comparator", "follow_up_duration",
            "primary_outcome", "adjustment_variables",
            "effect_measure", "effect_estimate", "ci_95",
            "p_value", "loss_to_follow_up",
        ]),
    )
    generic = [
        "exposure", "outcome", "adjustment_variables",
        "effect_measure", "effect_estimate", "ci_95",
    ]
    extra = next(
        (fields for design, fields in design_specific if study_type == design),
        generic,
    )

    columns = [*common, *extra]
    if custom_fields:
        columns += custom_fields

    sheet = pd.DataFrame(columns=columns)
    print(f"Extraction template ({study_type}): {len(columns)} fields")
    return sheet
324
+ ```
325
+
326
+ ## References
327
+
328
+ ### Output Files
329
+
330
+ | ファイル | 形式 |
331
+ |---|---|
332
+ | `results/search_strategy.json` | JSON |
333
+ | `results/screening_records.csv` | CSV |
334
+ | `results/risk_of_bias.csv` | CSV |
335
+ | `results/data_extraction.csv` | CSV |
336
+ | `figures/prisma_flow.mmd` | Mermaid |
337
+ | `figures/prisma_flow.svg` | SVG (`.mmd` を Mermaid CLI 等で別途変換して生成) |
338
+
339
+ ### 利用可能ツール
340
+
341
+ > PubMed/EuropePMC ツールは `scientific-literature-search` スキルと共有。
342
+
343
+ | カテゴリ | 主要ツール | 用途 |
344
+ |---|---|---|
345
+ | PubMed | `PubMed_search_articles` | 系統的検索 |
346
+ | PubMed | `PubMed_Guidelines_Search` | ガイドライン検索 |
347
+ | EuropePMC | `EuropePMC_search_articles` | 欧州文献検索 |
348
+
349
+ ### 参照スキル
350
+
351
+ | スキル | 関連 |
352
+ |---|---|
353
+ | `scientific-literature-search` | マルチ DB 検索実行 |
354
+ | `scientific-meta-analysis` | 量的統合 (Forest/Funnel プロット) |
355
+ | `scientific-critical-review` | 品質評価・批判レビュー |
356
+ | `scientific-academic-writing` | レビュー論文執筆 |
357
+ | `scientific-scientific-schematics` | PRISMA 図作成 |
358
+
359
+ ### 依存パッケージ
360
+
361
+ `pandas`, `json` (stdlib)
@@ -0,0 +1,325 @@
1
+ ---
2
+ name: scientific-variant-effect-prediction
3
+ description: |
4
+ 計算バリアント効果予測スキル。AlphaMissense (タンパク質構造ベース病原性予測)、
5
+ CADD (統合アノテーションスコア)、SpliceAI (スプライシング影響予測) の
6
+ 3 大予測ツールを統合したコンセンサス病原性評価パイプライン。
7
+ Ensembl VEP 連携、バリアントフィルタリング、優先順位付け対応。
8
+ 9 の ToolUniverse SMCP ツールと連携。
9
+ ---
10
+
11
+ # Scientific Variant Effect Prediction
12
+
13
+ AlphaMissense / CADD / SpliceAI の 3 大計算予測ツールを統合した
14
+ バリアント病原性評価・優先順位付けパイプラインを提供する。
15
+
16
+ ## When to Use
17
+
18
+ - ミスセンスバリアントの病原性を計算予測するとき
19
+ - CADD スコアで全ゲノムバリアントの有害度を評価するとき
20
+ - SpliceAI でスプライシング影響を予測するとき
21
+ - 複数予測ツールのコンセンサススコアを算出するとき
22
+ - WGS/WES バリアントの優先順位付けが必要なとき
23
+
24
+ ---
25
+
26
+ ## Quick Start
27
+
28
+ ## 1. AlphaMissense 病原性予測
29
+
30
+ ```python
31
+ import pandas as pd
32
+ import numpy as np
33
+ import requests
34
+
35
+
36
def alphamissense_predict(variants, uniprot_id=None):
    """
    Classify missense variants from AlphaMissense pathogenicity scores.

    The score is read from each variant dict's optional "score" key; this
    function does not fetch scores itself.

    Parameters:
        variants: list[dict] — [{"protein": "P12345", "position": 42,
            "ref": "A", "alt": "V", "score": 0.81}]
        uniprot_id: str — fallback protein accession for variants that
            carry no "protein" key

    Returns:
        DataFrame with columns [protein, position, ref_aa, alt_aa,
        am_score, am_class, variant]; the columns are present even when
        `variants` is empty.

    Raises:
        KeyError: if a variant has no "position" key.
    """
    columns = ["protein", "position", "ref_aa", "alt_aa",
               "am_score", "am_class", "variant"]
    results = []

    if uniprot_id:
        # Whole-protein score map: AlphaMissense publishes precomputed scores.
        print(f"Fetching AlphaMissense scores for {uniprot_id}...")
        # Via ToolUniverse: AlphaMissense_get_protein_scores
        # or AlphaMissense_get_residue_scores

    for var in variants:
        protein = var.get("protein", uniprot_id)
        pos = var["position"]
        ref_aa = var.get("ref", "")
        alt_aa = var.get("alt", "")

        # Classification cut-offs:
        #   pathogenic: score > 0.564
        #   benign:     score < 0.340
        #   ambiguous:  0.340 - 0.564
        score = var.get("score", np.nan)
        # pd.isna also covers None/pd.NA (np.isnan raises on None);
        # normalise every "missing" marker to NaN for a numeric column.
        if pd.isna(score):
            score = np.nan
            classification = "unknown"
        elif score > 0.564:
            classification = "likely_pathogenic"
        elif score < 0.340:
            classification = "likely_benign"
        else:
            classification = "ambiguous"

        results.append({
            "protein": protein,
            "position": pos,
            "ref_aa": ref_aa,
            "alt_aa": alt_aa,
            "am_score": score,
            "am_class": classification,
            "variant": f"{ref_aa}{pos}{alt_aa}",
        })

    df = pd.DataFrame(results, columns=columns)
    print(f"AlphaMissense: {len(df)} variants scored")
    return df
88
+ ```
89
+
90
+ ## 2. CADD スコア取得
91
+
92
+ ```python
93
def cadd_score_variants(variants, genome="GRCh38", version="v1.7"):
    """
    Look up CADD (Combined Annotation Dependent Depletion) scores.

    One HTTP request is made per variant. Lookups are best-effort: a
    failed or unmatched lookup leaves the scores as NaN and the class as
    "unknown" rather than aborting the batch.

    Parameters:
        variants: list[dict] — [{"chr": "1", "pos": 12345, "ref": "A", "alt": "G"}]
        genome: "GRCh37" or "GRCh38"
        version: CADD release, e.g. "v1.7"

    Returns:
        DataFrame with columns [chr, pos, ref, alt, cadd_raw, cadd_phred,
        cadd_class, variant]; the columns are present even when
        `variants` is empty.

    Raises:
        KeyError: if a variant lacks "chr", "pos", "ref" or "alt".
    """
    def _as_float(value):
        # Scores may arrive as JSON strings; anything unparsable is missing.
        try:
            return float(value)
        except (TypeError, ValueError):
            return np.nan

    def _classify(phred):
        # CADD PHRED rule of thumb:
        #   >= 20: top 1% deleterious
        #   >= 30: top 0.1% deleterious
        # NaN compares False against every cut-off, so it must be handled
        # first or an unscored variant would be reported as "benign".
        if np.isnan(phred):
            return "unknown"
        if phred >= 30:
            return "highly_deleterious"
        if phred >= 20:
            return "deleterious"
        if phred >= 10:
            return "moderate"
        return "benign"

    # Per the CADD API documentation the path is
    # /api/v1.0/<genome>-<version>/<chrom>:<pos>, where "v1.0" is the API
    # version and not the CADD release.
    # NOTE(review): confirm against the live API before relying on it.
    base_url = f"https://cadd.gs.washington.edu/api/v1.0/{genome}-{version}"

    columns = ["chr", "pos", "ref", "alt",
               "cadd_raw", "cadd_phred", "cadd_class", "variant"]
    results = []
    for var in variants:
        chrom = str(var["chr"]).replace("chr", "")
        pos = var["pos"]
        ref = var["ref"]
        alt = var["alt"]
        variant_id = f"chr{chrom}:{pos}{ref}>{alt}"

        # CADD API query
        # Via ToolUniverse: CADD_get_variant_score
        raw, phred = np.nan, np.nan
        try:
            resp = requests.get(f"{base_url}/{chrom}:{pos}", timeout=30)
            if resp.status_code == 200:
                # A position query returns one record per alternate allele;
                # keep the one matching this variant's ref/alt.
                for hit in resp.json():
                    if (isinstance(hit, dict)
                            and hit.get("Ref") == ref
                            and hit.get("Alt") == alt):
                        raw = _as_float(hit.get("RawScore"))
                        phred = _as_float(hit.get("PHRED"))
                        break
        except Exception as exc:
            # Deliberately broad: one failing lookup (network error, bad
            # JSON, ...) must not abort the batch — but report it instead
            # of failing silently.
            print(f"CADD lookup failed for {variant_id}: {exc}")

        results.append({
            "chr": chrom, "pos": pos, "ref": ref, "alt": alt,
            "cadd_raw": raw,
            "cadd_phred": phred,
            "cadd_class": _classify(phred),
            "variant": variant_id,
        })

    # Explicit columns so the summary below also works for empty input.
    df = pd.DataFrame(results, columns=columns)
    print(f"CADD: {len(df)} variants scored, "
          f"{(df['cadd_phred'] >= 20).sum()} deleterious (PHRED≥20)")
    return df
154
+ ```
155
+
156
+ ## 3. SpliceAI スプライシング予測
157
+
158
+ ```python
159
def spliceai_predict(variants, genome="GRCh38",
                     delta_threshold=0.2):
    """
    Classify splicing impact from SpliceAI delta scores.

    The four delta scores are read from optional keys on each variant
    dict ("ds_ag", "ds_al", "ds_dg", "ds_dl"); an absent or None score
    counts as 0. This function does not run SpliceAI itself.

    Parameters:
        variants: list[dict] — [{"chr": "1", "pos": 12345, "ref": "A", "alt": "G"}]
        genome: reference build label (accepted but not used here)
        delta_threshold: float — delta-score cut-off used for the printed
            summary count only
            0.2: high recall, 0.5: recommended, 0.8: high precision

    Returns:
        DataFrame with columns [chr, pos, ref, alt, ds_acceptor_gain,
        ds_acceptor_loss, ds_donor_gain, ds_donor_loss, max_delta,
        splice_class, variant]; the columns are present even when
        `variants` is empty.

    Raises:
        KeyError: if a variant lacks "chr", "pos", "ref" or "alt".
    """
    columns = ["chr", "pos", "ref", "alt",
               "ds_acceptor_gain", "ds_acceptor_loss",
               "ds_donor_gain", "ds_donor_loss",
               "max_delta", "splice_class", "variant"]

    def _delta(var, key):
        # An explicit None/NaN is treated like an absent key so that max()
        # below never compares None with a number.
        value = var.get(key, 0)
        return 0 if pd.isna(value) else value

    results = []
    for var in variants:
        chrom = str(var["chr"]).replace("chr", "")
        pos = var["pos"]
        ref = var["ref"]
        alt = var["alt"]

        # Via ToolUniverse: SpliceAI_predict_splice
        # SpliceAI reports four delta scores:
        #   DS_AG: acceptor gain, DS_AL: acceptor loss
        #   DS_DG: donor gain,    DS_DL: donor loss
        ds_ag = _delta(var, "ds_ag")
        ds_al = _delta(var, "ds_al")
        ds_dg = _delta(var, "ds_dg")
        ds_dl = _delta(var, "ds_dl")

        max_delta = max(ds_ag, ds_al, ds_dg, ds_dl)

        if max_delta >= 0.8:
            splice_class = "high_impact"
        elif max_delta >= 0.5:
            splice_class = "moderate_impact"
        elif max_delta >= 0.2:
            splice_class = "low_impact"
        else:
            splice_class = "no_impact"

        results.append({
            "chr": chrom, "pos": pos, "ref": ref, "alt": alt,
            "ds_acceptor_gain": ds_ag,
            "ds_acceptor_loss": ds_al,
            "ds_donor_gain": ds_dg,
            "ds_donor_loss": ds_dl,
            "max_delta": max_delta,
            "splice_class": splice_class,
            "variant": f"chr{chrom}:{pos}{ref}>{alt}",
        })

    # Explicit columns so the summary below also works for empty input.
    df = pd.DataFrame(results, columns=columns)
    impacted = (df["max_delta"] >= delta_threshold).sum()
    print(f"SpliceAI: {len(df)} variants, "
          f"{impacted} with splice impact (Δ≥{delta_threshold})")
    return df
213
+ ```
214
+
215
+ ## 4. コンセンサス病原性評価
216
+
217
+ ```python
218
def consensus_pathogenicity(am_df, cadd_df, spliceai_df,
                            am_threshold=0.564, cadd_threshold=20,
                            splice_threshold=0.5):
    """
    Combine AlphaMissense, CADD and SpliceAI into a majority-vote call.

    The CADD frame is the base table; the other two are left-joined onto
    it by the "variant" column. Each predictor with a non-missing score
    casts one vote ("damaging" or not) for the variant.

    Parameters:
        am_df: AlphaMissense result DataFrame (variant, am_score, am_class)
        cadd_df: CADD result DataFrame (variant, cadd_phred, ...)
        spliceai_df: SpliceAI result DataFrame (variant, max_delta, splice_class)
        am_threshold: AlphaMissense score must be strictly above this to vote
        cadd_threshold: CADD PHRED must be at or above this to vote
        splice_threshold: SpliceAI max delta must be at or above this to vote

    Returns:
        The merged DataFrame with two added columns:
        "consensus" — "pathogenic" (at least 2/3 of available predictors
        vote damaging), "uncertain" (at least 1/3), "benign", or
        "insufficient_data" (no predictor has a score); and
        "evidence_count" — number of predictors with a score.

    NOTE(review): the join only matches rows whose "variant" strings are
    identical across the three frames. Protein-level ids (e.g. "A42V")
    will not match genomic ids (e.g. "chr1:12345A>G") — confirm that the
    caller harmonises the ids before merging.
    """
    # Join on the variant id
    merged = cadd_df.copy()

    if len(am_df) > 0:
        merged = merged.merge(
            am_df[["variant", "am_score", "am_class"]],
            on="variant", how="left"
        )
    if len(spliceai_df) > 0:
        merged = merged.merge(
            spliceai_df[["variant", "max_delta", "splice_class"]],
            on="variant", how="left"
        )

    # (score column, cut-off, strictly-greater?). AlphaMissense is strict
    # to agree with the "score > 0.564" rule used when classifying the
    # individual AlphaMissense scores.
    score_rules = (
        ("cadd_phred", cadd_threshold, False),
        ("am_score", am_threshold, True),
        ("max_delta", splice_threshold, False),
    )

    # Tally per row with plain lists rather than DataFrame.apply(axis=1),
    # which returns a DataFrame (not a Series) for an empty frame and
    # would break the column assignments below.
    n_rows = len(merged)
    votes = [0] * n_rows
    evidence = [0] * n_rows
    for column, threshold, strict in score_rules:
        if column not in merged.columns:
            continue
        for i, value in enumerate(merged[column].tolist()):
            if pd.isna(value):
                continue
            evidence[i] += 1
            damaging = value > threshold if strict else value >= threshold
            if damaging:
                votes[i] += 1

    def _label(n_votes, n_total):
        if n_total == 0:
            return "insufficient_data"
        # Integer comparison of the 2/3 and 1/3 cut-offs: as a float,
        # 2/3 = 0.666... falls below a 0.67 cut-off, so two of three
        # agreeing predictors would never be called "pathogenic".
        if 3 * n_votes >= 2 * n_total:
            return "pathogenic"
        if 3 * n_votes >= n_total:
            return "uncertain"
        return "benign"

    merged["consensus"] = [_label(v, t) for v, t in zip(votes, evidence)]
    merged["evidence_count"] = evidence

    print(f"Consensus: {len(merged)} variants — "
          f"{(merged['consensus'] == 'pathogenic').sum()} pathogenic, "
          f"{(merged['consensus'] == 'uncertain').sum()} uncertain, "
          f"{(merged['consensus'] == 'benign').sum()} benign")
    return merged
283
+ ```
284
+
285
+ ## References
286
+
287
+ ### Output Files
288
+
289
+ | ファイル | 形式 |
290
+ |---|---|
291
+ | `results/alphamissense_scores.csv` | CSV |
292
+ | `results/cadd_scores.csv` | CSV |
293
+ | `results/spliceai_scores.csv` | CSV |
294
+ | `results/consensus_pathogenicity.csv` | CSV |
295
+ | `figures/variant_score_distribution.png` | PNG |
296
+
297
+ ### 利用可能ツール
298
+
299
+ > [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
300
+
301
+ | カテゴリ | 主要ツール | 用途 |
302
+ |---|---|---|
303
+ | AlphaMissense | `AlphaMissense_get_protein_scores` | タンパク質全体スコア |
304
+ | AlphaMissense | `AlphaMissense_get_variant_score` | 個別バリアントスコア |
305
+ | AlphaMissense | `AlphaMissense_get_residue_scores` | 残基レベルスコア |
306
+ | CADD | `CADD_get_variant_score` | 個別バリアント PHRED スコア |
307
+ | CADD | `CADD_get_position_scores` | ポジション全体スコア |
308
+ | CADD | `CADD_get_range_scores` | 範囲一括スコア |
309
+ | SpliceAI | `SpliceAI_predict_splice` | スプライシングΔスコア予測 |
310
+ | SpliceAI | `SpliceAI_predict_pangolin` | Pangolin スプライシング予測 |
311
+ | SpliceAI | `SpliceAI_get_max_delta` | 最大Δスコア取得 |
312
+
313
+ ### 参照スキル
314
+
315
+ | スキル | 関連 |
316
+ |---|---|
317
+ | `scientific-variant-interpretation` | ACMG/AMP 臨床バリアント解釈 |
318
+ | `scientific-population-genetics` | gnomAD 集団頻度参照 |
319
+ | `scientific-disease-research` | 疾患-バリアント関連 |
320
+ | `scientific-pharmacogenomics` | PGx バリアント効果 |
321
+ | `scientific-protein-structure-analysis` | 構造→機能影響評価 |
322
+
323
+ ### 依存パッケージ
324
+
325
+ `pandas`, `numpy`, `requests`
@@ -324,3 +324,4 @@ def pgx_recommendation(gene, phenotype, drug):
324
324
  | `scientific-data-preprocessing` | ← バリアントデータの前処理・正規化 |
325
325
  | `scientific-clinical-decision-support` | → バリアント解釈結果の臨床意思決定 |
326
326
  | `scientific-academic-writing` | → 研究成果の論文化 |
327
+ | `scientific-pharmacogenomics` | ← Star アレル・代謝型・薬理ゲノミクス |