@nahisaho/satori 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +111 -45
- package/package.json +1 -1
- package/src/.github/skills/scientific-admet-pharmacokinetics/SKILL.md +1 -0
- package/src/.github/skills/scientific-cancer-genomics/SKILL.md +287 -0
- package/src/.github/skills/scientific-clinical-decision-support/SKILL.md +2 -0
- package/src/.github/skills/scientific-clinical-reporting/SKILL.md +324 -0
- package/src/.github/skills/scientific-computational-materials/SKILL.md +4 -4
- package/src/.github/skills/scientific-deep-learning/SKILL.md +1 -0
- package/src/.github/skills/scientific-epidemiology-public-health/SKILL.md +1 -0
- package/src/.github/skills/scientific-grant-writing/SKILL.md +2 -0
- package/src/.github/skills/scientific-lab-data-management/SKILL.md +2 -2
- package/src/.github/skills/scientific-literature-search/SKILL.md +443 -0
- package/src/.github/skills/scientific-meta-analysis/SKILL.md +10 -0
- package/src/.github/skills/scientific-metabolomics-databases/SKILL.md +288 -0
- package/src/.github/skills/scientific-molecular-docking/SKILL.md +303 -0
- package/src/.github/skills/scientific-pathway-enrichment/SKILL.md +449 -0
- package/src/.github/skills/scientific-pharmacovigilance/SKILL.md +3 -0
- package/src/.github/skills/scientific-population-genetics/SKILL.md +2 -0
- package/src/.github/skills/scientific-precision-oncology/SKILL.md +1 -0
- package/src/.github/skills/scientific-protein-domain-family/SKILL.md +369 -0
- package/src/.github/skills/scientific-protein-interaction-network/SKILL.md +352 -0
- package/src/.github/skills/scientific-scientific-schematics/SKILL.md +2 -2
- package/src/.github/skills/scientific-single-cell-genomics/SKILL.md +2 -0
- package/src/.github/skills/scientific-survival-clinical/SKILL.md +11 -0
- package/src/.github/skills/scientific-systematic-review/SKILL.md +361 -0
- package/src/.github/skills/scientific-variant-effect-prediction/SKILL.md +325 -0
- package/src/.github/skills/scientific-variant-interpretation/SKILL.md +1 -0
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-systematic-review
|
|
3
|
+
description: |
|
|
4
|
+
PRISMA 2020 準拠系統的レビュースキル。マルチ DB 検索戦略立案
|
|
5
|
+
(PubMed/Embase/Cochrane/Web of Science)、スクリーニングワークフロー
|
|
6
|
+
(タイトル/抄録→全文)、品質評価 (RoB 2/ROBINS-I/NOS)、データ抽出
|
|
7
|
+
テンプレート、PRISMA フロー図自動生成パイプライン。
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Scientific Systematic Review
|
|
11
|
+
|
|
12
|
+
PRISMA 2020 ガイドラインに準拠した
|
|
13
|
+
系統的レビュー・メタアナリシスの方法論パイプラインを提供する。
|
|
14
|
+
|
|
15
|
+
## When to Use
|
|
16
|
+
|
|
17
|
+
- 系統的レビューの検索戦略を設計するとき
|
|
18
|
+
- タイトル/抄録スクリーニングのワークフローが必要なとき
|
|
19
|
+
- バイアスリスク (RoB 2, ROBINS-I, NOS) 評価を行うとき
|
|
20
|
+
- PRISMA フロー図を生成するとき
|
|
21
|
+
- 系統的レビューのデータ抽出テーブルを作成するとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. 検索戦略設計 (PICO → クエリ)
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import pandas as pd
|
|
31
|
+
import json
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def design_search_strategy(pico, databases=None):
    """
    Build a block-structured search strategy from a PICO question.

    Every PICO element is a comma-separated string of free-text terms.
    Terms inside one concept block are OR-ed together and the blocks are
    AND-ed to form the PubMed query. Elements that are missing, empty, or
    contain only blank terms are skipped so the query never contains an
    empty `("")` group.

    Parameters:
        pico: dict — {"P": "...", "I": "...", "C": "...", "O": "..."}
        databases: list — e.g. ["PubMed", "Embase", "Cochrane", "Web of Science"];
            defaults to ["PubMed", "Embase", "Cochrane"]

    Returns:
        dict with the keys "pico", "databases", "search_blocks"
        and "pubmed_query".
    """
    if databases is None:
        databases = ["PubMed", "Embase", "Cochrane"]

    # PICO key -> concept label, in the order the blocks appear in the query.
    concepts = (
        ("P", "Population"),
        ("I", "Intervention"),
        ("C", "Comparison"),
        ("O", "Outcome"),
    )

    search_blocks = []
    for key, concept in concepts:
        raw = pico.get(key) or ""
        terms = [t.strip() for t in raw.split(",") if t.strip()]
        if not terms:
            # An empty block would render as ("") and break the query.
            continue
        search_blocks.append({
            "concept": concept,
            "terms": terms,
            "mesh_terms": [],  # MeSH headings are added manually by the reviewer
            "boolean": "OR",
        })

    # PubMed query: quoted free-text terms plus MeSH headings per block.
    pubmed_parts = []
    for block in search_blocks:
        quoted = [f'"{t}"' for t in block["terms"]]
        mesh = [f'"{m}"[MeSH]' for m in block["mesh_terms"]]
        pubmed_parts.append(f"({' OR '.join(quoted + mesh)})")

    strategy = {
        "pico": pico,
        "databases": databases,
        "search_blocks": search_blocks,
        "pubmed_query": " AND ".join(pubmed_parts),
    }

    # Only show an ellipsis when the preview is actually truncated.
    query = strategy["pubmed_query"]
    preview = query if len(query) <= 200 else f"{query[:200]}..."
    print(f"Search strategy: {len(search_blocks)} blocks, "
          f"{len(databases)} databases")
    print(f"PubMed query: {preview}")
    return strategy
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## 2. スクリーニングワークフロー
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
def screening_workflow(records_df, stage="title_abstract",
                       inclusion_criteria=None,
                       exclusion_criteria=None):
    """
    Prepare a de-duplicated record table for manual screening.

    Duplicates are detected on a normalised title (case-folded, whitespace
    collapsed) so the same article exported from different databases is
    matched. Records without a title are never treated as duplicates of
    each other. The caller's DataFrame is not modified.

    Parameters:
        records_df: DataFrame — columns: [id, title, abstract, source]
        stage: "title_abstract" or "fulltext"
        inclusion_criteria: list — eligibility criteria
        exclusion_criteria: list — exclusion criteria

    Returns:
        (DataFrame, dict) — the unique records with empty "decision",
        "excluded_reason" and "screener" columns, and a summary of counts
        and criteria for this stage.
    """
    if inclusion_criteria is None:
        inclusion_criteria = [
            "Published in English or Japanese",
            "Human subjects",
            "Original research (not review/editorial)",
        ]
    if exclusion_criteria is None:
        exclusion_criteria = [
            "Case reports (n < 5)",
            "Conference abstracts only",
            "Animal studies only",
        ]

    # De-duplicate on a normalised title key.
    initial_count = len(records_df)
    title_key = (
        records_df["title"]
        .astype("string")
        .str.replace(r"\s+", " ", regex=True)
        .str.strip()
        .str.casefold()
    )
    # duplicated() treats missing values as equal; mask them out so that
    # records lacking a title are all kept for manual review.
    is_duplicate = title_key.duplicated(keep="first") & title_key.notna()
    # .copy() so the column assignments below act on an independent frame.
    unique_df = records_df.loc[~is_duplicate.to_numpy(dtype=bool)].copy()
    duplicates_removed = initial_count - len(unique_df)

    # Blank screening-result columns to be filled in by the reviewers.
    unique_df["decision"] = "pending"
    unique_df["excluded_reason"] = ""
    unique_df["screener"] = ""

    result = {
        "stage": stage,
        "total_records": initial_count,
        "duplicates_removed": duplicates_removed,
        "unique_records": len(unique_df),
        "inclusion_criteria": inclusion_criteria,
        "exclusion_criteria": exclusion_criteria,
    }

    print(f"Screening ({stage}): {initial_count} records → "
          f"{duplicates_removed} duplicates removed → "
          f"{len(unique_df)} to screen")
    return unique_df, result
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
## 3. バイアスリスク評価
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
def risk_of_bias_assessment(studies_df, tool="RoB2"):
    """
    Generate a blank risk-of-bias assessment sheet, one row per study.

    Every domain column and the "overall" column start as "pending" and
    are meant to be filled in by the reviewers.

    Parameters:
        studies_df: DataFrame — columns: [study_id, study_type, ...];
            when "study_id" is absent the ids are left as empty strings
        tool: "RoB2" (RCT), "ROBINS-I" (non-randomised), "NOS" (observational)

    Returns:
        DataFrame with columns [study_id, tool, <domains...>, overall].
        The columns are present even when studies_df is empty.

    Raises:
        ValueError: if tool is not one of the supported names.
    """
    domains_by_tool = {
        # Cochrane RoB 2 — 5 domains; judgements: Low / Some concerns / High
        "RoB2": [
            "D1: Randomization process",
            "D2: Deviations from interventions",
            "D3: Missing outcome data",
            "D4: Measurement of the outcome",
            "D5: Selection of the reported result",
        ],
        # ROBINS-I — 7 domains; judgements: Low / Moderate / Serious / Critical / NI
        "ROBINS-I": [
            "D1: Confounding",
            "D2: Selection of participants",
            "D3: Classification of interventions",
            "D4: Deviations from intended interventions",
            "D5: Missing data",
            "D6: Measurement of outcomes",
            "D7: Selection of the reported result",
        ],
        # NOS — star totals: 0-3 (low quality) / 4-6 (moderate) / 7-9 (high quality)
        "NOS": [
            "Selection (0-4 stars)",
            "Comparability (0-2 stars)",
            "Outcome/Exposure (0-3 stars)",
        ],
    }
    if tool not in domains_by_tool:
        raise ValueError(f"Unknown tool: {tool}")
    domains = domains_by_tool[tool]

    # Read ids straight from the column: iterating rows with iterrows()
    # would upcast integer ids to float when other columns are float.
    if "study_id" in studies_df.columns:
        study_ids = studies_df["study_id"].tolist()
    else:
        study_ids = [""] * len(studies_df)

    columns = ["study_id", "tool", *domains, "overall"]
    assessments = []
    for study_id in study_ids:
        assessment = {"study_id": study_id, "tool": tool}
        assessment.update({domain: "pending" for domain in domains})
        assessment["overall"] = "pending"
        assessments.append(assessment)

    # Explicit columns keep the sheet's schema stable for empty input.
    df = pd.DataFrame(assessments, columns=columns)
    print(f"RoB assessment ({tool}): {len(df)} studies, "
          f"{len(domains)} domains")
    return df
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
## 4. PRISMA フロー図生成
|
|
221
|
+
|
|
222
|
+
```python
|
|
223
|
+
def generate_prisma_flowchart(counts, output="figures/prisma_flow.svg"):
    """
    Write the Mermaid source of a PRISMA 2020 flow diagram.

    Only the Mermaid (.mmd) file is written here; it is placed next to
    `output` with the suffix replaced. Rendering it to SVG (for example
    with the Mermaid CLI) is a separate step.

    Parameters:
        counts: dict — {
            "databases": {"PubMed": 500, "Embase": 300, "Cochrane": 100},
            "other_sources": 20,
            "duplicates_removed": 150,
            "title_abstract_screened": 770,
            "title_abstract_excluded": 650,
            "fulltext_assessed": 120,
            "fulltext_excluded": {"not_relevant": 30, "wrong_design": 20, ...},
            "included_qualitative": 70,
            "included_quantitative": 50,
        }
            Missing downstream counts are derived from the upstream ones.
        output: str — target figure path; may be a bare filename.

    Returns:
        (str, dict) — path of the written .mmd file and `counts` unchanged.
    """
    from pathlib import Path

    output_path = Path(output)
    # Path.parent is "." for a bare filename, so this never fails the way
    # os.makedirs("") does.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    db_counts = counts.get("databases", {})
    total_db = sum(db_counts.values())
    other = counts.get("other_sources", 0)
    total = total_db + other
    dedup = counts.get("duplicates_removed", 0)
    screened = counts.get("title_abstract_screened", total - dedup)
    ta_excluded = counts.get("title_abstract_excluded", 0)
    ft_assessed = counts.get("fulltext_assessed", screened - ta_excluded)
    ft_excluded = counts.get("fulltext_excluded", {})
    # Full-text exclusions may be given per reason (dict) or as one total.
    if isinstance(ft_excluded, dict):
        ft_excluded_total = sum(ft_excluded.values())
    else:
        ft_excluded_total = ft_excluded
    qualitative = counts.get("included_qualitative", ft_assessed - ft_excluded_total)
    quantitative = counts.get("included_quantitative", qualitative)

    # Node labels are double-quoted: unquoted parentheses (as in the
    # meta-analysis label) are shape syntax in Mermaid and break parsing.
    mermaid = f"""flowchart TD
    A["Database検索<br>n={total_db}"] --> C["重複除去後<br>n={total - dedup}"]
    B["その他ソース<br>n={other}"] --> C
    C --> D["タイトル/抄録スクリーニング<br>n={screened}"]
    D --> E["除外<br>n={ta_excluded}"]
    D --> F["全文評価<br>n={ft_assessed}"]
    F --> G["除外<br>n={ft_excluded_total}"]
    F --> H["質的統合<br>n={qualitative}"]
    H --> I["量的統合 (メタアナリシス)<br>n={quantitative}"]
"""

    # with_suffix only touches the extension, so the Mermaid source can
    # never overwrite `output` itself or alter a directory name.
    mermaid_file = str(output_path.with_suffix(".mmd"))
    # Explicit UTF-8: the labels are Japanese and the platform default
    # encoding may not be able to represent them.
    with open(mermaid_file, "w", encoding="utf-8") as f:
        f.write(mermaid)

    print(f"PRISMA flow: {total} identified → {qualitative} included")
    print(f"  Mermaid source: {mermaid_file}")
    return mermaid_file, counts
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
## 5. データ抽出テンプレート
|
|
279
|
+
|
|
280
|
+
```python
|
|
281
|
+
def create_extraction_template(study_type="RCT",
                               custom_fields=None):
    """
    Create an empty data-extraction table for a systematic review.

    Parameters:
        study_type: "RCT", "cohort", "cross-sectional", "case-control";
            anything other than "RCT" or "cohort" gets the generic
            exposure/outcome field set
        custom_fields: list — extra fields appended after the standard
            ones; a single string is treated as one field name, and names
            that already exist are not repeated

    Returns:
        DataFrame with zero rows and one uniquely named column per field.
    """
    base_fields = [
        "study_id", "first_author", "year", "country",
        "study_design", "sample_size", "population",
        "setting",
    ]

    fields_by_type = {
        "RCT": [
            "intervention", "comparator", "randomization_method",
            "blinding", "follow_up_duration",
            "primary_outcome", "primary_result",
            "secondary_outcomes", "adverse_events",
            "attrition_rate", "itt_analysis",
        ],
        "cohort": [
            "exposure", "comparator", "follow_up_duration",
            "primary_outcome", "adjustment_variables",
            "effect_measure", "effect_estimate", "ci_95",
            "p_value", "loss_to_follow_up",
        ],
    }
    generic_fields = [
        "exposure", "outcome", "adjustment_variables",
        "effect_measure", "effect_estimate", "ci_95",
    ]
    type_fields = fields_by_type.get(study_type, generic_fields)

    if custom_fields is None:
        extra_fields = []
    elif isinstance(custom_fields, str):
        # Iterating a bare string would add one column per character.
        extra_fields = [custom_fields]
    else:
        extra_fields = list(custom_fields)

    # dict.fromkeys drops repeats while keeping first-seen order, so the
    # template never ends up with duplicate column names.
    all_fields = list(dict.fromkeys(base_fields + type_fields + extra_fields))

    template = pd.DataFrame(columns=all_fields)
    print(f"Extraction template ({study_type}): {len(all_fields)} fields")
    return template
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
## References
|
|
327
|
+
|
|
328
|
+
### Output Files
|
|
329
|
+
|
|
330
|
+
| ファイル | 形式 |
|
|
331
|
+
|---|---|
|
|
332
|
+
| `results/search_strategy.json` | JSON |
|
|
333
|
+
| `results/screening_records.csv` | CSV |
|
|
334
|
+
| `results/risk_of_bias.csv` | CSV |
|
|
335
|
+
| `results/data_extraction.csv` | CSV |
|
|
336
|
+
| `figures/prisma_flow.mmd` | Mermaid |
|
|
337
|
+
| `figures/prisma_flow.svg` | SVG (`.mmd` から Mermaid CLI 等で別途変換) |
|
|
338
|
+
|
|
339
|
+
### 利用可能ツール
|
|
340
|
+
|
|
341
|
+
> PubMed/EuropePMC ツールは `scientific-literature-search` スキルと共有。
|
|
342
|
+
|
|
343
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
344
|
+
|---|---|---|
|
|
345
|
+
| PubMed | `PubMed_search_articles` | 系統的検索 |
|
|
346
|
+
| PubMed | `PubMed_Guidelines_Search` | ガイドライン検索 |
|
|
347
|
+
| EuropePMC | `EuropePMC_search_articles` | 欧州文献検索 |
|
|
348
|
+
|
|
349
|
+
### 参照スキル
|
|
350
|
+
|
|
351
|
+
| スキル | 関連 |
|
|
352
|
+
|---|---|
|
|
353
|
+
| `scientific-literature-search` | マルチ DB 検索実行 |
|
|
354
|
+
| `scientific-meta-analysis` | 量的統合 (Forest/Funnel プロット) |
|
|
355
|
+
| `scientific-critical-review` | 品質評価・批判レビュー |
|
|
356
|
+
| `scientific-academic-writing` | レビュー論文執筆 |
|
|
357
|
+
| `scientific-scientific-schematics` | PRISMA 図作成 |
|
|
358
|
+
|
|
359
|
+
### 依存パッケージ
|
|
360
|
+
|
|
361
|
+
`pandas`, `json` (stdlib)
|
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-variant-effect-prediction
|
|
3
|
+
description: |
|
|
4
|
+
計算バリアント効果予測スキル。AlphaMissense (タンパク質構造ベース病原性予測)、
|
|
5
|
+
CADD (統合アノテーションスコア)、SpliceAI (スプライシング影響予測) の
|
|
6
|
+
3 大予測ツールを統合したコンセンサス病原性評価パイプライン。
|
|
7
|
+
Ensembl VEP 連携、バリアントフィルタリング、優先順位付け対応。
|
|
8
|
+
9 の ToolUniverse SMCP ツールと連携。
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Scientific Variant Effect Prediction
|
|
12
|
+
|
|
13
|
+
AlphaMissense / CADD / SpliceAI の 3 大計算予測ツールを統合した
|
|
14
|
+
バリアント病原性評価・優先順位付けパイプラインを提供する。
|
|
15
|
+
|
|
16
|
+
## When to Use
|
|
17
|
+
|
|
18
|
+
- ミスセンスバリアントの病原性を計算予測するとき
|
|
19
|
+
- CADD スコアで全ゲノムバリアントの有害度を評価するとき
|
|
20
|
+
- SpliceAI でスプライシング影響を予測するとき
|
|
21
|
+
- 複数予測ツールのコンセンサススコアを算出するとき
|
|
22
|
+
- WGS/WES バリアントの優先順位付けが必要なとき
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
## 1. AlphaMissense 病原性予測
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import pandas as pd
|
|
32
|
+
import numpy as np
|
|
33
|
+
import requests
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def alphamissense_predict(variants, uniprot_id=None):
    """
    Classify missense variants from their AlphaMissense scores.

    No score is fetched here: each variant dict is expected to already
    carry its "score" (e.g. obtained through the ToolUniverse tools
    AlphaMissense_get_protein_scores / AlphaMissense_get_residue_scores).
    A missing, None or non-numeric score is classified "unknown".

    Parameters:
        variants: list[dict] — [{"protein": "P12345", "position": 42,
            "ref": "A", "alt": "V", "score": 0.81}]; None is treated as
            an empty list
        uniprot_id: str — protein used for variants without a "protein" key

    Returns:
        DataFrame with columns [protein, position, ref_aa, alt_aa,
        am_score, am_class, variant]; "variant" is the protein-level
        change such as "A42V". The columns are present for empty input.
    """
    columns = ["protein", "position", "ref_aa", "alt_aa",
               "am_score", "am_class", "variant"]
    results = []

    if uniprot_id:
        # NOTE: this only announces the lookup; the protein-wide score map
        # has to be retrieved through ToolUniverse by the caller.
        print(f"Fetching AlphaMissense scores for {uniprot_id}...")

    for var in variants or []:
        protein = var.get("protein", uniprot_id)
        pos = var["position"]
        ref_aa = var.get("ref", "")
        alt_aa = var.get("alt", "")

        # Coerce defensively: np.isnan() raises TypeError on None/str.
        try:
            score = float(var.get("score"))
        except (TypeError, ValueError):
            score = np.nan

        # Classification cut-offs:
        #   likely pathogenic: score > 0.564
        #   likely benign:     score < 0.340
        #   ambiguous:         0.340 - 0.564
        if np.isnan(score):
            classification = "unknown"
        elif score > 0.564:
            classification = "likely_pathogenic"
        elif score < 0.340:
            classification = "likely_benign"
        else:
            classification = "ambiguous"

        results.append({
            "protein": protein,
            "position": pos,
            "ref_aa": ref_aa,
            "alt_aa": alt_aa,
            "am_score": score,
            "am_class": classification,
            "variant": f"{ref_aa}{pos}{alt_aa}",
        })

    # Explicit columns keep the schema stable when there are no variants.
    df = pd.DataFrame(results, columns=columns)
    print(f"AlphaMissense: {len(df)} variants scored")
    return df
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## 2. CADD スコア取得
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
def _cadd_to_float(value):
    """Convert a CADD API field to float; NaN when missing or unparsable."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return np.nan


def _classify_cadd_phred(phred):
    """Map a CADD PHRED score to a label; a missing score is "unknown"."""
    # Rough PHRED guide: >= 20 is the top 1% most deleterious,
    # >= 30 the top 0.1%.
    if pd.isna(phred):
        # Every comparison with NaN is False, so without this guard a
        # failed lookup would fall through to "benign".
        return "unknown"
    if phred >= 30:
        return "highly_deleterious"
    if phred >= 20:
        return "deleterious"
    if phred >= 10:
        return "moderate"
    return "benign"


def cadd_score_variants(variants, genome="GRCh38", version="v1.7"):
    """
    Look up CADD (Combined Annotation Dependent Depletion) scores.

    One HTTP request is made per variant. Lookups are best-effort: on a
    network or parsing failure a warning is emitted and the variant gets
    NaN scores with class "unknown".

    Parameters:
        variants: list[dict] — [{"chr": "1", "pos": 12345, "ref": "A", "alt": "G"}];
            None is treated as an empty list
        genome: "GRCh37" or "GRCh38"
        version: CADD release, e.g. "v1.7"

    Returns:
        DataFrame with columns [chr, pos, ref, alt, cadd_raw, cadd_phred,
        cadd_class, variant]; "variant" looks like "chr1:12345A>G".
        The columns are present for empty input.
    """
    import warnings

    columns = ["chr", "pos", "ref", "alt",
               "cadd_raw", "cadd_phred", "cadd_class", "variant"]

    # NOTE(review): the public CADD API appears to use
    # /api/v1.0/<genome>-<version>/<chrom>:<pos> — confirm against the
    # CADD API documentation before relying on this in production.
    base_url = f"https://cadd.gs.washington.edu/api/v1.0/{genome}-{version}"

    results = []
    for var in variants or []:
        chrom = str(var["chr"]).replace("chr", "")
        pos = var["pos"]
        ref = var["ref"]
        alt = var["alt"]

        # ToolUniverse equivalent: CADD_get_variant_score
        raw, phred = np.nan, np.nan
        try:
            resp = requests.get(f"{base_url}/{chrom}:{pos}", timeout=30)
            if resp.status_code == 200:
                # The position query lists every alternate allele at the
                # site; pick the one that matches this variant.
                for hit in resp.json():
                    if hit.get("Ref") == ref and hit.get("Alt") == alt:
                        raw = _cadd_to_float(hit.get("RawScore"))
                        phred = _cadd_to_float(hit.get("PHRED"))
                        break
        except (requests.RequestException, ValueError,
                AttributeError, TypeError) as exc:
            # Keep going, but make the failure visible.
            warnings.warn(
                f"CADD lookup failed for chr{chrom}:{pos}{ref}>{alt}: {exc}"
            )

        results.append({
            "chr": chrom, "pos": pos, "ref": ref, "alt": alt,
            "cadd_raw": raw,
            "cadd_phred": phred,
            "cadd_class": _classify_cadd_phred(phred),
            "variant": f"chr{chrom}:{pos}{ref}>{alt}",
        })

    # Explicit columns so an empty result still has "cadd_phred".
    df = pd.DataFrame(results, columns=columns)
    n_deleterious = sum(1 for row in results if row["cadd_phred"] >= 20)
    print(f"CADD: {len(df)} variants scored, "
          f"{n_deleterious} deleterious (PHRED≥20)")
    return df
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## 3. SpliceAI スプライシング予測
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
def spliceai_predict(variants, genome="GRCh38",
                     delta_threshold=0.2):
    """
    Classify splicing impact from SpliceAI delta scores.

    No prediction is run here: each variant dict is expected to carry the
    four delta scores (e.g. from the ToolUniverse tool
    SpliceAI_predict_splice) under the keys "ds_ag", "ds_al", "ds_dg"
    and "ds_dl".

    NOTE: an absent or None score is treated as 0, so a variant with no
    scores at all is reported as "no_impact" rather than as unknown.

    Parameters:
        variants: list[dict] — [{"chr": "1", "pos": 12345, "ref": "A", "alt": "G"}];
            None is treated as an empty list
        genome: genome build; not used by this function
        delta_threshold: float — delta-score cutoff used only for the
            printed summary count
            0.2: high recall, 0.5: recommended, 0.8: high precision

    Returns:
        DataFrame with columns [chr, pos, ref, alt, ds_acceptor_gain,
        ds_acceptor_loss, ds_donor_gain, ds_donor_loss, max_delta,
        splice_class, variant]. The columns are present for empty input.
    """
    columns = ["chr", "pos", "ref", "alt",
               "ds_acceptor_gain", "ds_acceptor_loss",
               "ds_donor_gain", "ds_donor_loss",
               "max_delta", "splice_class", "variant"]

    def _delta(var, key):
        # An explicit None would make max() raise; treat it like a
        # missing key.
        value = var.get(key, 0)
        return 0 if value is None else value

    results = []
    for var in variants or []:
        chrom = str(var["chr"]).replace("chr", "")
        pos = var["pos"]
        ref = var["ref"]
        alt = var["alt"]

        # SpliceAI reports four delta scores:
        #   DS_AG: acceptor gain, DS_AL: acceptor loss
        #   DS_DG: donor gain,    DS_DL: donor loss
        ds_ag = _delta(var, "ds_ag")
        ds_al = _delta(var, "ds_al")
        ds_dg = _delta(var, "ds_dg")
        ds_dl = _delta(var, "ds_dl")

        max_delta = max(ds_ag, ds_al, ds_dg, ds_dl)

        if max_delta >= 0.8:
            splice_class = "high_impact"
        elif max_delta >= 0.5:
            splice_class = "moderate_impact"
        elif max_delta >= 0.2:
            splice_class = "low_impact"
        else:
            splice_class = "no_impact"

        results.append({
            "chr": chrom, "pos": pos, "ref": ref, "alt": alt,
            "ds_acceptor_gain": ds_ag,
            "ds_acceptor_loss": ds_al,
            "ds_donor_gain": ds_dg,
            "ds_donor_loss": ds_dl,
            "max_delta": max_delta,
            "splice_class": splice_class,
            "variant": f"chr{chrom}:{pos}{ref}>{alt}",
        })

    # Explicit columns so an empty result still has "max_delta".
    df = pd.DataFrame(results, columns=columns)
    impacted = sum(1 for row in results if row["max_delta"] >= delta_threshold)
    print(f"SpliceAI: {len(df)} variants, "
          f"{impacted} with splice impact (Δ≥{delta_threshold})")
    return df
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
## 4. コンセンサス病原性評価
|
|
216
|
+
|
|
217
|
+
```python
|
|
218
|
+
def consensus_pathogenicity(am_df, cadd_df, spliceai_df,
                            am_threshold=0.564, cadd_threshold=20,
                            splice_threshold=0.5):
    """
    Combine AlphaMissense, CADD and SpliceAI results into a consensus call.

    The CADD table is the base; the other two are left-joined on the
    "variant" column, so variants absent from cadd_df are not reported.
    Each predictor with a non-missing score casts one vote, which counts
    as deleterious when the score reaches its threshold:

        at least 2/3 of votes deleterious -> "pathogenic"
        at least 1/3                      -> "uncertain"
        otherwise                         -> "benign"
        no score available at all         -> "insufficient_data"

    NOTE: the join needs identical "variant" identifiers in all tables.
    Protein-level ids such as "A42V" will not match genomic ids such as
    "chr1:12345A>G"; a warning is emitted when a table shares no id with
    cadd_df.

    Parameters:
        am_df: AlphaMissense result DataFrame (variant, am_score, am_class)
        cadd_df: CADD result DataFrame (variant, cadd_phred, ...)
        spliceai_df: SpliceAI result DataFrame (variant, max_delta, splice_class)
        am_threshold: AlphaMissense score counted as deleterious (>=)
        cadd_threshold: CADD PHRED counted as deleterious (>=)
        splice_threshold: SpliceAI max delta counted as deleterious (>=)

    Returns:
        DataFrame — cadd_df plus the joined columns, "consensus" and
        "evidence_count" (number of predictors with a score).
    """
    import warnings

    merged = cadd_df.copy()

    extras = (
        ("AlphaMissense", am_df, ["variant", "am_score", "am_class"]),
        ("SpliceAI", spliceai_df, ["variant", "max_delta", "splice_class"]),
    )
    for label, table, cols in extras:
        if len(table) == 0:
            continue
        # One row per variant, otherwise the left join multiplies rows.
        part = table[cols].drop_duplicates(subset="variant", keep="first")
        if len(merged) > 0 and not set(part["variant"]) & set(merged["variant"]):
            warnings.warn(
                f"{label} results share no 'variant' id with the CADD "
                "results; they will not contribute to the consensus"
            )
        merged = merged.merge(part, on="variant", how="left")

    # Tally votes column-wise; this also behaves on an empty table, where
    # a row-wise apply() does not return a Series.
    thresholds = {
        "cadd_phred": cadd_threshold,
        "am_score": am_threshold,
        "max_delta": splice_threshold,
    }
    votes = pd.Series(0, index=merged.index, dtype="int64")
    total = pd.Series(0, index=merged.index, dtype="int64")
    for column, threshold in thresholds.items():
        if column not in merged.columns:
            continue
        values = pd.to_numeric(merged[column], errors="coerce")
        total += values.notna().astype("int64")
        # NaN >= threshold is False, so missing scores never vote.
        votes += (values >= threshold).astype("int64")

    # Integer comparisons instead of votes/total >= 0.67: two of three
    # votes is 0.666..., which the rounded constant wrongly rejected.
    consensus = pd.Series("benign", index=merged.index, dtype=object)
    consensus[votes * 3 >= total] = "uncertain"        # ratio >= 1/3
    consensus[votes * 3 >= total * 2] = "pathogenic"   # ratio >= 2/3
    consensus[total == 0] = "insufficient_data"

    merged["consensus"] = consensus
    merged["evidence_count"] = total

    print(f"Consensus: {len(merged)} variants — "
          f"{(merged['consensus'] == 'pathogenic').sum()} pathogenic, "
          f"{(merged['consensus'] == 'uncertain').sum()} uncertain, "
          f"{(merged['consensus'] == 'benign').sum()} benign")
    return merged
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
## References
|
|
286
|
+
|
|
287
|
+
### Output Files
|
|
288
|
+
|
|
289
|
+
| ファイル | 形式 |
|
|
290
|
+
|---|---|
|
|
291
|
+
| `results/alphamissense_scores.csv` | CSV |
|
|
292
|
+
| `results/cadd_scores.csv` | CSV |
|
|
293
|
+
| `results/spliceai_scores.csv` | CSV |
|
|
294
|
+
| `results/consensus_pathogenicity.csv` | CSV |
|
|
295
|
+
| `figures/variant_score_distribution.png` | PNG |
|
|
296
|
+
|
|
297
|
+
### 利用可能ツール
|
|
298
|
+
|
|
299
|
+
> [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
|
|
300
|
+
|
|
301
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
302
|
+
|---|---|---|
|
|
303
|
+
| AlphaMissense | `AlphaMissense_get_protein_scores` | タンパク質全体スコア |
|
|
304
|
+
| AlphaMissense | `AlphaMissense_get_variant_score` | 個別バリアントスコア |
|
|
305
|
+
| AlphaMissense | `AlphaMissense_get_residue_scores` | 残基レベルスコア |
|
|
306
|
+
| CADD | `CADD_get_variant_score` | 個別バリアント PHRED スコア |
|
|
307
|
+
| CADD | `CADD_get_position_scores` | ポジション全体スコア |
|
|
308
|
+
| CADD | `CADD_get_range_scores` | 範囲一括スコア |
|
|
309
|
+
| SpliceAI | `SpliceAI_predict_splice` | スプライシングΔスコア予測 |
|
|
310
|
+
| SpliceAI | `SpliceAI_predict_pangolin` | Pangolin スプライシング予測 |
|
|
311
|
+
| SpliceAI | `SpliceAI_get_max_delta` | 最大Δスコア取得 |
|
|
312
|
+
|
|
313
|
+
### 参照スキル
|
|
314
|
+
|
|
315
|
+
| スキル | 関連 |
|
|
316
|
+
|---|---|
|
|
317
|
+
| `scientific-variant-interpretation` | ACMG/AMP 臨床バリアント解釈 |
|
|
318
|
+
| `scientific-population-genetics` | gnomAD 集団頻度参照 |
|
|
319
|
+
| `scientific-disease-research` | 疾患-バリアント関連 |
|
|
320
|
+
| `scientific-pharmacogenomics` | PGx バリアント効果 |
|
|
321
|
+
| `scientific-protein-structure-analysis` | 構造→機能影響評価 |
|
|
322
|
+
|
|
323
|
+
### 依存パッケージ
|
|
324
|
+
|
|
325
|
+
`pandas`, `numpy`, `requests`
|
|
@@ -324,3 +324,4 @@ def pgx_recommendation(gene, phenotype, drug):
|
|
|
324
324
|
| `scientific-data-preprocessing` | ← バリアントデータの前処理・正規化 |
|
|
325
325
|
| `scientific-clinical-decision-support` | → バリアント解釈結果の臨床意思決定 |
|
|
326
326
|
| `scientific-academic-writing` | → 研究成果の論文化 |
|
|
327
|
+
| `scientific-pharmacogenomics` | ← Star アレル・代謝型・薬理ゲノミクス |
|