@nahisaho/satori 0.19.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -39
- package/package.json +1 -1
- package/src/.github/skills/scientific-admet-pharmacokinetics/SKILL.md +4 -0
- package/src/.github/skills/scientific-biobank-cohort/SKILL.md +268 -0
- package/src/.github/skills/scientific-biothings-idmapping/SKILL.md +4 -0
- package/src/.github/skills/scientific-cancer-genomics/SKILL.md +7 -0
- package/src/.github/skills/scientific-cell-line-resources/SKILL.md +4 -0
- package/src/.github/skills/scientific-cellxgene-census/SKILL.md +257 -0
- package/src/.github/skills/scientific-chembl-assay-mining/SKILL.md +4 -0
- package/src/.github/skills/scientific-clingen-curation/SKILL.md +258 -0
- package/src/.github/skills/scientific-clinical-nlp/SKILL.md +250 -0
- package/src/.github/skills/scientific-drug-repurposing/SKILL.md +4 -0
- package/src/.github/skills/scientific-drug-target-profiling/SKILL.md +4 -0
- package/src/.github/skills/scientific-gdc-portal/SKILL.md +280 -0
- package/src/.github/skills/scientific-gtex-tissue-expression/SKILL.md +5 -2
- package/src/.github/skills/scientific-hgnc-nomenclature/SKILL.md +282 -0
- package/src/.github/skills/scientific-human-cell-atlas/SKILL.md +3 -0
- package/src/.github/skills/scientific-human-protein-atlas/SKILL.md +4 -0
- package/src/.github/skills/scientific-immunoinformatics/SKILL.md +4 -0
- package/src/.github/skills/scientific-metabolic-flux/SKILL.md +306 -0
- package/src/.github/skills/scientific-metabolic-modeling/SKILL.md +4 -0
- package/src/.github/skills/scientific-metabolomics/SKILL.md +4 -0
- package/src/.github/skills/scientific-metabolomics-network/SKILL.md +311 -0
- package/src/.github/skills/scientific-microbiome-metagenomics/SKILL.md +4 -0
- package/src/.github/skills/scientific-monarch-ontology/SKILL.md +260 -0
- package/src/.github/skills/scientific-pharmacogenomics/SKILL.md +4 -0
- package/src/.github/skills/scientific-pharmacology-targets/SKILL.md +10 -0
- package/src/.github/skills/scientific-pharos-targets/SKILL.md +276 -0
- package/src/.github/skills/scientific-precision-oncology/SKILL.md +4 -0
- package/src/.github/skills/scientific-protein-structure-analysis/SKILL.md +4 -0
- package/src/.github/skills/scientific-spatial-multiomics/SKILL.md +293 -0
- package/src/.github/skills/scientific-stitch-chemical-network/SKILL.md +318 -0
- package/src/.github/skills/scientific-string-network-api/SKILL.md +4 -0
- package/src/.github/skills/scientific-variant-effect-prediction/SKILL.md +7 -0
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-cellxgene-census
|
|
3
|
+
description: |
|
|
4
|
+
CELLxGENE Census 大規模シングルセルアトラススキル。
|
|
5
|
+
CZ CELLxGENE Census API によるヒト/マウス全アトラスの
|
|
6
|
+
メタデータ検索・遺伝子発現クエリ・セルタイプ分布解析・
|
|
7
|
+
データセット横断統合パイプライン。
|
|
8
|
+
ToolUniverse 連携: cellxgene_census。
|
|
9
|
+
tu_tools:
|
|
10
|
+
- key: cellxgene_census
|
|
11
|
+
name: CELLxGENE Census
|
|
12
|
+
description: 大規模シングルセルアトラスデータアクセス API
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
# Scientific CELLxGENE Census
|
|
16
|
+
|
|
17
|
+
CZ CELLxGENE Census API を活用した大規模シングルセルアトラスの
|
|
18
|
+
メタデータ検索・遺伝子発現クエリ・セルタイプ分布解析・
|
|
19
|
+
データセット横断統合パイプラインを提供する。
|
|
20
|
+
|
|
21
|
+
## When to Use
|
|
22
|
+
|
|
23
|
+
- 数千万細胞規模のシングルセルアトラスから特定組織/疾患のデータを抽出するとき
|
|
24
|
+
- 組織横断的なセルタイプ分布を比較するとき
|
|
25
|
+
- 特定遺伝子の全アトラスにわたる発現パターンを検索するとき
|
|
26
|
+
- Census データセットをメタデータに基づいてフィルタリングするとき
|
|
27
|
+
- AnnData/Sparse 行列として大規模データを効率的に取得するとき
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Quick Start
|
|
32
|
+
|
|
33
|
+
## 1. Census メタデータ検索
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
import cellxgene_census
|
|
37
|
+
import pandas as pd
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def census_datasets(organism="Homo sapiens",
                    tissue=None, disease=None):
    """
    Search the CELLxGENE Census dataset metadata table.

    Reads ``census["census_info"]["datasets"]`` into a pandas DataFrame and
    keeps the rows whose ``dataset_title`` contains the given keywords
    (case-insensitive, literal substring match).

    Parameters:
        organism: str — organism name. Accepted so the signature lines up
            with the other Census helpers; it is NOT used to filter the
            dataset table.
        tissue: str — keyword looked up in ``dataset_title``; a falsy value
            disables this filter.
        disease: str — keyword looked up in ``dataset_title``; a falsy value
            disables this filter.

    Returns:
        pandas.DataFrame holding the matching rows of the dataset table.
    """
    with cellxgene_census.open_soma() as census:
        datasets = census["census_info"]["datasets"].read(
        ).concat().to_pandas()

    # Both keywords are matched against the title column, so passing both
    # keeps only the titles that mention each of them.
    for keyword in (tissue, disease):
        if keyword:
            # regex=False: the keyword is user-supplied plain text. With the
            # pandas default (regex=True) characters such as "(" or "+"
            # would be interpreted as a pattern and could raise or mis-match.
            datasets = datasets[
                datasets["dataset_title"].str.contains(
                    keyword, case=False, na=False, regex=False)]

    print(f"Census datasets: {len(datasets)} matched")
    return datasets
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def census_cell_metadata(organism="Homo sapiens",
                         tissue=None,
                         cell_type=None,
                         max_cells=10000):
    """
    Fetch per-cell metadata (obs) from the CELLxGENE Census.

    Parameters:
        organism: str — organism passed to ``cellxgene_census.get_obs``
        tissue: str — when truthy, adds ``tissue_general == '<tissue>'``
            to the value filter
        cell_type: str — when truthy, adds ``cell_type == '<cell_type>'``
            to the value filter
        max_cells: int — number of leading rows kept from the result

    Returns:
        The first ``max_cells`` rows of the table returned by ``get_obs``.
    """
    # Each requested value becomes one equality clause; clauses are AND-ed.
    requested = (("tissue_general", tissue), ("cell_type", cell_type))
    clauses = [f"{column} == '{wanted}'"
               for column, wanted in requested if wanted]
    if clauses:
        value_filter = " and ".join(clauses)
    else:
        value_filter = None

    obs_columns = [
        "cell_type", "tissue_general",
        "disease", "sex", "development_stage",
        "dataset_id", "assay"]

    with cellxgene_census.open_soma() as census:
        obs_df = cellxgene_census.get_obs(
            census, organism,
            value_filter=value_filter,
            column_names=obs_columns,
        )

    # Truncation happens after the query has returned, not inside it.
    df = obs_df.head(max_cells)
    print(f"Census cells: {len(df)} retrieved "
          f"(filter: {value_filter})")
    return df
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## 2. 遺伝子発現クエリ
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
def census_gene_expression(organism="Homo sapiens",
                           gene_symbols=None,
                           tissue=None,
                           max_cells=5000):
    """
    Fetch gene expression data from the CELLxGENE Census.

    Parameters:
        organism: str — organism passed to ``cellxgene_census.get_anndata``
        gene_symbols: list[str] — gene symbols matched against
            ``feature_name``; a single symbol may also be given as a plain
            string. A falsy value requests all genes (no var filter).
        tissue: str — when truthy, restricts cells to this
            ``tissue_general`` value
        max_cells: int — when truthy and the result has more cells than
            this, a random subset of ``max_cells`` cells is kept

    Returns:
        The object returned by ``cellxgene_census.get_anndata``, possibly
        subset to ``max_cells`` randomly chosen cells.
    """
    obs_filter = (f"tissue_general == '{tissue}'"
                  if tissue else None)

    # A bare string would otherwise be joined character by character
    # ("TP53" -> 'T', 'P', '5', '3'), producing a filter that matches nothing
    # sensible; treat it as a one-element list instead.
    if isinstance(gene_symbols, str):
        gene_symbols = [gene_symbols]

    var_filter = None
    if gene_symbols:
        genes_str = "', '".join(gene_symbols)
        var_filter = f"feature_name in ['{genes_str}']"

    with cellxgene_census.open_soma() as census:
        adata = cellxgene_census.get_anndata(
            census, organism,
            obs_value_filter=obs_filter,
            var_value_filter=var_filter,
            obs_column_names=[
                "cell_type", "tissue_general",
                "disease"],
        )

    # Down-sampling is applied after the query has returned, so it limits
    # the size of the returned object, not the amount of data fetched.
    if max_cells and len(adata) > max_cells:
        import numpy as np
        idx = np.random.choice(
            len(adata), max_cells, replace=False)
        adata = adata[idx]

    print(f"Census expression: {adata.shape[0]} cells × "
          f"{adata.shape[1]} genes")
    return adata
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## 3. セルタイプ分布解析
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
def celltype_distribution(organism="Homo sapiens",
                          tissue=None):
    """
    Summarise how cells are distributed across cell types.

    Parameters:
        organism: str — organism passed to ``cellxgene_census.get_obs``
        tissue: str — when truthy, restricts cells to this
            ``tissue_general`` value

    Returns:
        pandas.DataFrame with columns ``cell_type``, ``count`` and
        ``fraction`` (count divided by the total number of cells), in the
        order produced by ``value_counts``.
    """
    if tissue:
        obs_filter = f"tissue_general == '{tissue}'"
    else:
        obs_filter = None

    with cellxgene_census.open_soma() as census:
        obs_df = cellxgene_census.get_obs(
            census, organism,
            value_filter=obs_filter,
            column_names=["cell_type",
                          "tissue_general"],
        )

    # Tally cells per cell type, then express each tally as a share of
    # the total.
    per_type = obs_df["cell_type"].value_counts()
    n_cells = per_type.sum()
    ct_df = pd.DataFrame({
        "cell_type": per_type.index,
        "count": per_type.values,
        "fraction": per_type.values / n_cells,
    })

    print(f"Cell types: {len(ct_df)} types, "
          f"total {n_cells} cells")
    return ct_df
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## 4. Census 統合パイプライン
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
def census_pipeline(organism="Homo sapiens",
                    tissue=None,
                    gene_symbols=None,
                    output_dir="results"):
    """
    Run the CELLxGENE Census steps end to end and save their outputs.

    Parameters:
        organism: str — organism forwarded to every step
        tissue: str — tissue forwarded to every step
        gene_symbols: list[str] — target genes; the expression step is
            skipped when this is falsy
        output_dir: str — directory the result files are written to
            (created if missing)

    Returns:
        dict with keys ``datasets``, ``celltype_dist`` and ``adata``
        (``adata`` is None when no genes were requested).
    """
    from pathlib import Path

    out_root = Path(output_dir)
    out_root.mkdir(parents=True, exist_ok=True)
    outputs = {}

    # 1) Dataset metadata
    outputs["datasets"] = census_datasets(organism, tissue)
    outputs["datasets"].to_csv(
        out_root / "census_datasets.csv", index=False)

    # 2) Cell-type distribution
    outputs["celltype_dist"] = celltype_distribution(organism, tissue)
    outputs["celltype_dist"].to_csv(
        out_root / "celltype_distribution.csv", index=False)

    # 3) Gene expression (only when genes were requested)
    outputs["adata"] = None
    if gene_symbols:
        expression = census_gene_expression(
            organism, gene_symbols, tissue)
        expression.write_h5ad(out_root / "census_expression.h5ad")
        outputs["adata"] = expression

    print(f"Census pipeline → {out_root}")
    return outputs
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
---
|
|
234
|
+
|
|
235
|
+
## ToolUniverse 連携
|
|
236
|
+
|
|
237
|
+
| TU Key | ツール名 | 連携内容 |
|
|
238
|
+
|--------|---------|---------|
|
|
239
|
+
| `cellxgene_census` | CELLxGENE Census | 大規模シングルセルアトラス API |
|
|
240
|
+
|
|
241
|
+
## パイプライン統合
|
|
242
|
+
|
|
243
|
+
```
|
|
244
|
+
human-cell-atlas → cellxgene-census → single-cell-genomics
|
|
245
|
+
(HCA Portal) (Census 大規模) (Scanpy 解析)
|
|
246
|
+
│ │ ↓
|
|
247
|
+
spatial-multiomics ────┘ scvi-integration
|
|
248
|
+
(空間統合) (scVI/scANVI)
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## パイプライン出力
|
|
252
|
+
|
|
253
|
+
| ファイル | 説明 | 次スキル |
|
|
254
|
+
|---------|------|---------|
|
|
255
|
+
| `results/census_datasets.csv` | データセット一覧 | → human-cell-atlas |
|
|
256
|
+
| `results/celltype_distribution.csv` | セルタイプ分布 | → single-cell-genomics |
|
|
257
|
+
| `results/census_expression.h5ad` | 発現行列 (AnnData) | → scvi-integration |
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-clingen-curation
|
|
3
|
+
description: |
|
|
4
|
+
ClinGen 臨床ゲノム資源キュレーションスキル。ClinGen API に
|
|
5
|
+
よる遺伝子-疾患バリディティ、臨床アクショナビリティ、
|
|
6
|
+
遺伝子量感受性、バリアントレベルエビデンス評価パイプライン。
|
|
7
|
+
ToolUniverse 連携: clingen。
|
|
8
|
+
tu_tools:
|
|
9
|
+
- key: clingen
|
|
10
|
+
name: ClinGen
|
|
11
|
+
description: ClinGen 臨床ゲノムリソース キュレーションデータ
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
# Scientific ClinGen Curation
|
|
15
|
+
|
|
16
|
+
ClinGen (Clinical Genome Resource) API を活用した
|
|
17
|
+
遺伝子-疾患バリディティ分類・臨床アクショナビリティ
|
|
18
|
+
スコアリング・遺伝子量感受性評価・バリアントキュレーション
|
|
19
|
+
パイプラインを提供する。
|
|
20
|
+
|
|
21
|
+
## When to Use
|
|
22
|
+
|
|
23
|
+
- 遺伝子-疾患関連のエビデンスレベルを評価するとき
|
|
24
|
+
- 臨床アクショナビリティ (介入可能性) を判定するとき
|
|
25
|
+
- ハプロ不全/トリプロ感受性を評価するとき
|
|
26
|
+
- ClinGen キュレーション済みバリアント分類を取得するとき
|
|
27
|
+
- ACMG ガイドラインに基づくバリアント解釈を行うとき
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Quick Start
|
|
32
|
+
|
|
33
|
+
## 1. 遺伝子-疾患バリディティ
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
import requests
|
|
37
|
+
import pandas as pd
|
|
38
|
+
|
|
39
|
+
# Base URL that the ClinGen endpoint paths used in this file
# (gene-validity, gene-dosage, actionability) are appended to.
CLINGEN_BASE = "https://search.clinicalgenome.org/kb"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def clingen_gene_validity(gene_symbol):
    """
    Fetch ClinGen gene-disease validity classifications for one gene.

    Parameters:
        gene_symbol: str — gene symbol (e.g. "BRCA1")

    Returns:
        pandas.DataFrame with one row per returned record and columns
        ``gene``, ``disease``, ``classification``, ``moi`` and ``sop``.
        Missing fields become empty strings (``gene`` falls back to
        ``gene_symbol``).

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    # NOTE(review): endpoint path and response field names are kept exactly
    # as they were; confirm them against the live ClinGen service.
    # Passing the query via ``params`` lets requests URL-encode the symbol
    # instead of splicing raw text into the query string.
    resp = requests.get(
        f"{CLINGEN_BASE}/gene-validity/",
        params={"search": gene_symbol, "format": "json"},
        timeout=30)
    resp.raise_for_status()
    data = resp.json()

    # The payload is accepted either as a bare list or as an object with a
    # "results" list; a null "results" is treated as empty.
    if isinstance(data, list):
        results = data
    else:
        results = data.get("results") or []

    rows = []
    for item in results:
        # ``or {}`` guards against JSON null for a nested object, which
        # would make a chained ``.get`` raise AttributeError.
        gene = item.get("gene") or {}
        disease = item.get("disease") or {}
        rows.append({
            "gene": gene.get("symbol", gene_symbol),
            "disease": disease.get("label", ""),
            "classification": item.get("classification", ""),
            "moi": item.get("moi", ""),
            "sop": item.get("sopVersion", ""),
        })

    df = pd.DataFrame(rows)
    print(f"ClinGen validity: {gene_symbol} → "
          f"{len(df)} gene-disease pairs")
    return df
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def clingen_gene_validity_batch(gene_symbols):
    """
    Fetch ClinGen gene-disease validity for several genes and combine them.

    Parameters:
        gene_symbols: list[str] — gene symbols, queried one by one in order

    Returns:
        pandas.DataFrame concatenating every non-empty per-gene result, or
        an empty DataFrame when no gene returned any record.
    """
    non_empty = [frame
                 for frame in map(clingen_gene_validity, gene_symbols)
                 if not frame.empty]
    if not non_empty:
        return pd.DataFrame()

    combined = pd.concat(non_empty, ignore_index=True)
    # Report how many records fall into each classification.
    cls_dist = combined["classification"].value_counts()
    print(f"Validity distribution: "
          f"{cls_dist.to_dict()}")
    return combined
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## 2. 遺伝子量感受性
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
def clingen_dosage_sensitivity(gene_symbol):
    """
    Fetch ClinGen dosage sensitivity (haplo/triplo) records for one gene.

    Parameters:
        gene_symbol: str — gene symbol

    Returns:
        pandas.DataFrame with one row per returned record and columns
        ``gene``, ``haplo_score``, ``haplo_label``, ``triplo_score`` and
        ``triplo_label``. Missing fields become empty strings (``gene``
        falls back to ``gene_symbol``).

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    # NOTE(review): endpoint path and response field names are kept exactly
    # as they were; confirm them against the live ClinGen service.
    # Passing the query via ``params`` lets requests URL-encode the symbol
    # instead of splicing raw text into the query string.
    resp = requests.get(
        f"{CLINGEN_BASE}/gene-dosage/",
        params={"search": gene_symbol, "format": "json"},
        timeout=30)
    resp.raise_for_status()
    data = resp.json()

    # The payload is accepted either as a bare list or as an object with a
    # "results" list; a null "results" is treated as empty.
    if isinstance(data, list):
        results = data
    else:
        results = data.get("results") or []

    rows = []
    for item in results:
        # ``or {}`` guards against JSON null for a nested object, which
        # would make a chained ``.get`` raise AttributeError.
        gene = item.get("gene") or {}
        haplo = item.get("haploinsufficiency") or {}
        triplo = item.get("triplosensitivity") or {}
        rows.append({
            "gene": gene.get("symbol", gene_symbol),
            "haplo_score": haplo.get("score", ""),
            "haplo_label": haplo.get("label", ""),
            "triplo_score": triplo.get("score", ""),
            "triplo_label": triplo.get("label", ""),
        })

    df = pd.DataFrame(rows)
    print(f"ClinGen dosage: {gene_symbol} → "
          f"{len(df)} entries")
    return df
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## 3. 臨床アクショナビリティ
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
def clingen_actionability(gene_symbol):
    """
    Fetch ClinGen clinical actionability records for one gene.

    Parameters:
        gene_symbol: str — gene symbol

    Returns:
        pandas.DataFrame with one row per returned record and columns
        ``gene``, ``disease``, ``classification`` and ``date``. Missing
        fields become empty strings (``gene`` falls back to
        ``gene_symbol``).

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    # NOTE(review): endpoint path and response field names are kept exactly
    # as they were; confirm them against the live ClinGen service.
    # Passing the query via ``params`` lets requests URL-encode the symbol
    # instead of splicing raw text into the query string.
    resp = requests.get(
        f"{CLINGEN_BASE}/actionability/",
        params={"search": gene_symbol, "format": "json"},
        timeout=30)
    resp.raise_for_status()
    data = resp.json()

    # The payload is accepted either as a bare list or as an object with a
    # "results" list; a null "results" is treated as empty.
    if isinstance(data, list):
        results = data
    else:
        results = data.get("results") or []

    rows = []
    for item in results:
        # ``or {}`` guards against JSON null for a nested object, which
        # would make a chained ``.get`` raise AttributeError.
        gene = item.get("gene") or {}
        disease = item.get("disease") or {}
        rows.append({
            "gene": gene.get("symbol", gene_symbol),
            "disease": disease.get("label", ""),
            "classification": item.get("classification", ""),
            "date": item.get("date", ""),
        })

    df = pd.DataFrame(rows)
    print(f"ClinGen actionability: {gene_symbol} → "
          f"{len(df)} entries")
    return df
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
## 4. ClinGen 統合パイプライン
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
def clingen_pipeline(gene_symbols,
                     output_dir="results"):
    """
    Run the ClinGen curation steps for a set of genes and save the results.

    For every gene this collects gene-disease validity, dosage sensitivity
    and clinical actionability records; each non-empty table is written as
    CSV into ``output_dir``.

    Parameters:
        gene_symbols: list[str] — gene symbols to look up
        output_dir: str — directory the CSV files are written to
            (created if missing)

    Returns:
        dict with keys ``validity``, ``dosage`` and ``actionability``, each
        a pandas.DataFrame (empty when nothing was found for that step).
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # The symbols are walked once per step; materialise them so a one-shot
    # iterable (e.g. a generator) is not exhausted after the first step.
    gene_symbols = list(gene_symbols)

    # 1) Gene-disease validity
    validity_df = clingen_gene_validity_batch(
        gene_symbols)
    if not validity_df.empty:
        validity_df.to_csv(
            output_dir / "clingen_validity.csv",
            index=False)

    # 2) Dosage sensitivity
    dosage_results = []
    for sym in gene_symbols:
        dos = clingen_dosage_sensitivity(sym)
        if not dos.empty:
            dosage_results.append(dos)
    # Default to an empty frame so the returned dict always has this key.
    dosage_df = pd.DataFrame()
    if dosage_results:
        dosage_df = pd.concat(dosage_results,
                              ignore_index=True)
        dosage_df.to_csv(
            output_dir / "clingen_dosage.csv",
            index=False)

    # 3) Actionability
    action_results = []
    for sym in gene_symbols:
        act = clingen_actionability(sym)
        if not act.empty:
            action_results.append(act)
    action_df = pd.DataFrame()
    if action_results:
        action_df = pd.concat(action_results,
                              ignore_index=True)
        action_df.to_csv(
            output_dir / "clingen_actionability.csv",
            index=False)

    print(f"ClinGen pipeline → {output_dir}")
    # "validity" is kept as before; the other two tables used to be
    # computed and written but never handed back to the caller.
    return {
        "validity": validity_df,
        "dosage": dosage_df,
        "actionability": action_df,
    }
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
---
|
|
235
|
+
|
|
236
|
+
## ToolUniverse 連携
|
|
237
|
+
|
|
238
|
+
| TU Key | ツール名 | 連携内容 |
|
|
239
|
+
|--------|---------|---------|
|
|
240
|
+
| `clingen` | ClinGen | ClinGen 臨床ゲノムリソース キュレーションデータ |
|
|
241
|
+
|
|
242
|
+
## パイプライン統合
|
|
243
|
+
|
|
244
|
+
```
|
|
245
|
+
variant-interpretation → clingen-curation → clinical-decision-support
|
|
246
|
+
(ClinVar/ACMG) (GDV/DOS/ACT) (臨床判断支援)
|
|
247
|
+
│ │ ↓
|
|
248
|
+
variant-effect-prediction ─┘ pharmacogenomics
|
|
249
|
+
(SpliceAI/CADD) (PGx 処方)
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
## パイプライン出力
|
|
253
|
+
|
|
254
|
+
| ファイル | 説明 | 次スキル |
|
|
255
|
+
|---------|------|---------|
|
|
256
|
+
| `results/clingen_validity.csv` | 遺伝子-疾患バリディティ | → genetic-counseling |
|
|
257
|
+
| `results/clingen_dosage.csv` | 遺伝子量感受性 | → cnv-analysis |
|
|
258
|
+
| `results/clingen_actionability.csv` | 臨床介入可能性 | → precision-medicine |
|