@nahisaho/satori 0.18.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -39
- package/package.json +1 -1
- package/src/.github/skills/scientific-admet-pharmacokinetics/SKILL.md +4 -0
- package/src/.github/skills/scientific-biobank-cohort/SKILL.md +268 -0
- package/src/.github/skills/scientific-cancer-genomics/SKILL.md +7 -0
- package/src/.github/skills/scientific-cell-line-resources/SKILL.md +4 -0
- package/src/.github/skills/scientific-chembl-assay-mining/SKILL.md +4 -0
- package/src/.github/skills/scientific-civic-evidence/SKILL.md +292 -0
- package/src/.github/skills/scientific-compound-screening/SKILL.md +4 -0
- package/src/.github/skills/scientific-depmap-dependencies/SKILL.md +239 -0
- package/src/.github/skills/scientific-disease-research/SKILL.md +4 -0
- package/src/.github/skills/scientific-drug-target-profiling/SKILL.md +4 -0
- package/src/.github/skills/scientific-drugbank-resources/SKILL.md +269 -0
- package/src/.github/skills/scientific-gdc-portal/SKILL.md +280 -0
- package/src/.github/skills/scientific-gnomad-variants/SKILL.md +356 -0
- package/src/.github/skills/scientific-immunoinformatics/SKILL.md +4 -0
- package/src/.github/skills/scientific-metabolic-flux/SKILL.md +306 -0
- package/src/.github/skills/scientific-metabolic-modeling/SKILL.md +4 -0
- package/src/.github/skills/scientific-metabolomics/SKILL.md +4 -0
- package/src/.github/skills/scientific-metabolomics-databases/SKILL.md +4 -0
- package/src/.github/skills/scientific-microbiome-metagenomics/SKILL.md +4 -0
- package/src/.github/skills/scientific-monarch-ontology/SKILL.md +260 -0
- package/src/.github/skills/scientific-opentargets-genetics/SKILL.md +299 -0
- package/src/.github/skills/scientific-pharmacology-targets/SKILL.md +10 -0
- package/src/.github/skills/scientific-precision-oncology/SKILL.md +4 -0
- package/src/.github/skills/scientific-protein-interaction-network/SKILL.md +4 -0
- package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +4 -0
- package/src/.github/skills/scientific-rcsb-pdb-search/SKILL.md +280 -0
- package/src/.github/skills/scientific-reactome-pathways/SKILL.md +242 -0
- package/src/.github/skills/scientific-spatial-multiomics/SKILL.md +293 -0
- package/src/.github/skills/scientific-stitch-chemical-network/SKILL.md +318 -0
- package/src/.github/skills/scientific-string-network-api/SKILL.md +4 -0
- package/src/.github/skills/scientific-uniprot-proteome/SKILL.md +273 -0
- package/src/.github/skills/scientific-variant-interpretation/SKILL.md +4 -0
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-drugbank-resources
|
|
3
|
+
description: |
|
|
4
|
+
DrugBank リソーススキル。DrugBank API を用いた薬剤記述・
|
|
5
|
+
薬理情報・標的タンパク質・薬物相互作用検索。
|
|
6
|
+
ToolUniverse 連携: drugbank。
|
|
7
|
+
tu_tools:
|
|
8
|
+
- key: drugbank
|
|
9
|
+
name: DrugBank
|
|
10
|
+
description: 薬剤データベース API
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Scientific DrugBank Resources
|
|
14
|
+
|
|
15
|
+
DrugBank API を活用した薬剤記述・薬理情報 (MOA)・標的タンパク質
|
|
16
|
+
検索・薬物相互作用 (DDI) パイプラインを提供する。
|
|
17
|
+
|
|
18
|
+
## When to Use
|
|
19
|
+
|
|
20
|
+
- 薬剤の基本情報 (名前・分類・構造) を検索するとき
|
|
21
|
+
- 薬理メカニズム (MOA) を調べるとき
|
|
22
|
+
- 標的タンパク質から薬剤を逆引き検索するとき
|
|
23
|
+
- 薬物相互作用 (DDI) を確認するとき
|
|
24
|
+
- 薬剤の ADMET プロパティを取得するとき
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Quick Start
|
|
29
|
+
|
|
30
|
+
## 1. 薬剤検索・基本情報
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import requests
|
|
34
|
+
import pandas as pd
|
|
35
|
+
|
|
36
|
+
# Base URL of the DrugBank API (v1); the functions below append endpoint paths to it.
DRUGBANK_API = "https://api.drugbank.com/v1"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def drugbank_search(query, limit=25, api_key=None):
    """
    Run a free-text drug search against DrugBank.

    Sends ``GET {DRUGBANK_API}/drugs`` with ``q`` and ``per_page`` query
    parameters and flattens the JSON response into a DataFrame. A one-line
    summary of the hit count is printed.

    Parameters:
        query: str — search text (e.g. "imatinib")
        limit: int — maximum number of results, sent as ``per_page``
        api_key: str — DrugBank API key; when given it is sent as a
            Bearer token in the ``Authorization`` header

    Returns:
        pandas.DataFrame with one row per hit and the columns
        drugbank_id, name, cas_number, drug_type, state and description
        (description truncated to 200 characters). The frame is empty
        when the API returns no hits.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}

    resp = requests.get(
        f"{DRUGBANK_API}/drugs",
        params={"q": query, "per_page": limit},
        headers=headers,
        timeout=30,
    )
    resp.raise_for_status()

    rows = [
        {
            "drugbank_id": d.get("drugbank_id", ""),
            "name": d.get("name", ""),
            "cas_number": d.get("cas_number", ""),
            "drug_type": d.get("type", ""),
            "state": d.get("state", ""),
            # A JSON null arrives as None (the .get default only covers a
            # missing key), so fall back to "" before slicing.
            "description": (d.get("description") or "")[:200],
        }
        for d in resp.json()
    ]

    df = pd.DataFrame(rows)
    print(f"DrugBank search: '{query}' → {len(df)} drugs")
    return df
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def drugbank_drug_detail(drugbank_id, api_key=None):
    """
    Fetch the detail record of a single DrugBank drug.

    Sends ``GET {DRUGBANK_API}/drugs/{drugbank_id}`` and picks a fixed set
    of fields out of the JSON response.

    Parameters:
        drugbank_id: str — DrugBank ID (e.g. "DB01254")
        api_key: str — DrugBank API key; when given it is sent as a
            Bearer token in the ``Authorization`` header

    Returns:
        dict with the keys drugbank_id, name, description, indication,
        pharmacodynamics, mechanism_of_action, absorption, half_life,
        protein_binding and molecular_weight. A field missing from the
        response is reported as "".

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    auth = {}
    if api_key:
        auth["Authorization"] = f"Bearer {api_key}"

    response = requests.get(
        f"{DRUGBANK_API}/drugs/{drugbank_id}",
        headers=auth,
        timeout=30,
    )
    response.raise_for_status()
    record = response.json()

    # (output key, key in the API payload); the two differ only for the
    # molecular weight, which is read from "average_molecular_weight".
    field_map = (
        ("drugbank_id", "drugbank_id"),
        ("name", "name"),
        ("description", "description"),
        ("indication", "indication"),
        ("pharmacodynamics", "pharmacodynamics"),
        ("mechanism_of_action", "mechanism_of_action"),
        ("absorption", "absorption"),
        ("half_life", "half_life"),
        ("protein_binding", "protein_binding"),
        ("molecular_weight", "average_molecular_weight"),
    )
    return {out_key: record.get(src_key, "")
            for out_key, src_key in field_map}
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## 2. 標的タンパク質検索
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
def drugbank_targets(drugbank_id, api_key=None):
    """
    Fetch the target proteins recorded for a DrugBank drug.

    Sends ``GET {DRUGBANK_API}/drugs/{drugbank_id}/targets`` and flattens
    each target into one row. A one-line summary is printed.

    Parameters:
        drugbank_id: str — DrugBank ID
        api_key: str — DrugBank API key; when given it is sent as a
            Bearer token in the ``Authorization`` header

    Returns:
        pandas.DataFrame with one row per target and the columns
        drugbank_id, target_name, organism, known_action, gene_name and
        uniprot_id. The frame is empty when no targets are returned.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}

    resp = requests.get(
        f"{DRUGBANK_API}/drugs/{drugbank_id}/targets",
        headers=headers,
        timeout=30,
    )
    resp.raise_for_status()

    rows = []
    for t in resp.json():
        # Both nested objects may be null in the payload; normalise them to
        # empty dicts so the chained lookups below cannot hit None.
        polypeptide = t.get("polypeptide") or {}
        # NOTE(review): assumes external_identifiers is a mapping keyed by
        # resource name — confirm against the live API response.
        external_ids = polypeptide.get("external_identifiers") or {}
        rows.append({
            "drugbank_id": drugbank_id,
            "target_name": t.get("name", ""),
            "organism": t.get("organism", ""),
            "known_action": t.get("known_action", ""),
            "gene_name": polypeptide.get("gene_name", ""),
            "uniprot_id": external_ids.get("UniProtKB", ""),
        })

    df = pd.DataFrame(rows)
    print(f"DrugBank targets: {drugbank_id} → {len(df)} targets")
    return df
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## 3. 薬物相互作用 (DDI)
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
def drugbank_interactions(drugbank_id, api_key=None):
    """
    Fetch the drug-drug interactions (DDI) recorded for a DrugBank drug.

    Sends ``GET {DRUGBANK_API}/drugs/{drugbank_id}/drug_interactions`` and
    flattens each interaction into one row. A one-line summary is printed.

    Parameters:
        drugbank_id: str — DrugBank ID
        api_key: str — DrugBank API key; when given it is sent as a
            Bearer token in the ``Authorization`` header

    Returns:
        pandas.DataFrame with one row per interaction and the columns
        drug_a (the queried ID), drug_b_id, drug_b_name and description
        (truncated to 300 characters). The frame is empty when no
        interactions are returned.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}

    resp = requests.get(
        f"{DRUGBANK_API}/drugs/{drugbank_id}/drug_interactions",
        headers=headers,
        timeout=30,
    )
    resp.raise_for_status()

    rows = [
        {
            "drug_a": drugbank_id,
            "drug_b_id": inter.get("drugbank_id", ""),
            "drug_b_name": inter.get("name", ""),
            # A JSON null arrives as None (the .get default only covers a
            # missing key), so fall back to "" before slicing.
            "description": (inter.get("description") or "")[:300],
        }
        for inter in resp.json()
    ]

    df = pd.DataFrame(rows)
    print(f"DrugBank DDI: {drugbank_id} → {len(df)} interactions")
    return df
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## 4. DrugBank 統合パイプライン
|
|
192
|
+
|
|
193
|
+
```python
|
|
194
|
+
def drugbank_pipeline(drug_name, api_key=None,
                      output_dir="results"):
    """
    Run the full DrugBank workflow for one drug name and save the results.

    Steps: text search, then detail / targets / interactions for the first
    search hit. Each step's result is written as CSV into ``output_dir``
    (drugbank_search.csv, drugbank_detail.csv, drugbank_targets.csv,
    drugbank_ddi.csv). The directory is created if it does not exist.

    Parameters:
        drug_name: str — drug name (e.g. "imatinib")
        api_key: str — DrugBank API key, forwarded to every request
        output_dir: str — directory the CSV files are written to

    Returns:
        dict that always contains "search" (the search DataFrame). When the
        search has at least one hit it also contains "detail" (dict),
        "targets" (DataFrame) and "ddi" (DataFrame); when the search is
        empty only "search" is present and only drugbank_search.csv is
        written.
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Search
    results = drugbank_search(drug_name, api_key=api_key)
    results.to_csv(output_dir / "drugbank_search.csv", index=False)

    if results.empty:
        print(f"DrugBank: '{drug_name}' not found")
        return {"search": results}

    # Only the first search hit is followed up in the later steps.
    db_id = results.iloc[0]["drugbank_id"]

    # 2) Detail
    detail = drugbank_drug_detail(db_id, api_key=api_key)
    pd.DataFrame([detail]).to_csv(
        output_dir / "drugbank_detail.csv", index=False)

    # 3) Targets
    targets = drugbank_targets(db_id, api_key=api_key)
    targets.to_csv(output_dir / "drugbank_targets.csv", index=False)

    # 4) Drug-drug interactions
    ddi = drugbank_interactions(db_id, api_key=api_key)
    ddi.to_csv(output_dir / "drugbank_ddi.csv", index=False)

    print(f"DrugBank pipeline: {drug_name} → {output_dir}")
    # "search" is returned here too so both exit paths share that key.
    return {"search": results, "detail": detail,
            "targets": targets, "ddi": ddi}
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
---
|
|
245
|
+
|
|
246
|
+
## ToolUniverse 連携
|
|
247
|
+
|
|
248
|
+
| TU Key | ツール名 | 連携内容 |
|
|
249
|
+
|--------|---------|---------|
|
|
250
|
+
| `drugbank` | DrugBank | 薬剤データベース API |
|
|
251
|
+
|
|
252
|
+
## パイプライン統合
|
|
253
|
+
|
|
254
|
+
```
|
|
255
|
+
drug-target-profiling → drugbank-resources → admet-pharmacokinetics
|
|
256
|
+
(標的プロファイリング) (DrugBank API) (ADMET 予測)
|
|
257
|
+
│ │ ↓
|
|
258
|
+
opentargets-genetics ──────────┘ compound-screening
|
|
259
|
+
(OT 薬剤エビデンス) (ZINC 化合物検索)
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
## パイプライン出力
|
|
263
|
+
|
|
264
|
+
| ファイル | 説明 | 次スキル |
|
|
265
|
+
|---------|------|---------|
|
|
266
|
+
| `results/drugbank_search.csv` | 薬剤検索結果 | → drug-target-profiling |
|
|
267
|
+
| `results/drugbank_detail.csv` | 薬剤詳細 | → admet-pharmacokinetics |
|
|
268
|
+
| `results/drugbank_targets.csv` | 標的タンパク質 | → protein-interaction-network |
|
|
269
|
+
| `results/drugbank_ddi.csv` | 薬物相互作用 | → pharmacogenomics |
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-gdc-portal
|
|
3
|
+
description: |
|
|
4
|
+
NCI Genomic Data Commons ポータルスキル。GDC REST API
|
|
5
|
+
を用いたがんゲノムプロジェクト横断検索・ケースメタデータ・
|
|
6
|
+
体細胞変異 (SSM)・遺伝子発現・ファイル取得。
|
|
7
|
+
ToolUniverse 連携: gdc。
|
|
8
|
+
tu_tools:
|
|
9
|
+
- key: gdc
|
|
10
|
+
name: GDC
|
|
11
|
+
description: NCI Genomic Data Commons REST API
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
# Scientific GDC Portal
|
|
15
|
+
|
|
16
|
+
NCI Genomic Data Commons (GDC) REST API を活用した
|
|
17
|
+
がんゲノムプロジェクト横断検索・ケースメタデータ取得・
|
|
18
|
+
体細胞変異 (SSM)・遺伝子発現パイプラインを提供する。
|
|
19
|
+
|
|
20
|
+
## When to Use
|
|
21
|
+
|
|
22
|
+
- TCGA/TARGET 等のがんゲノムデータを横断検索するとき
|
|
23
|
+
- がん種別のケースメタデータを取得するとき
|
|
24
|
+
- 特定遺伝子の体細胞変異 (SSM) 頻度を調べるとき
|
|
25
|
+
- がんプロジェクトの統計サマリーを取得するとき
|
|
26
|
+
- GDC ファイルメタデータを検索してダウンロード URL を取得するとき
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
## 1. プロジェクト検索・統計
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
import requests
|
|
36
|
+
import pandas as pd
|
|
37
|
+
|
|
38
|
+
# Base URL of the NCI GDC REST API; the functions below append endpoint paths to it.
GDC_API = "https://api.gdc.cancer.gov"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def gdc_projects(disease_type=None, limit=50):
    """
    List GDC projects, optionally filtered by disease type.

    Sends ``GET {GDC_API}/projects`` requesting the project id, name,
    primary site, disease type and the case/file counts from ``summary``.
    The number of projects found is printed.

    Parameters:
        disease_type: str — disease-type filter
            (e.g. "Breast Invasive Carcinoma"); no filter when falsy
        limit: int — maximum number of results, sent as ``size``

    Returns:
        pandas.DataFrame with one row per project and the columns
        project_id, name, primary_site, disease_type, case_count and
        file_count. primary_site and disease_type are joined with "; ".
        The frame is empty when there are no hits.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    import json

    params = {
        "size": limit,
        "fields": ("project_id,name,primary_site,"
                   "disease_type,summary.case_count,"
                   "summary.file_count"),
    }

    if disease_type:
        # Serialise with json.dumps so quotes or backslashes inside the
        # value are escaped instead of producing an invalid filter.
        params["filters"] = json.dumps({
            "op": "=",
            "content": {"field": "disease_type", "value": disease_type},
        })

    resp = requests.get(f"{GDC_API}/projects", params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    rows = []
    for hit in data.get("data", {}).get("hits", []):
        summary = hit.get("summary") or {}
        rows.append({
            "project_id": hit.get("project_id", ""),
            "name": hit.get("name", ""),
            # `or []` keeps join() from failing on a null field.
            "primary_site": "; ".join(hit.get("primary_site") or []),
            "disease_type": "; ".join(hit.get("disease_type") or []),
            "case_count": summary.get("case_count", 0),
            "file_count": summary.get("file_count", 0),
        })

    df = pd.DataFrame(rows)
    print(f"GDC projects: {len(df)}")
    return df
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## 2. ケースメタデータ
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
def gdc_cases(project_id, limit=100):
    """
    Fetch case metadata for one GDC project.

    Sends ``GET {GDC_API}/cases`` filtered on ``project.project_id`` and
    flattens demographic and diagnosis fields into one row per case. A
    one-line summary is printed.

    Parameters:
        project_id: str — project ID (e.g. "TCGA-BRCA")
        limit: int — maximum number of results, sent as ``size``

    Returns:
        pandas.DataFrame with one row per case and the columns case_id,
        submitter_id, gender, race, vital_status, diagnosis, stage and
        age_at_diagnosis. Only the first entry of a case's ``diagnoses``
        list is used. The frame is empty when there are no hits.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    import json

    filters = {
        "op": "=",
        "content": {
            "field": "project.project_id",
            "value": project_id,
        },
    }
    params = {
        # json.dumps, not str(dict) with quote replacement: the latter
        # yields invalid JSON as soon as a value contains ' or ".
        "filters": json.dumps(filters),
        "fields": ("case_id,submitter_id,"
                   "demographic.gender,"
                   "demographic.race,"
                   "demographic.vital_status,"
                   "diagnoses.primary_diagnosis,"
                   "diagnoses.tumor_stage,"
                   "diagnoses.age_at_diagnosis"),
        "size": limit,
    }
    resp = requests.get(f"{GDC_API}/cases", params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    rows = []
    for hit in data.get("data", {}).get("hits", []):
        demo = hit.get("demographic") or {}
        # Missing, null or empty diagnoses all collapse to one empty record.
        diag = (hit.get("diagnoses") or [{}])[0]
        rows.append({
            "case_id": hit.get("case_id", ""),
            "submitter_id": hit.get("submitter_id", ""),
            "gender": demo.get("gender", ""),
            "race": demo.get("race", ""),
            "vital_status": demo.get("vital_status", ""),
            "diagnosis": diag.get("primary_diagnosis", ""),
            "stage": diag.get("tumor_stage", ""),
            "age_at_diagnosis": diag.get("age_at_diagnosis", ""),
        })

    df = pd.DataFrame(rows)
    print(f"GDC cases: {project_id} → {len(df)}")
    return df
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## 3. 体細胞変異 (SSM) 検索
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
def gdc_ssm_by_gene(gene_symbol, project_id=None,
                    limit=100):
    """
    Search GDC simple somatic mutations (SSM) by gene symbol.

    Sends ``GET {GDC_API}/ssms`` filtered on
    ``consequence.transcript.gene.symbol`` and, when ``project_id`` is
    given, also on ``cases.project.project_id``. A one-line summary is
    printed.

    Parameters:
        gene_symbol: str — gene symbol (e.g. "TP53")
        project_id: str — optional project ID filter
        limit: int — maximum number of SSM hits, sent as ``size``

    Returns:
        pandas.DataFrame with one row per (mutation, consequence) pair and
        the columns ssm_id, genomic_change, gene, aa_change and
        consequence_type. A mutation with several consequences therefore
        yields several rows, so the row count can exceed ``limit``. The
        frame is empty when there are no hits.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    import json

    conditions = [{
        "op": "=",
        "content": {
            "field": "consequence.transcript.gene.symbol",
            "value": gene_symbol,
        },
    }]
    if project_id:
        conditions.append({
            "op": "=",
            "content": {
                "field": "cases.project.project_id",
                "value": project_id,
            },
        })
    filters = {"op": "and", "content": conditions}

    params = {
        # json.dumps, not str(dict) with quote replacement: the latter
        # yields invalid JSON as soon as a value contains ' or ".
        "filters": json.dumps(filters),
        "fields": ("ssm_id,genomic_dna_change,"
                   "consequence.transcript.aa_change,"
                   "consequence.transcript."
                   "consequence_type,"
                   "consequence.transcript."
                   "gene.symbol"),
        "size": limit,
    }
    resp = requests.get(f"{GDC_API}/ssms", params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    rows = []
    for hit in data.get("data", {}).get("hits", []):
        # `or` fallbacks keep null nested fields from raising.
        for csq in hit.get("consequence") or []:
            tx = csq.get("transcript") or {}
            gene = tx.get("gene") or {}
            rows.append({
                "ssm_id": hit.get("ssm_id", ""),
                "genomic_change": hit.get("genomic_dna_change", ""),
                "gene": gene.get("symbol", ""),
                "aa_change": tx.get("aa_change", ""),
                "consequence_type": tx.get("consequence_type", ""),
            })

    df = pd.DataFrame(rows)
    print(f"GDC SSM: {gene_symbol} → {len(df)} variants")
    return df
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## 4. GDC 統合パイプライン
|
|
220
|
+
|
|
221
|
+
```python
|
|
222
|
+
def gdc_pipeline(project_id, gene_symbol=None,
                 output_dir="results"):
    """
    Run the full GDC workflow for one project and save the results.

    Steps: project listing, case metadata for ``project_id`` and, when
    ``gene_symbol`` is given, somatic mutations of that gene within the
    project. Each result is written as CSV into ``output_dir``
    (gdc_projects.csv, gdc_cases.csv and, if requested, gdc_ssm.csv).
    The directory is created if it does not exist.

    Parameters:
        project_id: str — project ID
        gene_symbol: str — optional gene filter; enables the SSM step
        output_dir: str — directory the CSV files are written to

    Returns:
        dict with "projects" and "cases" (DataFrames), plus "ssm"
        (DataFrame) when ``gene_symbol`` was given.
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Project listing — fetched without a filter, so it is the general
    #    project list rather than only `project_id`.
    projects = gdc_projects()
    projects.to_csv(output_dir / "gdc_projects.csv", index=False)

    # 2) Case metadata
    cases = gdc_cases(project_id)
    cases.to_csv(output_dir / "gdc_cases.csv", index=False)

    # Return every frame that was fetched, not only the cases.
    outputs = {"projects": projects, "cases": cases}

    # 3) Somatic mutations (only when a gene was requested)
    if gene_symbol:
        ssm = gdc_ssm_by_gene(gene_symbol, project_id)
        ssm.to_csv(output_dir / "gdc_ssm.csv", index=False)
        outputs["ssm"] = ssm

    print(f"GDC pipeline: {project_id} → {output_dir}")
    return outputs
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
258
|
+
## ToolUniverse 連携
|
|
259
|
+
|
|
260
|
+
| TU Key | ツール名 | 連携内容 |
|
|
261
|
+
|--------|---------|---------|
|
|
262
|
+
| `gdc` | GDC | NCI Genomic Data Commons REST API |
|
|
263
|
+
|
|
264
|
+
## パイプライン統合
|
|
265
|
+
|
|
266
|
+
```
|
|
267
|
+
cancer-genomics → gdc-portal → precision-oncology
|
|
268
|
+
(COSMIC/DepMap) (GDC API) (MTB レポート)
|
|
269
|
+
│ │ ↓
|
|
270
|
+
icgc-cancer-data ───────┘ variant-interpretation
|
|
271
|
+
(ICGC DCC) (ClinVar/ACMG)
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
## パイプライン出力
|
|
275
|
+
|
|
276
|
+
| ファイル | 説明 | 次スキル |
|
|
277
|
+
|---------|------|---------|
|
|
278
|
+
| `results/gdc_projects.csv` | プロジェクト一覧 | → cancer-genomics |
|
|
279
|
+
| `results/gdc_cases.csv` | ケースメタデータ | → precision-oncology |
|
|
280
|
+
| `results/gdc_ssm.csv` | 体細胞変異 | → variant-interpretation |
|