@nahisaho/satori 0.11.1 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +125 -56
- package/package.json +1 -1
- package/src/.github/skills/scientific-biothings-idmapping/SKILL.md +298 -0
- package/src/.github/skills/scientific-cancer-genomics/SKILL.md +287 -0
- package/src/.github/skills/scientific-clinical-reporting/SKILL.md +324 -0
- package/src/.github/skills/scientific-compound-screening/SKILL.md +245 -0
- package/src/.github/skills/scientific-genome-sequence-tools/SKILL.md +304 -0
- package/src/.github/skills/scientific-healthcare-ai/SKILL.md +273 -0
- package/src/.github/skills/scientific-human-protein-atlas/SKILL.md +244 -0
- package/src/.github/skills/scientific-literature-search/SKILL.md +443 -0
- package/src/.github/skills/scientific-metabolic-modeling/SKILL.md +288 -0
- package/src/.github/skills/scientific-metabolomics-databases/SKILL.md +288 -0
- package/src/.github/skills/scientific-molecular-docking/SKILL.md +303 -0
- package/src/.github/skills/scientific-noncoding-rna/SKILL.md +262 -0
- package/src/.github/skills/scientific-pathway-enrichment/SKILL.md +449 -0
- package/src/.github/skills/scientific-pharmacology-targets/SKILL.md +323 -0
- package/src/.github/skills/scientific-protein-domain-family/SKILL.md +369 -0
- package/src/.github/skills/scientific-protein-interaction-network/SKILL.md +352 -0
- package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +327 -0
- package/src/.github/skills/scientific-structural-proteomics/SKILL.md +317 -0
- package/src/.github/skills/scientific-systematic-review/SKILL.md +361 -0
- package/src/.github/skills/scientific-variant-effect-prediction/SKILL.md +325 -0
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-biothings-idmapping
|
|
3
|
+
description: |
|
|
4
|
+
BioThings API (MyGene.info, MyVariant.info, MyChem.info) を活用した
|
|
5
|
+
遺伝子・変異・化合物の横断的 ID マッピングおよびアノテーション統合スキル。
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# Scientific BioThings ID Mapping
|
|
9
|
+
|
|
10
|
+
BioThings API スイート (MyGene, MyVariant, MyChem) を活用した
|
|
11
|
+
多データベース横断の ID 変換・アノテーション取得パイプラインを提供する。
|
|
12
|
+
|
|
13
|
+
## When to Use
|
|
14
|
+
|
|
15
|
+
- 遺伝子 ID 間の変換 (Entrez ↔ Ensembl ↔ Symbol ↔ UniProt) を行うとき
|
|
16
|
+
- 変異 ID のアノテーション (ClinVar, dbSNP, CADD 等) を取得するとき
|
|
17
|
+
- 化合物 ID の変換 (DrugBank ↔ ChEMBL ↔ InChIKey ↔ PubChem) を行うとき
|
|
18
|
+
- バッチクエリで多数の ID を一括アノテーションするとき
|
|
19
|
+
- 複数データベースのメタ情報を統合するとき
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Quick Start
|
|
24
|
+
|
|
25
|
+
## 1. MyGene.info 遺伝子アノテーション
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
import requests
|
|
29
|
+
import pandas as pd
|
|
30
|
+
|
|
31
|
+
MYGENE_API = "https://mygene.info/v3"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def mygene_query(query, fields=None, species="human", size=10, timeout=30):
    """
    Search MyGene.info for genes matching a query.

    Parameters:
        query: str — gene symbol, Entrez ID, or keyword
        fields: str | None — comma-separated fields; left out of the
            request when falsy
        species: str — "human", "mouse", etc.
        size: int — maximum number of hits requested
        timeout: float — seconds to wait for the server before giving up

    Returns:
        list — the "hits" array of the JSON response (empty when absent)

    Raises:
        requests.HTTPError — on a non-2xx response
        requests.Timeout — when no answer arrives within `timeout`

    ToolUniverse:
        MyGene_query_genes(q=query, fields=fields, species=species)
    """
    params = {
        "q": query,
        "species": species,
        "size": size,
    }
    if fields:
        params["fields"] = fields

    # requests applies no timeout by default; without one a stalled
    # connection would block the caller indefinitely.
    resp = requests.get(f"{MYGENE_API}/query", params=params, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    hits = data.get("hits", [])
    print(f"MyGene query '{query}': {data.get('total', 0)} total, "
          f"{len(hits)} returned")
    return hits
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def mygene_get_gene(gene_id, fields=None, timeout=30):
    """
    Fetch the detailed MyGene.info annotation for one gene.

    Parameters:
        gene_id: gene identifier placed in the URL path
        fields: str | None — comma-separated fields; left out of the
            request when falsy
        timeout: float — seconds to wait for the server before giving up

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError — on a non-2xx response
        requests.Timeout — when no answer arrives within `timeout`

    ToolUniverse:
        MyGene_get_gene_annotation(gene_id=gene_id, fields=fields)
    """
    params = {}
    if fields:
        params["fields"] = fields

    # requests applies no timeout by default; pass one so a stalled
    # connection cannot hang the caller.
    resp = requests.get(
        f"{MYGENE_API}/gene/{gene_id}", params=params, timeout=timeout
    )
    resp.raise_for_status()
    data = resp.json()

    print(f"MyGene gene {gene_id}: {data.get('symbol', '?')} "
          f"({data.get('name', '')})")
    return data
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def mygene_batch_query(gene_ids, fields=None, species="human", timeout=60):
    """
    Annotate many genes with a single MyGene.info batch request.

    Parameters:
        gene_ids: sized iterable of gene identifiers; each is converted
            with str() and the values are sent comma-joined as "ids"
        fields: str | None — comma-separated fields; left out of the
            request when falsy
        species: str — "human", "mouse", etc.
        timeout: float — seconds to wait for the server before giving up

    Returns:
        The decoded JSON body of the response, or an empty list when
        `gene_ids` is empty (no request is sent in that case).

    Raises:
        requests.HTTPError — on a non-2xx response
        requests.Timeout — when no answer arrives within `timeout`

    ToolUniverse:
        MyGene_batch_query(ids=gene_ids, fields=fields, species=species)
    """
    ids = [str(g) for g in gene_ids]
    if not ids:
        # Nothing to look up: skip the round trip instead of posting ids="".
        print("MyGene batch: 0 queried → 0 results")
        return []

    payload = {
        "ids": ",".join(ids),
        "species": species,
    }
    if fields:
        payload["fields"] = fields

    # requests applies no timeout by default; pass one so a stalled
    # connection cannot hang the caller.
    resp = requests.post(f"{MYGENE_API}/gene", json=payload, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    print(f"MyGene batch: {len(ids)} queried → {len(data)} results")
    return data
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## 2. MyVariant.info 変異アノテーション
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
MYVARIANT_API = "https://myvariant.info/v1"
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _clinvar_significance(clinvar):
    """Return the ClinVar significance label(s) of a `clinvar` annotation, or "N/A"."""
    if not isinstance(clinvar, dict):
        return "N/A"

    # Honour a top-level value if the payload ever carries one.
    top_level = clinvar.get("clinical_significance")
    if top_level:
        return top_level

    # NOTE(review): MyVariant.info is understood to nest the significance in
    # the per-record "rcv" entries (a dict for one record, a list for
    # several) — confirm against the current ClinVar field listing.
    records = clinvar.get("rcv") or []
    if isinstance(records, dict):
        records = [records]

    labels = []
    for record in records:
        label = record.get("clinical_significance") if isinstance(record, dict) else None
        if label and label not in labels:
            labels.append(label)
    return ", ".join(labels) if labels else "N/A"


def myvariant_get(variant_id, fields=None, assembly=None, timeout=30):
    """
    Fetch the MyVariant.info annotation for one variant.

    Parameters:
        variant_id: str — HGVS notation (e.g., "chr17:g.7674220C>T")
        fields: str | None — comma-separated fields; left out of the
            request when falsy
        assembly: str | None — genome build the HGVS coordinates refer to
            ("hg19" or "hg38"); left out of the request when falsy, in
            which case the service default applies.
            NOTE(review): the service default is understood to be hg19,
            while the example ID above looks like an hg38 coordinate —
            pass assembly="hg38" for such IDs.
        timeout: float — seconds to wait for the server before giving up

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError — on a non-2xx response
        requests.Timeout — when no answer arrives within `timeout`

    ToolUniverse:
        MyVariant_get_variant_annotation(variant_id=variant_id, fields=fields)
    """
    params = {}
    if fields:
        params["fields"] = fields
    if assembly:
        params["assembly"] = assembly

    # requests applies no timeout by default; pass one so a stalled
    # connection cannot hang the caller.
    resp = requests.get(
        f"{MYVARIANT_API}/variant/{variant_id}", params=params, timeout=timeout
    )
    resp.raise_for_status()
    data = resp.json()

    clinvar = data.get("clinvar", {})
    cadd = data.get("cadd", {})
    cadd_phred = cadd.get("phred", "N/A") if isinstance(cadd, dict) else "N/A"
    print(f"MyVariant {variant_id}: "
          f"ClinVar={_clinvar_significance(clinvar)}, "
          f"CADD={cadd_phred}")
    return data
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def myvariant_query(query, fields=None, size=10, timeout=30):
    """
    Search MyVariant.info for variants matching a query.

    Parameters:
        query: str — query string sent as "q"
        fields: str | None — comma-separated fields; left out of the
            request when falsy
        size: int — maximum number of hits requested
        timeout: float — seconds to wait for the server before giving up

    Returns:
        list — the "hits" array of the JSON response (empty when absent)

    Raises:
        requests.HTTPError — on a non-2xx response
        requests.Timeout — when no answer arrives within `timeout`

    ToolUniverse:
        MyVariant_query_variants(q=query, fields=fields, size=size)
    """
    params = {"q": query, "size": size}
    if fields:
        params["fields"] = fields

    # requests applies no timeout by default; pass one so a stalled
    # connection cannot hang the caller.
    resp = requests.get(f"{MYVARIANT_API}/query", params=params, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    hits = data.get("hits", [])
    print(f"MyVariant query '{query}': {data.get('total', 0)} total")
    return hits
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## 3. MyChem.info 化合物アノテーション
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
MYCHEM_API = "https://mychem.info/v1"
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def mychem_get(chem_id, fields=None, timeout=30):
    """
    Fetch the MyChem.info annotation for one compound.

    Parameters:
        chem_id: str — InChIKey, DrugBank ID, ChEMBL ID, etc.
        fields: str | None — comma-separated fields; left out of the
            request when falsy
        timeout: float — seconds to wait for the server before giving up

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError — on a non-2xx response
        requests.Timeout — when no answer arrives within `timeout`

    ToolUniverse:
        MyChem_get_chemical_annotation(chem_id=chem_id, fields=fields)
    """
    params = {}
    if fields:
        params["fields"] = fields

    # requests applies no timeout by default; pass one so a stalled
    # connection cannot hang the caller.
    resp = requests.get(
        f"{MYCHEM_API}/chem/{chem_id}", params=params, timeout=timeout
    )
    resp.raise_for_status()
    data = resp.json()

    drugbank = data.get("drugbank", {})
    # Only a dict-shaped "drugbank" section can supply a name for the
    # summary line; any other shape is reported as N/A rather than raising.
    name = drugbank.get("name", "N/A") if isinstance(drugbank, dict) else "N/A"
    print(f"MyChem {chem_id}: {name}")
    return data
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def mychem_query(query, fields=None, size=10, timeout=30):
    """
    Search MyChem.info for compounds matching a query.

    Parameters:
        query: str — query string sent as "q"
        fields: str | None — comma-separated fields; left out of the
            request when falsy
        size: int — maximum number of hits requested
        timeout: float — seconds to wait for the server before giving up

    Returns:
        list — the "hits" array of the JSON response (empty when absent)

    Raises:
        requests.HTTPError — on a non-2xx response
        requests.Timeout — when no answer arrives within `timeout`

    ToolUniverse:
        MyChem_query_chemicals(q=query, fields=fields, size=size)
    """
    params = {"q": query, "size": size}
    if fields:
        params["fields"] = fields

    # requests applies no timeout by default; pass one so a stalled
    # connection cannot hang the caller.
    resp = requests.get(f"{MYCHEM_API}/query", params=params, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    hits = data.get("hits", [])
    print(f"MyChem query '{query}': {data.get('total', 0)} total")
    return hits
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
## 4. クロスデータベース ID マッピング
|
|
208
|
+
|
|
209
|
+
```python
|
|
210
|
+
def cross_db_id_mapping(gene_symbol):
    """
    Map a gene symbol to Entrez, Ensembl, UniProt (Swiss-Prot) and RefSeq RNA IDs.

    Runs `mygene_query` with a fixed field list and flattens every hit
    into one row.

    Parameters:
        gene_symbol: str — passed to `mygene_query` as the query

    Returns:
        pandas.DataFrame with columns symbol, name, entrez_id,
        ensembl_gene, uniprot_swissprot, refseq_rna (one row per hit).

    ToolUniverse (cross-database):
        MyGene_query_genes(q=gene_symbol, fields="entrezgene,ensembl.gene,uniprot,refseq")
    """
    rows = []
    for record in mygene_query(
        gene_symbol,
        fields="entrezgene,ensembl.gene,uniprot.Swiss-Prot,refseq.rna,symbol,name",
    ):
        # "ensembl" may come back as a list of mappings; use the first one.
        ensembl_entry = record.get("ensembl", {})
        if isinstance(ensembl_entry, list):
            ensembl_entry = ensembl_entry[0] if ensembl_entry else {}
        uniprot_entry = record.get("uniprot", {})

        row = {}
        row["symbol"] = record.get("symbol", "")
        row["name"] = record.get("name", "")
        row["entrez_id"] = record.get("entrezgene", "")
        row["ensembl_gene"] = ensembl_entry.get("gene", "")
        row["uniprot_swissprot"] = uniprot_entry.get("Swiss-Prot", "")
        row["refseq_rna"] = record.get("refseq", {}).get("rna", [])
        rows.append(row)

    mapping = pd.DataFrame(rows)
    print(f"ID mapping '{gene_symbol}': {len(mapping)} entries")
    return mapping
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## 5. バッチ統合アノテーション
|
|
242
|
+
|
|
243
|
+
```python
|
|
244
|
+
def batch_integrated_annotation(gene_symbols, include_variants=False):
    """
    Collect the top MyGene.info hit for each of several gene symbols.

    Each symbol is looked up with its own `mygene_query` call and only
    the first hit is kept.

    Parameters:
        gene_symbols: sized iterable of gene symbols
        include_variants: bool — accepted but not used by this function

    Returns:
        pandas.DataFrame built from the collected hits (at most one row
        per symbol).

    ToolUniverse (cross-database):
        MyGene_batch_query(ids=entrez_ids, fields=fields)
        MyVariant_query_variants(q=gene_query)  [optional]
    """
    # Step 1: per-symbol gene annotation, keeping the best match only
    top_hits = []
    for gene_symbol in gene_symbols:
        matches = mygene_query(gene_symbol, fields="entrezgene,symbol,name,summary")
        if matches:
            top_hits.append(matches[0])

    annotated = pd.DataFrame(top_hits)
    print(f"Batch annotation: {len(gene_symbols)} genes → {len(annotated)} results")
    return annotated
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
## References
|
|
264
|
+
|
|
265
|
+
### Output Files
|
|
266
|
+
|
|
267
|
+
| ファイル | 形式 |
|
|
268
|
+
|---|---|
|
|
269
|
+
| `results/mygene_annotation.json` | JSON |
|
|
270
|
+
| `results/myvariant_annotation.json` | JSON |
|
|
271
|
+
| `results/mychem_annotation.json` | JSON |
|
|
272
|
+
| `results/id_mapping.csv` | CSV |
|
|
273
|
+
|
|
274
|
+
### 利用可能ツール
|
|
275
|
+
|
|
276
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
277
|
+
|---|---|---|
|
|
278
|
+
| BioThings | `MyGene_query_genes` | 遺伝子検索 |
|
|
279
|
+
| BioThings | `MyGene_get_gene_annotation` | 遺伝子詳細 |
|
|
280
|
+
| BioThings | `MyGene_batch_query` | バッチアノテーション |
|
|
281
|
+
| BioThings | `MyVariant_get_variant_annotation` | 変異アノテーション |
|
|
282
|
+
| BioThings | `MyVariant_query_variants` | 変異検索 |
|
|
283
|
+
| BioThings | `MyChem_get_chemical_annotation` | 化合物アノテーション |
|
|
284
|
+
| BioThings | `MyChem_query_chemicals` | 化合物検索 |
|
|
285
|
+
|
|
286
|
+
### 参照スキル
|
|
287
|
+
|
|
288
|
+
| スキル | 関連 |
|
|
289
|
+
|---|---|
|
|
290
|
+
| `scientific-variant-interpretation` | 変異アノテーション |
|
|
291
|
+
| `scientific-gene-expression-transcriptomics` | 遺伝子発現 |
|
|
292
|
+
| `scientific-drug-target-interaction` | DTI 解析 |
|
|
293
|
+
| `scientific-rare-disease-genetics` | 希少疾患 |
|
|
294
|
+
| `scientific-pathway-enrichment` | パスウェイ解析 |
|
|
295
|
+
|
|
296
|
+
### 依存パッケージ
|
|
297
|
+
|
|
298
|
+
`requests`, `pandas`
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-cancer-genomics
|
|
3
|
+
description: |
|
|
4
|
+
がんゲノミクスポータル統合スキル。COSMIC (体細胞変異カタログ)、
|
|
5
|
+
cBioPortal (がんゲノミクスデータ解析)、DepMap (がん細胞依存性) の
|
|
6
|
+
3 大がんゲノミクスデータベースを統合した変異プロファイリング、
|
|
7
|
+
変異シグネチャー解析、遺伝子依存性 (essentiality) 評価、
|
|
8
|
+
コピー数変化・がん種横断解析パイプライン。
|
|
9
|
+
13 の ToolUniverse SMCP ツールと連携。
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
# Scientific Cancer Genomics
|
|
13
|
+
|
|
14
|
+
COSMIC / cBioPortal / DepMap の 3 大がんゲノミクスポータルを統合した
|
|
15
|
+
体細胞変異プロファイリング・機能解析パイプラインを提供する。
|
|
16
|
+
|
|
17
|
+
## When to Use
|
|
18
|
+
|
|
19
|
+
- がん関連遺伝子の体細胞変異をカタログ検索するとき
|
|
20
|
+
- cBioPortal でがん種横断の遺伝子変異頻度を調べるとき
|
|
21
|
+
- DepMap で遺伝子依存性 (essentiality) を評価するとき
|
|
22
|
+
- 変異シグネチャー解析 (SBS/DBS/ID) を行うとき
|
|
23
|
+
- コピー数変化 (CNA) のドライバー・パッセンジャー分類が必要なとき
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
## 1. COSMIC 体細胞変異検索
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import pandas as pd
|
|
33
|
+
import numpy as np
|
|
34
|
+
import requests
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def cosmic_search_mutations(gene, cancer_type=None, mutation_type=None):
    """
    Build a COSMIC (Catalogue Of Somatic Mutations In Cancer) query record.

    No network request is made here: the gene is looked up
    (case-insensitively) in a small built-in Cancer Gene Census table and
    the filters are echoed back.

    Parameters:
        gene: str — gene symbol (e.g., "BRAF", "TP53")
        cancer_type: str — cancer type filter (e.g., "melanoma")
        mutation_type: str — mutation type ("missense", "nonsense", "frameshift")

    Returns:
        dict with keys gene, cgc_role, cgc_tier, cancer_type_filter,
        mutation_type_filter. Genes outside the built-in table get
        cgc_role "unknown" and cgc_tier None.
    """
    # Via ToolUniverse: COSMIC_search_mutations, COSMIC_get_mutations_by_gene
    # The COSMIC API requires authentication (free for academic use)

    # Cancer Gene Census (CGC) lookup table: symbol -> role, all tier 1
    census_roles = (
        ("TP53", "TSG"),
        ("BRAF", "oncogene"),
        ("KRAS", "oncogene"),
        ("EGFR", "oncogene"),
        ("PIK3CA", "oncogene"),
        ("BRCA1", "TSG"),
        ("BRCA2", "TSG"),
        ("ALK", "oncogene"),
    )
    census = {symbol: {"role": role, "tier": 1} for symbol, role in census_roles}
    entry = census.get(gene.upper(), {})

    print(f"COSMIC query: {gene} "
          f"(CGC: {entry.get('role', 'N/A')}, "
          f"Tier {entry.get('tier', 'N/A')})")

    return {
        "gene": gene,
        "cgc_role": entry.get("role", "unknown"),
        "cgc_tier": entry.get("tier", None),
        "cancer_type_filter": cancer_type,
        "mutation_type_filter": mutation_type,
    }
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## 2. cBioPortal がんゲノミクスデータ取得
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
def _tumor_allele_fraction(mutation):
    """Return alt / (ref + alt) tumor read counts, or NaN when the counts are unusable."""
    alt = mutation.get("tumorAltCount")
    ref = mutation.get("tumorRefCount")
    if alt is None or ref is None or alt < 0 or ref < 0:
        return float("nan")
    depth = ref + alt
    return alt / depth if depth > 0 else float("nan")


def cbioportal_query(genes, study_id=None, cancer_type=None,
                     data_types=None, timeout=60):
    """
    Retrieve cancer genomics data from the cBioPortal REST API.

    Without `study_id` the study catalogue is listed (optionally filtered
    by cancer type); with `study_id` the mutations of that study are
    fetched and restricted to `genes`.

    Parameters:
        genes: list — gene symbols to keep in the mutation table
        study_id: str — cBioPortal study ID (e.g., "tcga_brca_pan_can_atlas_2018")
        cancer_type: str — case-insensitive substring matched against the
            cancer type name of each study (e.g., "Breast Cancer")
        data_types: list — defaults to ["mutations", "cna"]; only
            "mutations" is acted on by this function
        timeout: float — seconds to wait for the server before giving up

    Returns:
        dict — "studies" (DataFrame of at most 20 studies) when no
        study_id is given; otherwise "mutations" (DataFrame) when that
        data type was requested and the request succeeded.

    Raises:
        requests.HTTPError — when the study listing returns a non-2xx status
        requests.Timeout — when no answer arrives within `timeout`
    """
    base_url = "https://www.cbioportal.org/api"

    if data_types is None:
        data_types = ["mutations", "cna"]

    results = {}

    # Study listing
    if study_id is None:
        # NOTE(review): the nested "cancerType" object read below is
        # understood to be returned only with the DETAILED projection —
        # confirm against the cBioPortal API reference.
        resp = requests.get(f"{base_url}/studies",
                            params={"projection": "DETAILED"},
                            timeout=timeout)
        resp.raise_for_status()
        studies = resp.json()
        if cancer_type:
            needle = cancer_type.lower()
            studies = [s for s in studies
                       if needle in
                       (s.get("cancerType") or {}).get("name", "").lower()]
        print(f"cBioPortal: {len(studies)} studies for '{cancer_type}'")
        results["studies"] = pd.DataFrame([{
            "study_id": s["studyId"],
            "name": s["name"],
            "cancer_type": (s.get("cancerType") or {}).get("name", ""),
            "sample_count": s.get("allSampleCount", 0),
        } for s in studies[:20]])
    else:
        # Mutation data
        if "mutations" in data_types:
            url = f"{base_url}/molecular-profiles/{study_id}_mutations/mutations"
            # NOTE(review): this endpoint is understood to require a
            # sampleListId; "<study>_all" is the conventional all-samples
            # list — confirm the ID exists for the study in question.
            params = {
                "sampleListId": f"{study_id}_all",
                "projection": "DETAILED",
            }
            resp = requests.get(url, params=params, timeout=timeout)
            if resp.status_code == 200:
                wanted = set(genes)
                rows = []
                for m in resp.json():
                    symbol = (m.get("gene") or {}).get("hugoGeneSymbol", "")
                    if symbol not in wanted:
                        continue
                    rows.append({
                        "gene": symbol,
                        "mutation": m.get("proteinChange", ""),
                        "mutation_type": m.get("mutationType", ""),
                        "chromosome": m.get("chr", ""),
                        "position": m.get("startPosition", ""),
                        "allele_freq": _tumor_allele_fraction(m),
                    })
                mut_df = pd.DataFrame(rows)
                results["mutations"] = mut_df
                print(f"  Mutations: {len(mut_df)} found in {genes}")
            else:
                # Best effort: report the failure instead of dropping it
                # silently; "mutations" stays absent from the result.
                print(f"  Mutations: request failed "
                      f"(HTTP {resp.status_code}) for study '{study_id}'")

    return results
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## 3. DepMap 遺伝子依存性解析
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
def depmap_gene_dependency(genes, cell_lineage=None):
    """
    Build a DepMap (Cancer Dependency Map) gene-dependency query table.

    No scores are fetched here; every row carries the query description
    and a fixed interpretation note.

    Parameters:
        genes: list — gene symbols
        cell_lineage: str — cell lineage filter (e.g., "Lung", "Breast")

    Returns:
        pandas.DataFrame with one row per gene and the columns gene,
        cell_lineage, query_type, interpretation.
    """
    # Via ToolUniverse:
    #   DepMap_search_genes, DepMap_get_gene_dependencies
    #   DepMap_get_cell_line, DepMap_get_cell_lines, DepMap_search_cell_lines

    # DepMap CRISPR (Chronos) dependency score:
    #   negative = essential (dependent), ~0 = non-essential
    #   Common Essential: mean < -0.5 across 90% of lines
    #   Selective Dependency: mean < -0.5 in specific lineages

    # Actual scores are obtained through ToolUniverse.
    note = (
        "Chronos score < 0: gene essentiality increases. "
        "score < -0.5: likely essential in this lineage. "
        "score ~ 0: non-essential."
    )

    rows = [
        {
            "gene": symbol,
            "cell_lineage": cell_lineage,
            "query_type": "CRISPR_dependency",
            "interpretation": note,
        }
        for symbol in genes
    ]

    table = pd.DataFrame(rows)
    print(f"DepMap: queried {len(genes)} genes "
          f"(lineage: {cell_lineage or 'pan-cancer'})")
    return table
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
## 4. 変異シグネチャー解析
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
def mutational_signature_analysis(mutations_df, genome="GRCh38",
                                  n_signatures=None):
    """
    Extract somatic mutational signatures (SBS-style) with NMF.

    A 96-channel trinucleotide catalogue is counted per sample and then
    factorised with scikit-learn's NMF.

    Parameters:
        mutations_df: DataFrame — must contain the columns "sample" and
            "trinucleotide_context"; the latter holds labels of the form
            "A[C>T]G" (pyrimidine reference base C or T). Rows whose
            label is not one of the 96 channels are ignored. Other
            columns (chr, pos, ref, alt) are not read.
        genome: str — reference genome name; not used by this function
        n_signatures: int — number of signatures to extract
            (None = min(5, number of samples))

    Returns:
        (signatures, exposures) — signatures is a DataFrame of
        96 contexts × n_signatures; exposures is a DataFrame of
        samples × n_signatures. Columns are named SBS_1, SBS_2, ...

    Raises:
        ValueError — when "trinucleotide_context" is missing or
            `mutations_df` has no rows.
        KeyError — when the "sample" column is missing.
    """
    from sklearn.decomposition import NMF

    if "trinucleotide_context" not in mutations_df.columns:
        # Without this column no mutation can be assigned to a channel and
        # the factorisation would run on an all-zero matrix.
        raise ValueError(
            "mutations_df must contain a 'trinucleotide_context' column "
            "with labels such as 'A[C>T]G'"
        )
    if len(mutations_df) == 0:
        raise ValueError("mutations_df is empty; nothing to decompose")

    # The 96 trinucleotide contexts: 6 substitutions × 4 five-prime bases
    # × 4 three-prime bases.
    contexts = [
        f"{five}[{ref}>{alt}]{three}"
        for ref in ("C", "T")
        for alt in "ACGT" if alt != ref
        for five in "ACGT"
        for three in "ACGT"
    ]

    # Per-sample catalogue, counted in one pass instead of row by row
    samples = mutations_df["sample"].unique()
    in_scope = mutations_df[mutations_df["trinucleotide_context"].isin(contexts)]
    catalog = (
        pd.crosstab(in_scope["trinucleotide_context"], in_scope["sample"])
        .reindex(index=contexts, columns=samples, fill_value=0)
    )

    X = catalog.to_numpy(dtype=float).T  # samples × contexts

    print(f"Mutation catalog: {len(contexts)} contexts, "
          f"{len(samples)} samples, "
          f"{X.sum():.0f} total mutations")

    # NMF decomposition (stand-in for SigProfilerExtractor)
    if n_signatures is None:
        n_signatures = min(5, len(samples))

    model = NMF(n_components=n_signatures, random_state=42, max_iter=1000)
    W = model.fit_transform(X)   # exposure matrix
    H = model.components_        # signature profiles

    labels = [f"SBS_{i+1}" for i in range(n_signatures)]
    signatures = pd.DataFrame(H.T, index=contexts, columns=labels)
    exposures = pd.DataFrame(W, index=samples, columns=labels)

    print(f"Extracted {n_signatures} mutational signatures")
    return signatures, exposures
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## References
|
|
242
|
+
|
|
243
|
+
### Output Files
|
|
244
|
+
|
|
245
|
+
| ファイル | 形式 |
|
|
246
|
+
|---|---|
|
|
247
|
+
| `results/cosmic_mutations.csv` | CSV |
|
|
248
|
+
| `results/cbioportal_mutations.csv` | CSV |
|
|
249
|
+
| `results/depmap_dependencies.csv` | CSV |
|
|
250
|
+
| `results/mutation_signatures.csv` | CSV |
|
|
251
|
+
| `results/signature_exposures.csv` | CSV |
|
|
252
|
+
| `figures/mutation_spectrum.png` | PNG |
|
|
253
|
+
| `figures/signature_profiles.png` | PNG |
|
|
254
|
+
|
|
255
|
+
### 利用可能ツール
|
|
256
|
+
|
|
257
|
+
> [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
|
|
258
|
+
|
|
259
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
260
|
+
|---|---|---|
|
|
261
|
+
| COSMIC | `COSMIC_search_mutations` | 体細胞変異検索 |
|
|
262
|
+
| COSMIC | `COSMIC_get_mutations_by_gene` | 遺伝子別変異取得 |
|
|
263
|
+
| cBioPortal | `cBioPortal_get_cancer_studies` | がん研究一覧 |
|
|
264
|
+
| cBioPortal | `cBioPortal_get_mutations` | 変異データ取得 |
|
|
265
|
+
| cBioPortal | `cBioPortal_get_molecular_profiles` | 分子プロファイル |
|
|
266
|
+
| cBioPortal | `cBioPortal_get_patients` | 患者データ取得 |
|
|
267
|
+
| cBioPortal | `cBioPortal_get_sample_lists` | サンプルリスト |
|
|
268
|
+
| cBioPortal | `cBioPortal_get_samples` | サンプル詳細 |
|
|
269
|
+
| DepMap | `DepMap_get_gene_dependencies` | 遺伝子依存性スコア |
|
|
270
|
+
| DepMap | `DepMap_get_cell_line` | 細胞株情報 |
|
|
271
|
+
| DepMap | `DepMap_get_cell_lines` | 細胞株一覧 |
|
|
272
|
+
| DepMap | `DepMap_search_cell_lines` | 細胞株検索 |
|
|
273
|
+
| DepMap | `DepMap_search_genes` | 遺伝子検索 |
|
|
274
|
+
|
|
275
|
+
### 参照スキル
|
|
276
|
+
|
|
277
|
+
| スキル | 関連 |
|
|
278
|
+
|---|---|
|
|
279
|
+
| `scientific-precision-oncology` | 腫瘍プロファイル → 治療選択 |
|
|
280
|
+
| `scientific-variant-interpretation` | バリアント臨床解釈 |
|
|
281
|
+
| `scientific-variant-effect-prediction` | 計算病原性予測 |
|
|
282
|
+
| `scientific-disease-research` | GWAS → がんリスク |
|
|
283
|
+
| `scientific-drug-target-profiling` | 標的同定 → 依存性 |
|
|
284
|
+
|
|
285
|
+
### 依存パッケージ
|
|
286
|
+
|
|
287
|
+
`pandas`, `numpy`, `requests`, `scikit-learn`, `matplotlib`
|