@nahisaho/satori 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -29
- package/package.json +1 -1
- package/src/.github/skills/scientific-biothings-idmapping/SKILL.md +298 -0
- package/src/.github/skills/scientific-compound-screening/SKILL.md +245 -0
- package/src/.github/skills/scientific-genome-sequence-tools/SKILL.md +304 -0
- package/src/.github/skills/scientific-healthcare-ai/SKILL.md +273 -0
- package/src/.github/skills/scientific-human-protein-atlas/SKILL.md +244 -0
- package/src/.github/skills/scientific-metabolic-modeling/SKILL.md +288 -0
- package/src/.github/skills/scientific-noncoding-rna/SKILL.md +262 -0
- package/src/.github/skills/scientific-pharmacology-targets/SKILL.md +323 -0
- package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +327 -0
- package/src/.github/skills/scientific-structural-proteomics/SKILL.md +317 -0
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-pharmacology-targets
|
|
3
|
+
description: |
|
|
4
|
+
薬理学的ターゲットプロファイリングスキル。BindingDB 結合親和性、
|
|
5
|
+
GPCRdb GPCR 構造-活性、GtoPdb 薬理学、BRENDA 酵素動態、
|
|
6
|
+
Pharos 未解明ターゲット(TDL)の統合解析パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Pharmacology Targets
|
|
10
|
+
|
|
11
|
+
複数の薬理学データベース (BindingDB, GPCRdb, GtoPdb, BRENDA, Pharos) を
|
|
12
|
+
統合した包括的ターゲットプロファイリングパイプラインを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- 特定タンパク質の既知リガンド・結合親和性を調べるとき
|
|
17
|
+
- GPCR のリガンド・変異・構造情報を取得するとき
|
|
18
|
+
- 薬物-ターゲット相互作用のデータベース横断検索を行うとき
|
|
19
|
+
- 酵素阻害剤データ (BRENDA) を調べるとき
|
|
20
|
+
- 未解明ターゲット (Tdark/Tbio) のドラッガビリティを評価するとき
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Quick Start
|
|
25
|
+
|
|
26
|
+
## 1. BindingDB 結合親和性データ取得
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
import requests
|
|
30
|
+
import pandas as pd
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def get_bindingdb_ligands(uniprot_id, cutoff=None):
    """
    Fetch ligand binding data from BindingDB for a UniProt accession.

    Parameters:
        uniprot_id: str — UniProt accession (e.g., "P00533")
        cutoff: float | None — affinity cutoff in nM; it is only sent to
            the service when truthy, so None and 0 both mean "no cutoff"

    Returns:
        pandas.DataFrame with one row per returned affinity record and the
        columns monomer_id, smiles, affinity_type, affinity_value_nm and
        source (the frame has no columns when nothing is returned).

    Raises:
        requests.HTTPError: if the service answers with an error status.

    ToolUniverse:
        BindingDB_get_ligands_by_uniprot(uniprot_id=uniprot_id)
        BindingDB_get_targets_by_compound(smiles=smiles)
    """
    service_root = "https://bindingdb.org/axis2/services/BDBService"
    query = {"uniprot": uniprot_id, "response": "json"}
    if cutoff:
        query["cutoff"] = cutoff

    reply = requests.get(f"{service_root}/getLigandsByUniprot", params=query)
    reply.raise_for_status()
    envelope = reply.json().get("getLigandsByUniprotResponse", {})

    # (output column, key in the BindingDB record); order fixes column order.
    field_map = (
        ("monomer_id", "monomerid"),
        ("smiles", "smiles"),
        ("affinity_type", "affinity_type"),
        ("affinity_value_nm", "affinity"),
        ("source", "source"),
    )
    rows = [
        {column: record.get(key, "") for column, key in field_map}
        for record in envelope.get("affinities", [])
    ]

    table = pd.DataFrame(rows)
    print(f"BindingDB '{uniprot_id}': {len(table)} ligands")
    return table
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## 2. GPCRdb GPCR プロファイリング
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
def get_gpcrdb_profile(protein_entry):
    """
    Collect protein, ligand, mutation and structure data for one GPCR
    from the GPCRdb web services.

    Parameters:
        protein_entry: str — GPCRdb entry name (e.g., "adrb2_human")

    Returns:
        tuple (profile, ligands, mutations, structures) where profile is a
        dict with entry_name, name, family, species and the three record
        counts, and the other items are the decoded JSON payloads.

    Raises:
        requests.HTTPError: only when the protein lookup fails. The ligand,
        mutation and structure lookups are best-effort and yield an empty
        list on any non-200 status.

    ToolUniverse:
        GPCRdb_get_protein(entry_name=protein_entry)
        GPCRdb_get_ligands(entry_name=protein_entry)
        GPCRdb_get_mutations(entry_name=protein_entry)
        GPCRdb_get_structures(entry_name=protein_entry)
        GPCRdb_list_proteins()
    """
    base = "https://gpcrdb.org/services"

    def _fetch_optional(path):
        # Best-effort fetch: anything other than HTTP 200 becomes [].
        reply = requests.get(f"{base}/{path}")
        if reply.status_code == 200:
            return reply.json()
        return []

    # The protein record is mandatory, so a failure here propagates.
    protein_reply = requests.get(f"{base}/protein/{protein_entry}/")
    protein_reply.raise_for_status()
    protein = protein_reply.json()

    ligands = _fetch_optional(f"ligands/{protein_entry}/")
    mutations = _fetch_optional(f"mutants/{protein_entry}/")
    structures = _fetch_optional(f"structure/protein/{protein_entry}/")

    profile = {
        field: protein.get(field, "")
        for field in ("entry_name", "name", "family", "species")
    }
    profile["num_ligands"] = len(ligands)
    profile["num_mutations"] = len(mutations)
    profile["num_structures"] = len(structures)

    print(
        f"GPCRdb '{protein_entry}': "
        f"{profile['num_ligands']} ligands, "
        f"{profile['num_mutations']} mutations, "
        f"{profile['num_structures']} structures"
    )
    return profile, ligands, mutations, structures
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## 3. GtoPdb 薬理学データ
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
def get_gtopdb_target_pharmacology(target_id):
    """
    Fetch a target and its pharmacological interactions from the
    Guide to PHARMACOLOGY (GtoPdb) web services.

    Parameters:
        target_id — GtoPdb target identifier, interpolated into the URL

    Returns:
        tuple (target, df): the decoded target record and a
        pandas.DataFrame with one row per interaction (ligand_id,
        ligand_name, type, action, affinity_type, affinity_median,
        approved).

    Raises:
        requests.HTTPError: when the target lookup fails. A non-200 answer
        from the interactions endpoint is treated as "no interactions".

    ToolUniverse:
        GtoPdb_get_target(target_id=target_id)
        GtoPdb_get_target_interactions(target_id=target_id)
        GtoPdb_get_ligand(ligand_id=ligand_id)
        GtoPdb_search_interactions(query=query)
    """
    target_url = f"https://www.guidetopharmacology.org/services/targets/{target_id}"

    # Target record (mandatory).
    target_reply = requests.get(target_url)
    target_reply.raise_for_status()
    target = target_reply.json()

    # Interaction list (best-effort).
    interaction_reply = requests.get(f"{target_url}/interactions")
    if interaction_reply.status_code == 200:
        interactions = interaction_reply.json()
    else:
        interactions = []

    rows = [
        {
            "ligand_id": item.get("ligandId", ""),
            "ligand_name": item.get("ligandName", ""),
            "type": item.get("type", ""),
            "action": item.get("action", ""),
            "affinity_type": item.get("affinityType", ""),
            "affinity_median": item.get("affinityMedian", ""),
            "approved": item.get("approvedDrug", False),
        }
        for item in interactions
    ]

    table = pd.DataFrame(rows)
    target_name = target.get("name", "")
    print(f"GtoPdb target {target_id} ({target_name}): "
          f"{len(table)} interactions")
    return target, table
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## 4. Pharos/TCRD 未解明ターゲット検索
|
|
170
|
+
|
|
171
|
+
```python
|
|
172
|
+
def search_pharos_targets(query, tdl=None):
    """
    Search Pharos / TCRD for targets, optionally filtered by
    Target Development Level (TDL).

    Parameters:
        query: str — gene symbol or target name
        tdl: str | None — "Tclin", "Tchem", "Tbio", "Tdark"; the filter is
            applied client-side to the (at most 20) hits returned

    Returns:
        pandas.DataFrame with the columns symbol, name, uniprot, tdl,
        family, novelty, disease_count and ligand_count. The columns are
        present even when there are no hits.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
        RuntimeError: if the GraphQL response carries an "errors" entry.

    ToolUniverse:
        Pharos_search_targets(q=query)
        Pharos_get_target(q=query)
        Pharos_get_tdl_summary()
        Pharos_get_disease_targets(disease_name=disease_name)
    """
    url = "https://pharos-api.ncats.io/graphql"
    gql = """
    query TargetSearch($term: String!, $top: Int) {
      targets(filter: { term: $term }, top: $top) {
        targets {
          name
          sym
          uniprot { accession }
          tdl
          fam
          novelty
          jensenScore
          diseaseCount
          ligandCount
        }
        count
      }
    }
    """
    variables = {"term": query, "top": 20}
    resp = requests.post(url, json={"query": gql, "variables": variables})
    resp.raise_for_status()
    payload = resp.json()

    # GraphQL reports failures inside the body; surface them explicitly
    # instead of failing later with an opaque TypeError on a null "data".
    if payload.get("errors"):
        raise RuntimeError(f"Pharos GraphQL error: {payload['errors']}")
    search = (payload.get("data") or {}).get("targets") or {}
    hits = search.get("targets") or []

    results = [
        {
            "symbol": t.get("sym", ""),
            "name": t.get("name", ""),
            # "uniprot" may be an explicit null, which dict.get's default
            # does not cover.
            "uniprot": (t.get("uniprot") or {}).get("accession", ""),
            "tdl": t.get("tdl", ""),
            "family": t.get("fam", ""),
            "novelty": t.get("novelty", 0),
            "disease_count": t.get("diseaseCount", 0),
            "ligand_count": t.get("ligandCount", 0),
        }
        for t in hits
    ]

    # Explicit columns keep the schema stable for empty results, so the
    # TDL filter below cannot raise KeyError when there are no hits.
    columns = [
        "symbol", "name", "uniprot", "tdl",
        "family", "novelty", "disease_count", "ligand_count",
    ]
    df = pd.DataFrame(results, columns=columns)
    if tdl:
        df = df[df["tdl"] == tdl]

    print(f"Pharos '{query}': {len(df)} targets"
          f"{f' (TDL={tdl})' if tdl else ''}")
    return df
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
## 5. 統合ターゲットプロファイリング
|
|
234
|
+
|
|
235
|
+
```python
|
|
236
|
+
def integrated_target_profile(uniprot_id, gene_symbol):
    """
    Build a target profile that combines several databases.

    Each lookup is best-effort: any exception raised by a lookup is
    absorbed and replaced by a fallback value.

    Parameters:
        uniprot_id — UniProt accession passed to get_bindingdb_ligands
        gene_symbol — gene symbol passed to search_pharos_targets

    Returns:
        dict with uniprot_id, gene_symbol and bindingdb_ligand_count
        (0 when the BindingDB lookup fails). "tdl" and "novelty" are added
        from the first Pharos hit; "tdl" is "Unknown" when the Pharos
        lookup fails, and both keys are absent when Pharos has no hits.

    ToolUniverse (cross-database):
        BindingDB_get_ligands_by_uniprot(uniprot_id)
        Pharos_get_target(q=gene_symbol)
        GtoPdb_get_targets() → GtoPdb_get_target_interactions()
    """
    summary = {"uniprot_id": uniprot_id, "gene_symbol": gene_symbol}

    # BindingDB: number of known ligand records.
    try:
        ligand_table = get_bindingdb_ligands(uniprot_id)
        ligand_count = len(ligand_table)
    except Exception:
        ligand_count = 0
    summary["bindingdb_ligand_count"] = ligand_count

    # Pharos: development level and novelty of the first hit.
    try:
        pharos_hits = search_pharos_targets(gene_symbol)
        if not pharos_hits.empty:
            first_hit = pharos_hits.iloc[0]
            summary["tdl"] = first_hit.get("tdl", "")
            summary["novelty"] = first_hit.get("novelty", 0)
    except Exception:
        summary["tdl"] = "Unknown"

    tdl_label = summary.get("tdl", "?")
    print(f"Integrated profile {gene_symbol}: TDL={tdl_label}, "
          f"BindingDB={summary['bindingdb_ligand_count']} ligands")
    return summary
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
## References
|
|
273
|
+
|
|
274
|
+
### Output Files
|
|
275
|
+
|
|
276
|
+
| ファイル | 形式 |
|
|
277
|
+
|---|---|
|
|
278
|
+
| `results/bindingdb_ligands.csv` | CSV |
|
|
279
|
+
| `results/gpcrdb_profile.json` | JSON |
|
|
280
|
+
| `results/gtopdb_interactions.csv` | CSV |
|
|
281
|
+
| `results/pharos_targets.csv` | CSV |
|
|
282
|
+
| `results/integrated_target_profile.json` | JSON |
|
|
283
|
+
|
|
284
|
+
### 利用可能ツール
|
|
285
|
+
|
|
286
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
287
|
+
|---|---|---|
|
|
288
|
+
| BindingDB | `BindingDB_get_ligands_by_uniprot` | UniProt→リガンド |
|
|
289
|
+
| BindingDB | `BindingDB_get_ligands_by_uniprots` | バッチ |
|
|
290
|
+
| BindingDB | `BindingDB_get_ligands_by_pdb` | PDB→リガンド |
|
|
291
|
+
| BindingDB | `BindingDB_get_targets_by_compound` | 化合物→ターゲット |
|
|
292
|
+
| GPCRdb | `GPCRdb_get_protein` | GPCR 詳細 |
|
|
293
|
+
| GPCRdb | `GPCRdb_get_ligands` | GPCR リガンド |
|
|
294
|
+
| GPCRdb | `GPCRdb_get_mutations` | GPCR 変異 |
|
|
295
|
+
| GPCRdb | `GPCRdb_get_structures` | GPCR 構造 |
|
|
296
|
+
| GPCRdb | `GPCRdb_list_proteins` | GPCR 一覧 |
|
|
297
|
+
| GtoPdb | `GtoPdb_get_target` | ターゲット情報 |
|
|
298
|
+
| GtoPdb | `GtoPdb_get_target_interactions` | 相互作用 |
|
|
299
|
+
| GtoPdb | `GtoPdb_get_ligand` | リガンド情報 |
|
|
300
|
+
| GtoPdb | `GtoPdb_get_targets` | ターゲット一覧 |
|
|
301
|
+
| GtoPdb | `GtoPdb_list_ligands` | リガンド一覧 |
|
|
302
|
+
| GtoPdb | `GtoPdb_get_disease` | 疾患関連 |
|
|
303
|
+
| GtoPdb | `GtoPdb_list_diseases` | 疾患一覧 |
|
|
304
|
+
| GtoPdb | `GtoPdb_search_interactions` | 相互作用検索 |
|
|
305
|
+
| BRENDA | `BRENDA_get_inhibitors` | 酵素阻害剤 |
|
|
306
|
+
| Pharos | `Pharos_search_targets` | ターゲット検索 |
|
|
307
|
+
| Pharos | `Pharos_get_target` | ターゲット詳細 |
|
|
308
|
+
| Pharos | `Pharos_get_tdl_summary` | TDL サマリー |
|
|
309
|
+
| Pharos | `Pharos_get_disease_targets` | 疾患→ターゲット |
|
|
310
|
+
|
|
311
|
+
### 参照スキル
|
|
312
|
+
|
|
313
|
+
| スキル | 関連 |
|
|
314
|
+
|---|---|
|
|
315
|
+
| `scientific-drug-target-interaction` | DTI 予測 |
|
|
316
|
+
| `scientific-compound-similarity` | 化合物類似性 |
|
|
317
|
+
| `scientific-compound-screening` | 化合物スクリーニング |
|
|
318
|
+
| `scientific-molecular-docking` | 分子ドッキング |
|
|
319
|
+
| `scientific-protein-interaction-network` | PPI ネットワーク |
|
|
320
|
+
|
|
321
|
+
### 依存パッケージ
|
|
322
|
+
|
|
323
|
+
`requests`, `pandas`
|
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-rare-disease-genetics
|
|
3
|
+
description: |
|
|
4
|
+
希少疾患遺伝学スキル。OMIM 遺伝子-疾患マッピング、Orphanet 希少疾患
|
|
5
|
+
分類・遺伝子照会、DisGeNET 疾患-遺伝子関連スコア、IMPC マウス表現型
|
|
6
|
+
参照、遺伝子-表現型統合解析パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Rare Disease Genetics
|
|
10
|
+
|
|
11
|
+
OMIM / Orphanet / DisGeNET / IMPC を統合した
|
|
12
|
+
希少疾患遺伝学パイプラインを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- 希少疾患の原因遺伝子を同定するとき
|
|
17
|
+
- OMIM で遺伝子-疾患の Mendelian 関連を調べるとき
|
|
18
|
+
- Orphanet で希少疾患分類や有病率を検索するとき
|
|
19
|
+
- DisGeNET で疾患-遺伝子関連スコア (GDA) を取得するとき
|
|
20
|
+
- IMPC マウスノックアウト表現型と比較するとき
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Quick Start
|
|
25
|
+
|
|
26
|
+
## 1. OMIM 遺伝子-疾患マッピング
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
import requests
|
|
30
|
+
import pandas as pd
|
|
31
|
+
|
|
32
|
+
# Base URL of the OMIM REST API; search_omim appends "/entry/search" to it.
OMIM_API = "https://api.omim.org/api"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def search_omim(query, api_key, include="geneMap"):
    """
    Search the OMIM database.

    Parameters:
        query: str — search term (gene name, disease name)
        api_key: str — OMIM API key
        include: str — "geneMap", "clinicalSynopsis", "all"

    Returns:
        pandas.DataFrame with one row per entry and the columns
        mim_number, title, gene_symbols, chromosome, phenotypes (list of
        phenotype names) and inheritance (list of inheritance patterns,
        parallel to phenotypes).

    Raises:
        requests.HTTPError: if the API answers with an error status.

    ToolUniverse:
        OMIM_search(query=query)
        OMIM_get_entry(mim_number=mim_number)
        OMIM_get_gene_map(gene_symbol=gene_symbol)
        OMIM_get_clinical_synopsis(mim_number=mim_number)
    """
    params = {
        "search": query,
        "include": include,
        "format": "json",
        "apiKey": api_key,
    }
    resp = requests.get(f"{OMIM_API}/entry/search", params=params)
    resp.raise_for_status()
    data = resp.json()

    entries = data.get("omim", {}).get("searchResponse", {}).get("entryList", [])
    results = []
    for entry in entries:
        e = entry.get("entry", {})
        gene_map = e.get("geneMap", {})

        # OMIM wraps each list item in an object named after its singular
        # ({"phenotypeMap": {...}}), just like {"entry": {...}} above.
        # Unwrap it, but accept an already-flat item as well.
        phenotype_maps = [
            item.get("phenotypeMap", item)
            for item in gene_map.get("phenotypeMapList", [])
        ]

        results.append({
            "mim_number": e.get("mimNumber"),
            "title": e.get("titles", {}).get("preferredTitle", ""),
            "gene_symbols": gene_map.get("geneSymbols", ""),
            "chromosome": gene_map.get("computedCytoLocation", ""),
            "phenotypes": [pm.get("phenotype", "") for pm in phenotype_maps],
            # phenotypeMappingKey is a mapping-evidence code, not an
            # inheritance pattern; the pattern is phenotypeInheritance
            # (which may be null, hence the "or").
            "inheritance": [
                pm.get("phenotypeInheritance") or "" for pm in phenotype_maps
            ],
        })

    df = pd.DataFrame(results)
    print(f"OMIM search '{query}': {len(df)} entries")
    return df
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## 2. Orphanet 希少疾患分類
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
# Base URL of the Orphadata API; search_orphanet_diseases queries
# "/rd-cross-referencing" beneath it.
ORPHANET_API = "https://api.orphadata.com"
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def search_orphanet_diseases(query):
    """
    Search Orphanet rare diseases.

    Parameters:
        query — search text, sent as the "query" request parameter

    Returns:
        pandas.DataFrame with the columns orpha_code, name,
        prevalence_class, inheritance, age_of_onset and genes. A response
        that is not a JSON list is treated as a single record.

    Raises:
        requests.HTTPError: if the API answers with an error status.

    ToolUniverse:
        Orphanet_search_diseases(query=query)
        Orphanet_search_by_name(name=query)
        Orphanet_get_disease(orpha_code=code)
        Orphanet_get_genes(orpha_code=code)
        Orphanet_get_classification(orpha_code=code)
    """
    endpoint = f"{ORPHANET_API}/rd-cross-referencing"
    reply = requests.get(endpoint, params={"query": query})
    reply.raise_for_status()
    payload = reply.json()

    # Normalise to a list so one code path handles both response shapes.
    records = payload if isinstance(payload, list) else [payload]

    rows = [
        {
            "orpha_code": record.get("ORPHAcode", ""),
            "name": record.get("Preferred term", ""),
            "prevalence_class": record.get("Prevalence", {}).get("PrevalenceClass", ""),
            "inheritance": record.get("TypeOfInheritance", []),
            "age_of_onset": record.get("AgeOfOnset", []),
            "genes": record.get("DisorderGeneAssociationList", []),
        }
        for record in records
    ]

    table = pd.DataFrame(rows)
    print(f"Orphanet search '{query}': {len(table)} diseases")
    return table
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## 3. DisGeNET 疾患-遺伝子関連スコア
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
# Base URL of the DisGeNET API; get_disease_gene_associations requests
# "/gda/disease/<id>" beneath it.
DISGENET_API = "https://www.disgenet.org/api"
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def get_disease_gene_associations(disease_id, api_key):
    """
    Fetch disease-gene associations ranked by DisGeNET GDA score.

    Parameters:
        disease_id: str — UMLS CUI (e.g., "C0023264") or disease name
        api_key: str — DisGeNET API key (sent as a Bearer token)

    Returns:
        pandas.DataFrame with the columns gene_symbol, gene_id, gda_score,
        ei, el, n_pmids and source, sorted by gda_score in descending
        order (the frame has no columns when nothing is returned).

    Raises:
        requests.HTTPError: if the API answers with an error status.

    ToolUniverse:
        DisGeNET_search_disease(query=disease_id)
        DisGeNET_get_disease_genes(disease_id=disease_id)
        DisGeNET_search_gene(query=gene)
        DisGeNET_get_gene_diseases(gene_symbol=gene)
        DisGeNET_get_variant_diseases(variant_id=variant)
    """
    headers = {"Authorization": f"Bearer {api_key}"}
    resp = requests.get(
        f"{DISGENET_API}/gda/disease/{disease_id}",
        headers=headers
    )
    resp.raise_for_status()
    data = resp.json()

    results = [
        {
            "gene_symbol": gda.get("gene_symbol", ""),
            "gene_id": gda.get("geneid", ""),
            "gda_score": gda.get("score", 0),
            "ei": gda.get("ei", 0),   # Evidence Index
            "el": gda.get("el", ""),  # Evidence Level
            "n_pmids": gda.get("pmid_count", 0),
            "source": gda.get("source", ""),
        }
        for gda in data
    ]

    df = pd.DataFrame(results)
    if not df.empty:
        df = df.sort_values("gda_score", ascending=False)

    # Build the message in two steps: a conditional expression placed after
    # the concatenated f-strings applies to the whole message, which made
    # an empty result print a blank line instead of the zero count.
    message = f"DisGeNET '{disease_id}': {len(df)} gene associations"
    if not df.empty:
        message += f", top GDA score={df['gda_score'].max():.3f}"
    print(message)
    return df
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## 4. IMPC マウス表現型参照
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
# Base URL of the IMPC Solr service; get_impc_mouse_phenotypes queries the
# "genotype-phenotype" core beneath it.
IMPC_API = "https://www.ebi.ac.uk/mi/impc/solr"
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def get_impc_mouse_phenotypes(gene_symbol):
    """
    Fetch IMPC mouse knockout phenotype data for a gene.

    Parameters:
        gene_symbol — marker symbol, used in the query
            "marker_symbol:<gene_symbol>" (at most 100 rows are requested)

    Returns:
        pandas.DataFrame with the columns gene_symbol, mp_term_id,
        mp_term_name, top_level_mp, p_value, effect_size, zygosity and
        procedure_name, sorted by ascending p_value when non-empty.

    Raises:
        requests.HTTPError: if the service answers with an error status.

    ToolUniverse:
        IMPC_search_genes(query=gene_symbol)
        IMPC_get_gene_summary(gene_symbol=gene_symbol)
        IMPC_get_phenotypes_by_gene(gene_symbol=gene_symbol)
        IMPC_get_gene_phenotype_hits(gene_symbol=gene_symbol)
    """
    solr_query = {
        "q": f"marker_symbol:{gene_symbol}",
        "rows": 100,
        "wt": "json",
    }
    reply = requests.get(f"{IMPC_API}/genotype-phenotype/select", params=solr_query)
    reply.raise_for_status()
    docs = reply.json().get("response", {}).get("docs", [])

    rows = [
        {
            "gene_symbol": doc.get("marker_symbol", ""),
            "mp_term_id": doc.get("mp_term_id", ""),
            "mp_term_name": doc.get("mp_term_name", ""),
            "top_level_mp": doc.get("top_level_mp_term_name", []),
            "p_value": doc.get("p_value", None),
            "effect_size": doc.get("effect_size", None),
            "zygosity": doc.get("zygosity", ""),
            "procedure_name": doc.get("procedure_name", ""),
        }
        for doc in docs
    ]

    table = pd.DataFrame(rows)
    can_sort = not table.empty and "p_value" in table.columns
    if can_sort:
        # Most significant associations first.
        table = table.sort_values("p_value")

    print(f"IMPC '{gene_symbol}': {len(table)} phenotype associations")
    return table
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
## 5. 遺伝子-表現型統合解析
|
|
222
|
+
|
|
223
|
+
```python
|
|
224
|
+
def rare_disease_gene_analysis(gene_symbol, omim_api_key=None,
                               disgenet_api_key=None):
    """
    Build a rare-disease gene profile that combines all databases.

    OMIM and DisGeNET are queried only when their API key is given;
    Orphanet and IMPC are always queried. Every lookup is best-effort: an
    exception is recorded as {"error": <message>} for that source instead
    of being raised.

    Parameters:
        gene_symbol — gene symbol handed to every lookup
        omim_api_key — OMIM API key, or a falsy value to skip OMIM
        disgenet_api_key — DisGeNET API key, or a falsy value to skip it

    Returns:
        dict {"gene": gene_symbol, "sources": {...}} whose "sources" holds
        one summary (or error) dict per queried database, keyed "omim",
        "orphanet", "disgenet" and "impc".
    """
    sources = {}
    profile = {"gene": gene_symbol, "sources": sources}

    # 1. OMIM (needs a key)
    if omim_api_key:
        try:
            omim_hits = search_omim(gene_symbol, omim_api_key)
            entry_count = len(omim_hits)
            if omim_hits.empty:
                phenotype_names = []
            else:
                phenotype_names = (
                    omim_hits["phenotypes"].explode().dropna().unique().tolist()
                )
            sources["omim"] = {
                "entries": entry_count,
                "phenotypes": phenotype_names,
            }
        except Exception as exc:
            sources["omim"] = {"error": str(exc)}

    # 2. Orphanet
    try:
        orpha_hits = search_orphanet_diseases(gene_symbol)
        disease_count = len(orpha_hits)
        disease_names = [] if orpha_hits.empty else orpha_hits["name"].tolist()
        sources["orphanet"] = {
            "diseases": disease_count,
            "names": disease_names,
        }
    except Exception as exc:
        sources["orphanet"] = {"error": str(exc)}

    # 3. DisGeNET (needs a key)
    # NOTE(review): the gene symbol is passed to a helper whose parameter
    # is a disease identifier — confirm this is the intended lookup.
    if disgenet_api_key:
        try:
            gda_hits = get_disease_gene_associations(gene_symbol, disgenet_api_key)
            association_count = len(gda_hits)
            if gda_hits.empty:
                best_score = 0
            else:
                best_score = float(gda_hits["gda_score"].max())
            sources["disgenet"] = {
                "associations": association_count,
                "max_gda_score": best_score,
            }
        except Exception as exc:
            sources["disgenet"] = {"error": str(exc)}

    # 4. IMPC
    try:
        impc_hits = get_impc_mouse_phenotypes(gene_symbol)
        phenotype_count = len(impc_hits)
        if impc_hits.empty:
            leading_terms = []
        else:
            leading_terms = impc_hits["mp_term_name"].head(5).tolist()
        sources["impc"] = {
            "phenotypes": phenotype_count,
            "top_phenotypes": leading_terms,
        }
    except Exception as exc:
        sources["impc"] = {"error": str(exc)}

    # Count the sources that produced a summary rather than an error.
    n_sources = sum("error" not in summary for summary in sources.values())
    print(f"Rare disease profile '{gene_symbol}': {n_sources}/4 sources OK")
    return profile
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
## References
|
|
282
|
+
|
|
283
|
+
### Output Files
|
|
284
|
+
|
|
285
|
+
| ファイル | 形式 |
|
|
286
|
+
|---|---|
|
|
287
|
+
| `results/omim_search.csv` | CSV |
|
|
288
|
+
| `results/orphanet_diseases.csv` | CSV |
|
|
289
|
+
| `results/disgenet_gda.csv` | CSV |
|
|
290
|
+
| `results/impc_phenotypes.csv` | CSV |
|
|
291
|
+
| `results/rare_disease_profile.json` | JSON |
|
|
292
|
+
|
|
293
|
+
### 利用可能ツール
|
|
294
|
+
|
|
295
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
296
|
+
|---|---|---|
|
|
297
|
+
| OMIM | `OMIM_search` | 遺伝子/疾患検索 |
|
|
298
|
+
| OMIM | `OMIM_get_entry` | MIM エントリ取得 |
|
|
299
|
+
| OMIM | `OMIM_get_gene_map` | 遺伝子マップ |
|
|
300
|
+
| OMIM | `OMIM_get_clinical_synopsis` | 臨床概要 |
|
|
301
|
+
| Orphanet | `Orphanet_search_diseases` | 希少疾患検索 |
|
|
302
|
+
| Orphanet | `Orphanet_get_disease` | 疾患詳細 |
|
|
303
|
+
| Orphanet | `Orphanet_get_genes` | 関連遺伝子 |
|
|
304
|
+
| Orphanet | `Orphanet_get_classification` | 分類情報 |
|
|
305
|
+
| Orphanet | `Orphanet_search_by_name` | 名前検索 |
|
|
306
|
+
| DisGeNET | `DisGeNET_search_disease` | 疾患検索 |
|
|
307
|
+
| DisGeNET | `DisGeNET_get_disease_genes` | 疾患遺伝子 |
|
|
308
|
+
| DisGeNET | `DisGeNET_get_gene_diseases` | 遺伝子疾患 |
|
|
309
|
+
| DisGeNET | `DisGeNET_get_variant_diseases` | バリアント疾患 |
|
|
310
|
+
| IMPC | `IMPC_search_genes` | 遺伝子検索 |
|
|
311
|
+
| IMPC | `IMPC_get_gene_summary` | 遺伝子サマリー |
|
|
312
|
+
| IMPC | `IMPC_get_phenotypes_by_gene` | 表現型取得 |
|
|
313
|
+
| IMPC | `IMPC_get_gene_phenotype_hits` | ヒット数 |
|
|
314
|
+
|
|
315
|
+
### 参照スキル
|
|
316
|
+
|
|
317
|
+
| スキル | 関連 |
|
|
318
|
+
|---|---|
|
|
319
|
+
| `scientific-disease-research` | GWAS/Orphanet 疾患研究 |
|
|
320
|
+
| `scientific-variant-interpretation` | ACMG バリアント解釈 |
|
|
321
|
+
| `scientific-variant-effect-prediction` | 病原性予測 |
|
|
322
|
+
| `scientific-population-genetics` | 集団遺伝学 |
|
|
323
|
+
| `scientific-human-protein-atlas` | タンパク質発現 |
|
|
324
|
+
|
|
325
|
+
### 依存パッケージ
|
|
326
|
+
|
|
327
|
+
`requests`, `pandas`
|