@nahisaho/satori 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +106 -39
- package/package.json +1 -1
- package/src/.github/skills/scientific-biomedical-pubtator/SKILL.md +331 -0
- package/src/.github/skills/scientific-cell-line-resources/SKILL.md +258 -0
- package/src/.github/skills/scientific-ebi-databases/SKILL.md +280 -0
- package/src/.github/skills/scientific-ontology-enrichment/SKILL.md +340 -0
- package/src/.github/skills/scientific-phylogenetics/SKILL.md +297 -0
- package/src/.github/skills/scientific-preprint-archive/SKILL.md +476 -0
- package/src/.github/skills/scientific-public-health-data/SKILL.md +322 -0
- package/src/.github/skills/scientific-regulatory-genomics/SKILL.md +274 -0
- package/src/.github/skills/scientific-reinforcement-learning/SKILL.md +280 -0
- package/src/.github/skills/scientific-symbolic-mathematics/SKILL.md +277 -0
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-ebi-databases
|
|
3
|
+
description: |
|
|
4
|
+
EBI データベース群統合アクセススキル。EBI Search 横断検索、ENA Browser
|
|
5
|
+
ヌクレオチドアーカイブ、BioStudies 研究データ、dbfetch エントリ取得、
|
|
6
|
+
MetaboLights メタボロミクスリポジトリの統合パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific EBI Databases
|
|
10
|
+
|
|
11
|
+
EBI Search / ENA Browser / BioStudies / dbfetch / MetaboLights を統合した
|
|
12
|
+
EBI データベース群アクセスパイプラインを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- EBI Search で複数データベースを横断検索するとき
|
|
17
|
+
- ENA (European Nucleotide Archive) で配列データを検索するとき
|
|
18
|
+
- BioStudies で研究プロジェクトデータを探すとき
|
|
19
|
+
- dbfetch でエントリを一括取得するとき
|
|
20
|
+
- MetaboLights でメタボロミクス実験データにアクセスするとき
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Quick Start
|
|
25
|
+
|
|
26
|
+
## 1. EBI Search 横断検索
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
import requests
|
|
30
|
+
import pandas as pd
|
|
31
|
+
|
|
32
|
+
# Base URL of the EBI Search REST service; search_ebi appends the domain name to it.
EBI_SEARCH_API = "https://www.ebi.ac.uk/ebisearch/ws/rest"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def search_ebi(query, domain="allebi", size=25, fields=None):
    """
    Run an EBI Search query against one search domain and tabulate the hits.

    Parameters:
        query: str — search query
        domain: str — search domain ("allebi", "uniprot", "pdb", "ena", etc.)
        size: int — maximum number of entries to request
        fields: list — names of the fields to return (sent comma-joined)

    Returns:
        pandas.DataFrame with one row per entry: "id", "source", plus one
        column per returned field holding that field's first value
        ("" when the field has no values).

    Raises:
        requests.HTTPError: when the service answers with an error status.

    ToolUniverse:
        EBI_Search_query(query=query, domain=domain)
        EBI_Search_get_entry(domain=domain, entry_id=entry_id)
    """
    request_params = {"query": query, "size": size, "format": "json"}
    if fields:
        request_params["fields"] = ",".join(fields)

    response = requests.get(f"{EBI_SEARCH_API}/{domain}", params=request_params)
    response.raise_for_status()
    payload = response.json()

    rows = []
    for entry in payload.get("entries", []):
        record = {"id": entry.get("id", ""), "source": entry.get("source", "")}
        # Each field maps to a list of values; only the first one is kept.
        record.update({
            name: entry["fields"][name][0] if entry["fields"][name] else ""
            for name in entry.get("fields", {})
        })
        rows.append(record)

    df = pd.DataFrame(rows)
    total = payload.get("hitCount", 0)
    print(f"EBI Search [{domain}] '{query}': {total} total hits, {len(df)} returned")
    return df
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## 2. ENA (European Nucleotide Archive) 配列検索
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
# Base URL of the ENA Browser API; search_ena and get_ena_entry build their endpoints from it.
ENA_API = "https://www.ebi.ac.uk/ena/browser/api"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def search_ena(query, result_type="sequence", limit=100):
    """
    Search the ENA (European Nucleotide Archive).

    Parameters:
        query: str — search query or Taxon ID
        result_type: str — "sequence", "read_run", "analysis", "study"
        limit: int — maximum number of records to request

    Returns:
        pandas.DataFrame built from the JSON payload when it is a list;
        an empty DataFrame for any other payload shape.

    Raises:
        requests.HTTPError: when the service answers with an error status.

    ToolUniverse:
        ENA_search(query=query, result=result_type)
        ENA_get_entry(accession=accession)
    """
    response = requests.get(
        f"{ENA_API}/search",
        params={
            "query": query,
            "result": result_type,
            "limit": limit,
            "format": "json",
        },
    )
    response.raise_for_status()
    payload = response.json()

    # Only a JSON array is tabulated; anything else yields an empty frame.
    if isinstance(payload, list):
        df = pd.DataFrame(payload)
    else:
        df = pd.DataFrame()
    print(f"ENA search '{query}' [{result_type}]: {len(df)} entries")
    return df
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def get_ena_entry(accession, display="json"):
    """
    Fetch a single ENA entry by accession number.

    Parameters:
        accession: str — ENA accession (e.g., "ERS000001", "ERR000001")
        display: str — value sent as the "display" query parameter

    Returns:
        The decoded JSON body when display == "json", otherwise the raw
        response text.

    Raises:
        requests.HTTPError: when the service answers with an error status.
    """
    entry_url = f"{ENA_API}/entry/{accession}"
    response = requests.get(entry_url, params={"display": display})
    response.raise_for_status()
    print(f"ENA entry {accession}: retrieved")
    if display == "json":
        return response.json()
    return response.text
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## 3. BioStudies 研究データ検索
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
# Base URL of the BioStudies API (v1); search_biostudies calls its /search endpoint.
BIOSTUDIES_API = "https://www.ebi.ac.uk/biostudies/api/v1"
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def search_biostudies(query, page_size=25):
    """
    Search BioStudies for research project records.

    Parameters:
        query: str — search query
        page_size: int — page size sent as "pageSize"

    Returns:
        pandas.DataFrame with columns "accession", "title", "author",
        "release_date", "type", "files" and "links", one row per hit.

    Raises:
        requests.HTTPError: when the service answers with an error status.

    ToolUniverse:
        BioStudies_search(query=query)
        BioStudies_get_study(accession=accession)
    """
    response = requests.get(
        f"{BIOSTUDIES_API}/search",
        params={"query": query, "pageSize": page_size},
    )
    response.raise_for_status()
    payload = response.json()

    # Map the service's hit keys onto the column names used by this skill.
    records = [
        {
            "accession": hit.get("accno", ""),
            "title": hit.get("title", ""),
            "author": hit.get("author", ""),
            "release_date": hit.get("rtime", ""),
            "type": hit.get("type", ""),
            "files": hit.get("files", 0),
            "links": hit.get("links", 0),
        }
        for hit in payload.get("hits", [])
    ]

    df = pd.DataFrame(records)
    total = payload.get("totalHits", 0)
    print(f"BioStudies search '{query}': {total} total, {len(df)} returned")
    return df
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
## 4. dbfetch エントリ一括取得
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
# dbfetch endpoint URL; dbfetch() sends all of its arguments to it as query parameters.
DBFETCH_API = "https://www.ebi.ac.uk/Tools/dbfetch/dbfetch"
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def dbfetch(db, ids, format_type="json", style="raw"):
    """
    Fetch one or more entries from an EBI database through dbfetch.

    Parameters:
        db: str — database name (e.g., "uniprotkb", "embl", "pdb")
        ids: list | tuple | str — entry IDs; a list or tuple is joined with
            commas (items are converted with str()), any other value is sent
            unchanged as a single ID
        format_type: str — output format ("json", "fasta", "xml")
        style: str — style ("raw", "html")

    Returns:
        The decoded JSON body when format_type == "json", otherwise the raw
        response text.

    Raises:
        requests.HTTPError: when the service answers with an error status.

    ToolUniverse:
        dbfetch_get_entries(db=db, ids=ids, format=format_type)
    """
    if isinstance(ids, (list, tuple)):
        # Tuples are handled like lists so they are not passed through
        # unjoined and miscounted as one entry; str() lets numeric IDs
        # through the join.
        ids_str = ",".join(str(entry_id) for entry_id in ids)
        entry_count = len(ids)
    else:
        ids_str = ids
        entry_count = 1

    params = {
        "db": db,
        "id": ids_str,
        "format": format_type,
        "style": style,
    }
    resp = requests.get(DBFETCH_API, params=params)
    resp.raise_for_status()

    print(f"dbfetch [{db}]: {entry_count} entries, "
          f"format={format_type}")
    if format_type == "json":
        return resp.json()
    return resp.text
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
## 5. MetaboLights メタボロミクスリポジトリ
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
# Base URL of the MetaboLights web service; the study search and study lookup endpoints hang off it.
METABOLIGHTS_API = "https://www.ebi.ac.uk/metabolights/ws"
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def search_metabolights(query):
    """
    Search MetaboLights for metabolomics studies.

    Parameters:
        query: str — search query (compound name, disease name, organism)

    Returns:
        pandas.DataFrame with columns "study_id", "title", "organism",
        "description" (truncated to 200 characters), "submission_date"
        and "status", one row per study.

    Raises:
        requests.HTTPError: when the service answers with an error status.

    ToolUniverse:
        MetaboLights_search_studies(query=query)
        MetaboLights_get_study(study_id=study_id)
    """
    search_url = f"{METABOLIGHTS_API}/studies/search"
    response = requests.get(search_url, params={"query": query})
    response.raise_for_status()
    payload = response.json()

    records = [
        {
            "study_id": study.get("studyIdentifier", ""),
            "title": study.get("title", ""),
            "organism": study.get("organism", ""),
            # A missing or null description becomes "" before truncation.
            "description": (study.get("description") or "")[:200],
            "submission_date": study.get("submissionDate", ""),
            "status": study.get("studyStatus", ""),
        }
        for study in payload.get("content", [])
    ]

    df = pd.DataFrame(records)
    print(f"MetaboLights search '{query}': {len(df)} studies")
    return df
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def get_metabolights_study(study_id):
    """
    Fetch a single MetaboLights study.

    Parameters:
        study_id: str — study identifier appended to the /studies/ endpoint

    Returns:
        The decoded JSON body of the response, unmodified.

    Raises:
        requests.HTTPError: when the service answers with an error status.
    """
    resp = requests.get(f"{METABOLIGHTS_API}/studies/{study_id}")
    resp.raise_for_status()
    data = resp.json()
    # A JSON null title would make the slice fail, so fall back to "" the
    # same way search_metabolights does for descriptions.
    title = data.get("title") or ""
    print(f"MetaboLights {study_id}: {title[:80]}")
    return data
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
## 利用可能ツール
|
|
253
|
+
|
|
254
|
+
| ToolUniverse カテゴリ | 主なツール |
|
|
255
|
+
|---|---|
|
|
256
|
+
| `ebi_search` | `EBI_Search_query`, `EBI_Search_get_entry` |
|
|
257
|
+
| `ena_browser` | `ENA_search`, `ENA_get_entry` |
|
|
258
|
+
| `biostudies` | `BioStudies_search`, `BioStudies_get_study` |
|
|
259
|
+
| `dbfetch` | `dbfetch_get_entries` |
|
|
260
|
+
| `metabolights` | `MetaboLights_search_studies`, `MetaboLights_get_study` |
|
|
261
|
+
|
|
262
|
+
## パイプライン出力
|
|
263
|
+
|
|
264
|
+
| 出力ファイル | 説明 | 連携先スキル |
|
|
265
|
+
|---|---|---|
|
|
266
|
+
| `results/ebi_search.csv` | EBI 横断検索結果 | → bioinformatics, literature-search |
|
|
267
|
+
| `results/ena_sequences.fasta` | ENA 配列データ | → genome-sequence-tools, sequence-analysis |
|
|
268
|
+
| `results/biostudies_metadata.json` | 研究プロジェクト情報 | → multi-omics, systematic-review |
|
|
269
|
+
| `results/metabolights_study.json` | メタボロミクスデータ | → metabolomics, metabolomics-databases |
|
|
270
|
+
|
|
271
|
+
## パイプライン統合
|
|
272
|
+
|
|
273
|
+
```
|
|
274
|
+
genome-sequence-tools ──→ ebi-databases ──→ metabolomics-databases
|
|
275
|
+
(NCBI/BLAST) (ENA/EBI Search) (HMDB/MetaCyc)
|
|
276
|
+
│
|
|
277
|
+
├──→ bioinformatics (配列データ)
|
|
278
|
+
├──→ sequence-analysis (FASTA)
|
|
279
|
+
└──→ structural-proteomics (PDBe cross-ref)
|
|
280
|
+
```
|
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-ontology-enrichment
|
|
3
|
+
description: |
|
|
4
|
+
オントロジー・エンリッチメント解析スキル。EFO 実験ファクターオントロジー、
|
|
5
|
+
OLS オントロジー検索サービス、Enrichr 遺伝子セット濃縮解析、
|
|
6
|
+
UMLS メタシソーラス統一医学言語体系の統合パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Ontology Enrichment
|
|
10
|
+
|
|
11
|
+
EFO / OLS / Enrichr / UMLS を統合した
|
|
12
|
+
オントロジー検索・エンリッチメント解析パイプラインを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- EFO で実験条件 (疾患・細胞型・組織) のオントロジー ID を取得するとき
|
|
17
|
+
- OLS で複数オントロジー横断検索 (HP, MONDO, DOID, GO, CHEBI) するとき
|
|
18
|
+
- Enrichr で遺伝子リストの濃縮解析を行うとき
|
|
19
|
+
- UMLS CUI で異なる用語体系間のマッピングを行うとき
|
|
20
|
+
- GWAS Catalog の trait を EFO 用語で標準化するとき
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Quick Start
|
|
25
|
+
|
|
26
|
+
## 1. EFO 実験ファクターオントロジー
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
import requests
|
|
30
|
+
import pandas as pd
|
|
31
|
+
|
|
32
|
+
# Base URL of the OLS4 API; used by search_efo, search_ols and get_ols_term_hierarchy.
OLS_API = "https://www.ebi.ac.uk/ols4/api"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def search_efo(query, exact=False):
    """
    Search EFO (Experimental Factor Ontology) terms through the OLS search endpoint.

    Parameters:
        query: str — search term (disease name, cell type, tissue name, etc.)
        exact: bool — exact-match search; sent as "true"/"false"

    Returns:
        pandas.DataFrame with columns "efo_id", "label", "description"
        (first description, truncated to 200 characters), "iri",
        "ontology", "is_defining_ontology" and "synonyms". At most 30 rows
        are requested.

    Raises:
        requests.HTTPError: when the service answers with an error status.

    ToolUniverse:
        EFO_search(query=query, exact=exact)
    """
    response = requests.get(
        f"{OLS_API}/search",
        params={
            "q": query,
            "ontology": "efo",
            "exact": str(exact).lower(),
            "rows": 30,
        },
    )
    response.raise_for_status()
    docs = response.json().get("response", {}).get("docs", [])

    records = [
        {
            "efo_id": doc.get("obo_id", ""),
            "label": doc.get("label", ""),
            # "description" is indexed as a list; a missing/empty one becomes "".
            "description": (doc.get("description") or [""])[0][:200],
            "iri": doc.get("iri", ""),
            "ontology": doc.get("ontology_name", ""),
            "is_defining_ontology": doc.get("is_defining_ontology", False),
            "synonyms": doc.get("synonym", []),
        }
        for doc in docs
    ]

    df = pd.DataFrame(records)
    print(f"EFO search '{query}': {len(df)} terms")
    return df
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## 2. OLS マルチオントロジー検索
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
def search_ols(query, ontologies=None, type_filter=None):
    """
    Search across ontologies with OLS (Ontology Lookup Service).

    Parameters:
        query: str — search term
        ontologies: list — ontology IDs (e.g., ["hp", "mondo", "go"]);
            sent comma-joined, omitted when empty or None
        type_filter: str — "class", "property", "individual"

    Returns:
        pandas.DataFrame with columns "obo_id", "label", "ontology",
        "description" (first description, truncated to 200 characters),
        "iri", "synonyms" and "has_children". At most 50 rows are requested.

    Raises:
        requests.HTTPError: when the service answers with an error status.

    ToolUniverse:
        OLS_search(query=query, ontology=ontology)
        OLS_get_term(ontology=ontology, iri=iri)
        OLS_get_ancestors(ontology=ontology, iri=iri)
    """
    request_params = {"q": query, "rows": 50}
    # Optional filters are only sent when the caller supplied them.
    if ontologies:
        request_params["ontology"] = ",".join(ontologies)
    if type_filter:
        request_params["type"] = type_filter

    response = requests.get(f"{OLS_API}/search", params=request_params)
    response.raise_for_status()
    docs = response.json().get("response", {}).get("docs", [])

    records = [
        {
            "obo_id": doc.get("obo_id", ""),
            "label": doc.get("label", ""),
            "ontology": doc.get("ontology_name", ""),
            "description": (doc.get("description") or [""])[0][:200],
            "iri": doc.get("iri", ""),
            "synonyms": doc.get("synonym", []),
            "has_children": doc.get("has_children", False),
        }
        for doc in docs
    ]

    df = pd.DataFrame(records)
    print(f"OLS search '{query}' "
          f"[{','.join(ontologies) if ontologies else 'all'}]: "
          f"{len(df)} terms")
    return df
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def get_ols_term_hierarchy(ontology, term_id):
    """
    Fetch the ancestors and descendants of an OLS term.

    The term IRI is built as http://purl.obolibrary.org/obo/<ID with ":"
    replaced by "_"> and percent-encoded twice before being placed in the
    request path.

    Parameters:
        ontology: str — ontology ID (e.g., "hp", "go")
        term_id: str — OBO ID (e.g., "HP:0001250")

    Returns:
        dict with keys "ancestors" and "descendants", each a list of
        {"id": ..., "label": ...} dicts. A request that does not return
        HTTP 200 is skipped silently and leaves its list empty.
    """
    iri = f"http://purl.obolibrary.org/obo/{term_id.replace(':', '_')}"
    quote = requests.utils.quote
    # Double encoding: the already-encoded IRI is encoded again.
    # NOTE(review): presumably required by the OLS term endpoints — confirm.
    encoded_iri = quote(quote(iri, safe=""), safe="")
    term_url = f"{OLS_API}/ontologies/{ontology}/terms/{encoded_iri}"

    relations = ("ancestors", "descendants")
    # Issue both requests up front, ancestors first, before parsing either.
    responses = [requests.get(f"{term_url}/{relation}") for relation in relations]

    hierarchy = {"ancestors": [], "descendants": []}
    for relation, response in zip(relations, responses):
        if response.status_code != 200:
            continue
        terms = response.json().get("_embedded", {}).get("terms", [])
        hierarchy[relation].extend(
            {"id": t.get("obo_id", ""), "label": t.get("label", "")}
            for t in terms
        )

    print(f"OLS hierarchy {term_id}: "
          f"{len(hierarchy['ancestors'])} ancestors, "
          f"{len(hierarchy['descendants'])} descendants")
    return hierarchy
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## 3. Enrichr 遺伝子セット濃縮解析
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
# Base URL of the Enrichr service; run_enrichr calls its /addList and /enrich endpoints.
ENRICHR_API = "https://maayanlab.cloud/Enrichr"
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def run_enrichr(gene_list, description="", gene_set_libraries=None):
    """
    Submit a gene list to Enrichr and collect enrichment results per library.

    Parameters:
        gene_list: list — gene symbols (e.g., ["TP53", "BRCA1", "EGFR"])
        description: str — description of the analysis
        gene_set_libraries: list — gene-set libraries to query; when None,
            six default libraries (GO BP/MF 2023, KEGG 2021 Human,
            Reactome 2022, WikiPathway 2023 Human, DisGeNET) are used

    Returns:
        dict mapping each library name to a pandas.DataFrame with columns
        "rank", "term", "p_value", "z_score", "combined_score",
        "overlap_genes" and "adjusted_p" (taken from positions 0-6 of each
        result row), sorted by "adjusted_p" ascending when non-empty.

    Raises:
        requests.HTTPError: when either endpoint answers with an error status.

    ToolUniverse:
        Enrichr_submit_gene_list(genes=gene_list)
        Enrichr_get_enrichment(user_list_id=id, library=library)
    """
    if gene_set_libraries is None:
        gene_set_libraries = [
            "GO_Biological_Process_2023",
            "GO_Molecular_Function_2023",
            "KEGG_2021_Human",
            "Reactome_2022",
            "WikiPathway_2023_Human",
            "DisGeNET",
        ]

    # Step 1: upload the gene list (one symbol per line) as multipart form fields.
    genes_str = "\n".join(gene_list)
    form_fields = {"list": (None, genes_str), "description": (None, description)}
    submission = requests.post(f"{ENRICHR_API}/addList", files=form_fields)
    submission.raise_for_status()
    user_list_id = submission.json().get("userListId")
    print(f"Enrichr: submitted {len(gene_list)} genes (ID={user_list_id})")

    # Step 2: query each library against the uploaded list.
    column_names = (
        "rank",
        "term",
        "p_value",
        "z_score",
        "combined_score",
        "overlap_genes",
        "adjusted_p",
    )
    all_results = {}
    for library in gene_set_libraries:
        enrichment = requests.get(
            f"{ENRICHR_API}/enrich",
            params={"userListId": user_list_id, "backgroundType": library},
        )
        enrichment.raise_for_status()
        payload = enrichment.json()

        # Result rows are positional arrays; name the first seven positions.
        rows = [
            {name: term_data[position] for position, name in enumerate(column_names)}
            for term_data in payload.get(library, [])
        ]

        df = pd.DataFrame(rows)
        if df.empty:
            sig_count = 0
        else:
            df = df.sort_values("adjusted_p")
            sig_count = (df["adjusted_p"] < 0.05).sum()
        all_results[library] = df
        print(f" {library}: {sig_count} significant terms (FDR < 0.05)")

    return all_results
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
## 4. UMLS メタシソーラスマッピング
|
|
234
|
+
|
|
235
|
+
```python
|
|
236
|
+
# Base URL of the UMLS (UTS) REST API; used by search_umls and get_umls_crosswalk.
UMLS_API = "https://uts-ws.nlm.nih.gov/rest"
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def search_umls(query, api_key, search_type="words"):
    """
    Search the UMLS Metathesaurus.

    Parameters:
        query: str — search term (disease name, symptom, drug name)
        api_key: str — UMLS API key (sent as the "apiKey" query parameter)
        search_type: str — "words", "exact", "leftTruncation"

    Returns:
        pandas.DataFrame with columns "cui", "name", "root_source" and
        "uri", one row per returned item. A page size of 25 is requested.

    Raises:
        requests.HTTPError: when the service answers with an error status.

    ToolUniverse:
        UMLS_search(query=query, search_type=search_type)
        UMLS_get_concept(cui=cui)
    """
    response = requests.get(
        f"{UMLS_API}/search/current",
        params={
            "string": query,
            "searchType": search_type,
            "apiKey": api_key,
            "pageSize": 25,
        },
    )
    response.raise_for_status()
    items = response.json().get("result", {}).get("results", [])

    records = [
        {
            "cui": item.get("ui", ""),
            "name": item.get("name", ""),
            "root_source": item.get("rootSource", ""),
            "uri": item.get("uri", ""),
        }
        for item in items
    ]

    df = pd.DataFrame(records)
    print(f"UMLS search '{query}': {len(df)} concepts")
    return df
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def get_umls_crosswalk(cui, api_key, target_source=None):
    """
    Map a UMLS CUI to terms in other vocabularies via its atoms.

    Parameters:
        cui: str — UMLS CUI (e.g., "C0023264")
        api_key: str — UMLS API key (sent as the "apiKey" query parameter)
        target_source: str — target vocabulary as a UMLS source abbreviation
            (e.g., "SNOMEDCT_US", "ICD10CM", "MSH"); sent as "sabs" and
            also used to filter the returned rows

    Returns:
        pandas.DataFrame with columns "source", "code", "name" and
        "term_type". The columns are always present, even when no atoms
        are returned.

    Raises:
        requests.HTTPError: when the service answers with an error status.
    """
    params = {"apiKey": api_key, "pageSize": 100}
    if target_source:
        params["sabs"] = target_source

    resp = requests.get(f"{UMLS_API}/content/current/CUI/{cui}/atoms", params=params)
    resp.raise_for_status()
    data = resp.json()

    mappings = [
        {
            "source": atom.get("rootSource", ""),
            "code": atom.get("sourceConcept", ""),
            "name": atom.get("name", ""),
            "term_type": atom.get("termType", ""),
        }
        for atom in data.get("result", [])
    ]

    # Fix the columns explicitly: an empty result would otherwise give a
    # column-less frame and the "source" filter below would raise KeyError.
    df = pd.DataFrame(mappings, columns=["source", "code", "name", "term_type"])
    if target_source:
        df = df[df["source"] == target_source]

    print(f"UMLS crosswalk {cui}: {len(df)} mappings "
          f"({target_source or 'all sources'})")
    return df
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
---
|
|
312
|
+
|
|
313
|
+
## 利用可能ツール
|
|
314
|
+
|
|
315
|
+
| ToolUniverse カテゴリ | 主なツール |
|
|
316
|
+
|---|---|
|
|
317
|
+
| `efo` | `EFO_search` |
|
|
318
|
+
| `ols` | `OLS_search`, `OLS_get_term`, `OLS_get_ancestors` |
|
|
319
|
+
| `enrichr` | `Enrichr_submit_gene_list`, `Enrichr_get_enrichment` |
|
|
320
|
+
| `umls` | `UMLS_search`, `UMLS_get_concept` |
|
|
321
|
+
|
|
322
|
+
## パイプライン出力
|
|
323
|
+
|
|
324
|
+
| 出力ファイル | 説明 | 連携先スキル |
|
|
325
|
+
|---|---|---|
|
|
326
|
+
| `results/efo_terms.csv` | EFO 標準化用語 | → disease-research, gene-expression |
|
|
327
|
+
| `results/enrichr_results/` | 遺伝子セット濃縮結果 | → pathway-enrichment, multi-omics |
|
|
328
|
+
| `results/umls_mapping.json` | UMLS 用語マッピング | → clinical-decision-support, public-health-data |
|
|
329
|
+
| `results/ontology_hierarchy.json` | オントロジー階層 | → text-mining-nlp, knowledge-graph |
|
|
330
|
+
|
|
331
|
+
## パイプライン統合
|
|
332
|
+
|
|
333
|
+
```
|
|
334
|
+
disease-research ──→ ontology-enrichment ──→ pathway-enrichment
|
|
335
|
+
(GWAS/DisGeNET) (EFO/OLS/UMLS/Enrichr) (KEGG/Reactome/GO)
|
|
336
|
+
│
|
|
337
|
+
├──→ biothings-idmapping (CUI→Gene→Protein)
|
|
338
|
+
├──→ public-health-data (UMLS→RxNorm)
|
|
339
|
+
└──→ clinical-reporting (SNOMED/ICD マッピング)
|
|
340
|
+
```
|