@nahisaho/satori 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,280 @@
1
+ ---
2
+ name: scientific-ebi-databases
3
+ description: |
4
+ EBI データベース群統合アクセススキル。EBI Search 横断検索、ENA Browser
5
+ ヌクレオチドアーカイブ、BioStudies 研究データ、dbfetch エントリ取得、
6
+ MetaboLights メタボロミクスリポジトリの統合パイプライン。
7
+ ---
8
+
9
+ # Scientific EBI Databases
10
+
11
+ EBI Search / ENA Browser / BioStudies / dbfetch / MetaboLights を統合した
12
+ EBI データベース群アクセスパイプラインを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - EBI Search で複数データベースを横断検索するとき
17
+ - ENA (European Nucleotide Archive) で配列データを検索するとき
18
+ - BioStudies で研究プロジェクトデータを探すとき
19
+ - dbfetch でエントリを一括取得するとき
20
+ - MetaboLights でメタボロミクス実験データにアクセスするとき
21
+
22
+ ---
23
+
24
+ ## Quick Start
25
+
26
+ ## 1. EBI Search 横断検索
27
+
28
+ ```python
29
+ import requests
30
+ import pandas as pd
31
+
32
+ EBI_SEARCH_API = "https://www.ebi.ac.uk/ebisearch/ws/rest"
33
+
34
+
35
def search_ebi(query, domain="allebi", size=25, fields=None, timeout=30):
    """
    Run an EBI Search query against one domain and tabulate the hits.

    Parameters:
        query: str — search query
        domain: str — search domain ("allebi", "uniprot", "pdb", "ena", etc.)
        size: int — maximum number of entries to request
        fields: list | str — fields to return; a list is comma-joined, a
            string is passed through unchanged
        timeout: float — seconds to wait for the HTTP response

    Returns:
        pandas.DataFrame — one row per entry with "id", "source" and one
        column per returned field (first value of the field, "" if empty).

    Raises:
        requests.HTTPError — the service answered with an error status.
        requests.Timeout — no response within `timeout` seconds.

    ToolUniverse:
        EBI_Search_query(query=query, domain=domain)
        EBI_Search_get_entry(domain=domain, entry_id=entry_id)
    """
    params = {"query": query, "size": size, "format": "json"}
    if fields:
        # Joining a plain string would insert a comma between every character.
        params["fields"] = fields if isinstance(fields, str) else ",".join(fields)

    resp = requests.get(
        f"{EBI_SEARCH_API}/{domain}", params=params, timeout=timeout
    )
    resp.raise_for_status()
    data = resp.json()

    results = []
    for entry in data.get("entries", []):
        row = {"id": entry.get("id", ""), "source": entry.get("source", "")}
        # `or {}` guards against an explicit null "fields" object.
        for name, values in (entry.get("fields") or {}).items():
            row[name] = values[0] if values else ""
        results.append(row)

    df = pd.DataFrame(results)
    total = data.get("hitCount", 0)
    print(f"EBI Search [{domain}] '{query}': {total} total hits, {len(df)} returned")
    return df
72
+ ```
73
+
74
+ ## 2. ENA (European Nucleotide Archive) 配列検索
75
+
76
+ ```python
77
+ ENA_API = "https://www.ebi.ac.uk/ena/browser/api"
78
+
79
+
80
def search_ena(query, result_type="sequence", limit=100, timeout=30):
    """
    Search the European Nucleotide Archive and return the hits as a table.

    Parameters:
        query: str — search query or taxon ID
        result_type: str — "sequence", "read_run", "analysis", "study"
        limit: int — maximum number of records to request
        timeout: float — seconds to wait for the HTTP response

    Returns:
        pandas.DataFrame — built from the JSON list in the response; empty
        when the response has no body or is not a JSON list.

    Raises:
        requests.HTTPError — the service answered with an error status.
        requests.Timeout — no response within `timeout` seconds.

    NOTE(review): this calls `{ENA_API}/search`; confirm that the configured
    base URL actually exposes a query/result search endpoint.

    ToolUniverse:
        ENA_search(query=query, result=result_type)
        ENA_get_entry(accession=accession)
    """
    params = {
        "query": query,
        "result": result_type,
        "limit": limit,
        "format": "json",
    }
    resp = requests.get(f"{ENA_API}/search", params=params, timeout=timeout)
    resp.raise_for_status()

    # A successful response without a body (no hits) cannot be JSON-decoded.
    data = resp.json() if resp.content.strip() else []

    df = pd.DataFrame(data) if isinstance(data, list) else pd.DataFrame()
    print(f"ENA search '{query}' [{result_type}]: {len(df)} entries")
    return df
106
+
107
+
108
def get_ena_entry(accession, display="json", timeout=30):
    """
    Fetch a single ENA record by accession.

    Parameters:
        accession: str — ENA accession (e.g., "ERS000001", "ERR000001")
        display: str — value sent as the "display" query parameter; "json"
            makes the function decode the body as JSON
        timeout: float — seconds to wait for the HTTP response

    Returns:
        Decoded JSON when display == "json", otherwise the raw response text.

    Raises:
        requests.HTTPError — the service answered with an error status.
        requests.Timeout — no response within `timeout` seconds.

    NOTE(review): this calls `{ENA_API}/entry/{accession}`; confirm that the
    configured base URL exposes this route.
    """
    resp = requests.get(
        f"{ENA_API}/entry/{accession}",
        params={"display": display},
        timeout=timeout,
    )
    resp.raise_for_status()
    print(f"ENA entry {accession}: retrieved")
    if display == "json":
        return resp.json()
    return resp.text
122
+ ```
123
+
124
+ ## 3. BioStudies 研究データ検索
125
+
126
+ ```python
127
+ BIOSTUDIES_API = "https://www.ebi.ac.uk/biostudies/api/v1"
128
+
129
+
130
def search_biostudies(query, page_size=25, timeout=30):
    """
    Search BioStudies and return the study hits as a table.

    Parameters:
        query: str — search query
        page_size: int — number of hits per page (only the first page is read)
        timeout: float — seconds to wait for the HTTP response

    Returns:
        pandas.DataFrame — columns: accession, title, author, release_date,
        type, files, links.

    Raises:
        requests.HTTPError — the service answered with an error status.
        requests.Timeout — no response within `timeout` seconds.

    ToolUniverse:
        BioStudies_search(query=query)
        BioStudies_get_study(accession=accession)
    """
    params = {"query": query, "pageSize": page_size}
    resp = requests.get(f"{BIOSTUDIES_API}/search", params=params, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    def first_present(hit, *keys):
        # Return the first non-empty value among alternative key spellings.
        for key in keys:
            value = hit.get(key)
            if value not in (None, ""):
                return value
        return ""

    results = [
        {
            # NOTE(review): search hits appear to use "accession" and
            # "release_date"; "accno"/"rtime" are kept as fallbacks —
            # confirm against a live response.
            "accession": first_present(hit, "accession", "accno"),
            "title": hit.get("title", ""),
            "author": hit.get("author", ""),
            "release_date": first_present(hit, "release_date", "rtime"),
            "type": hit.get("type", ""),
            "files": hit.get("files", 0),
            "links": hit.get("links", 0),
        }
        for hit in data.get("hits") or []
    ]

    df = pd.DataFrame(results)
    total = data.get("totalHits", 0)
    print(f"BioStudies search '{query}': {total} total, {len(df)} returned")
    return df
163
+ ```
164
+
165
+ ## 4. dbfetch エントリ一括取得
166
+
167
+ ```python
168
+ DBFETCH_API = "https://www.ebi.ac.uk/Tools/dbfetch/dbfetch"
169
+
170
+
171
def dbfetch(db, ids, format_type="json", style="raw", timeout=30):
    """
    Fetch one or more entries from an EBI database through dbfetch.

    Parameters:
        db: str — database name (e.g., "uniprotkb", "embl", "pdb")
        ids: list | tuple | set | str — entry IDs; any non-string iterable is
            comma-joined, a string is sent as given (it may already be a
            comma-separated list)
        format_type: str — output format ("json", "fasta", "xml")
        style: str — style ("raw", "html")
        timeout: float — seconds to wait for the HTTP response

    Returns:
        Decoded JSON when format_type == "json", otherwise the response text.

    Raises:
        requests.HTTPError — the service answered with an error status.
        requests.Timeout — no response within `timeout` seconds.

    ToolUniverse:
        dbfetch_get_entries(db=db, ids=ids, format=format_type)
    """
    if isinstance(ids, str):
        ids_str = ids
        # Count comma-separated IDs so the log line reflects the request.
        count = len([part for part in ids.split(",") if part.strip()]) or 1
    else:
        id_list = [str(item) for item in ids]
        ids_str = ",".join(id_list)
        count = len(id_list)

    params = {
        "db": db,
        "id": ids_str,
        "format": format_type,
        "style": style,
    }
    resp = requests.get(DBFETCH_API, params=params, timeout=timeout)
    resp.raise_for_status()

    print(f"dbfetch [{db}]: {count} entries, format={format_type}")
    if format_type == "json":
        return resp.json()
    return resp.text
199
+ ```
200
+
201
+ ## 5. MetaboLights メタボロミクスリポジトリ
202
+
203
+ ```python
204
+ METABOLIGHTS_API = "https://www.ebi.ac.uk/metabolights/ws"
205
+
206
+
207
def search_metabolights(query, timeout=30):
    """
    Search MetaboLights studies and return a summary table.

    Parameters:
        query: str — search query (compound name, disease name, organism)
        timeout: float — seconds to wait for the HTTP response

    Returns:
        pandas.DataFrame — columns: study_id, title, organism, description
        (truncated to 200 characters), submission_date, status.

    Raises:
        requests.HTTPError — the service answered with an error status.
        requests.Timeout — no response within `timeout` seconds.

    NOTE(review): assumes the response is an object whose "content" is a list
    of study objects — confirm against the live service.

    ToolUniverse:
        MetaboLights_search_studies(query=query)
        MetaboLights_get_study(study_id=study_id)
    """
    resp = requests.get(
        f"{METABOLIGHTS_API}/studies/search",
        params={"query": query},
        timeout=timeout,
    )
    resp.raise_for_status()
    data = resp.json()

    results = [
        {
            "study_id": study.get("studyIdentifier", ""),
            "title": study.get("title", ""),
            "organism": study.get("organism", ""),
            # `or ""` covers an explicit null description before slicing.
            "description": (study.get("description") or "")[:200],
            "submission_date": study.get("submissionDate", ""),
            "status": study.get("studyStatus", ""),
        }
        # `or []` covers an explicit null "content".
        for study in data.get("content") or []
    ]

    df = pd.DataFrame(results)
    print(f"MetaboLights search '{query}': {len(df)} studies")
    return df
239
+
240
+
241
def get_metabolights_study(study_id, timeout=30):
    """
    Fetch a single MetaboLights study.

    Parameters:
        study_id: str — study identifier appended to the /studies/ path
        timeout: float — seconds to wait for the HTTP response

    Returns:
        The decoded JSON document for the study.

    Raises:
        requests.HTTPError — the service answered with an error status.
        requests.Timeout — no response within `timeout` seconds.
    """
    resp = requests.get(f"{METABOLIGHTS_API}/studies/{study_id}", timeout=timeout)
    resp.raise_for_status()
    data = resp.json()
    # A null title would make the slice raise; treat it like a missing one.
    title = data.get("title") or ""
    print(f"MetaboLights {study_id}: {title[:80]}")
    return data
248
+ ```
249
+
250
+ ---
251
+
252
+ ## 利用可能ツール
253
+
254
+ | ToolUniverse カテゴリ | 主なツール |
255
+ |---|---|
256
+ | `ebi_search` | `EBI_Search_query`, `EBI_Search_get_entry` |
257
+ | `ena_browser` | `ENA_search`, `ENA_get_entry` |
258
+ | `biostudies` | `BioStudies_search`, `BioStudies_get_study` |
259
+ | `dbfetch` | `dbfetch_get_entries` |
260
+ | `metabolights` | `MetaboLights_search_studies`, `MetaboLights_get_study` |
261
+
262
+ ## パイプライン出力
263
+
264
+ | 出力ファイル | 説明 | 連携先スキル |
265
+ |---|---|---|
266
+ | `results/ebi_search.csv` | EBI 横断検索結果 | → bioinformatics, literature-search |
267
+ | `results/ena_sequences.fasta` | ENA 配列データ | → genome-sequence-tools, sequence-analysis |
268
+ | `results/biostudies_metadata.json` | 研究プロジェクト情報 | → multi-omics, systematic-review |
269
+ | `results/metabolights_study.json` | メタボロミクスデータ | → metabolomics, metabolomics-databases |
270
+
271
+ ## パイプライン統合
272
+
273
+ ```
274
+ genome-sequence-tools ──→ ebi-databases ──→ metabolomics-databases
275
+ (NCBI/BLAST) (ENA/EBI Search) (HMDB/MetaCyc)
276
+
277
+ ├──→ bioinformatics (配列データ)
278
+ ├──→ sequence-analysis (FASTA)
279
+ └──→ structural-proteomics (PDBe cross-ref)
280
+ ```
@@ -0,0 +1,340 @@
1
+ ---
2
+ name: scientific-ontology-enrichment
3
+ description: |
4
+ オントロジー・エンリッチメント解析スキル。EFO 実験ファクターオントロジー、
5
+ OLS オントロジー検索サービス、Enrichr 遺伝子セット濃縮解析、
6
+ UMLS メタシソーラス統一医学言語体系の統合パイプライン。
7
+ ---
8
+
9
+ # Scientific Ontology Enrichment
10
+
11
+ EFO / OLS / Enrichr / UMLS を統合した
12
+ オントロジー検索・エンリッチメント解析パイプラインを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - EFO で実験条件 (疾患・細胞型・組織) のオントロジー ID を取得するとき
17
+ - OLS で複数オントロジー横断検索 (HP, MONDO, DOID, GO, CHEBI) するとき
18
+ - Enrichr で遺伝子リストの濃縮解析を行うとき
19
+ - UMLS CUI で異なる用語体系間のマッピングを行うとき
20
+ - GWAS Catalog の trait を EFO 用語で標準化するとき
21
+
22
+ ---
23
+
24
+ ## Quick Start
25
+
26
+ ## 1. EFO 実験ファクターオントロジー
27
+
28
+ ```python
29
+ import requests
30
+ import pandas as pd
31
+
32
+ OLS_API = "https://www.ebi.ac.uk/ols4/api"
33
+
34
+
35
def search_efo(query, exact=False, rows=30, timeout=30):
    """
    Search the Experimental Factor Ontology (EFO) through OLS.

    Parameters:
        query: str — search term (disease, cell type, tissue, etc.)
        exact: bool — request exact matching
        rows: int — maximum number of hits to request
        timeout: float — seconds to wait for the HTTP response

    Returns:
        pandas.DataFrame — columns: efo_id, label, description (first entry,
        truncated to 200 characters), iri, ontology, is_defining_ontology,
        synonyms.

    Raises:
        requests.HTTPError — the service answered with an error status.
        requests.Timeout — no response within `timeout` seconds.

    ToolUniverse:
        EFO_search(query=query, exact=exact)
    """
    params = {
        "q": query,
        "ontology": "efo",
        # The service takes the flag as the lowercase text "true"/"false".
        "exact": str(exact).lower(),
        "rows": rows,
    }
    resp = requests.get(f"{OLS_API}/search", params=params, timeout=timeout)
    resp.raise_for_status()
    docs = resp.json().get("response", {}).get("docs", [])

    results = [
        {
            "efo_id": doc.get("obo_id", ""),
            "label": doc.get("label", ""),
            # "description" is a list; fall back to [""] when absent or empty.
            "description": (doc.get("description") or [""])[0][:200],
            "iri": doc.get("iri", ""),
            "ontology": doc.get("ontology_name", ""),
            "is_defining_ontology": doc.get("is_defining_ontology", False),
            "synonyms": doc.get("synonym", []),
        }
        for doc in docs
    ]

    df = pd.DataFrame(results)
    print(f"EFO search '{query}': {len(df)} terms")
    return df
71
+ ```
72
+
73
+ ## 2. OLS マルチオントロジー検索
74
+
75
+ ```python
76
def search_ols(query, ontologies=None, type_filter=None, rows=50, timeout=30):
    """
    Search across ontologies with the Ontology Lookup Service (OLS).

    Parameters:
        query: str — search term
        ontologies: list | str — ontology IDs (e.g., ["hp", "mondo", "go"]);
            a string is used as-is, None searches all ontologies
        type_filter: str — "class", "property", "individual"
        rows: int — maximum number of hits to request
        timeout: float — seconds to wait for the HTTP response

    Returns:
        pandas.DataFrame — columns: obo_id, label, ontology, description
        (first entry, truncated to 200 characters), iri, synonyms,
        has_children.

    Raises:
        requests.HTTPError — the service answered with an error status.
        requests.Timeout — no response within `timeout` seconds.

    ToolUniverse:
        OLS_search(query=query, ontology=ontology)
        OLS_get_term(ontology=ontology, iri=iri)
        OLS_get_ancestors(ontology=ontology, iri=iri)
    """
    # Joining a plain string would insert a comma between every character.
    if not ontologies:
        ontology_csv = ""
    elif isinstance(ontologies, str):
        ontology_csv = ontologies
    else:
        ontology_csv = ",".join(ontologies)

    params = {"q": query, "rows": rows}
    if ontology_csv:
        params["ontology"] = ontology_csv
    if type_filter:
        params["type"] = type_filter

    resp = requests.get(f"{OLS_API}/search", params=params, timeout=timeout)
    resp.raise_for_status()
    docs = resp.json().get("response", {}).get("docs", [])

    results = [
        {
            "obo_id": doc.get("obo_id", ""),
            "label": doc.get("label", ""),
            "ontology": doc.get("ontology_name", ""),
            "description": (doc.get("description") or [""])[0][:200],
            "iri": doc.get("iri", ""),
            "synonyms": doc.get("synonym", []),
            "has_children": doc.get("has_children", False),
        }
        for doc in docs
    ]

    df = pd.DataFrame(results)
    print(f"OLS search '{query}' "
          f"[{ontology_csv if ontology_csv else 'all'}]: "
          f"{len(df)} terms")
    return df
117
+
118
+
119
def _fetch_ols_relatives(ontology, encoded_iri, relation, term_id, timeout):
    """Return [{"id", "label"}, ...] for one relation of a term; [] on failure."""
    resp = requests.get(
        f"{OLS_API}/ontologies/{ontology}/terms/{encoded_iri}/{relation}",
        timeout=timeout,
    )
    if resp.status_code != 200:
        # Stay best-effort, but make the failure visible instead of silent.
        print(f"OLS hierarchy {term_id}: {relation} unavailable "
              f"(HTTP {resp.status_code})")
        return []
    terms = resp.json().get("_embedded", {}).get("terms", [])
    return [{"id": t.get("obo_id", ""), "label": t.get("label", "")} for t in terms]


def get_ols_term_hierarchy(ontology, term_id, timeout=30):
    """
    Fetch the ancestors and descendants of an OLS term.

    Parameters:
        ontology: str — ontology ID (e.g., "hp", "go")
        term_id: str — OBO ID (e.g., "HP:0001250"), or a full term IRI
            starting with "http://" / "https://" for terms whose IRI does
            not follow the OBO PURL pattern
        timeout: float — seconds to wait for each HTTP response

    Returns:
        dict — {"ancestors": [...], "descendants": [...]}, each a list of
        {"id", "label"} dicts. A relation whose request does not return
        HTTP 200 yields an empty list.

    Raises:
        requests.Timeout — no response within `timeout` seconds.

    Only the single response returned for each relation is read; further
    result pages, if the service paginates, are not followed.
    """
    if term_id.startswith(("http://", "https://")):
        iri = term_id
    else:
        iri = f"http://purl.obolibrary.org/obo/{term_id.replace(':', '_')}"
    # The IRI is embedded in the URL path, so it is percent-encoded twice.
    encoded_iri = requests.utils.quote(requests.utils.quote(iri, safe=""), safe="")

    hierarchy = {
        relation: _fetch_ols_relatives(ontology, encoded_iri, relation, term_id, timeout)
        for relation in ("ancestors", "descendants")
    }

    print(f"OLS hierarchy {term_id}: "
          f"{len(hierarchy['ancestors'])} ancestors, "
          f"{len(hierarchy['descendants'])} descendants")
    return hierarchy
160
+ ```
161
+
162
+ ## 3. Enrichr 遺伝子セット濃縮解析
163
+
164
+ ```python
165
+ ENRICHR_API = "https://maayanlab.cloud/Enrichr"
166
+
167
+
168
def run_enrichr(gene_list, description="", gene_set_libraries=None, timeout=30):
    """
    Submit a gene list to Enrichr and collect enrichment results per library.

    Parameters:
        gene_list: list — gene symbols (e.g., ["TP53", "BRCA1", "EGFR"])
        description: str — free-text description of the analysis
        gene_set_libraries: list — gene-set libraries to query; defaults to
            a GO / KEGG / Reactome / WikiPathway / DisGeNET selection
        timeout: float — seconds to wait for each HTTP response

    Returns:
        dict — library name -> pandas.DataFrame with columns rank, term,
        p_value, z_score, combined_score, overlap_genes, adjusted_p, sorted
        by adjusted_p (an empty frame when the library returned no terms).

    Raises:
        ValueError — gene_list is empty.
        requests.HTTPError — a request failed, or the submission response
            carried no "userListId".
        requests.Timeout — no response within `timeout` seconds.

    ToolUniverse:
        Enrichr_submit_gene_list(genes=gene_list)
        Enrichr_get_enrichment(user_list_id=id, library=library)
    """
    if not gene_list:
        raise ValueError("gene_list must contain at least one gene symbol")

    if gene_set_libraries is None:
        gene_set_libraries = [
            "GO_Biological_Process_2023",
            "GO_Molecular_Function_2023",
            "KEGG_2021_Human",
            "Reactome_2022",
            "WikiPathway_2023_Human",
            "DisGeNET",
        ]

    # Submit the gene list as multipart form fields (no file names).
    genes_str = "\n".join(gene_list)
    submit_resp = requests.post(
        f"{ENRICHR_API}/addList",
        files={"list": (None, genes_str), "description": (None, description)},
        timeout=timeout,
    )
    submit_resp.raise_for_status()
    user_list_id = submit_resp.json().get("userListId")
    if user_list_id is None:
        # Without an ID every enrichment request below would be meaningless.
        raise requests.HTTPError(
            "Enrichr addList response did not contain 'userListId'",
            response=submit_resp,
        )
    print(f"Enrichr: submitted {len(gene_list)} genes (ID={user_list_id})")

    # Positional layout of the first seven values of each result row.
    columns = [
        "rank", "term", "p_value", "z_score",
        "combined_score", "overlap_genes", "adjusted_p",
    ]

    all_results = {}
    for library in gene_set_libraries:
        enrich_resp = requests.get(
            f"{ENRICHR_API}/enrich",
            params={"userListId": user_list_id, "backgroundType": library},
            timeout=timeout,
        )
        enrich_resp.raise_for_status()
        data = enrich_resp.json()

        rows = [
            {name: term_data[idx] for idx, name in enumerate(columns)}
            for term_data in data.get(library, [])
        ]

        # Explicit columns keep the frame's schema stable even with no rows.
        df = pd.DataFrame(rows, columns=columns)
        if not df.empty:
            df = df.sort_values("adjusted_p")
        all_results[library] = df
        sig_count = (df["adjusted_p"] < 0.05).sum() if not df.empty else 0
        print(f"  {library}: {sig_count} significant terms (FDR < 0.05)")

    return all_results
231
+ ```
232
+
233
+ ## 4. UMLS メタシソーラスマッピング
234
+
235
+ ```python
236
+ UMLS_API = "https://uts-ws.nlm.nih.gov/rest"
237
+
238
+
239
def search_umls(query, api_key, search_type="words", page_size=25, timeout=30):
    """
    Search the UMLS Metathesaurus for concepts.

    Parameters:
        query: str — search term (disease, symptom, drug name)
        api_key: str — UMLS API key
        search_type: str — "words", "exact", "leftTruncation"
        page_size: int — number of results to request (first page only)
        timeout: float — seconds to wait for the HTTP response

    Returns:
        pandas.DataFrame — columns: cui, name, root_source, uri. Empty when
        nothing matched.

    Raises:
        requests.HTTPError — the service answered with an error status.
        requests.Timeout — no response within `timeout` seconds.

    ToolUniverse:
        UMLS_search(query=query, search_type=search_type)
        UMLS_get_concept(cui=cui)
    """
    params = {
        "string": query,
        "searchType": search_type,
        "apiKey": api_key,
        "pageSize": page_size,
    }
    resp = requests.get(f"{UMLS_API}/search/current", params=params, timeout=timeout)
    resp.raise_for_status()
    items = resp.json().get("result", {}).get("results", [])

    results = [
        {
            "cui": item.get("ui", ""),
            "name": item.get("name", ""),
            "root_source": item.get("rootSource", ""),
            "uri": item.get("uri", ""),
        }
        for item in items
        # The service can report "no results" as a placeholder item whose ui
        # is "NONE"; it is not a concept and must not be counted as one.
        if item.get("ui") != "NONE"
    ]

    df = pd.DataFrame(results)
    print(f"UMLS search '{query}': {len(df)} concepts")
    return df
274
+
275
+
276
def get_umls_crosswalk(cui, api_key, target_source=None, timeout=30):
    """
    List the source-vocabulary atoms of a UMLS concept (CUI crosswalk).

    Parameters:
        cui: str — UMLS CUI (e.g., "C0023264")
        api_key: str — UMLS API key
        target_source: str — source vocabulary abbreviation to keep
            (e.g., "SNOMEDCT_US", "ICD10CM", "MSH"); several may be given
            comma-separated. None keeps every source.
        timeout: float — seconds to wait for the HTTP response

    Returns:
        pandas.DataFrame — columns: source, code, name, term_type. The
        columns are present even when no atom is returned.

    Raises:
        requests.HTTPError — the service answered with an error status.
        requests.Timeout — no response within `timeout` seconds.

    Only the first page (up to 100 atoms) is requested.
    """
    params = {"apiKey": api_key, "pageSize": 100}
    if target_source:
        params["sabs"] = target_source

    resp = requests.get(
        f"{UMLS_API}/content/current/CUI/{cui}/atoms",
        params=params,
        timeout=timeout,
    )
    resp.raise_for_status()
    data = resp.json()

    mappings = [
        {
            "source": atom.get("rootSource", ""),
            "code": atom.get("sourceConcept", ""),
            "name": atom.get("name", ""),
            "term_type": atom.get("termType", ""),
        }
        for atom in data.get("result") or []
    ]

    # Explicit columns: a frame built from an empty list has no "source"
    # column, so the filter below would otherwise raise KeyError.
    df = pd.DataFrame(mappings, columns=["source", "code", "name", "term_type"])
    if target_source:
        # Accept a comma-separated list, matching what was sent as "sabs".
        wanted = {s.strip() for s in target_source.split(",") if s.strip()}
        df = df[df["source"].isin(wanted)]

    print(f"UMLS crosswalk {cui}: {len(df)} mappings "
          f"({target_source or 'all sources'})")
    return df
309
+ ```
310
+
311
+ ---
312
+
313
+ ## 利用可能ツール
314
+
315
+ | ToolUniverse カテゴリ | 主なツール |
316
+ |---|---|
317
+ | `efo` | `EFO_search` |
318
+ | `ols` | `OLS_search`, `OLS_get_term`, `OLS_get_ancestors` |
319
+ | `enrichr` | `Enrichr_submit_gene_list`, `Enrichr_get_enrichment` |
320
+ | `umls` | `UMLS_search`, `UMLS_get_concept` |
321
+
322
+ ## パイプライン出力
323
+
324
+ | 出力ファイル | 説明 | 連携先スキル |
325
+ |---|---|---|
326
+ | `results/efo_terms.csv` | EFO 標準化用語 | → disease-research, gene-expression |
327
+ | `results/enrichr_results/` | 遺伝子セット濃縮結果 | → pathway-enrichment, multi-omics |
328
+ | `results/umls_mapping.json` | UMLS 用語マッピング | → clinical-decision-support, public-health-data |
329
+ | `results/ontology_hierarchy.json` | オントロジー階層 | → text-mining-nlp, knowledge-graph |
330
+
331
+ ## パイプライン統合
332
+
333
+ ```
334
+ disease-research ──→ ontology-enrichment ──→ pathway-enrichment
335
+ (GWAS/DisGeNET) (EFO/OLS/UMLS/Enrichr) (KEGG/Reactome/GO)
336
+
337
+ ├──→ biothings-idmapping (CUI→Gene→Protein)
338
+ ├──→ public-health-data (UMLS→RxNorm)
339
+ └──→ clinical-reporting (SNOMED/ICD マッピング)
340
+ ```