@nahisaho/satori 0.17.0 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. package/README.md +85 -38
  2. package/package.json +1 -1
  3. package/src/.github/skills/scientific-alphafold-structures/SKILL.md +256 -0
  4. package/src/.github/skills/scientific-arrayexpress-expression/SKILL.md +264 -0
  5. package/src/.github/skills/scientific-civic-evidence/SKILL.md +292 -0
  6. package/src/.github/skills/scientific-compound-screening/SKILL.md +4 -0
  7. package/src/.github/skills/scientific-crossref-metadata/SKILL.md +313 -0
  8. package/src/.github/skills/scientific-depmap-dependencies/SKILL.md +239 -0
  9. package/src/.github/skills/scientific-disease-research/SKILL.md +4 -0
  10. package/src/.github/skills/scientific-drugbank-resources/SKILL.md +269 -0
  11. package/src/.github/skills/scientific-gnomad-variants/SKILL.md +356 -0
  12. package/src/.github/skills/scientific-gtex-tissue-expression/SKILL.md +271 -0
  13. package/src/.github/skills/scientific-gwas-catalog/SKILL.md +267 -0
  14. package/src/.github/skills/scientific-icgc-cancer-data/SKILL.md +351 -0
  15. package/src/.github/skills/scientific-metabolomics-databases/SKILL.md +4 -0
  16. package/src/.github/skills/scientific-opentargets-genetics/SKILL.md +299 -0
  17. package/src/.github/skills/scientific-pharmgkb-pgx/SKILL.md +306 -0
  18. package/src/.github/skills/scientific-protein-interaction-network/SKILL.md +4 -0
  19. package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +4 -0
  20. package/src/.github/skills/scientific-rcsb-pdb-search/SKILL.md +280 -0
  21. package/src/.github/skills/scientific-reactome-pathways/SKILL.md +242 -0
  22. package/src/.github/skills/scientific-semantic-scholar/SKILL.md +298 -0
  23. package/src/.github/skills/scientific-uniprot-proteome/SKILL.md +273 -0
  24. package/src/.github/skills/scientific-variant-interpretation/SKILL.md +4 -0
@@ -0,0 +1,264 @@
1
+ ---
2
+ name: scientific-arrayexpress-expression
3
+ description: |
4
+ ArrayExpress 発現アーカイブスキル。BioStudies/ArrayExpress
5
+ REST API によるマイクロアレイ・RNA-seq 発現実験検索・メタ
6
+ データ取得・データ再解析。ToolUniverse 連携: arrayexpress。
7
+ tu_tools:
8
+ - key: arrayexpress
9
+ name: ArrayExpress
10
+ description: ArrayExpress 発現実験検索・メタデータ・ファイル取得
11
+ ---
12
+
13
+ # Scientific ArrayExpress Expression
14
+
15
+ EBI ArrayExpress / BioStudies REST API を活用した発現データ
16
+ アーカイブ検索・再解析パイプラインを提供する。
17
+
18
+ ## When to Use
19
+
20
+ - ArrayExpress/BioStudies の発現実験を検索するとき
21
+ - マイクロアレイ/RNA-seq 発現データのメタデータを取得するとき
22
+ - SDRF サンプル情報テーブルを解析するとき
23
+ - E-MTAB/E-GEOD アクセッションからデータ再解析するとき
24
+ - 発現データアーカイブを横断検索するとき
25
+ - GEO と ArrayExpress の両方でデータを探すとき
26
+
27
+ ---
28
+
29
+ ## Quick Start
30
+
31
+ ## 1. BioStudies 発現実験検索
32
+
33
+ ```python
34
+ import requests
35
+ import pandas as pd
36
+
37
+ BIOSTUDIES_BASE = "https://www.ebi.ac.uk/biostudies/api/v1"
38
+ AE_BASE = "https://www.ebi.ac.uk/arrayexpress/json/v3"
39
+
40
+
41
def arrayexpress_search_experiments(query, organism=None,
                                    experiment_type=None,
                                    limit=50):
    """
    Search BioStudies for expression studies and tabulate the hits.

    Parameters:
        query: str — free-text search query (e.g. "breast cancer RNA-seq")
        organism: str — optional organism filter (e.g. "Homo sapiens")
        experiment_type: str — optional experiment type filter
            (e.g. "RNA-seq of coding RNA")
        limit: int — value sent as the ``pageSize`` request parameter

    Returns:
        pandas.DataFrame — one row per hit with the columns accession,
        title, organism, experiment_type, release_date, files_count and
        links_count.

    Raises:
        requests.HTTPError — when the search request returns an error status.
    """
    request_params = {"query": query, "type": "study", "pageSize": limit}
    # Optional filters are only sent when the caller supplied a value.
    if organism:
        request_params["organism"] = organism
    if experiment_type:
        request_params["experimenttype"] = experiment_type

    response = requests.get(f"{BIOSTUDIES_BASE}/search",
                            params=request_params, timeout=30)
    response.raise_for_status()
    payload = response.json()

    def _hit_to_row(hit):
        # Flatten the hit's name/value attribute list into a lookup table.
        named = {}
        for attribute in hit.get("attributes", []):
            named[attribute.get("name", "")] = attribute.get("value", "")
        fallback_title = hit.get("title", "")
        return {
            "accession": hit.get("accession", ""),
            "title": named.get("Title", fallback_title),
            "organism": named.get("Organism", ""),
            "experiment_type": named.get("Experiment type", ""),
            "release_date": hit.get("releaseDate", ""),
            "files_count": hit.get("filesCount", 0),
            "links_count": hit.get("linksCount", 0),
        }

    df = pd.DataFrame([_hit_to_row(hit) for hit in payload.get("hits", [])])
    print(f"ArrayExpress search: {len(df)} experiments "
          f"(query={query})")
    return df
87
+ ```
88
+
89
+ ## 2. 実験メタデータ・SDRF 取得
90
+
91
+ ```python
92
def arrayexpress_get_experiment(accession):
    """
    Fetch study metadata, the file listing and the SDRF table for a study.

    Parameters:
        accession: str — study accession (e.g. "E-MTAB-12345")

    Returns:
        tuple (metadata, files_df, sdrf_df):
            metadata: dict — accession, title, description (first 500
                characters), organism, experiment_type, release_date
            files_df: pandas.DataFrame — columns filename, type, size
                (the columns are present even when no files are listed)
            sdrf_df: pandas.DataFrame — parsed SDRF table, or an empty
                frame when the SDRF could not be fetched or parsed

    Raises:
        requests.HTTPError — when the study request returns an error status.
        The SDRF download is best-effort and never raises for network or
        parse failures; those are reported on stdout instead.
    """
    from io import StringIO

    url = f"{BIOSTUDIES_BASE}/studies/{accession}"
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    # Metadata: flatten the name/value attribute list. ``or`` guards
    # against keys that are present but hold JSON null.
    attrs = {a.get("name", ""): a.get("value", "")
             for a in data.get("attributes") or []}
    metadata = {
        "accession": accession,
        "title": attrs.get("Title", ""),
        "description": (attrs.get("Description") or "")[:500],
        "organism": attrs.get("Organism", ""),
        "experiment_type": attrs.get("Experiment type", ""),
        "release_date": data.get("releaseDate", ""),
    }

    def _file_row(entry):
        return {
            "filename": entry.get("path", ""),
            "type": entry.get("type", ""),
            "size": entry.get("size", 0),
        }

    # File listing: entries may be single file objects or nested lists.
    files = []
    for entry in (data.get("section") or {}).get("files") or []:
        if isinstance(entry, list):
            files.extend(_file_row(f) for f in entry
                         if isinstance(f, dict))
        elif isinstance(entry, dict):
            files.append(_file_row(entry))

    # Fixing the columns keeps "filename" addressable on an empty listing,
    # so callers can filter on it without a KeyError.
    files_df = pd.DataFrame(files, columns=["filename", "type", "size"])

    # SDRF: best-effort download; failures leave an empty frame.
    sdrf_url = (f"https://www.ebi.ac.uk/biostudies/files/"
                f"{accession}/{accession}.sdrf.txt")
    sdrf_df = pd.DataFrame()
    try:
        sdrf_resp = requests.get(sdrf_url, timeout=30)
        if sdrf_resp.status_code == 200:
            sdrf_df = pd.read_csv(StringIO(sdrf_resp.text), sep="\t")
    except (requests.RequestException, ValueError) as exc:
        # pandas parser/empty-data errors are ValueError subclasses.
        print(f"  ArrayExpress SDRF fetch failed for {accession}: {exc}")

    print(f"ArrayExpress {accession}: {len(files_df)} files, "
          f"{len(sdrf_df)} SDRF rows")
    return metadata, files_df, sdrf_df
150
+ ```
151
+
152
+ ## 3. 発現データダウンロード・処理
153
+
154
+ ```python
155
def arrayexpress_download_matrix(accession, output_dir="results"):
    """
    Download the processed-expression files of a study into a directory.

    Files are selected when their name matches (case-insensitively)
    "processed", "normalized", "expression" or "counts". Each selected
    file is saved under its basename; the SDRF table, when available, is
    saved as ``sdrf.csv``.

    Parameters:
        accession: str — study accession
        output_dir: str — output directory (created if missing)

    Returns:
        dict with keys:
            "metadata": dict returned by ``arrayexpress_get_experiment``
            "files": list[str] — local paths of the files written
            "sdrf": pandas.DataFrame — SDRF table (possibly empty)

    Raises:
        requests.HTTPError — propagated from the study metadata request.
        Individual file downloads are best-effort: failures are reported
        on stdout and skipped.
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    metadata, files_df, sdrf_df = arrayexpress_get_experiment(accession)

    # A study without files can yield a frame with no "filename" column;
    # indexing it directly would raise KeyError.
    if "filename" in files_df.columns and not files_df.empty:
        is_expression = files_df["filename"].str.contains(
            r"processed|normalized|expression|counts",
            case=False, na=False)
        expr_names = files_df.loc[is_expression, "filename"].tolist()
    else:
        expr_names = []

    downloaded = []
    for fname in expr_names:
        local_name = fname.split("/")[-1]
        if not local_name:
            # An empty basename would resolve to the output directory.
            continue
        url = (f"https://www.ebi.ac.uk/biostudies/files/"
               f"{accession}/{fname}")
        try:
            resp = requests.get(url, timeout=120)
            if resp.status_code != 200:
                print(f"  ArrayExpress download skipped: {fname} "
                      f"(HTTP {resp.status_code})")
                continue
            fpath = output_dir / local_name
            fpath.write_bytes(resp.content)
        except (requests.RequestException, OSError) as exc:
            print(f"  ArrayExpress download failed: {fname}: {exc}")
            continue
        downloaded.append(str(fpath))

    # Save the SDRF table alongside the downloads.
    if not sdrf_df.empty:
        sdrf_df.to_csv(output_dir / "sdrf.csv", index=False)

    print(f"ArrayExpress download: {len(downloaded)} files → "
          f"{output_dir}")
    return {
        "metadata": metadata,
        "files": downloaded,
        "sdrf": sdrf_df,
    }
201
+ ```
202
+
203
+ ## 4. ArrayExpress 統合パイプライン
204
+
205
+ ```python
206
def arrayexpress_pipeline(query, organism="Homo sapiens",
                          output_dir="results"):
    """
    Run the search step and export details of the first hit.

    Writes ``experiments.csv`` for every run. When the search returns at
    least one experiment, also writes ``experiment_files.csv`` for the
    first one, and ``sdrf.csv`` when that experiment's SDRF is non-empty.

    Parameters:
        query: str — search query
        organism: str — organism filter
        output_dir: str — output directory (created if missing)

    Returns:
        dict — {"experiments": DataFrame of the search results}
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Step 1: search and persist the result table.
    found = arrayexpress_search_experiments(query, organism=organism)
    found.to_csv(output_dir / "experiments.csv", index=False)

    # Step 2: details for the first experiment, if there is one.
    has_hits = not found.empty
    if has_hits:
        first_accession = found.iloc[0]["accession"]
        _meta, file_table, sample_table = arrayexpress_get_experiment(
            first_accession)
        file_table.to_csv(output_dir / "experiment_files.csv",
                          index=False)
        if not sample_table.empty:
            sample_table.to_csv(output_dir / "sdrf.csv", index=False)

    print(f"ArrayExpress pipeline: {output_dir}")
    return {"experiments": found}
235
+ ```
236
+
237
+ ---
238
+
239
+ ## ToolUniverse 連携
240
+
241
+ | TU Key | ツール名 | 連携内容 |
242
+ |--------|---------|---------|
243
+ | `arrayexpress` | ArrayExpress | 発現実験検索・メタデータ・ファイル取得 |
244
+
245
+ ## パイプライン統合
246
+
247
+ ```
248
+ ebi-databases → arrayexpress-expression → gene-expression-transcriptomics
249
+ (EBI Search) (ArrayExpress/BioStudies) (DESeq2/GSEA)
250
+ │ │ ↓
251
+ geo-expression ─────────┘ pathway-enrichment
252
+ (GEO データ) │ (KEGG/Reactome)
253
+
254
+ multi-omics
255
+ (統合解析)
256
+ ```
257
+
258
+ ## パイプライン出力
259
+
260
+ | ファイル | 説明 | 次スキル |
261
+ |---------|------|---------|
262
+ | `results/experiments.csv` | 実験一覧 | → geo-expression |
263
+ | `results/sdrf.csv` | サンプル情報 | → gene-expression-transcriptomics |
264
+ | `results/experiment_files.csv` | ファイルリスト | → data-preprocessing |
@@ -0,0 +1,292 @@
1
+ ---
2
+ name: scientific-civic-evidence
3
+ description: |
4
+ CIViC 臨床エビデンススキル。CIViC (Clinical Interpretation
5
+ of Variants in Cancer) REST API を用いたバリアント臨床解釈・
6
+ エビデンスアイテム・分子プロファイル・アサーション検索。
7
+ ToolUniverse 連携: civic。
8
+ tu_tools:
9
+ - key: civic
10
+ name: CIViC
11
+ description: がんバリアント臨床解釈データベース
12
+ ---
13
+
14
+ # Scientific CIViC Evidence
15
+
16
+ CIViC (Clinical Interpretation of Variants in Cancer) REST API
17
+ を活用したバリアント臨床解釈・エビデンスアイテム取得・
18
+ 分子プロファイル・アサーションパイプラインを提供する。
19
+
20
+ ## When to Use
21
+
22
+ - がんバリアントの臨床的解釈を検索するとき
23
+ - エビデンスアイテム (薬剤応答・予後・診断) を取得するとき
24
+ - 遺伝子ごとのバリアントサマリーを確認するとき
25
+ - 分子プロファイル (Molecular Profile) を検索するとき
26
+ - アサーション (ガイドライン推奨) を取得するとき
27
+
28
+ ---
29
+
30
+ ## Quick Start
31
+
32
+ ## 1. バリアント検索・臨床解釈
33
+
34
+ ```python
35
+ import requests
36
+ import pandas as pd
37
+
38
+ CIVIC_API = "https://civicdb.org/api"
39
+
40
+
41
def civic_variant_search(gene_name, variant_name=None,
                         limit=50):
    """
    List the CIViC variants of a gene, optionally filtered by name.

    The gene is first requested directly by name; if that request does
    not return HTTP 200, the gene search endpoint is queried and its
    first record is used.

    Parameters:
        gene_name: str — gene name (e.g. "BRAF")
        variant_name: str — optional case-insensitive substring that the
            variant name must contain (e.g. "V600E")
        limit: int — maximum number of rows returned; applied after the
            name filter so matches are not lost to early truncation

    Returns:
        pandas.DataFrame — columns variant_id, gene, variant_name,
        description (first 200 characters) and evidence_count; an empty
        frame when the gene could not be resolved.
    """
    # Resolve the gene record; any failure is reported and treated as
    # "gene not found" rather than raised.
    gene_url = f"{CIVIC_API}/genes/{gene_name}"
    try:
        resp = requests.get(gene_url, timeout=30)
        if resp.status_code == 200:
            gene_data = resp.json()
        else:
            # Fall back to the search endpoint.
            search_url = f"{CIVIC_API}/genes"
            params_g = {"name": gene_name, "count": 5}
            resp = requests.get(search_url,
                                params=params_g,
                                timeout=30)
            resp.raise_for_status()
            records = resp.json().get("records", [])
            gene_data = records[0] if records else {}
    except Exception as e:
        print(f"  CIViC gene lookup: {e}")
        gene_data = {}

    if not gene_data:
        return pd.DataFrame()

    needle = variant_name.lower() if variant_name else None
    rows = []
    for v in gene_data.get("variants") or []:
        # Stop once enough matching rows are collected.
        if limit is not None and len(rows) >= limit:
            break
        # ``or`` also covers fields that are present but JSON null.
        name = v.get("name") or ""
        if needle and needle not in name.lower():
            continue
        rows.append({
            "variant_id": v.get("id", ""),
            "gene": gene_name,
            "variant_name": name,
            "description": (v.get("description") or "")[:200],
            "evidence_count": len(v.get("evidence_items") or []),
        })

    df = pd.DataFrame(rows)
    print(f"CIViC variants: {gene_name} → {len(df)}")
    return df
98
+
99
+
100
def civic_gene_summary(gene_name):
    """
    Fetch a gene record from CIViC and condense it into a flat summary.

    Parameters:
        gene_name: str — gene name (e.g. "EGFR")

    Returns:
        dict — gene_id, name, description, n_variants (number of variant
        entries in the record) and aliases (joined with "; ").

    Raises:
        requests.HTTPError — when the gene request returns an error status.
    """
    response = requests.get(f"{CIVIC_API}/genes/{gene_name}", timeout=30)
    response.raise_for_status()
    record = response.json()

    variant_entries = record.get("variants", [])
    alias_list = record.get("aliases", [])
    return {
        "gene_id": record.get("id", ""),
        "name": record.get("name", ""),
        "description": record.get("description", ""),
        "n_variants": len(variant_entries),
        "aliases": "; ".join(alias_list),
    }
121
+ ```
122
+
123
+ ## 2. エビデンスアイテム取得
124
+
125
+ ```python
126
def civic_evidence_items(variant_id, limit=50):
    """
    Fetch the evidence items attached to one CIViC variant.

    Parameters:
        variant_id: int — CIViC variant ID
        limit: int — maximum number of evidence items returned

    Returns:
        pandas.DataFrame — one row per evidence item with the columns
        evidence_id, variant_id, evidence_type, evidence_level,
        evidence_direction, clinical_significance, disease, drugs
        (names joined with "; "), rating, status and source_citation.

    Raises:
        requests.HTTPError — when the variant request returns an error
        status.
    """
    url = f"{CIVIC_API}/variants/{variant_id}"
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    rows = []
    for ev in (data.get("evidence_items") or [])[:limit]:
        # ``.get(key, {})`` does not protect against a key that is
        # present with a JSON null value, so normalise with ``or``.
        disease = ev.get("disease") or {}
        source = ev.get("source") or {}
        drugs = [d.get("name") or ""
                 for d in ev.get("drugs") or []]
        rows.append({
            "evidence_id": ev.get("id", ""),
            "variant_id": variant_id,
            "evidence_type": ev.get("evidence_type", ""),
            "evidence_level": ev.get("evidence_level", ""),
            "evidence_direction": ev.get("evidence_direction", ""),
            "clinical_significance": ev.get(
                "clinical_significance", ""),
            "disease": disease.get("name", ""),
            "drugs": "; ".join(drugs),
            "rating": ev.get("rating", ""),
            "status": ev.get("status", ""),
            "source_citation": source.get("citation", ""),
        })

    df = pd.DataFrame(rows)
    print(f"CIViC evidence: variant {variant_id} "
          f"→ {len(df)} items")
    return df
167
+ ```
168
+
169
+ ## 3. アサーション取得
170
+
171
+ ```python
172
def civic_assertions(gene_name=None, limit=50):
    """
    Fetch CIViC assertions, optionally keeping only those for one gene.

    The gene filter is applied locally to the records returned by the
    request, so fewer than ``limit`` rows may come back when it is used.

    Parameters:
        gene_name: str — optional gene name; an assertion is kept only
            when this exact name appears among its genes
        limit: int — value sent as the ``count`` request parameter

    Returns:
        pandas.DataFrame — one row per assertion with the columns
        assertion_id, genes, variant, disease, drugs, assertion_type,
        assertion_direction, clinical_significance, amp_level and status.

    Raises:
        requests.HTTPError — when the assertions request returns an error
        status.
    """
    url = f"{CIVIC_API}/assertions"
    params = {"count": limit}

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    rows = []
    for a in data.get("records") or []:
        # ``or`` covers keys that are present but hold JSON null, which
        # ``.get(key, default)`` alone would pass through.
        genes = [g.get("name") or ""
                 for g in a.get("genes") or []]
        if gene_name and gene_name not in genes:
            continue
        drugs = [d.get("name") or ""
                 for d in a.get("drugs") or []]
        variant = a.get("variant") or {}
        disease = a.get("disease") or {}
        rows.append({
            "assertion_id": a.get("id", ""),
            "genes": "; ".join(genes),
            "variant": variant.get("name", ""),
            "disease": disease.get("name", ""),
            "drugs": "; ".join(drugs),
            "assertion_type": a.get("assertion_type", ""),
            "assertion_direction": a.get(
                "assertion_direction", ""),
            "clinical_significance": a.get(
                "clinical_significance", ""),
            "amp_level": a.get("amp_level", ""),
            "status": a.get("status", ""),
        })

    df = pd.DataFrame(rows)
    print(f"CIViC assertions: {len(df)}")
    return df
216
+ ```
217
+
218
+ ## 4. CIViC 統合パイプライン
219
+
220
+ ```python
221
def civic_pipeline(gene_name, variant_name=None,
                   output_dir="results"):
    """
    Run the CIViC lookups for one gene and export each result as CSV.

    Writes ``civic_gene.csv``, ``civic_variants.csv`` and
    ``civic_assertions.csv``; ``civic_evidence.csv`` is written only when
    at least one variant was found, using the first variant in the table.

    Parameters:
        gene_name: str — gene name (e.g. "BRAF")
        variant_name: str — optional variant name filter (e.g. "V600E")
        output_dir: str — output directory (created if missing)

    Returns:
        dict — {"variants": DataFrame of the variant search results}
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Step 1: gene summary as a single-row table.
    gene_info = civic_gene_summary(gene_name)
    gene_table = pd.DataFrame([gene_info])
    gene_table.to_csv(output_dir / "civic_gene.csv", index=False)

    # Step 2: variant search.
    variants = civic_variant_search(gene_name, variant_name)
    variants.to_csv(output_dir / "civic_variants.csv", index=False)

    # Step 3: evidence for the first variant, when any were found.
    found_any = not variants.empty
    if found_any:
        first_variant_id = variants.iloc[0]["variant_id"]
        evidence_table = civic_evidence_items(first_variant_id)
        evidence_table.to_csv(output_dir / "civic_evidence.csv",
                              index=False)

    # Step 4: assertions for the gene.
    assertion_table = civic_assertions(gene_name)
    assertion_table.to_csv(output_dir / "civic_assertions.csv",
                           index=False)

    print(f"CIViC pipeline: {gene_name} → {output_dir}")
    return {"variants": variants}
262
+ ```
263
+
264
+ ---
265
+
266
+ ## ToolUniverse 連携
267
+
268
+ | TU Key | ツール名 | 連携内容 |
269
+ |--------|---------|---------|
270
+ | `civic` | CIViC | がんバリアント臨床解釈 (~12 tools) |
271
+
272
+ ## パイプライン統合
273
+
274
+ ```
275
+ variant-interpretation → civic-evidence → precision-oncology
276
+ (ClinVar バリアント) (CIViC REST) (精密腫瘍学)
277
+ │ │ ↓
278
+ gnomad-variants ────────────┘ drug-target-profiling
279
+ (集団頻度) │ (標的プロファイリング)
280
+
281
+ opentargets-genetics
282
+ (OT 標的-疾患)
283
+ ```
284
+
285
+ ## パイプライン出力
286
+
287
+ | ファイル | 説明 | 次スキル |
288
+ |---------|------|---------|
289
+ | `results/civic_gene.csv` | 遺伝子サマリー | → cancer-genomics |
290
+ | `results/civic_variants.csv` | バリアント一覧 | → variant-interpretation |
291
+ | `results/civic_evidence.csv` | エビデンス | → precision-oncology |
292
+ | `results/civic_assertions.csv` | アサーション | → pharmacogenomics |
@@ -4,6 +4,10 @@ description: |
4
4
  化合物スクリーニングスキル。ZINC データベースを活用した購入可能化合物検索、
5
5
  SMILES/名前ベースの類似性検索、カタログフィルタリング、
6
6
  バーチャルスクリーニング前処理パイプライン。
7
+ tu_tools:
8
+ - key: zinc
9
+ name: ZINC
10
+ description: 購入可能化合物データベース
7
11
  ---
8
12
 
9
13
  # Scientific Compound Screening