@nahisaho/satori 0.18.0 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -23
- package/package.json +1 -1
- package/src/.github/skills/scientific-civic-evidence/SKILL.md +292 -0
- package/src/.github/skills/scientific-compound-screening/SKILL.md +4 -0
- package/src/.github/skills/scientific-depmap-dependencies/SKILL.md +239 -0
- package/src/.github/skills/scientific-disease-research/SKILL.md +4 -0
- package/src/.github/skills/scientific-drugbank-resources/SKILL.md +269 -0
- package/src/.github/skills/scientific-gnomad-variants/SKILL.md +356 -0
- package/src/.github/skills/scientific-metabolomics-databases/SKILL.md +4 -0
- package/src/.github/skills/scientific-opentargets-genetics/SKILL.md +299 -0
- package/src/.github/skills/scientific-protein-interaction-network/SKILL.md +4 -0
- package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +4 -0
- package/src/.github/skills/scientific-rcsb-pdb-search/SKILL.md +280 -0
- package/src/.github/skills/scientific-reactome-pathways/SKILL.md +242 -0
- package/src/.github/skills/scientific-uniprot-proteome/SKILL.md +273 -0
- package/src/.github/skills/scientific-variant-interpretation/SKILL.md +4 -0
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-uniprot-proteome
|
|
3
|
+
description: |
|
|
4
|
+
UniProt プロテオームスキル。UniProt REST API による
|
|
5
|
+
タンパク質検索・ID マッピング・配列取得・機能アノテーション・
|
|
6
|
+
UniRef/UniParc 横断検索。ToolUniverse 連携: uniprot。
|
|
7
|
+
tu_tools:
|
|
8
|
+
- key: uniprot
|
|
9
|
+
name: UniProt
|
|
10
|
+
description: タンパク質配列・機能アノテーション・ID マッピング
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Scientific UniProt Proteome
|
|
14
|
+
|
|
15
|
+
UniProt REST API を活用したタンパク質検索・ID マッピング・
|
|
16
|
+
機能アノテーション取得・プロテオーム解析パイプラインを提供する。
|
|
17
|
+
|
|
18
|
+
## When to Use
|
|
19
|
+
|
|
20
|
+
- タンパク質のアミノ酸配列・機能情報を検索するとき
|
|
21
|
+
- UniProt ID と他データベース ID を相互変換するとき
|
|
22
|
+
- タンパク質のドメイン・GO アノテーション・疾患関連を調べるとき
|
|
23
|
+
- プロテオーム規模でのタンパク質機能解析を行うとき
|
|
24
|
+
- UniRef クラスター・UniParc アーカイブを横断検索するとき
|
|
25
|
+
- 生物種ごとのリファレンスプロテオームを取得するとき
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Quick Start
|
|
30
|
+
|
|
31
|
+
## 1. タンパク質検索・エントリ取得
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
import requests
|
|
35
|
+
import pandas as pd
|
|
36
|
+
|
|
37
|
+
# Root URL of the UniProt REST service; the helper functions in this
# document append endpoint paths such as "/uniprotkb/search" to it.
UNIPROT_BASE = "https://rest.uniprot.org"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _uniprot_protein_name(entry):
    """Return the best available full protein name of a UniProtKB JSON entry."""
    description = entry.get("proteinDescription") or {}
    recommended = description.get("recommendedName")
    if recommended:
        return recommended.get("fullName", {}).get("value", "")
    # NOTE(review): unreviewed (TrEMBL) entries are expected to carry their
    # name under "submissionNames" instead of "recommendedName" — confirm
    # against the UniProtKB JSON schema. Harmless if the key is absent.
    for submitted in description.get("submissionNames") or []:
        name = submitted.get("fullName", {}).get("value", "")
        if name:
            return name
    return ""


def uniprot_search(query, organism=None, reviewed=True,
                   limit=50, fields=None):
    """
    Search UniProtKB and return the hits as a DataFrame.

    Parameters:
        query: str — search query (e.g. "BRCA1", "kinase")
        organism: str — taxonomy ID added as an ``organism_id`` filter
            (e.g. "9606" for human); no filter when falsy
        reviewed: bool — when True, adds a ``reviewed:true`` filter;
            when False, no review filter is added
        limit: int — maximum number of results (sent as page size,
            capped at 500)
        fields: list[str] — fields requested from the API; a default
            set is used when omitted

    Returns:
        pandas.DataFrame with the columns accession, entry_name,
        protein_name, gene_names, organism, length and reviewed.
        The columns are present even when there are no hits.

    Raises:
        requests.HTTPError — when the API answers with an error status.
    """
    columns = [
        "accession", "entry_name", "protein_name", "gene_names",
        "organism", "length", "reviewed",
    ]
    url = f"{UNIPROT_BASE}/uniprotkb/search"
    default_fields = [
        "accession", "id", "protein_name", "gene_names",
        "organism_name", "length", "reviewed",
        "go_p", "go_f", "go_c",
    ]

    filters = []
    if organism:
        filters.append(f"organism_id:{organism}")
    if reviewed:
        filters.append("reviewed:true")

    # Parenthesise the user query before AND-ing filters onto it; otherwise
    # a query such as "A OR B" would only have its last term filtered.
    clauses = []
    user_query = (query or "").strip()
    if user_query:
        clauses.append(f"({user_query})" if filters else user_query)
    clauses.extend(filters)

    params = {
        # "*" matches everything when neither a query nor a filter is given.
        "query": " AND ".join(clauses) or "*",
        "size": min(limit, 500),
        "fields": ",".join(fields or default_fields),
        "format": "json",
    }

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = []
    for entry in data.get("results", []):
        # Skip genes without a primary name so the joined string does not
        # contain empty fragments.
        genes = [
            name
            for name in (g.get("geneName", {}).get("value", "")
                         for g in entry.get("genes", []))
            if name
        ]
        results.append({
            "accession": entry.get("primaryAccession", ""),
            "entry_name": entry.get("uniProtkbId", ""),
            "protein_name": _uniprot_protein_name(entry),
            "gene_names": "; ".join(genes),
            "organism": entry.get("organism", {}).get(
                "scientificName", ""),
            "length": entry.get("sequence", {}).get(
                "length", 0),
            "reviewed": entry.get("entryType", "") == "UniProtKB reviewed (Swiss-Prot)",
        })

    # Passing the column list keeps the schema stable for empty results.
    df = pd.DataFrame(results, columns=columns)
    print(f"UniProt search: {len(df)} entries "
          f"(query='{query}')")
    return df
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def uniprot_get_entry(accession, format="json"):
    """
    Fetch a single UniProtKB entry.

    Parameters:
        accession: str — UniProt accession (e.g. "P38398")
        format: str — response format sent to the API, e.g. "json"
            or "fasta"

    Returns:
        dict — the decoded JSON document when ``format`` is "json";
        str — the raw response text for every other format.

    Raises:
        requests.HTTPError — when the API answers with an error status.
    """
    url = f"{UNIPROT_BASE}/uniprotkb/{accession}"
    resp = requests.get(url, params={"format": format},
                        timeout=30)
    resp.raise_for_status()
    # Only the JSON format can be decoded; anything else is returned as
    # text instead of failing inside resp.json().
    if format == "json":
        return resp.json()
    return resp.text
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## 2. ID マッピング
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
import time
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def uniprot_id_mapping(from_db, to_db, ids):
    """
    Map identifiers between databases with the UniProt ID-mapping service.

    The service is asynchronous: a job is submitted, its status is polled,
    and the results are then downloaded page by page.

    Parameters:
        from_db: str — source database (e.g. "UniProtKB_AC-ID")
        to_db: str — target database (e.g. "Gene_Name", "PDB",
            "Ensembl", "RefSeq_Protein")
        ids: list[str] — identifiers to map; a single string is treated
            as one identifier

    Returns:
        pandas.DataFrame with the columns from_id and to_id (empty, but
        with both columns, when there is nothing to map).

    Raises:
        requests.HTTPError — when any request answers with an error status.
        RuntimeError — when the job reports a failure status.
        TimeoutError — when the job has not finished after 30 polls.
    """
    columns = ["from_id", "to_id"]

    # A bare string would otherwise be joined character by character.
    ids = [ids] if isinstance(ids, str) else list(ids)
    if not ids:
        df = pd.DataFrame(columns=columns)
        print(f"UniProt ID mapping: {len(df)} mappings "
              f"({from_db} → {to_db})")
        return df

    # Submit the job.
    url = f"{UNIPROT_BASE}/idmapping/run"
    resp = requests.post(url, data={
        "from": from_db, "to": to_db,
        "ids": ",".join(ids),
    }, timeout=30)
    resp.raise_for_status()
    job_id = resp.json()["jobId"]

    # Poll until the job is done (at most 30 attempts, 2 s apart).
    status_url = f"{UNIPROT_BASE}/idmapping/status/{job_id}"
    for _ in range(30):
        status_resp = requests.get(status_url, timeout=30)
        status_resp.raise_for_status()
        s = status_resp.json()
        job_status = s.get("jobStatus")
        if ("results" in s or "redirectURL" in s or "failedIds" in s
                or job_status == "FINISHED"):
            break
        # NOTE(review): failure status names assumed from the UniProt
        # ID-mapping documentation — confirm. Unknown statuses keep polling.
        if job_status in ("ERROR", "FAILED"):
            raise RuntimeError(
                f"UniProt ID mapping job {job_id} failed: {s}")
        time.sleep(2)
    else:
        raise TimeoutError(
            f"UniProt ID mapping job {job_id} did not finish in time")

    # Download every result page; the endpoint is paginated and announces
    # the next page through the HTTP Link header.
    next_url = (f"{UNIPROT_BASE}/idmapping/results/"
                f"{job_id}")
    page_params = {"size": 500}
    results = []
    while next_url:
        r = requests.get(next_url, params=page_params, timeout=30)
        r.raise_for_status()
        data = r.json()
        for item in data.get("results", []):
            results.append({
                "from_id": item.get("from", ""),
                "to_id": item.get("to", ""),
            })
        next_url = r.links.get("next", {}).get("url")
        # The "next" link already carries the cursor and the page size.
        page_params = None

    df = pd.DataFrame(results, columns=columns)
    print(f"UniProt ID mapping: {len(df)} mappings "
          f"({from_db} → {to_db})")
    return df
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## 3. 機能アノテーション取得
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
def uniprot_get_features(accession):
    """
    Collect the sequence features of one UniProtKB entry into a table.

    Parameters:
        accession: str — UniProt accession

    Returns:
        pandas.DataFrame with one row per feature: type, description,
        start, end (the location values, None when absent) and evidence
        (the number of evidence items attached to the feature).
    """
    record = uniprot_get_entry(accession)

    def _edge(span, side):
        # Position value of one end of a feature location, or None.
        return span.get(side, {}).get("value")

    rows = []
    for feat in record.get("features", []):
        span = feat.get("location", {})
        row = {
            "type": feat.get("type", ""),
            "description": feat.get("description", ""),
            "start": _edge(span, "start"),
            "end": _edge(span, "end"),
            "evidence": len(feat.get("evidences", [])),
        }
        rows.append(row)

    df = pd.DataFrame(rows)
    print(f"UniProt features: {accession} → {len(df)} features")
    return df
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
## 4. UniProt 統合パイプライン
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
def uniprot_pipeline(gene_names, organism="9606",
                     output_dir="results"):
    """
    Run the combined UniProt workflow: search each gene, save the hits,
    then fetch and save the features of the first hit.

    Parameters:
        gene_names: list[str] — gene symbols to search; a single string
            is treated as one gene symbol
        organism: str — taxonomy ID used to filter the search
        output_dir: str — directory for the CSV outputs (created if missing)

    Returns:
        dict with
            "entries": DataFrame of all search hits (empty when every
                search failed or no genes were given)
            "features": DataFrame of the first hit's features (empty when
                there were no hits)

    Side effects:
        Writes ``uniprot_entries.csv`` and, when there is at least one
        hit, ``protein_features.csv`` into ``output_dir``.
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Iterating a bare string would search each character separately.
    if isinstance(gene_names, str):
        gene_names = [gene_names]

    all_entries = []
    for gene in gene_names:
        try:
            df = uniprot_search(gene, organism=organism)
        except Exception as e:
            # Best effort: one failing gene must not abort the whole run.
            print(f" Warning: {gene} — {e}")
            continue
        all_entries.append(df)

    combined = pd.DataFrame()
    features = pd.DataFrame()
    if all_entries:
        combined = pd.concat(all_entries, ignore_index=True)
        combined.to_csv(output_dir / "uniprot_entries.csv",
                        index=False)

        # Features of the top entry only.
        if not combined.empty:
            top_acc = combined.iloc[0]["accession"]
            features = uniprot_get_features(top_acc)
            features.to_csv(
                output_dir / "protein_features.csv",
                index=False)

    print(f"UniProt pipeline: {output_dir}")
    return {"entries": combined, "features": features}
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
---
|
|
248
|
+
|
|
249
|
+
## ToolUniverse 連携
|
|
250
|
+
|
|
251
|
+
| TU Key | ツール名 | 連携内容 |
|
|
252
|
+
|--------|---------|---------|
|
|
253
|
+
| `uniprot` | UniProt | タンパク質検索・ID マッピング・配列・機能アノテーション |
|
|
254
|
+
|
|
255
|
+
## パイプライン統合
|
|
256
|
+
|
|
257
|
+
```
|
|
258
|
+
protein-structure-analysis → uniprot-proteome → protein-design
|
|
259
|
+
(PDB/AlphaFold 構造) (UniProt REST API) (de novo 設計)
|
|
260
|
+
│ │ ↓
|
|
261
|
+
alphafold-structures ──────────┘ drug-target-profiling
|
|
262
|
+
(AlphaFold DB) │ (標的プロファイリング)
|
|
263
|
+
↓
|
|
264
|
+
protein-domain-family
|
|
265
|
+
(InterPro/Pfam)
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
## パイプライン出力
|
|
269
|
+
|
|
270
|
+
| ファイル | 説明 | 次スキル |
|
|
271
|
+
|---------|------|---------|
|
|
272
|
+
| `results/uniprot_entries.csv` | タンパク質検索結果 | → protein-structure-analysis |
|
|
273
|
+
| `results/protein_features.csv` | 機能フィーチャー | → protein-domain-family |
|
|
@@ -5,6 +5,10 @@ description: |
|
|
|
5
5
|
基づく病原性評価、薬理ゲノミクス(PharmGKB/ClinPGx)、バリアント-表現型相関の
|
|
6
6
|
エビデンスグレーディング。ToolUniverse の Variant Interpretation パラダイムを統合。
|
|
7
7
|
「バリアントの病原性を評価して」「pharmacogenomics 解析して」で発火。
|
|
8
|
+
tu_tools:
|
|
9
|
+
- key: clinvar
|
|
10
|
+
name: ClinVar
|
|
11
|
+
description: 臨床的バリアント解釈データベース
|
|
8
12
|
---
|
|
9
13
|
|
|
10
14
|
# Scientific Variant Interpretation
|