@nahisaho/satori 0.14.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -15
- package/package.json +1 -1
- package/src/.github/skills/scientific-advanced-imaging/SKILL.md +382 -0
- package/src/.github/skills/scientific-chembl-assay-mining/SKILL.md +509 -0
- package/src/.github/skills/scientific-deep-chemistry/SKILL.md +350 -0
- package/src/.github/skills/scientific-ensembl-genomics/SKILL.md +378 -0
- package/src/.github/skills/scientific-expression-comparison/SKILL.md +303 -0
- package/src/.github/skills/scientific-md-simulation/SKILL.md +315 -0
- package/src/.github/skills/scientific-model-organism-db/SKILL.md +329 -0
- package/src/.github/skills/scientific-perturbation-analysis/SKILL.md +297 -0
- package/src/.github/skills/scientific-scvi-integration/SKILL.md +344 -0
- package/src/.github/skills/scientific-string-network-api/SKILL.md +376 -0
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-string-network-api
|
|
3
|
+
description: |
|
|
4
|
+
STRING/BioGRID/STITCH ネットワーク解析スキル。STRING タンパク質相互作用
|
|
5
|
+
ネットワーク直接 API、BioGRID 実験的 PPI、STITCH 化学-タンパク質ネットワーク、
|
|
6
|
+
ネットワークトポロジー解析・コミュニティ検出・機能濃縮統合パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific STRING Network API
|
|
10
|
+
|
|
11
|
+
STRING v12 / BioGRID / STITCH API を活用した PPI・化合物-タンパク質
|
|
12
|
+
ネットワーク解析パイプラインを提供する。既存の protein-interaction-network
|
|
13
|
+
スキル (IntAct/HumanBase) を補完し、STRING 直接 API ベースの高度な
|
|
14
|
+
ネットワーク分析を統合。
|
|
15
|
+
|
|
16
|
+
## When to Use
|
|
17
|
+
|
|
18
|
+
- STRING API でタンパク質相互作用ネットワークを直接構築するとき
|
|
19
|
+
- BioGRID から実験的エビデンスベースの PPI を取得するとき
|
|
20
|
+
- STITCH で化合物-タンパク質間ネットワークを検索するとき
|
|
21
|
+
- ネットワークトポロジー指標 (次数分布・媒介中心性) を計算するとき
|
|
22
|
+
- PPI ネットワーク上でコミュニティ検出を行うとき
|
|
23
|
+
- 機能濃縮解析 (STRING enrichment) をネットワーク上で実行するとき
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
## 1. STRING PPI ネットワーク取得
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import requests
|
|
33
|
+
import pandas as pd
|
|
34
|
+
import networkx as nx
|
|
35
|
+
|
|
36
|
+
STRING_API = "https://string-db.org/api"
|
|
37
|
+
OUTPUT_FORMAT = "json"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_string_network(proteins, species=9606, score_threshold=400,
                       network_type="functional", limit=50):
    """
    Fetch a STRING protein-protein interaction network as a DataFrame.

    Parameters:
        proteins: list — protein names (e.g. ["TP53", "MDM2", "BRCA1"])
        species: int — NCBI Taxonomy ID (9606 = human)
        score_threshold: int — minimum combined confidence score (0-1000)
        network_type: str — "functional" or "physical"
        limit: int — maximum number of interactors (sent as ``limit``)

    Returns:
        pandas.DataFrame with one row per interaction and the columns
        protein_a, protein_b, combined_score, nscore, fscore, pscore,
        ascore, escore, dscore, tscore. The columns are present even when
        there are no interactions. An empty ``proteins`` list yields an
        empty frame without contacting STRING.

    Raises:
        requests.HTTPError: if STRING answers with an error status.
        requests.Timeout: if STRING does not answer within 60 seconds.

    ToolUniverse:
        STRING_get_protein_interactions(
            protein_ids=proteins, species=species,
            confidence_score=score_threshold/1000,
            network_type=network_type, limit=limit
        )
    """
    # Output column -> key of the STRING JSON interaction record.
    field_map = {
        "protein_a": "preferredName_A",
        "protein_b": "preferredName_B",
        "combined_score": "score",
        "nscore": "nscore",
        "fscore": "fscore",
        "pscore": "pscore",
        "ascore": "ascore",
        "escore": "escore",
        "dscore": "dscore",
        "tscore": "tscore",
    }

    interactions = []
    if proteins:
        url = f"{STRING_API}/{OUTPUT_FORMAT}/network"
        params = {
            # STRING expects identifiers separated by carriage returns.
            "identifiers": "\r".join(proteins),
            "species": species,
            "required_score": score_threshold,
            "network_type": network_type,
            # NOTE(review): the STRING `network` endpoint documents
            # `add_nodes`, while `limit` belongs to `interaction_partners`;
            # confirm this parameter has an effect here.
            "limit": limit,
        }
        # A timeout keeps an unresponsive server from blocking forever.
        resp = requests.post(url, data=params, timeout=60)
        resp.raise_for_status()
        interactions = resp.json()

    rows = [
        {column: record.get(key) for column, key in field_map.items()}
        for record in interactions
    ]

    # Explicit columns keep the schema stable for an empty result.
    df = pd.DataFrame(rows, columns=list(field_map))
    print(f"STRING network: {len(df)} interactions "
          f"(score ≥ {score_threshold/1000})")
    return df
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## 2. BioGRID 実験的 PPI 取得
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
def get_biogrid_interactions(genes, organism=9606, evidence_type=None,
                             api_key="YOUR_KEY", limit=500):
    """
    Fetch experimentally supported interactions from BioGRID.

    Parameters:
        genes: list — gene names
        organism: int — NCBI Taxonomy ID (sent as BioGRID's ``taxId``)
        evidence_type: str — "physical" or "genetic"; when given, only
            records whose EXPERIMENTAL_SYSTEM_TYPE matches are kept
            (case-insensitive). ``None`` keeps every record.
        api_key: str — BioGRID API key (https://webservice.thebiogrid.org)
        limit: int — maximum number of records requested (``max``)

    Returns:
        pandas.DataFrame with the columns gene_a, gene_b,
        experimental_system, throughput, pubmed_id, source_db. The columns
        are present even when nothing matches. Because the evidence filter
        is applied after download, fewer than ``limit`` rows may remain.

    Raises:
        requests.HTTPError: if BioGRID answers with an error status.
        requests.Timeout: if BioGRID does not answer within 60 seconds.

    ToolUniverse:
        BioGRID_get_interactions(
            gene_names=genes, organism=organism,
            interaction_type=evidence_type, limit=limit
        )
    """
    url = "https://webservice.thebiogrid.org/interactions"
    params = {
        "accessKey": api_key,
        "geneList": "|".join(genes),
        # BioGRID's REST parameter for the organism filter is `taxId`.
        "taxId": organism,
        "format": "json",
        "max": limit,
        "searchNames": "true",
        "includeInteractors": "true",
    }

    # A timeout keeps an unresponsive server from blocking forever.
    resp = requests.get(url, params=params, timeout=60)
    resp.raise_for_status()
    data = resp.json()

    # The JSON payload is normally an object keyed by interaction ID.
    # NOTE(review): an empty result may come back as a JSON list, so both
    # shapes are accepted — confirm against the live service.
    records = data.values() if isinstance(data, dict) else data

    wanted = evidence_type.lower() if evidence_type else None
    rows = []
    for interaction in records:
        if wanted is not None:
            system_type = str(interaction.get("EXPERIMENTAL_SYSTEM_TYPE", ""))
            if system_type.lower() != wanted:
                continue
        rows.append({
            "gene_a": interaction.get("OFFICIAL_SYMBOL_A"),
            "gene_b": interaction.get("OFFICIAL_SYMBOL_B"),
            "experimental_system": interaction.get("EXPERIMENTAL_SYSTEM"),
            "throughput": interaction.get("THROUGHPUT"),
            "pubmed_id": interaction.get("PUBMED_ID"),
            "source_db": "BioGRID",
        })

    columns = ["gene_a", "gene_b", "experimental_system",
               "throughput", "pubmed_id", "source_db"]
    # Explicit columns keep the schema stable for an empty result.
    df = pd.DataFrame(rows, columns=columns)
    print(f"BioGRID: {len(df)} interactions for {genes}")
    return df
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## 3. STITCH 化合物-タンパク質ネットワーク
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
def get_stitch_interactions(identifiers, species=9606, score=400, limit=20):
    """
    Fetch chemical-protein interactions from STITCH as a DataFrame.

    Parameters:
        identifiers: list — compound CIDs or gene names
        species: int — NCBI Taxonomy ID
        score: int — minimum confidence score (``required_score``)
        limit: int — maximum number of results

    Returns:
        pandas.DataFrame with the columns interactor_a, interactor_b,
        combined_score, is_chemical. ``is_chemical`` is True when either
        STITCH identifier of the pair contains "CID". The columns are
        present even when there are no interactions. An empty
        ``identifiers`` list yields an empty frame without contacting
        STITCH.

    Raises:
        requests.HTTPError: if STITCH answers with an error status.
        requests.Timeout: if STITCH does not answer within 60 seconds.

    ToolUniverse:
        STITCH_get_chemical_protein_interactions(
            identifiers=identifiers, species=species,
            required_score=score, limit=limit
        )
        STITCH_get_interaction_partners(identifiers=identifiers)
        STITCH_resolve_identifier(identifiers=identifiers)
    """
    interactions = []
    if identifiers:
        url = f"https://stitch.embl.de/api/{OUTPUT_FORMAT}/interactionsList"
        params = {
            # Identifiers are separated by carriage returns.
            "identifiers": "\r".join(identifiers),
            "species": species,
            "required_score": score,
            "limit": limit,
        }
        # A timeout keeps an unresponsive server from blocking forever.
        resp = requests.post(url, data=params, timeout=60)
        resp.raise_for_status()
        interactions = resp.json()

    rows = []
    for record in interactions:
        id_a = record.get("stringId_A")
        id_b = record.get("stringId_B")
        rows.append({
            # Fall back to the raw ID when the preferred name is missing
            # or null.
            "interactor_a": record.get("preferredName_A") or id_a,
            "interactor_b": record.get("preferredName_B") or id_b,
            "combined_score": record.get("score"),
            "is_chemical": "CID" in str(record.get("stringId_A", ""))
                           or "CID" in str(record.get("stringId_B", "")),
        })

    columns = ["interactor_a", "interactor_b", "combined_score", "is_chemical"]
    # Explicit columns keep the schema stable for an empty result.
    df = pd.DataFrame(rows, columns=columns)
    print(f"STITCH: {len(df)} chemical-protein interactions")
    return df
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
## 4. ネットワーク構築 & トポロジー解析
|
|
196
|
+
|
|
197
|
+
```python
|
|
198
|
+
def build_network(interaction_df, source_col="protein_a", target_col="protein_b",
                  weight_col="combined_score"):
    """
    Build an undirected NetworkX graph from an interaction table and
    compute per-node topology metrics.

    Parameters:
        interaction_df: DataFrame — one interaction per row
        source_col, target_col: str — columns holding the two node names
        weight_col: str — column holding the edge weight; rows that lack
            it get weight 1.0

    Returns:
        (G, metrics): the ``nx.Graph`` and a DataFrame with the columns
        node, degree, betweenness, closeness, clustering, sorted by
        betweenness in descending order.
    """
    graph = nx.Graph()
    for _, record in interaction_df.iterrows():
        node_u = record[source_col]
        node_v = record[target_col]
        edge_weight = record.get(weight_col, 1.0)
        graph.add_edge(node_u, node_v, weight=edge_weight)

    # Topology metrics, each a mapping keyed by node.
    degree_of = dict(graph.degree())
    betweenness_of = nx.betweenness_centrality(graph)
    closeness_of = nx.closeness_centrality(graph)
    clustering_of = nx.clustering(graph)

    # One node ordering so every metric column lines up.
    node_order = list(degree_of)
    table = pd.DataFrame({
        "node": node_order,
        "degree": [degree_of[name] for name in node_order],
        "betweenness": [betweenness_of[name] for name in node_order],
        "closeness": [closeness_of[name] for name in node_order],
        "clustering": [clustering_of[name] for name in node_order],
    })
    table = table.sort_values("betweenness", ascending=False)

    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    print(f"Network: {node_count} nodes, "
          f"{edge_count} edges, "
          f"density={nx.density(graph):.4f}")
    return graph, table
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
## 5. コミュニティ検出
|
|
236
|
+
|
|
237
|
+
```python
|
|
238
|
+
from networkx.algorithms.community import greedy_modularity_communities
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def detect_communities(G, resolution=1.0):
    """
    Detect communities (modules) in a network by greedy modularity
    maximisation.

    Parameters:
        G: nx.Graph — network graph
        resolution: float — resolution passed to
            ``greedy_modularity_communities``

    Returns:
        (df, modularity): a DataFrame with the columns node and community
        (integer community index), and the modularity of the partition.
        For a graph without edges every node becomes its own community and
        the modularity is reported as 0.0.
    """
    if G.number_of_edges() == 0:
        # Modularity is undefined without edges (its normalisation divides
        # by the total degree), so skip the optimiser and return the
        # trivial partition.
        communities = [frozenset([node]) for node in G]
        modularity = 0.0
    else:
        communities = list(
            greedy_modularity_communities(G, resolution=resolution)
        )
        # NOTE(review): the partition is computed with the optimiser's
        # default weighting while `modularity` defaults to the "weight"
        # edge attribute, and it is scored at the default resolution —
        # confirm this is intended.
        modularity = nx.algorithms.community.modularity(G, communities)

    comm_data = []
    for index, members in enumerate(communities):
        for node in members:
            comm_data.append({"node": node, "community": index})

    # Explicit columns so callers can index "community" on an empty graph.
    df = pd.DataFrame(comm_data, columns=["node", "community"])
    print(f"Communities: {len(communities)} detected, "
          f"modularity={modularity:.4f}")
    return df, modularity
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
## 6. STRING 機能濃縮解析
|
|
264
|
+
|
|
265
|
+
```python
|
|
266
|
+
def string_enrichment(proteins, species=9606):
    """
    Run STRING functional enrichment (GO/KEGG/Reactome/InterPro) for a
    protein list.

    Parameters:
        proteins: list — protein names
        species: int — NCBI Taxonomy ID

    Returns:
        pandas.DataFrame sorted by FDR with the columns category, term,
        description, p_value, fdr, number_of_genes, input_genes. The
        columns are present even when no term is returned. An empty
        ``proteins`` list yields an empty frame without contacting STRING.

    Raises:
        requests.HTTPError: if STRING answers with an error status.
        requests.Timeout: if STRING does not answer within 60 seconds.
    """
    enrichment = []
    if proteins:
        url = f"{STRING_API}/{OUTPUT_FORMAT}/enrichment"
        params = {
            # STRING expects identifiers separated by carriage returns.
            "identifiers": "\r".join(proteins),
            "species": species,
        }
        # A timeout keeps an unresponsive server from blocking forever.
        resp = requests.post(url, data=params, timeout=60)
        resp.raise_for_status()
        enrichment = resp.json()

    rows = [
        {
            "category": entry.get("category"),
            "term": entry.get("term"),
            "description": entry.get("description"),
            "p_value": entry.get("p_value"),
            "fdr": entry.get("fdr"),
            "number_of_genes": entry.get("number_of_genes"),
            "input_genes": entry.get("inputGenes", ""),
        }
        for entry in enrichment
    ]

    columns = ["category", "term", "description", "p_value", "fdr",
               "number_of_genes", "input_genes"]
    # Explicit columns: without them an empty result has no "fdr" column
    # and the summary below would raise KeyError.
    df = pd.DataFrame(rows, columns=columns)

    n_significant = 0
    if not df.empty:
        df = df.sort_values("fdr")
        n_significant = int((df["fdr"] < 0.05).sum())

    print(f"Enrichment: {len(df)} terms, "
          f"{n_significant} significant (FDR<0.05)")
    return df
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
## 7. 統合 PPI 解析パイプライン
|
|
305
|
+
|
|
306
|
+
```python
|
|
307
|
+
def integrated_ppi_pipeline(genes, species=9606, score=700):
    """
    Run the STRING-based PPI analysis pipeline and summarise the result.

    Pipeline:
        STRING network → topology → communities → enrichment

    Only STRING is queried here. BioGRID and STITCH lookups are not part
    of this function and must be called separately.

    Parameters:
        genes: list — seed gene/protein names
        species: int — NCBI Taxonomy ID
        score: int — STRING confidence threshold (0-1000)

    Returns:
        dict with the keys n_nodes, n_edges, density, n_communities,
        modularity, hub_genes (up to 10 nodes with the highest
        betweenness) and n_enriched_terms (terms with FDR < 0.05).
        When STRING returns no interactions every count is 0 and
        hub_genes is empty.
    """
    # STRING network
    string_df = get_string_network(genes, species, score)

    if string_df.empty:
        # No edges: community detection and enrichment are skipped
        # because they cannot operate on an empty network.
        result = {
            "n_nodes": 0,
            "n_edges": 0,
            "density": 0.0,
            "n_communities": 0,
            "modularity": 0.0,
            "hub_genes": [],
            "n_enriched_terms": 0,
        }
    else:
        # Network construction & topology
        G, metrics = build_network(string_df)

        # Community detection
        comm_df, modularity = detect_communities(G)

        # STRING enrichment, capped at the first 500 nodes
        all_nodes = list(G.nodes())
        enrichment = string_enrichment(all_nodes[:500], species)

        if enrichment.empty:
            n_enriched = 0
        else:
            n_enriched = int((enrichment["fdr"] < 0.05).sum())

        result = {
            "n_nodes": G.number_of_nodes(),
            "n_edges": G.number_of_edges(),
            "density": round(nx.density(G), 4),
            "n_communities": comm_df["community"].nunique(),
            "modularity": round(modularity, 4),
            "hub_genes": metrics.head(10)["node"].tolist(),
            "n_enriched_terms": n_enriched,
        }

    print("\n=== Integrated PPI Pipeline ===")
    print(f"Nodes: {result['n_nodes']}, Edges: {result['n_edges']}")
    print(f"Hub genes: {', '.join(map(str, result['hub_genes'][:5]))}")
    return result
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
---
|
|
346
|
+
|
|
347
|
+
## パイプライン統合
|
|
348
|
+
|
|
349
|
+
```
|
|
350
|
+
drug-target-profiling → string-network-api → pathway-enrichment
|
|
351
|
+
(候補ターゲット) (STRING PPI 構築) (GO/KEGG 濃縮)
|
|
352
|
+
│ │ ↓
|
|
353
|
+
protein-interaction ───┘ │ ontology-enrichment
|
|
354
|
+
(IntAct/HumanBase) ↓ (EFO/Enrichr)
|
|
355
|
+
network-analysis
|
|
356
|
+
(既存スキル補完)
|
|
357
|
+
```
|
|
358
|
+
|
|
359
|
+
## パイプライン出力
|
|
360
|
+
|
|
361
|
+
| ファイル | 説明 | 次スキル |
|
|
362
|
+
|---------|------|---------|
|
|
363
|
+
| `results/string_network.csv` | STRING PPI ネットワーク | → network-analysis |
|
|
364
|
+
| `results/ppi_topology.csv` | トポロジー指標 | → drug-target-profiling |
|
|
365
|
+
| `results/ppi_communities.csv` | コミュニティ割当 | → pathway-enrichment |
|
|
366
|
+
| `results/string_enrichment.csv` | 機能濃縮結果 | → ontology-enrichment |
|
|
367
|
+
|
|
368
|
+
## 利用可能ツール (ToolUniverse SMCP)
|
|
369
|
+
|
|
370
|
+
| ツール名 | 用途 |
|
|
371
|
+
|---------|------|
|
|
372
|
+
| `STRING_get_protein_interactions` | STRING PPI 取得 |
|
|
373
|
+
| `BioGRID_get_interactions` | BioGRID 実験的 PPI |
|
|
374
|
+
| `STITCH_get_chemical_protein_interactions` | STITCH 化合物-タンパク質 |
|
|
375
|
+
| `STITCH_get_interaction_partners` | STITCH 相互作用パートナー |
|
|
376
|
+
| `STITCH_resolve_identifier` | STITCH ID 解決 |
|