@nahisaho/satori 0.11.1 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -47
- package/package.json +1 -1
- package/src/.github/skills/scientific-cancer-genomics/SKILL.md +287 -0
- package/src/.github/skills/scientific-clinical-reporting/SKILL.md +324 -0
- package/src/.github/skills/scientific-literature-search/SKILL.md +443 -0
- package/src/.github/skills/scientific-metabolomics-databases/SKILL.md +288 -0
- package/src/.github/skills/scientific-molecular-docking/SKILL.md +303 -0
- package/src/.github/skills/scientific-pathway-enrichment/SKILL.md +449 -0
- package/src/.github/skills/scientific-protein-domain-family/SKILL.md +369 -0
- package/src/.github/skills/scientific-protein-interaction-network/SKILL.md +352 -0
- package/src/.github/skills/scientific-systematic-review/SKILL.md +361 -0
- package/src/.github/skills/scientific-variant-effect-prediction/SKILL.md +325 -0
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-protein-interaction-network
|
|
3
|
+
description: |
|
|
4
|
+
タンパク質-タンパク質相互作用 (PPI) ネットワーク解析スキル。STRING、IntAct、
|
|
5
|
+
BioGRID、STITCH (化学-タンパク質) 相互作用データベースを統合した
|
|
6
|
+
ネットワーク構築・解析パイプライン。GO/KEGG 富化、相互作用パートナー発見、
|
|
7
|
+
組織特異的ネットワーク (HumanBase)、化合物-標的ネットワーク対応。
|
|
8
|
+
14 の ToolUniverse SMCP ツールと連携。
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Scientific Protein Interaction Network
|
|
12
|
+
|
|
13
|
+
STRING / IntAct / BioGRID / STITCH の 4 大 PPI データベースを統合した
|
|
14
|
+
タンパク質相互作用ネットワーク解析パイプラインを提供する。
|
|
15
|
+
|
|
16
|
+
## When to Use
|
|
17
|
+
|
|
18
|
+
- DEG や変異遺伝子の PPI ネットワークを構築するとき
|
|
19
|
+
- ハブタンパク質やボトルネックの特定が必要なとき
|
|
20
|
+
- 化合物と標的タンパク質の相互作用を調べるとき
|
|
21
|
+
- 組織特異的な相互作用ネットワークを評価するとき
|
|
22
|
+
- PPI データに基づく GO/KEGG 富化やモジュール解析を行うとき
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
## 1. STRING PPI ネットワーク取得
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import requests
|
|
32
|
+
import pandas as pd
|
|
33
|
+
import networkx as nx
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def string_get_interactions(proteins, species=9606,
                            score_threshold=400,
                            network_type="functional"):
    """
    Fetch a protein-protein interaction network from the STRING REST API.

    Input identifiers are first mapped to STRING IDs (batches of 10, best
    match only), then the network among the mapped proteins is requested
    and converted into an edge table and a NetworkX graph.

    Parameters:
        proteins: list[str] or str — protein names / UniProt IDs; a single
            string is treated as a one-element list
        species: int — NCBI Taxonomy ID (9606 = Homo sapiens)
        score_threshold: int — minimum combined confidence score (0-1000)
        network_type: "functional" or "physical"

    Returns:
        (df, G) — ``df`` is a DataFrame with one row per interaction
        (columns: protein_a, protein_b, score and the seven channel
        scores); ``G`` is an undirected ``nx.Graph`` whose ``weight`` edge
        attribute is the combined score on a 0-1 scale. Both are empty when
        nothing could be resolved or the network request failed.
    """
    base = "https://string-db.org/api/json"
    timeout = 30  # seconds; requests waits indefinitely without one
    columns = ["protein_a", "protein_b", "score", "nscore", "fscore",
               "pscore", "ascore", "escore", "dscore", "tscore"]

    # A bare string would otherwise be joined character by character.
    if isinstance(proteins, str):
        proteins = [proteins]
    proteins = [str(p) for p in proteins]

    # Resolve identifiers to STRING IDs
    resolve_url = f"{base}/get_string_ids"
    resolved = []
    for start in range(0, len(proteins), 10):
        batch = proteins[start:start + 10]
        params = {
            "identifiers": "\r".join(batch),
            "species": species,
            "limit": 1,
        }
        try:
            resp = requests.get(resolve_url, params=params, timeout=timeout)
            resp.raise_for_status()
            hits = resp.json()
        except (requests.RequestException, ValueError) as exc:
            # One failed batch should not discard the batches that worked.
            print(f"STRING ID resolution failed: {exc}")
            continue
        if not isinstance(hits, list):
            # Anything other than a list of matches is treated as "no hits".
            continue
        resolved.extend(
            hit["stringId"] for hit in hits
            if isinstance(hit, dict) and "stringId" in hit
        )

    # Drop duplicates while keeping the order of first appearance.
    resolved = list(dict.fromkeys(resolved))

    if not resolved:
        print("No proteins resolved")
        return pd.DataFrame(columns=columns), nx.Graph()

    # Fetch interactions
    interaction_url = f"{base}/network"
    params = {
        "identifiers": "\r".join(resolved),
        "species": species,
        "required_score": score_threshold,
        "network_type": network_type,
    }
    try:
        # POST keeps long identifier lists out of the URL, which would
        # otherwise exceed server length limits for large protein sets.
        resp = requests.post(interaction_url, data=params, timeout=timeout)
        resp.raise_for_status()
        interactions = resp.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"STRING network request failed: {exc}")
        return pd.DataFrame(columns=columns), nx.Graph()
    if not isinstance(interactions, list):
        print("STRING network request returned an unexpected payload")
        return pd.DataFrame(columns=columns), nx.Graph()

    edges = [
        {
            "protein_a": i["preferredName_A"],
            "protein_b": i["preferredName_B"],
            "score": i["score"],
            "nscore": i.get("nscore", 0),
            "fscore": i.get("fscore", 0),
            "pscore": i.get("pscore", 0),
            "ascore": i.get("ascore", 0),
            "escore": i.get("escore", 0),
            "dscore": i.get("dscore", 0),
            "tscore": i.get("tscore", 0),
        }
        for i in interactions
        if isinstance(i, dict)
    ]

    # Explicit columns keep the schema stable even when no edge came back.
    df = pd.DataFrame(edges, columns=columns)

    # Build the NetworkX graph
    G = nx.Graph()
    for a, b, score in zip(df["protein_a"], df["protein_b"], df["score"]):
        score = float(score)
        # The STRING JSON API reports scores as 0-1 probabilities; only
        # rescale if a 0-1000 style value is encountered.
        weight = score / 1000.0 if score > 1.0 else score
        G.add_edge(a, b, weight=weight)

    print(f"STRING network: {G.number_of_nodes()} nodes, "
          f"{G.number_of_edges()} edges (score ≥ {score_threshold})")
    return df, G
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## 2. IntAct 分子相互作用検索
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
def intact_search_interactions(query, species="human",
                               interaction_type=None,
                               max_results=200):
    """
    Search molecular interactions through the IntAct REST API.

    Parameters:
        query: str — protein name / UniProt ID
        species: str or int — "human" or a taxonomy ID; omitted from the
            request when falsy
        interaction_type: str — MI term (e.g., "MI:0407" physical association)
            NOTE(review): this argument is accepted for interface
            compatibility but is not sent to the service or used to filter
            results — confirm the intended server-side parameter.
        max_results: int — sent to the service as ``maxResults``

    Returns:
        DataFrame with one row per interaction (interactor IDs and labels,
        interaction type, detection method, confidence, publication).
        Empty when the request fails or nothing is returned.
    """
    columns = ["interactor_a", "interactor_a_name",
               "interactor_b", "interactor_b_name",
               "interaction_type", "detection_method",
               "confidence", "publication"]
    url = "https://www.ebi.ac.uk/intact/ws/interaction/findInteractions"
    params = {
        "query": query,
        "maxResults": max_results,
    }
    if species:
        params["species"] = species

    try:
        # Without a timeout a stalled connection blocks forever.
        resp = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"IntAct error: {exc}")
        return pd.DataFrame(columns=columns)
    if resp.status_code != 200:
        print(f"IntAct error: {resp.status_code}")
        return pd.DataFrame(columns=columns)

    try:
        data = resp.json()
    except ValueError as exc:
        # A 200 response with a non-JSON body (e.g. an HTML error page).
        print(f"IntAct error: {exc}")
        return pd.DataFrame(columns=columns)

    # Only a JSON object carrying a "content" list is understood here.
    interactions = (data.get("content") or []) if isinstance(data, dict) else []

    results = []
    for ix in interactions:
        if not isinstance(ix, dict):
            continue
        # `or {}` also covers interactors that are present but null.
        interactor_a = ix.get("interactorA") or {}
        interactor_b = ix.get("interactorB") or {}
        results.append({
            "interactor_a": interactor_a.get("preferredIdentifier", ""),
            "interactor_a_name": interactor_a.get("shortLabel", ""),
            "interactor_b": interactor_b.get("preferredIdentifier", ""),
            "interactor_b_name": interactor_b.get("shortLabel", ""),
            "interaction_type": ix.get("interactionType", ""),
            "detection_method": ix.get("detectionMethod", ""),
            "confidence": ix.get("confidenceValue", 0),
            "publication": ix.get("pubmedId", ""),
        })

    df = pd.DataFrame(results, columns=columns)
    print(f"IntAct: {len(df)} interactions for '{query}'")
    return df
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## 3. STITCH 化合物-タンパク質相互作用
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
def stitch_chemical_protein(chemicals, species=9606,
                            score_threshold=400):
    """
    Look up chemical-protein interactions through the STITCH API.

    Parameters:
        chemicals: list or str — compound names / CIDs; a single string is
            treated as a one-element list and non-string items (e.g.
            integer CIDs) are converted with ``str``
        species: int — NCBI Taxonomy ID
        score_threshold: int — minimum confidence score, sent as
            ``required_score``

    Returns:
        DataFrame with columns chemical, protein, score, type_a. Empty when
        no identifiers were given, the request failed, or nothing matched.
    """
    columns = ["chemical", "protein", "score", "type_a"]

    # A bare string would otherwise be joined character by character.
    if isinstance(chemicals, str):
        chemicals = [chemicals]
    identifiers = [str(c) for c in chemicals]

    if not identifiers:
        # Nothing to ask for — skip the network round trip entirely.
        df = pd.DataFrame(columns=columns)
        print(f"STITCH: {len(df)} chemical-protein interactions")
        return df

    url = "http://stitch.embl.de/api/json/interactionsList"
    params = {
        "identifiers": "\r".join(identifiers),
        "species": species,
        "required_score": score_threshold,
    }

    try:
        # Without a timeout an unresponsive server blocks forever.
        resp = requests.get(url, params=params, timeout=30)
        resp.raise_for_status()
        interactions = resp.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"STITCH error: {exc}")
        return pd.DataFrame(columns=columns)
    if not isinstance(interactions, list):
        print("STITCH error: unexpected response payload")
        return pd.DataFrame(columns=columns)

    results = [
        {
            "chemical": i.get("preferredName_A", ""),
            "protein": i.get("preferredName_B", ""),
            "score": i.get("score", 0),
            # Partner A is labelled a chemical when its taxon ID is -1.
            "type_a": "chemical" if i.get("ncbiTaxonId_A") == -1 else "protein",
        }
        for i in interactions
        if isinstance(i, dict)
    ]

    df = pd.DataFrame(results, columns=columns)
    print(f"STITCH: {len(df)} chemical-protein interactions")
    return df
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
## 4. PPI ネットワーク解析 (中心性・モジュール)
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
def analyze_ppi_network(G, community_method="louvain"):
    """
    Compute topology statistics for a PPI network.

    Parameters:
        G: nx.Graph — PPI network
        community_method: "louvain" or "label_propagation"; any value other
            than "louvain" selects label propagation

    Returns:
        (stats, centrality_df, partition)
        stats: dict with node/edge counts, density, average clustering,
            number of connected components, number of communities, and the
            top-10 hub (degree) and bottleneck (betweenness) proteins
        centrality_df: DataFrame with per-protein degree centrality,
            betweenness, closeness and community, sorted by degree
            centrality in descending order
        partition: dict mapping node -> community index
        For an empty graph the same three-part shape is returned with an
        empty dict, an empty DataFrame and an empty dict, so callers can
        always unpack the result.
    """
    centrality_columns = ["protein", "degree_centrality", "betweenness",
                          "closeness", "community"]
    if G.number_of_nodes() == 0:
        return {}, pd.DataFrame(columns=centrality_columns), {}

    # Centrality measures
    degree_cent = nx.degree_centrality(G)
    betweenness = nx.betweenness_centrality(G)
    closeness = nx.closeness_centrality(G)

    # Hub proteins (top 10 by degree centrality)
    hubs = sorted(degree_cent.items(), key=lambda x: -x[1])[:10]

    # Bottlenecks (top 10 by betweenness)
    bottlenecks = sorted(betweenness.items(), key=lambda x: -x[1])[:10]

    # Community detection
    if community_method == "louvain":
        try:
            from community import community_louvain
        except ImportError:
            # python-louvain is missing: use NetworkX's own Louvain when
            # this NetworkX version provides it, otherwise surface the
            # original ImportError.
            louvain = getattr(nx.community, "louvain_communities", None)
            if louvain is None:
                raise
            partition = {
                node: idx
                for idx, comm in enumerate(louvain(G))
                for node in comm
            }
        else:
            partition = community_louvain.best_partition(G)
    else:
        communities = nx.community.label_propagation_communities(G)
        partition = {
            node: idx
            for idx, comm in enumerate(communities)
            for node in comm
        }

    n_communities = len(set(partition.values()))

    stats = {
        "nodes": G.number_of_nodes(),
        "edges": G.number_of_edges(),
        "density": round(nx.density(G), 4),
        "avg_clustering": round(nx.average_clustering(G), 4),
        "connected_components": nx.number_connected_components(G),
        "communities": n_communities,
        "hub_proteins": [h[0] for h in hubs],
        "bottleneck_proteins": [b[0] for b in bottlenecks],
    }

    proteins = list(degree_cent)
    centrality_df = pd.DataFrame({
        "protein": proteins,
        "degree_centrality": [degree_cent[n] for n in proteins],
        "betweenness": [betweenness[n] for n in proteins],
        "closeness": [closeness[n] for n in proteins],
        # -1 marks a node the partition did not assign to any community.
        "community": [partition.get(n, -1) for n in proteins],
    }).sort_values("degree_centrality", ascending=False)

    print(f"PPI analysis: {stats['nodes']} nodes, {stats['edges']} edges, "
          f"{n_communities} communities")
    return stats, centrality_df, partition
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
## 5. PPI ネットワーク可視化
|
|
256
|
+
|
|
257
|
+
```python
|
|
258
|
+
def visualize_ppi_network(G, partition=None, hub_proteins=None,
                          output="figures/ppi_network.png",
                          layout="spring"):
    """
    Draw a PPI network and save it as an image.

    Parameters:
        G: nx.Graph — PPI network
        partition: dict node -> community index; colours nodes by community
            when given, otherwise all nodes are "steelblue"
        hub_proteins: iterable of node names to label; when omitted, nodes
            with degree >= 5 are labelled
        output: path of the image file; its parent directory is created if
            the path has one
        layout: "spring" or "kamada_kawai"

    Raises:
        ValueError: if ``layout`` is not one of the supported names.
    """
    # Validate first so a typo fails fast, before any directory or figure
    # is created.
    if layout not in ("spring", "kamada_kawai"):
        raise ValueError(
            f"Unsupported layout: {layout!r} "
            "(expected 'spring' or 'kamada_kawai')"
        )

    import os
    import matplotlib.pyplot as plt

    # dirname is "" for a bare file name, and os.makedirs("") raises.
    out_dir = os.path.dirname(output)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    if layout == "spring":
        pos = nx.spring_layout(G, k=1.5, seed=42)
    else:
        pos = nx.kamada_kawai_layout(G)

    # Node size scales with degree
    node_sizes = [300 + 100 * G.degree(n) for n in G.nodes()]

    # Community colours
    if partition:
        import matplotlib.cm as cm
        n_comm = len(set(partition.values()))
        colors = [cm.Set3(partition.get(n, 0) / max(n_comm, 1)) for n in G.nodes()]
    else:
        colors = "steelblue"

    # Labels for hub proteins only, to keep the figure readable
    if hub_proteins:
        hub_set = set(hub_proteins)
        labels = {n: n for n in G.nodes() if n in hub_set}
    else:
        labels = {n: n for n in G.nodes() if G.degree(n) >= 5}

    fig, ax = plt.subplots(figsize=(14, 14))
    try:
        nx.draw_networkx_edges(G, pos, alpha=0.2, ax=ax)
        nx.draw_networkx_nodes(G, pos, node_size=node_sizes,
                               node_color=colors, alpha=0.8, ax=ax)
        nx.draw_networkx_labels(G, pos, labels, font_size=8, ax=ax)

        ax.set_title(f"PPI Network ({G.number_of_nodes()} proteins, "
                     f"{G.number_of_edges()} interactions)")
        ax.axis("off")
        fig.tight_layout()
        fig.savefig(output, dpi=300, bbox_inches="tight")
    finally:
        # Close this specific figure even if drawing or saving fails.
        plt.close(fig)
    print(f"Saved: {output}")
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
## References
|
|
307
|
+
|
|
308
|
+
### Output Files
|
|
309
|
+
|
|
310
|
+
| ファイル | 形式 |
|
|
311
|
+
|---|---|
|
|
312
|
+
| `results/string_interactions.csv` | CSV |
|
|
313
|
+
| `results/intact_interactions.csv` | CSV |
|
|
314
|
+
| `results/stitch_interactions.csv` | CSV |
|
|
315
|
+
| `results/ppi_centrality.csv` | CSV |
|
|
316
|
+
| `results/ppi_network.graphml` | GraphML |
|
|
317
|
+
| `figures/ppi_network.png` | PNG |
|
|
318
|
+
|
|
319
|
+
### 利用可能ツール
|
|
320
|
+
|
|
321
|
+
> [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
|
|
322
|
+
|
|
323
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
324
|
+
|---|---|---|
|
|
325
|
+
| IntAct | `intact_search_interactions` | 分子相互作用検索 |
|
|
326
|
+
| IntAct | `intact_get_interactions` | 相互作用データ取得 |
|
|
327
|
+
| IntAct | `intact_get_interactor` | 相互作用因子詳細 |
|
|
328
|
+
| IntAct | `intact_get_interaction_details` | 相互作用詳細 |
|
|
329
|
+
| IntAct | `intact_get_interaction_network` | ネットワーク取得 |
|
|
330
|
+
| IntAct | `intact_get_interactions_by_organism` | 生物種別相互作用 |
|
|
331
|
+
| IntAct | `intact_get_interactions_by_complex` | 複合体別相互作用 |
|
|
332
|
+
| IntAct | `intact_get_complex_details` | 複合体詳細 |
|
|
333
|
+
| STRING/BioGRID | `STRING_get_protein_interactions` | STRING PPI 取得 |
|
|
334
|
+
| STRING/BioGRID | `BioGRID_get_interactions` | BioGRID 相互作用取得 |
|
|
335
|
+
| STITCH | `STITCH_get_chemical_protein_interactions` | 化合物-タンパク質相互作用 |
|
|
336
|
+
| STITCH | `STITCH_get_interaction_partners` | 相互作用パートナー |
|
|
337
|
+
| STITCH | `STITCH_resolve_identifier` | 化合物 ID 解決 |
|
|
338
|
+
| HumanBase | `humanbase_ppi_analysis` | 組織特異的 PPI 解析 |
|
|
339
|
+
|
|
340
|
+
### 参照スキル
|
|
341
|
+
|
|
342
|
+
| スキル | 関連 |
|
|
343
|
+
|---|---|
|
|
344
|
+
| `scientific-drug-target-profiling` | 標的タンパク質 → PPI 拡張 |
|
|
345
|
+
| `scientific-network-analysis` | 汎用ネットワーク解析手法 |
|
|
346
|
+
| `scientific-pathway-enrichment` | PPI モジュール → パスウェイ富化 |
|
|
347
|
+
| `scientific-protein-structure-analysis` | 構造情報 → 相互作用界面 |
|
|
348
|
+
| `scientific-systems-biology` | GRN ↔ PPI 統合 |
|
|
349
|
+
|
|
350
|
+
### 依存パッケージ
|
|
351
|
+
|
|
352
|
+
`networkx`, `requests`, `pandas`, `matplotlib`, `python-louvain` (community)
|