@nahisaho/satori 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,376 @@
1
+ ---
2
+ name: scientific-string-network-api
3
+ description: |
4
+ STRING/BioGRID/STITCH ネットワーク解析スキル。STRING タンパク質相互作用
5
+ ネットワーク直接 API、BioGRID 実験的 PPI、STITCH 化学-タンパク質ネットワーク、
6
+ ネットワークトポロジー解析・コミュニティ検出・機能濃縮統合パイプライン。
7
+ ---
8
+
9
+ # Scientific STRING Network API
10
+
11
+ STRING v12 / BioGRID / STITCH API を活用した PPI・化合物-タンパク質
12
+ ネットワーク解析パイプラインを提供する。既存の protein-interaction-network
13
+ スキル (IntAct/HumanBase) を補完し、STRING 直接 API ベースの高度な
14
+ ネットワーク分析を統合する。
15
+
16
+ ## When to Use
17
+
18
+ - STRING API でタンパク質相互作用ネットワークを直接構築するとき
19
+ - BioGRID から実験的エビデンスベースの PPI を取得するとき
20
+ - STITCH で化合物-タンパク質間ネットワークを検索するとき
21
+ - ネットワークトポロジー指標 (次数分布・媒介中心性) を計算するとき
22
+ - PPI ネットワーク上でコミュニティ検出を行うとき
23
+ - 機能濃縮解析 (STRING enrichment) をネットワーク上で実行するとき
24
+
25
+ ---
26
+
27
+ ## Quick Start
28
+
29
+ ## 1. STRING PPI ネットワーク取得
30
+
31
+ ```python
32
+ import requests
33
+ import pandas as pd
34
+ import networkx as nx
35
+
36
# Base URL of the STRING REST API; request URLs are built as
# f"{STRING_API}/{OUTPUT_FORMAT}/<endpoint>".
STRING_API = "https://string-db.org/api"
# Response-format path segment placed between the base URL and the endpoint.
OUTPUT_FORMAT = "json"
38
+
39
+
40
def get_string_network(proteins, species=9606, score_threshold=400,
                       network_type="functional", limit=50, timeout=30):
    """
    Fetch a STRING protein-protein interaction (PPI) network.

    Parameters:
        proteins: list — protein names (e.g. ["TP53", "MDM2", "BRCA1"]);
            a single name given as a plain string is also accepted
        species: int — NCBI Taxonomy ID (9606 = human)
        score_threshold: int — confidence score threshold (0-1000)
        network_type: str — "functional" or "physical"
        limit: int — maximum number of interactors
        timeout: float — seconds to wait for the server before requests
            raises a timeout error

    Returns:
        pandas.DataFrame — one row per interaction with the columns
        protein_a, protein_b, combined_score and the per-channel scores
        (nscore, fscore, pscore, ascore, escore, dscore, tscore). The
        columns are present even when no interaction is returned.

    Raises:
        requests.HTTPError — if the server answers with an error status.

    ToolUniverse:
        STRING_get_protein_interactions(
            protein_ids=proteins, species=species,
            confidence_score=score_threshold/1000,
            network_type=network_type, limit=limit
        )
    """
    # A bare string would otherwise be joined character by character.
    if isinstance(proteins, str):
        proteins = [proteins]

    # Output column -> key in each STRING JSON record.
    field_map = {
        "protein_a": "preferredName_A",
        "protein_b": "preferredName_B",
        "combined_score": "score",
        "nscore": "nscore",
        "fscore": "fscore",
        "pscore": "pscore",
        "ascore": "ascore",
        "escore": "escore",
        "dscore": "dscore",
        "tscore": "tscore",
    }

    url = f"{STRING_API}/{OUTPUT_FORMAT}/network"
    params = {
        # Identifiers are sent as one carriage-return separated string.
        "identifiers": "\r".join(proteins),
        "species": species,
        "required_score": score_threshold,
        "network_type": network_type,
        # NOTE(review): the STRING `network` endpoint documents `add_nodes`
        # for pulling in extra interactors; confirm `limit` has an effect.
        "limit": limit,
    }

    # Without a timeout requests can block forever on a stalled server.
    resp = requests.post(url, data=params, timeout=timeout)
    resp.raise_for_status()
    interactions = resp.json()

    rows = [
        {column: record.get(key) for column, key in field_map.items()}
        for record in interactions
    ]

    # Explicit columns keep the schema stable when `rows` is empty.
    df = pd.DataFrame(rows, columns=list(field_map))
    print(f"STRING network: {len(df)} interactions "
          f"(score ≥ {score_threshold/1000})")
    return df
91
+ ```
92
+
93
+ ## 2. BioGRID 実験的 PPI 取得
94
+
95
+ ```python
96
def get_biogrid_interactions(genes, organism=9606, evidence_type=None,
                             api_key="YOUR_KEY", limit=500, timeout=30):
    """
    Fetch experimentally supported PPI records from BioGRID.

    Parameters:
        genes: list — gene names; a single name given as a plain string
            is also accepted
        organism: int — NCBI Taxonomy ID
        evidence_type: str — "physical" or "genetic"; when given, only
            records whose EXPERIMENTAL_SYSTEM_TYPE matches are kept
            (case-insensitive). None keeps every record.
        api_key: str — BioGRID API key (https://webservice.thebiogrid.org)
        limit: int — maximum number of records requested from the server;
            the evidence_type filter runs afterwards, so fewer rows may
            be returned
        timeout: float — seconds to wait for the server before requests
            raises a timeout error

    Returns:
        pandas.DataFrame — columns gene_a, gene_b, experimental_system,
        throughput, pubmed_id, source_db (present even when empty).

    Raises:
        requests.HTTPError — if the server answers with an error status.

    ToolUniverse:
        BioGRID_get_interactions(
            gene_names=genes, organism=organism,
            interaction_type=evidence_type, limit=limit
        )
    """
    # A bare string would otherwise be joined character by character.
    if isinstance(genes, str):
        genes = [genes]

    url = "https://webservice.thebiogrid.org/interactions"
    params = {
        "accessKey": api_key,
        "geneList": "|".join(genes),
        # The BioGRID REST service names its organism filter `taxId`.
        "taxId": organism,
        "format": "json",
        "max": limit,
        "searchNames": "true",
        "includeInteractors": "true",
    }

    # Without a timeout requests can block forever on a stalled server.
    resp = requests.get(url, params=params, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # JSON results are keyed by interaction ID; only the values are needed.
    # Tolerate a list-shaped payload as well (e.g. an empty result).
    records = data.values() if isinstance(data, dict) else data

    # The physical/genetic class is reported per record, so the
    # evidence_type filter is applied client-side.
    wanted_type = evidence_type.lower() if evidence_type else None

    rows = []
    for interaction in records:
        if wanted_type is not None:
            system_type = str(interaction.get("EXPERIMENTAL_SYSTEM_TYPE", ""))
            if system_type.lower() != wanted_type:
                continue
        rows.append({
            "gene_a": interaction.get("OFFICIAL_SYMBOL_A"),
            "gene_b": interaction.get("OFFICIAL_SYMBOL_B"),
            "experimental_system": interaction.get("EXPERIMENTAL_SYSTEM"),
            "throughput": interaction.get("THROUGHPUT"),
            "pubmed_id": interaction.get("PUBMED_ID"),
            "source_db": "BioGRID",
        })

    # Explicit columns keep the schema stable when `rows` is empty.
    df = pd.DataFrame(rows, columns=[
        "gene_a", "gene_b", "experimental_system",
        "throughput", "pubmed_id", "source_db",
    ])
    print(f"BioGRID: {len(df)} interactions for {genes}")
    return df
145
+ ```
146
+
147
+ ## 3. STITCH 化合物-タンパク質ネットワーク
148
+
149
+ ```python
150
def get_stitch_interactions(identifiers, species=9606, score=400, limit=20,
                            timeout=30):
    """
    Fetch chemical-protein interactions from STITCH.

    Parameters:
        identifiers: list — CIDs (compounds) or gene names; a single
            identifier given as a plain string is also accepted
        species: int — NCBI Taxonomy ID
        score: int — confidence score threshold
        limit: int — maximum number of results
        timeout: float — seconds to wait for the server before requests
            raises a timeout error

    Returns:
        pandas.DataFrame — columns interactor_a, interactor_b,
        combined_score, is_chemical (present even when empty).
        is_chemical is True when either stringId contains "CID".

    Raises:
        requests.HTTPError — if the server answers with an error status.

    ToolUniverse:
        STITCH_get_chemical_protein_interactions(
            identifiers=identifiers, species=species,
            required_score=score, limit=limit
        )
        STITCH_get_interaction_partners(identifiers=identifiers)
        STITCH_resolve_identifier(identifiers=identifiers)
    """
    # A bare string would otherwise be joined character by character.
    if isinstance(identifiers, str):
        identifiers = [identifiers]

    url = f"https://stitch.embl.de/api/{OUTPUT_FORMAT}/interactionsList"
    params = {
        # Identifiers are sent as one carriage-return separated string.
        "identifiers": "\r".join(identifiers),
        "species": species,
        "required_score": score,
        "limit": limit,
    }

    # Without a timeout requests can block forever on a stalled server.
    resp = requests.post(url, data=params, timeout=timeout)
    resp.raise_for_status()
    interactions = resp.json()

    rows = []
    for record in interactions:
        id_a = str(record.get("stringId_A", ""))
        id_b = str(record.get("stringId_B", ""))
        rows.append({
            # Fall back to the raw identifier when no preferred name exists.
            "interactor_a": record.get("preferredName_A",
                                       record.get("stringId_A")),
            "interactor_b": record.get("preferredName_B",
                                       record.get("stringId_B")),
            "combined_score": record.get("score"),
            "is_chemical": "CID" in id_a or "CID" in id_b,
        })

    # Explicit columns keep the schema stable when `rows` is empty.
    df = pd.DataFrame(rows, columns=[
        "interactor_a", "interactor_b", "combined_score", "is_chemical",
    ])
    print(f"STITCH: {len(df)} chemical-protein interactions")
    return df
193
+ ```
194
+
195
+ ## 4. ネットワーク構築 & トポロジー解析
196
+
197
+ ```python
198
def build_network(interaction_df, source_col="protein_a", target_col="protein_b",
                  weight_col="combined_score"):
    """
    Build an undirected NetworkX graph and compute node topology metrics.

    Parameters:
        interaction_df: DataFrame — interaction table, one edge per row
        source_col, target_col: str — columns holding the two endpoints
        weight_col: str — column holding the edge weight; rows without
            this column get a weight of 1.0

    Returns:
        (G, metrics) — the nx.Graph and a DataFrame with the columns
        node, degree, betweenness, closeness, clustering, sorted by
        betweenness in descending order.
    """
    graph = nx.Graph()
    # Each row becomes one edge carrying its score as the "weight" attribute.
    graph.add_edges_from(
        (rec[source_col], rec[target_col],
         {"weight": rec.get(weight_col, 1.0)})
        for _, rec in interaction_df.iterrows()
    )

    # Topology metrics, each a dict keyed by node.
    node_degree = dict(graph.degree())
    between = nx.betweenness_centrality(graph)
    close = nx.closeness_centrality(graph)
    cluster = nx.clustering(graph)

    nodes = list(node_degree)
    table = pd.DataFrame({
        "node": nodes,
        "degree": [node_degree[n] for n in nodes],
        "betweenness": [between[n] for n in nodes],
        "closeness": [close[n] for n in nodes],
        "clustering": [cluster[n] for n in nodes],
    })
    metrics = table.sort_values("betweenness", ascending=False)

    print(f"Network: {graph.number_of_nodes()} nodes, "
          f"{graph.number_of_edges()} edges, "
          f"density={nx.density(graph):.4f}")
    return graph, metrics
233
+ ```
234
+
235
+ ## 5. コミュニティ検出
236
+
237
+ ```python
238
+ from networkx.algorithms.community import greedy_modularity_communities
239
+
240
+
241
def detect_communities(G, resolution=1.0):
    """
    Detect communities (modules) in a network by greedy modularity
    maximisation.

    Parameters:
        G: nx.Graph — network graph
        resolution: float — resolution parameter passed to
            greedy_modularity_communities

    Returns:
        (df, modularity) — df has the columns node and community (an
        integer label per node; columns are present even when empty),
        modularity is the modularity of the detected partition.
        For a graph without edges every node is its own community and
        modularity is reported as 0.0.
    """
    if G.number_of_edges() == 0:
        # Modularity divides by the total degree, which is zero here;
        # report singleton communities instead of raising.
        communities = [{node} for node in G.nodes()]
        modularity = 0.0
    else:
        communities = list(
            greedy_modularity_communities(G, resolution=resolution)
        )
        # NOTE(review): detection above runs with the library defaults for
        # `weight`, while this score uses the default weight attribute and
        # resolution — confirm whether the two should be aligned.
        modularity = nx.algorithms.community.modularity(G, communities)

    comm_data = []
    for label, members in enumerate(communities):
        comm_data.extend(
            {"node": node, "community": label} for node in members
        )

    # Explicit columns keep the schema stable when no node exists.
    df = pd.DataFrame(comm_data, columns=["node", "community"])
    print(f"Communities: {len(communities)} detected, "
          f"modularity={modularity:.4f}")
    return df, modularity
261
+ ```
262
+
263
+ ## 6. STRING 機能濃縮解析
264
+
265
+ ```python
266
def string_enrichment(proteins, species=9606, timeout=30):
    """
    Run STRING functional enrichment (GO/KEGG/Reactome/InterPro).

    Parameters:
        proteins: list — protein names; a single name given as a plain
            string is also accepted
        species: int — NCBI Taxonomy ID
        timeout: float — seconds to wait for the server before requests
            raises a timeout error

    Returns:
        pandas.DataFrame — columns category, term, description, p_value,
        fdr, number_of_genes, input_genes, sorted by fdr ascending.
        The columns are present even when no term is returned.

    Raises:
        requests.HTTPError — if the server answers with an error status.
    """
    # A bare string would otherwise be joined character by character.
    if isinstance(proteins, str):
        proteins = [proteins]

    url = f"{STRING_API}/{OUTPUT_FORMAT}/enrichment"
    params = {
        # Identifiers are sent as one carriage-return separated string.
        "identifiers": "\r".join(proteins),
        "species": species,
    }

    # Without a timeout requests can block forever on a stalled server.
    resp = requests.post(url, data=params, timeout=timeout)
    resp.raise_for_status()
    enrichment = resp.json()

    rows = [
        {
            "category": entry.get("category"),
            "term": entry.get("term"),
            "description": entry.get("description"),
            "p_value": entry.get("p_value"),
            "fdr": entry.get("fdr"),
            "number_of_genes": entry.get("number_of_genes"),
            "input_genes": entry.get("inputGenes", ""),
        }
        for entry in enrichment
    ]

    # Explicit columns guarantee that "fdr" exists for an empty result,
    # so the sort and the summary below cannot raise KeyError.
    df = pd.DataFrame(rows, columns=[
        "category", "term", "description", "p_value",
        "fdr", "number_of_genes", "input_genes",
    ])
    df = df.sort_values("fdr")
    n_significant = df[df["fdr"] < 0.05].shape[0]
    print(f"Enrichment: {len(df)} terms, "
          f"{n_significant} significant (FDR<0.05)")
    return df
302
+ ```
303
+
304
+ ## 7. 統合 PPI 解析パイプライン
305
+
306
+ ```python
307
def integrated_ppi_pipeline(genes, species=9606, score=700):
    """
    Integrated PPI analysis pipeline built on the STRING network.

    Pipeline:
        STRING network → topology → communities → enrichment

    Only STRING is queried here; get_biogrid_interactions and
    get_stitch_interactions are not called and have to be run separately
    when BioGRID or STITCH data is needed.

    Parameters:
        genes: list — gene / protein names used as the STRING query
        species: int — NCBI Taxonomy ID
        score: int — STRING confidence score threshold (0-1000)

    Returns:
        dict — n_nodes, n_edges, density, n_communities, modularity,
        hub_genes (up to 10 nodes with the highest betweenness) and
        n_enriched_terms (terms with FDR < 0.05). When STRING returns no
        interaction, all counts are zero and hub_genes is empty.
    """
    # STRING network
    string_df = get_string_network(genes, species, score)

    if string_df.empty:
        # Nothing to analyse: community detection and modularity are
        # undefined on an empty graph, so return a zeroed summary.
        result = {
            "n_nodes": 0,
            "n_edges": 0,
            "density": 0.0,
            "n_communities": 0,
            "modularity": 0.0,
            "hub_genes": [],
            "n_enriched_terms": 0,
        }
    else:
        # Graph construction and topology metrics
        G, metrics = build_network(string_df)

        # Community detection
        comm_df, modularity = detect_communities(G)

        # STRING enrichment on at most the first 500 nodes
        all_nodes = list(G.nodes())
        enrichment = string_enrichment(all_nodes[:500], species)

        result = {
            "n_nodes": G.number_of_nodes(),
            "n_edges": G.number_of_edges(),
            "density": round(nx.density(G), 4),
            "n_communities": comm_df["community"].nunique(),
            "modularity": round(modularity, 4),
            # metrics is sorted by betweenness, so the head holds the hubs.
            "hub_genes": metrics.head(10)["node"].tolist(),
            "n_enriched_terms": len(enrichment[enrichment["fdr"] < 0.05])
            if not enrichment.empty else 0,
        }

    print("\n=== Integrated PPI Pipeline ===")
    print(f"Nodes: {result['n_nodes']}, Edges: {result['n_edges']}")
    print(f"Hub genes: {', '.join(result['hub_genes'][:5])}")
    return result
343
+ ```
344
+
345
+ ---
346
+
347
+ ## パイプライン統合
348
+
349
+ ```
350
+ drug-target-profiling → string-network-api → pathway-enrichment
351
+ (候補ターゲット) (STRING PPI 構築) (GO/KEGG 濃縮)
352
+ │ │ ↓
353
+ protein-interaction ───┘ │ ontology-enrichment
354
+ (IntAct/HumanBase) ↓ (EFO/Enrichr)
355
+ network-analysis
356
+ (既存スキル補完)
357
+ ```
358
+
359
+ ## パイプライン出力
360
+
361
+ | ファイル | 説明 | 次スキル |
362
+ |---------|------|---------|
363
+ | `results/string_network.csv` | STRING PPI ネットワーク | → network-analysis |
364
+ | `results/ppi_topology.csv` | トポロジー指標 | → drug-target-profiling |
365
+ | `results/ppi_communities.csv` | コミュニティ割当 | → pathway-enrichment |
366
+ | `results/string_enrichment.csv` | 機能濃縮結果 | → ontology-enrichment |
367
+
368
+ ## 利用可能ツール (ToolUniverse SMCP)
369
+
370
+ | ツール名 | 用途 |
371
+ |---------|------|
372
+ | `STRING_get_protein_interactions` | STRING PPI 取得 |
373
+ | `BioGRID_get_interactions` | BioGRID 実験的 PPI |
374
+ | `STITCH_get_chemical_protein_interactions` | STITCH 化合物-タンパク質 |
375
+ | `STITCH_get_interaction_partners` | STITCH 相互作用パートナー |
376
+ | `STITCH_resolve_identifier` | STITCH ID 解決 |