@nahisaho/satori 0.18.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -39
- package/package.json +1 -1
- package/src/.github/skills/scientific-admet-pharmacokinetics/SKILL.md +4 -0
- package/src/.github/skills/scientific-biobank-cohort/SKILL.md +268 -0
- package/src/.github/skills/scientific-cancer-genomics/SKILL.md +7 -0
- package/src/.github/skills/scientific-cell-line-resources/SKILL.md +4 -0
- package/src/.github/skills/scientific-chembl-assay-mining/SKILL.md +4 -0
- package/src/.github/skills/scientific-civic-evidence/SKILL.md +292 -0
- package/src/.github/skills/scientific-compound-screening/SKILL.md +4 -0
- package/src/.github/skills/scientific-depmap-dependencies/SKILL.md +239 -0
- package/src/.github/skills/scientific-disease-research/SKILL.md +4 -0
- package/src/.github/skills/scientific-drug-target-profiling/SKILL.md +4 -0
- package/src/.github/skills/scientific-drugbank-resources/SKILL.md +269 -0
- package/src/.github/skills/scientific-gdc-portal/SKILL.md +280 -0
- package/src/.github/skills/scientific-gnomad-variants/SKILL.md +356 -0
- package/src/.github/skills/scientific-immunoinformatics/SKILL.md +4 -0
- package/src/.github/skills/scientific-metabolic-flux/SKILL.md +306 -0
- package/src/.github/skills/scientific-metabolic-modeling/SKILL.md +4 -0
- package/src/.github/skills/scientific-metabolomics/SKILL.md +4 -0
- package/src/.github/skills/scientific-metabolomics-databases/SKILL.md +4 -0
- package/src/.github/skills/scientific-microbiome-metagenomics/SKILL.md +4 -0
- package/src/.github/skills/scientific-monarch-ontology/SKILL.md +260 -0
- package/src/.github/skills/scientific-opentargets-genetics/SKILL.md +299 -0
- package/src/.github/skills/scientific-pharmacology-targets/SKILL.md +10 -0
- package/src/.github/skills/scientific-precision-oncology/SKILL.md +4 -0
- package/src/.github/skills/scientific-protein-interaction-network/SKILL.md +4 -0
- package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +4 -0
- package/src/.github/skills/scientific-rcsb-pdb-search/SKILL.md +280 -0
- package/src/.github/skills/scientific-reactome-pathways/SKILL.md +242 -0
- package/src/.github/skills/scientific-spatial-multiomics/SKILL.md +293 -0
- package/src/.github/skills/scientific-stitch-chemical-network/SKILL.md +318 -0
- package/src/.github/skills/scientific-string-network-api/SKILL.md +4 -0
- package/src/.github/skills/scientific-uniprot-proteome/SKILL.md +273 -0
- package/src/.github/skills/scientific-variant-interpretation/SKILL.md +4 -0
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-spatial-multiomics
|
|
3
|
+
description: |
|
|
4
|
+
空間マルチオミクス統合スキル。MERFISH/Visium 等の空間
|
|
5
|
+
トランスクリプトームと空間プロテオミクスのマルチモーダル
|
|
6
|
+
統合・空間共検出解析・セル近傍グラフ構築パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Spatial Multi-omics
|
|
10
|
+
|
|
11
|
+
MERFISH・Visium・CODEX 等の空間マルチオミクスデータを統合し、
|
|
12
|
+
マルチモーダルアライメント・空間共検出解析・近傍グラフベースの
|
|
13
|
+
空間コミュニティ検出パイプラインを提供する。
|
|
14
|
+
|
|
15
|
+
## When to Use
|
|
16
|
+
|
|
17
|
+
- 空間トランスクリプトームと空間プロテオミクスを統合するとき
|
|
18
|
+
- MERFISH + CODEX 等マルチモーダル空間データをアライメントするとき
|
|
19
|
+
- 空間的に共局在する分子シグネチャを同定するとき
|
|
20
|
+
- セル近傍グラフからニッチ/コミュニティを抽出するとき
|
|
21
|
+
- 空間マルチオミクスの前処理パイプラインを構築するとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. 空間マルチモーダルデータ読み込み
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import numpy as np
|
|
31
|
+
import pandas as pd
|
|
32
|
+
from scipy.spatial import cKDTree
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def load_spatial_modality(coord_file, expr_file, modality_name="RNA"):
    """
    Load one spatial modality and keep only cells present in both tables.

    Parameters:
        coord_file: str — coordinate CSV (cell_id, x, y)
        expr_file: str — expression / protein CSV
            (cell_id, features...)
        modality_name: str — label used in the progress message

    Returns:
        tuple — (coords, expr) DataFrames indexed by ``cell_id`` and
        restricted to the cell IDs found in both files.
    """
    position_table = pd.read_csv(coord_file, index_col="cell_id")
    feature_table = pd.read_csv(expr_file, index_col="cell_id")

    # Only cells that have both a position and a measurement are usable.
    shared_ids = position_table.index.intersection(feature_table.index)
    position_table = position_table.loc[shared_ids]
    feature_table = feature_table.loc[shared_ids]

    n_cells = len(shared_ids)
    n_features = feature_table.shape[1]
    print(f"Spatial {modality_name}: {n_cells} cells, {n_features} features")
    return position_table, feature_table
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def spatial_alignment(coords_a, coords_b, max_distance=50.0):
    """
    Align two modalities by nearest-neighbour matching of coordinates.

    Every cell of modality A is paired with its single nearest cell of
    modality B; pairs whose distance is not strictly below
    ``max_distance`` are dropped.

    Parameters:
        coords_a: DataFrame — modality A coordinates (x, y)
        coords_b: DataFrame — modality B coordinates (x, y)
        max_distance: float — maximum matching distance (μm)

    Returns:
        DataFrame — columns ``cell_a``, ``cell_b``, ``distance``.
    """
    points_a = coords_a[["x", "y"]].values
    points_b = coords_b[["x", "y"]].values

    # One nearest B neighbour per A cell.
    nn_dist, nn_pos = cKDTree(points_b).query(points_a, k=1)
    within_range = nn_dist < max_distance

    alignment = pd.DataFrame({
        "cell_a": coords_a.index[within_range],
        "cell_b": coords_b.index[nn_pos[within_range]],
        "distance": nn_dist[within_range],
    })

    n_pairs = len(alignment)
    print(f"Alignment: {n_pairs} matched pairs (max_dist={max_distance}μm)")
    return alignment
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## 2. 空間共検出解析
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
def spatial_codetection(expr_a, expr_b, alignment,
                        method="pearson", top_n=50):
    """
    Correlate features of two modalities across aligned cell pairs.

    Only the first 100 features of each modality are scanned. A feature
    pair is skipped when fewer than 30 aligned cells have finite values
    in both features.

    Parameters:
        expr_a: DataFrame — modality A expression matrix
        expr_b: DataFrame — modality B expression matrix
        alignment: DataFrame — alignment result
            (``cell_a`` / ``cell_b`` columns)
        method: str — correlation method
            ("spearman"; any other value uses Pearson)
        top_n: int — number of top pairs to return

    Returns:
        DataFrame — columns ``feature_a``, ``feature_b``,
        ``correlation``, ``p_value``, sorted by descending absolute
        correlation and truncated to ``top_n`` rows. Empty (with the
        same columns) when no feature pair could be evaluated.
    """
    from itertools import product
    from scipy import stats

    result_columns = ["feature_a", "feature_b", "correlation", "p_value"]

    a_matched = expr_a.loc[alignment["cell_a"]]
    b_matched = expr_b.loc[alignment["cell_b"]]
    # Row i of both tables now describes the i-th aligned pair.
    a_matched.index = range(len(a_matched))
    b_matched.index = range(len(b_matched))

    correlate = stats.spearmanr if method == "spearman" else stats.pearsonr

    results = []
    for fa, fb in product(a_matched.columns[:100],
                          b_matched.columns[:100]):
        va = a_matched[fa].values
        vb = b_matched[fb].values
        mask = np.isfinite(va) & np.isfinite(vb)
        if mask.sum() < 30:
            continue

        r, p = correlate(va[mask], vb[mask])
        results.append({
            "feature_a": fa,
            "feature_b": fb,
            "correlation": r,
            "p_value": p,
        })

    df = pd.DataFrame(results, columns=result_columns)
    if df.empty:
        # Too few aligned cells: nothing to rank, and sorting or
        # reading the first row would raise on a column-less frame.
        print("Codetection: 0 pairs")
        return df

    df.sort_values("correlation", ascending=False,
                   key=abs, inplace=True)
    top = df.head(top_n)

    print(f"Codetection: {len(df)} pairs, "
          f"top r={top.iloc[0]['correlation']:.3f}")
    return top
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## 3. セル近傍グラフ・コミュニティ検出
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
def cell_neighborhood_graph(coords, k_neighbors=15):
    """
    Build a k-nearest-neighbour edge list from cell coordinates.

    Parameters:
        coords: DataFrame — coordinates (x, y), indexed by cell ID
        k_neighbors: int — number of neighbours per cell; capped at
            ``len(coords) - 1`` so small data sets do not fail

    Returns:
        DataFrame — one row per directed edge with columns ``source``,
        ``target``, ``distance``. Empty (with the same columns) when
        there are fewer than two cells or ``k_neighbors`` < 1.
    """
    n_cells = len(coords)
    # A cell cannot have more neighbours than there are other cells;
    # asking the tree for more yields padded out-of-range indices.
    k = max(0, min(k_neighbors, n_cells - 1))

    if k < 1:
        edge_df = pd.DataFrame(columns=["source", "target", "distance"])
    else:
        xy = coords[["x", "y"]].values
        # k + 1 because the nearest hit of each point is the point itself.
        dists, idxs = cKDTree(xy).query(xy, k=k + 1)
        # Column 0 is the query point; columns 1..k are its neighbours.
        # Row-major flattening keeps the edges grouped by source cell.
        edge_df = pd.DataFrame({
            "source": coords.index.repeat(k),
            "target": coords.index[idxs[:, 1:].ravel()],
            "distance": dists[:, 1:].ravel(),
        })

    print(f"Neighborhood graph: "
          f"{n_cells} nodes, "
          f"{len(edge_df)} edges (k={k})")
    return edge_df
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def spatial_community_detection(edge_df, coords, resolution=1.0):
    """
    Detect spatial communities with the Leiden algorithm.

    Parameters:
        edge_df: DataFrame — edge list (``source`` / ``target`` columns)
        coords: DataFrame — coordinates (x, y), indexed by cell ID
        resolution: float — Leiden resolution parameter

    Returns:
        DataFrame — columns ``cell_id``, ``community``, ``x``, ``y``.
        An empty DataFrame is returned (after printing an install hint)
        when igraph or leidenalg cannot be imported.
    """
    try:
        import igraph as ig
        import leidenalg
    except ImportError:
        print("pip install igraph leidenalg")
        return pd.DataFrame()

    cell_ids = list(coords.index)
    # Vertices are addressed by integer position, so map cell ID -> position.
    position_of = dict(zip(cell_ids, range(len(cell_ids))))

    graph = ig.Graph(directed=False)
    graph.add_vertices(len(cell_ids))

    edge_pairs = []
    for _, row in edge_df.iterrows():
        # Edges that touch a cell missing from coords are ignored.
        if (row["source"] not in position_of
                or row["target"] not in position_of):
            continue
        edge_pairs.append(
            (position_of[row["source"]], position_of[row["target"]]))
    graph.add_edges(edge_pairs)

    partition = leidenalg.find_partition(
        graph,
        leidenalg.RBConfigurationVertexPartition,
        resolution_parameter=resolution,
    )

    result = pd.DataFrame({
        "cell_id": cell_ids,
        "community": partition.membership,
        "x": coords["x"].values,
        "y": coords["y"].values,
    })

    n_comm = result["community"].nunique()
    print(f"Communities: {n_comm} spatial niches (resolution={resolution})")
    return result
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
## 4. 空間マルチオミクス統合パイプライン
|
|
224
|
+
|
|
225
|
+
```python
|
|
226
|
+
def spatial_multiomics_pipeline(
    rna_coords, rna_expr,
    protein_coords, protein_expr,
    output_dir="results",
):
    """
    Run the spatial multi-omics integration pipeline end to end.

    Steps: load both modalities, align them spatially, compute
    cross-modality co-detection, then build the RNA neighbourhood graph
    and detect communities. Each result is written to ``output_dir`` as
    CSV as soon as it is available.

    Parameters:
        rna_coords: str — RNA coordinate file
        rna_expr: str — RNA expression file
        protein_coords: str — proteomics coordinate file
        protein_expr: str — proteomics expression file
        output_dir: str — output directory (created if missing)

    Returns:
        dict — keys ``alignment``, ``codetection``, ``communities``.
    """
    from pathlib import Path

    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    # 1) Load both modalities
    rna_xy, rna_values = load_spatial_modality(
        rna_coords, rna_expr, "RNA")
    prot_xy, prot_values = load_spatial_modality(
        protein_coords, protein_expr, "Protein")

    outputs = {}

    # 2) Spatial alignment
    outputs["alignment"] = spatial_alignment(rna_xy, prot_xy)
    outputs["alignment"].to_csv(out_path / "alignment.csv", index=False)

    # 3) Co-detection analysis
    outputs["codetection"] = spatial_codetection(
        rna_values, prot_values, outputs["alignment"])
    outputs["codetection"].to_csv(
        out_path / "codetection.csv", index=False)

    # 4) Neighbourhood graph + communities (RNA coordinates only)
    knn_edges = cell_neighborhood_graph(rna_xy)
    outputs["communities"] = spatial_community_detection(knn_edges, rna_xy)
    outputs["communities"].to_csv(
        out_path / "communities.csv", index=False)

    print(f"Spatial multiomics pipeline → {out_path}")
    return outputs
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
---
|
|
276
|
+
|
|
277
|
+
## パイプライン統合
|
|
278
|
+
|
|
279
|
+
```
|
|
280
|
+
spatial-transcriptomics → spatial-multiomics → multi-omics
|
|
281
|
+
(Visium/MERFISH) (マルチモーダル統合) (統合オミクス)
|
|
282
|
+
│ │ ↓
|
|
283
|
+
human-cell-atlas ─────────────┘ single-cell-rnaseq
|
|
284
|
+
(HCA atlas) (scRNA-seq 参照)
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
## パイプライン出力
|
|
288
|
+
|
|
289
|
+
| ファイル | 説明 | 次スキル |
|
|
290
|
+
|---------|------|---------|
|
|
291
|
+
| `results/alignment.csv` | モダリティ間アライメント | → multi-omics |
|
|
292
|
+
| `results/codetection.csv` | 共検出ペア | → pathway-analysis |
|
|
293
|
+
| `results/communities.csv` | 空間コミュニティ | → spatial-transcriptomics |
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-stitch-chemical-network
|
|
3
|
+
description: |
|
|
4
|
+
STITCH 化学-タンパク質相互作用ネットワークスキル。STITCH
|
|
5
|
+
REST API を用いた化学物質-タンパク質インタラクション検索・
|
|
6
|
+
信頼度スコアリング・ネットワーク薬理学・ポリファーマコロジー解析。
|
|
7
|
+
ToolUniverse 連携: stitch。
|
|
8
|
+
tu_tools:
|
|
9
|
+
- key: stitch
|
|
10
|
+
name: STITCH
|
|
11
|
+
description: 化学物質-タンパク質相互作用ネットワーク (EMBL)
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
# Scientific STITCH Chemical Network
|
|
15
|
+
|
|
16
|
+
STITCH (Search Tool for Interactions of Chemicals) REST API
|
|
17
|
+
を活用した化学物質-タンパク質相互作用検索・信頼度スコアリング・
|
|
18
|
+
ネットワーク薬理学・ポリファーマコロジー解析パイプラインを提供する。
|
|
19
|
+
|
|
20
|
+
## When to Use
|
|
21
|
+
|
|
22
|
+
- 化学物質とタンパク質の相互作用エビデンスを検索するとき
|
|
23
|
+
- 薬物の標的タンパク質ネットワークを構築するとき
|
|
24
|
+
- ポリファーマコロジー (多標的薬理作用) を解析するとき
|
|
25
|
+
- 化学物質間の類似ネットワークを構築するとき
|
|
26
|
+
- ネットワーク薬理学 (Network Pharmacology) を実施するとき
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
## 1. 化学物質-タンパク質相互作用検索
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
import requests
|
|
36
|
+
import pandas as pd
|
|
37
|
+
|
|
38
|
+
# Base URL of the STITCH REST API; request URLs are built as
# f"{STITCH_API}/tsv/<method>".
STITCH_API = "http://stitch.embl.de/api"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def stitch_interactions(chemical, species=9606,
                        required_score=400, limit=50):
    """
    STITCH — look up chemical-protein interactions for one chemical.

    Sends a GET request to the ``tsv/interactionsList`` endpoint and
    parses the tab-separated reply.

    Parameters:
        chemical: str — chemical name or CID
            (e.g. "aspirin", "CIDm00002244")
        species: int — NCBI Taxonomy ID
            (9606 = human)
        required_score: int — minimum confidence score
            (0-1000, 400 = medium)
        limit: int — maximum number of results

    Returns:
        DataFrame — one row per TSV record, columns taken from the
        header line; ``score`` is converted to numeric when present.
        Empty when the reply has no data rows.

    Raises:
        requests.HTTPError — on a non-success HTTP status.
    """
    query = {
        "identifiers": chemical,
        "species": species,
        "required_score": required_score,
        "limit": limit,
    }
    response = requests.get(
        f"{STITCH_API}/tsv/interactionsList", params=query, timeout=30)
    response.raise_for_status()

    # First line is the header; everything after it is data.
    header_line, *record_lines = response.text.strip().split("\n")
    if not record_lines:
        return pd.DataFrame()

    table = pd.DataFrame(
        [record.split("\t") for record in record_lines],
        columns=header_line.split("\t"))

    if "score" in table.columns:
        # Fields arrive as text; unparsable scores become NaN.
        table["score"] = pd.to_numeric(table["score"], errors="coerce")

    print(f"STITCH: {chemical} → {len(table)} interactions")
    return table
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def stitch_resolve(identifiers, species=9606):
    """
    STITCH — resolve chemical / protein names to STITCH identifiers.

    Sends a GET request to the ``tsv/resolveList`` endpoint with the
    names joined by carriage returns.

    Parameters:
        identifiers: list[str] — chemical / protein names
        species: int — NCBI Taxonomy ID

    Returns:
        DataFrame — one row per TSV record, columns taken from the
        header line (all values are strings). Empty when the reply has
        no data rows.

    Raises:
        requests.HTTPError — on a non-success HTTP status.
    """
    query = {
        "identifiers": "\r".join(identifiers),
        "species": species,
    }
    response = requests.get(
        f"{STITCH_API}/tsv/resolveList", params=query, timeout=30)
    response.raise_for_status()

    tsv_lines = response.text.strip().split("\n")
    if len(tsv_lines) <= 1:
        # Header only (or nothing at all): no matches.
        return pd.DataFrame()

    column_names = tsv_lines[0].split("\t")
    records = [entry.split("\t") for entry in tsv_lines[1:]]
    resolved = pd.DataFrame(records, columns=column_names)

    print(f"STITCH resolve: {len(identifiers)} queries → "
          f"{len(resolved)} results")
    return resolved
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## 2. ネットワーク薬理学
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
def stitch_network(chemicals, species=9606, required_score=400):
    """
    STITCH — fetch the interaction network for several chemicals.

    Sends a GET request to the ``tsv/network`` endpoint with the names
    joined by carriage returns.

    Parameters:
        chemicals: list[str] — chemical names
        species: int — NCBI Taxonomy ID
        required_score: int — minimum confidence score

    Returns:
        DataFrame — one row per edge, columns taken from the TSV header
        (all values are strings). Empty when the reply has no data rows.

    Raises:
        requests.HTTPError — on a non-success HTTP status.
    """
    query = {
        "identifiers": "\r".join(chemicals),
        "species": species,
        "required_score": required_score,
    }
    response = requests.get(
        f"{STITCH_API}/tsv/network", params=query, timeout=30)
    response.raise_for_status()

    tsv_lines = response.text.strip().split("\n")
    if len(tsv_lines) <= 1:
        return pd.DataFrame()

    edge_table = pd.DataFrame(
        [entry.split("\t") for entry in tsv_lines[1:]],
        columns=tsv_lines[0].split("\t"))

    # Count distinct endpoints for the summary message only.
    node_ids = set()
    for endpoint in ("stringId_A", "stringId_B"):
        if endpoint in edge_table.columns:
            node_ids |= set(edge_table[endpoint].unique())

    print(f"STITCH network: {len(node_ids)} nodes, "
          f"{len(edge_table)} edges")
    return edge_table
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def polypharmacology_analysis(drug_list, species=9606,
                              required_score=700):
    """
    Compare drugs pairwise by the targets they share.

    Targets of each drug are collected via ``stitch_interactions``;
    identifiers starting with "CID" are treated as chemicals and
    excluded. Drugs without any interaction are left out.

    Parameters:
        drug_list: list[str] — drug names
        species: int — NCBI Taxonomy ID
        required_score: int — high-confidence score threshold

    Returns:
        DataFrame — one row per drug pair with columns ``drug_a``,
        ``drug_b``, ``shared_targets``, ``jaccard_index``,
        ``shared_list``, sorted by descending Jaccard index. Empty when
        fewer than two drugs have interactions.
    """
    id_columns = ("stringId_A", "stringId_B")

    targets_by_drug = {}
    for drug in drug_list:
        hits = stitch_interactions(drug, species, required_score)
        if hits.empty:
            continue

        present = [col for col in id_columns if col in hits.columns]
        # Drop the chemical itself (and any other chemical node).
        targets_by_drug[drug] = {
            node
            for col in present
            for node in hits[col].unique()
            if not node.startswith("CID")
        }

    # A pairwise comparison needs at least two drugs with data.
    if len(targets_by_drug) < 2:
        return pd.DataFrame()

    drugs = list(targets_by_drug)
    pairs = []
    for pos, first in enumerate(drugs):
        for second in drugs[pos + 1:]:
            shared = targets_by_drug[first] & targets_by_drug[second]
            union = targets_by_drug[first] | targets_by_drug[second]
            if union:
                jaccard = len(shared) / len(union)
            else:
                jaccard = 0
            pairs.append({
                "drug_a": first,
                "drug_b": second,
                "shared_targets": len(shared),
                "jaccard_index": jaccard,
                "shared_list": "; ".join(sorted(shared)),
            })

    ranked = pd.DataFrame(pairs).sort_values(
        "jaccard_index", ascending=False)
    print(f"Polypharmacology: {len(drugs)} drugs, "
          f"{len(ranked)} pairs")
    return ranked
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
## 3. エンリッチメント解析
|
|
210
|
+
|
|
211
|
+
```python
|
|
212
|
+
def stitch_enrichment(identifiers, species=9606):
    """
    STITCH — functional enrichment for a set of identifiers.

    Sends a GET request to the ``tsv/enrichment`` endpoint with the
    identifiers joined by carriage returns.

    Parameters:
        identifiers: list[str] — protein / chemical identifiers
        species: int — NCBI Taxonomy ID

    Returns:
        DataFrame — one row per enriched term, columns taken from the
        TSV header. When a ``p_value`` column exists it is converted to
        numeric and the rows are sorted by it in ascending order. Empty
        when the reply has no data rows.

    Raises:
        requests.HTTPError — on a non-success HTTP status.
    """
    query = {
        "identifiers": "\r".join(identifiers),
        "species": species,
    }
    response = requests.get(
        f"{STITCH_API}/tsv/enrichment", params=query, timeout=30)
    response.raise_for_status()

    header_line, *record_lines = response.text.strip().split("\n")
    if not record_lines:
        return pd.DataFrame()

    terms = pd.DataFrame(
        [record.split("\t") for record in record_lines],
        columns=header_line.split("\t"))

    if "p_value" in terms.columns:
        # Fields arrive as text; convert before sorting numerically.
        terms["p_value"] = pd.to_numeric(terms["p_value"], errors="coerce")
        terms = terms.sort_values("p_value")

    print(f"STITCH enrichment: {len(terms)} terms")
    return terms
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
## 4. STITCH 統合パイプライン
|
|
246
|
+
|
|
247
|
+
```python
|
|
248
|
+
def stitch_pipeline(chemicals, species=9606, output_dir="results"):
    """
    Run the STITCH integration pipeline end to end.

    Steps: per-chemical interaction lookup, combined network retrieval,
    and pairwise polypharmacology comparison. Each result is written to
    ``output_dir`` as CSV as soon as it is available.

    Parameters:
        chemicals: list[str] — chemical names
        species: int — NCBI Taxonomy ID
        output_dir: str — output directory (created if missing)

    Returns:
        dict — keys ``interactions``, ``network``, ``polypharmacology``.
    """
    from pathlib import Path

    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    # 1) Per-chemical interactions, tagged with the chemical queried
    per_chemical = []
    for name in chemicals:
        hits = stitch_interactions(name, species)
        hits["query_chemical"] = name
        per_chemical.append(hits)
    interactions = pd.concat(per_chemical, ignore_index=True)
    interactions.to_csv(out_path / "stitch_interactions.csv", index=False)

    # 2) Combined network
    network = stitch_network(chemicals, species)
    network.to_csv(out_path / "stitch_network.csv", index=False)

    # 3) Polypharmacology
    polypharm = polypharmacology_analysis(chemicals, species)
    polypharm.to_csv(out_path / "polypharmacology.csv", index=False)

    print(f"STITCH pipeline → {out_path}")
    return dict(
        interactions=interactions,
        network=network,
        polypharmacology=polypharm,
    )
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
---
|
|
295
|
+
|
|
296
|
+
## ToolUniverse 連携
|
|
297
|
+
|
|
298
|
+
| TU Key | ツール名 | 連携内容 |
|
|
299
|
+
|--------|---------|---------|
|
|
300
|
+
| `stitch` | STITCH | 化学物質-タンパク質相互作用 (EMBL) |
|
|
301
|
+
|
|
302
|
+
## パイプライン統合
|
|
303
|
+
|
|
304
|
+
```
|
|
305
|
+
cheminformatics → stitch-chemical-network → drug-target-profiling
|
|
306
|
+
(化合物記述子) (STITCH 相互作用) (DGIdb 標的)
|
|
307
|
+
│ │ ↓
|
|
308
|
+
string-network-api ────────┘ pharmacology-targets
|
|
309
|
+
(STRING PPI) (BindingDB/GtoPdb)
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
## パイプライン出力
|
|
313
|
+
|
|
314
|
+
| ファイル | 説明 | 次スキル |
|
|
315
|
+
|---------|------|---------|
|
|
316
|
+
| `results/stitch_interactions.csv` | 化学物質-標的 | → drug-target-profiling |
|
|
317
|
+
| `results/stitch_network.csv` | ネットワーク | → string-network-api |
|
|
318
|
+
| `results/polypharmacology.csv` | 多標的解析 | → pharmacology-targets |
|
|
@@ -4,6 +4,10 @@ description: |
|
|
|
4
4
|
STRING/BioGRID/STITCH ネットワーク解析スキル。STRING タンパク質相互作用
|
|
5
5
|
ネットワーク直接 API、BioGRID 実験的 PPI、STITCH 化学-タンパク質ネットワーク、
|
|
6
6
|
ネットワークトポロジー解析・コミュニティ検出・機能濃縮統合パイプライン。
|
|
7
|
+
tu_tools:
|
|
8
|
+
- key: ppi
|
|
9
|
+
name: STRING/BioGRID PPI
|
|
10
|
+
description: タンパク質・化学物質相互作用ネットワーク
|
|
7
11
|
---
|
|
8
12
|
|
|
9
13
|
# Scientific STRING Network API
|