@nahisaho/satori 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -30
- package/package.json +1 -1
- package/src/.github/skills/scientific-advanced-imaging/SKILL.md +382 -0
- package/src/.github/skills/scientific-chembl-assay-mining/SKILL.md +509 -0
- package/src/.github/skills/scientific-data-submission/SKILL.md +357 -0
- package/src/.github/skills/scientific-deep-chemistry/SKILL.md +350 -0
- package/src/.github/skills/scientific-ensembl-genomics/SKILL.md +378 -0
- package/src/.github/skills/scientific-expression-comparison/SKILL.md +303 -0
- package/src/.github/skills/scientific-gpu-singlecell/SKILL.md +296 -0
- package/src/.github/skills/scientific-marine-ecology/SKILL.md +429 -0
- package/src/.github/skills/scientific-md-simulation/SKILL.md +315 -0
- package/src/.github/skills/scientific-model-organism-db/SKILL.md +329 -0
- package/src/.github/skills/scientific-nci60-screening/SKILL.md +307 -0
- package/src/.github/skills/scientific-perturbation-analysis/SKILL.md +297 -0
- package/src/.github/skills/scientific-plant-biology/SKILL.md +321 -0
- package/src/.github/skills/scientific-rrna-taxonomy/SKILL.md +379 -0
- package/src/.github/skills/scientific-scatac-signac/SKILL.md +300 -0
- package/src/.github/skills/scientific-scvi-integration/SKILL.md +344 -0
- package/src/.github/skills/scientific-string-network-api/SKILL.md +376 -0
- package/src/.github/skills/scientific-toxicology-env/SKILL.md +309 -0
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-nci60-screening
|
|
3
|
+
description: |
|
|
4
|
+
NCI-60 がん細胞株薬剤応答スキル。CellMiner API 薬剤感受性・
|
|
5
|
+
NCI-60 GI50/LC50 データ・DepMap cancer dependency 統合・
|
|
6
|
+
薬剤-分子マーカー相関・細胞株パネル比較解析。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific NCI-60 Screening
|
|
10
|
+
|
|
11
|
+
CellMiner / NCI-60 / DepMap を活用したがん細胞株薬剤応答
|
|
12
|
+
パイプラインを提供する。高スループットスクリーニングデータの
|
|
13
|
+
統合解析、薬剤感受性マーカー同定、パネル比較。
|
|
14
|
+
|
|
15
|
+
## When to Use
|
|
16
|
+
|
|
17
|
+
- NCI-60 細胞株パネルの薬剤応答 (GI50) を解析するとき
|
|
18
|
+
- CellMiner から化合物活性データを取得するとき
|
|
19
|
+
- 薬剤感受性と分子マーカー (変異/発現) の相関を調べるとき
|
|
20
|
+
- DepMap CRISPR/RNAi 依存性データを併用するとき
|
|
21
|
+
- 細胞株間の薬剤応答パターンを比較するとき
|
|
22
|
+
- 新規化合物のスクリーニング結果を NCI-60 と比較するとき
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
## 1. CellMiner データ取得
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import requests
|
|
32
|
+
import pandas as pd
|
|
33
|
+
import numpy as np
|
|
34
|
+
from io import StringIO
|
|
35
|
+
|
|
36
|
+
CELLMINER_BASE = "https://discover.nci.nih.gov/cellminer/api"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def cellminer_drug_activity(nsc_id=None, drug_name=None):
    """
    Fetch NCI-60 drug activity for a single compound from CellMiner.

    When ``nsc_id`` is given it is used directly. Otherwise ``drug_name`` is
    resolved through the search endpoint and the first hit's ``nsc`` field
    is used as the NSC ID.

    NOTE(review): the endpoint paths and the JSON schema ("activity" mapping
    with "tissue"/"gi50"/"tgi"/"lc50" per cell line) are taken as-is from
    this snippet; confirm them against the live CellMiner service.

    Parameters:
        nsc_id: str — NSC ID (e.g. "740")
        drug_name: str — drug name (e.g. "Paclitaxel")

    Returns:
        pd.DataFrame — one row per cell line with the columns
        cell_line, tissue, gi50_log, tgi_log, lc50_log. The frame is empty
        (same columns) when the name search finds no usable compound.

    Raises:
        ValueError: neither ``nsc_id`` nor ``drug_name`` was supplied.
        requests.HTTPError: a request came back with an error status.
    """
    columns = ["cell_line", "tissue", "gi50_log", "tgi_log", "lc50_log"]

    # Without this guard the function failed later on an unbound ``url``.
    if not nsc_id and not drug_name:
        raise ValueError("Either nsc_id or drug_name must be provided")

    if not nsc_id:
        search = requests.get(
            f"{CELLMINER_BASE}/compound/search",
            params={"name": drug_name},
            timeout=30,
        )
        search.raise_for_status()
        compounds = search.json()
        # A hit without an NSC ID would produce the malformed URL
        # ".../compound//activity", so it is treated like "no hit".
        nsc_id = compounds[0].get("nsc", "") if compounds else ""
        if not nsc_id:
            print(f"Drug not found: {drug_name}")
            return pd.DataFrame(columns=columns)

    resp = requests.get(f"{CELLMINER_BASE}/compound/{nsc_id}/activity", timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = [
        {
            "cell_line": cell_line,
            "tissue": values.get("tissue", ""),
            "gi50_log": values.get("gi50", None),
            "tgi_log": values.get("tgi", None),
            "lc50_log": values.get("lc50", None),
        }
        for cell_line, values in data.get("activity", {}).items()
    ]

    # Explicit columns keep the schema stable even when "activity" is empty.
    df = pd.DataFrame(results, columns=columns)
    print(f"CellMiner: NSC {nsc_id} → {len(df)} cell lines")
    return df
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## 2. NCI-60 バルクデータ取得
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
def nci60_bulk_download(data_type="drug_activity"):
    """
    Download one NCI-60 bulk dataset and parse it as CSV.

    Parameters:
        data_type: str — "drug_activity", "gene_expression" or "mutation".
            "copy_number" was previously listed here, but no download URL
            is configured for it, so it is rejected like any unknown type.

    Returns:
        pd.DataFrame — the downloaded CSV parsed with ``pd.read_csv``.

    Raises:
        ValueError: ``data_type`` has no configured download URL.
        requests.HTTPError: the download returned an error status.
    """
    # NOTE(review): URLs are taken as-is from this snippet; confirm that they
    # still resolve on the CellMiner download site.
    urls = {
        "drug_activity": "https://discover.nci.nih.gov/cellminer/download/DTP_NCI60_ZSCORE.csv",
        "gene_expression": "https://discover.nci.nih.gov/cellminer/download/GeneExpr_RMA.csv",
        "mutation": "https://discover.nci.nih.gov/cellminer/download/Exome_Mutation.csv",
    }

    url = urls.get(data_type)
    if not url:
        supported = ", ".join(sorted(urls))
        raise ValueError(
            f"Unknown data type: {data_type} (supported: {supported})"
        )

    # Bulk files are large, hence the longer timeout than the API calls.
    resp = requests.get(url, timeout=120)
    resp.raise_for_status()

    df = pd.read_csv(StringIO(resp.text))
    print(f"NCI-60 bulk: {data_type} → {df.shape}")
    return df
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## 3. 薬剤-分子マーカー相関
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from scipy import stats
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def drug_marker_correlation(drug_activity, molecular_data,
                            marker_type="expression", top_n=50):
    """
    Correlate drug sensitivity with molecular markers across cell lines.

    Every marker is tested with a Pearson correlation against the drug's
    ``gi50_log`` values over the cell lines present in both inputs.
    Missing or non-numeric values are dropped pairwise, and markers with
    fewer than 10 remaining pairs are skipped.

    Parameters:
        drug_activity: pd.DataFrame — needs the columns cell_line, gi50_log
        molecular_data: pd.DataFrame — either long format
            (cell_line, gene, value) or wide format (cell_line plus one
            column per gene)
        marker_type: str — "expression", "mutation", "copy_number";
            kept for interface compatibility, the computation ignores it
        top_n: int — number of top-ranked rows to return

    Returns:
        pd.DataFrame — columns gene, pearson_r, p_value, n_samples, adj_p,
        sorted by p_value ascending and truncated to ``top_n`` rows.
        ``adj_p`` is the Bonferroni-adjusted p-value capped at 1.0.
        An empty frame with these columns is returned when no marker
        could be tested.
    """
    result_columns = ["gene", "pearson_r", "p_value", "n_samples", "adj_p"]

    # Restrict both inputs to the shared cell lines.
    common_lines = set(drug_activity["cell_line"]) & set(molecular_data["cell_line"])
    drug_sub = drug_activity[drug_activity["cell_line"].isin(common_lines)]
    mol_sub = molecular_data[molecular_data["cell_line"].isin(common_lines)]

    drug_values = pd.to_numeric(
        drug_sub.set_index("cell_line")["gi50_log"], errors="coerce"
    )

    if "gene" in mol_sub.columns:
        # Long format: one groupby pass instead of re-filtering per gene.
        per_gene = (
            (gene, rows.set_index("cell_line")["value"])
            for gene, rows in mol_sub.groupby("gene", sort=False)
        )
    else:
        # Wide format: every column except cell_line is a marker, wherever
        # the cell_line column happens to sit.
        wide = mol_sub.set_index("cell_line")
        per_gene = ((gene, wide[gene]) for gene in wide.columns)

    correlations = []
    for gene, gene_data in per_gene:
        gene_data = pd.to_numeric(gene_data, errors="coerce")
        common = drug_values.index.intersection(gene_data.index)
        if len(common) < 10:
            continue

        x = drug_values.loc[common].to_numpy(dtype=float)
        y = gene_data.loc[common].to_numpy(dtype=float)
        # pearsonr yields NaN as soon as one input value is NaN, so keep
        # only complete pairs and re-check the sample threshold.
        keep = ~(pd.isna(x) | pd.isna(y))
        n_pairs = int(keep.sum())
        if n_pairs < 10:
            continue

        r, p = stats.pearsonr(x[keep], y[keep])
        correlations.append({
            "gene": gene,
            "pearson_r": r,
            "p_value": p,
            "n_samples": n_pairs,
        })

    if not correlations:
        # Previously this path raised KeyError on the missing p_value column.
        print("Drug-marker correlation: 0 genes tested")
        return pd.DataFrame(columns=result_columns)

    corr_df = pd.DataFrame(correlations)
    # Bonferroni: multiply by the number of tests, but a p-value cannot exceed 1.
    corr_df["adj_p"] = (corr_df["p_value"] * len(corr_df)).clip(upper=1.0)
    corr_df = corr_df.sort_values("p_value")

    print(f"Drug-marker correlation: {len(corr_df)} genes tested, "
          f"top |r| = {corr_df['pearson_r'].abs().max():.3f}")
    return corr_df.head(top_n)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## 4. 組織別薬剤応答パターン
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
def tissue_response_pattern(drug_activity, min_lines=3):
    """
    Summarise drug response per tissue of origin.

    Groups the rows by ``tissue``, computes count/mean/std/min/max of
    ``gi50_log``, keeps tissues with at least ``min_lines`` non-missing
    values and adds ``sensitivity_z``: the tissue mean minus the mean over
    all rows, divided by the standard deviation over all rows. One line
    per tissue is printed, labelled "Sensitive" (z < -0.5),
    "Resistant" (z > 0.5) or "Neutral".

    Parameters:
        drug_activity: pd.DataFrame — needs the columns tissue, gi50_log
        min_lines: int — minimum number of cell lines per tissue

    Returns:
        pd.DataFrame — columns tissue, n_lines, mean_gi50, std_gi50,
        min_gi50, max_gi50, sensitivity_z, sorted by mean_gi50 ascending.
        An empty frame with these columns is returned for empty input.
    """
    if drug_activity.empty:
        # An unknown drug yields an empty frame upstream; report "nothing to
        # summarise" instead of failing on the missing "tissue" column.
        print("Tissue patterns: 0 tissues")
        return pd.DataFrame(columns=[
            "tissue", "n_lines", "mean_gi50", "std_gi50",
            "min_gi50", "max_gi50", "sensitivity_z",
        ])

    tissue_stats = drug_activity.groupby("tissue").agg(
        n_lines=("gi50_log", "count"),
        mean_gi50=("gi50_log", "mean"),
        std_gi50=("gi50_log", "std"),
        min_gi50=("gi50_log", "min"),
        max_gi50=("gi50_log", "max"),
    ).reset_index()

    tissue_stats = tissue_stats[tissue_stats["n_lines"] >= min_lines]
    tissue_stats = tissue_stats.sort_values("mean_gi50")

    # The reference distribution is ALL rows, including tissues that were
    # filtered out above for having too few cell lines.
    gi50 = drug_activity["gi50_log"]
    overall_mean = gi50.mean()
    overall_std = gi50.std()
    tissue_stats["sensitivity_z"] = (
        (tissue_stats["mean_gi50"] - overall_mean) / overall_std
    )

    print(f"Tissue patterns: {len(tissue_stats)} tissues")
    for row in tissue_stats.itertuples(index=False):
        if row.sensitivity_z < -0.5:
            label = "Sensitive"
        elif row.sensitivity_z > 0.5:
            label = "Resistant"
        else:
            label = "Neutral"
        print(f" {row.tissue}: GI50={row.mean_gi50:.2f} ({label})")
    return tissue_stats
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## 5. DepMap 統合スクリーニング
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
DEPMAP_BASE = "https://depmap.org/portal/api"
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def depmap_gene_dependency(gene_symbol, dataset="Chronos_Combined"):
    """
    Fetch per-cell-line dependency scores for one gene from DepMap.

    NOTE(review): the endpoint (GET ``{DEPMAP_BASE}/download/custom`` with
    ``gene``/``dataset`` query parameters) and the response schema
    ("data" list with "cell_line_name"/"lineage"/"score") are taken as-is
    from this snippet; confirm them against the DepMap portal API.

    Parameters:
        gene_symbol: str — gene symbol
        dataset: str — dataset name

    Returns:
        pd.DataFrame — columns cell_line, lineage, dependency_score.
        ``dependency_score`` is numeric; missing or non-numeric scores
        become NaN. The frame is empty when no entries are returned.

    Raises:
        requests.HTTPError: the request returned an error status.
    """
    resp = requests.get(
        f"{DEPMAP_BASE}/download/custom",
        params={"gene": gene_symbol, "dataset": dataset},
        timeout=30,
    )
    resp.raise_for_status()
    data = resp.json()

    results = [
        {
            "cell_line": entry.get("cell_line_name", ""),
            "lineage": entry.get("lineage", ""),
            "dependency_score": entry.get("score", None),
        }
        for entry in data.get("data", [])
    ]

    df = pd.DataFrame(results, columns=["cell_line", "lineage", "dependency_score"])
    # If every score is None the column is object dtype and the comparison
    # below would raise TypeError; coerce to float/NaN first.
    df["dependency_score"] = pd.to_numeric(df["dependency_score"], errors="coerce")

    if len(df) > 0:
        n_dependent = (df["dependency_score"] < -0.5).sum()
        print(f"DepMap {gene_symbol}: {len(df)} lines, "
              f"{n_dependent} dependent (score < -0.5)")
    else:
        print(f"DepMap {gene_symbol}: no dependency data returned")
    return df
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## 6. NCI-60 統合スクリーニングパイプライン
|
|
242
|
+
|
|
243
|
+
```python
|
|
244
|
+
def nci60_screening_pipeline(drug_name=None, nsc_id=None,
                             target_gene=None, output_dir="results"):
    """
    Run the combined NCI-60 + DepMap screening pipeline for one compound.

    Steps: (1) fetch the compound's NCI-60 activity, (2) summarise it per
    tissue, (3) correlate it with bulk gene expression, (4) optionally
    fetch DepMap dependency scores for ``target_gene``. Each result is
    written as CSV into ``output_dir``.

    Parameters:
        drug_name: str — drug name
        nsc_id: str — NSC ID
        target_gene: str — target gene (enables the DepMap step)
        output_dir: str — output directory (created if missing)

    Returns:
        dict — keys "drug_activity", "tissue_patterns", "correlations",
        plus "depmap_dependency" when ``target_gene`` is given. When no
        activity data is found, "tissue_patterns" and "correlations" are
        empty DataFrames and their CSV files are not written.

    Raises:
        ValueError: neither ``drug_name`` nor ``nsc_id`` was supplied.
    """
    from pathlib import Path

    # Validate before touching the filesystem or the network.
    if not drug_name and not nsc_id:
        raise ValueError("Either drug_name or nsc_id must be provided")

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) NCI-60 drug activity
    drug_data = cellminer_drug_activity(nsc_id=nsc_id, drug_name=drug_name)
    drug_data.to_csv(output_dir / "drug_activity.csv", index=False)

    if drug_data.empty:
        # Nothing to summarise or correlate; the downstream steps need the
        # tissue / gi50_log columns and would fail on an empty frame.
        print("No drug activity data; skipping tissue and correlation steps")
        tissue_patterns = pd.DataFrame()
        correlations = pd.DataFrame()
    else:
        # 2) Per-tissue response pattern
        tissue_patterns = tissue_response_pattern(drug_data)
        tissue_patterns.to_csv(output_dir / "tissue_patterns.csv", index=False)

        # 3) Expression correlation
        # NOTE(review): drug_marker_correlation expects a "cell_line" column
        # in the expression table; confirm the bulk CSV layout provides it.
        expr_data = nci60_bulk_download("gene_expression")
        correlations = drug_marker_correlation(drug_data, expr_data)
        correlations.to_csv(output_dir / "marker_correlations.csv", index=False)

    outputs = {
        "drug_activity": drug_data,
        "tissue_patterns": tissue_patterns,
        "correlations": correlations,
    }

    # 4) DepMap (only when a target gene is given)
    if target_gene:
        depmap_data = depmap_gene_dependency(target_gene)
        depmap_data.to_csv(output_dir / "depmap_dependency.csv", index=False)
        # Previously computed and written to disk but never returned.
        outputs["depmap_dependency"] = depmap_data

    print(f"Pipeline complete: {output_dir}")
    return outputs
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
---
|
|
286
|
+
|
|
287
|
+
## パイプライン統合
|
|
288
|
+
|
|
289
|
+
```
|
|
290
|
+
compound-screening → nci60-screening → precision-oncology
|
|
291
|
+
(ZINC/VS) (NCI-60/DepMap) (MTB レポート)
|
|
292
|
+
│ │ ↓
|
|
293
|
+
drug-target-profiling ──────┘ cancer-genomics
|
|
294
|
+
(ChEMBL/DGIdb) │ (COSMIC/DepMap)
|
|
295
|
+
↓
|
|
296
|
+
cell-line-resources
|
|
297
|
+
(Cellosaurus)
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
## パイプライン出力
|
|
301
|
+
|
|
302
|
+
| ファイル | 説明 | 次スキル |
|
|
303
|
+
|---------|------|---------|
|
|
304
|
+
| `results/drug_activity.csv` | NCI-60 GI50 データ | → precision-oncology |
|
|
305
|
+
| `results/tissue_patterns.csv` | 組織別応答パターン | → cancer-genomics |
|
|
306
|
+
| `results/marker_correlations.csv` | 薬剤-マーカー相関 | → drug-target-profiling |
|
|
307
|
+
| `results/depmap_dependency.csv` | DepMap 依存性スコア | → cell-line-resources |
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-perturbation-analysis
|
|
3
|
+
description: |
|
|
4
|
+
シングルセル摂動解析スキル。pertpy による CRISPR スクリーン解析・
|
|
5
|
+
薬剤応答分析・scGen 摂動予測・Augur 摂動応答性スコアリング・
|
|
6
|
+
scIB 統合ベンチマーク・差次的摂動応答パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Perturbation Analysis
|
|
10
|
+
|
|
11
|
+
pertpy / Augur / scIB を活用したシングルセルレベルの摂動解析
|
|
12
|
+
パイプラインを提供する。CRISPR スクリーン、薬剤処理、
|
|
13
|
+
遺伝子ノックダウンなどの摂動データの統合解析。
|
|
14
|
+
|
|
15
|
+
## When to Use
|
|
16
|
+
|
|
17
|
+
- CRISPR スクリーンデータ (Perturb-seq) を解析するとき
|
|
18
|
+
- 薬剤処理前後のシングルセル発現変動を評価するとき
|
|
19
|
+
- 摂動応答の細胞型特異性を定量するとき
|
|
20
|
+
- 複数のバッチ統合手法をベンチマークするとき (scIB)
|
|
21
|
+
- 摂動の効果を in silico で予測するとき (scGen)
|
|
22
|
+
- 差次的優先度 (Augur) で摂動応答性の高い細胞型を特定するとき
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
## 1. pertpy セットアップ & データ読込み
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import pertpy as pt
|
|
32
|
+
import scanpy as sc
|
|
33
|
+
import anndata as ad
|
|
34
|
+
import pandas as pd
|
|
35
|
+
import numpy as np
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def load_perturbation_data(adata_path, perturbation_key="perturbation",
                           control_label="control"):
    """
    Read a perturbation-experiment AnnData file and apply basic preprocessing.

    The data is filtered (cells with at least 200 genes, genes seen in at
    least 3 cells), normalised to a total of 1e4 per cell and log1p
    transformed, in that order. A short summary of perturbation and
    control counts is printed.

    Parameters:
        adata_path: str — path to the AnnData (.h5ad) file
        perturbation_key: str — obs column holding the perturbation label
        control_label: str — label that marks control cells

    Returns:
        AnnData — the preprocessed object.

    K-Dense: pertpy
    """
    adata = sc.read_h5ad(adata_path)

    # Basic preprocessing; the order of these four steps matters.
    sc.pp.filter_cells(adata, min_genes=200)
    sc.pp.filter_genes(adata, min_cells=3)
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)

    labels = adata.obs[perturbation_key]
    total_cells = len(adata)
    n_perturbations = labels.nunique()
    n_control = (labels == control_label).sum()
    n_perturbed = total_cells - n_control

    print(f"Loaded: {total_cells} cells, {n_perturbations} perturbations")
    print(f"Control: {n_control}, Perturbed: {n_perturbed}")
    return adata
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## 2. 差次的遺伝子発現 (摂動 vs コントロール)
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
def differential_perturbation(adata, perturbation_key="perturbation",
                              control="control", target=None):
    """
    Differential expression of each perturbation against the control group.

    Runs a Wilcoxon ``rank_genes_groups`` with ``control`` as reference on
    a copy of the data (the input object is not modified) and summarises
    the genes with adjusted p-value < 0.05 for every non-control group.

    Parameters:
        adata: AnnData — perturbation data
        perturbation_key: str — obs column holding the perturbation label
        control: str — control label
        target: str — perturbation to compare (None for all perturbations)

    Returns:
        dict — ``{group: {"n_degs", "n_up", "n_down", "top_genes"}}``.
        Counts are plain Python ints so the result can be dumped to JSON.
        Groups whose result table cannot be retrieved are reported and
        left out.
    """
    if target:
        mask = adata.obs[perturbation_key].isin([control, target])
        adata_sub = adata[mask].copy()
    else:
        adata_sub = adata.copy()

    sc.tl.rank_genes_groups(
        adata_sub,
        groupby=perturbation_key,
        reference=control,
        method="wilcoxon",
    )

    results = {}
    for group in adata_sub.obs[perturbation_key].unique():
        if group == control:
            continue
        try:
            degs = sc.get.rank_genes_groups_df(adata_sub, group=group)
        except Exception as exc:
            # Best effort per group, but say which group was dropped and
            # why instead of skipping it silently.
            print(f"Skipping {group}: {exc}")
            continue

        degs_sig = degs[degs["pvals_adj"] < 0.05]
        results[group] = {
            "n_degs": len(degs_sig),
            # int(): numpy integers are not JSON serialisable.
            "n_up": int((degs_sig["logfoldchanges"] > 0).sum()),
            "n_down": int((degs_sig["logfoldchanges"] < 0).sum()),
            "top_genes": degs_sig.head(10)["names"].tolist(),
        }

    print(f"DE results: {len(results)} perturbations analyzed")
    return results
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## 3. Augur 摂動応答性スコアリング
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
def augur_prioritization(adata, perturbation_key="perturbation",
                         cell_type_key="cell_type", control="control",
                         treatment=None):
    """
    Score how strongly each cell type responds to a perturbation with Augur.

    NOTE(review): written against the pertpy Augur API as documented
    (``Augur.load`` takes the label/cell-type columns and the two condition
    labels; ``Augur.predict`` takes the loaded object and returns
    ``(adata, results)`` where ``results["summary_metrics"]`` has metrics
    as rows and cell types as columns). Confirm against the installed
    pertpy version.

    Parameters:
        adata: AnnData — perturbation data
        perturbation_key: str — obs column holding the perturbation label
        cell_type_key: str — obs column holding the cell-type label
        control: str — control label
        treatment: str — perturbation label to contrast with ``control``;
            when None the labels are passed to Augur unrestricted
            (NOTE(review): confirm how Augur handles more than two labels)

    Returns:
        pd.DataFrame — one row per cell type with a ``cell_type`` column,
        an ``auc`` column (the mean Augur score) and the remaining summary
        metrics, sorted by ``auc`` descending.

    K-Dense: augur (via pertpy)
    """
    ag = pt.tl.Augur(estimator="random_forest_classifier")

    # Column names and condition labels are configured at load time.
    loaded = ag.load(
        adata,
        label_col=perturbation_key,
        cell_type_col=cell_type_key,
        condition_label=control,
        treatment_label=treatment,
    )
    _, results = ag.predict(loaded)

    # Transpose to one row per cell type and expose the score as "auc".
    per_type = results["summary_metrics"].T
    score_col = (
        "mean_augur_score" if "mean_augur_score" in per_type.columns else "mean_auc"
    )
    auc_df = (
        per_type.rename_axis("cell_type")
        .reset_index()
        .rename(columns={score_col: "auc"})
        .sort_values("auc", ascending=False)
    )

    print(f"Augur prioritization:")
    for _, row in auc_df.head(5).iterrows():
        print(f" {row['cell_type']}: AUC={row['auc']:.3f}")

    return auc_df
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## 4. scGen 摂動予測
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
def scgen_perturbation_prediction(adata, perturbation_key="perturbation",
                                  cell_type_key="cell_type",
                                  control="control", target_perturbation=None,
                                  target_cell_type=None):
    """
    Predict the effect of a perturbation on one cell type in silico with scGen.

    Registers ``perturbation_key`` as the condition column and
    ``cell_type_key`` as the label column on ``adata``, trains an scGen
    model (100 epochs max, batch size 32) and predicts the perturbed state
    of the control cells of ``target_cell_type``.

    NOTE(review): ``SCGEN.setup_anndata`` registers fields on ``adata``
    itself; pass a copy if the caller's object must stay untouched.

    Parameters:
        adata: AnnData — training data
        perturbation_key: str — obs column holding the perturbation label
        cell_type_key: str — obs column holding the cell-type label
        control: str — control label
        target_perturbation: str — perturbation to predict (required)
        target_cell_type: str — cell type to predict for (required)

    Returns:
        tuple — ``(pred, delta)`` as returned by ``SCGEN.predict``.

    Raises:
        ValueError: ``target_perturbation`` or ``target_cell_type`` is None.
    """
    # Fail fast before the heavy import and the training run.
    if target_perturbation is None or target_cell_type is None:
        raise ValueError(
            "target_perturbation and target_cell_type must both be provided"
        )

    import scgen

    # The model needs to know which obs columns hold condition and cell
    # type; without this registration the keys above were never used.
    scgen.SCGEN.setup_anndata(
        adata, batch_key=perturbation_key, labels_key=cell_type_key
    )

    # Train the model
    scg = scgen.SCGEN(adata)
    scg.train(max_epochs=100, batch_size=32)

    # Predict
    pred, delta = scg.predict(
        ctrl_key=control,
        stim_key=target_perturbation,
        celltype_to_predict=target_cell_type,
    )

    print(f"scGen prediction: {target_cell_type} under {target_perturbation}")
    print(f" Predicted cells: {pred.shape[0]}")
    return pred, delta
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## 5. scIB 統合ベンチマーク
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
def benchmark_integration(adata, batch_key="batch", label_key="cell_type",
                          methods=None):
    """
    Benchmark batch integration quality with scIB metrics.

    Computes two batch-correction metrics (kBET, batch silhouette) and
    three bio-conservation metrics (NMI, ARI, label silhouette) on the
    ``X_pca`` embedding and the ``leiden`` clustering, then combines them
    as 0.6 * mean(bio) + 0.4 * mean(batch).

    Prerequisites: ``adata.obsm["X_pca"]`` and ``adata.obs["leiden"]`` must
    already exist; this function does not compute them.

    NOTE(review): ``scib.me.kBET`` is called with ``type_="embed"`` per the
    scib documentation (``type_`` is a required argument); confirm against
    the installed scib version. kBET also needs a working R setup.

    Parameters:
        adata: AnnData — data containing multiple batches
        batch_key: str — obs column holding the batch label
        label_key: str — obs column holding the cell-type label
        methods: list — accepted for interface compatibility; the metric
            set is fixed and this argument is not used

    Returns:
        dict — batch_kbet, batch_silhouette, bio_nmi, bio_ari,
        bio_silhouette and the weighted "overall" score.

    Raises:
        KeyError: a required obs column or the embedding is missing.

    K-Dense: scib
    """
    embed_key = "X_pca"
    cluster_key = "leiden"

    # Check prerequisites up front so the failure names what is missing.
    missing_obs = [
        key for key in (batch_key, label_key, cluster_key)
        if key not in adata.obs
    ]
    if missing_obs:
        raise KeyError(f"Missing obs columns: {missing_obs}")
    if embed_key not in adata.obsm:
        raise KeyError(f"Missing embedding in obsm: {embed_key}")

    import scib

    metrics = {}

    # batch correction metrics
    metrics["batch_kbet"] = scib.me.kBET(
        adata, batch_key=batch_key, label_key=label_key,
        type_="embed", embed=embed_key,
    )
    metrics["batch_silhouette"] = scib.me.silhouette_batch(
        adata, batch_key=batch_key, label_key=label_key, embed=embed_key
    )

    # bio conservation metrics
    metrics["bio_nmi"] = scib.me.nmi(adata, label_key, cluster_key)
    metrics["bio_ari"] = scib.me.ari(adata, label_key, cluster_key)
    metrics["bio_silhouette"] = scib.me.silhouette(
        adata, label_key=label_key, embed=embed_key
    )

    # overall score: bio conservation weighted 0.6, batch correction 0.4
    bio_score = np.mean([
        metrics["bio_nmi"], metrics["bio_ari"], metrics["bio_silhouette"]
    ])
    batch_score = np.mean([
        metrics["batch_kbet"], metrics["batch_silhouette"]
    ])
    metrics["overall"] = 0.6 * bio_score + 0.4 * batch_score

    print(f"scIB benchmark:")
    for k, v in metrics.items():
        print(f" {k}: {v:.4f}")
    return metrics
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
## 6. 摂動シグネチャ解析
|
|
237
|
+
|
|
238
|
+
```python
|
|
239
|
+
def perturbation_signature(adata, perturbation_key="perturbation",
                           control="control", n_top_genes=50):
    """
    Extract a gene signature for every perturbation relative to control.

    For each non-control label the per-gene mean of ``adata.X`` is compared
    with the control mean; the ``n_top_genes`` genes with the largest
    absolute difference form the signature.

    Parameters:
        adata: AnnData — perturbation data
        perturbation_key: str — obs column holding the perturbation label
        control: str — control label
        n_top_genes: int — number of top genes per signature

    Returns:
        dict — ``{perturbation: {"top_genes", "deltas", "n_cells"}}`` where
        ``deltas`` are perturbed-minus-control mean differences in the same
        order as ``top_genes``.

    Raises:
        ValueError: no cell carries the ``control`` label.
    """
    labels = adata.obs[perturbation_key]

    ctrl_mask = labels == control
    if not ctrl_mask.any():
        # An empty control group gives an all-NaN baseline and therefore
        # meaningless signatures; refuse instead of returning them.
        raise ValueError(
            f"No cells labelled {control!r} in obs[{perturbation_key!r}]"
        )

    # np.asarray(...).flatten() turns the (1, n_genes) mean of a matrix-like
    # X into a flat 1-D array.
    ctrl_mean = np.asarray(adata[ctrl_mask].X.mean(axis=0)).flatten()

    signatures = {}
    for pert in labels.unique():
        if pert == control:
            continue

        pert_mask = labels == pert
        pert_mean = np.asarray(adata[pert_mask].X.mean(axis=0)).flatten()

        delta = pert_mean - ctrl_mean
        # Largest absolute change first.
        gene_indices = np.argsort(np.abs(delta))[::-1][:n_top_genes]

        signatures[pert] = {
            "top_genes": adata.var_names[gene_indices].tolist(),
            "deltas": delta[gene_indices].tolist(),
            "n_cells": int(pert_mask.sum()),
        }

    print(f"Signatures extracted: {len(signatures)} perturbations, "
          f"{n_top_genes} genes each")
    return signatures
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
---
|
|
277
|
+
|
|
278
|
+
## パイプライン統合
|
|
279
|
+
|
|
280
|
+
```
|
|
281
|
+
single-cell-genomics → perturbation-analysis → pathway-enrichment
|
|
282
|
+
(scRNA-seq QC) (摂動 DE/Augur/scGen) (KEGG/Reactome)
|
|
283
|
+
│ │ ↓
|
|
284
|
+
spatial-transcriptomics ──┘ │ disease-research
|
|
285
|
+
(Visium/MERFISH) ↓ (GWAS/DisGeNET)
|
|
286
|
+
drug-target-profiling
|
|
287
|
+
(標的候補評価)
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## パイプライン出力
|
|
291
|
+
|
|
292
|
+
| ファイル | 説明 | 次スキル |
|
|
293
|
+
|---------|------|---------|
|
|
294
|
+
| `results/perturbation_de.json` | 差次的発現結果 | → pathway-enrichment |
|
|
295
|
+
| `results/augur_scores.csv` | Augur 応答性スコア | → single-cell-genomics |
|
|
296
|
+
| `results/perturbation_signatures.json` | 摂動シグネチャ | → drug-target-profiling |
|
|
297
|
+
| `results/scib_benchmark.json` | 統合ベンチマーク | → spatial-transcriptomics |
|