@nahisaho/satori 0.13.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +134 -43
- package/package.json +1 -1
- package/src/.github/skills/scientific-advanced-imaging/SKILL.md +382 -0
- package/src/.github/skills/scientific-biomedical-pubtator/SKILL.md +331 -0
- package/src/.github/skills/scientific-cell-line-resources/SKILL.md +258 -0
- package/src/.github/skills/scientific-chembl-assay-mining/SKILL.md +509 -0
- package/src/.github/skills/scientific-deep-chemistry/SKILL.md +350 -0
- package/src/.github/skills/scientific-ebi-databases/SKILL.md +280 -0
- package/src/.github/skills/scientific-ensembl-genomics/SKILL.md +378 -0
- package/src/.github/skills/scientific-expression-comparison/SKILL.md +303 -0
- package/src/.github/skills/scientific-md-simulation/SKILL.md +315 -0
- package/src/.github/skills/scientific-model-organism-db/SKILL.md +329 -0
- package/src/.github/skills/scientific-ontology-enrichment/SKILL.md +340 -0
- package/src/.github/skills/scientific-perturbation-analysis/SKILL.md +297 -0
- package/src/.github/skills/scientific-phylogenetics/SKILL.md +297 -0
- package/src/.github/skills/scientific-preprint-archive/SKILL.md +476 -0
- package/src/.github/skills/scientific-public-health-data/SKILL.md +322 -0
- package/src/.github/skills/scientific-regulatory-genomics/SKILL.md +274 -0
- package/src/.github/skills/scientific-reinforcement-learning/SKILL.md +280 -0
- package/src/.github/skills/scientific-scvi-integration/SKILL.md +344 -0
- package/src/.github/skills/scientific-string-network-api/SKILL.md +376 -0
- package/src/.github/skills/scientific-symbolic-mathematics/SKILL.md +277 -0
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-ontology-enrichment
|
|
3
|
+
description: |
|
|
4
|
+
オントロジー・エンリッチメント解析スキル。EFO 実験ファクターオントロジー、
|
|
5
|
+
OLS オントロジー検索サービス、Enrichr 遺伝子セット濃縮解析、
|
|
6
|
+
UMLS メタシソーラス統一医学言語体系の統合パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Ontology Enrichment
|
|
10
|
+
|
|
11
|
+
EFO / OLS / Enrichr / UMLS を統合した
|
|
12
|
+
オントロジー検索・エンリッチメント解析パイプラインを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- EFO で実験条件 (疾患・細胞型・組織) のオントロジー ID を取得するとき
|
|
17
|
+
- OLS で複数オントロジー横断検索 (HP, MONDO, DOID, GO, CHEBI) するとき
|
|
18
|
+
- Enrichr で遺伝子リストの濃縮解析を行うとき
|
|
19
|
+
- UMLS CUI で異なる用語体系間のマッピングを行うとき
|
|
20
|
+
- GWAS Catalog の trait を EFO 用語で標準化するとき
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Quick Start
|
|
25
|
+
|
|
26
|
+
## 1. EFO 実験ファクターオントロジー
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
import requests
|
|
30
|
+
import pandas as pd
|
|
31
|
+
|
|
32
|
+
OLS_API = "https://www.ebi.ac.uk/ols4/api"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def search_efo(query, exact=False, rows=30, timeout=30):
    """
    Search the Experimental Factor Ontology (EFO) via the OLS search endpoint.

    Parameters:
        query: str — search text (disease name, cell type, tissue name, ...)
        exact: bool — request exact-match search
        rows: int — maximum number of hits requested (default 30)
        timeout: float — seconds to wait for the HTTP response (default 30)

    Returns:
        pandas.DataFrame with one row per hit and the columns efo_id, label,
        description (first entry, cut to 200 characters), iri, ontology,
        is_defining_ontology and synonyms. The columns are present even when
        nothing matched.

    Raises:
        requests.HTTPError: the service answered with an error status.
        requests.Timeout: no response arrived within ``timeout`` seconds.

    ToolUniverse:
        EFO_search(query=query, exact=exact)
    """
    columns = [
        "efo_id",
        "label",
        "description",
        "iri",
        "ontology",
        "is_defining_ontology",
        "synonyms",
    ]
    params = {
        "q": query,
        "ontology": "efo",
        "exact": str(exact).lower(),
        "rows": rows,
    }
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection.
    resp = requests.get(f"{OLS_API}/search", params=params, timeout=timeout)
    resp.raise_for_status()
    docs = resp.json().get("response", {}).get("docs", [])

    results = []
    for doc in docs:
        description = doc.get("description") or [""]
        if isinstance(description, str):
            # Guard against a plain string: indexing it would keep only its
            # first character.
            description = [description]
        results.append({
            "efo_id": doc.get("obo_id", ""),
            "label": doc.get("label", ""),
            "description": description[0][:200],
            "iri": doc.get("iri", ""),
            "ontology": doc.get("ontology_name", ""),
            "is_defining_ontology": doc.get("is_defining_ontology", False),
            "synonyms": doc.get("synonym", []),
        })

    # Explicit columns keep the schema stable for an empty result set.
    df = pd.DataFrame(results, columns=columns)
    print(f"EFO search '{query}': {len(df)} terms")
    return df
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## 2. OLS マルチオントロジー検索
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
def search_ols(query, ontologies=None, type_filter=None, rows=50, timeout=30):
    """
    Search across one or more ontologies with the OLS search endpoint.

    Parameters:
        query: str — search text
        ontologies: list — ontology IDs (e.g., ["hp", "mondo", "go"]); a
            single ID given as a plain string is accepted as well
        type_filter: str — "class", "property", "individual"
        rows: int — maximum number of hits requested (default 50)
        timeout: float — seconds to wait for the HTTP response (default 30)

    Returns:
        pandas.DataFrame with the columns obo_id, label, ontology,
        description (first entry, cut to 200 characters), iri, synonyms and
        has_children. The columns are present even when nothing matched.

    Raises:
        requests.HTTPError: the service answered with an error status.
        requests.Timeout: no response arrived within ``timeout`` seconds.

    ToolUniverse:
        OLS_search(query=query, ontology=ontology)
        OLS_get_term(ontology=ontology, iri=iri)
        OLS_get_ancestors(ontology=ontology, iri=iri)
    """
    columns = [
        "obo_id",
        "label",
        "ontology",
        "description",
        "iri",
        "synonyms",
        "has_children",
    ]
    if isinstance(ontologies, str):
        # ",".join("hp") would yield "h,p"; treat a bare string as one ID.
        ontologies = [ontologies]

    params = {"q": query, "rows": rows}
    if ontologies:
        params["ontology"] = ",".join(ontologies)
    if type_filter:
        params["type"] = type_filter

    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection.
    resp = requests.get(f"{OLS_API}/search", params=params, timeout=timeout)
    resp.raise_for_status()
    docs = resp.json().get("response", {}).get("docs", [])

    results = []
    for doc in docs:
        description = doc.get("description") or [""]
        if isinstance(description, str):
            description = [description]
        results.append({
            "obo_id": doc.get("obo_id", ""),
            "label": doc.get("label", ""),
            "ontology": doc.get("ontology_name", ""),
            "description": description[0][:200],
            "iri": doc.get("iri", ""),
            "synonyms": doc.get("synonym", []),
            "has_children": doc.get("has_children", False),
        })

    # Explicit columns keep the schema stable for an empty result set.
    df = pd.DataFrame(results, columns=columns)
    print(f"OLS search '{query}' "
          f"[{','.join(ontologies) if ontologies else 'all'}]: "
          f"{len(df)} terms")
    return df
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def get_ols_term_hierarchy(ontology, term_id, iri=None, timeout=30,
                           page_size=500):
    """
    Fetch the ancestors and descendants of a single ontology term from OLS.

    Parameters:
        ontology: str — ontology ID (e.g., "hp", "go")
        term_id: str — OBO-style ID (e.g., "HP:0001250")
        iri: str — full term IRI; when omitted it is derived from term_id
        timeout: float — seconds to wait for each HTTP response (default 30)
        page_size: int — number of terms requested per relation (default 500)

    Returns:
        dict with the keys "ancestors" and "descendants", each a list of
        {"id": ..., "label": ...} dicts. A relation whose request does not
        return HTTP 200 is reported on stdout and left empty.

    Raises:
        requests.Timeout: no response arrived within ``timeout`` seconds.
    """
    if iri is None:
        prefix, _, local_id = term_id.partition(":")
        if prefix.upper() == "EFO":
            # NOTE(review): EFO-native terms are published under the EBI
            # namespace rather than the OBO PURL — confirm for the term used.
            iri = f"http://www.ebi.ac.uk/efo/EFO_{local_id}"
        else:
            iri = f"http://purl.obolibrary.org/obo/{term_id.replace(':', '_')}"

    # The IRI is percent-encoded twice before being placed in the URL path.
    encoded_iri = requests.utils.quote(requests.utils.quote(iri, safe=""), safe="")

    hierarchy = {"ancestors": [], "descendants": []}
    for relation in ("ancestors", "descendants"):
        resp = requests.get(
            f"{OLS_API}/ontologies/{ontology}/terms/{encoded_iri}/{relation}",
            # NOTE(review): assumes the endpoint honours a "size" paging
            # parameter — confirm against the OLS API documentation.
            params={"size": page_size},
            timeout=timeout,
        )
        if resp.status_code != 200:
            # Stay best-effort, but say so instead of returning a silently
            # empty list.
            print(f"OLS hierarchy {term_id}: {relation} request failed "
                  f"(HTTP {resp.status_code})")
            continue

        payload = resp.json()
        terms = payload.get("_embedded", {}).get("terms", [])
        for t in terms:
            hierarchy[relation].append({
                "id": t.get("obo_id", ""),
                "label": t.get("label", ""),
            })

        total = payload.get("page", {}).get("totalElements", len(terms))
        if total > len(terms):
            print(f"OLS hierarchy {term_id}: {relation} truncated "
                  f"({len(terms)} of {total}); increase page_size")

    print(f"OLS hierarchy {term_id}: "
          f"{len(hierarchy['ancestors'])} ancestors, "
          f"{len(hierarchy['descendants'])} descendants")
    return hierarchy
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## 3. Enrichr 遺伝子セット濃縮解析
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
ENRICHR_API = "https://maayanlab.cloud/Enrichr"
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def run_enrichr(gene_list, description="", gene_set_libraries=None, timeout=60):
    """
    Run an Enrichr gene-set enrichment analysis for a list of gene symbols.

    Parameters:
        gene_list: list — gene symbols (e.g., ["TP53", "BRCA1", "EGFR"]); a
            whitespace-separated string is accepted as well
        description: str — free-text description stored with the list
        gene_set_libraries: list — gene-set libraries to query; defaults to a
            GO / KEGG / Reactome / WikiPathway / DisGeNET selection
        timeout: float — seconds to wait for each HTTP response (default 60)

    Returns:
        dict mapping each library name to a pandas.DataFrame with the columns
        rank, term, p_value, z_score, combined_score, overlap_genes and
        adjusted_p, sorted by adjusted_p.

    Raises:
        ValueError: gene_list contains no usable gene symbol.
        RuntimeError: the submission response carries no ``userListId``.
        requests.HTTPError: the service answered with an error status.
        requests.Timeout: no response arrived within ``timeout`` seconds.

    ToolUniverse:
        Enrichr_submit_gene_list(genes=gene_list)
        Enrichr_get_enrichment(user_list_id=id, library=library)
    """
    if gene_set_libraries is None:
        gene_set_libraries = [
            "GO_Biological_Process_2023",
            "GO_Molecular_Function_2023",
            "KEGG_2021_Human",
            "Reactome_2022",
            "WikiPathway_2023_Human",
            "DisGeNET",
        ]

    if isinstance(gene_list, str):
        # Iterating a string would submit one "gene" per character.
        gene_list = gene_list.split()
    genes = [str(g).strip() for g in gene_list if str(g).strip()]
    if not genes:
        raise ValueError("gene_list must contain at least one gene symbol")

    # Submit gene list (multipart form fields, one symbol per line)
    submit_resp = requests.post(
        f"{ENRICHR_API}/addList",
        files={
            "list": (None, "\n".join(genes)),
            "description": (None, description),
        },
        timeout=timeout,
    )
    submit_resp.raise_for_status()
    user_list_id = submit_resp.json().get("userListId")
    if user_list_id is None:
        # Querying with userListId=None would only fail later and less clearly.
        raise RuntimeError("Enrichr addList response did not contain a userListId")
    print(f"Enrichr: submitted {len(genes)} genes (ID={user_list_id})")

    # Positional layout of one result row as consumed by this function.
    # NOTE(review): position 3 is stored as "z_score"; newer Enrichr
    # documentation may describe it as an odds ratio — confirm.
    fields = (
        "rank",
        "term",
        "p_value",
        "z_score",
        "combined_score",
        "overlap_genes",
        "adjusted_p",
    )

    # Get enrichment results per library
    all_results = {}
    for library in gene_set_libraries:
        enrich_resp = requests.get(
            f"{ENRICHR_API}/enrich",
            params={"userListId": user_list_id, "backgroundType": library},
            timeout=timeout,
        )
        enrich_resp.raise_for_status()
        data = enrich_resp.json()

        # zip() stops at the shorter sequence, so trailing entries of a row
        # are ignored.
        rows = [dict(zip(fields, term_data)) for term_data in data.get(library, [])]
        df = pd.DataFrame(rows, columns=list(fields))
        if not df.empty:
            df = df.sort_values("adjusted_p")
        all_results[library] = df

        sig_count = int((df["adjusted_p"] < 0.05).sum()) if not df.empty else 0
        print(f"  {library}: {sig_count} significant terms (FDR < 0.05)")

    return all_results
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
## 4. UMLS メタシソーラスマッピング
|
|
234
|
+
|
|
235
|
+
```python
|
|
236
|
+
UMLS_API = "https://uts-ws.nlm.nih.gov/rest"
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def search_umls(query, api_key, search_type="words", page_size=25, timeout=30):
    """
    Search the UMLS Metathesaurus.

    Parameters:
        query: str — search text (disease name, symptom, drug name)
        api_key: str — UMLS API key
        search_type: str — "words", "exact", "leftTruncation"
        page_size: int — number of results requested (default 25)
        timeout: float — seconds to wait for the HTTP response (default 30)

    Returns:
        pandas.DataFrame with the columns cui, name, root_source and uri.
        The columns are present even when nothing matched.

    Raises:
        requests.HTTPError: the service answered with an error status.
        requests.Timeout: no response arrived within ``timeout`` seconds.

    ToolUniverse:
        UMLS_search(query=query, search_type=search_type)
        UMLS_get_concept(cui=cui)
    """
    columns = ["cui", "name", "root_source", "uri"]
    params = {
        "string": query,
        "searchType": search_type,
        "apiKey": api_key,
        "pageSize": page_size,
    }
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection.
    resp = requests.get(f"{UMLS_API}/search/current", params=params, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    results = []
    for item in data.get("result", {}).get("results", []):
        if item.get("ui") == "NONE":
            # NOTE(review): the service may signal "no results" with a
            # placeholder entry (ui "NONE"); it is not a concept — confirm
            # against the UMLS search API documentation.
            continue
        results.append({
            "cui": item.get("ui", ""),
            "name": item.get("name", ""),
            "root_source": item.get("rootSource", ""),
            "uri": item.get("uri", ""),
        })

    # Explicit columns keep the schema stable for an empty result set.
    df = pd.DataFrame(results, columns=columns)
    print(f"UMLS search '{query}': {len(df)} concepts")
    return df
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def get_umls_crosswalk(cui, api_key, target_source=None, timeout=30):
    """
    List the source-vocabulary atoms of a UMLS concept (CUI) as a crosswalk.

    Parameters:
        cui: str — UMLS CUI (e.g., "C0023264")
        api_key: str — UMLS API key
        target_source: str — target vocabulary as a UMLS source abbreviation
            (e.g., "SNOMEDCT_US", "ICD10CM", "MSH"); several abbreviations
            may be given comma-separated
        timeout: float — seconds to wait for the HTTP response (default 30)

    Returns:
        pandas.DataFrame with the columns source, code, name and term_type.
        The columns are present even when no atom was returned.

    Raises:
        requests.HTTPError: the service answered with an error status.
        requests.Timeout: no response arrived within ``timeout`` seconds.
    """
    columns = ["source", "code", "name", "term_type"]
    params = {"apiKey": api_key, "pageSize": 100}
    if target_source:
        params["sabs"] = target_source

    resp = requests.get(
        f"{UMLS_API}/content/current/CUI/{cui}/atoms",
        params=params,
        timeout=timeout,
    )
    resp.raise_for_status()
    data = resp.json()

    mappings = [
        {
            "source": atom.get("rootSource", ""),
            "code": atom.get("sourceConcept", ""),
            "name": atom.get("name", ""),
            "term_type": atom.get("termType", ""),
        }
        for atom in data.get("result", [])
    ]

    # Explicit columns: a column-less empty frame would raise KeyError on the
    # "source" lookup below.
    df = pd.DataFrame(mappings, columns=columns)
    if target_source:
        # Accept a comma-separated list, matching what is sent as "sabs".
        wanted = {s.strip() for s in target_source.split(",") if s.strip()}
        df = df[df["source"].isin(wanted)]

    print(f"UMLS crosswalk {cui}: {len(df)} mappings "
          f"({target_source or 'all sources'})")
    return df
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
---
|
|
312
|
+
|
|
313
|
+
## 利用可能ツール
|
|
314
|
+
|
|
315
|
+
| ToolUniverse カテゴリ | 主なツール |
|
|
316
|
+
|---|---|
|
|
317
|
+
| `efo` | `EFO_search` |
|
|
318
|
+
| `ols` | `OLS_search`, `OLS_get_term`, `OLS_get_ancestors` |
|
|
319
|
+
| `enrichr` | `Enrichr_submit_gene_list`, `Enrichr_get_enrichment` |
|
|
320
|
+
| `umls` | `UMLS_search`, `UMLS_get_concept` |
|
|
321
|
+
|
|
322
|
+
## パイプライン出力
|
|
323
|
+
|
|
324
|
+
| 出力ファイル | 説明 | 連携先スキル |
|
|
325
|
+
|---|---|---|
|
|
326
|
+
| `results/efo_terms.csv` | EFO 標準化用語 | → disease-research, gene-expression |
|
|
327
|
+
| `results/enrichr_results/` | 遺伝子セット濃縮結果 | → pathway-enrichment, multi-omics |
|
|
328
|
+
| `results/umls_mapping.json` | UMLS 用語マッピング | → clinical-decision-support, public-health-data |
|
|
329
|
+
| `results/ontology_hierarchy.json` | オントロジー階層 | → text-mining-nlp, knowledge-graph |
|
|
330
|
+
|
|
331
|
+
## パイプライン統合
|
|
332
|
+
|
|
333
|
+
```
|
|
334
|
+
disease-research ──→ ontology-enrichment ──→ pathway-enrichment
|
|
335
|
+
(GWAS/DisGeNET) (EFO/OLS/UMLS/Enrichr) (KEGG/Reactome/GO)
|
|
336
|
+
│
|
|
337
|
+
├──→ biothings-idmapping (CUI→Gene→Protein)
|
|
338
|
+
├──→ public-health-data (UMLS→RxNorm)
|
|
339
|
+
└──→ clinical-reporting (SNOMED/ICD マッピング)
|
|
340
|
+
```
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-perturbation-analysis
|
|
3
|
+
description: |
|
|
4
|
+
シングルセル摂動解析スキル。pertpy による CRISPR スクリーン解析・
|
|
5
|
+
薬剤応答分析・scGen 摂動予測・Augur 摂動応答性スコアリング・
|
|
6
|
+
scIB 統合ベンチマーク・差次的摂動応答パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Perturbation Analysis
|
|
10
|
+
|
|
11
|
+
pertpy / Augur / scGen / scIB を活用したシングルセルレベルの摂動解析
|
|
12
|
+
パイプラインを提供する。CRISPR スクリーン、薬剤処理、
|
|
13
|
+
遺伝子ノックダウンなどの摂動データの統合解析。
|
|
14
|
+
|
|
15
|
+
## When to Use
|
|
16
|
+
|
|
17
|
+
- CRISPR スクリーンデータ (Perturb-seq) を解析するとき
|
|
18
|
+
- 薬剤処理前後のシングルセル発現変動を評価するとき
|
|
19
|
+
- 摂動応答の細胞型特異性を定量するとき
|
|
20
|
+
- 複数のバッチ統合手法をベンチマークするとき (scIB)
|
|
21
|
+
- 摂動の効果を in silico で予測するとき (scGen)
|
|
22
|
+
- 差次的優先度 (Augur) で摂動応答性の高い細胞型を特定するとき
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
## 1. pertpy セットアップ & データ読込み
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import pertpy as pt
|
|
32
|
+
import scanpy as sc
|
|
33
|
+
import anndata as ad
|
|
34
|
+
import pandas as pd
|
|
35
|
+
import numpy as np
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def load_perturbation_data(adata_path, perturbation_key="perturbation",
                           control_label="control"):
    """
    Read a perturbation-experiment AnnData file and apply basic preprocessing.

    Parameters:
        adata_path: str — path to the AnnData (.h5ad) file
        perturbation_key: str — obs column holding the perturbation labels
        control_label: str — label value that marks control cells

    Returns:
        The AnnData object after cell/gene filtering, total-count
        normalisation (target sum 1e4) and log1p transformation.

    K-Dense: pertpy
    """
    dataset = sc.read_h5ad(adata_path)

    # Basic preprocessing: drop sparse cells and genes, then normalise and
    # log-transform in place.
    sc.pp.filter_cells(dataset, min_genes=200)
    sc.pp.filter_genes(dataset, min_cells=3)
    sc.pp.normalize_total(dataset, target_sum=1e4)
    sc.pp.log1p(dataset)

    # Summary counts for the log output.
    labels = dataset.obs[perturbation_key]
    total_cells = len(dataset)
    n_control = (labels == control_label).sum()
    n_perturbations = labels.nunique()
    n_perturbed = total_cells - n_control

    print(f"Loaded: {total_cells} cells, {n_perturbations} perturbations")
    print(f"Control: {n_control}, Perturbed: {n_perturbed}")
    return dataset
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## 2. 差次的遺伝子発現 (摂動 vs コントロール)
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
def differential_perturbation(adata, perturbation_key="perturbation",
                              control="control", target=None):
    """
    Differential expression of each perturbation against the control group.

    Parameters:
        adata: AnnData — perturbation data
        perturbation_key: str — obs column holding the perturbation labels
        control: str — control label used as the reference group
        target: str — single perturbation to compare (None for all)

    Returns:
        dict mapping each non-control group to a dict with n_degs, n_up,
        n_down (plain ints, counted among genes with pvals_adj < 0.05) and
        top_genes (the first ten significant gene names). Groups whose
        results cannot be extracted are skipped with a RuntimeWarning.
    """
    import warnings

    if target:
        keep = adata.obs[perturbation_key].isin([control, target])
        adata_sub = adata[keep].copy()
    else:
        # Work on a copy so the test results are not written into the
        # caller's object.
        adata_sub = adata.copy()

    sc.tl.rank_genes_groups(
        adata_sub,
        groupby=perturbation_key,
        reference=control,
        method="wilcoxon",
    )

    results = {}
    for group in adata_sub.obs[perturbation_key].unique():
        if group == control:
            continue
        try:
            degs = sc.get.rank_genes_groups_df(adata_sub, group=group)
            degs_sig = degs[degs["pvals_adj"] < 0.05]
            results[group] = {
                # int(): NumPy integer scalars are not JSON-serialisable.
                "n_degs": int(len(degs_sig)),
                "n_up": int((degs_sig["logfoldchanges"] > 0).sum()),
                "n_down": int((degs_sig["logfoldchanges"] < 0).sum()),
                "top_genes": degs_sig.head(10)["names"].tolist(),
            }
        except Exception as exc:
            # Stay best-effort per group, but make the skip visible.
            warnings.warn(
                f"Skipping perturbation {group!r}: {exc}",
                RuntimeWarning,
                stacklevel=2,
            )
            continue

    print(f"DE results: {len(results)} perturbations analyzed")
    return results
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## 3. Augur 摂動応答性スコアリング
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
def augur_prioritization(adata, perturbation_key="perturbation",
                         cell_type_key="cell_type", control="control",
                         treatment=None):
    """
    Score how strongly each cell type responds to perturbation with Augur.

    Parameters:
        adata: AnnData — perturbation data
        perturbation_key: str — obs column holding the perturbation labels
        cell_type_key: str — obs column holding the cell-type labels
        control: str — control label
        treatment: str — perturbation label to contrast with the control
            (None keeps every label found in perturbation_key)

    Returns:
        pandas.DataFrame with one row per cell type, a "cell_type" column,
        the summary metrics reported by Augur and an "auc" column, sorted by
        "auc" in descending order.

    K-Dense: augur (via pertpy)
    """
    ag = pt.tl.Augur(estimator="random_forest_classifier")

    # NOTE(review): written against the pertpy Augur API, where the label and
    # cell-type columns are declared in load() and predict() takes the loaded
    # object — confirm against the installed pertpy version.
    loaded = ag.load(
        adata,
        condition_label=control,
        treatment_label=treatment,
        cell_type_col=cell_type_key,
        label_col=perturbation_key,
    )

    # Per-cell-type AUC for perturbed vs. control cells
    _, results = ag.predict(loaded)

    # NOTE(review): summary_metrics is assumed to hold metrics as rows
    # (including "mean_auc") and cell types as columns — confirm.
    auc_df = results["summary_metrics"].T.rename_axis("cell_type").reset_index()
    auc_df["auc"] = auc_df["mean_auc"]
    auc_df = auc_df.sort_values("auc", ascending=False)

    print(f"Augur prioritization:")
    for _, row in auc_df.head(5).iterrows():
        print(f"  {row['cell_type']}: AUC={row['auc']:.3f}")

    return auc_df
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## 4. scGen 摂動予測
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
def scgen_perturbation_prediction(adata, perturbation_key="perturbation",
                                  cell_type_key="cell_type",
                                  control="control", target_perturbation=None,
                                  target_cell_type=None):
    """
    Predict the effect of a perturbation in silico with scGen.

    Parameters:
        adata: AnnData — training data
        perturbation_key: str — obs column holding the perturbation labels
        cell_type_key: str — obs column holding the cell-type labels
        control: str — control label
        target_perturbation: str — perturbation whose effect is predicted
        target_cell_type: str — cell type to predict for

    Returns:
        Tuple (pred, delta) exactly as returned by ``SCGEN.predict``.

    Raises:
        ValueError: target_perturbation or target_cell_type is missing.
    """
    import scgen

    if target_perturbation is None or target_cell_type is None:
        raise ValueError(
            "target_perturbation and target_cell_type must both be provided"
        )

    # Register the condition and cell-type columns before building the model.
    # NOTE(review): assumes scGen requires setup_anndata() before
    # construction, as documented — confirm for the installed version.
    scgen.SCGEN.setup_anndata(
        adata,
        batch_key=perturbation_key,
        labels_key=cell_type_key,
    )

    # Train the model
    scg = scgen.SCGEN(adata)
    scg.train(max_epochs=100, batch_size=32)

    # Predict
    pred, delta = scg.predict(
        ctrl_key=control,
        stim_key=target_perturbation,
        celltype_to_predict=target_cell_type,
    )

    print(f"scGen prediction: {target_cell_type} under {target_perturbation}")
    print(f"  Predicted cells: {pred.shape[0]}")
    return pred, delta
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## 5. scIB 統合ベンチマーク
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
def benchmark_integration(adata, batch_key="batch", label_key="cell_type",
                          methods=None, embed="X_pca", cluster_key="leiden"):
    """
    Benchmark a batch-integration result with scIB metrics.

    Parameters:
        adata: AnnData — data containing several batches
        batch_key: str — obs column holding the batch labels
        label_key: str — obs column holding the cell-type labels
        methods: list — accepted for compatibility; not used by this function
        embed: str — obsm key of the embedding to score (default "X_pca")
        cluster_key: str — obs column holding the clustering that is compared
            with label_key (default "leiden")

    Returns:
        dict with batch_kbet, batch_silhouette, bio_nmi, bio_ari,
        bio_silhouette and overall, where overall is
        0.6 * mean(bio metrics) + 0.4 * mean(batch metrics).

    Raises:
        KeyError: the embedding or the cluster column is missing from adata.

    K-Dense: scib
    """
    import scib

    # Fail early with a clear message instead of deep inside a metric call.
    if embed not in adata.obsm:
        raise KeyError(
            f"adata.obsm has no '{embed}' embedding; compute it before benchmarking"
        )
    if cluster_key not in adata.obs:
        raise KeyError(
            f"adata.obs has no '{cluster_key}' column; run clustering before benchmarking"
        )

    metrics = {}

    # batch correction metrics
    # NOTE(review): kBET is assumed to need a `type_` argument naming the
    # kind of integration output — confirm against the installed scib version.
    metrics["batch_kbet"] = scib.me.kBET(
        adata, batch_key=batch_key, label_key=label_key,
        type_="embed", embed=embed,
    )
    metrics["batch_silhouette"] = scib.me.silhouette_batch(
        adata, batch_key=batch_key, label_key=label_key, embed=embed
    )

    # bio conservation metrics
    metrics["bio_nmi"] = scib.me.nmi(adata, cluster_key, label_key)
    metrics["bio_ari"] = scib.me.ari(adata, cluster_key, label_key)
    metrics["bio_silhouette"] = scib.me.silhouette(
        adata, label_key=label_key, embed=embed
    )

    # Overall score: weighted mean of bio-conservation and batch-correction
    metrics["overall"] = 0.6 * np.mean([
        metrics["bio_nmi"], metrics["bio_ari"], metrics["bio_silhouette"]
    ]) + 0.4 * np.mean([
        metrics["batch_kbet"], metrics["batch_silhouette"]
    ])

    print(f"scIB benchmark:")
    for k, v in metrics.items():
        print(f"  {k}: {v:.4f}")
    return metrics
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
## 6. 摂動シグネチャ解析
|
|
237
|
+
|
|
238
|
+
```python
|
|
239
|
+
def perturbation_signature(adata, perturbation_key="perturbation",
                           control="control", n_top_genes=50):
    """
    Extract a gene signature for every perturbation relative to the control.

    Parameters:
        adata: AnnData — perturbation data
        perturbation_key: str — obs column holding the perturbation labels
        control: str — control label
        n_top_genes: int — number of genes kept per signature

    Returns:
        dict mapping each non-control label to a dict with top_genes (names
        of the genes with the largest absolute mean difference from the
        control), deltas (the signed differences for those genes) and
        n_cells (number of cells carrying the label).
    """
    labels = adata.obs[perturbation_key]

    def _mean_profile(selector):
        # Per-gene mean over the selected cells, flattened to one dimension.
        return np.asarray(adata[selector].X.mean(axis=0)).flatten()

    baseline = _mean_profile(labels == control)

    signatures = {}
    for pert in labels.unique():
        if pert == control:
            continue

        selector = labels == pert
        shift = _mean_profile(selector) - baseline
        # Largest absolute shift first, cut to the requested length.
        ranked = np.argsort(np.abs(shift))[::-1][:n_top_genes]

        signatures[pert] = dict(
            top_genes=adata.var_names[ranked].tolist(),
            deltas=shift[ranked].tolist(),
            n_cells=int(selector.sum()),
        )

    print(f"Signatures extracted: {len(signatures)} perturbations, "
          f"{n_top_genes} genes each")
    return signatures
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
---
|
|
277
|
+
|
|
278
|
+
## パイプライン統合
|
|
279
|
+
|
|
280
|
+
```
|
|
281
|
+
single-cell-genomics → perturbation-analysis → pathway-enrichment
|
|
282
|
+
(scRNA-seq QC) (摂動 DE/Augur/scGen) (KEGG/Reactome)
|
|
283
|
+
│ │ ↓
|
|
284
|
+
spatial-transcriptomics ──┘ │ disease-research
|
|
285
|
+
(Visium/MERFISH) ↓ (GWAS/DisGeNET)
|
|
286
|
+
drug-target-profiling
|
|
287
|
+
(標的候補評価)
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## パイプライン出力
|
|
291
|
+
|
|
292
|
+
| ファイル | 説明 | 次スキル |
|
|
293
|
+
|---------|------|---------|
|
|
294
|
+
| `results/perturbation_de.json` | 差次的発現結果 | → pathway-enrichment |
|
|
295
|
+
| `results/augur_scores.csv` | Augur 応答性スコア | → single-cell-genomics |
|
|
296
|
+
| `results/perturbation_signatures.json` | 摂動シグネチャ | → drug-target-profiling |
|
|
297
|
+
| `results/scib_benchmark.json` | 統合ベンチマーク | → spatial-transcriptomics |
|