@nahisaho/satori 0.13.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. package/README.md +134 -43
  2. package/package.json +1 -1
  3. package/src/.github/skills/scientific-advanced-imaging/SKILL.md +382 -0
  4. package/src/.github/skills/scientific-biomedical-pubtator/SKILL.md +331 -0
  5. package/src/.github/skills/scientific-cell-line-resources/SKILL.md +258 -0
  6. package/src/.github/skills/scientific-chembl-assay-mining/SKILL.md +509 -0
  7. package/src/.github/skills/scientific-deep-chemistry/SKILL.md +350 -0
  8. package/src/.github/skills/scientific-ebi-databases/SKILL.md +280 -0
  9. package/src/.github/skills/scientific-ensembl-genomics/SKILL.md +378 -0
  10. package/src/.github/skills/scientific-expression-comparison/SKILL.md +303 -0
  11. package/src/.github/skills/scientific-md-simulation/SKILL.md +315 -0
  12. package/src/.github/skills/scientific-model-organism-db/SKILL.md +329 -0
  13. package/src/.github/skills/scientific-ontology-enrichment/SKILL.md +340 -0
  14. package/src/.github/skills/scientific-perturbation-analysis/SKILL.md +297 -0
  15. package/src/.github/skills/scientific-phylogenetics/SKILL.md +297 -0
  16. package/src/.github/skills/scientific-preprint-archive/SKILL.md +476 -0
  17. package/src/.github/skills/scientific-public-health-data/SKILL.md +322 -0
  18. package/src/.github/skills/scientific-regulatory-genomics/SKILL.md +274 -0
  19. package/src/.github/skills/scientific-reinforcement-learning/SKILL.md +280 -0
  20. package/src/.github/skills/scientific-scvi-integration/SKILL.md +344 -0
  21. package/src/.github/skills/scientific-string-network-api/SKILL.md +376 -0
  22. package/src/.github/skills/scientific-symbolic-mathematics/SKILL.md +277 -0
@@ -0,0 +1,340 @@
1
+ ---
2
+ name: scientific-ontology-enrichment
3
+ description: |
4
+ オントロジー・エンリッチメント解析スキル。EFO 実験ファクターオントロジー、
5
+ OLS オントロジー検索サービス、Enrichr 遺伝子セット濃縮解析、
6
+ UMLS メタシソーラス統一医学言語体系の統合パイプライン。
7
+ ---
8
+
9
+ # Scientific Ontology Enrichment
10
+
11
+ EFO / OLS / Enrichr / UMLS を統合した
12
+ オントロジー検索・エンリッチメント解析パイプラインを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - EFO で実験条件 (疾患・細胞型・組織) のオントロジー ID を取得するとき
17
+ - OLS で複数オントロジー横断検索 (HP, MONDO, DOID, GO, CHEBI) するとき
18
+ - Enrichr で遺伝子リストの濃縮解析を行うとき
19
+ - UMLS CUI で異なる用語体系間のマッピングを行うとき
20
+ - GWAS Catalog の trait を EFO 用語で標準化するとき
21
+
22
+ ---
23
+
24
+ ## Quick Start
25
+
26
+ ## 1. EFO 実験ファクターオントロジー
27
+
28
+ ```python
29
+ import requests
30
+ import pandas as pd
31
+
32
+ OLS_API = "https://www.ebi.ac.uk/ols4/api"
33
+
34
+
35
def search_efo(query, exact=False, timeout=30):
    """
    Search the EFO (Experimental Factor Ontology) through the OLS search API.

    Parameters:
        query: str — search text (disease name, cell type, tissue name, ...)
        exact: bool — sent to the service as the lowercase string "true"/"false"
        timeout: float or (connect, read) tuple — seconds to wait for the
            HTTP response before giving up

    Returns:
        pandas.DataFrame — one row per returned document with the columns
        efo_id, label, description, iri, ontology, is_defining_ontology and
        synonyms. The columns are present even when nothing matched.

    Raises:
        requests.HTTPError — the service answered with an error status
        requests.Timeout — no answer arrived within `timeout`

    ToolUniverse:
        EFO_search(query=query, exact=exact)
    """
    columns = [
        "efo_id",
        "label",
        "description",
        "iri",
        "ontology",
        "is_defining_ontology",
        "synonyms",
    ]
    params = {
        "q": query,
        "ontology": "efo",
        "exact": str(exact).lower(),
        "rows": 30,
    }
    # Without a timeout a stalled connection would block the caller forever.
    resp = requests.get(f"{OLS_API}/search", params=params, timeout=timeout)
    resp.raise_for_status()
    docs = resp.json().get("response", {}).get("docs", [])

    results = [
        {
            "efo_id": doc.get("obo_id", ""),
            "label": doc.get("label", ""),
            # "description" may be missing or empty; keep the first entry,
            # cut to 200 characters.
            "description": (doc.get("description") or [""])[0][:200],
            "iri": doc.get("iri", ""),
            "ontology": doc.get("ontology_name", ""),
            "is_defining_ontology": doc.get("is_defining_ontology", False),
            "synonyms": doc.get("synonym", []),
        }
        for doc in docs
    ]

    # Fixing the columns keeps df["efo_id"] etc. usable on an empty result.
    df = pd.DataFrame(results, columns=columns)
    print(f"EFO search '{query}': {len(df)} terms")
    return df
71
+ ```
72
+
73
+ ## 2. OLS マルチオントロジー検索
74
+
75
+ ```python
76
def search_ols(query, ontologies=None, type_filter=None, timeout=30):
    """
    Search across ontologies with the OLS (Ontology Lookup Service) API.

    Parameters:
        query: str — search text
        ontologies: list — ontology IDs to restrict to (e.g., ["hp", "mondo",
            "go"]); joined with commas into the `ontology` request parameter.
            Falsy means no restriction is sent.
        type_filter: str — "class", "property", "individual"; sent as `type`
        timeout: float or (connect, read) tuple — seconds to wait for the
            HTTP response before giving up

    Returns:
        pandas.DataFrame — one row per returned document with the columns
        obo_id, label, ontology, description, iri, synonyms and has_children.
        The columns are present even when nothing matched.

    Raises:
        requests.HTTPError — the service answered with an error status
        requests.Timeout — no answer arrived within `timeout`

    ToolUniverse:
        OLS_search(query=query, ontology=ontology)
        OLS_get_term(ontology=ontology, iri=iri)
        OLS_get_ancestors(ontology=ontology, iri=iri)
    """
    columns = [
        "obo_id",
        "label",
        "ontology",
        "description",
        "iri",
        "synonyms",
        "has_children",
    ]
    params = {"q": query, "rows": 50}
    if ontologies:
        params["ontology"] = ",".join(ontologies)
    if type_filter:
        params["type"] = type_filter

    # Without a timeout a stalled connection would block the caller forever.
    resp = requests.get(f"{OLS_API}/search", params=params, timeout=timeout)
    resp.raise_for_status()
    docs = resp.json().get("response", {}).get("docs", [])

    results = [
        {
            "obo_id": doc.get("obo_id", ""),
            "label": doc.get("label", ""),
            "ontology": doc.get("ontology_name", ""),
            # First description entry only, cut to 200 characters.
            "description": (doc.get("description") or [""])[0][:200],
            "iri": doc.get("iri", ""),
            "synonyms": doc.get("synonym", []),
            "has_children": doc.get("has_children", False),
        }
        for doc in docs
    ]

    # Fixing the columns keeps column access usable on an empty result.
    df = pd.DataFrame(results, columns=columns)
    scope = ",".join(ontologies) if ontologies else "all"
    print(f"OLS search '{query}' [{scope}]: {len(df)} terms")
    return df
117
+
118
+
119
def get_ols_term_hierarchy(ontology, term_id, timeout=30):
    """
    Fetch the ancestors and descendants of an ontology term from OLS.

    Parameters:
        ontology: str — ontology ID (e.g., "hp", "go", "efo")
        term_id: str — OBO-style ID (e.g., "HP:0001250", "EFO:0000400") or a
            full term IRI starting with "http://" / "https://"
        timeout: float or (connect, read) tuple — seconds to wait for each
            HTTP response before giving up

    Returns:
        dict — {"ancestors": [...], "descendants": [...]}, each entry being
        {"id": <obo_id>, "label": <label>}. A relation whose request does not
        return HTTP 200 is reported on stdout and left as an empty list.

    Raises:
        requests.Timeout — no answer arrived within `timeout`

    NOTE(review): one request is made per relation and no paging parameters
    are sent, so presumably only the service's first page of terms is
    collected — confirm against the OLS API if complete lists are needed.
    """
    if term_id.startswith(("http://", "https://")):
        # Caller already supplied the IRI; use it untouched.
        iri = term_id
    elif term_id.startswith("EFO:"):
        # Native EFO terms are not published under the OBO PURL namespace.
        iri = f"http://www.ebi.ac.uk/efo/{term_id.replace(':', '_')}"
    else:
        iri = f"http://purl.obolibrary.org/obo/{term_id.replace(':', '_')}"

    # The IRI is embedded in the URL path, so it is percent-encoded twice.
    encoded_iri = requests.utils.quote(requests.utils.quote(iri, safe=""), safe="")
    term_url = f"{OLS_API}/ontologies/{ontology}/terms/{encoded_iri}"

    hierarchy = {"ancestors": [], "descendants": []}
    for relation in ("ancestors", "descendants"):
        resp = requests.get(f"{term_url}/{relation}", timeout=timeout)
        if resp.status_code != 200:
            # Best effort: keep the empty list, but say why it is empty.
            print(f"OLS hierarchy {term_id}: {relation} request "
                  f"returned HTTP {resp.status_code}")
            continue
        hierarchy[relation] = [
            {"id": t.get("obo_id", ""), "label": t.get("label", "")}
            for t in resp.json().get("_embedded", {}).get("terms", [])
        ]

    print(f"OLS hierarchy {term_id}: "
          f"{len(hierarchy['ancestors'])} ancestors, "
          f"{len(hierarchy['descendants'])} descendants")
    return hierarchy
160
+ ```
161
+
162
+ ## 3. Enrichr 遺伝子セット濃縮解析
163
+
164
+ ```python
165
+ ENRICHR_API = "https://maayanlab.cloud/Enrichr"
166
+
167
+
168
def run_enrichr(gene_list, description="", gene_set_libraries=None, timeout=30):
    """
    Run an Enrichr gene-set enrichment analysis for a gene list.

    The list is uploaded once through `addList`; the returned list ID is then
    queried against each requested library through `enrich`.

    Parameters:
        gene_list: list — gene symbols (e.g., ["TP53", "BRCA1", "EGFR"])
        description: str — free-text description stored with the upload
        gene_set_libraries: list — library names to query; None selects the
            built-in default set defined below
        timeout: float or (connect, read) tuple — seconds to wait for each
            HTTP response before giving up

    Returns:
        dict — library name -> pandas.DataFrame with the columns rank, term,
        p_value, z_score, combined_score, overlap_genes and adjusted_p,
        sorted by adjusted_p (ascending) when non-empty.

    Raises:
        ValueError — `gene_list` is empty
        RuntimeError — the upload response carried no "userListId"
        requests.HTTPError — the service answered with an error status
        requests.Timeout — no answer arrived within `timeout`

    ToolUniverse:
        Enrichr_submit_gene_list(genes=gene_list)
        Enrichr_get_enrichment(user_list_id=id, library=library)
    """
    if not gene_list:
        raise ValueError("gene_list must contain at least one gene symbol")

    if gene_set_libraries is None:
        gene_set_libraries = [
            "GO_Biological_Process_2023",
            "GO_Molecular_Function_2023",
            "KEGG_2021_Human",
            "Reactome_2022",
            "WikiPathway_2023_Human",
            "DisGeNET",
        ]

    # Positional layout of each result row as read by this function.
    fields = (
        "rank",
        "term",
        "p_value",
        "z_score",
        "combined_score",
        "overlap_genes",
        "adjusted_p",
    )

    # Submit gene list (multipart form fields, one symbol per line).
    genes_str = "\n".join(gene_list)
    submit_resp = requests.post(
        f"{ENRICHR_API}/addList",
        files={"list": (None, genes_str), "description": (None, description)},
        timeout=timeout,
    )
    submit_resp.raise_for_status()
    user_list_id = submit_resp.json().get("userListId")
    if user_list_id is None:
        # Fail here rather than querying every library with a missing ID.
        raise RuntimeError("Enrichr addList response did not contain a userListId")
    print(f"Enrichr: submitted {len(gene_list)} genes (ID={user_list_id})")

    # Get enrichment results per library.
    all_results = {}
    for library in gene_set_libraries:
        enrich_resp = requests.get(
            f"{ENRICHR_API}/enrich",
            params={"userListId": user_list_id, "backgroundType": library},
            timeout=timeout,
        )
        enrich_resp.raise_for_status()
        data = enrich_resp.json()

        rows = [
            {name: term_data[i] for i, name in enumerate(fields)}
            for term_data in data.get(library, [])
        ]
        # Fixed columns keep the frame's shape stable when a library has no hits.
        df = pd.DataFrame(rows, columns=list(fields))
        if not df.empty:
            df = df.sort_values("adjusted_p")
        all_results[library] = df

        sig_count = int((df["adjusted_p"] < 0.05).sum()) if not df.empty else 0
        print(f"  {library}: {sig_count} significant terms (FDR < 0.05)")

    return all_results
231
+ ```
232
+
233
+ ## 4. UMLS メタシソーラスマッピング
234
+
235
+ ```python
236
+ UMLS_API = "https://uts-ws.nlm.nih.gov/rest"
237
+
238
+
239
def search_umls(query, api_key, search_type="words", timeout=30):
    """
    Search the UMLS Metathesaurus for concepts matching a string.

    Parameters:
        query: str — search text (disease name, symptom, drug name)
        api_key: str — UMLS API key, sent as the `apiKey` query parameter
        search_type: str — "words", "exact", "leftTruncation"
        timeout: float or (connect, read) tuple — seconds to wait for the
            HTTP response before giving up

    Returns:
        pandas.DataFrame — one row per result with the columns cui, name,
        root_source and uri. The columns are present even when nothing
        matched.

    Raises:
        requests.HTTPError — the service answered with an error status
        requests.Timeout — no answer arrived within `timeout`

    ToolUniverse:
        UMLS_search(query=query, search_type=search_type)
        UMLS_get_concept(cui=cui)
    """
    columns = ["cui", "name", "root_source", "uri"]
    params = {
        "string": query,
        "searchType": search_type,
        "apiKey": api_key,
        "pageSize": 25,
    }
    # Without a timeout a stalled connection would block the caller forever.
    resp = requests.get(f"{UMLS_API}/search/current", params=params, timeout=timeout)
    resp.raise_for_status()
    items = resp.json().get("result", {}).get("results", [])

    results = [
        {
            "cui": item.get("ui", ""),
            "name": item.get("name", ""),
            "root_source": item.get("rootSource", ""),
            "uri": item.get("uri", ""),
        }
        for item in items
    ]

    # Fixing the columns keeps column access usable on an empty result.
    df = pd.DataFrame(results, columns=columns)
    print(f"UMLS search '{query}': {len(df)} concepts")
    return df
274
+
275
+
276
def get_umls_crosswalk(cui, api_key, target_source=None, timeout=30):
    """
    Map a UMLS CUI to the source-vocabulary atoms attached to it.

    Parameters:
        cui: str — UMLS CUI (e.g., "C0023264")
        api_key: str — UMLS API key, sent as the `apiKey` query parameter
        target_source: str — source vocabulary to restrict to. It is sent as
            the `sabs` request parameter and also compared for exact equality
            with each atom's `rootSource`, so it must be spelled the way the
            service reports it (e.g., "SNOMEDCT_US", "ICD10CM", "MSH" for
            MeSH).
        timeout: float or (connect, read) tuple — seconds to wait for the
            HTTP response before giving up

    Returns:
        pandas.DataFrame — one row per atom with the columns source, code,
        name and term_type. The columns are present even when no atoms were
        returned.

    Raises:
        requests.HTTPError — the service answered with an error status
        requests.Timeout — no answer arrived within `timeout`
    """
    columns = ["source", "code", "name", "term_type"]
    params = {"apiKey": api_key, "pageSize": 100}
    if target_source:
        params["sabs"] = target_source

    resp = requests.get(
        f"{UMLS_API}/content/current/CUI/{cui}/atoms",
        params=params,
        timeout=timeout,
    )
    resp.raise_for_status()
    atoms = resp.json().get("result", [])

    mappings = [
        {
            "source": atom.get("rootSource", ""),
            "code": atom.get("sourceConcept", ""),
            "name": atom.get("name", ""),
            "term_type": atom.get("termType", ""),
        }
        for atom in atoms
    ]

    # Explicit columns matter: a frame built from an empty list would have no
    # "source" column and the filter below would raise KeyError.
    df = pd.DataFrame(mappings, columns=columns)
    if target_source:
        df = df[df["source"] == target_source]

    print(f"UMLS crosswalk {cui}: {len(df)} mappings "
          f"({target_source or 'all sources'})")
    return df
309
+ ```
310
+
311
+ ---
312
+
313
+ ## 利用可能ツール
314
+
315
+ | ToolUniverse カテゴリ | 主なツール |
316
+ |---|---|
317
+ | `efo` | `EFO_search` |
318
+ | `ols` | `OLS_search`, `OLS_get_term`, `OLS_get_ancestors` |
319
+ | `enrichr` | `Enrichr_submit_gene_list`, `Enrichr_get_enrichment` |
320
+ | `umls` | `UMLS_search`, `UMLS_get_concept` |
321
+
322
+ ## パイプライン出力
323
+
324
+ | 出力ファイル | 説明 | 連携先スキル |
325
+ |---|---|---|
326
+ | `results/efo_terms.csv` | EFO 標準化用語 | → disease-research, gene-expression |
327
+ | `results/enrichr_results/` | 遺伝子セット濃縮結果 | → pathway-enrichment, multi-omics |
328
+ | `results/umls_mapping.json` | UMLS 用語マッピング | → clinical-decision-support, public-health-data |
329
+ | `results/ontology_hierarchy.json` | オントロジー階層 | → text-mining-nlp, knowledge-graph |
330
+
331
+ ## パイプライン統合
332
+
333
+ ```
334
+ disease-research ──→ ontology-enrichment ──→ pathway-enrichment
335
+ (GWAS/DisGeNET) (EFO/OLS/UMLS/Enrichr) (KEGG/Reactome/GO)
336
+
337
+ ├──→ biothings-idmapping (CUI→Gene→Protein)
338
+ ├──→ public-health-data (UMLS→RxNorm)
339
+ └──→ clinical-reporting (SNOMED/ICD マッピング)
340
+ ```
@@ -0,0 +1,297 @@
1
+ ---
2
+ name: scientific-perturbation-analysis
3
+ description: |
4
+ シングルセル摂動解析スキル。pertpy による CRISPR スクリーン解析・
5
+ 薬剤応答分析・scGen 摂動予測・Augur 摂動応答性スコアリング・
6
+ scIB 統合ベンチマーク・差次的摂動応答パイプライン。
7
+ ---
8
+
9
+ # Scientific Perturbation Analysis
10
+
11
+ pertpy / Augur / scIB を活用したシングルセルレベルの摂動解析
12
+ パイプラインを提供する。CRISPR スクリーン、薬剤処理、
13
+ 遺伝子ノックダウンなどの摂動データの統合解析。
14
+
15
+ ## When to Use
16
+
17
+ - CRISPR スクリーンデータ (Perturb-seq) を解析するとき
18
+ - 薬剤処理前後のシングルセル発現変動を評価するとき
19
+ - 摂動応答の細胞型特異性を定量するとき
20
+ - 複数のバッチ統合手法をベンチマークするとき (scIB)
21
+ - 摂動の効果を in silico で予測するとき (scGen)
22
+ - 差次的優先度 (Augur) で摂動応答性の高い細胞型を特定するとき
23
+
24
+ ---
25
+
26
+ ## Quick Start
27
+
28
+ ## 1. pertpy セットアップ & データ読込み
29
+
30
+ ```python
31
+ import pertpy as pt
32
+ import scanpy as sc
33
+ import anndata as ad
34
+ import pandas as pd
35
+ import numpy as np
36
+
37
+
38
def load_perturbation_data(adata_path, perturbation_key="perturbation",
                           control_label="control"):
    """
    Read a perturbation-experiment AnnData file and apply basic preprocessing.

    Parameters:
        adata_path: str — path of the .h5ad file to read
        perturbation_key: str — `obs` column holding the perturbation label
        control_label: str — label value that marks control cells

    Returns:
        The AnnData object after filtering, normalisation and log1p. A short
        summary of cell and perturbation counts is printed.

    K-Dense: pertpy
    """
    adata = sc.read_h5ad(adata_path)

    # Basic preprocessing. The return values are not reassigned, so these
    # calls are relied on to modify `adata` itself.
    sc.pp.filter_cells(adata, min_genes=200)
    sc.pp.filter_genes(adata, min_cells=3)
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)

    # Counts are taken after filtering, so they describe the retained cells.
    labels = adata.obs[perturbation_key]
    total_cells = len(adata)
    control_cells = (labels == control_label).sum()
    perturbed_cells = total_cells - control_cells

    print(f"Loaded: {total_cells} cells, {labels.nunique()} perturbations")
    print(f"Control: {control_cells}, Perturbed: {perturbed_cells}")
    return adata
65
+ ```
66
+
67
+ ## 2. 差次的遺伝子発現 (摂動 vs コントロール)
68
+
69
+ ```python
70
def differential_perturbation(adata, perturbation_key="perturbation",
                              control="control", target=None):
    """
    Differential expression of each perturbation against the control group.

    Genes are ranked with scanpy's `rank_genes_groups` (Wilcoxon, control as
    reference); a gene counts as significant when `pvals_adj` < 0.05.

    Parameters:
        adata: AnnData — perturbation data
        perturbation_key: str — `obs` column holding the perturbation label
        control: str — label of the control group
        target: str — single perturbation to compare (falsy = all
            perturbations)

    Returns:
        dict — perturbation label -> {"n_degs", "n_up", "n_down",
        "top_genes"}. The counts are plain Python ints so the result can be
        written with `json`; "top_genes" holds the first 10 significant gene
        names in the order scanpy returns them. Groups whose results cannot
        be extracted are reported on stdout and left out.
    """
    if target:
        mask = adata.obs[perturbation_key].isin([control, target])
        adata_sub = adata[mask].copy()
    else:
        # Work on a copy so the ranking results are not written to the input.
        adata_sub = adata.copy()

    sc.tl.rank_genes_groups(
        adata_sub,
        groupby=perturbation_key,
        reference=control,
        method="wilcoxon",
    )

    results = {}
    for group in adata_sub.obs[perturbation_key].unique():
        if group == control:
            continue
        try:
            degs = sc.get.rank_genes_groups_df(adata_sub, group=group)
            degs_sig = degs[degs["pvals_adj"] < 0.05]
            results[group] = {
                "n_degs": len(degs_sig),
                "n_up": int((degs_sig["logfoldchanges"] > 0).sum()),
                "n_down": int((degs_sig["logfoldchanges"] < 0).sum()),
                "top_genes": degs_sig.head(10)["names"].tolist(),
            }
        except Exception as exc:
            # Still best effort, but no longer silent: say which group was
            # dropped and why.
            print(f"  DE skipped for {group}: {type(exc).__name__}: {exc}")
            continue

    print(f"DE results: {len(results)} perturbations analyzed")
    return results
112
+ ```
113
+
114
+ ## 3. Augur 摂動応答性スコアリング
115
+
116
+ ```python
117
def augur_prioritization(adata, perturbation_key="perturbation",
                         cell_type_key="cell_type", control="control"):
    """
    Score how strongly each cell type responds to perturbation using Augur.

    Parameters:
        adata: AnnData — perturbation data
        perturbation_key: str — `obs` column holding the perturbation label
        cell_type_key: str — `obs` column holding the cell-type label
        control: str — label of the control group. Kept for interface
            compatibility; it is not forwarded to Augur here.
            NOTE(review): pertpy's `Augur.load` appears to use a condition
            label only together with a treatment label — confirm before
            wiring `control` through.

    Returns:
        pandas.DataFrame — columns "cell_type" and "auc", sorted by "auc"
        in descending order. The five highest rows are also printed.

    K-Dense: augur (via pertpy)
    """
    ag = pt.tl.Augur(estimator="random_forest_classifier")

    # The label / cell-type columns are declared when loading the data;
    # `predict` itself takes the prepared object, not the column names.
    loaded = ag.load(
        adata,
        label_col=perturbation_key,
        cell_type_col=cell_type_key,
    )
    adata_augur, results = ag.predict(loaded)

    # NOTE(review): "summary_metrics" is expected to have one column per cell
    # type and one row per metric (row "mean_augur_score") — confirm against
    # the installed pertpy version. It is reshaped here into a long table.
    summary = results["summary_metrics"]
    auc_df = (
        summary.loc["mean_augur_score"]
        .rename_axis("cell_type")
        .reset_index(name="auc")
        .sort_values("auc", ascending=False)
    )

    print("Augur prioritization:")
    for _, row in auc_df.head(5).iterrows():
        print(f"  {row['cell_type']}: AUC={row['auc']:.3f}")

    return auc_df
149
+ ```
150
+
151
+ ## 4. scGen 摂動予測
152
+
153
+ ```python
154
def scgen_perturbation_prediction(adata, perturbation_key="perturbation",
                                  cell_type_key="cell_type",
                                  control="control", target_perturbation=None,
                                  target_cell_type=None):
    """
    Predict the effect of a perturbation on a cell type in silico with scGen.

    Parameters:
        adata: AnnData — training data. It is registered with scGen through
            `setup_anndata`, which records the label columns on the object.
            NOTE(review): scvi-tools based models usually reject AnnData
            views — pass a real copy if `adata` was sliced.
        perturbation_key: str — `obs` column holding the perturbation label
        cell_type_key: str — `obs` column holding the cell-type label
        control: str — label of the control condition
        target_perturbation: str — perturbation to predict (required)
        target_cell_type: str — cell type to predict for (required)

    Returns:
        (pred, delta) exactly as returned by `SCGEN.predict`.

    Raises:
        ValueError — `target_perturbation` or `target_cell_type` is None
    """
    import scgen

    if target_perturbation is None or target_cell_type is None:
        raise ValueError(
            "target_perturbation and target_cell_type must both be provided"
        )

    # The model needs to know which obs columns carry the condition and the
    # cell type before it can be constructed.
    scgen.SCGEN.setup_anndata(
        adata,
        batch_key=perturbation_key,
        labels_key=cell_type_key,
    )

    # Model training
    scg = scgen.SCGEN(adata)
    scg.train(max_epochs=100, batch_size=32)

    # Prediction
    pred, delta = scg.predict(
        ctrl_key=control,
        stim_key=target_perturbation,
        celltype_to_predict=target_cell_type,
    )

    print(f"scGen prediction: {target_cell_type} under {target_perturbation}")
    print(f"  Predicted cells: {pred.shape[0]}")
    return pred, delta
182
+ ```
183
+
184
+ ## 5. scIB 統合ベンチマーク
185
+
186
+ ```python
187
def benchmark_integration(adata, batch_key="batch", label_key="cell_type",
                          methods=None):
    """
    Compute scIB batch-correction and bio-conservation metrics for `adata`.

    Parameters:
        adata: AnnData — data containing several batches. The code reads the
            "X_pca" embedding and a "leiden" clustering by name, so both are
            presumably expected to exist already — verify against the caller.
        batch_key: str — `obs` column holding the batch label
        label_key: str — `obs` column holding the cell-type label
        methods: list — defaults to ["scib"]; not consulted after that

    Returns:
        dict — batch_kbet, batch_silhouette, bio_nmi, bio_ari, bio_silhouette
        and "overall" (0.6 x mean of the bio metrics + 0.4 x mean of the batch
        metrics). Every entry is also printed with four decimals.

    K-Dense: scib
    """
    import scib

    methods = ["scib"] if methods is None else methods

    # Batch-correction metrics.
    # NOTE(review): check the kBET call against the installed scib signature;
    # it may need further arguments — confirm.
    kbet_score = scib.me.kBET(
        adata, batch_key=batch_key, label_key=label_key
    )
    batch_sil = scib.me.silhouette_batch(
        adata, batch_key=batch_key, label_key=label_key, embed="X_pca"
    )

    # Bio-conservation metrics.
    nmi_score = scib.me.nmi(adata, label_key, "leiden")
    ari_score = scib.me.ari(adata, label_key, "leiden")
    bio_sil = scib.me.silhouette(
        adata, label_key=label_key, embed="X_pca"
    )

    # Weighted overall score: 60 % bio conservation, 40 % batch correction.
    bio_part = np.mean([nmi_score, ari_score, bio_sil])
    batch_part = np.mean([kbet_score, batch_sil])

    metrics = {
        "batch_kbet": kbet_score,
        "batch_silhouette": batch_sil,
        "bio_nmi": nmi_score,
        "bio_ari": ari_score,
        "bio_silhouette": bio_sil,
        "overall": 0.6 * bio_part + 0.4 * batch_part,
    }

    print("scIB benchmark:")
    for name, value in metrics.items():
        print(f"  {name}: {value:.4f}")
    return metrics
234
+ ```
235
+
236
+ ## 6. 摂動シグネチャ解析
237
+
238
+ ```python
239
def perturbation_signature(adata, perturbation_key="perturbation",
                           control="control", n_top_genes=50):
    """
    Extract a gene signature for every non-control perturbation.

    For each perturbation the per-gene mean of `X` over its cells is compared
    with the per-gene mean over the control cells; the genes with the largest
    absolute difference form the signature.

    Parameters:
        adata: AnnData — perturbation data
        perturbation_key: str — `obs` column holding the perturbation label
        control: str — label of the control group
        n_top_genes: int — number of genes kept per signature

    Returns:
        dict — perturbation label -> {"top_genes": gene names ordered by
        decreasing absolute difference, "deltas": the matching signed
        differences, "n_cells": number of cells carrying the label}.
    """

    def mean_profile(mask):
        # `.mean(axis=0)` may return a matrix-like object; flatten to 1-D.
        return np.asarray(adata[mask].X.mean(axis=0)).flatten()

    baseline = mean_profile(adata.obs[perturbation_key] == control)

    signatures = {}
    for pert in adata.obs[perturbation_key].unique():
        if pert == control:
            continue

        members = adata.obs[perturbation_key] == pert
        shift = mean_profile(members) - baseline

        # Ascending sort on |shift|, reversed, then cut to the top genes.
        order = np.argsort(np.abs(shift))[::-1]
        top_idx = order[:n_top_genes]

        signatures[pert] = {
            "top_genes": adata.var_names[top_idx].tolist(),
            "deltas": shift[top_idx].tolist(),
            "n_cells": int(members.sum()),
        }

    print(f"Signatures extracted: {len(signatures)} perturbations, "
          f"{n_top_genes} genes each")
    return signatures
274
+ ```
275
+
276
+ ---
277
+
278
+ ## パイプライン統合
279
+
280
+ ```
281
+ single-cell-genomics → perturbation-analysis → pathway-enrichment
282
+ (scRNA-seq QC) (摂動 DE/Augur/scGen) (KEGG/Reactome)
283
+ │ │ ↓
284
+ spatial-transcriptomics ──┘ │ disease-research
285
+ (Visium/MERFISH) ↓ (GWAS/DisGeNET)
286
+ drug-target-profiling
287
+ (標的候補評価)
288
+ ```
289
+
290
+ ## パイプライン出力
291
+
292
+ | ファイル | 説明 | 次スキル |
293
+ |---------|------|---------|
294
+ | `results/perturbation_de.json` | 差次的発現結果 | → pathway-enrichment |
295
+ | `results/augur_scores.csv` | Augur 応答性スコア | → single-cell-genomics |
296
+ | `results/perturbation_signatures.json` | 摂動シグネチャ | → drug-target-profiling |
297
+ | `results/scib_benchmark.json` | 統合ベンチマーク | → spatial-transcriptomics |