@nahisaho/satori 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -22
- package/package.json +1 -1
- package/src/.github/skills/scientific-data-submission/SKILL.md +357 -0
- package/src/.github/skills/scientific-gpu-singlecell/SKILL.md +296 -0
- package/src/.github/skills/scientific-marine-ecology/SKILL.md +429 -0
- package/src/.github/skills/scientific-nci60-screening/SKILL.md +307 -0
- package/src/.github/skills/scientific-plant-biology/SKILL.md +321 -0
- package/src/.github/skills/scientific-rrna-taxonomy/SKILL.md +379 -0
- package/src/.github/skills/scientific-scatac-signac/SKILL.md +300 -0
- package/src/.github/skills/scientific-toxicology-env/SKILL.md +309 -0
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-nci60-screening
|
|
3
|
+
description: |
|
|
4
|
+
NCI-60 がん細胞株薬剤応答スキル。CellMiner API 薬剤感受性・
|
|
5
|
+
NCI-60 GI50/LC50 データ・DepMap cancer dependency 統合・
|
|
6
|
+
薬剤-分子マーカー相関・細胞株パネル比較解析。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific NCI-60 Screening
|
|
10
|
+
|
|
11
|
+
CellMiner / NCI-60 / DepMap を活用したがん細胞株薬剤応答
|
|
12
|
+
パイプラインを提供する。高スループットスクリーニングデータの
|
|
13
|
+
統合解析、薬剤感受性マーカー同定、パネル比較。
|
|
14
|
+
|
|
15
|
+
## When to Use
|
|
16
|
+
|
|
17
|
+
- NCI-60 細胞株パネルの薬剤応答 (GI50) を解析するとき
|
|
18
|
+
- CellMiner から化合物活性データを取得するとき
|
|
19
|
+
- 薬剤感受性と分子マーカー (変異/発現) の相関を調べるとき
|
|
20
|
+
- DepMap CRISPR/RNAi 依存性データを併用するとき
|
|
21
|
+
- 細胞株間の薬剤応答パターンを比較するとき
|
|
22
|
+
- 新規化合物のスクリーニング結果を NCI-60 と比較するとき
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
## 1. CellMiner データ取得
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import requests
|
|
32
|
+
import pandas as pd
|
|
33
|
+
import numpy as np
|
|
34
|
+
from io import StringIO
|
|
35
|
+
|
|
36
|
+
CELLMINER_BASE = "https://discover.nci.nih.gov/cellminer/api"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def cellminer_drug_activity(nsc_id=None, drug_name=None):
    """
    Fetch NCI-60 drug activity for a single compound from CellMiner.

    The compound is addressed by ``nsc_id`` when given. Otherwise
    ``drug_name`` is sent to the compound search endpoint and the NSC ID of
    the first hit is used.

    Parameters:
        nsc_id: str — NSC ID (e.g. "740"); takes precedence over drug_name
        drug_name: str — drug name (e.g. "Paclitaxel")

    Returns:
        pd.DataFrame — one row per cell line with the columns
        cell_line, tissue, gi50_log, tgi_log, lc50_log. The frame is empty
        (but keeps these columns) when the name search yields no usable
        compound or the response carries no activity entries.

    Raises:
        ValueError: neither nsc_id nor drug_name was supplied.
        requests.HTTPError: a CellMiner request returned an error status.
    """
    columns = ["cell_line", "tissue", "gi50_log", "tgi_log", "lc50_log"]

    # Without any identifier there is no URL to build; fail with a clear
    # message instead of an unbound-variable error further down.
    if not nsc_id and not drug_name:
        raise ValueError("Either nsc_id or drug_name must be provided")

    if not nsc_id:
        search_resp = requests.get(
            f"{CELLMINER_BASE}/compound/search",
            params={"name": drug_name},
            timeout=30,
        )
        search_resp.raise_for_status()
        compounds = search_resp.json()
        # A hit without an "nsc" value is as unusable as no hit at all.
        nsc_id = compounds[0].get("nsc", "") if compounds else ""
        if not nsc_id:
            print(f"Drug not found: {drug_name}")
            return pd.DataFrame(columns=columns)

    resp = requests.get(f"{CELLMINER_BASE}/compound/{nsc_id}/activity", timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = [
        {
            "cell_line": cell_line,
            "tissue": values.get("tissue", ""),
            "gi50_log": values.get("gi50", None),
            "tgi_log": values.get("tgi", None),
            "lc50_log": values.get("lc50", None),
        }
        for cell_line, values in data.get("activity", {}).items()
    ]

    # Passing the column list keeps the schema stable even with zero rows.
    df = pd.DataFrame(results, columns=columns)
    print(f"CellMiner: NSC {nsc_id} → {len(df)} cell lines")
    return df
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## 2. NCI-60 バルクデータ取得
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
def nci60_bulk_download(data_type="drug_activity"):
    """
    Download one NCI-60 bulk dataset from CellMiner and parse it as CSV.

    Parameters:
        data_type: str — "drug_activity", "gene_expression" or "mutation".
                   Any other value is rejected; this includes
                   "copy_number", for which no download URL is registered.

    Returns:
        pd.DataFrame — the downloaded CSV parsed by pandas.

    Raises:
        ValueError: data_type has no registered download URL.
        requests.HTTPError: the download returned an error status.
    """
    sources = {
        "drug_activity": "https://discover.nci.nih.gov/cellminer/download/DTP_NCI60_ZSCORE.csv",
        "gene_expression": "https://discover.nci.nih.gov/cellminer/download/GeneExpr_RMA.csv",
        "mutation": "https://discover.nci.nih.gov/cellminer/download/Exome_Mutation.csv",
    }

    # Reject unknown dataset names before touching the network.
    if data_type not in sources:
        raise ValueError(f"Unknown data type: {data_type}")

    response = requests.get(sources[data_type], timeout=120)
    response.raise_for_status()

    table = pd.read_csv(StringIO(response.text))
    print(f"NCI-60 bulk: {data_type} → {table.shape}")
    return table
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## 3. 薬剤-分子マーカー相関
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from scipy import stats
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def drug_marker_correlation(drug_activity, molecular_data,
                            marker_type="expression", top_n=50):
    """
    Correlate drug sensitivity with per-gene molecular markers.

    For every gene, the Pearson correlation between the drug's
    ``gi50_log`` values and the gene's values is computed over the cell
    lines present in both inputs. Missing or non-numeric values are dropped
    pairwise, and duplicated cell lines are averaged before correlating.

    Parameters:
        drug_activity: pd.DataFrame — columns "cell_line" and "gi50_log"
        molecular_data: pd.DataFrame — either long format
                        ("cell_line", "gene", "value") or wide format
                        ("cell_line" plus one column per gene)
        marker_type: str — "expression", "mutation", "copy_number";
                     accepted for interface compatibility, not used by the
                     computation
        top_n: int — number of top-ranked genes to return

    Returns:
        pd.DataFrame — columns gene, pearson_r, p_value, n_samples, adj_p
        (Bonferroni-adjusted, capped at 1.0), sorted by p_value and
        truncated to top_n rows. Empty (same columns) when no gene has at
        least 10 paired, non-constant observations.
    """
    result_columns = ["gene", "pearson_r", "p_value", "n_samples", "adj_p"]

    # Numeric GI50 per cell line; unparsable/missing entries are dropped and
    # repeated cell lines collapsed so the index is unique.
    drug_values = pd.to_numeric(
        drug_activity.set_index("cell_line")["gi50_log"], errors="coerce"
    ).dropna()
    drug_values = drug_values.groupby(level=0).mean()

    # Build one cell_line x gene matrix up front instead of re-filtering the
    # whole table for every gene.
    if "gene" in molecular_data.columns:
        long_form = molecular_data.assign(
            value=pd.to_numeric(molecular_data["value"], errors="coerce")
        )
        matrix = long_form.pivot_table(
            index="cell_line", columns="gene", values="value", aggfunc="mean"
        )
    else:
        matrix = molecular_data.set_index("cell_line").apply(
            pd.to_numeric, errors="coerce"
        )
        matrix = matrix.groupby(level=0).mean()

    shared = drug_values.index.intersection(matrix.index)
    drug_values = drug_values.loc[shared]
    matrix = matrix.loc[shared]

    records = []
    for gene, values in matrix.items():
        paired = values.notna()
        n_samples = int(paired.sum())
        if n_samples < 10:
            continue

        x = drug_values[paired]
        y = values[paired]
        # Pearson's r is undefined for a constant vector; such genes are not
        # counted as tests.
        if x.nunique() < 2 or y.nunique() < 2:
            continue

        r, p = stats.pearsonr(x, y)
        records.append({
            "gene": gene,
            "pearson_r": float(r),
            "p_value": float(p),
            "n_samples": n_samples,
        })

    if not records:
        print("Drug-marker correlation: 0 genes tested")
        return pd.DataFrame(columns=result_columns)

    corr_df = pd.DataFrame(records)
    # Bonferroni: multiply by the number of tests, never exceeding 1.
    corr_df["adj_p"] = (corr_df["p_value"] * len(corr_df)).clip(upper=1.0)
    corr_df = corr_df.sort_values("p_value")

    print(f"Drug-marker correlation: {len(corr_df)} genes tested, "
          f"top |r| = {corr_df['pearson_r'].abs().max():.3f}")
    return corr_df.head(top_n)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## 4. 組織別薬剤応答パターン
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
def tissue_response_pattern(drug_activity, min_lines=3):
|
|
168
|
+
"""
|
|
169
|
+
組織別の薬剤応答パターン解析。
|
|
170
|
+
|
|
171
|
+
Parameters:
|
|
172
|
+
drug_activity: pd.DataFrame — GI50 データ
|
|
173
|
+
min_lines: int — 最小細胞株数
|
|
174
|
+
"""
|
|
175
|
+
tissue_stats = drug_activity.groupby("tissue").agg(
|
|
176
|
+
n_lines=("gi50_log", "count"),
|
|
177
|
+
mean_gi50=("gi50_log", "mean"),
|
|
178
|
+
std_gi50=("gi50_log", "std"),
|
|
179
|
+
min_gi50=("gi50_log", "min"),
|
|
180
|
+
max_gi50=("gi50_log", "max"),
|
|
181
|
+
).reset_index()
|
|
182
|
+
|
|
183
|
+
tissue_stats = tissue_stats[tissue_stats["n_lines"] >= min_lines]
|
|
184
|
+
tissue_stats = tissue_stats.sort_values("mean_gi50")
|
|
185
|
+
|
|
186
|
+
# 感受性/耐性スコア
|
|
187
|
+
overall_mean = drug_activity["gi50_log"].mean()
|
|
188
|
+
tissue_stats["sensitivity_z"] = (
|
|
189
|
+
(tissue_stats["mean_gi50"] - overall_mean)
|
|
190
|
+
/ drug_activity["gi50_log"].std()
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
print(f"Tissue patterns: {len(tissue_stats)} tissues")
|
|
194
|
+
for _, row in tissue_stats.iterrows():
|
|
195
|
+
label = "Sensitive" if row["sensitivity_z"] < -0.5 else (
|
|
196
|
+
"Resistant" if row["sensitivity_z"] > 0.5 else "Neutral"
|
|
197
|
+
)
|
|
198
|
+
print(f" {row['tissue']}: GI50={row['mean_gi50']:.2f} ({label})")
|
|
199
|
+
return tissue_stats
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## 5. DepMap 統合スクリーニング
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
DEPMAP_BASE = "https://depmap.org/portal/api"
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def depmap_gene_dependency(gene_symbol, dataset="Chronos_Combined"):
    """
    Fetch per-cell-line dependency scores for one gene from DepMap.

    Parameters:
        gene_symbol: str — gene symbol, sent as the "gene" query parameter
        dataset: str — dataset name, sent as the "dataset" query parameter

    Returns:
        pd.DataFrame — columns cell_line, lineage, dependency_score, one row
        per entry of the response's "data" list (empty frame when the list
        is missing or empty).

    Raises:
        requests.HTTPError: the request returned an error status.
    """
    response = requests.get(
        f"{DEPMAP_BASE}/download/custom",
        params={"gene": gene_symbol, "dataset": dataset},
        timeout=30,
    )
    response.raise_for_status()
    payload = response.json()

    rows = [
        {
            "cell_line": item.get("cell_line_name", ""),
            "lineage": item.get("lineage", ""),
            "dependency_score": item.get("score", None),
        }
        for item in payload.get("data", [])
    ]
    frame = pd.DataFrame(rows)

    # Nothing to summarise when the response carried no entries.
    if not rows:
        return frame

    # Count the lines whose score falls below the -0.5 cut-off used here.
    dependent_count = (frame["dependency_score"] < -0.5).sum()
    print(f"DepMap {gene_symbol}: {len(frame)} lines, "
          f"{dependent_count} dependent (score < -0.5)")
    return frame
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## 6. NCI-60 統合スクリーニングパイプライン
|
|
242
|
+
|
|
243
|
+
```python
|
|
244
|
+
def nci60_screening_pipeline(drug_name=None, nsc_id=None,
                             target_gene=None, output_dir="results"):
    """
    Run the combined NCI-60 + DepMap screening pipeline and save CSVs.

    Steps: fetch the compound's drug activity, summarise it per tissue,
    correlate it with the bulk gene-expression download and, when
    ``target_gene`` is given, fetch DepMap dependency scores for it.

    Parameters:
        drug_name: str — drug name
        nsc_id: str — NSC ID
        target_gene: str — target gene for the optional DepMap step
        output_dir: str — output directory (created if missing)

    Returns:
        dict with the DataFrames "drug_activity", "tissue_patterns",
        "correlations" and "depmap_dependency". The last one is an empty
        frame when target_gene is not given. When the drug lookup returns no
        rows, only drug_activity.csv is written and all downstream frames
        are empty.
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) NCI-60 drug activity
    drug_data = cellminer_drug_activity(nsc_id=nsc_id, drug_name=drug_name)
    drug_data.to_csv(output_dir / "drug_activity.csv", index=False)

    # An empty lookup (e.g. drug not found) has nothing to group or
    # correlate; stop here instead of failing inside the later steps.
    if drug_data.empty:
        print(f"Pipeline stopped (no drug activity data): {output_dir}")
        return {
            "drug_activity": drug_data,
            "tissue_patterns": pd.DataFrame(),
            "correlations": pd.DataFrame(),
            "depmap_dependency": pd.DataFrame(),
        }

    # 2) Per-tissue response pattern
    tissue_patterns = tissue_response_pattern(drug_data)
    tissue_patterns.to_csv(output_dir / "tissue_patterns.csv", index=False)

    # 3) Expression correlation
    expr_data = nci60_bulk_download("gene_expression")
    correlations = drug_marker_correlation(drug_data, expr_data)
    correlations.to_csv(output_dir / "marker_correlations.csv", index=False)

    # 4) DepMap step (only when a target gene is given)
    depmap_data = pd.DataFrame()
    if target_gene:
        depmap_data = depmap_gene_dependency(target_gene)
        depmap_data.to_csv(output_dir / "depmap_dependency.csv", index=False)

    print(f"Pipeline complete: {output_dir}")
    return {
        "drug_activity": drug_data,
        "tissue_patterns": tissue_patterns,
        "correlations": correlations,
        # Returned as well as written, so callers need not re-read the CSV.
        "depmap_dependency": depmap_data,
    }
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
---
|
|
286
|
+
|
|
287
|
+
## パイプライン統合
|
|
288
|
+
|
|
289
|
+
```
|
|
290
|
+
compound-screening → nci60-screening → precision-oncology
|
|
291
|
+
(ZINC/VS) (NCI-60/DepMap) (MTB レポート)
|
|
292
|
+
│ │ ↓
|
|
293
|
+
drug-target-profiling ──────┘ cancer-genomics
|
|
294
|
+
(ChEMBL/DGIdb) │ (COSMIC/DepMap)
|
|
295
|
+
↓
|
|
296
|
+
cell-line-resources
|
|
297
|
+
(Cellosaurus)
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
## パイプライン出力
|
|
301
|
+
|
|
302
|
+
| ファイル | 説明 | 次スキル |
|
|
303
|
+
|---------|------|---------|
|
|
304
|
+
| `results/drug_activity.csv` | NCI-60 GI50 データ | → precision-oncology |
|
|
305
|
+
| `results/tissue_patterns.csv` | 組織別応答パターン | → cancer-genomics |
|
|
306
|
+
| `results/marker_correlations.csv` | 薬剤-マーカー相関 | → drug-target-profiling |
|
|
307
|
+
| `results/depmap_dependency.csv` | DepMap 依存性スコア (`target_gene` 指定時のみ出力) | → cell-line-resources |
|
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-plant-biology
|
|
3
|
+
description: |
|
|
4
|
+
植物バイオロジー統合スキル。Plant Reactome 代謝パスウェイ・
|
|
5
|
+
TAIR Arabidopsis ゲノム情報・Phytozome 比較ゲノミクス・
|
|
6
|
+
Ensembl Plants 種間オーソログ解析。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Plant Biology
|
|
10
|
+
|
|
11
|
+
Plant Reactome / TAIR / Phytozome / Ensembl Plants を活用した
|
|
12
|
+
植物ゲノム・代謝パスウェイ統合解析パイプラインを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- 植物代謝パスウェイ解析 (Plant Reactome) を実行するとき
|
|
17
|
+
- Arabidopsis thaliana の遺伝子・タンパク質情報を取得するとき
|
|
18
|
+
- 植物種間の比較ゲノミクス解析を行うとき
|
|
19
|
+
- 植物オーソログ・パラログを同定するとき
|
|
20
|
+
- 作物改良のための候補遺伝子を探索するとき
|
|
21
|
+
- 植物表現型データと遺伝子型を統合するとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. Plant Reactome パスウェイ検索
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import requests
|
|
31
|
+
import pandas as pd
|
|
32
|
+
import json
|
|
33
|
+
|
|
34
|
+
PLANT_REACTOME = "https://plantreactome.gramene.org/ContentService"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def plant_reactome_search(query, species="Oryza sativa"):
    """
    Search Plant Reactome and flatten the grouped hits into a table.

    Parameters:
        query: str — search query (e.g. "photosynthesis")
        species: str — species name sent with the query

    Returns:
        pd.DataFrame — columns stId, name, species, type, compartment; one
        row per entry found in any group of the response's "results" list.

    Raises:
        requests.HTTPError: the request returned an error status.
    """
    response = requests.get(
        f"{PLANT_REACTOME}/search/query",
        params={"query": query, "species": species, "cluster": True},
        timeout=30,
    )
    response.raise_for_status()
    payload = response.json()

    # Results arrive grouped; each group contributes its "entries".
    hits = [
        {
            "stId": entry.get("stId", ""),
            "name": entry.get("name", ""),
            "species": entry.get("species", ""),
            "type": entry.get("exactType", ""),
            "compartment": entry.get("compartmentNames", []),
        }
        for group in payload.get("results", [])
        for entry in group.get("entries", [])
    ]

    table = pd.DataFrame(hits)
    print(f"Plant Reactome: '{query}' → {len(table)} entries ({species})")
    return table
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def plant_reactome_pathway_detail(pathway_id):
    """
    List the events contained in a Plant Reactome pathway.

    Parameters:
        pathway_id: str — pathway ID (e.g. "R-OSA-1119616")

    Returns:
        pd.DataFrame — columns stId, name, type, input_count, output_count,
        catalyst; one row per event in the response.

    Raises:
        requests.HTTPError: the request returned an error status.
    """
    response = requests.get(
        f"{PLANT_REACTOME}/data/pathway/{pathway_id}/containedEvents",
        timeout=30,
    )
    response.raise_for_status()

    rows = []
    for event in response.json():
        # Only the first catalyst activity is reported; events without one
        # get an empty string.
        catalysts = event.get("catalystActivity")
        catalyst_name = catalysts[0].get("displayName", "") if catalysts else ""
        rows.append({
            "stId": event.get("stId", ""),
            "name": event.get("displayName", ""),
            "type": event.get("className", ""),
            "input_count": len(event.get("input", [])),
            "output_count": len(event.get("output", [])),
            "catalyst": catalyst_name,
        })

    table = pd.DataFrame(rows)
    print(f"Pathway {pathway_id}: {len(table)} reaction steps")
    return table
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## 2. TAIR Arabidopsis 遺伝子情報
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
TAIR_BASE = "https://www.arabidopsis.org/api"
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def tair_gene_search(gene_id=None, gene_name=None, keyword=None):
    """
    Look up Arabidopsis thaliana genes through TAIR.

    With ``gene_id`` the single gene record is fetched directly. Otherwise
    ``gene_name`` (preferred) or ``keyword`` is sent to the gene search
    endpoint; with neither, an empty query string is sent.

    Parameters:
        gene_id: str — AGI ID (e.g. "AT1G01010")
        gene_name: str — gene name (e.g. "FLC")
        keyword: str — free-text search keyword

    Returns:
        pd.DataFrame — for a gene_id lookup, one row with agi_id, name,
        description, chromosome, start, end, strand, gene_model_type; for a
        search, one row per hit with agi_id, name, description, chromosome.

    Raises:
        requests.HTTPError: a request returned an error status.
    """
    if gene_id:
        response = requests.get(f"{TAIR_BASE}/gene/{gene_id}", timeout=30)
        response.raise_for_status()
        record = response.json()

        # "locus" is exposed as agi_id; the remaining fields keep their names.
        row = {"agi_id": record.get("locus", "")}
        for field in ("name", "description", "chromosome", "start", "end",
                      "strand", "gene_model_type"):
            row[field] = record.get(field, "")
        return pd.DataFrame([row])

    # Keyword search
    search_term = gene_name or keyword or ""
    response = requests.get(
        f"{TAIR_BASE}/search/gene",
        params={"query": search_term, "limit": 50},
        timeout=30,
    )
    response.raise_for_status()
    payload = response.json()

    rows = [
        {
            "agi_id": hit.get("locus", ""),
            "name": hit.get("name", ""),
            "description": hit.get("description", ""),
            "chromosome": hit.get("chromosome", ""),
        }
        for hit in payload.get("results", [])
    ]

    table = pd.DataFrame(rows)
    print(f"TAIR: '{search_term}' → {len(table)} genes")
    return table
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def tair_gene_expression(gene_id):
    """
    Fetch the expression records TAIR reports for one gene.

    Parameters:
        gene_id: str — AGI ID

    Returns:
        pd.DataFrame — columns tissue, stage, level, source; one row per
        item of the response's "expression" list.

    Raises:
        requests.HTTPError: the request returned an error status.
    """
    response = requests.get(f"{TAIR_BASE}/gene/{gene_id}/expression", timeout=30)
    response.raise_for_status()
    payload = response.json()

    records = [
        {
            "tissue": item.get("tissue", ""),
            "stage": item.get("developmental_stage", ""),
            "level": item.get("expression_level", ""),
            "source": item.get("source", ""),
        }
        for item in payload.get("expression", [])
    ]

    table = pd.DataFrame(records)
    print(f"TAIR expression: {gene_id} → {len(table)} tissue records")
    return table
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## 3. Ensembl Plants 種間比較
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
ENSEMBL_PLANTS = "https://rest.ensembl.org"
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def ensembl_plants_orthologs(gene_id, source_species="arabidopsis_thaliana",
                             target_species=None):
    """
    Retrieve plant orthologues of a gene from the Ensembl REST API.

    Parameters:
        gene_id: str — Ensembl stable gene ID of the source gene. This
                 function uses the ID-based homology route; gene symbols
                 are presumably not resolved by it — confirm against the
                 Ensembl REST documentation before passing a symbol.
        source_species: str — species the gene belongs to; sent as part of
                        the request path and echoed in the result
        target_species: str — restrict results to this species
                        (None = all species)

    Returns:
        pd.DataFrame — columns source_gene, source_species, target_gene,
        target_species, target_protein, identity, dn_ds, type; one row per
        homology. Empty when the response carries no homology entries.

    Raises:
        requests.HTTPError: the request returned an error status.
    """
    # The homology lookup is addressed as homology/id/:species/:id, so the
    # source species must be part of the path.
    url = f"{ENSEMBL_PLANTS}/homology/id/{source_species}/{gene_id}"
    params = {
        "type": "orthologues",
        "content-type": "application/json",
        "compara": "plants",
    }
    if target_species:
        params["target_species"] = target_species

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    # "data" may be missing or an empty list; both mean "no homologies".
    entries = data.get("data") or [{}]
    homologies = entries[0].get("homologies", [])

    orthologs = []
    for homology in homologies:
        target = homology.get("target", {})
        orthologs.append({
            "source_gene": gene_id,
            "source_species": source_species,
            "target_gene": target.get("id", ""),
            "target_species": target.get("species", ""),
            "target_protein": target.get("protein_id", ""),
            "identity": target.get("perc_id", 0),
            "dn_ds": homology.get("dn_ds", None),
            "type": homology.get("type", ""),
        })

    df = pd.DataFrame(orthologs)
    print(f"Ensembl Plants orthologs: {gene_id} → {len(df)} homologs")
    return df
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
## 4. Phytozome 比較ゲノミクス
|
|
224
|
+
|
|
225
|
+
```python
|
|
226
|
+
PHYTOZOME_BASE = "https://phytozome-next.jgi.doe.gov/api"
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def phytozome_gene_family(gene_id, species="Athaliana"):
    """
    Search Phytozome for a gene and tabulate the family annotation of hits.

    Parameters:
        gene_id: str — gene ID, sent as the "query" parameter
        species: str — species abbreviation, sent as the "organism" parameter

    Returns:
        pd.DataFrame — columns gene_id, family_id, family_name, species,
        annotation, pfam_domains; one row per item of the response's "hits"
        list.

    Raises:
        requests.HTTPError: the request returned an error status.
    """
    response = requests.get(
        f"{PHYTOZOME_BASE}/search",
        params={"query": gene_id, "organism": species},
        timeout=30,
    )
    response.raise_for_status()
    payload = response.json()

    members = [
        {
            "gene_id": hit.get("gene_id", ""),
            "family_id": hit.get("family_id", ""),
            "family_name": hit.get("family_name", ""),
            "species": hit.get("organism", ""),
            "annotation": hit.get("annotation", ""),
            "pfam_domains": hit.get("pfam", []),
        }
        for hit in payload.get("hits", [])
    ]

    table = pd.DataFrame(members)
    print(f"Phytozome: {gene_id} → {len(table)} family members")
    return table
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
## 5. 植物バイオロジー統合パイプライン
|
|
260
|
+
|
|
261
|
+
```python
|
|
262
|
+
def plant_biology_pipeline(gene_query, species="Oryza sativa",
                           output_dir="results"):
    """
    Run the combined plant-biology lookup and write the results as CSV.

    Steps: Plant Reactome search for ``gene_query`` in ``species``, TAIR
    keyword search for the same query (performed regardless of ``species``),
    and an Ensembl Plants orthologue lookup for the first TAIR hit.

    Parameters:
        gene_query: str — gene / pathway query
        species: str — species used for the Plant Reactome search
        output_dir: str — output directory (created if missing)

    Returns:
        dict with the DataFrames "pathways", "tair_genes" and "orthologs";
        "orthologs" is an empty frame (and no orthologs.csv is written)
        when the TAIR search returns no rows.
    """
    from pathlib import Path
    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    # 1) Plant Reactome pathways
    pathways = plant_reactome_search(gene_query, species=species)
    pathways.to_csv(out_path / "plant_pathways.csv", index=False)

    # 2) TAIR keyword search
    tair_genes = tair_gene_search(keyword=gene_query)
    tair_genes.to_csv(out_path / "tair_genes.csv", index=False)

    # 3) Ensembl Plants orthologues of the top TAIR hit, if there is one
    orthologs = pd.DataFrame()
    if len(tair_genes) > 0:
        first_locus = tair_genes.iloc[0]["agi_id"]
        orthologs = ensembl_plants_orthologs(first_locus)
        orthologs.to_csv(out_path / "orthologs.csv", index=False)

    print(f"Plant biology pipeline: {out_path}")
    return {
        "pathways": pathways,
        "tair_genes": tair_genes,
        "orthologs": orthologs,
    }
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
---
|
|
301
|
+
|
|
302
|
+
## パイプライン統合
|
|
303
|
+
|
|
304
|
+
```
|
|
305
|
+
pathway-enrichment → plant-biology → environmental-ecology
|
|
306
|
+
(KEGG/Reactome) (PlantReactome) (生態学/環境)
|
|
307
|
+
│ │ ↓
|
|
308
|
+
gene-annotation ────────┘ marine-ecology
|
|
309
|
+
(GO/InterPro) │ (OBIS/WoRMS)
|
|
310
|
+
↓
|
|
311
|
+
comparative-genomics
|
|
312
|
+
(Ensembl 比較)
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
## パイプライン出力
|
|
316
|
+
|
|
317
|
+
| ファイル | 説明 | 次スキル |
|
|
318
|
+
|---------|------|---------|
|
|
319
|
+
| `results/plant_pathways.csv` | Plant Reactome パスウェイ | → pathway-enrichment |
|
|
320
|
+
| `results/tair_genes.csv` | TAIR Arabidopsis 遺伝子 | → gene-annotation |
|
|
321
|
+
| `results/orthologs.csv` | 種間オーソログ (TAIR 検索でヒットした場合のみ出力) | → comparative-genomics |
|