@nahisaho/satori 0.15.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +67 -29
- package/package.json +1 -1
- package/src/.github/skills/scientific-data-submission/SKILL.md +357 -0
- package/src/.github/skills/scientific-encode-screen/SKILL.md +315 -0
- package/src/.github/skills/scientific-environmental-geodata/SKILL.md +255 -0
- package/src/.github/skills/scientific-geo-expression/SKILL.md +274 -0
- package/src/.github/skills/scientific-gpu-singlecell/SKILL.md +296 -0
- package/src/.github/skills/scientific-human-cell-atlas/SKILL.md +294 -0
- package/src/.github/skills/scientific-marine-ecology/SKILL.md +429 -0
- package/src/.github/skills/scientific-metabolic-atlas/SKILL.md +263 -0
- package/src/.github/skills/scientific-nci60-screening/SKILL.md +307 -0
- package/src/.github/skills/scientific-paleobiology/SKILL.md +265 -0
- package/src/.github/skills/scientific-parasite-genomics/SKILL.md +280 -0
- package/src/.github/skills/scientific-plant-biology/SKILL.md +321 -0
- package/src/.github/skills/scientific-rrna-taxonomy/SKILL.md +379 -0
- package/src/.github/skills/scientific-scatac-signac/SKILL.md +300 -0
- package/src/.github/skills/scientific-squidpy-advanced/SKILL.md +251 -0
- package/src/.github/skills/scientific-toxicology-env/SKILL.md +309 -0
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-parasite-genomics
|
|
3
|
+
description: |
|
|
4
|
+
寄生虫ゲノミクススキル。PlasmoDB/VectorBase/ToxoDB REST API
|
|
5
|
+
による寄生虫ゲノム検索・遺伝子情報・薬剤標的同定・比較
|
|
6
|
+
ゲノミクス。直接 REST API 連携 (TU 外)。
|
|
7
|
+
tu_tools: []
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Scientific Parasite Genomics
|
|
11
|
+
|
|
12
|
+
VEuPathDB ファミリー (PlasmoDB, VectorBase, ToxoDB, TriTrypDB)
|
|
13
|
+
の REST API を活用した寄生虫ゲノミクス解析パイプラインを提供
|
|
14
|
+
する。
|
|
15
|
+
|
|
16
|
+
## When to Use
|
|
17
|
+
|
|
18
|
+
- マラリア原虫ゲノム (PlasmoDB) を検索するとき
|
|
19
|
+
- 蚊・ダニ等の媒介生物ゲノム (VectorBase) を検索するとき
|
|
20
|
+
- トキソプラズマゲノム (ToxoDB) を検索するとき
|
|
21
|
+
- トリパノソーマ/リーシュマニアゲノム (TriTrypDB) を検索するとき
|
|
22
|
+
- 寄生虫の薬剤標的候補を同定するとき
|
|
23
|
+
- 寄生虫間の比較ゲノミクスを実施するとき
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
## 1. VEuPathDB 遺伝子検索
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import requests
|
|
33
|
+
import pandas as pd
|
|
34
|
+
import numpy as np
|
|
35
|
+
|
|
36
|
+
# Service root URL of each VEuPathDB family site, keyed by the short
# ``db`` name accepted by the functions in this skill.
VEUPATHDB_SITES = dict(
    plasmo="https://plasmodb.org/plasmo/service",
    vector="https://vectorbase.org/vectorbase/service",
    toxo="https://toxodb.org/toxo/service",
    tritryp="https://tritrypdb.org/tritrypdb/service",
)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def veupathdb_search_genes(organism, query, db="plasmo",
                           limit=100):
    """
    Search genes on a VEuPathDB site by free text.

    Sends a ``GenesByTextSearch`` POST request to the site selected by
    ``db`` and flattens the returned ``records`` into a table.

    Parameters:
        organism: str — organism name (e.g. "Plasmodium falciparum 3D7")
        query: str — search keyword (e.g. "kinase", "transporter")
        db: str — key of VEUPATHDB_SITES ("plasmo", "vector", "toxo",
            "tritryp"); an unknown key falls back to "plasmo"
        limit: int — maximum number of records requested

    Returns:
        pandas.DataFrame with the columns gene_id, gene_name, product,
        gene_type, chromosome, start, end and strand. The columns are
        present even when the search returns no records.

    Raises:
        requests.HTTPError: the service answered with an error status.
    """
    base = VEUPATHDB_SITES.get(db, VEUPATHDB_SITES["plasmo"])
    # NOTE(review): confirm that this search URL accepts the POST body
    # below; the service may expect a ".../reports/standard" suffix.
    url = f"{base}/record-types/gene/searches/GenesByTextSearch"

    payload = {
        "searchConfig": {
            "parameters": {
                "text_expression": query,
                "text_fields": "Gene ID,Gene Name or Symbol,"
                               "Gene product",
                "organism": [organism],
            }
        },
        "reportConfig": {
            "attributes": ["primary_key", "gene_name",
                           "gene_product", "gene_type",
                           "chromosome", "start_min",
                           "end_max", "strand"],
            "pagination": {"offset": 0, "numRecords": limit},
        },
    }
    headers = {"Content-Type": "application/json"}
    resp = requests.post(url, json=payload, headers=headers,
                         timeout=60)
    resp.raise_for_status()
    data = resp.json()

    # Output column -> (attribute name in the response, default value).
    field_map = {
        "gene_id": ("primary_key", ""),
        "gene_name": ("gene_name", ""),
        "product": ("gene_product", ""),
        "gene_type": ("gene_type", ""),
        "chromosome": ("chromosome", ""),
        "start": ("start_min", None),
        "end": ("end_max", None),
        "strand": ("strand", ""),
    }
    results = []
    for rec in data.get("records", []):
        attrs = rec.get("attributes", {})
        results.append({
            column: attrs.get(source, default)
            for column, (source, default) in field_map.items()
        })

    # Passing the column list keeps the schema stable for an empty result,
    # so callers can index df["gene_id"] without hitting a KeyError.
    df = pd.DataFrame(results, columns=list(field_map))
    print(f"VEuPathDB ({db}) genes: {len(df)} results "
          f"(organism={organism}, query={query})")
    return df
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## 2. 遺伝子機能アノテーション
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
def veupathdb_gene_annotation(gene_id, db="plasmo"):
    """
    Fetch the functional annotation of one gene from a VEuPathDB site.

    Parameters:
        gene_id: str — gene ID (e.g. "PF3D7_1133400")
        db: str — key of VEUPATHDB_SITES; an unknown key falls back to
            "plasmo"

    Returns:
        dict holding the scalar gene attributes plus two lists of dicts,
        "go_terms" and "domains".
    """
    site = VEUPATHDB_SITES.get(db, VEUPATHDB_SITES["plasmo"])
    response = requests.get(
        f"{site}/record-types/gene/records/{gene_id}",
        params={
            "attributes": "all",
            "tables": "GoTerms,InterPro,MetabolicPathways,"
                      "PubMed,EcNumber",
        },
        timeout=30,
    )
    response.raise_for_status()
    record = response.json()

    gene_attrs = record.get("attributes", {})
    gene_tables = record.get("tables", {})

    # Output key -> attribute name in the response; missing values become "".
    scalar_fields = (
        ("gene_name", "gene_name"),
        ("product", "gene_product"),
        ("molecular_weight", "molecular_weight"),
        ("isoelectric_point", "isoelectric_point"),
        ("signal_peptide", "signal_peptide"),
        ("transmembrane_domains", "transmembrane_domains"),
    )
    annotation = {"gene_id": gene_id}
    for out_key, attr_key in scalar_fields:
        annotation[out_key] = gene_attrs.get(attr_key, "")

    # GO terms
    go_terms = [
        {
            "go_id": row.get("go_id", ""),
            "go_term": row.get("go_term_name", ""),
            "ontology": row.get("ontology", ""),
            "evidence": row.get("evidence_code", ""),
        }
        for row in gene_tables.get("GoTerms", [])
    ]
    annotation["go_terms"] = go_terms

    # InterPro domains
    domains = [
        {
            "interpro_id": row.get("interpro_primary_id", ""),
            "name": row.get("interpro_name", ""),
            "description": row.get("interpro_description", ""),
        }
        for row in gene_tables.get("InterPro", [])
    ]
    annotation["domains"] = domains

    print(f"VEuPathDB annotation: {gene_id}, "
          f"{len(go_terms)} GO terms, {len(domains)} domains")
    return annotation
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## 3. 薬剤標的候補スクリーニング
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
def parasite_drug_target_screen(organism, db="plasmo",
                                essentiality_threshold=0.5):
    """
    Screen a parasite genome for drug target candidates.

    Runs three text searches ("kinase", "protease", "transporter"),
    merges the hits, removes duplicate gene IDs and labels every
    remaining gene with a ``target_class``.

    Parameters:
        organism: str — organism name
        db: str — VEuPathDB site key passed on to veupathdb_search_genes
        essentiality_threshold: float — essentiality score threshold;
            accepted for interface compatibility but not used by the
            current heuristic

    Returns:
        pandas.DataFrame of candidate genes with an extra
        ``target_class`` column. When no search returns a ``gene_id``
        column the result is an empty frame with the columns gene_id
        and target_class.
    """
    kinases = veupathdb_search_genes(organism, "kinase", db=db)
    proteases = veupathdb_search_genes(organism, "protease", db=db)
    transporters = veupathdb_search_genes(
        organism, "transporter", db=db)

    all_targets = pd.concat([kinases, proteases, transporters],
                            ignore_index=True)

    if "gene_id" not in all_targets.columns:
        # Every search came back empty and without columns; return an
        # empty table instead of failing on the missing gene_id column.
        all_targets = pd.DataFrame(columns=["gene_id", "target_class"])
    else:
        all_targets = all_targets.drop_duplicates(subset=["gene_id"])

        # Heuristic target class. Labels are applied in order, so a gene
        # found by several searches keeps the last matching label.
        all_targets["target_class"] = "unknown"
        labelled_hits = (
            ("kinase", kinases),
            ("protease", proteases),
            ("transporter", transporters),
        )
        for label, hits in labelled_hits:
            if "gene_id" not in hits.columns:
                # An empty search result has nothing to label.
                continue
            all_targets.loc[
                all_targets["gene_id"].isin(hits["gene_id"]),
                "target_class"] = label

    print(f"Drug target screen: {len(all_targets)} candidates "
          f"(kinases={len(kinases)}, proteases={len(proteases)}, "
          f"transporters={len(transporters)})")
    return all_targets
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
## 4. 寄生虫ゲノミクス統合パイプライン
|
|
207
|
+
|
|
208
|
+
```python
|
|
209
|
+
def parasite_genomics_pipeline(organism, query,
                               db="plasmo",
                               output_dir="results"):
    """
    Run the integrated parasite genomics pipeline.

    Steps: (1) gene search, (2) annotation of the first 10 genes found,
    (3) drug target screening. Each step writes one CSV file into
    ``output_dir`` (genes.csv, annotations.csv, drug_targets.csv).

    Parameters:
        organism: str — organism name (e.g. "Plasmodium falciparum 3D7")
        query: str — search query
        db: str — VEuPathDB site key
        output_dir: str — output directory, created when missing

    Returns:
        dict with the keys "genes" (DataFrame), "annotations" (list of
        annotation dicts) and "drug_targets" (DataFrame).
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Gene search
    genes = veupathdb_search_genes(organism, query, db=db)
    genes.to_csv(output_dir / "genes.csv", index=False)

    # 2) Annotation of the top genes. An empty search result may carry
    #    no gene_id column, in which case there is nothing to annotate.
    gene_ids = genes["gene_id"].head(10) if "gene_id" in genes.columns else []
    annotations = []
    for gene_id in gene_ids:
        try:
            annotations.append(veupathdb_gene_annotation(gene_id, db=db))
        except Exception as exc:
            # Best effort: one failing record must not abort the run,
            # but the skip is reported instead of silently dropped.
            print(f"Annotation skipped for {gene_id}: {exc}")

    # Only scalar fields go into the CSV; list-valued entries (GO terms,
    # domains) stay available in the returned annotation dicts.
    ann_df = pd.DataFrame([{
        k: v for k, v in a.items()
        if not isinstance(v, list)
    } for a in annotations])
    ann_df.to_csv(output_dir / "annotations.csv", index=False)

    # 3) Drug target screening
    targets = parasite_drug_target_screen(organism, db=db)
    targets.to_csv(output_dir / "drug_targets.csv", index=False)

    print(f"Parasite genomics pipeline: {output_dir}")
    return {
        "genes": genes,
        "annotations": annotations,
        "drug_targets": targets,
    }
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
---
|
|
256
|
+
|
|
257
|
+
## ToolUniverse 連携
|
|
258
|
+
|
|
259
|
+
直接 REST API 使用 (VEuPathDB は ToolUniverse 外)。
|
|
260
|
+
|
|
261
|
+
## パイプライン統合
|
|
262
|
+
|
|
263
|
+
```
|
|
264
|
+
infectious-disease → parasite-genomics → phylogenetics
|
|
265
|
+
(病原体情報) (寄生虫ゲノム) (系統解析)
|
|
266
|
+
│ │ ↓
|
|
267
|
+
drug-discovery ─────────┘ comparative-genomics
|
|
268
|
+
(薬剤探索) │ (比較ゲノミクス)
|
|
269
|
+
↓
|
|
270
|
+
pathway-enrichment
|
|
271
|
+
(パスウェイ解析)
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
## パイプライン出力
|
|
275
|
+
|
|
276
|
+
| ファイル | 説明 | 次スキル |
|
|
277
|
+
|---------|------|---------|
|
|
278
|
+
| `results/genes.csv` | 遺伝子一覧 | → phylogenetics |
|
|
279
|
+
| `results/annotations.csv` | 上位 10 遺伝子の機能アノテーション (GO・ドメインなどのリスト項目は含まない) | → pathway-enrichment |
|
|
280
|
+
| `results/drug_targets.csv` | 薬剤標的候補 | → drug-discovery |
|
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-plant-biology
|
|
3
|
+
description: |
|
|
4
|
+
植物バイオロジー統合スキル。Plant Reactome 代謝パスウェイ・
|
|
5
|
+
TAIR Arabidopsis ゲノム情報・Phytozome 比較ゲノミクス・
|
|
6
|
+
Ensembl Plants 種間オーソログ解析。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Plant Biology
|
|
10
|
+
|
|
11
|
+
Plant Reactome / TAIR / Phytozome / Ensembl Plants を活用した
|
|
12
|
+
植物ゲノム・代謝パスウェイ統合解析パイプラインを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- 植物代謝パスウェイ解析 (Plant Reactome) を実行するとき
|
|
17
|
+
- Arabidopsis thaliana の遺伝子・タンパク質情報を取得するとき
|
|
18
|
+
- 植物種間の比較ゲノミクス解析を行うとき
|
|
19
|
+
- 植物オーソログ・パラログを同定するとき
|
|
20
|
+
- 作物改良のための候補遺伝子を探索するとき
|
|
21
|
+
- 植物表現型データと遺伝子型を統合するとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. Plant Reactome パスウェイ検索
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import requests
|
|
31
|
+
import pandas as pd
|
|
32
|
+
import json
|
|
33
|
+
|
|
34
|
+
# Base URL of the Plant Reactome ContentService; the Plant Reactome
# functions in this skill append their endpoint paths to it.
PLANT_REACTOME = "https://plantreactome.gramene.org/ContentService"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def plant_reactome_search(query, species="Oryza sativa"):
    """
    Search Plant Reactome for metabolic / signalling pathway entries.

    Parameters:
        query: str — search query (e.g. "photosynthesis")
        species: str — species name

    Returns:
        pandas.DataFrame with one row per entry of every result group
        (stId, name, species, type, compartment).
    """
    response = requests.get(
        f"{PLANT_REACTOME}/search/query",
        params={"query": query, "species": species, "cluster": True},
        timeout=30,
    )
    response.raise_for_status()
    payload = response.json()

    # The response groups its entries; flatten all groups into one list.
    rows = [
        {
            "stId": item.get("stId", ""),
            "name": item.get("name", ""),
            "species": item.get("species", ""),
            "type": item.get("exactType", ""),
            "compartment": item.get("compartmentNames", []),
        }
        for bucket in payload.get("results", [])
        for item in bucket.get("entries", [])
    ]

    table = pd.DataFrame(rows)
    print(f"Plant Reactome: '{query}' → {len(table)} entries ({species})")
    return table
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def plant_reactome_pathway_detail(pathway_id):
    """
    Fetch the events contained in a Plant Reactome pathway.

    Parameters:
        pathway_id: str — pathway ID (e.g. "R-OSA-1119616")

    Returns:
        pandas.DataFrame with one row per contained event (stId, name,
        type, input_count, output_count, catalyst).
    """
    response = requests.get(
        f"{PLANT_REACTOME}/data/pathway/{pathway_id}/containedEvents",
        timeout=30,
    )
    response.raise_for_status()

    rows = []
    for item in response.json():
        # Use the display name of the first catalyst activity, or "" when
        # the event lists none.
        activities = item.get("catalystActivity")
        if activities:
            catalyst_name = activities[0].get("displayName", "")
        else:
            catalyst_name = ""

        rows.append({
            "stId": item.get("stId", ""),
            "name": item.get("displayName", ""),
            "type": item.get("className", ""),
            "input_count": len(item.get("input", [])),
            "output_count": len(item.get("output", [])),
            "catalyst": catalyst_name,
        })

    table = pd.DataFrame(rows)
    print(f"Pathway {pathway_id}: {len(table)} reaction steps")
    return table
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## 2. TAIR Arabidopsis 遺伝子情報
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
# Base URL of the TAIR API; the tair_* functions in this skill append
# their endpoint paths to it.
TAIR_BASE = "https://www.arabidopsis.org/api"
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def tair_gene_search(gene_id=None, gene_name=None, keyword=None):
    """
    Look up Arabidopsis thaliana genes in TAIR.

    With ``gene_id`` the single gene record is fetched directly.
    Otherwise a search is run with ``gene_name`` or, when that is
    empty, with ``keyword``.

    Parameters:
        gene_id: str — AGI ID (e.g. "AT1G01010")
        gene_name: str — gene name (e.g. "FLC")
        keyword: str — free-text keyword

    Returns:
        pandas.DataFrame. The direct lookup yields one row with agi_id,
        name, description, chromosome, start, end, strand and
        gene_model_type; the search yields one row per hit with agi_id,
        name, description and chromosome.

    Raises:
        ValueError: none of gene_id, gene_name or keyword was given.
        requests.HTTPError: the service answered with an error status.
    """
    if not (gene_id or gene_name or keyword):
        # An empty query has no meaningful result; fail before any
        # network traffic instead of sending it.
        raise ValueError(
            "tair_gene_search requires gene_id, gene_name or keyword")

    if gene_id:
        url = f"{TAIR_BASE}/gene/{gene_id}"
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        data = resp.json()
        return pd.DataFrame([{
            "agi_id": data.get("locus", ""),
            "name": data.get("name", ""),
            "description": data.get("description", ""),
            "chromosome": data.get("chromosome", ""),
            "start": data.get("start", ""),
            "end": data.get("end", ""),
            "strand": data.get("strand", ""),
            "gene_model_type": data.get("gene_model_type", ""),
        }])

    # Keyword search; gene_name takes precedence over keyword.
    search_term = gene_name or keyword
    url = f"{TAIR_BASE}/search/gene"
    params = {"query": search_term, "limit": 50}
    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = [
        {
            "agi_id": gene.get("locus", ""),
            "name": gene.get("name", ""),
            "description": gene.get("description", ""),
            "chromosome": gene.get("chromosome", ""),
        }
        for gene in data.get("results", [])
    ]

    df = pd.DataFrame(results)
    print(f"TAIR: '{search_term}' → {len(df)} genes")
    return df
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def tair_gene_expression(gene_id):
    """
    Fetch the expression records TAIR holds for one gene.

    Parameters:
        gene_id: str — AGI ID

    Returns:
        pandas.DataFrame with one row per expression record (tissue,
        stage, level, source).
    """
    response = requests.get(
        f"{TAIR_BASE}/gene/{gene_id}/expression", timeout=30)
    response.raise_for_status()
    payload = response.json()

    rows = [
        {
            "tissue": record.get("tissue", ""),
            "stage": record.get("developmental_stage", ""),
            "level": record.get("expression_level", ""),
            "source": record.get("source", ""),
        }
        for record in payload.get("expression", [])
    ]

    table = pd.DataFrame(rows)
    print(f"TAIR expression: {gene_id} → {len(table)} tissue records")
    return table
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## 3. Ensembl Plants 種間比較
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
# Ensembl REST host used for the plant ortholog lookup; the request in
# ensembl_plants_orthologs selects plant data with "compara": "plants".
ENSEMBL_PLANTS = "https://rest.ensembl.org"
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def ensembl_plants_orthologs(gene_id, source_species="arabidopsis_thaliana",
                             target_species=None):
    """
    Look up orthologs of a plant gene through the Ensembl REST API.

    Parameters:
        gene_id: str — Ensembl gene ID or symbol
        source_species: str — species of the query gene; only recorded
            in the output rows, it is not sent with the request
        target_species: str — restrict hits to this species
            (None = all species)

    Returns:
        pandas.DataFrame with one row per homology (source_gene,
        source_species, target_gene, target_species, target_protein,
        identity, dn_ds, type). Empty when the response carries no
        homology data.

    Raises:
        requests.HTTPError: the service answered with an error status.
    """
    # NOTE(review): confirm the endpoint shape against the current
    # Ensembl REST documentation (it may require the species in the path).
    url = f"{ENSEMBL_PLANTS}/homology/id/{gene_id}"
    params = {
        "type": "orthologues",
        "content-type": "application/json",
        "compara": "plants",
    }
    if target_species:
        params["target_species"] = target_species

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    # "data" may be missing, null or an empty list; treat all of these as
    # "no homologies" instead of failing on the [0] lookup.
    entries = data.get("data") or [{}]
    homologies = entries[0].get("homologies", [])

    orthologs = []
    for homology in homologies:
        target = homology.get("target", {})
        orthologs.append({
            "source_gene": gene_id,
            "source_species": source_species,
            "target_gene": target.get("id", ""),
            "target_species": target.get("species", ""),
            "target_protein": target.get("protein_id", ""),
            "identity": target.get("perc_id", 0),
            "dn_ds": homology.get("dn_ds", None),
            "type": homology.get("type", ""),
        })

    df = pd.DataFrame(orthologs)
    print(f"Ensembl Plants orthologs: {gene_id} → {len(df)} homologs")
    return df
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
## 4. Phytozome 比較ゲノミクス
|
|
224
|
+
|
|
225
|
+
```python
|
|
226
|
+
# Base URL of the Phytozome API; phytozome_gene_family appends its
# endpoint path to it.
PHYTOZOME_BASE = "https://phytozome-next.jgi.doe.gov/api"
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def phytozome_gene_family(gene_id, species="Athaliana"):
    """
    Query Phytozome for gene family information on a gene.

    Parameters:
        gene_id: str — gene ID
        species: str — species abbreviation

    Returns:
        pandas.DataFrame with one row per search hit (gene_id,
        family_id, family_name, species, annotation, pfam_domains).
    """
    response = requests.get(
        f"{PHYTOZOME_BASE}/search",
        params={"query": gene_id, "organism": species},
        timeout=30,
    )
    response.raise_for_status()
    payload = response.json()

    rows = [
        {
            "gene_id": match.get("gene_id", ""),
            "family_id": match.get("family_id", ""),
            "family_name": match.get("family_name", ""),
            "species": match.get("organism", ""),
            "annotation": match.get("annotation", ""),
            "pfam_domains": match.get("pfam", []),
        }
        for match in payload.get("hits", [])
    ]

    table = pd.DataFrame(rows)
    print(f"Phytozome: {gene_id} → {len(table)} family members")
    return table
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
## 5. 植物バイオロジー統合パイプライン
|
|
260
|
+
|
|
261
|
+
```python
|
|
262
|
+
def plant_biology_pipeline(gene_query, species="Oryza sativa",
                           output_dir="results"):
    """
    Run the integrated plant biology pipeline.

    Steps: (1) Plant Reactome pathway search, (2) TAIR gene search,
    (3) Ensembl Plants ortholog lookup for the first TAIR hit. Results
    are written as CSV files into ``output_dir``.

    Parameters:
        gene_query: str — gene / pathway query
        species: str — target species for the Plant Reactome search
        output_dir: str — output directory, created when missing

    Returns:
        dict with the DataFrames "pathways", "tair_genes" and
        "orthologs".
    """
    from pathlib import Path
    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    # 1) Plant Reactome pathways
    pathways = plant_reactome_search(gene_query, species=species)
    pathways.to_csv(out_path / "plant_pathways.csv", index=False)

    # 2) TAIR gene search (called for every species; hits are
    #    Arabidopsis genes)
    tair_genes = tair_gene_search(keyword=gene_query)
    tair_genes.to_csv(out_path / "tair_genes.csv", index=False)

    # 3) Ensembl Plants orthologs of the first TAIR hit. Without a hit
    #    no orthologs.csv is written and an empty frame is returned.
    if len(tair_genes) == 0:
        orthologs = pd.DataFrame()
    else:
        first_agi = tair_genes.iloc[0]["agi_id"]
        orthologs = ensembl_plants_orthologs(first_agi)
        orthologs.to_csv(out_path / "orthologs.csv", index=False)

    print(f"Plant biology pipeline: {out_path}")
    return dict(
        pathways=pathways,
        tair_genes=tair_genes,
        orthologs=orthologs,
    )
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
---
|
|
301
|
+
|
|
302
|
+
## パイプライン統合
|
|
303
|
+
|
|
304
|
+
```
|
|
305
|
+
pathway-enrichment → plant-biology → environmental-ecology
|
|
306
|
+
(KEGG/Reactome) (PlantReactome) (生態学/環境)
|
|
307
|
+
│ │ ↓
|
|
308
|
+
gene-annotation ────────┘ marine-ecology
|
|
309
|
+
(GO/InterPro) │ (OBIS/WoRMS)
|
|
310
|
+
↓
|
|
311
|
+
comparative-genomics
|
|
312
|
+
(Ensembl 比較)
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
## パイプライン出力
|
|
316
|
+
|
|
317
|
+
| ファイル | 説明 | 次スキル |
|
|
318
|
+
|---------|------|---------|
|
|
319
|
+
| `results/plant_pathways.csv` | Plant Reactome パスウェイ | → pathway-enrichment |
|
|
320
|
+
| `results/tair_genes.csv` | TAIR Arabidopsis 遺伝子 | → gene-annotation |
|
|
321
|
+
| `results/orthologs.csv` | 種間オーソログ (TAIR 検索でヒットがあった場合のみ出力) | → comparative-genomics |
|