@nahisaho/satori 0.16.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -41
- package/package.json +1 -1
- package/src/.github/skills/scientific-alphafold-structures/SKILL.md +256 -0
- package/src/.github/skills/scientific-arrayexpress-expression/SKILL.md +264 -0
- package/src/.github/skills/scientific-crossref-metadata/SKILL.md +313 -0
- package/src/.github/skills/scientific-encode-screen/SKILL.md +315 -0
- package/src/.github/skills/scientific-environmental-geodata/SKILL.md +255 -0
- package/src/.github/skills/scientific-geo-expression/SKILL.md +274 -0
- package/src/.github/skills/scientific-gtex-tissue-expression/SKILL.md +271 -0
- package/src/.github/skills/scientific-gwas-catalog/SKILL.md +267 -0
- package/src/.github/skills/scientific-human-cell-atlas/SKILL.md +294 -0
- package/src/.github/skills/scientific-icgc-cancer-data/SKILL.md +351 -0
- package/src/.github/skills/scientific-metabolic-atlas/SKILL.md +263 -0
- package/src/.github/skills/scientific-paleobiology/SKILL.md +265 -0
- package/src/.github/skills/scientific-parasite-genomics/SKILL.md +280 -0
- package/src/.github/skills/scientific-pharmgkb-pgx/SKILL.md +306 -0
- package/src/.github/skills/scientific-semantic-scholar/SKILL.md +298 -0
- package/src/.github/skills/scientific-squidpy-advanced/SKILL.md +251 -0
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-paleobiology
|
|
3
|
+
description: |
|
|
4
|
+
古生物学データベーススキル。Paleobiology Database (PBDB) REST
|
|
5
|
+
API による化石産出記録・分類群・コレクション検索、地質年代
|
|
6
|
+
多様性曲線・古地理解析。ToolUniverse 連携: paleobiology。
|
|
7
|
+
tu_tools:
|
|
8
|
+
- key: paleobiology
|
|
9
|
+
name: Paleobiology Database
|
|
10
|
+
description: PBDB 化石産出記録・分類群・コレクション検索
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Scientific Paleobiology
|
|
14
|
+
|
|
15
|
+
Paleobiology Database (PBDB) REST API を活用した古生物学的
|
|
16
|
+
多様性解析パイプラインを提供する。
|
|
17
|
+
|
|
18
|
+
## When to Use
|
|
19
|
+
|
|
20
|
+
- 化石産出記録 (occurrence) を検索するとき
|
|
21
|
+
- 分類群 (taxa) の地質年代分布を調べるとき
|
|
22
|
+
- 化石コレクション/産地情報を検索するとき
|
|
23
|
+
- 地質年代を通じた多様性曲線を作成するとき
|
|
24
|
+
- 大量絶滅イベントのパターンを分析するとき
|
|
25
|
+
- 古地理的分布を解析するとき
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Quick Start
|
|
30
|
+
|
|
31
|
+
## 1. PBDB 化石産出記録検索
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
import requests
|
|
35
|
+
import pandas as pd
|
|
36
|
+
import numpy as np
|
|
37
|
+
|
|
38
|
+
# Root URL of the PBDB data service; the query functions in this file append
# endpoint paths such as "/occs/list.json" to it.
PBDB_BASE = "https://paleobiodb.org/data1.2"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def pbdb_search_occurrences(taxon=None, interval=None,
                            lngmin=None, lngmax=None,
                            latmin=None, latmax=None, limit=1000):
    """
    PBDB — fossil occurrence search.

    Sends a GET request to ``{PBDB_BASE}/occs/list.json`` and flattens the
    ``records`` array of the JSON response into a DataFrame.

    Parameters:
        taxon: str — taxon name (e.g. "Dinosauria", "Trilobita");
            sent as ``base_name``
        interval: str — geologic time interval (e.g. "Cretaceous", "Permian")
        lngmin: float — minimum longitude
        lngmax: float — maximum longitude
        latmin: float — minimum latitude
        latmax: float — maximum latitude
        limit: int — maximum number of results

    Returns:
        pandas.DataFrame — one row per returned record; the frame has no
        columns when the response contains no records.

    Raises:
        requests.HTTPError — if the service answers with an error status.
    """
    # Output column -> (key in the response record, default when absent).
    column_sources = {
        "occurrence_no": ("oid", ""),
        "taxon_name": ("tna", ""),
        "taxon_rank": ("rnk", ""),
        "phylum": ("phl", ""),
        "class": ("cll", ""),
        "order": ("odl", ""),
        "family": ("fml", ""),
        "early_interval": ("oei", ""),
        "late_interval": ("oli", ""),
        "max_ma": ("eag", None),
        "min_ma": ("lag", None),
        "lng": ("lng", None),
        "lat": ("lat", None),
        "collection_no": ("cid", ""),
        "reference_no": ("rid", ""),
    }

    query = {"show": "coords,phylo,time", "limit": limit}
    if taxon:
        query["base_name"] = taxon
    if interval:
        query["interval"] = interval

    # The bounding box is only sent when all four edges were supplied;
    # a partially specified box is ignored.
    bounding_box = {
        "lngmin": lngmin, "lngmax": lngmax,
        "latmin": latmin, "latmax": latmax,
    }
    if not any(edge is None for edge in bounding_box.values()):
        query.update(bounding_box)

    response = requests.get(f"{PBDB_BASE}/occs/list.json",
                            params=query, timeout=30)
    response.raise_for_status()
    payload = response.json().get("records", [])

    rows = [
        {column: record.get(key, default)
         for column, (key, default) in column_sources.items()}
        for record in payload
    ]

    df = pd.DataFrame(rows)
    print(f"PBDB occurrences: {len(df)} records "
          f"(taxon={taxon}, interval={interval})")
    return df
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## 2. PBDB 分類群情報検索
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
def pbdb_search_taxa(name=None, rank=None, interval=None, limit=500):
    """
    PBDB — taxon search.

    Sends a GET request to ``{PBDB_BASE}/taxa/list.json`` and flattens the
    ``records`` array of the JSON response into a DataFrame.

    Parameters:
        name: str — taxon name (e.g. "Dinosauria"); sent as ``base_name``
        rank: str — rank (e.g. "genus", "family", "order")
        interval: str — geologic time interval
        limit: int — maximum number of results

    Returns:
        pandas.DataFrame — one row per returned record; the frame has no
        columns when the response contains no records.

    Raises:
        requests.HTTPError — if the service answers with an error status.
    """
    # Output column -> (key in the response record, default when absent).
    column_sources = {
        "taxon_no": ("oid", ""),
        "taxon_name": ("nam", ""),
        "rank": ("rnk", ""),
        "parent_name": ("prl", ""),
        "n_occs": ("noc", 0),
        "first_appearance": ("fea", ""),
        "last_appearance": ("lla", ""),
        "extant": ("ext", ""),
    }

    query = {"show": "attr,app,size", "limit": limit}
    # Optional filters are only added when a truthy value was given.
    optional_filters = (
        ("base_name", name),
        ("rank", rank),
        ("interval", interval),
    )
    for param_name, value in optional_filters:
        if value:
            query[param_name] = value

    response = requests.get(f"{PBDB_BASE}/taxa/list.json",
                            params=query, timeout=30)
    response.raise_for_status()
    payload = response.json().get("records", [])

    rows = [
        {column: record.get(key, default)
         for column, (key, default) in column_sources.items()}
        for record in payload
    ]

    df = pd.DataFrame(rows)
    print(f"PBDB taxa: {len(df)} records (name={name})")
    return df
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## 3. 地質年代多様性曲線
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
def pbdb_diversity_curve(taxon, time_resolution="stage",
                         rank="genus"):
    """
    PBDB — build a diversity curve through geologic time.

    Sends a GET request to ``{PBDB_BASE}/occs/diversity.json`` and flattens
    the ``records`` array of the JSON response into a DataFrame with one row
    per time interval.

    Parameters:
        taxon: str — taxon name; sent as ``base_name``
        time_resolution: str — "stage" or "epoch" or "period"
        rank: str — rank to count ("genus", "family"). Singular rank names
            are translated to the plural spelling before being sent as
            ``count``; any other value is passed through unchanged.

    Returns:
        pandas.DataFrame — columns ``interval_name``, ``max_ma``, ``min_ma``,
        ``mid_ma``, ``sampled_in_bin``, ``n_originations``, ``n_extinctions``
        and ``range_through``. The columns are present even when the service
        returns no records.

    Raises:
        requests.HTTPError — if the service answers with an error status.
    """
    columns = [
        "interval_name", "max_ma", "min_ma", "mid_ma",
        "sampled_in_bin", "n_originations", "n_extinctions",
        "range_through",
    ]
    # The diversity endpoint documents plural values for ``count``
    # ("species", "genera", "families", "orders"); accept the singular rank
    # names this function advertises and translate them.
    count_aliases = {
        "genus": "genera",
        "family": "families",
        "order": "orders",
    }

    params = {
        "base_name": taxon,
        "count": count_aliases.get(rank, rank),
        "time_reso": time_resolution,
    }
    resp = requests.get(f"{PBDB_BASE}/occs/diversity.json",
                        params=params, timeout=60)
    resp.raise_for_status()
    records = resp.json().get("records", [])

    # NOTE(review): the keys "idn", "dor", "dex" and "drt" could not be
    # matched against the PBDB diversity response vocabulary from here —
    # confirm them against the API documentation; unknown keys silently
    # fall back to the defaults below.
    results = []
    for rec in records:
        # ``or 0`` also covers a key that is present but null, which would
        # otherwise make float() raise TypeError.
        oldest = float(rec.get("eag") or 0)
        youngest = float(rec.get("lag") or 0)
        results.append({
            "interval_name": rec.get("idn", ""),
            "max_ma": rec.get("eag", None),
            "min_ma": rec.get("lag", None),
            "mid_ma": (oldest + youngest) / 2,
            "sampled_in_bin": rec.get("dsb", 0),
            "n_originations": rec.get("dor", 0),
            "n_extinctions": rec.get("dex", 0),
            "range_through": rec.get("drt", 0),
        })

    # Passing ``columns`` keeps the schema stable for an empty result, so the
    # summary below and downstream column access do not raise KeyError.
    df = pd.DataFrame(results, columns=columns)
    max_diversity = df["sampled_in_bin"].max() if not df.empty else 0
    print(f"PBDB diversity: {len(df)} intervals, "
          f"max diversity={max_diversity} {rank}")
    return df
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## 4. 古生物学統合パイプライン
|
|
192
|
+
|
|
193
|
+
```python
|
|
194
|
+
def paleobiology_pipeline(taxon, interval=None,
                          output_dir="results"):
    """
    Integrated paleobiology pipeline.

    Runs the occurrence search, the taxon search and the diversity curve for
    one taxon, writing each result to a CSV file in ``output_dir`` as soon as
    it is available. When the occurrence table has ``lat`` and ``lng``
    columns, a per-``early_interval`` summary (record count, mean latitude,
    mean longitude) is written as well.

    Parameters:
        taxon: str — taxon name (e.g. "Dinosauria")
        interval: str — geologic time interval (optional); only forwarded to
            the occurrence search
        output_dir: str — output directory (created if missing)

    Returns:
        dict — keys ``occurrences``, ``taxa`` and ``diversity``, each holding
        the corresponding DataFrame.
    """
    from pathlib import Path

    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    def _save(frame, filename):
        # All outputs are written without the index column.
        frame.to_csv(out_path / filename, index=False)

    # 1) Occurrence records
    occurrences = pbdb_search_occurrences(taxon=taxon, interval=interval)
    _save(occurrences, "occurrences.csv")

    # 2) Taxon information
    taxa_table = pbdb_search_taxa(name=taxon)
    _save(taxa_table, "taxa.csv")

    # 3) Diversity curve
    diversity_table = pbdb_diversity_curve(taxon)
    _save(diversity_table, "diversity.csv")

    # 4) Geographic summary — skipped when the coordinate columns are absent
    #    (for example when the occurrence search returned nothing).
    if {"lat", "lng"}.issubset(occurrences.columns):
        per_interval = occurrences.groupby("early_interval")
        geo_table = per_interval.agg(
            n_records=("occurrence_no", "count"),
            mean_lat=("lat", "mean"),
            mean_lng=("lng", "mean"),
        ).reset_index()
        _save(geo_table, "geo_summary.csv")

    print(f"Paleobiology pipeline: {out_path}")
    return {
        "occurrences": occurrences,
        "taxa": taxa_table,
        "diversity": diversity_table,
    }
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
---
|
|
238
|
+
|
|
239
|
+
## ToolUniverse 連携
|
|
240
|
+
|
|
241
|
+
| TU Key | ツール名 | 連携内容 |
|
|
242
|
+
|--------|---------|---------|
|
|
243
|
+
| `paleobiology` | Paleobiology Database | 化石産出・分類群・コレクション検索 |
|
|
244
|
+
|
|
245
|
+
## パイプライン統合
|
|
246
|
+
|
|
247
|
+
```
|
|
248
|
+
phylogenetics → paleobiology → environmental-ecology
|
|
249
|
+
(系統解析) (化石記録) (GBIF/生態)
|
|
250
|
+
│ │ ↓
|
|
251
|
+
taxonomy ─────────┘ environmental-geodata
|
|
252
|
+
(分類体系) │ (環境モデリング)
|
|
253
|
+
↓
|
|
254
|
+
macroevolution
|
|
255
|
+
(大進化パターン)
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
## パイプライン出力
|
|
259
|
+
|
|
260
|
+
| ファイル | 説明 | 次スキル |
|
|
261
|
+
|---------|------|---------|
|
|
262
|
+
| `results/occurrences.csv` | 化石産出記録 | → environmental-ecology |
|
|
263
|
+
| `results/taxa.csv` | 分類群情報 | → phylogenetics |
|
|
264
|
+
| `results/diversity.csv` | 多様性曲線 | → macroevolution |
|
|
265
|
+
| `results/geo_summary.csv` | 古地理サマリ | → environmental-geodata |
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-parasite-genomics
|
|
3
|
+
description: |
|
|
4
|
+
寄生虫ゲノミクススキル。PlasmoDB/VectorBase/ToxoDB REST API
|
|
5
|
+
による寄生虫ゲノム検索・遺伝子情報・薬剤標的同定・比較
|
|
6
|
+
ゲノミクス。直接 REST API 連携 (TU 外)。
|
|
7
|
+
tu_tools: []
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Scientific Parasite Genomics
|
|
11
|
+
|
|
12
|
+
VEuPathDB ファミリー (PlasmoDB, VectorBase, ToxoDB, TriTrypDB)
|
|
13
|
+
の REST API を活用した寄生虫ゲノミクス解析パイプラインを提供
|
|
14
|
+
する。
|
|
15
|
+
|
|
16
|
+
## When to Use
|
|
17
|
+
|
|
18
|
+
- マラリア原虫ゲノム (PlasmoDB) を検索するとき
|
|
19
|
+
- 蚊・ダニ等の媒介生物ゲノム (VectorBase) を検索するとき
|
|
20
|
+
- トキソプラズマゲノム (ToxoDB) を検索するとき
|
|
21
|
+
- トリパノソーマ/リーシュマニアゲノム (TriTrypDB) を検索するとき
|
|
22
|
+
- 寄生虫の薬剤標的候補を同定するとき
|
|
23
|
+
- 寄生虫間の比較ゲノミクスを実施するとき
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
## 1. VEuPathDB 遺伝子検索
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import requests
|
|
33
|
+
import pandas as pd
|
|
34
|
+
import numpy as np
|
|
35
|
+
|
|
36
|
+
# Service root URL for each VEuPathDB site, keyed by the short name that the
# functions in this file accept as their ``db`` argument.
VEUPATHDB_SITES = {
    "plasmo": "https://plasmodb.org/plasmo/service",
    "vector": "https://vectorbase.org/vectorbase/service",
    "toxo": "https://toxodb.org/toxo/service",
    "tritryp": "https://tritrypdb.org/tritrypdb/service",
}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def veupathdb_search_genes(organism, query, db="plasmo",
                           limit=100):
    """
    VEuPathDB — gene search.

    POSTs a text-search request to the selected site's
    ``record-types/gene/searches/GenesByTextSearch`` endpoint and flattens
    the ``attributes`` of each returned record into a DataFrame row.

    Parameters:
        organism: str — organism name (e.g. "Plasmodium falciparum 3D7")
        query: str — search keyword (e.g. "kinase", "transporter")
        db: str — database ("plasmo", "vector", "toxo", "tritryp");
            an unrecognised value falls back to "plasmo"
        limit: int — maximum number of results

    Returns:
        pandas.DataFrame — one row per returned record; the frame has no
        columns when the response contains no records.

    Raises:
        requests.HTTPError — if the service answers with an error status.
    """
    # Output column -> (requested attribute name, default when absent).
    column_sources = {
        "gene_id": ("primary_key", ""),
        "gene_name": ("gene_name", ""),
        "product": ("gene_product", ""),
        "gene_type": ("gene_type", ""),
        "chromosome": ("chromosome", ""),
        "start": ("start_min", None),
        "end": ("end_max", None),
        "strand": ("strand", ""),
    }

    site_root = VEUPATHDB_SITES.get(db, VEUPATHDB_SITES["plasmo"])
    endpoint = f"{site_root}/record-types/gene/searches/GenesByTextSearch"

    search_config = {
        "parameters": {
            "text_expression": query,
            "text_fields": "Gene ID,Gene Name or Symbol,Gene product",
            "organism": [organism],
        }
    }
    report_config = {
        # Request exactly the attributes that are mapped to columns above.
        "attributes": [attr for attr, _ in column_sources.values()],
        "pagination": {"offset": 0, "numRecords": limit},
    }
    body = {"searchConfig": search_config, "reportConfig": report_config}

    response = requests.post(
        endpoint,
        json=body,
        headers={"Content-Type": "application/json"},
        timeout=60,
    )
    response.raise_for_status()
    payload = response.json()

    rows = []
    for record in payload.get("records", []):
        record_attrs = record.get("attributes", {})
        rows.append({
            column: record_attrs.get(attr, default)
            for column, (attr, default) in column_sources.items()
        })

    df = pd.DataFrame(rows)
    print(f"VEuPathDB ({db}) genes: {len(df)} results "
          f"(organism={organism}, query={query})")
    return df
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## 2. 遺伝子機能アノテーション
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
def veupathdb_gene_annotation(gene_id, db="plasmo"):
    """
    VEuPathDB — fetch the functional annotation of one gene.

    Sends a GET request to the selected site's
    ``record-types/gene/records/{gene_id}`` endpoint and collects selected
    attributes plus the ``GoTerms`` and ``InterPro`` tables of the response.

    Parameters:
        gene_id: str — gene ID (e.g. "PF3D7_1133400")
        db: str — database; an unrecognised value falls back to "plasmo"

    Returns:
        dict — scalar entries ``gene_id``, ``gene_name``, ``product``,
        ``molecular_weight``, ``isoelectric_point``, ``signal_peptide`` and
        ``transmembrane_domains``, plus ``go_terms`` and ``domains``, each a
        list of dicts.

    Raises:
        requests.HTTPError — if the service answers with an error status.
    """
    site_root = VEUPATHDB_SITES.get(db, VEUPATHDB_SITES["plasmo"])
    endpoint = f"{site_root}/record-types/gene/records/{gene_id}"

    response = requests.get(
        endpoint,
        params={
            "attributes": "all",
            "tables": "GoTerms,InterPro,MetabolicPathways,PubMed,EcNumber",
        },
        timeout=30,
    )
    response.raise_for_status()
    payload = response.json()

    record_attrs = payload.get("attributes", {})
    record_tables = payload.get("tables", {})

    # Scalar fields: output key -> attribute name in the response.
    scalar_sources = {
        "gene_name": "gene_name",
        "product": "gene_product",
        "molecular_weight": "molecular_weight",
        "isoelectric_point": "isoelectric_point",
        "signal_peptide": "signal_peptide",
        "transmembrane_domains": "transmembrane_domains",
    }
    annotation = {"gene_id": gene_id}
    for out_key, attr_name in scalar_sources.items():
        annotation[out_key] = record_attrs.get(attr_name, "")

    # GO terms
    go_terms = [
        {
            "go_id": row.get("go_id", ""),
            "go_term": row.get("go_term_name", ""),
            "ontology": row.get("ontology", ""),
            "evidence": row.get("evidence_code", ""),
        }
        for row in record_tables.get("GoTerms", [])
    ]
    annotation["go_terms"] = go_terms

    # InterPro domains
    domains = [
        {
            "interpro_id": row.get("interpro_primary_id", ""),
            "name": row.get("interpro_name", ""),
            "description": row.get("interpro_description", ""),
        }
        for row in record_tables.get("InterPro", [])
    ]
    annotation["domains"] = domains

    print(f"VEuPathDB annotation: {gene_id}, "
          f"{len(go_terms)} GO terms, {len(domains)} domains")
    return annotation
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## 3. 薬剤標的候補スクリーニング
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
def parasite_drug_target_screen(organism, db="plasmo",
                                essentiality_threshold=0.5):
    """
    Parasite genome — drug target candidate screen.

    Runs three keyword searches ("kinase", "protease", "transporter") through
    ``veupathdb_search_genes``, merges the hits, removes duplicate gene IDs
    and labels every remaining gene with a ``target_class``.

    Parameters:
        organism: str — organism name
        db: str — database
        essentiality_threshold: float — essentiality score threshold.
            Currently not used by the screen; kept so existing callers that
            pass it keep working.

    Returns:
        pandas.DataFrame — merged search hits with an added ``target_class``
        column. When none of the searches returns a hit, an empty frame with
        the columns ``gene_id`` and ``target_class`` is returned.
    """
    # Search order matters twice: the first occurrence of a gene supplies the
    # row that survives de-duplication, while the *last* matching class wins
    # the label (same precedence as sequential assignment).
    target_keywords = ("kinase", "protease", "transporter")
    hits = {
        keyword: veupathdb_search_genes(organism, keyword, db=db)
        for keyword in target_keywords
    }

    # A search without hits yields a frame with no columns at all; touching
    # its "gene_id" column would raise KeyError, so such frames are left out.
    non_empty = [frame for frame in hits.values() if not frame.empty]

    if non_empty:
        all_targets = pd.concat(non_empty, ignore_index=True)
        all_targets = all_targets.drop_duplicates(subset=["gene_id"])

        # Drug-target class label (heuristic)
        all_targets["target_class"] = "unknown"
        for keyword, frame in hits.items():
            if frame.empty:
                continue
            in_class = all_targets["gene_id"].isin(frame["gene_id"])
            all_targets.loc[in_class, "target_class"] = keyword
    else:
        all_targets = pd.DataFrame(columns=["gene_id", "target_class"])

    print(f"Drug target screen: {len(all_targets)} candidates "
          f"(kinases={len(hits['kinase'])}, "
          f"proteases={len(hits['protease'])}, "
          f"transporters={len(hits['transporter'])})")
    return all_targets
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
## 4. 寄生虫ゲノミクス統合パイプライン
|
|
207
|
+
|
|
208
|
+
```python
|
|
209
|
+
def parasite_genomics_pipeline(organism, query,
                               db="plasmo",
                               output_dir="results"):
    """
    Integrated parasite genomics pipeline.

    Runs a gene search, fetches annotations for up to the first ten hits and
    runs the drug target screen, writing ``genes.csv``, ``annotations.csv``
    and ``drug_targets.csv`` into ``output_dir``.

    Parameters:
        organism: str — organism name (e.g. "Plasmodium falciparum 3D7")
        query: str — search query
        db: str — database
        output_dir: str — output directory (created if missing)

    Returns:
        dict — ``genes`` (DataFrame), ``annotations`` (list of annotation
        dicts, including their list-valued entries) and ``drug_targets``
        (DataFrame).
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Gene search
    genes = veupathdb_search_genes(organism, query, db=db)
    genes.to_csv(output_dir / "genes.csv", index=False)

    # 2) Annotation of the top genes
    # A search without hits yields a frame with no "gene_id" column; treat
    # that as "nothing to annotate" instead of raising KeyError.
    if "gene_id" in genes.columns:
        top_gene_ids = genes["gene_id"].head(10)
    else:
        top_gene_ids = []

    annotations = []
    for gene_id in top_gene_ids:
        try:
            ann = veupathdb_gene_annotation(gene_id, db=db)
        except Exception as exc:
            # Annotation is best-effort: one failing gene must not abort the
            # pipeline, but the failure is reported instead of hidden.
            print(f"VEuPathDB annotation skipped: {gene_id} ({exc})")
            continue
        annotations.append(ann)

    # Only scalar fields go into the CSV; list-valued entries (GO terms,
    # domains) stay available through the returned ``annotations``.
    ann_df = pd.DataFrame([{
        k: v for k, v in a.items()
        if not isinstance(v, list)
    } for a in annotations])
    ann_df.to_csv(output_dir / "annotations.csv", index=False)

    # 3) Drug target screen
    targets = parasite_drug_target_screen(organism, db=db)
    targets.to_csv(output_dir / "drug_targets.csv", index=False)

    print(f"Parasite genomics pipeline: {output_dir}")
    return {
        "genes": genes,
        "annotations": annotations,
        "drug_targets": targets,
    }
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
---
|
|
256
|
+
|
|
257
|
+
## ToolUniverse 連携
|
|
258
|
+
|
|
259
|
+
直接 REST API 使用 (VEuPathDB は ToolUniverse 外)。
|
|
260
|
+
|
|
261
|
+
## パイプライン統合
|
|
262
|
+
|
|
263
|
+
```
|
|
264
|
+
infectious-disease → parasite-genomics → phylogenetics
|
|
265
|
+
(病原体情報) (寄生虫ゲノム) (系統解析)
|
|
266
|
+
│ │ ↓
|
|
267
|
+
drug-discovery ─────────┘ comparative-genomics
|
|
268
|
+
(薬剤探索) │ (比較ゲノミクス)
|
|
269
|
+
↓
|
|
270
|
+
pathway-enrichment
|
|
271
|
+
(パスウェイ解析)
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
## パイプライン出力
|
|
275
|
+
|
|
276
|
+
| ファイル | 説明 | 次スキル |
|
|
277
|
+
|---------|------|---------|
|
|
278
|
+
| `results/genes.csv` | 遺伝子一覧 | → phylogenetics |
|
|
279
|
+
| `results/annotations.csv` | 機能アノテーション | → pathway-enrichment |
|
|
280
|
+
| `results/drug_targets.csv` | 薬剤標的候補 | → drug-discovery |
|