@nahisaho/satori 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,280 @@
1
+ ---
2
+ name: scientific-parasite-genomics
3
+ description: |
4
+ 寄生虫ゲノミクススキル。PlasmoDB/VectorBase/ToxoDB/TriTrypDB REST API
5
+ による寄生虫ゲノム検索・遺伝子情報・薬剤標的同定・比較
6
+ ゲノミクス。直接 REST API 連携 (ToolUniverse 外)。
7
+ tu_tools: []
8
+ ---
9
+
10
+ # Scientific Parasite Genomics
11
+
12
+ VEuPathDB ファミリー (PlasmoDB, VectorBase, ToxoDB, TriTrypDB)
13
+ の REST API を活用した寄生虫ゲノミクス解析パイプラインを提供
14
+ する。
15
+
16
+ ## When to Use
17
+
18
+ - マラリア原虫ゲノム (PlasmoDB) を検索するとき
19
+ - 蚊・ダニ等の媒介生物ゲノム (VectorBase) を検索するとき
20
+ - トキソプラズマゲノム (ToxoDB) を検索するとき
21
+ - トリパノソーマ/リーシュマニアゲノム (TriTrypDB) を検索するとき
22
+ - 寄生虫の薬剤標的候補を同定するとき
23
+ - 寄生虫間の比較ゲノミクスを実施するとき
24
+
25
+ ---
26
+
27
+ ## Quick Start
28
+
29
+ ## 1. VEuPathDB 遺伝子検索
30
+
31
+ ```python
32
+ import requests
33
+ import pandas as pd
34
+ import numpy as np
35
+
36
+ VEUPATHDB_SITES = {
37
+ "plasmo": "https://plasmodb.org/plasmo/service",
38
+ "vector": "https://vectorbase.org/vectorbase/service",
39
+ "toxo": "https://toxodb.org/toxo/service",
40
+ "tritryp": "https://tritrypdb.org/tritrypdb/service",
41
+ }
42
+
43
+
44
def veupathdb_search_genes(organism, query, db="plasmo",
                           limit=100):
    """
    VEuPathDB — gene text search.

    Posts a ``GenesByTextSearch`` request to the selected VEuPathDB site
    and flattens the returned records into a DataFrame.

    Parameters:
        organism: str — organism name (e.g. "Plasmodium falciparum 3D7")
        query: str — search keyword (e.g. "kinase", "transporter")
        db: str — key into VEUPATHDB_SITES ("plasmo", "vector", "toxo",
            "tritryp"); an unknown key falls back to "plasmo"
        limit: int — maximum number of records requested

    Returns:
        pandas.DataFrame with the columns gene_id, gene_name, product,
        gene_type, chromosome, start, end, strand. The columns are
        present even when the search returns no records, so callers can
        safely index ``df["gene_id"]``.

    Raises:
        requests.HTTPError: when the service answers with an error status.
    """
    # Output column -> (attribute name in the response, default value).
    column_spec = {
        "gene_id": ("primary_key", ""),
        "gene_name": ("gene_name", ""),
        "product": ("gene_product", ""),
        "gene_type": ("gene_type", ""),
        "chromosome": ("chromosome", ""),
        "start": ("start_min", None),
        "end": ("end_max", None),
        "strand": ("strand", ""),
    }

    base = VEUPATHDB_SITES.get(db, VEUPATHDB_SITES["plasmo"])
    url = f"{base}/record-types/gene/searches/GenesByTextSearch"

    payload = {
        "searchConfig": {
            "parameters": {
                "text_expression": query,
                "text_fields": "Gene ID,Gene Name or Symbol,"
                               "Gene product",
                "organism": [organism],
            }
        },
        "reportConfig": {
            "attributes": ["primary_key", "gene_name",
                           "gene_product", "gene_type",
                           "chromosome", "start_min",
                           "end_max", "strand"],
            "pagination": {"offset": 0, "numRecords": limit},
        },
    }
    headers = {"Content-Type": "application/json"}
    resp = requests.post(url, json=payload, headers=headers,
                         timeout=60)
    resp.raise_for_status()
    data = resp.json()

    results = []
    for rec in data.get("records", []):
        attrs = rec.get("attributes", {})
        results.append({
            column: attrs.get(source, default)
            for column, (source, default) in column_spec.items()
        })

    # Passing the column list keeps the schema stable for an empty result;
    # a bare DataFrame([]) would have no columns at all.
    df = pd.DataFrame(results, columns=list(column_spec))
    print(f"VEuPathDB ({db}) genes: {len(df)} results "
          f"(organism={organism}, query={query})")
    return df
99
+ ```
100
+
101
+ ## 2. 遺伝子機能アノテーション
102
+
103
+ ```python
104
def veupathdb_gene_annotation(gene_id, db="plasmo"):
    """
    VEuPathDB — fetch the functional annotation of one gene.

    Requests the gene record with all attributes plus a fixed set of
    tables, then collects the scalar attributes, GO terms and InterPro
    domains into a single dictionary.

    Parameters:
        gene_id: str — gene ID (e.g. "PF3D7_1133400")
        db: str — key into VEUPATHDB_SITES; an unknown key falls back
            to "plasmo"

    Returns:
        dict with the scalar keys gene_id, gene_name, product,
        molecular_weight, isoelectric_point, signal_peptide,
        transmembrane_domains, plus the list-valued keys "go_terms" and
        "domains" (each a list of dicts).

    Raises:
        requests.HTTPError: when the service answers with an error status.
    """
    site = VEUPATHDB_SITES.get(db, VEUPATHDB_SITES["plasmo"])
    response = requests.get(
        f"{site}/record-types/gene/records/{gene_id}",
        params={
            "attributes": "all",
            "tables": "GoTerms,InterPro,MetabolicPathways,"
                      "PubMed,EcNumber",
        },
        timeout=30,
    )
    response.raise_for_status()
    record = response.json()

    attributes = record.get("attributes", {})
    record_tables = record.get("tables", {})

    # Scalar attributes: the gene ID is echoed from the argument, the
    # rest are copied from the response with "" as the default.
    annotation = {"gene_id": gene_id}
    for out_key, attr_key in (
        ("gene_name", "gene_name"),
        ("product", "gene_product"),
        ("molecular_weight", "molecular_weight"),
        ("isoelectric_point", "isoelectric_point"),
        ("signal_peptide", "signal_peptide"),
        ("transmembrane_domains", "transmembrane_domains"),
    ):
        annotation[out_key] = attributes.get(attr_key, "")

    # GO terms
    go_terms = [
        {
            "go_id": row.get("go_id", ""),
            "go_term": row.get("go_term_name", ""),
            "ontology": row.get("ontology", ""),
            "evidence": row.get("evidence_code", ""),
        }
        for row in record_tables.get("GoTerms", [])
    ]
    annotation["go_terms"] = go_terms

    # InterPro domains
    domains = [
        {
            "interpro_id": row.get("interpro_primary_id", ""),
            "name": row.get("interpro_name", ""),
            "description": row.get("interpro_description", ""),
        }
        for row in record_tables.get("InterPro", [])
    ]
    annotation["domains"] = domains

    print(f"VEuPathDB annotation: {gene_id}, "
          f"{len(go_terms)} GO terms, {len(domains)} domains")
    return annotation
161
+ ```
162
+
163
+ ## 3. 薬剤標的候補スクリーニング
164
+
165
+ ```python
166
def parasite_drug_target_screen(organism, db="plasmo",
                                essentiality_threshold=0.5):
    """
    Parasite genome — drug target candidate screen.

    Runs three keyword searches ("kinase", "protease", "transporter")
    through veupathdb_search_genes, merges the hits, removes duplicate
    gene IDs and labels each remaining gene with a ``target_class``.

    Parameters:
        organism: str — organism name
        db: str — database key passed on to veupathdb_search_genes
        essentiality_threshold: float — essentiality score threshold.
            NOTE: accepted for interface compatibility but not used by
            the current heuristic; no essentiality data is fetched here.

    Returns:
        pandas.DataFrame of unique candidates with an added
        "target_class" column. When a gene matches several keywords the
        class assigned last wins (transporter over protease over
        kinase). If no search returns any gene, an empty frame that
        still has the "gene_id" and "target_class" columns is returned.
    """
    # Kinase search
    kinases = veupathdb_search_genes(organism, "kinase", db=db)
    # Protease search
    proteases = veupathdb_search_genes(organism, "protease", db=db)
    # Transporter search
    transporters = veupathdb_search_genes(
        organism, "transporter", db=db)

    all_targets = pd.concat([kinases, proteases, transporters],
                            ignore_index=True)
    # An empty search may yield a frame without any columns; make sure
    # "gene_id" exists so de-duplication below cannot raise KeyError.
    if "gene_id" not in all_targets.columns:
        all_targets = all_targets.reindex(
            columns=[*all_targets.columns, "gene_id"])
    all_targets = all_targets.drop_duplicates(subset=["gene_id"])

    # Target class label (heuristic): later assignments overwrite
    # earlier ones, so the order of this tuple defines the priority.
    all_targets["target_class"] = "unknown"
    for label, hits in (("kinase", kinases),
                        ("protease", proteases),
                        ("transporter", transporters)):
        if "gene_id" not in hits.columns:
            # Column-less (empty) search result: nothing to label.
            continue
        all_targets.loc[
            all_targets["gene_id"].isin(hits["gene_id"]),
            "target_class"] = label

    print(f"Drug target screen: {len(all_targets)} candidates "
          f"(kinases={len(kinases)}, proteases={len(proteases)}, "
          f"transporters={len(transporters)})")
    return all_targets
204
+ ```
205
+
206
+ ## 4. 寄生虫ゲノミクス統合パイプライン
207
+
208
+ ```python
209
def parasite_genomics_pipeline(organism, query,
                               db="plasmo",
                               output_dir="results"):
    """
    Integrated parasite genomics pipeline.

    Runs a gene search, annotates up to the first ten hits, runs the
    drug target screen, and writes each result as CSV into
    ``output_dir`` (genes.csv, annotations.csv, drug_targets.csv).

    Parameters:
        organism: str — organism name (e.g. "Plasmodium falciparum 3D7")
        query: str — search query
        db: str — database key
        output_dir: str — output directory (created if missing)

    Returns:
        dict with the keys "genes" (DataFrame), "annotations" (list of
        annotation dicts, including their list-valued fields) and
        "drug_targets" (DataFrame).
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Gene search
    genes = veupathdb_search_genes(organism, query, db=db)
    genes.to_csv(output_dir / "genes.csv", index=False)

    # 2) Annotation of the top genes
    # An empty search result may have no "gene_id" column at all.
    if "gene_id" in genes.columns:
        top_gene_ids = list(genes["gene_id"].head(10))
    else:
        top_gene_ids = []

    annotations = []
    for gene_id in top_gene_ids:
        try:
            annotations.append(veupathdb_gene_annotation(gene_id, db=db))
        except Exception as exc:
            # Best-effort: one failing record must not abort the run,
            # but the failure is reported instead of silently dropped.
            print(f"VEuPathDB annotation failed: {gene_id} ({exc})")

    # Only scalar fields go into the CSV; list-valued fields (GO terms,
    # domains) stay available in the returned annotation dicts.
    ann_df = pd.DataFrame([{
        k: v for k, v in a.items()
        if not isinstance(v, list)
    } for a in annotations])
    ann_df.to_csv(output_dir / "annotations.csv", index=False)

    # 3) Drug target screening
    targets = parasite_drug_target_screen(organism, db=db)
    targets.to_csv(output_dir / "drug_targets.csv", index=False)

    print(f"Parasite genomics pipeline: {output_dir}")
    return {
        "genes": genes,
        "annotations": annotations,
        "drug_targets": targets,
    }
253
+ ```
254
+
255
+ ---
256
+
257
+ ## ToolUniverse 連携
258
+
259
+ 直接 REST API 使用 (VEuPathDB は ToolUniverse 外)。
260
+
261
+ ## パイプライン統合
262
+
263
+ ```
264
+ infectious-disease → parasite-genomics → phylogenetics
265
+ (病原体情報) (寄生虫ゲノム) (系統解析)
266
+ │ │ ↓
267
+ drug-discovery ─────────┘ comparative-genomics
268
+ (薬剤探索) │ (比較ゲノミクス)
269
+
270
+ pathway-enrichment
271
+ (パスウェイ解析)
272
+ ```
273
+
274
+ ## パイプライン出力
275
+
276
+ | ファイル | 説明 | 次スキル |
277
+ |---------|------|---------|
278
+ | `results/genes.csv` | 遺伝子一覧 | → phylogenetics |
279
+ | `results/annotations.csv` | 機能アノテーション | → pathway-enrichment |
280
+ | `results/drug_targets.csv` | 薬剤標的候補 | → drug-discovery |
@@ -0,0 +1,321 @@
1
+ ---
2
+ name: scientific-plant-biology
3
+ description: |
4
+ 植物バイオロジー統合スキル。Plant Reactome 代謝パスウェイ・
5
+ TAIR Arabidopsis ゲノム情報・Phytozome 比較ゲノミクス・
6
+ Ensembl Plants 種間オーソログ解析。
7
+ ---
8
+
9
+ # Scientific Plant Biology
10
+
11
+ Plant Reactome / TAIR / Phytozome / Ensembl Plants を活用した
12
+ 植物ゲノム・代謝パスウェイ統合解析パイプラインを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - 植物代謝パスウェイ解析 (Plant Reactome) を実行するとき
17
+ - Arabidopsis thaliana の遺伝子・タンパク質情報を取得するとき
18
+ - 植物種間の比較ゲノミクス解析を行うとき
19
+ - 植物オーソログ・パラログを同定するとき
20
+ - 作物改良のための候補遺伝子を探索するとき
21
+ - 植物表現型データと遺伝子型を統合するとき
22
+
23
+ ---
24
+
25
+ ## Quick Start
26
+
27
+ ## 1. Plant Reactome パスウェイ検索
28
+
29
+ ```python
30
+ import requests
31
+ import pandas as pd
32
+ import json
33
+
34
+ PLANT_REACTOME = "https://plantreactome.gramene.org/ContentService"
35
+
36
+
37
def plant_reactome_search(query, species="Oryza sativa"):
    """
    Plant Reactome — search plant metabolic/signalling pathways.

    Parameters:
        query: str — search query (e.g. "photosynthesis")
        species: str — species name

    Returns:
        pandas.DataFrame with one row per entry of every result group,
        holding stId, name, species, type and compartment.

    Raises:
        requests.HTTPError: when the service answers with an error status.
    """
    response = requests.get(
        f"{PLANT_REACTOME}/search/query",
        params={"query": query, "species": species, "cluster": True},
        timeout=30,
    )
    response.raise_for_status()
    payload = response.json()

    # Results are grouped; flatten every group's entries into one table.
    rows = [
        {
            "stId": item.get("stId", ""),
            "name": item.get("name", ""),
            "species": item.get("species", ""),
            "type": item.get("exactType", ""),
            "compartment": item.get("compartmentNames", []),
        }
        for bucket in payload.get("results", [])
        for item in bucket.get("entries", [])
    ]

    df = pd.DataFrame(rows)
    print(f"Plant Reactome: '{query}' → {len(df)} entries ({species})")
    return df
65
+
66
+
67
def plant_reactome_pathway_detail(pathway_id):
    """
    Plant Reactome — fetch the events contained in a pathway.

    Parameters:
        pathway_id: str — pathway ID (e.g. "R-OSA-1119616")

    Returns:
        pandas.DataFrame with one row per contained event: stId, name,
        type, input_count, output_count and catalyst (display name of
        the first catalyst activity, or "" when there is none).

    Raises:
        requests.HTTPError: when the service answers with an error status.
    """
    response = requests.get(
        f"{PLANT_REACTOME}/data/pathway/{pathway_id}/containedEvents",
        timeout=30,
    )
    response.raise_for_status()

    rows = []
    for item in response.json():
        # Only the first catalyst activity is reported.
        activities = item.get("catalystActivity")
        if activities:
            catalyst_name = activities[0].get("displayName", "")
        else:
            catalyst_name = ""

        rows.append({
            "stId": item.get("stId", ""),
            "name": item.get("displayName", ""),
            "type": item.get("className", ""),
            "input_count": len(item.get("input", [])),
            "output_count": len(item.get("output", [])),
            "catalyst": catalyst_name,
        })

    df = pd.DataFrame(rows)
    print(f"Pathway {pathway_id}: {len(df)} reaction steps")
    return df
94
+ ```
95
+
96
+ ## 2. TAIR Arabidopsis 遺伝子情報
97
+
98
+ ```python
99
+ TAIR_BASE = "https://www.arabidopsis.org/api"
100
+
101
+
102
def tair_gene_search(gene_id=None, gene_name=None, keyword=None):
    """
    TAIR — look up Arabidopsis thaliana gene information.

    With ``gene_id`` the single gene record is fetched directly and
    returned as a one-row table. Otherwise a search is run with
    ``gene_name`` or, failing that, ``keyword`` (an empty string when
    neither is given).

    Parameters:
        gene_id: str — AGI ID (e.g. "AT1G01010")
        gene_name: str — gene name (e.g. "FLC")
        keyword: str — keyword search term

    Returns:
        pandas.DataFrame. The direct lookup has the columns agi_id,
        name, description, chromosome, start, end, strand and
        gene_model_type; the search result has agi_id, name,
        description and chromosome.

    Raises:
        requests.HTTPError: when the service answers with an error status.
    """
    if not gene_id:
        # Keyword search
        search_term = gene_name or keyword or ""
        response = requests.get(
            f"{TAIR_BASE}/search/gene",
            params={"query": search_term, "limit": 50},
            timeout=30,
        )
        response.raise_for_status()
        hits = response.json().get("results", [])

        df = pd.DataFrame([
            {
                "agi_id": hit.get("locus", ""),
                "name": hit.get("name", ""),
                "description": hit.get("description", ""),
                "chromosome": hit.get("chromosome", ""),
            }
            for hit in hits
        ])
        print(f"TAIR: '{search_term}' → {len(df)} genes")
        return df

    # Direct lookup by AGI ID
    response = requests.get(f"{TAIR_BASE}/gene/{gene_id}", timeout=30)
    response.raise_for_status()
    record = response.json()
    row = {
        "agi_id": record.get("locus", ""),
        "name": record.get("name", ""),
        "description": record.get("description", ""),
        "chromosome": record.get("chromosome", ""),
        "start": record.get("start", ""),
        "end": record.get("end", ""),
        "strand": record.get("strand", ""),
        "gene_model_type": record.get("gene_model_type", ""),
    }
    return pd.DataFrame([row])
147
+
148
+
149
def tair_gene_expression(gene_id):
    """
    TAIR — fetch the expression pattern of a gene.

    Parameters:
        gene_id: str — AGI ID

    Returns:
        pandas.DataFrame with one row per expression record: tissue,
        stage, level and source.

    Raises:
        requests.HTTPError: when the service answers with an error status.
    """
    response = requests.get(
        f"{TAIR_BASE}/gene/{gene_id}/expression", timeout=30)
    response.raise_for_status()
    records = response.json().get("expression", [])

    df = pd.DataFrame([
        {
            "tissue": item.get("tissue", ""),
            "stage": item.get("developmental_stage", ""),
            "level": item.get("expression_level", ""),
            "source": item.get("source", ""),
        }
        for item in records
    ])
    print(f"TAIR expression: {gene_id} → {len(df)} tissue records")
    return df
173
+ ```
174
+
175
+ ## 3. Ensembl Plants 種間比較
176
+
177
+ ```python
178
+ ENSEMBL_PLANTS = "https://rest.ensembl.org"
179
+
180
+
181
def ensembl_plants_orthologs(gene_id, source_species="arabidopsis_thaliana",
                             target_species=None):
    """
    Ensembl Plants — cross-species ortholog lookup.

    Queries the ``homology/id`` endpoint with ``type=orthologues`` and
    ``compara=plants`` and tabulates the homologies of the first entry
    in the response.

    Parameters:
        gene_id: str — Ensembl gene ID or symbol
        source_species: str — source species; only copied into the
            output rows, it is not sent to the service
        target_species: str — target species (None = all species)

    Returns:
        pandas.DataFrame with one row per homology: source_gene,
        source_species, target_gene, target_species, target_protein,
        identity, dn_ds and type. Empty when the response carries no
        entries or no homologies.

    Raises:
        requests.HTTPError: when the service answers with an error status.
    """
    url = f"{ENSEMBL_PLANTS}/homology/id/{gene_id}"
    params = {
        "type": "orthologues",
        "content-type": "application/json",
        "compara": "plants",
    }
    if target_species:
        params["target_species"] = target_species

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()

    # "data" may be missing or an empty list; indexing [0] blindly would
    # raise IndexError in the latter case.
    entries = data.get("data") or []
    homologies = entries[0].get("homologies", []) if entries else []

    orthologs = []
    for homology in homologies:
        target = homology.get("target", {})
        orthologs.append({
            "source_gene": gene_id,
            "source_species": source_species,
            "target_gene": target.get("id", ""),
            "target_species": target.get("species", ""),
            "target_protein": target.get("protein_id", ""),
            "identity": target.get("perc_id", 0),
            "dn_ds": homology.get("dn_ds", None),
            "type": homology.get("type", ""),
        })

    df = pd.DataFrame(orthologs)
    print(f"Ensembl Plants orthologs: {gene_id} → {len(df)} homologs")
    return df
221
+ ```
222
+
223
+ ## 4. Phytozome 比較ゲノミクス
224
+
225
+ ```python
226
+ PHYTOZOME_BASE = "https://phytozome-next.jgi.doe.gov/api"
227
+
228
+
229
def phytozome_gene_family(gene_id, species="Athaliana"):
    """
    Phytozome — gene family / comparative genomics lookup.

    Parameters:
        gene_id: str — gene ID
        species: str — species abbreviation

    Returns:
        pandas.DataFrame with one row per search hit: gene_id,
        family_id, family_name, species, annotation and pfam_domains.

    Raises:
        requests.HTTPError: when the service answers with an error status.
    """
    response = requests.get(
        f"{PHYTOZOME_BASE}/search",
        params={"query": gene_id, "organism": species},
        timeout=30,
    )
    response.raise_for_status()
    hits = response.json().get("hits", [])

    df = pd.DataFrame([
        {
            "gene_id": member.get("gene_id", ""),
            "family_id": member.get("family_id", ""),
            "family_name": member.get("family_name", ""),
            "species": member.get("organism", ""),
            "annotation": member.get("annotation", ""),
            "pfam_domains": member.get("pfam", []),
        }
        for member in hits
    ])
    print(f"Phytozome: {gene_id} → {len(df)} family members")
    return df
257
+ ```
258
+
259
+ ## 5. 植物バイオロジー統合パイプライン
260
+
261
+ ```python
262
def plant_biology_pipeline(gene_query, species="Oryza sativa",
                           output_dir="results"):
    """
    Integrated plant biology pipeline.

    Searches Plant Reactome and TAIR with the same query, then looks up
    Ensembl Plants orthologs for the first TAIR hit. Results are written
    as CSV into ``output_dir``; orthologs.csv is only written when TAIR
    returned at least one gene.

    Parameters:
        gene_query: str — gene/pathway query
        species: str — target species (used for the Plant Reactome search)
        output_dir: str — output directory (created if missing)

    Returns:
        dict with the DataFrames "pathways", "tair_genes" and
        "orthologs" (an empty DataFrame when TAIR returned no gene).
    """
    from pathlib import Path
    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    # 1) Plant Reactome pathways
    pathways = plant_reactome_search(gene_query, species=species)
    pathways.to_csv(out_path / "plant_pathways.csv", index=False)

    # 2) TAIR gene search (run for every query, whatever the species)
    tair_genes = tair_gene_search(keyword=gene_query)
    tair_genes.to_csv(out_path / "tair_genes.csv", index=False)

    # 3) Ensembl Plants orthologs of the first TAIR hit
    if len(tair_genes) == 0:
        orthologs = pd.DataFrame()
    else:
        first_agi = tair_genes.iloc[0]["agi_id"]
        orthologs = ensembl_plants_orthologs(first_agi)
        orthologs.to_csv(out_path / "orthologs.csv", index=False)

    print(f"Plant biology pipeline: {out_path}")
    return {
        "pathways": pathways,
        "tair_genes": tair_genes,
        "orthologs": orthologs,
    }
298
+ ```
299
+
300
+ ---
301
+
302
+ ## パイプライン統合
303
+
304
+ ```
305
+ pathway-enrichment → plant-biology → environmental-ecology
306
+ (KEGG/Reactome) (PlantReactome) (生態学/環境)
307
+ │ │ ↓
308
+ gene-annotation ────────┘ marine-ecology
309
+ (GO/InterPro) │ (OBIS/WoRMS)
310
+
311
+ comparative-genomics
312
+ (Ensembl 比較)
313
+ ```
314
+
315
+ ## パイプライン出力
316
+
317
+ | ファイル | 説明 | 次スキル |
318
+ |---------|------|---------|
319
+ | `results/plant_pathways.csv` | Plant Reactome パスウェイ | → pathway-enrichment |
320
+ | `results/tair_genes.csv` | TAIR Arabidopsis 遺伝子 | → gene-annotation |
321
+ | `results/orthologs.csv` | 種間オーソログ | → comparative-genomics |