@nahisaho/satori 0.16.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,265 @@
1
+ ---
2
+ name: scientific-paleobiology
3
+ description: |
4
+ 古生物学データベーススキル。Paleobiology Database (PBDB) REST
5
+ API による化石産出記録・分類群・コレクション検索、地質年代
6
+ 多様性曲線・古地理解析。ToolUniverse 連携: paleobiology。
7
+ tu_tools:
8
+ - key: paleobiology
9
+ name: Paleobiology Database
10
+ description: PBDB 化石産出記録・分類群・コレクション検索
11
+ ---
12
+
13
+ # Scientific Paleobiology
14
+
15
+ Paleobiology Database (PBDB) REST API を活用した古生物学的
16
+ 多様性解析パイプラインを提供する。
17
+
18
+ ## When to Use
19
+
20
+ - 化石産出記録 (occurrence) を検索するとき
21
+ - 分類群 (taxa) の地質年代分布を調べるとき
22
+ - 化石コレクション/産地情報を検索するとき
23
+ - 地質年代を通じた多様性曲線を作成するとき
24
+ - 大量絶滅イベントのパターンを分析するとき
25
+ - 古地理的分布を解析するとき
26
+
27
+ ---
28
+
29
+ ## Quick Start
30
+
31
+ ## 1. PBDB 化石産出記録検索
32
+
33
+ ```python
34
+ import requests
35
+ import pandas as pd
36
+ import numpy as np
37
+
38
# Root URL of the PBDB data service (the "data1.2" path); the functions in
# this document append endpoint paths such as "/occs/list.json" to it.
PBDB_BASE = "https://paleobiodb.org/data1.2"
39
+
40
+
41
def pbdb_search_occurrences(taxon=None, interval=None,
                            lngmin=None, lngmax=None,
                            latmin=None, latmax=None, limit=1000):
    """
    Search fossil occurrence records in the Paleobiology Database (PBDB).

    Sends a GET request to the ``occs/list.json`` endpoint and flattens the
    returned records into a table with descriptive column names.

    Parameters:
        taxon: str — base taxon name (e.g. "Dinosauria", "Trilobita");
            left out of the query when falsy
        interval: str — geological interval (e.g. "Cretaceous", "Permian");
            left out of the query when falsy
        lngmin: float — minimum longitude of the bounding box
        lngmax: float — maximum longitude of the bounding box
        latmin: float — minimum latitude of the bounding box
        latmax: float — maximum latitude of the bounding box
        limit: int — maximum number of records to request

    Returns:
        pandas.DataFrame with one row per occurrence. When the response
        holds no records the frame is empty and has no columns.

    Raises:
        requests.HTTPError: if the service answers with an error status.
    """
    # (output column, key read from each response record, default if absent)
    column_spec = (
        ("occurrence_no", "oid", ""),
        ("taxon_name", "tna", ""),
        ("taxon_rank", "rnk", ""),
        ("phylum", "phl", ""),
        ("class", "cll", ""),
        ("order", "odl", ""),
        ("family", "fml", ""),
        ("early_interval", "oei", ""),
        ("late_interval", "oli", ""),
        ("max_ma", "eag", None),
        ("min_ma", "lag", None),
        ("lng", "lng", None),
        ("lat", "lat", None),
        ("collection_no", "cid", ""),
        ("reference_no", "rid", ""),
    )

    query = {"show": "coords,phylo,time", "limit": limit}
    if taxon:
        query["base_name"] = taxon
    if interval:
        query["interval"] = interval

    # The bounding box is applied only when all four edges are supplied;
    # a partially specified box is ignored.
    bbox = {
        "lngmin": lngmin, "lngmax": lngmax,
        "latmin": latmin, "latmax": latmax,
    }
    if not any(edge is None for edge in bbox.values()):
        query.update(bbox)

    response = requests.get(f"{PBDB_BASE}/occs/list.json",
                            params=query, timeout=30)
    response.raise_for_status()
    payload = response.json()

    rows = [
        {column: rec.get(key, default)
         for column, key, default in column_spec}
        for rec in payload.get("records", [])
    ]

    df = pd.DataFrame(rows)
    print(f"PBDB occurrences: {len(df)} records "
          f"(taxon={taxon}, interval={interval})")
    return df
99
+ ```
100
+
101
+ ## 2. PBDB 分類群情報検索
102
+
103
+ ```python
104
def pbdb_search_taxa(name=None, rank=None, interval=None, limit=500):
    """
    Search taxa in the Paleobiology Database (PBDB).

    Sends a GET request to the ``taxa/list.json`` endpoint and flattens the
    returned records into a table with descriptive column names.

    Parameters:
        name: str — base taxon name (e.g. "Dinosauria"); left out of the
            query when falsy
        rank: str — taxonomic rank filter (e.g. "genus", "family", "order");
            left out of the query when falsy
        interval: str — geological interval filter; left out when falsy
        limit: int — maximum number of records to request

    Returns:
        pandas.DataFrame with one row per taxon. When the response holds no
        records the frame is empty and has no columns.

    Raises:
        requests.HTTPError: if the service answers with an error status.
    """
    url = f"{PBDB_BASE}/taxa/list.json"
    params = {
        # "parent" is requested because the "prl" (parent name) field read
        # below is, per the PBDB taxa documentation, only returned with that
        # output block; without it parent_name was always empty.
        "show": "attr,app,parent,size",
        "limit": limit,
    }
    if name:
        params["base_name"] = name
    if rank:
        params["rank"] = rank
    if interval:
        params["interval"] = interval

    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    records = resp.json().get("records", [])

    results = [
        {
            "taxon_no": r.get("oid", ""),
            "taxon_name": r.get("nam", ""),
            "rank": r.get("rnk", ""),
            "parent_name": r.get("prl", ""),
            "n_occs": r.get("noc", 0),
            "first_appearance": r.get("fea", ""),
            "last_appearance": r.get("lla", ""),
            "extant": r.get("ext", ""),
        }
        for r in records
    ]

    df = pd.DataFrame(results)
    print(f"PBDB taxa: {len(df)} records (name={name})")
    return df
146
+ ```
147
+
148
+ ## 3. 地質年代多様性曲線
149
+
150
+ ```python
151
def pbdb_diversity_curve(taxon, time_resolution="stage",
                         rank="genus"):
    """
    Build a diversity-through-time table from the PBDB.

    Sends a GET request to the ``occs/diversity.json`` endpoint and returns
    one row per time interval.

    Parameters:
        taxon: str — base taxon name
        time_resolution: str — "stage", "epoch" or "period"
        rank: str — rank to count. The singular names "genus", "family" and
            "order" are translated to the plural values the PBDB ``count``
            parameter documents; any other value is passed through unchanged.

    Returns:
        pandas.DataFrame with the columns interval_name, max_ma, min_ma,
        mid_ma, sampled_in_bin, n_originations, n_extinctions and
        range_through. The columns are present even when the service
        returns no intervals. mid_ma is None when either bound is missing.

    Raises:
        requests.HTTPError: if the service answers with an error status.
    """
    # Keeps the documented singular arguments working while sending the
    # plural names the API documents for `count`.
    count_aliases = {
        "genus": "genera",
        "family": "families",
        "order": "orders",
    }
    columns = [
        "interval_name", "max_ma", "min_ma", "mid_ma",
        "sampled_in_bin", "n_originations", "n_extinctions",
        "range_through",
    ]

    url = f"{PBDB_BASE}/occs/diversity.json"
    params = {
        "base_name": taxon,
        "count": count_aliases.get(rank, rank),
        "time_reso": time_resolution,
    }
    resp = requests.get(url, params=params, timeout=60)
    resp.raise_for_status()
    records = resp.json().get("records", [])

    results = []
    for r in records:
        max_ma = r.get("eag")
        min_ma = r.get("lag")
        # Only average when both bounds exist; float(None) would raise and
        # a silent 0 default would skew the midpoint.
        if max_ma is None or min_ma is None:
            mid_ma = None
        else:
            mid_ma = (float(max_ma) + float(min_ma)) / 2
        results.append({
            # The PBDB diversity docs list the interval name under "inm";
            # the previously used "idn" key is kept as a fallback.
            "interval_name": r.get("inm", r.get("idn", "")),
            "max_ma": max_ma,
            "min_ma": min_ma,
            "mid_ma": mid_ma,
            "sampled_in_bin": r.get("dsb", 0),
            # NOTE(review): confirm the "dor"/"dex"/"drt" keys against the
            # diversity endpoint's field list; they fall back to 0 if absent.
            "n_originations": r.get("dor", 0),
            "n_extinctions": r.get("dex", 0),
            "range_through": r.get("drt", 0),
        })

    # Passing explicit columns keeps the schema stable for empty responses,
    # which previously produced a column-less frame and a KeyError below.
    df = pd.DataFrame(results, columns=columns)
    max_diversity = df["sampled_in_bin"].max() if not df.empty else 0
    print(f"PBDB diversity: {len(df)} intervals, "
          f"max diversity={max_diversity} {rank}")
    return df
189
+ ```
190
+
191
+ ## 4. 古生物学統合パイプライン
192
+
193
+ ```python
194
def paleobiology_pipeline(taxon, interval=None,
                          output_dir="results"):
    """
    Run the combined paleobiology workflow and write its tables as CSV.

    Steps, in order: occurrence search, taxon search, diversity curve, and
    a per-interval geographic summary of the occurrences. The summary is
    produced only when the occurrence table has "lat" and "lng" columns.

    Parameters:
        taxon: str — base taxon name (e.g. "Dinosauria")
        interval: str — geological interval (optional); applied to the
            occurrence search only
        output_dir: str — directory for the CSV files; created if missing

    Returns:
        dict with the DataFrames under "occurrences", "taxa" and
        "diversity".
    """
    from pathlib import Path

    out_root = Path(output_dir)
    out_root.mkdir(parents=True, exist_ok=True)

    def _dump(frame, filename):
        # Write one table into the output directory and hand it back.
        frame.to_csv(out_root / filename, index=False)
        return frame

    # 1) Occurrences
    occ = _dump(pbdb_search_occurrences(taxon=taxon, interval=interval),
                "occurrences.csv")

    # 2) Taxa
    taxa = _dump(pbdb_search_taxa(name=taxon), "taxa.csv")

    # 3) Diversity curve
    diversity = _dump(pbdb_diversity_curve(taxon), "diversity.csv")

    # 4) Geographic summary: record count and mean coordinates per interval
    if {"lat", "lng"}.issubset(occ.columns):
        per_interval = occ.groupby("early_interval")
        geo_summary = per_interval.agg(
            n_records=("occurrence_no", "count"),
            mean_lat=("lat", "mean"),
            mean_lng=("lng", "mean"),
        ).reset_index()
        _dump(geo_summary, "geo_summary.csv")

    print(f"Paleobiology pipeline: {out_root}")
    return dict(occurrences=occ, taxa=taxa, diversity=diversity)
235
+ ```
236
+
237
+ ---
238
+
239
+ ## ToolUniverse 連携
240
+
241
+ | TU Key | ツール名 | 連携内容 |
242
+ |--------|---------|---------|
243
+ | `paleobiology` | Paleobiology Database | 化石産出・分類群・コレクション検索 |
244
+
245
+ ## パイプライン統合
246
+
247
+ ```
248
+ phylogenetics → paleobiology → environmental-ecology
249
+ (系統解析) (化石記録) (GBIF/生態)
250
+ │ │ ↓
251
+ taxonomy ─────────┘ environmental-geodata
252
+ (分類体系) │ (環境モデリング)
253
+
254
+ macroevolution
255
+ (大進化パターン)
256
+ ```
257
+
258
+ ## パイプライン出力
259
+
260
+ | ファイル | 説明 | 次スキル |
261
+ |---------|------|---------|
262
+ | `results/occurrences.csv` | 化石産出記録 | → environmental-ecology |
263
+ | `results/taxa.csv` | 分類群情報 | → phylogenetics |
264
+ | `results/diversity.csv` | 多様性曲線 | → macroevolution |
265
+ | `results/geo_summary.csv` | 年代区間別の地理サマリ (産出件数・平均緯度・平均経度) | → environmental-geodata |
@@ -0,0 +1,280 @@
1
+ ---
2
+ name: scientific-parasite-genomics
3
+ description: |
4
+ 寄生虫ゲノミクススキル。PlasmoDB/VectorBase/ToxoDB REST API
5
+ による寄生虫ゲノム検索・遺伝子情報・薬剤標的同定・比較
6
+ ゲノミクス。直接 REST API 連携 (TU 外)。
7
+ tu_tools: []
8
+ ---
9
+
10
+ # Scientific Parasite Genomics
11
+
12
+ VEuPathDB ファミリー (PlasmoDB, VectorBase, ToxoDB, TriTrypDB)
13
+ の REST API を活用した寄生虫ゲノミクス解析パイプラインを提供
14
+ する。
15
+
16
+ ## When to Use
17
+
18
+ - マラリア原虫ゲノム (PlasmoDB) を検索するとき
19
+ - 蚊・ダニ等の媒介生物ゲノム (VectorBase) を検索するとき
20
+ - トキソプラズマゲノム (ToxoDB) を検索するとき
21
+ - トリパノソーマ/リーシュマニアゲノム (TriTrypDB) を検索するとき
22
+ - 寄生虫の薬剤標的候補を同定するとき
23
+ - 寄生虫間の比較ゲノミクスを実施するとき
24
+
25
+ ---
26
+
27
+ ## Quick Start
28
+
29
+ ## 1. VEuPathDB 遺伝子検索
30
+
31
+ ```python
32
+ import requests
33
+ import pandas as pd
34
+ import numpy as np
35
+
36
# Service root URL for each supported VEuPathDB site, keyed by the short
# name that the functions in this document accept as their `db` argument.
VEUPATHDB_SITES = {
    "plasmo": "https://plasmodb.org/plasmo/service",
    "vector": "https://vectorbase.org/vectorbase/service",
    "toxo": "https://toxodb.org/toxo/service",
    "tritryp": "https://tritrypdb.org/tritrypdb/service",
}
42
+
43
+
44
def veupathdb_search_genes(organism, query, db="plasmo",
                           limit=100):
    """
    Run a gene text search against a VEuPathDB site.

    POSTs a JSON search/report configuration to the site's
    ``GenesByTextSearch`` URL and flattens the returned records.

    Parameters:
        organism: str — organism name (e.g. "Plasmodium falciparum 3D7")
        query: str — search keyword (e.g. "kinase", "transporter")
        db: str — site key ("plasmo", "vector", "toxo", "tritryp"); an
            unknown key falls back to the "plasmo" site
        limit: int — maximum number of records to request

    Returns:
        pandas.DataFrame with one row per gene. When the response holds no
        records the frame is empty and has no columns.

    Raises:
        requests.HTTPError: if the service answers with an error status.
    """
    # (output column, report attribute requested and read back, default)
    attribute_spec = (
        ("gene_id", "primary_key", ""),
        ("gene_name", "gene_name", ""),
        ("product", "gene_product", ""),
        ("gene_type", "gene_type", ""),
        ("chromosome", "chromosome", ""),
        ("start", "start_min", None),
        ("end", "end_max", None),
        ("strand", "strand", ""),
    )

    service_root = VEUPATHDB_SITES.get(db, VEUPATHDB_SITES["plasmo"])
    # NOTE(review): confirm against the VEuPathDB web-service docs that this
    # search URL and these parameter names match the live API.
    endpoint = f"{service_root}/record-types/gene/searches/GenesByTextSearch"

    search_config = {
        "parameters": {
            "text_expression": query,
            "text_fields": "Gene ID,Gene Name or Symbol,Gene product",
            "organism": [organism],
        }
    }
    report_config = {
        "attributes": [attr for _, attr, _ in attribute_spec],
        "pagination": {"offset": 0, "numRecords": limit},
    }

    response = requests.post(
        endpoint,
        json={"searchConfig": search_config, "reportConfig": report_config},
        headers={"Content-Type": "application/json"},
        timeout=60,
    )
    response.raise_for_status()
    body = response.json()

    rows = []
    for record in body.get("records", []):
        values = record.get("attributes", {})
        rows.append({
            column: values.get(attr, default)
            for column, attr, default in attribute_spec
        })

    df = pd.DataFrame(rows)
    print(f"VEuPathDB ({db}) genes: {len(df)} results "
          f"(organism={organism}, query={query})")
    return df
99
+ ```
100
+
101
+ ## 2. 遺伝子機能アノテーション
102
+
103
+ ```python
104
def veupathdb_gene_annotation(gene_id, db="plasmo"):
    """
    Fetch functional annotation for a single gene from a VEuPathDB site.

    Issues a GET request for the gene record and collects selected
    attributes plus the GO-term and InterPro tables.

    Parameters:
        gene_id: str — gene identifier (e.g. "PF3D7_1133400")
        db: str — site key; an unknown key falls back to the "plasmo" site

    Returns:
        dict with scalar fields (gene_id, gene_name, product,
        molecular_weight, isoelectric_point, signal_peptide,
        transmembrane_domains) and two lists of dicts, "go_terms" and
        "domains".

    Raises:
        requests.HTTPError: if the service answers with an error status.
    """
    # (output field, attribute key in the response)
    scalar_fields = (
        ("gene_name", "gene_name"),
        ("product", "gene_product"),
        ("molecular_weight", "molecular_weight"),
        ("isoelectric_point", "isoelectric_point"),
        ("signal_peptide", "signal_peptide"),
        ("transmembrane_domains", "transmembrane_domains"),
    )

    service_root = VEUPATHDB_SITES.get(db, VEUPATHDB_SITES["plasmo"])
    response = requests.get(
        f"{service_root}/record-types/gene/records/{gene_id}",
        params={
            "attributes": "all",
            "tables": "GoTerms,InterPro,MetabolicPathways,PubMed,EcNumber",
        },
        timeout=30,
    )
    response.raise_for_status()
    record = response.json()

    attrs = record.get("attributes", {})
    tables = record.get("tables", {})

    annotation = {"gene_id": gene_id}
    for field, source_key in scalar_fields:
        annotation[field] = attrs.get(source_key, "")

    # GO terms
    go_terms = [
        {
            "go_id": row.get("go_id", ""),
            "go_term": row.get("go_term_name", ""),
            "ontology": row.get("ontology", ""),
            "evidence": row.get("evidence_code", ""),
        }
        for row in tables.get("GoTerms", [])
    ]
    annotation["go_terms"] = go_terms

    # InterPro domains
    domains = [
        {
            "interpro_id": row.get("interpro_primary_id", ""),
            "name": row.get("interpro_name", ""),
            "description": row.get("interpro_description", ""),
        }
        for row in tables.get("InterPro", [])
    ]
    annotation["domains"] = domains

    print(f"VEuPathDB annotation: {gene_id}, "
          f"{len(go_terms)} GO terms, {len(domains)} domains")
    return annotation
161
+ ```
162
+
163
+ ## 3. 薬剤標的候補スクリーニング
164
+
165
+ ```python
166
def parasite_drug_target_screen(organism, db="plasmo",
                                essentiality_threshold=0.5):
    """
    Screen a parasite genome for candidate drug targets.

    Runs three keyword searches ("kinase", "protease", "transporter") via
    veupathdb_search_genes, merges the hits, removes duplicate gene IDs and
    labels every gene with a target class.

    Parameters:
        organism: str — organism name
        db: str — site key passed through to the gene search
        essentiality_threshold: float — accepted for interface
            compatibility; the current heuristic does not use it

    Returns:
        pandas.DataFrame of unique candidate genes with an added
        "target_class" column. A gene found by several searches keeps the
        label of the last matching search (transporter over protease over
        kinase). The frame is empty, but still carries its columns, when no
        search returns anything.
    """
    target_classes = ("kinase", "protease", "transporter")

    # Searches run in this fixed order; the dict preserves it for labelling.
    hits = {
        label: veupathdb_search_genes(organism, label, db=db)
        for label in target_classes
    }

    # A search with no results yields a frame without any columns, so
    # indexing "gene_id" on it would raise KeyError. Work only with the
    # frames that actually carry gene IDs.
    populated = {
        label: frame for label, frame in hits.items()
        if "gene_id" in frame.columns
    }

    if populated:
        all_targets = pd.concat(list(populated.values()),
                                ignore_index=True)
        all_targets = all_targets.drop_duplicates(subset=["gene_id"])

        # Heuristic class label; later searches overwrite earlier ones.
        all_targets["target_class"] = "unknown"
        for label, frame in populated.items():
            matched = all_targets["gene_id"].isin(frame["gene_id"])
            all_targets.loc[matched, "target_class"] = label
    else:
        all_targets = pd.DataFrame(columns=[
            "gene_id", "gene_name", "product", "gene_type",
            "chromosome", "start", "end", "strand", "target_class",
        ])

    print(f"Drug target screen: {len(all_targets)} candidates "
          f"(kinases={len(hits['kinase'])}, "
          f"proteases={len(hits['protease'])}, "
          f"transporters={len(hits['transporter'])})")
    return all_targets
204
+ ```
205
+
206
+ ## 4. 寄生虫ゲノミクス統合パイプライン
207
+
208
+ ```python
209
def parasite_genomics_pipeline(organism, query,
                               db="plasmo",
                               output_dir="results"):
    """
    Run the combined parasite-genomics workflow and write CSV outputs.

    Steps, in order: gene text search, annotation of up to the first ten
    genes found, and the drug-target screen.

    Parameters:
        organism: str — organism name (e.g. "Plasmodium falciparum 3D7")
        query: str — search keyword for the gene search
        db: str — site key passed through to every step
        output_dir: str — directory for the CSV files; created if missing

    Returns:
        dict with "genes" (DataFrame), "annotations" (list of annotation
        dicts, including their nested lists) and "drug_targets" (DataFrame).
    """
    from pathlib import Path

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Gene search
    genes = veupathdb_search_genes(organism, query, db=db)
    genes.to_csv(output_dir / "genes.csv", index=False)

    # 2) Annotate the top genes. An empty search result has no "gene_id"
    # column, so guard the lookup instead of raising KeyError.
    if "gene_id" in genes.columns:
        top_gene_ids = list(genes["gene_id"].head(10))
    else:
        top_gene_ids = []

    annotations = []
    for gene_id in top_gene_ids:
        try:
            ann = veupathdb_gene_annotation(gene_id, db=db)
        except Exception as exc:
            # Best effort: one failing record must not abort the pipeline,
            # but the failure is reported rather than silently dropped.
            print(f"VEuPathDB annotation failed: {gene_id} ({exc})")
            continue
        annotations.append(ann)

    # The CSV keeps only scalar fields; nested lists (GO terms, domains)
    # remain available in the returned "annotations" list.
    ann_df = pd.DataFrame([
        {k: v for k, v in a.items() if not isinstance(v, list)}
        for a in annotations
    ])
    ann_df.to_csv(output_dir / "annotations.csv", index=False)

    # 3) Drug-target screen
    targets = parasite_drug_target_screen(organism, db=db)
    targets.to_csv(output_dir / "drug_targets.csv", index=False)

    print(f"Parasite genomics pipeline: {output_dir}")
    return {
        "genes": genes,
        "annotations": annotations,
        "drug_targets": targets,
    }
253
+ ```
254
+
255
+ ---
256
+
257
+ ## ToolUniverse 連携
258
+
259
+ 直接 REST API 使用 (VEuPathDB は ToolUniverse 外)。
260
+
261
+ ## パイプライン統合
262
+
263
+ ```
264
+ infectious-disease → parasite-genomics → phylogenetics
265
+ (病原体情報) (寄生虫ゲノム) (系統解析)
266
+ │ │ ↓
267
+ drug-discovery ─────────┘ comparative-genomics
268
+ (薬剤探索) │ (比較ゲノミクス)
269
+
270
+ pathway-enrichment
271
+ (パスウェイ解析)
272
+ ```
273
+
274
+ ## パイプライン出力
275
+
276
+ | ファイル | 説明 | 次スキル |
277
+ |---------|------|---------|
278
+ | `results/genes.csv` | 遺伝子一覧 | → phylogenetics |
279
+ | `results/annotations.csv` | 機能アノテーション | → pathway-enrichment |
280
+ | `results/drug_targets.csv` | 薬剤標的候補 | → drug-discovery |