@nahisaho/satori 0.15.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +67 -29
- package/package.json +1 -1
- package/src/.github/skills/scientific-data-submission/SKILL.md +357 -0
- package/src/.github/skills/scientific-encode-screen/SKILL.md +315 -0
- package/src/.github/skills/scientific-environmental-geodata/SKILL.md +255 -0
- package/src/.github/skills/scientific-geo-expression/SKILL.md +274 -0
- package/src/.github/skills/scientific-gpu-singlecell/SKILL.md +296 -0
- package/src/.github/skills/scientific-human-cell-atlas/SKILL.md +294 -0
- package/src/.github/skills/scientific-marine-ecology/SKILL.md +429 -0
- package/src/.github/skills/scientific-metabolic-atlas/SKILL.md +263 -0
- package/src/.github/skills/scientific-nci60-screening/SKILL.md +307 -0
- package/src/.github/skills/scientific-paleobiology/SKILL.md +265 -0
- package/src/.github/skills/scientific-parasite-genomics/SKILL.md +280 -0
- package/src/.github/skills/scientific-plant-biology/SKILL.md +321 -0
- package/src/.github/skills/scientific-rrna-taxonomy/SKILL.md +379 -0
- package/src/.github/skills/scientific-scatac-signac/SKILL.md +300 -0
- package/src/.github/skills/scientific-squidpy-advanced/SKILL.md +251 -0
- package/src/.github/skills/scientific-toxicology-env/SKILL.md +309 -0
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-nci60-screening
|
|
3
|
+
description: |
|
|
4
|
+
NCI-60 がん細胞株薬剤応答スキル。CellMiner API 薬剤感受性・
|
|
5
|
+
NCI-60 GI50/LC50 データ・DepMap cancer dependency 統合・
|
|
6
|
+
薬剤-分子マーカー相関・細胞株パネル比較解析。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific NCI-60 Screening
|
|
10
|
+
|
|
11
|
+
CellMiner / NCI-60 / DepMap を活用したがん細胞株薬剤応答
|
|
12
|
+
パイプラインを提供する。高スループットスクリーニングデータの
|
|
13
|
+
統合解析、薬剤感受性マーカー同定、パネル比較。
|
|
14
|
+
|
|
15
|
+
## When to Use
|
|
16
|
+
|
|
17
|
+
- NCI-60 細胞株パネルの薬剤応答 (GI50) を解析するとき
|
|
18
|
+
- CellMiner から化合物活性データを取得するとき
|
|
19
|
+
- 薬剤感受性と分子マーカー (変異/発現) の相関を調べるとき
|
|
20
|
+
- DepMap CRISPR/RNAi 依存性データを併用するとき
|
|
21
|
+
- 細胞株間の薬剤応答パターンを比較するとき
|
|
22
|
+
- 新規化合物のスクリーニング結果を NCI-60 と比較するとき
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
## 1. CellMiner データ取得
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import requests
|
|
32
|
+
import pandas as pd
|
|
33
|
+
import numpy as np
|
|
34
|
+
from io import StringIO
|
|
35
|
+
|
|
36
|
+
# Base URL of the CellMiner API; the compound search and activity endpoints are built from it.
CELLMINER_BASE = "https://discover.nci.nih.gov/cellminer/api"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def cellminer_drug_activity(nsc_id=None, drug_name=None):
    """
    Fetch NCI-60 drug activity data for one compound from CellMiner.

    The compound is identified by ``nsc_id`` when given. Otherwise
    ``drug_name`` is resolved through the compound search endpoint and the
    NSC ID of the first hit is used.

    Parameters:
        nsc_id: str — NSC ID (e.g. "740")
        drug_name: str — drug name (e.g. "Paclitaxel")

    Returns:
        pd.DataFrame — one row per cell line with the columns cell_line,
        tissue, gi50_log, tgi_log and lc50_log. The frame is empty (but
        keeps those columns) when the name search yields no usable hit or
        the response carries no activity entries.

    Raises:
        ValueError: if neither ``nsc_id`` nor ``drug_name`` is provided.
        requests.HTTPError: if a request returns an error status.
    """
    columns = ["cell_line", "tissue", "gi50_log", "tgi_log", "lc50_log"]

    if not nsc_id and not drug_name:
        # Without this guard the request URL would never be assigned.
        raise ValueError("Either nsc_id or drug_name must be provided")

    if not nsc_id:
        search_resp = requests.get(
            f"{CELLMINER_BASE}/compound/search",
            params={"name": drug_name},
            timeout=30,
        )
        search_resp.raise_for_status()
        compounds = search_resp.json()
        nsc_id = compounds[0].get("nsc", "") if compounds else ""
        if not nsc_id:
            # Covers both "no hits" and "hit without an NSC ID"; the latter
            # would otherwise produce a malformed activity URL.
            print(f"Drug not found: {drug_name}")
            return pd.DataFrame(columns=columns)

    resp = requests.get(f"{CELLMINER_BASE}/compound/{nsc_id}/activity", timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = [
        {
            "cell_line": cell_line,
            "tissue": values.get("tissue", ""),
            "gi50_log": values.get("gi50", None),
            "tgi_log": values.get("tgi", None),
            "lc50_log": values.get("lc50", None),
        }
        for cell_line, values in data.get("activity", {}).items()
    ]

    # Passing the columns explicitly keeps the schema stable for empty results.
    df = pd.DataFrame(results, columns=columns)
    print(f"CellMiner: NSC {nsc_id} → {len(df)} cell lines")
    return df
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## 2. NCI-60 バルクデータ取得
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
def nci60_bulk_download(data_type="drug_activity"):
    """
    Download one NCI-60 bulk dataset from CellMiner and parse it as CSV.

    Parameters:
        data_type: str — dataset key. "drug_activity", "gene_expression"
            and "mutation" are mapped to download URLs; any other value
            (including "copy_number", which has no URL mapped here)
            raises ValueError.

    Returns:
        pd.DataFrame — the downloaded CSV parsed with pandas.

    Raises:
        ValueError: if ``data_type`` has no download URL.
        requests.HTTPError: if the download returns an error status.
    """
    dataset_urls = {
        "drug_activity": "https://discover.nci.nih.gov/cellminer/download/DTP_NCI60_ZSCORE.csv",
        "gene_expression": "https://discover.nci.nih.gov/cellminer/download/GeneExpr_RMA.csv",
        "mutation": "https://discover.nci.nih.gov/cellminer/download/Exome_Mutation.csv",
    }

    # Reject unknown keys before any network traffic happens.
    if data_type not in dataset_urls:
        raise ValueError(f"Unknown data type: {data_type}")

    # Bulk files are large, hence the longer timeout.
    response = requests.get(dataset_urls[data_type], timeout=120)
    response.raise_for_status()

    table = pd.read_csv(StringIO(response.text))
    print(f"NCI-60 bulk: {data_type} → {table.shape}")
    return table
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## 3. 薬剤-分子マーカー相関
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from scipy import stats
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def drug_marker_correlation(drug_activity, molecular_data,
                            marker_type="expression", top_n=50,
                            min_samples=10):
    """
    Correlate drug sensitivity with molecular markers across cell lines.

    For every marker a Pearson correlation between the drug's
    ``gi50_log`` values and the marker values is computed over the cell
    lines that have a numeric value in both tables. P-values are
    Bonferroni-adjusted by the number of markers actually tested and
    capped at 1.

    Parameters:
        drug_activity: pd.DataFrame — drug data with the columns
            "cell_line" and "gi50_log"
        molecular_data: pd.DataFrame — either long format
            (cell_line, gene, value) or wide format (cell_line plus one
            column per gene)
        marker_type: str — "expression", "mutation", "copy_number";
            accepted for interface compatibility, not used in the
            computation
        top_n: int — number of top-ranked genes to return
        min_samples: int — minimum number of shared cell lines with
            numeric values required to test a marker

    Returns:
        pd.DataFrame — columns gene, pearson_r, p_value, n_samples and
        adj_p, sorted by p_value and truncated to ``top_n`` rows. Empty
        (with those columns) when no marker could be tested.

    Note:
        Cell lines are assumed to be unique within ``drug_activity`` and
        within each marker.
    """
    result_columns = ["gene", "pearson_r", "p_value", "n_samples", "adj_p"]

    # Restrict both tables to the cell lines they share.
    common_lines = set(drug_activity["cell_line"]) & set(molecular_data["cell_line"])
    drug_sub = drug_activity[drug_activity["cell_line"].isin(common_lines)]
    mol_sub = molecular_data[molecular_data["cell_line"].isin(common_lines)]

    # Missing or non-numeric GI50 values cannot take part in a correlation.
    drug_values = pd.to_numeric(
        drug_sub.set_index("cell_line")["gi50_log"], errors="coerce"
    ).dropna()

    if "gene" in mol_sub.columns:
        # Long format: a single grouping pass instead of re-filtering the
        # whole table once per gene.
        marker_series = (
            (gene, rows.set_index("cell_line")["value"])
            for gene, rows in mol_sub.groupby("gene", sort=False)
        )
    else:
        # Wide format: every column other than cell_line is a marker,
        # wherever cell_line happens to sit in the column order.
        marker_series = mol_sub.set_index("cell_line").items()

    correlations = []
    for gene, raw_values in marker_series:
        gene_values = pd.to_numeric(raw_values, errors="coerce").dropna()
        shared = drug_values.index.intersection(gene_values.index)
        if len(shared) < min_samples:
            continue

        x = drug_values.loc[shared]
        y = gene_values.loc[shared]
        # Pearson's r is undefined when either side is constant.
        if x.nunique() < 2 or y.nunique() < 2:
            continue

        r, p = stats.pearsonr(x, y)
        correlations.append({
            "gene": gene,
            "pearson_r": r,
            "p_value": p,
            "n_samples": len(shared),
        })

    if not correlations:
        print("Drug-marker correlation: 0 genes tested")
        return pd.DataFrame(columns=result_columns)

    corr_df = pd.DataFrame(correlations)
    # Bonferroni correction; an adjusted p-value is a probability, so cap at 1.
    corr_df["adj_p"] = (corr_df["p_value"] * len(corr_df)).clip(upper=1.0)
    corr_df = corr_df.sort_values("p_value")

    print(f"Drug-marker correlation: {len(corr_df)} genes tested, "
          f"top |r| = {corr_df['pearson_r'].abs().max():.3f}")
    return corr_df.head(top_n)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## 4. 組織別薬剤応答パターン
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
def tissue_response_pattern(drug_activity, min_lines=3):
    """
    Summarise drug response per tissue of origin.

    GI50 values are aggregated per tissue, tissues with fewer than
    ``min_lines`` non-missing GI50 values are dropped, and each remaining
    tissue gets a score: (tissue mean - overall mean) / overall standard
    deviation, where the overall statistics use every row of
    ``drug_activity``. Scores below -0.5 are printed as "Sensitive",
    scores above 0.5 as "Resistant", everything else as "Neutral".

    Parameters:
        drug_activity: pd.DataFrame — GI50 data with the columns
            "tissue" and "gi50_log"
        min_lines: int — minimum number of cell lines per tissue

    Returns:
        pd.DataFrame — columns tissue, n_lines, mean_gi50, std_gi50,
        min_gi50, max_gi50 and sensitivity_z, sorted by mean_gi50
        ascending. Empty (with those columns) when ``drug_activity`` is
        empty.
    """
    summary_columns = [
        "tissue", "n_lines", "mean_gi50", "std_gi50",
        "min_gi50", "max_gi50", "sensitivity_z",
    ]

    # An empty frame (e.g. from a failed drug lookup) has no "tissue"
    # column to group on; report zero tissues instead of raising KeyError.
    if drug_activity.empty:
        print("Tissue patterns: 0 tissues")
        return pd.DataFrame(columns=summary_columns)

    tissue_stats = drug_activity.groupby("tissue").agg(
        n_lines=("gi50_log", "count"),
        mean_gi50=("gi50_log", "mean"),
        std_gi50=("gi50_log", "std"),
        min_gi50=("gi50_log", "min"),
        max_gi50=("gi50_log", "max"),
    ).reset_index()

    tissue_stats = tissue_stats[tissue_stats["n_lines"] >= min_lines]
    tissue_stats = tissue_stats.sort_values("mean_gi50")

    # Sensitivity / resistance score relative to the whole panel.
    overall_mean = drug_activity["gi50_log"].mean()
    overall_std = drug_activity["gi50_log"].std()
    tissue_stats["sensitivity_z"] = (
        (tissue_stats["mean_gi50"] - overall_mean) / overall_std
    )

    print(f"Tissue patterns: {len(tissue_stats)} tissues")
    for _, row in tissue_stats.iterrows():
        label = "Sensitive" if row["sensitivity_z"] < -0.5 else (
            "Resistant" if row["sensitivity_z"] > 0.5 else "Neutral"
        )
        print(f"  {row['tissue']}: GI50={row['mean_gi50']:.2f} ({label})")
    return tissue_stats
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## 5. DepMap 統合スクリーニング
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
# Base URL of the DepMap portal API; the custom download endpoint is built from it.
DEPMAP_BASE = "https://depmap.org/portal/api"
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def depmap_gene_dependency(gene_symbol, dataset="Chronos_Combined"):
    """
    Fetch per-cell-line dependency scores for one gene from DepMap.

    Parameters:
        gene_symbol: str — gene symbol
        dataset: str — dataset name sent to the endpoint

    Returns:
        pd.DataFrame — one row per entry of the response's "data" list
        with the columns cell_line, lineage and dependency_score. When at
        least one row is present, the number of lines scoring below -0.5
        is printed as a summary.

    Raises:
        requests.HTTPError: if the request returns an error status.
    """
    response = requests.get(
        f"{DEPMAP_BASE}/download/custom",
        params={"gene": gene_symbol, "dataset": dataset},
        timeout=30,
    )
    response.raise_for_status()
    payload = response.json()

    rows = [
        {
            "cell_line": item.get("cell_line_name", ""),
            "lineage": item.get("lineage", ""),
            "dependency_score": item.get("score", None),
        }
        for item in payload.get("data", [])
    ]

    table = pd.DataFrame(rows)
    if len(table) > 0:
        # Lines scoring below -0.5 are counted as dependent on the gene.
        dependent_count = (table["dependency_score"] < -0.5).sum()
        print(f"DepMap {gene_symbol}: {len(table)} lines, "
              f"{dependent_count} dependent (score < -0.5)")
    return table
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## 6. NCI-60 統合スクリーニングパイプライン
|
|
242
|
+
|
|
243
|
+
```python
|
|
244
|
+
def nci60_screening_pipeline(drug_name=None, nsc_id=None,
                             target_gene=None, output_dir="results"):
    """
    Run the combined NCI-60 + DepMap screening pipeline for one drug.

    Steps: fetch the drug's NCI-60 activity, summarise it per tissue,
    correlate it with the bulk gene-expression data and, when
    ``target_gene`` is given, fetch that gene's DepMap dependency scores.
    Each result that is produced is also written as CSV into
    ``output_dir``.

    Parameters:
        drug_name: str — drug name
        nsc_id: str — NSC ID
        target_gene: str — target gene (enables the DepMap step)
        output_dir: str — output directory (created if missing)

    Returns:
        dict — keys "drug_activity", "tissue_patterns" and
        "correlations", plus "depmap_dependency" when ``target_gene`` was
        given. When no drug activity was found, the tissue and correlation
        entries are empty DataFrames and their CSV files are not written.
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) NCI-60 drug activity
    drug_data = cellminer_drug_activity(nsc_id=nsc_id, drug_name=drug_name)
    drug_data.to_csv(output_dir / "drug_activity.csv", index=False)

    if drug_data.empty:
        # Nothing to analyse; skip the large expression download and the
        # steps that need the activity columns.
        tissue_patterns = pd.DataFrame()
        correlations = pd.DataFrame()
    else:
        # 2) Per-tissue response pattern
        tissue_patterns = tissue_response_pattern(drug_data)
        tissue_patterns.to_csv(output_dir / "tissue_patterns.csv", index=False)

        # 3) Correlation with gene expression
        expr_data = nci60_bulk_download("gene_expression")
        correlations = drug_marker_correlation(drug_data, expr_data)
        correlations.to_csv(output_dir / "marker_correlations.csv", index=False)

    results = {
        "drug_activity": drug_data,
        "tissue_patterns": tissue_patterns,
        "correlations": correlations,
    }

    # 4) DepMap step (only when a target gene is given); independent of
    #    the drug data, so it runs even if the drug lookup came back empty.
    if target_gene:
        depmap_data = depmap_gene_dependency(target_gene)
        depmap_data.to_csv(output_dir / "depmap_dependency.csv", index=False)
        # Hand the scores back to the caller as well as writing them to disk.
        results["depmap_dependency"] = depmap_data

    print(f"Pipeline complete: {output_dir}")
    return results
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
---
|
|
286
|
+
|
|
287
|
+
## パイプライン統合
|
|
288
|
+
|
|
289
|
+
```
|
|
290
|
+
compound-screening → nci60-screening → precision-oncology
|
|
291
|
+
(ZINC/VS) (NCI-60/DepMap) (MTB レポート)
|
|
292
|
+
│ │ ↓
|
|
293
|
+
drug-target-profiling ──────┘ cancer-genomics
|
|
294
|
+
(ChEMBL/DGIdb) │ (COSMIC/DepMap)
|
|
295
|
+
↓
|
|
296
|
+
cell-line-resources
|
|
297
|
+
(Cellosaurus)
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
## パイプライン出力
|
|
301
|
+
|
|
302
|
+
| ファイル | 説明 | 次スキル |
|
|
303
|
+
|---------|------|---------|
|
|
304
|
+
| `results/drug_activity.csv` | NCI-60 GI50 データ | → precision-oncology |
|
|
305
|
+
| `results/tissue_patterns.csv` | 組織別応答パターン | → cancer-genomics |
|
|
306
|
+
| `results/marker_correlations.csv` | 薬剤-マーカー相関 | → drug-target-profiling |
|
|
307
|
+
| `results/depmap_dependency.csv` | DepMap 依存性スコア | → cell-line-resources |
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-paleobiology
|
|
3
|
+
description: |
|
|
4
|
+
古生物学データベーススキル。Paleobiology Database (PBDB) REST
|
|
5
|
+
API による化石産出記録・分類群・コレクション検索、地質年代
|
|
6
|
+
多様性曲線・古地理解析。ToolUniverse 連携: paleobiology。
|
|
7
|
+
tu_tools:
|
|
8
|
+
- key: paleobiology
|
|
9
|
+
name: Paleobiology Database
|
|
10
|
+
description: PBDB 化石産出記録・分類群・コレクション検索
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Scientific Paleobiology
|
|
14
|
+
|
|
15
|
+
Paleobiology Database (PBDB) REST API を活用した古生物学的
|
|
16
|
+
多様性解析パイプラインを提供する。
|
|
17
|
+
|
|
18
|
+
## When to Use
|
|
19
|
+
|
|
20
|
+
- 化石産出記録 (occurrence) を検索するとき
|
|
21
|
+
- 分類群 (taxa) の地質年代分布を調べるとき
|
|
22
|
+
- 化石コレクション/産地情報を検索するとき
|
|
23
|
+
- 地質年代を通じた多様性曲線を作成するとき
|
|
24
|
+
- 大量絶滅イベントのパターンを分析するとき
|
|
25
|
+
- 古地理的分布を解析するとき
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Quick Start
|
|
30
|
+
|
|
31
|
+
## 1. PBDB 化石産出記録検索
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
import requests
|
|
35
|
+
import pandas as pd
|
|
36
|
+
import numpy as np
|
|
37
|
+
|
|
38
|
+
# Base URL of the Paleobiology Database data service (data1.2 path); endpoint paths are appended to it.
PBDB_BASE = "https://paleobiodb.org/data1.2"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def pbdb_search_occurrences(taxon=None, interval=None,
                            lngmin=None, lngmax=None,
                            latmin=None, latmax=None, limit=1000):
    """
    Search fossil occurrence records in the Paleobiology Database.

    Parameters:
        taxon: str — taxon name (e.g. "Dinosauria", "Trilobita")
        interval: str — geological time interval (e.g. "Cretaceous", "Permian")
        lngmin: float — minimum longitude
        lngmax: float — maximum longitude
        latmin: float — minimum latitude
        latmax: float — maximum latitude
        limit: int — maximum number of records

    Returns:
        pd.DataFrame — one row per record with taxonomy, interval, age,
        coordinate, collection and reference columns.

    Note:
        The bounding box is only sent when all four bounds are given; a
        partial box is ignored.
    """
    # Output column -> (key in the response record, default when absent).
    field_map = {
        "occurrence_no": ("oid", ""),
        "taxon_name": ("tna", ""),
        "taxon_rank": ("rnk", ""),
        "phylum": ("phl", ""),
        "class": ("cll", ""),
        "order": ("odl", ""),
        "family": ("fml", ""),
        "early_interval": ("oei", ""),
        "late_interval": ("oli", ""),
        "max_ma": ("eag", None),
        "min_ma": ("lag", None),
        "lng": ("lng", None),
        "lat": ("lat", None),
        "collection_no": ("cid", ""),
        "reference_no": ("rid", ""),
    }

    query = {"show": "coords,phylo,time", "limit": limit}
    if taxon:
        query["base_name"] = taxon
    if interval:
        query["interval"] = interval

    bbox = {
        "lngmin": lngmin, "lngmax": lngmax,
        "latmin": latmin, "latmax": latmax,
    }
    if not any(bound is None for bound in bbox.values()):
        query.update(bbox)

    response = requests.get(f"{PBDB_BASE}/occs/list.json", params=query, timeout=30)
    response.raise_for_status()
    payload_records = response.json().get("records", [])

    rows = [
        {column: rec.get(key, default) for column, (key, default) in field_map.items()}
        for rec in payload_records
    ]

    table = pd.DataFrame(rows)
    print(f"PBDB occurrences: {len(table)} records "
          f"(taxon={taxon}, interval={interval})")
    return table
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## 2. PBDB 分類群情報検索
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
def pbdb_search_taxa(name=None, rank=None, interval=None, limit=500):
    """
    Search taxa in the Paleobiology Database.

    Parameters:
        name: str — taxon name (e.g. "Dinosauria")
        rank: str — taxonomic rank (e.g. "genus", "family", "order")
        interval: str — geological time interval
        limit: int — maximum number of records

    Returns:
        pd.DataFrame — one row per record with the columns taxon_no,
        taxon_name, rank, parent_name, n_occs, first_appearance,
        last_appearance and extant.
    """
    # Output column -> (key in the response record, default when absent).
    field_map = {
        "taxon_no": ("oid", ""),
        "taxon_name": ("nam", ""),
        "rank": ("rnk", ""),
        "parent_name": ("prl", ""),
        "n_occs": ("noc", 0),
        "first_appearance": ("fea", ""),
        "last_appearance": ("lla", ""),
        "extant": ("ext", ""),
    }

    query = {"show": "attr,app,size", "limit": limit}
    # Only truthy filters are forwarded to the service.
    optional_filters = (("base_name", name), ("rank", rank), ("interval", interval))
    for key, value in optional_filters:
        if value:
            query[key] = value

    response = requests.get(f"{PBDB_BASE}/taxa/list.json", params=query, timeout=30)
    response.raise_for_status()
    payload_records = response.json().get("records", [])

    rows = [
        {column: rec.get(key, default) for column, (key, default) in field_map.items()}
        for rec in payload_records
    ]

    table = pd.DataFrame(rows)
    print(f"PBDB taxa: {len(table)} records (name={name})")
    return table
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## 3. 地質年代多様性曲線
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
def pbdb_diversity_curve(taxon, time_resolution="stage",
                         rank="genus"):
    """
    Build a diversity-through-time table for a taxon from PBDB.

    Parameters:
        taxon: str — taxon name
        time_resolution: str — "stage", "epoch" or "period"
        rank: str — taxonomic rank to count ("genus", "family", ...).
            Singular rank names are translated to the plural values sent
            as the ``count`` parameter; other values pass through as-is.

    Returns:
        pd.DataFrame — one row per time interval with the columns
        interval_name, max_ma, min_ma, mid_ma, sampled_in_bin,
        n_originations, n_extinctions and range_through. Empty (with
        those columns) when the service returns no records.

    Raises:
        requests.HTTPError: if the request returns an error status.
    """
    columns = [
        "interval_name", "max_ma", "min_ma", "mid_ma", "sampled_in_bin",
        "n_originations", "n_extinctions", "range_through",
    ]
    # NOTE(review): the PBDB diversity docs appear to list plural values
    # for ``count`` (species, genera, families, orders) — confirm.
    count_values = {
        "species": "species",
        "genus": "genera",
        "family": "families",
        "order": "orders",
    }

    url = f"{PBDB_BASE}/occs/diversity.json"
    params = {
        "base_name": taxon,
        "count": count_values.get(rank, rank),
        "time_reso": time_resolution,
    }
    resp = requests.get(url, params=params, timeout=60)
    resp.raise_for_status()
    records = resp.json().get("records", [])

    results = []
    for r in records:
        max_ma = r.get("eag", 0)
        min_ma = r.get("lag", 0)
        # An explicit null age cannot be converted; leave the midpoint empty.
        if max_ma is None or min_ma is None:
            mid_ma = None
        else:
            mid_ma = (float(max_ma) + float(min_ma)) / 2
        results.append({
            "interval_name": r.get("idn", ""),
            "max_ma": r.get("eag", None),
            "min_ma": r.get("lag", None),
            "mid_ma": mid_ma,
            "sampled_in_bin": r.get("dsb", 0),
            # NOTE(review): confirm that "dor", "dex" and "drt" are field
            # names the diversity endpoint actually returns; they fall
            # back to 0 when absent.
            "n_originations": r.get("dor", 0),
            "n_extinctions": r.get("dex", 0),
            "range_through": r.get("drt", 0),
        })

    if not results:
        # Without rows there is no "sampled_in_bin" column to summarise.
        print(f"PBDB diversity: 0 intervals (taxon={taxon})")
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame(results)
    print(f"PBDB diversity: {len(df)} intervals, "
          f"max diversity={df['sampled_in_bin'].max()} {rank}")
    return df
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## 4. 古生物学統合パイプライン
|
|
192
|
+
|
|
193
|
+
```python
|
|
194
|
+
def paleobiology_pipeline(taxon, interval=None,
                          output_dir="results"):
    """
    Run the combined paleobiology pipeline for one taxon.

    Steps: fetch occurrence records, taxon records and the diversity
    curve, then summarise occurrence coordinates per early interval. Each
    result is written as CSV into ``output_dir``.

    Parameters:
        taxon: str — taxon name (e.g. "Dinosauria")
        interval: str — geological time interval (optional; applied to
            the occurrence search only)
        output_dir: str — output directory (created if missing)

    Returns:
        dict — keys "occurrences", "taxa" and "diversity", plus
        "geo_summary" when the occurrence table has lat/lng columns.
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Occurrence records
    occ = pbdb_search_occurrences(taxon=taxon, interval=interval)
    occ.to_csv(output_dir / "occurrences.csv", index=False)

    # 2) Taxon records
    taxa = pbdb_search_taxa(name=taxon)
    taxa.to_csv(output_dir / "taxa.csv", index=False)

    # 3) Diversity curve
    diversity = pbdb_diversity_curve(taxon)
    diversity.to_csv(output_dir / "diversity.csv", index=False)

    results = {
        "occurrences": occ,
        "taxa": taxa,
        "diversity": diversity,
    }

    # 4) Geographic summary
    if "lat" in occ.columns and "lng" in occ.columns:
        # Coerce coordinates to numbers on a copy so the mean is defined
        # even if they arrive as strings; the caller's table is untouched.
        coords = occ.assign(
            lat=pd.to_numeric(occ["lat"], errors="coerce"),
            lng=pd.to_numeric(occ["lng"], errors="coerce"),
        )
        geo_summary = coords.groupby("early_interval").agg(
            n_records=("occurrence_no", "count"),
            mean_lat=("lat", "mean"),
            mean_lng=("lng", "mean"),
        ).reset_index()
        geo_summary.to_csv(output_dir / "geo_summary.csv", index=False)
        # Hand the summary back to the caller as well as writing it to disk.
        results["geo_summary"] = geo_summary

    print(f"Paleobiology pipeline: {output_dir}")
    return results
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
---
|
|
238
|
+
|
|
239
|
+
## ToolUniverse 連携
|
|
240
|
+
|
|
241
|
+
| TU Key | ツール名 | 連携内容 |
|
|
242
|
+
|--------|---------|---------|
|
|
243
|
+
| `paleobiology` | Paleobiology Database | 化石産出・分類群・コレクション検索 |
|
|
244
|
+
|
|
245
|
+
## パイプライン統合
|
|
246
|
+
|
|
247
|
+
```
|
|
248
|
+
phylogenetics → paleobiology → environmental-ecology
|
|
249
|
+
(系統解析) (化石記録) (GBIF/生態)
|
|
250
|
+
│ │ ↓
|
|
251
|
+
taxonomy ─────────┘ environmental-geodata
|
|
252
|
+
(分類体系) │ (環境モデリング)
|
|
253
|
+
↓
|
|
254
|
+
macroevolution
|
|
255
|
+
(大進化パターン)
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
## パイプライン出力
|
|
259
|
+
|
|
260
|
+
| ファイル | 説明 | 次スキル |
|
|
261
|
+
|---------|------|---------|
|
|
262
|
+
| `results/occurrences.csv` | 化石産出記録 | → environmental-ecology |
|
|
263
|
+
| `results/taxa.csv` | 分類群情報 | → phylogenetics |
|
|
264
|
+
| `results/diversity.csv` | 多様性曲線 | → macroevolution |
|
|
265
|
+
| `results/geo_summary.csv` | 古地理サマリ | → environmental-geodata |
|