@nahisaho/satori 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,307 @@
1
+ ---
2
+ name: scientific-nci60-screening
3
+ description: |
4
+ NCI-60 がん細胞株薬剤応答スキル。CellMiner API 薬剤感受性・
5
+ NCI-60 GI50/LC50 データ・DepMap cancer dependency 統合・
6
+ 薬剤-分子マーカー相関・細胞株パネル比較解析。
7
+ ---
8
+
9
+ # Scientific NCI-60 Screening
10
+
11
+ CellMiner / NCI-60 / DepMap を活用したがん細胞株薬剤応答
12
+ パイプラインを提供する。高スループットスクリーニングデータの
13
+ 統合解析、薬剤感受性マーカー同定、パネル比較。
14
+
15
+ ## When to Use
16
+
17
+ - NCI-60 細胞株パネルの薬剤応答 (GI50) を解析するとき
18
+ - CellMiner から化合物活性データを取得するとき
19
+ - 薬剤感受性と分子マーカー (変異/発現) の相関を調べるとき
20
+ - DepMap CRISPR/RNAi 依存性データを併用するとき
21
+ - 細胞株間の薬剤応答パターンを比較するとき
22
+ - 新規化合物のスクリーニング結果を NCI-60 と比較するとき
23
+
24
+ ---
25
+
26
+ ## Quick Start
27
+
28
+ ## 1. CellMiner データ取得
29
+
30
+ ```python
31
+ import requests
32
+ import pandas as pd
33
+ import numpy as np
34
+ from io import StringIO
35
+
36
+ CELLMINER_BASE = "https://discover.nci.nih.gov/cellminer/api"
37
+
38
+
39
def cellminer_drug_activity(nsc_id=None, drug_name=None):
    """
    Fetch NCI-60 drug activity for a single compound from CellMiner.

    The compound is addressed by ``nsc_id``. When only ``drug_name`` is
    given, the name is resolved through the compound search endpoint and
    the first hit is used.

    Parameters:
        nsc_id: str — NSC ID (e.g. "740"). Takes precedence over
            ``drug_name`` when both are supplied.
        drug_name: str — drug name (e.g. "Paclitaxel").

    Returns:
        pd.DataFrame with the columns cell_line, tissue, gi50_log,
        tgi_log and lc50_log, one row per cell line. The frame is empty
        (but keeps these columns) when the name search finds no usable
        compound or the response carries no activity entries.

    Raises:
        ValueError: if neither ``nsc_id`` nor ``drug_name`` is given.
        requests.HTTPError: if a request returns an error status.
    """
    columns = ["cell_line", "tissue", "gi50_log", "tgi_log", "lc50_log"]

    # Fail fast with a clear message instead of hitting an unbound ``url``.
    if not nsc_id and not drug_name:
        raise ValueError("Either nsc_id or drug_name must be provided")

    if not nsc_id:
        search = requests.get(
            f"{CELLMINER_BASE}/compound/search",
            params={"name": drug_name},
            timeout=30,
        )
        search.raise_for_status()
        compounds = search.json()
        # NOTE(review): assumes the search returns a list of dicts that
        # carry an "nsc" key — confirm against the live API.
        nsc_id = compounds[0].get("nsc", "") if compounds else ""
        if not nsc_id:
            # Without an NSC ID the activity URL would be malformed.
            print(f"Drug not found: {drug_name}")
            return pd.DataFrame(columns=columns)

    resp = requests.get(f"{CELLMINER_BASE}/compound/{nsc_id}/activity",
                        timeout=30)
    resp.raise_for_status()
    data = resp.json()

    results = [
        {
            "cell_line": cell_line,
            "tissue": values.get("tissue", ""),
            "gi50_log": values.get("gi50", None),
            "tgi_log": values.get("tgi", None),
            "lc50_log": values.get("lc50", None),
        }
        for cell_line, values in data.get("activity", {}).items()
    ]

    # Passing ``columns`` keeps the schema stable even with zero rows.
    df = pd.DataFrame(results, columns=columns)
    print(f"CellMiner: NSC {nsc_id} → {len(df)} cell lines")
    return df
78
+ ```
79
+
80
+ ## 2. NCI-60 バルクデータ取得
81
+
82
+ ```python
83
def nci60_bulk_download(data_type="drug_activity"):
    """
    Download one NCI-60 bulk CSV dataset from CellMiner.

    Parameters:
        data_type: str — "drug_activity", "gene_expression" or
            "mutation". No download URL is configured for "copy_number",
            so it is rejected like any other unknown value.

    Returns:
        pd.DataFrame parsed from the downloaded CSV text.

    Raises:
        ValueError: if ``data_type`` has no configured URL.
        requests.HTTPError: if the download returns an error status.
    """
    urls = {
        "drug_activity": "https://discover.nci.nih.gov/cellminer/download/DTP_NCI60_ZSCORE.csv",
        "gene_expression": "https://discover.nci.nih.gov/cellminer/download/GeneExpr_RMA.csv",
        "mutation": "https://discover.nci.nih.gov/cellminer/download/Exome_Mutation.csv",
    }

    url = urls.get(data_type)
    if not url:
        # List the valid choices so the caller can correct the argument.
        raise ValueError(
            f"Unknown data type: {data_type} "
            f"(supported: {', '.join(urls)})"
        )

    # Bulk files are large, hence the longer timeout.
    resp = requests.get(url, timeout=120)
    resp.raise_for_status()

    df = pd.read_csv(StringIO(resp.text))
    print(f"NCI-60 bulk: {data_type} → {df.shape}")
    return df
107
+ ```
108
+
109
+ ## 3. 薬剤-分子マーカー相関
110
+
111
+ ```python
112
+ from scipy import stats
113
+
114
+
115
def drug_marker_correlation(drug_activity, molecular_data,
                            marker_type="expression", top_n=50):
    """
    Correlate drug sensitivity with molecular markers, gene by gene.

    For every gene, the Pearson correlation between ``gi50_log`` and the
    marker value is computed over the cell lines present in both inputs.
    Genes with fewer than 10 usable (non-missing) paired samples, or with
    a constant vector on either side, are skipped because the correlation
    is undefined or unreliable there.

    Parameters:
        drug_activity: pd.DataFrame — must contain the columns
            ``cell_line`` and ``gi50_log``.
        molecular_data: pd.DataFrame — either long format
            (``cell_line``, ``gene``, ``value``) or wide format
            (``cell_line`` plus one column per gene).
        marker_type: str — "expression", "mutation", "copy_number".
            Currently not used by the computation; kept for interface
            compatibility.
        top_n: int — number of top-ranked genes to return.

    Returns:
        pd.DataFrame with the columns gene, pearson_r, p_value, n_samples
        and adj_p (Bonferroni-adjusted, capped at 1.0), sorted by
        ascending p_value and truncated to ``top_n`` rows. An empty frame
        with these columns is returned when no gene can be tested.
    """
    result_columns = ["gene", "pearson_r", "p_value", "n_samples", "adj_p"]

    # Restrict both inputs to the shared cell lines.
    common_lines = (set(drug_activity["cell_line"])
                    & set(molecular_data["cell_line"]))
    drug_sub = drug_activity[drug_activity["cell_line"].isin(common_lines)]
    mol_sub = molecular_data[molecular_data["cell_line"].isin(common_lines)]

    # Missing / non-numeric GI50 values cannot take part in a correlation.
    drug_values = pd.to_numeric(
        drug_sub.set_index("cell_line")["gi50_log"], errors="coerce"
    ).dropna()

    if "gene" in mol_sub.columns:
        # Long format: a single groupby pass instead of one boolean
        # filter over the whole table per gene.
        gene_series = (
            (gene, group.set_index("cell_line")["value"])
            for gene, group in mol_sub.groupby("gene", sort=False)
        )
    else:
        # Wide format: every column other than ``cell_line`` is a gene,
        # regardless of where ``cell_line`` sits in the column order.
        wide = mol_sub.set_index("cell_line")
        gene_series = ((gene, wide[gene]) for gene in wide.columns)

    correlations = []
    for gene, gene_data in gene_series:
        gene_data = pd.to_numeric(gene_data, errors="coerce").dropna()
        common = drug_values.index.intersection(gene_data.index)
        if len(common) < 10:
            continue

        x = drug_values.loc[common]
        y = gene_data.loc[common]
        # Pearson r is undefined when either vector is constant.
        if x.nunique() < 2 or y.nunique() < 2:
            continue

        r, p = stats.pearsonr(x, y)
        correlations.append({
            "gene": gene,
            "pearson_r": r,
            "p_value": p,
            "n_samples": len(common),
        })

    if not correlations:
        print("Drug-marker correlation: 0 genes tested")
        return pd.DataFrame(columns=result_columns)

    corr_df = pd.DataFrame(correlations)
    # Bonferroni correction; an adjusted p-value cannot exceed 1.
    corr_df["adj_p"] = (corr_df["p_value"] * len(corr_df)).clip(upper=1.0)
    corr_df = corr_df.sort_values("p_value")

    print(f"Drug-marker correlation: {len(corr_df)} genes tested, "
          f"top |r| = {corr_df['pearson_r'].abs().max():.3f}")
    return corr_df.head(top_n)
162
+ ```
163
+
164
+ ## 4. 組織別薬剤応答パターン
165
+
166
+ ```python
167
def tissue_response_pattern(drug_activity, min_lines=3):
    """
    Summarise drug response per tissue of origin.

    Parameters:
        drug_activity: pd.DataFrame — must contain the columns ``tissue``
            and ``gi50_log`` unless it is completely empty.
        min_lines: int — minimum number of cell lines with a GI50 value
            a tissue needs in order to be reported.

    Returns:
        pd.DataFrame with the columns tissue, n_lines, mean_gi50,
        std_gi50, min_gi50, max_gi50 and sensitivity_z, sorted by
        ascending mean_gi50. ``sensitivity_z`` is the tissue mean minus
        the overall mean, divided by the overall standard deviation of
        ``gi50_log``. An empty frame with these columns is returned for
        empty input.
    """
    columns = ["tissue", "n_lines", "mean_gi50", "std_gi50",
               "min_gi50", "max_gi50", "sensitivity_z"]

    # An empty frame (e.g. from a failed drug lookup) may lack the
    # required columns; answer with an empty summary instead of failing.
    if drug_activity.empty:
        print("Tissue patterns: 0 tissues")
        return pd.DataFrame(columns=columns)

    # Coerce so missing or non-numeric GI50 entries become NaN and are
    # ignored by the aggregations below.
    data = drug_activity.assign(
        gi50_log=pd.to_numeric(drug_activity["gi50_log"], errors="coerce")
    )

    tissue_stats = data.groupby("tissue").agg(
        n_lines=("gi50_log", "count"),
        mean_gi50=("gi50_log", "mean"),
        std_gi50=("gi50_log", "std"),
        min_gi50=("gi50_log", "min"),
        max_gi50=("gi50_log", "max"),
    ).reset_index()

    tissue_stats = tissue_stats[tissue_stats["n_lines"] >= min_lines]
    tissue_stats = tissue_stats.sort_values("mean_gi50")

    # Sensitivity / resistance score relative to the whole panel.
    overall_mean = data["gi50_log"].mean()
    overall_std = data["gi50_log"].std()
    tissue_stats = tissue_stats.assign(
        sensitivity_z=(tissue_stats["mean_gi50"] - overall_mean) / overall_std
    )

    print(f"Tissue patterns: {len(tissue_stats)} tissues")
    for row in tissue_stats.itertuples(index=False):
        if row.sensitivity_z < -0.5:
            label = "Sensitive"
        elif row.sensitivity_z > 0.5:
            label = "Resistant"
        else:
            # Also reached when the z-score is NaN (zero overall spread).
            label = "Neutral"
        print(f" {row.tissue}: GI50={row.mean_gi50:.2f} ({label})")
    return tissue_stats
200
+ ```
201
+
202
+ ## 5. DepMap 統合スクリーニング
203
+
204
+ ```python
205
+ DEPMAP_BASE = "https://depmap.org/portal/api"
206
+
207
+
208
def depmap_gene_dependency(gene_symbol, dataset="Chronos_Combined"):
    """
    Fetch per-cell-line dependency scores for one gene from DepMap.

    Parameters:
        gene_symbol: str — gene symbol to query.
        dataset: str — dataset name passed to the API.

    Returns:
        pd.DataFrame with the columns cell_line, lineage and
        dependency_score (numeric; missing scores become NaN). The frame
        is empty when the response carries no "data" entries.

    Raises:
        requests.HTTPError: if the request returns an error status.
    """
    columns = ["cell_line", "lineage", "dependency_score"]

    resp = requests.get(
        f"{DEPMAP_BASE}/download/custom",
        params={"gene": gene_symbol, "dataset": dataset},
        timeout=30,
    )
    resp.raise_for_status()
    data = resp.json()

    # NOTE(review): assumes a JSON body of the form {"data": [ {...} ]}
    # with cell_line_name / lineage / score keys — confirm against the API.
    results = [
        {
            "cell_line": entry.get("cell_line_name", ""),
            "lineage": entry.get("lineage", ""),
            "dependency_score": entry.get("score", None),
        }
        for entry in data.get("data", [])
    ]

    df = pd.DataFrame(results, columns=columns)
    # Force a numeric dtype: when every score is None the column would be
    # object-typed and the threshold comparison below would raise.
    df["dependency_score"] = pd.to_numeric(df["dependency_score"],
                                           errors="coerce")
    if len(df) > 0:
        n_dependent = (df["dependency_score"] < -0.5).sum()
        print(f"DepMap {gene_symbol}: {len(df)} lines, "
              f"{n_dependent} dependent (score < -0.5)")
    return df
239
+ ```
240
+
241
+ ## 6. NCI-60 統合スクリーニングパイプライン
242
+
243
+ ```python
244
def nci60_screening_pipeline(drug_name=None, nsc_id=None,
                             target_gene=None, output_dir="results"):
    """
    Run the combined NCI-60 + DepMap screening pipeline.

    Steps: fetch drug activity, summarise it per tissue, correlate it
    with bulk gene expression and, when ``target_gene`` is given, fetch
    DepMap dependency scores. Each result is also written as CSV into
    ``output_dir``.

    Parameters:
        drug_name: str — drug name.
        nsc_id: str — NSC ID.
        target_gene: str — target gene for the optional DepMap step.
        output_dir: str — output directory (created if missing).

    Returns:
        dict with the keys "drug_activity", "tissue_patterns" and
        "correlations" (pd.DataFrame each; the latter two are empty when
        no drug activity was found), plus "depmap_dependency" when
        ``target_gene`` was given.
    """
    from pathlib import Path

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) NCI-60 drug activity
    drug_data = cellminer_drug_activity(nsc_id=nsc_id, drug_name=drug_name)
    drug_data.to_csv(output_dir / "drug_activity.csv", index=False)

    results = {
        "drug_activity": drug_data,
        "tissue_patterns": pd.DataFrame(),
        "correlations": pd.DataFrame(),
    }

    if drug_data.empty:
        # Nothing to analyse (e.g. unknown drug): skip the steps that
        # need activity values instead of failing inside them.
        print("No drug activity data; skipping tissue and marker analysis")
    else:
        # 2) Per-tissue pattern
        tissue_patterns = tissue_response_pattern(drug_data)
        tissue_patterns.to_csv(output_dir / "tissue_patterns.csv",
                               index=False)
        results["tissue_patterns"] = tissue_patterns

        # 3) Expression correlation
        # NOTE(review): drug_marker_correlation needs a "cell_line"
        # column in expr_data — confirm the bulk CSV layout provides it.
        expr_data = nci60_bulk_download("gene_expression")
        correlations = drug_marker_correlation(drug_data, expr_data)
        correlations.to_csv(output_dir / "marker_correlations.csv",
                            index=False)
        results["correlations"] = correlations

    # 4) DepMap lookup (only when a target gene is given)
    if target_gene:
        depmap_data = depmap_gene_dependency(target_gene)
        depmap_data.to_csv(output_dir / "depmap_dependency.csv", index=False)
        # Hand the DepMap table back as well, not only the CSV on disk.
        results["depmap_dependency"] = depmap_data

    print(f"Pipeline complete: {output_dir}")
    return results
283
+ ```
284
+
285
+ ---
286
+
287
+ ## パイプライン統合
288
+
289
+ ```
290
+ compound-screening → nci60-screening → precision-oncology
291
+ (ZINC/VS) (NCI-60/DepMap) (MTB レポート)
292
+ │ │ ↓
293
+ drug-target-profiling ──────┘ cancer-genomics
294
+ (ChEMBL/DGIdb) │ (COSMIC/DepMap)
295
+
296
+ cell-line-resources
297
+ (Cellosaurus)
298
+ ```
299
+
300
+ ## パイプライン出力
301
+
302
+ | ファイル | 説明 | 次スキル |
303
+ |---------|------|---------|
304
+ | `results/drug_activity.csv` | NCI-60 GI50 データ | → precision-oncology |
305
+ | `results/tissue_patterns.csv` | 組織別応答パターン | → cancer-genomics |
306
+ | `results/marker_correlations.csv` | 薬剤-マーカー相関 | → drug-target-profiling |
307
+ | `results/depmap_dependency.csv` | DepMap 依存性スコア (`target_gene` 指定時のみ出力) | → cell-line-resources |
@@ -0,0 +1,265 @@
1
+ ---
2
+ name: scientific-paleobiology
3
+ description: |
4
+ 古生物学データベーススキル。Paleobiology Database (PBDB) REST
5
+ API による化石産出記録・分類群・コレクション検索、地質年代
6
+ 多様性曲線・古地理解析。ToolUniverse 連携: paleobiology。
7
+ tu_tools:
8
+ - key: paleobiology
9
+ name: Paleobiology Database
10
+ description: PBDB 化石産出記録・分類群・コレクション検索
11
+ ---
12
+
13
+ # Scientific Paleobiology
14
+
15
+ Paleobiology Database (PBDB) REST API を活用した古生物学的
16
+ 多様性解析パイプラインを提供する。
17
+
18
+ ## When to Use
19
+
20
+ - 化石産出記録 (occurrence) を検索するとき
21
+ - 分類群 (taxa) の地質年代分布を調べるとき
22
+ - 化石コレクション/産地情報を検索するとき
23
+ - 地質年代を通じた多様性曲線を作成するとき
24
+ - 大量絶滅イベントのパターンを分析するとき
25
+ - 古地理的分布を解析するとき
26
+
27
+ ---
28
+
29
+ ## Quick Start
30
+
31
+ ## 1. PBDB 化石産出記録検索
32
+
33
+ ```python
34
+ import requests
35
+ import pandas as pd
36
+ import numpy as np
37
+
38
+ PBDB_BASE = "https://paleobiodb.org/data1.2"
39
+
40
+
41
def pbdb_search_occurrences(taxon=None, interval=None,
                            lngmin=None, lngmax=None,
                            latmin=None, latmax=None, limit=1000):
    """
    Search fossil occurrence records in the Paleobiology Database.

    Parameters:
        taxon: str — taxon name (e.g. "Dinosauria", "Trilobita").
        interval: str — geologic time interval (e.g. "Cretaceous").
        lngmin: float — minimum longitude.
        lngmax: float — maximum longitude.
        latmin: float — minimum latitude.
        latmax: float — maximum latitude.
        limit: int — maximum number of records.

    The bounding box is sent only when all four bounds are given.

    Returns:
        pd.DataFrame with one row per record returned by the service.
    """
    # (output column, key in the response record, default when absent)
    field_map = (
        ("occurrence_no", "oid", ""),
        ("taxon_name", "tna", ""),
        ("taxon_rank", "rnk", ""),
        ("phylum", "phl", ""),
        ("class", "cll", ""),
        ("order", "odl", ""),
        ("family", "fml", ""),
        ("early_interval", "oei", ""),
        ("late_interval", "oli", ""),
        ("max_ma", "eag", None),
        ("min_ma", "lag", None),
        ("lng", "lng", None),
        ("lat", "lat", None),
        ("collection_no", "cid", ""),
        ("reference_no", "rid", ""),
    )

    query = {"show": "coords,phylo,time", "limit": limit}
    if taxon:
        query["base_name"] = taxon
    if interval:
        query["interval"] = interval

    bbox = {
        "lngmin": lngmin, "lngmax": lngmax,
        "latmin": latmin, "latmax": latmax,
    }
    if all(bound is not None for bound in bbox.values()):
        query.update(bbox)

    response = requests.get(f"{PBDB_BASE}/occs/list.json",
                            params=query, timeout=30)
    response.raise_for_status()

    rows = [
        {column: rec.get(key, default) for column, key, default in field_map}
        for rec in response.json().get("records", [])
    ]

    df = pd.DataFrame(rows)
    print(f"PBDB occurrences: {len(df)} records "
          f"(taxon={taxon}, interval={interval})")
    return df
99
+ ```
100
+
101
+ ## 2. PBDB 分類群情報検索
102
+
103
+ ```python
104
def pbdb_search_taxa(name=None, rank=None, interval=None, limit=500):
    """
    Search taxa in the Paleobiology Database.

    Parameters:
        name: str — taxon name (e.g. "Dinosauria").
        rank: str — rank (e.g. "genus", "family", "order").
        interval: str — geologic time interval.
        limit: int — maximum number of records.

    Returns:
        pd.DataFrame with one row per record returned by the service.
    """
    # (output column, key in the response record, default when absent)
    field_map = (
        ("taxon_no", "oid", ""),
        ("taxon_name", "nam", ""),
        ("rank", "rnk", ""),
        ("parent_name", "prl", ""),
        ("n_occs", "noc", 0),
        ("first_appearance", "fea", ""),
        ("last_appearance", "lla", ""),
        ("extant", "ext", ""),
    )

    query = {"show": "attr,app,size", "limit": limit}
    # Only truthy filters are forwarded, in this fixed order.
    for key, value in (("base_name", name), ("rank", rank),
                       ("interval", interval)):
        if value:
            query[key] = value

    response = requests.get(f"{PBDB_BASE}/taxa/list.json",
                            params=query, timeout=30)
    response.raise_for_status()

    rows = [
        {column: rec.get(key, default) for column, key, default in field_map}
        for rec in response.json().get("records", [])
    ]

    df = pd.DataFrame(rows)
    print(f"PBDB taxa: {len(df)} records (name={name})")
    return df
146
+ ```
147
+
148
+ ## 3. 地質年代多様性曲線
149
+
150
+ ```python
151
def pbdb_diversity_curve(taxon, time_resolution="stage",
                         rank="genus"):
    """
    Build a diversity-through-time table from the Paleobiology Database.

    Parameters:
        taxon: str — taxon name.
        time_resolution: str — "stage", "epoch" or "period".
        rank: str — rank to count ("genus", "family").

    Returns:
        pd.DataFrame with the columns interval_name, max_ma, min_ma,
        mid_ma, sampled_in_bin, n_originations, n_extinctions and
        range_through, one row per time interval. ``mid_ma`` is the mean
        of max_ma and min_ma, or None when either bound is missing. The
        frame is empty (but keeps these columns) when the service returns
        no records.

    Raises:
        requests.HTTPError: if the request returns an error status.
    """
    columns = ["interval_name", "max_ma", "min_ma", "mid_ma",
               "sampled_in_bin", "n_originations", "n_extinctions",
               "range_through"]

    # NOTE(review): the "count" values and compact field codes (idn, dsb,
    # dor, dex, drt) are taken as-is from the original — verify them
    # against the PBDB data service documentation.
    resp = requests.get(
        f"{PBDB_BASE}/occs/diversity.json",
        params={
            "base_name": taxon,
            "count": rank,
            "time_reso": time_resolution,
        },
        timeout=60,
    )
    resp.raise_for_status()
    records = resp.json().get("records", [])

    results = []
    for r in records:
        max_ma = r.get("eag", None)
        min_ma = r.get("lag", None)
        # A midpoint is only meaningful when both bounds are known;
        # treating a missing bound as 0 Ma would fabricate an age.
        if max_ma is None or min_ma is None:
            mid_ma = None
        else:
            mid_ma = (float(max_ma) + float(min_ma)) / 2
        results.append({
            "interval_name": r.get("idn", ""),
            "max_ma": max_ma,
            "min_ma": min_ma,
            "mid_ma": mid_ma,
            "sampled_in_bin": r.get("dsb", 0),
            "n_originations": r.get("dor", 0),
            "n_extinctions": r.get("dex", 0),
            "range_through": r.get("drt", 0),
        })

    # Passing ``columns`` keeps the schema stable even with zero rows.
    df = pd.DataFrame(results, columns=columns)
    max_diversity = df["sampled_in_bin"].max() if not df.empty else 0
    print(f"PBDB diversity: {len(df)} intervals, "
          f"max diversity={max_diversity} {rank}")
    return df
189
+ ```
190
+
191
+ ## 4. 古生物学統合パイプライン
192
+
193
+ ```python
194
def paleobiology_pipeline(taxon, interval=None,
                          output_dir="results"):
    """
    Run the combined paleobiology pipeline for one taxon.

    Fetches occurrences, taxa and the diversity curve, writes each as
    CSV into ``output_dir`` and, when the occurrence table has ``lat``
    and ``lng`` columns, also writes a per-interval geographic summary.

    Parameters:
        taxon: str — taxon name (e.g. "Dinosauria").
        interval: str — geologic time interval (optional; applied to the
            occurrence search only).
        output_dir: str — output directory (created if missing).

    Returns:
        dict with the keys "occurrences", "taxa" and "diversity".
    """
    from pathlib import Path

    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    # 1) Occurrence records
    occurrences = pbdb_search_occurrences(taxon=taxon, interval=interval)
    occurrences.to_csv(out_path / "occurrences.csv", index=False)

    # 2) Taxon information
    taxa_table = pbdb_search_taxa(name=taxon)
    taxa_table.to_csv(out_path / "taxa.csv", index=False)

    # 3) Diversity curve
    diversity_table = pbdb_diversity_curve(taxon)
    diversity_table.to_csv(out_path / "diversity.csv", index=False)

    # 4) Geographic summary per early interval
    if {"lat", "lng"}.issubset(occurrences.columns):
        grouped = occurrences.groupby("early_interval")
        geo_summary = grouped.agg(
            n_records=("occurrence_no", "count"),
            mean_lat=("lat", "mean"),
            mean_lng=("lng", "mean"),
        )
        geo_summary = geo_summary.reset_index()
        geo_summary.to_csv(out_path / "geo_summary.csv", index=False)

    print(f"Paleobiology pipeline: {out_path}")
    return {
        "occurrences": occurrences,
        "taxa": taxa_table,
        "diversity": diversity_table,
    }
235
+ ```
236
+
237
+ ---
238
+
239
+ ## ToolUniverse 連携
240
+
241
+ | TU Key | ツール名 | 連携内容 |
242
+ |--------|---------|---------|
243
+ | `paleobiology` | Paleobiology Database | 化石産出・分類群・コレクション検索 |
244
+
245
+ ## パイプライン統合
246
+
247
+ ```
248
+ phylogenetics → paleobiology → environmental-ecology
249
+ (系統解析) (化石記録) (GBIF/生態)
250
+ │ │ ↓
251
+ taxonomy ─────────┘ environmental-geodata
252
+ (分類体系) │ (環境モデリング)
253
+
254
+ macroevolution
255
+ (大進化パターン)
256
+ ```
257
+
258
+ ## パイプライン出力
259
+
260
+ | ファイル | 説明 | 次スキル |
261
+ |---------|------|---------|
262
+ | `results/occurrences.csv` | 化石産出記録 | → environmental-ecology |
263
+ | `results/taxa.csv` | 分類群情報 | → phylogenetics |
264
+ | `results/diversity.csv` | 多様性曲線 | → macroevolution |
265
+ | `results/geo_summary.csv` | 古地理サマリ (産出記録に `lat`/`lng` 列がある場合のみ出力) | → environmental-geodata |