@nahisaho/satori 0.13.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. package/README.md +134 -43
  2. package/package.json +1 -1
  3. package/src/.github/skills/scientific-advanced-imaging/SKILL.md +382 -0
  4. package/src/.github/skills/scientific-biomedical-pubtator/SKILL.md +331 -0
  5. package/src/.github/skills/scientific-cell-line-resources/SKILL.md +258 -0
  6. package/src/.github/skills/scientific-chembl-assay-mining/SKILL.md +509 -0
  7. package/src/.github/skills/scientific-deep-chemistry/SKILL.md +350 -0
  8. package/src/.github/skills/scientific-ebi-databases/SKILL.md +280 -0
  9. package/src/.github/skills/scientific-ensembl-genomics/SKILL.md +378 -0
  10. package/src/.github/skills/scientific-expression-comparison/SKILL.md +303 -0
  11. package/src/.github/skills/scientific-md-simulation/SKILL.md +315 -0
  12. package/src/.github/skills/scientific-model-organism-db/SKILL.md +329 -0
  13. package/src/.github/skills/scientific-ontology-enrichment/SKILL.md +340 -0
  14. package/src/.github/skills/scientific-perturbation-analysis/SKILL.md +297 -0
  15. package/src/.github/skills/scientific-phylogenetics/SKILL.md +297 -0
  16. package/src/.github/skills/scientific-preprint-archive/SKILL.md +476 -0
  17. package/src/.github/skills/scientific-public-health-data/SKILL.md +322 -0
  18. package/src/.github/skills/scientific-regulatory-genomics/SKILL.md +274 -0
  19. package/src/.github/skills/scientific-reinforcement-learning/SKILL.md +280 -0
  20. package/src/.github/skills/scientific-scvi-integration/SKILL.md +344 -0
  21. package/src/.github/skills/scientific-string-network-api/SKILL.md +376 -0
  22. package/src/.github/skills/scientific-symbolic-mathematics/SKILL.md +277 -0
@@ -0,0 +1,509 @@
1
+ ---
2
+ name: scientific-chembl-assay-mining
3
+ description: |
4
+ ChEMBL アッセイ・活性データマイニングスキル。ChEMBL REST API による
5
+ アッセイ検索・バイオアクティビティデータ取得・IC50/Ki/EC50 SAR 解析・
6
+ ターゲット-化合物マッピング・選択性プロファイリング・ATC 分類検索・
7
+ 構造アラート検出パイプライン。
8
+ ---
9
+
10
+ # Scientific ChEMBL Assay Mining
11
+
12
+ ChEMBL REST API (EBI) を活用したバイオアクティビティデータマイニング
13
+ パイプラインを提供する。アッセイ検索、活性値解析、SAR (構造活性相関)、
14
+ ターゲット選択性プロファイリング、分子ドッキング前処理を統合。
15
+
16
+ ## When to Use
17
+
18
+ - ChEMBL からターゲット特異的アッセイデータを取得するとき
19
+ - IC50/Ki/EC50 をバルク取得して SAR 解析するとき
20
+ - 特定標的に対する化合物選択性を評価するとき
21
+ - 分子構造類似性検索・サブ構造検索を行うとき
22
+ - ATC 分類から承認薬を特定し薬理プロファイルを構築するとき
23
+ - 構造アラート (PAINS, Dundee) をスクリーニングするとき
24
+
25
+ ---
26
+
27
+ ## Quick Start
28
+
29
+ ## 1. ターゲット検索 & アッセイ取得
30
+
31
+ ```python
32
+ import requests
33
+ import pandas as pd
34
+
35
+ CHEMBL_API = "https://www.ebi.ac.uk/chembl/api/data"
36
+ HEADERS = {"Accept": "application/json"}
37
+
38
+
39
def search_target(query, organism="Homo sapiens", limit=10):
    """
    Search ChEMBL targets by preferred name.

    Parameters:
        query: str — target name fragment (e.g. "EGFR", "CDK4"); sent as the
            ``pref_name__icontains`` filter
        organism: str — organism filter
        limit: int — value sent as the ``limit`` query parameter

    Returns:
        DataFrame with one row per entry of the response's "targets" list,
        holding target_chembl_id, pref_name, target_type and organism.

    Raises:
        Whatever ``raise_for_status()`` raises for an HTTP error status.

    ToolUniverse:
        ChEMBL_search_targets(pref_name__contains=query, organism=organism)
        ChEMBL_get_target(target_chembl_id=target_id)
    """
    # Fields copied verbatim from each target record into the result frame.
    wanted = ("target_chembl_id", "pref_name", "target_type", "organism")

    response = requests.get(
        f"{CHEMBL_API}/target.json",
        params={
            "pref_name__icontains": query,
            "organism": organism,
            "limit": limit,
        },
        headers=HEADERS,
    )
    response.raise_for_status()

    records = [
        {field: hit.get(field) for field in wanted}
        for hit in response.json().get("targets", [])
    ]

    df = pd.DataFrame(records)
    print(f"ChEMBL targets matching '{query}': {len(df)}")
    return df
74
+ ```
75
+
76
+ ## 2. バイオアクティビティデータ取得
77
+
78
+ ```python
79
def get_target_activities(target_chembl_id, standard_type="IC50",
                          max_value=10000, limit=500):
    """
    Fetch bioactivity records for one ChEMBL target.

    Parameters:
        target_chembl_id: str — ChEMBL target ID
        standard_type: str — "IC50", "Ki", "EC50", "Kd" etc.
        max_value: float — upper bound sent as ``standard_value__lte``
            (the request also fixes ``standard_units`` to "nM")
        limit: int — value sent as the ``limit`` query parameter

    Returns:
        DataFrame with the columns molecule_chembl_id, canonical_smiles,
        standard_type, standard_value, standard_units, pchembl_value,
        assay_chembl_id, assay_type and target_chembl_id. The columns are
        present even when the response holds no activities.
        ``standard_value`` and ``pchembl_value`` are coerced to numeric
        (unparseable or missing values become NaN).

    Raises:
        Whatever ``raise_for_status()`` raises for an HTTP error status.

    ToolUniverse:
        ChEMBL_search_activities(
            target_chembl_id=target_chembl_id,
            standard_type=standard_type,
            standard_value__lte=max_value
        )
        ChEMBL_get_target_activities(target_chembl_id__exact=target_chembl_id)
    """
    columns = [
        "molecule_chembl_id",
        "canonical_smiles",
        "standard_type",
        "standard_value",
        "standard_units",
        "pchembl_value",
        "assay_chembl_id",
        "assay_type",
        "target_chembl_id",
    ]

    url = f"{CHEMBL_API}/activity.json"
    params = {
        "target_chembl_id": target_chembl_id,
        "standard_type": standard_type,
        "standard_value__lte": max_value,
        "standard_units": "nM",
        "limit": limit,
    }
    resp = requests.get(url, params=params, headers=HEADERS)
    resp.raise_for_status()
    activities = resp.json().get("activities", [])

    rows = [{col: act.get(col) for col in columns} for act in activities]

    # Passing `columns` keeps the schema stable for an empty result.
    df = pd.DataFrame(rows, columns=columns)

    # Both potency columns must be numeric: numeric-only operations such as
    # DataFrame.nlargest reject object-dtype columns.
    for col in ("standard_value", "pchembl_value"):
        df[col] = pd.to_numeric(df[col], errors="coerce")

    print(f"Activities for {target_chembl_id} ({standard_type}): {len(df)}")
    return df
129
+ ```
130
+
131
+ ## 3. アッセイ詳細検索
132
+
133
+ ```python
134
def search_assays(target_chembl_id=None, assay_type=None, limit=50):
    """
    Search ChEMBL assays, optionally filtered by target and assay type.

    Parameters:
        target_chembl_id: str — target ChEMBL ID; omitted from the request
            when falsy
        assay_type: str — "B" (Binding), "F" (Functional), "A" (ADME);
            omitted from the request when falsy
        limit: int — value sent as the ``limit`` query parameter

    Returns:
        DataFrame with one row per entry of the response's "assays" list,
        holding assay_chembl_id, description (first 200 characters),
        assay_type, assay_organism, confidence_score and target_chembl_id.

    Raises:
        Whatever ``raise_for_status()`` raises for an HTTP error status.

    ToolUniverse:
        ChEMBL_search_assays(
            target_chembl_id=target_chembl_id,
            assay_type=assay_type
        )
        ChEMBL_get_assay(assay_chembl_id=assay_id)
        ChEMBL_get_assay_activities(assay_chembl_id__exact=assay_id)
    """
    url = f"{CHEMBL_API}/assay.json"
    params = {"limit": limit}
    if target_chembl_id:
        params["target_chembl_id"] = target_chembl_id
    if assay_type:
        params["assay_type"] = assay_type

    resp = requests.get(url, params=params, headers=HEADERS)
    resp.raise_for_status()
    assays = resp.json().get("assays", [])

    rows = []
    for a in assays:
        rows.append({
            "assay_chembl_id": a.get("assay_chembl_id"),
            # `or ""` also covers a key that is present with a null value;
            # a .get() default only applies when the key is absent.
            "description": (a.get("description") or "")[:200],
            "assay_type": a.get("assay_type"),
            "assay_organism": a.get("assay_organism"),
            "confidence_score": a.get("confidence_score"),
            "target_chembl_id": a.get("target_chembl_id"),
        })

    df = pd.DataFrame(rows)
    print(f"Assays found: {len(df)}")
    return df
176
+ ```
177
+
178
+ ## 4. SAR (構造活性相関) 解析
179
+
180
+ ```python
181
+ import numpy as np
182
+
183
+
184
def sar_analysis(activity_df, pchembl_col="pchembl_value"):
    """
    Summarise the potency distribution of a bioactivity table.

    Parameters:
        activity_df: DataFrame — output of get_target_activities; must hold
            ``pchembl_col`` and "molecule_chembl_id". It is not modified.
        pchembl_col: str — name of the pChEMBL column

    Returns:
        dict — SAR statistics computed over the rows whose pChEMBL value
        parses as a number:
            n_compounds: number of such rows
            n_unique_molecules: distinct molecule_chembl_id values
            pchembl_mean / pchembl_median / pchembl_std: rounded to 2 places
            pchembl_range: [min, max], rounded to 2 places
            most_potent: the first row with the highest pChEMBL as a flat
                dict, or None when no row has a numeric value
            potency_bins: row counts for pChEMBL >= 7, 6 <= pChEMBL < 7 and
                pChEMBL < 6
    """
    df = activity_df.copy()
    df[pchembl_col] = pd.to_numeric(df[pchembl_col], errors="coerce")
    df = df.dropna(subset=[pchembl_col])
    values = df[pchembl_col]

    if len(df) > 0:
        # Positional lookup: label-based df.loc[idxmax()] returns several
        # rows (and to_dict() a nested dict) when index labels repeat,
        # e.g. after concatenating frames.
        most_potent = df.iloc[int(values.to_numpy().argmax())].to_dict()
    else:
        most_potent = None

    summary = {
        "n_compounds": len(df),
        "n_unique_molecules": df["molecule_chembl_id"].nunique(),
        "pchembl_mean": round(values.mean(), 2),
        "pchembl_median": round(values.median(), 2),
        "pchembl_std": round(values.std(), 2),
        "pchembl_range": [
            round(values.min(), 2),
            round(values.max(), 2),
        ],
        "most_potent": most_potent,
        "potency_bins": {
            "high_potent_lt100nM": int((values >= 7.0).sum()),
            "moderate_100_1000nM": int(
                ((values >= 6.0) & (values < 7.0)).sum()
            ),
            "weak_gt1000nM": int((values < 6.0).sum()),
        },
    }

    print(f"SAR summary: {summary['n_unique_molecules']} molecules, "
          f"pChEMBL mean={summary['pchembl_mean']}")
    return summary
223
+ ```
224
+
225
+ ## 5. 選択性プロファイリング
226
+
227
+ ```python
228
def selectivity_profile(molecule_chembl_id, limit=100):
    """
    Build a per-target potency profile for one compound.

    Parameters:
        molecule_chembl_id: str — compound ChEMBL ID
        limit: int — value sent as the ``limit`` query parameter

    Returns:
        DataFrame sorted by best_pchembl (descending) with the columns
        target_chembl_id, target_name, n_measurements, best_pchembl and
        mean_pchembl. Only activities carrying both a target ID and a
        pChEMBL value contribute. When none do, an empty frame with these
        columns is returned.

    Raises:
        Whatever ``raise_for_status()`` raises for an HTTP error status.

    ToolUniverse:
        ChEMBL_get_molecule_targets(
            molecule_chembl_id__exact=molecule_chembl_id
        )
        ChEMBL_search_activities(molecule_chembl_id=molecule_chembl_id)
    """
    columns = [
        "target_chembl_id",
        "target_name",
        "n_measurements",
        "best_pchembl",
        "mean_pchembl",
    ]

    url = f"{CHEMBL_API}/activity.json"
    params = {
        "molecule_chembl_id": molecule_chembl_id,
        "limit": limit,
    }
    resp = requests.get(url, params=params, headers=HEADERS)
    resp.raise_for_status()
    activities = resp.json().get("activities", [])

    # Group the pChEMBL measurements by target.
    target_data = {}
    for act in activities:
        tid = act.get("target_chembl_id")
        pchembl = act.get("pchembl_value")
        if tid and pchembl:
            entry = target_data.setdefault(tid, {
                "target_pref_name": act.get("target_pref_name", ""),
                "pchembl_values": [],
            })
            entry["pchembl_values"].append(float(pchembl))

    profile = []
    for tid, info in target_data.items():
        vals = info["pchembl_values"]
        profile.append({
            "target_chembl_id": tid,
            "target_name": info["target_pref_name"],
            "n_measurements": len(vals),
            "best_pchembl": round(max(vals), 2),
            "mean_pchembl": round(np.mean(vals), 2),
        })

    # Explicit columns keep sort_values from raising KeyError when no
    # activity contributed a row.
    df = pd.DataFrame(profile, columns=columns).sort_values(
        "best_pchembl", ascending=False
    )
    print(f"Selectivity: {molecule_chembl_id} tested on {len(df)} targets")
    return df
277
+ ```
278
+
279
+ ## 6. 分子類似性 & サブ構造検索
280
+
281
+ ```python
282
def similarity_search(smiles, threshold=70, max_results=25):
    """
    Run a ChEMBL similarity search for a SMILES structure.

    Parameters:
        smiles: str — query SMILES
        threshold: int — Tanimoto similarity threshold (%)
        max_results: int — value sent as the ``limit`` query parameter

    Returns:
        DataFrame with one row per entry of the response's "molecules" list,
        holding molecule_chembl_id, pref_name, similarity, canonical_smiles
        ("" when the record has no structure block) and max_phase.

    Raises:
        Whatever ``raise_for_status()`` raises for an HTTP error status.

    ToolUniverse:
        ChEMBL_search_similar_molecules(
            query=smiles, similarity_threshold=threshold
        )
        ChEMBL_search_substructure(smiles=smiles)
    """
    from urllib.parse import quote

    # Percent-encode the SMILES before placing it in the URL path: a raw "#"
    # (triple bond) would start a URL fragment and truncate the request, and
    # a raw "%" (ring-closure numbers >= 10) would read as an escape.
    # "/" is left as-is (quote's default), matching the original request
    # shape for stereo bonds.
    encoded_smiles = quote(smiles)

    url = f"{CHEMBL_API}/similarity/{encoded_smiles}/{threshold}.json"
    params = {"limit": max_results}
    resp = requests.get(url, params=params, headers=HEADERS)
    resp.raise_for_status()
    molecules = resp.json().get("molecules", [])

    rows = []
    for mol in molecules:
        # `or {}` also covers "molecule_structures": null in the payload.
        structures = mol.get("molecule_structures") or {}
        rows.append({
            "molecule_chembl_id": mol.get("molecule_chembl_id"),
            "pref_name": mol.get("pref_name"),
            "similarity": mol.get("similarity"),
            "canonical_smiles": structures.get("canonical_smiles", ""),
            "max_phase": mol.get("max_phase"),
        })

    df = pd.DataFrame(rows)
    print(f"Similar molecules (>{threshold}%): {len(df)}")
    return df
318
+ ```
319
+
320
+ ## 7. ATC 分類 & 承認薬検索
321
+
322
+ ```python
323
def search_approved_drugs(target_chembl_id, limit=50):
    """
    List drugs with a recorded mechanism on a target, with ATC classes.

    Fetches the mechanism records for the target, then looks up the molecule
    record of each mechanism's drug. No filtering on ``max_phase`` is done
    here; the column is returned so callers can select approved drugs.

    Parameters:
        target_chembl_id: str — ChEMBL target ID
        limit: int — value sent as the ``limit`` query parameter of the
            mechanism request

    Returns:
        DataFrame with one row per mechanism whose molecule lookup succeeded,
        holding molecule_chembl_id, pref_name, max_phase, mechanism,
        action_type, first_approval and atc_classifications.

    Raises:
        Whatever ``raise_for_status()`` raises when the mechanism request
        returns an HTTP error status. Failed molecule lookups are skipped.

    ToolUniverse:
        ChEMBL_search_drugs(max_phase=4)
        ChEMBL_search_mechanisms(target_chembl_id=target_chembl_id)
        ChEMBL_search_atc_classification()
    """
    url = f"{CHEMBL_API}/mechanism.json"
    params = {
        "target_chembl_id": target_chembl_id,
        "limit": limit,
    }
    resp = requests.get(url, params=params, headers=HEADERS)
    resp.raise_for_status()
    mechanisms = resp.json().get("mechanisms", [])

    # One molecule can appear in several mechanism rows; fetch it only once.
    molecule_cache = {}

    rows = []
    for mech in mechanisms:
        drug_id = mech.get("molecule_chembl_id")
        if not drug_id:
            # Without an ID the lookup URL would be ".../molecule/None.json".
            continue

        drug = molecule_cache.get(drug_id)
        if drug is None:
            drug_resp = requests.get(
                f"{CHEMBL_API}/molecule/{drug_id}.json", headers=HEADERS
            )
            if not drug_resp.ok:
                continue
            drug = drug_resp.json()
            molecule_cache[drug_id] = drug

        rows.append({
            "molecule_chembl_id": drug_id,
            "pref_name": drug.get("pref_name"),
            "max_phase": drug.get("max_phase"),
            "mechanism": mech.get("mechanism_of_action"),
            "action_type": mech.get("action_type"),
            "first_approval": drug.get("first_approval"),
            "atc_classifications": drug.get("atc_classifications", []),
        })

    df = pd.DataFrame(rows)
    print(f"Drugs/mechanisms for {target_chembl_id}: {len(df)}")
    return df
362
+ ```
363
+
364
+ ## 8. 構造アラート検出
365
+
366
+ ```python
367
def check_structural_alerts(molecule_chembl_id):
    """
    Look up the structural alerts (PAINS, Dundee) recorded for a compound.

    Parameters:
        molecule_chembl_id: str — compound ChEMBL ID

    Returns:
        DataFrame with one row per entry of the response's
        "compound_structural_alerts" list, holding alert_set_name, smarts
        and alert_name. A one-line summary is printed as well.

    Raises:
        Whatever ``raise_for_status()`` raises for an HTTP error status.

    ToolUniverse:
        ChEMBL_search_compound_structural_alerts(
            molecule_chembl_id=molecule_chembl_id
        )
    """
    response = requests.get(
        f"{CHEMBL_API}/compound_structural_alert.json",
        params={"molecule_chembl_id": molecule_chembl_id, "limit": 100},
        headers=HEADERS,
    )
    response.raise_for_status()

    records = []
    for entry in response.json().get("compound_structural_alerts", []):
        # Every reported field lives under the nested "alert" object.
        alert = entry.get("alert", {})
        records.append({
            "alert_set_name": alert.get("alert_set", {}).get("set_name", ""),
            "smarts": alert.get("smarts", ""),
            "alert_name": alert.get("alert_name", ""),
        })

    df = pd.DataFrame(records)
    if len(df) == 0:
        print(f"✓ {molecule_chembl_id}: No structural alerts")
    else:
        print(f"⚠ {molecule_chembl_id}: {len(df)} structural alerts found")
    return df
398
+ ```
399
+
400
+ ## 9. 統合 SAR マイニングパイプライン
401
+
402
+ ```python
403
def chembl_sar_pipeline(target_query, organism="Homo sapiens",
                        standard_type="IC50", max_nm=10000):
    """
    Run the integrated ChEMBL SAR mining pipeline.

    Pipeline:
        search_target → get_target_activities → sar_analysis →
        selectivity_profile (top hits) → check_structural_alerts

    Parameters:
        target_query: str — target name
        organism: str — organism
        standard_type: str — activity type
        max_nm: float — threshold passed to get_target_activities as
            ``max_value``

    Returns:
        None when no target or no activity is found; otherwise a dict with
            target: ChEMBL ID of the first target hit
            sar_summary: output of sar_analysis
            top_compounds: up to three activity rows with the highest
                numeric pChEMBL value, as records
            selectivity: list of {"molecule", "profile"} entries, one per
                distinct top molecule
            structural_alerts: {molecule ID: number of alerts}
    """
    # Step 1: target search
    targets = search_target(target_query, organism)
    if targets.empty:
        print(f"No targets found for '{target_query}'")
        return None

    target_id = targets.iloc[0]["target_chembl_id"]
    print(f"\nSelected target: {target_id}")

    # Step 2: bioactivity retrieval
    activities = get_target_activities(target_id, standard_type, max_nm)
    if activities.empty:
        print("No activities found")
        return None

    # Step 3: SAR analysis
    sar = sar_analysis(activities)

    # Step 4: selectivity of the top compounds.
    # nlargest rejects object-dtype columns, so rank on a numeric copy and
    # ignore rows without a usable pChEMBL value.
    ranked = activities.assign(
        pchembl_value=pd.to_numeric(activities["pchembl_value"],
                                    errors="coerce")
    ).dropna(subset=["pchembl_value"])
    top = ranked.nlargest(3, "pchembl_value")

    # The same molecule may hold several of the top rows; query it once.
    top_ids = [
        mol_id
        for mol_id in dict.fromkeys(top["molecule_chembl_id"])
        if pd.notna(mol_id)
    ]

    selectivity_results = [
        {"molecule": mol_id, "profile": selectivity_profile(mol_id)}
        for mol_id in top_ids
    ]

    # Step 5: structural alert check
    alert_results = {
        mol_id: len(check_structural_alerts(mol_id)) for mol_id in top_ids
    }

    result = {
        "target": target_id,
        "sar_summary": sar,
        "top_compounds": top.to_dict("records"),
        # Previously computed but dropped; now returned to the caller.
        "selectivity": selectivity_results,
        "structural_alerts": alert_results,
    }

    print("\n=== ChEMBL SAR Pipeline Complete ===")
    print(f"Target: {target_id}")
    print(f"Compounds: {sar['n_unique_molecules']}")
    print(f"Top hit pChEMBL: {sar['pchembl_range'][1]}")

    return result
464
+ ```
465
+
466
+ ---
467
+
468
+ ## パイプライン統合
469
+
470
+ ```
471
+ drug-target-profiling → chembl-assay-mining → admet-pharmacokinetics
472
+ (ターゲット候補) (SAR データマイニング) (ADMET/PK 評価)
473
+ │ │ ↓
474
+ compound-screening selectivity_profile molecular-docking
475
+ (ZINC ライブラリ) (マルチターゲット) (Vina/DiffDock)
476
+ ```
477
+
478
+ ## パイプライン出力
479
+
480
+ | ファイル | 説明 | 次スキル |
481
+ |---------|------|---------|
482
+ | `results/chembl_activities.csv` | バイオアクティビティデータ | → admet-pharmacokinetics |
483
+ | `results/sar_summary.json` | SAR 統計サマリ | → drug-target-profiling |
484
+ | `results/selectivity_profile.csv` | 選択性プロファイル | → compound-screening |
485
+ | `results/structural_alerts.json` | 構造アラート結果 | → molecular-docking |
486
+
487
+ ## 利用可能ツール (ToolUniverse SMCP)
488
+
489
+ | ツール名 | 用途 |
490
+ |---------|------|
491
+ | `ChEMBL_search_targets` | ターゲット検索 |
492
+ | `ChEMBL_get_target` | ターゲット詳細 |
493
+ | `ChEMBL_search_assays` | アッセイ検索 |
494
+ | `ChEMBL_get_assay` | アッセイ詳細 |
495
+ | `ChEMBL_get_assay_activities` | アッセイ活性データ |
496
+ | `ChEMBL_search_activities` | 活性検索 (フィルタ付き) |
497
+ | `ChEMBL_get_activity` | 活性データ詳細 |
498
+ | `ChEMBL_get_molecule` | 化合物詳細 |
499
+ | `ChEMBL_get_molecule_targets` | 化合物-ターゲットマップ |
500
+ | `ChEMBL_search_similar_molecules` | 類似性検索 |
501
+ | `ChEMBL_search_substructure` | サブ構造検索 |
502
+ | `ChEMBL_search_drugs` | 承認薬検索 |
503
+ | `ChEMBL_search_mechanisms` | 作用機序検索 |
504
+ | `ChEMBL_search_atc_classification` | ATC 分類 |
505
+ | `ChEMBL_search_compound_structural_alerts` | 構造アラート |
506
+ | `ChEMBL_search_cell_lines` | 細胞株検索 |
507
+ | `ChEMBL_search_binding_sites` | 結合サイト |
508
+ | `ChEMBL_get_drug` | 薬剤情報 |
509
+ | `ChEMBL_get_drug_mechanisms` | 薬剤作用機序 |