@nahisaho/satori 0.12.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +150 -54
- package/package.json +1 -1
- package/src/.github/skills/scientific-biomedical-pubtator/SKILL.md +331 -0
- package/src/.github/skills/scientific-biothings-idmapping/SKILL.md +298 -0
- package/src/.github/skills/scientific-cell-line-resources/SKILL.md +258 -0
- package/src/.github/skills/scientific-compound-screening/SKILL.md +245 -0
- package/src/.github/skills/scientific-ebi-databases/SKILL.md +280 -0
- package/src/.github/skills/scientific-genome-sequence-tools/SKILL.md +304 -0
- package/src/.github/skills/scientific-healthcare-ai/SKILL.md +273 -0
- package/src/.github/skills/scientific-human-protein-atlas/SKILL.md +244 -0
- package/src/.github/skills/scientific-metabolic-modeling/SKILL.md +288 -0
- package/src/.github/skills/scientific-noncoding-rna/SKILL.md +262 -0
- package/src/.github/skills/scientific-ontology-enrichment/SKILL.md +340 -0
- package/src/.github/skills/scientific-pharmacology-targets/SKILL.md +323 -0
- package/src/.github/skills/scientific-phylogenetics/SKILL.md +297 -0
- package/src/.github/skills/scientific-preprint-archive/SKILL.md +476 -0
- package/src/.github/skills/scientific-public-health-data/SKILL.md +322 -0
- package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +327 -0
- package/src/.github/skills/scientific-regulatory-genomics/SKILL.md +274 -0
- package/src/.github/skills/scientific-reinforcement-learning/SKILL.md +280 -0
- package/src/.github/skills/scientific-structural-proteomics/SKILL.md +317 -0
- package/src/.github/skills/scientific-symbolic-mathematics/SKILL.md +277 -0
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-cell-line-resources
|
|
3
|
+
description: |
|
|
4
|
+
細胞株リソーススキル。Cellosaurus 細胞株データベース検索、
|
|
5
|
+
STR プロファイルマッチング、コンタミネーション検出、
|
|
6
|
+
細胞株メタデータ (由来組織・疾患・種) 取得パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Cell Line Resources
|
|
10
|
+
|
|
11
|
+
Cellosaurus を中心とした細胞株リソースデータベースアクセス
|
|
12
|
+
パイプラインを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- 細胞株の正式名称・アクセッション番号を確認するとき
|
|
17
|
+
- 細胞株の由来 (組織・疾患・種) を調べるとき
|
|
18
|
+
- STR プロファイルで細胞株の同一性を検証するとき
|
|
19
|
+
- 細胞株のコンタミネーション (ミスアイデンティフィケーション) を確認するとき
|
|
20
|
+
- 実験に使用する細胞株の参考文献・データベースリンクを取得するとき
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Quick Start
|
|
25
|
+
|
|
26
|
+
## 1. Cellosaurus 細胞株検索
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
import requests
|
|
30
|
+
import pandas as pd
|
|
31
|
+
|
|
32
|
+
CELLOSAURUS_API = "https://api.cellosaurus.org"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def search_cellosaurus(query, limit=25, timeout=30):
    """
    Search Cellosaurus for cell lines and summarize each hit as one row.

    Parameters:
        query: str — cell line name (e.g., "HeLa", "MCF-7", "A549")
        limit: int — maximum number of hits requested (sent as ``rows``)
        timeout: float — seconds to wait for the HTTP response; without
            it a stalled connection would block the caller indefinitely

    Returns:
        pandas.DataFrame — one row per hit with the columns accession,
        name, synonyms, category, sex, species, diseases,
        derived_from_site, is_contaminated and is_problematic. The
        columns are present even when the search returns no hits.

    Raises:
        requests.HTTPError — when the server answers with an error status.

    ToolUniverse:
        Cellosaurus_search(query=query)
        Cellosaurus_get_cell_line(accession=accession)
        Cellosaurus_get_str_profile(accession=accession)
    """
    columns = [
        "accession", "name", "synonyms", "category", "sex", "species",
        "diseases", "derived_from_site", "is_contaminated", "is_problematic",
    ]

    params = {"q": query, "rows": limit, "format": "json"}
    resp = requests.get(
        f"{CELLOSAURUS_API}/search/cell-line", params=params, timeout=timeout
    )
    resp.raise_for_status()
    data = resp.json() or {}

    # NOTE(review): the response keys used below ("result", "cellLineList",
    # "cellLine", ...) are kept exactly as originally written — confirm them
    # against the live API schema.
    # `x.get(key) or {}` (instead of `x.get(key, {})`) also covers keys that
    # are present but explicitly null in the JSON payload.
    results = []
    for cell_line in (data.get("result") or {}).get("cellLineList") or []:
        cl = cell_line.get("cellLine") or {}
        results.append({
            "accession": cl.get("accession", ""),
            "name": cl.get("name", ""),
            "synonyms": [
                s.get("value", "") for s in cl.get("synonymList") or []
            ],
            "category": cl.get("category", ""),
            "sex": cl.get("sex", ""),
            "species": (cl.get("species") or {}).get("value", ""),
            "diseases": [
                (d.get("terminology") or {}).get("value", "")
                for d in cl.get("diseaseList") or []
            ],
            "derived_from_site": (
                cl.get("derivedFromSite") or {}
            ).get("value", ""),
            "is_contaminated": cl.get("isContaminated", False),
            "is_problematic": cl.get("isProblematic", False),
        })

    # Passing the column list keeps the schema stable for an empty result.
    df = pd.DataFrame(results, columns=columns)
    print(f"Cellosaurus search '{query}': {len(df)} cell lines")
    return df
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## 2. 細胞株詳細情報取得
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
def get_cellosaurus_entry(accession, timeout=30):
    """
    Fetch one Cellosaurus cell line record and flatten it into a dict.

    Parameters:
        accession: str — Cellosaurus accession (e.g., "CVCL_0030")
        timeout: float — seconds to wait for the HTTP response; without
            it a stalled connection would block the caller indefinitely

    Returns:
        dict with the keys accession, name, category, sex, age, species,
        diseases (list of {name, accession}), derived_from_site,
        is_contaminated, contamination_comment, str_profile (the raw
        "strList" value), references (list of {pmid, title}) and
        cross_references (list of {database, accession}).

    Raises:
        requests.HTTPError — when the server answers with an error status.
    """
    resp = requests.get(
        f"{CELLOSAURUS_API}/cell-line/{accession}",
        params={"format": "json"},
        timeout=timeout,
    )
    resp.raise_for_status()
    data = resp.json() or {}

    # NOTE(review): the response keys used below are kept exactly as
    # originally written — confirm them against the live API schema.
    # `x.get(key) or default` also covers keys that are present but null.
    cl = data.get("cellLine") or {}

    diseases = []
    for d in cl.get("diseaseList") or []:
        terminology = d.get("terminology") or {}
        diseases.append({
            "name": terminology.get("value", ""),
            "accession": terminology.get("accession", ""),
        })

    entry = {
        "accession": cl.get("accession", ""),
        "name": cl.get("name", ""),
        "category": cl.get("category", ""),
        "sex": cl.get("sex", ""),
        "age": cl.get("age", ""),
        "species": (cl.get("species") or {}).get("value", ""),
        "diseases": diseases,
        "derived_from_site": (cl.get("derivedFromSite") or {}).get("value", ""),
        "is_contaminated": cl.get("isContaminated", False),
        "contamination_comment": cl.get("contaminationComment", ""),
        "str_profile": cl.get("strList") or [],
        "references": [
            {
                "pmid": r.get("pubmedId", ""),
                "title": r.get("title", ""),
            }
            for r in cl.get("referenceList") or []
        ],
        "cross_references": [
            {
                "database": xr.get("database", ""),
                "accession": xr.get("accession", ""),
            }
            for xr in cl.get("xrefList") or []
        ],
    }

    print(f"Cellosaurus {accession}: {entry['name']} "
          f"({entry['species']}, {entry['category']})")
    return entry
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## 3. STR プロファイル検証
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
def check_str_profile(accession, str_data=None, min_match_pct=80):
    """
    Verify cell line identity against the Cellosaurus STR
    (Short Tandem Repeat) reference profile.

    Parameters:
        accession: str — Cellosaurus accession
        str_data: dict — measured STR data {marker: alleles}; alleles may
            be a comma-separated string ("11,12") or a list/tuple/set of
            allele labels. When None, only the reference profile is
            returned.
        min_match_pct: float — minimum percentage of compared markers
            that must match for ``is_authenticated`` to be True

    Returns:
        dict — one of three shapes:
          * no reference profile: {"match": None, "message": ...}
          * str_data is None: {"reference_str": {...}, "marker_count": n}
          * otherwise: {"match_percentage", "matched", "total_compared",
            "is_authenticated", "details"}

    Only markers present in both the reference and ``str_data`` are
    compared; a marker counts as matched when both sides carry exactly
    the same set of alleles.
    """
    entry = get_cellosaurus_entry(accession)
    ref_str = entry.get("str_profile") or []

    if not ref_str:
        print(f"WARNING: {accession} has no STR profile in Cellosaurus")
        return {"match": None, "message": "No reference STR profile available"}

    ref_markers = {
        marker.get("marker", ""): marker.get("alleles", "")
        for marker in ref_str
    }

    if str_data is None:
        print(f"Reference STR for {accession}: {len(ref_markers)} markers")
        return {"reference_str": ref_markers, "marker_count": len(ref_markers)}

    # Calculate match percentage over the markers both profiles share.
    matched = 0
    total = 0
    details = []
    for marker, ref_alleles in ref_markers.items():
        if marker not in str_data:
            continue
        total += 1
        measured = str_data[marker]
        # Compare normalized allele sets so that "11, 12" equals "11,12"
        # and a list such as ["11", "12"] equals the string form.
        if _split_alleles(ref_alleles) == _split_alleles(measured):
            matched += 1
            details.append({"marker": marker, "match": True})
        else:
            details.append({
                "marker": marker, "match": False,
                "reference": ref_alleles, "measured": measured,
            })

    # With no shared markers there is nothing to compare: report 0 %.
    match_pct = (matched / total * 100) if total > 0 else 0
    result = {
        "match_percentage": match_pct,
        "matched": matched,
        "total_compared": total,
        "is_authenticated": match_pct >= min_match_pct,
        "details": details,
    }

    status = "PASS" if result["is_authenticated"] else "FAIL"
    print(f"STR verification {accession}: {match_pct:.1f}% match → {status}")
    return result


def _split_alleles(alleles):
    """Return the set of allele labels in *alleles*, whitespace-stripped."""
    if isinstance(alleles, (list, tuple, set, frozenset)):
        parts = [str(a) for a in alleles]
    else:
        parts = str(alleles).split(",")
    return {p.strip() for p in parts if p.strip()}
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
## 4. コンタミネーション・問題細胞株チェック
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
def check_contamination_status(cell_line_names):
    """
    Report contamination / misidentification flags for a list of cell lines.

    Each name is looked up with ``search_cellosaurus(name, limit=1)`` and
    the first hit (if any) is used.

    Parameters:
        cell_line_names: list — cell line names to check

    Returns:
        pandas.DataFrame — one row per input name. Names without a hit
        carry found=False and None for both flags; names with a hit also
        carry accession, official_name, species and diseases.
    """
    def _status_for(name):
        # Build the report row for a single cell line name.
        hits = search_cellosaurus(name, limit=1)
        if hits.empty:
            return {
                "name": name, "found": False,
                "is_contaminated": None, "is_problematic": None,
            }
        top = hits.iloc[0]
        return {
            "name": name,
            "found": True,
            "accession": top.get("accession", ""),
            "official_name": top.get("name", ""),
            "is_contaminated": top.get("is_contaminated", False),
            "is_problematic": top.get("is_problematic", False),
            "species": top.get("species", ""),
            "diseases": top.get("diseases", []),
        }

    df = pd.DataFrame([_status_for(name) for name in cell_line_names])

    # An empty input yields a frame without columns, hence the guards.
    if "is_contaminated" in df:
        contaminated = df["is_contaminated"].sum()
    else:
        contaminated = 0
    if "is_problematic" in df:
        problematic = df["is_problematic"].sum()
    else:
        problematic = 0

    print(f"Cell line check: {len(cell_line_names)} lines, "
          f"{contaminated} contaminated, {problematic} problematic")
    return df
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
---
|
|
234
|
+
|
|
235
|
+
## 利用可能ツール
|
|
236
|
+
|
|
237
|
+
| ToolUniverse カテゴリ | 主なツール |
|
|
238
|
+
|---|---|
|
|
239
|
+
| `cellosaurus` | `Cellosaurus_search`, `Cellosaurus_get_cell_line`, `Cellosaurus_get_str_profile` |
|
|
240
|
+
|
|
241
|
+
## パイプライン出力
|
|
242
|
+
|
|
243
|
+
| 出力ファイル | 説明 | 連携先スキル |
|
|
244
|
+
|---|---|---|
|
|
245
|
+
| `results/cell_lines.csv` | 細胞株メタデータ | → cancer-genomics, precision-oncology |
|
|
246
|
+
| `results/str_verification.json` | STR 検証結果 | → lab-automation, lab-data-management |
|
|
247
|
+
| `results/contamination_report.json` | コンタミレポート | → research-methodology |
|
|
248
|
+
|
|
249
|
+
## パイプライン統合
|
|
250
|
+
|
|
251
|
+
```
|
|
252
|
+
cancer-genomics ──→ cell-line-resources ──→ lab-automation
|
|
253
|
+
(COSMIC/DepMap) (Cellosaurus STR) (プロトコル管理)
|
|
254
|
+
│
|
|
255
|
+
├──→ precision-oncology (腫瘍細胞株)
|
|
256
|
+
├──→ disease-research (疾患モデル)
|
|
257
|
+
└──→ human-protein-atlas (発現データ)
|
|
258
|
+
```
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-compound-screening
|
|
3
|
+
description: |
|
|
4
|
+
化合物スクリーニングスキル。ZINC データベースを活用した購入可能化合物検索、
|
|
5
|
+
SMILES/名前ベースの類似性検索、カタログフィルタリング、
|
|
6
|
+
バーチャルスクリーニング前処理パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Compound Screening
|
|
10
|
+
|
|
11
|
+
ZINC データベースを活用した化合物ライブラリ検索・
|
|
12
|
+
バーチャルスクリーニング前処理パイプラインを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- 購入可能な化合物ライブラリを検索するとき
|
|
17
|
+
- SMILES 構造式から類似化合物を探すとき
|
|
18
|
+
- 化合物名からデータベースレコードを取得するとき
|
|
19
|
+
- ベンダーカタログの絞り込みを行うとき
|
|
20
|
+
- バーチャルスクリーニング用の化合物セットを準備するとき
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Quick Start
|
|
25
|
+
|
|
26
|
+
## 1. ZINC 化合物名検索
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
import requests
|
|
30
|
+
import pandas as pd
|
|
31
|
+
|
|
32
|
+
ZINC_API = "https://zinc15.docking.org"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def zinc_search_by_name(name, max_results=20, timeout=30):
    """
    Search the ZINC database by compound name.

    Parameters:
        name: str — compound name (e.g., "aspirin")
        max_results: int — maximum results (sent as ``count``)
        timeout: float — seconds to wait for the HTTP response; without
            it a stalled connection would block the caller indefinitely

    Returns:
        pandas.DataFrame — one row per compound with the columns zinc_id,
        name, smiles, mwt, logp and purchasable. The columns are present
        even when nothing is found.

    Raises:
        requests.HTTPError — when the server answers with an error status.

    ToolUniverse:
        ZINC_search_by_name(name=name)
    """
    # NOTE(review): the other ZINC endpoints in this file request ".json"
    # URLs — confirm this search endpoint returns JSON without the suffix.
    url = f"{ZINC_API}/substances/search"
    params = {"q": name, "count": max_results}
    resp = requests.get(url, params=params, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # `data or []` tolerates a null/empty JSON body.
    results = [
        {
            "zinc_id": item.get("zinc_id", ""),
            "name": item.get("name", ""),
            "smiles": item.get("smiles", ""),
            "mwt": item.get("mwt", ""),
            "logp": item.get("logp", ""),
            "purchasable": item.get("purchasability", ""),
        }
        for item in data or []
    ]

    # Passing the column list keeps the schema stable for an empty result.
    df = pd.DataFrame(
        results,
        columns=["zinc_id", "name", "smiles", "mwt", "logp", "purchasable"],
    )
    print(f"ZINC search '{name}': {len(df)} compounds")
    return df
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## 2. ZINC SMILES 類似性検索
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
def zinc_search_by_smiles(smiles, similarity=0.7, max_results=20, timeout=30):
    """
    Run a ZINC similarity search from a SMILES structure.

    Parameters:
        smiles: str — SMILES string
        similarity: float — Tanimoto similarity threshold (0-1)
        max_results: int — maximum results (sent as ``count``)
        timeout: float — seconds to wait for the HTTP response; without
            it a stalled connection would block the caller indefinitely

    Returns:
        pandas.DataFrame — one row per compound with the columns zinc_id,
        smiles, similarity, mwt, logp and purchasable. The columns are
        present even when nothing is found.

    Raises:
        requests.HTTPError — when the server answers with an error status.

    ToolUniverse:
        ZINC_search_by_smiles(smiles=smiles)
    """
    # NOTE(review): the other ZINC endpoints in this file request ".json"
    # URLs — confirm this search endpoint returns JSON without the suffix.
    url = f"{ZINC_API}/substances/search"
    params = {
        "smiles": smiles,
        "similarity": similarity,
        "count": max_results,
    }
    resp = requests.get(url, params=params, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # `data or []` tolerates a null/empty JSON body.
    results = [
        {
            "zinc_id": item.get("zinc_id", ""),
            "smiles": item.get("smiles", ""),
            "similarity": item.get("similarity", ""),
            "mwt": item.get("mwt", ""),
            "logp": item.get("logp", ""),
            "purchasable": item.get("purchasability", ""),
        }
        for item in data or []
    ]

    # Passing the column list keeps the schema stable for an empty result.
    df = pd.DataFrame(
        results,
        columns=["zinc_id", "smiles", "similarity", "mwt", "logp",
                 "purchasable"],
    )
    print(f"ZINC SMILES search: {len(df)} similar compounds "
          f"(threshold={similarity})")
    return df
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## 3. ZINC 化合物詳細取得
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
def zinc_get_substance(zinc_id, timeout=30):
    """
    Fetch the full ZINC record for one substance.

    Parameters:
        zinc_id: str — ZINC ID (e.g., "ZINC000000000001")
        timeout: float — seconds to wait for the HTTP response; without
            it a stalled connection would block the caller indefinitely

    Returns:
        tuple (info, data) — ``info`` is a dict with the selected keys
        zinc_id, name, smiles, inchikey, mwt, logp, num_rotatable_bonds,
        num_hba, num_hbd, tpsa and purchasable; ``data`` is the decoded
        JSON response.

    Raises:
        requests.HTTPError — when the server answers with an error status.

    ToolUniverse:
        ZINC_get_substance(zinc_id=zinc_id)
    """
    url = f"{ZINC_API}/substances/{zinc_id}.json"
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # Read through an empty dict when the body decodes to null so the
    # summary can still be built; `data` itself is returned unchanged.
    record = data or {}
    # Response field -> summary key; only "purchasability" is renamed.
    fields = {
        "zinc_id": "zinc_id",
        "name": "name",
        "smiles": "smiles",
        "inchikey": "inchikey",
        "mwt": "mwt",
        "logp": "logp",
        "num_rotatable_bonds": "num_rotatable_bonds",
        "num_hba": "num_hba",
        "num_hbd": "num_hbd",
        "tpsa": "tpsa",
        "purchasability": "purchasable",
    }
    info = {out_key: record.get(src, "") for src, out_key in fields.items()}

    print(f"ZINC {zinc_id}: {info['name']} (MW={info['mwt']})")
    return info, data
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## 4. ZINC カタログ一覧
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
def zinc_get_catalogs(timeout=30):
    """
    List the catalogs (vendors) available in ZINC.

    Parameters:
        timeout: float — seconds to wait for the HTTP response; without
            it a stalled connection would block the caller indefinitely

    Returns:
        pandas.DataFrame — one row per catalog with the columns
        catalog_name, short_name, num_substances and url. The columns
        are present even when the list is empty.

    Raises:
        requests.HTTPError — when the server answers with an error status.

    ToolUniverse:
        ZINC_get_catalogs()
    """
    url = f"{ZINC_API}/catalogs.json"
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # `data or []` tolerates a null/empty JSON body.
    results = [
        {
            "catalog_name": cat.get("name", ""),
            "short_name": cat.get("short_name", ""),
            "num_substances": cat.get("num_substances", 0),
            "url": cat.get("url", ""),
        }
        for cat in data or []
    ]

    # Passing the column list keeps the schema stable for an empty result.
    df = pd.DataFrame(
        results,
        columns=["catalog_name", "short_name", "num_substances", "url"],
    )
    print(f"ZINC catalogs: {len(df)} vendors")
    return df
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## 5. バーチャルスクリーニング前処理パイプライン
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
def virtual_screening_prep(query_smiles, lipinski=True, max_compounds=100,
                           similarity=0.6):
    """
    Prepare a compound set for virtual screening.

    Searches ZINC for compounds similar to ``query_smiles``, optionally
    applies a drug-likeness filter, and sorts the result by similarity
    (highest first).

    Parameters:
        query_smiles: str — SMILES string of the query structure
        lipinski: bool — when True, keep only compounds with
            molecular weight <= 500 and logP <= 5. These are the two
            Lipinski Rule-of-Five criteria that can be evaluated from the
            search result columns; the hydrogen-bond donor/acceptor
            criteria are NOT checked here.
        max_compounds: int — maximum number of search hits requested
        similarity: float — similarity threshold passed to the search

    Returns:
        pandas.DataFrame — the filtered, similarity-sorted compounds, or
        the empty search result when nothing similar is found.

    ToolUniverse (cross-tool):
        ZINC_search_by_smiles(smiles=query_smiles) → ZINC_get_substance(zinc_id)
    """
    # Step 1: Similar compound search
    df = zinc_search_by_smiles(query_smiles, similarity=similarity,
                               max_results=max_compounds)

    if df.empty:
        print("No similar compounds found")
        return df

    # Step 2: Lipinski filter (MW and logP only — see docstring)
    if lipinski:
        df["mwt"] = pd.to_numeric(df["mwt"], errors="coerce")
        df["logp"] = pd.to_numeric(df["logp"], errors="coerce")
        before = len(df)
        # Rows whose values could not be parsed become NaN and fail both
        # comparisons, so they are dropped. `.copy()` detaches the result
        # from the original frame so the column assignment below does not
        # trigger pandas' SettingWithCopyWarning.
        df = df[(df["mwt"] <= 500) & (df["logp"] <= 5)].copy()
        print(f"Lipinski filter: {before} → {len(df)} compounds")

    # Step 3: Sort by similarity
    df["similarity"] = pd.to_numeric(df["similarity"], errors="coerce")
    df = df.sort_values("similarity", ascending=False)

    print(f"VS prep: {len(df)} compounds ready for screening")
    return df
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
## References
|
|
213
|
+
|
|
214
|
+
### Output Files
|
|
215
|
+
|
|
216
|
+
| ファイル | 形式 |
|
|
217
|
+
|---|---|
|
|
218
|
+
| `results/zinc_search.csv` | CSV |
|
|
219
|
+
| `results/zinc_similar.csv` | CSV |
|
|
220
|
+
| `results/zinc_substance.json` | JSON |
|
|
221
|
+
| `results/zinc_catalogs.csv` | CSV |
|
|
222
|
+
| `results/vs_library.csv` | CSV |
|
|
223
|
+
|
|
224
|
+
### 利用可能ツール
|
|
225
|
+
|
|
226
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
227
|
+
|---|---|---|
|
|
228
|
+
| ZINC | `ZINC_search_by_name` | 化合物名検索 |
|
|
229
|
+
| ZINC | `ZINC_search_by_smiles` | SMILES 類似性検索 |
|
|
230
|
+
| ZINC | `ZINC_get_substance` | 化合物詳細 |
|
|
231
|
+
| ZINC | `ZINC_get_catalogs` | カタログ一覧 |
|
|
232
|
+
|
|
233
|
+
### 参照スキル
|
|
234
|
+
|
|
235
|
+
| スキル | 関連 |
|
|
236
|
+
|---|---|
|
|
237
|
+
| `scientific-compound-similarity` | 化合物類似性 |
|
|
238
|
+
| `scientific-pharmacology-targets` | 薬理学ターゲット |
|
|
239
|
+
| `scientific-molecular-docking` | 分子ドッキング |
|
|
240
|
+
| `scientific-drug-target-interaction` | DTI 解析 |
|
|
241
|
+
| `scientific-admet-toxicity` | ADMET 毒性 |
|
|
242
|
+
|
|
243
|
+
### 依存パッケージ
|
|
244
|
+
|
|
245
|
+
`requests`, `pandas`
|