@nahisaho/satori 0.11.1 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +125 -56
- package/package.json +1 -1
- package/src/.github/skills/scientific-biothings-idmapping/SKILL.md +298 -0
- package/src/.github/skills/scientific-cancer-genomics/SKILL.md +287 -0
- package/src/.github/skills/scientific-clinical-reporting/SKILL.md +324 -0
- package/src/.github/skills/scientific-compound-screening/SKILL.md +245 -0
- package/src/.github/skills/scientific-genome-sequence-tools/SKILL.md +304 -0
- package/src/.github/skills/scientific-healthcare-ai/SKILL.md +273 -0
- package/src/.github/skills/scientific-human-protein-atlas/SKILL.md +244 -0
- package/src/.github/skills/scientific-literature-search/SKILL.md +443 -0
- package/src/.github/skills/scientific-metabolic-modeling/SKILL.md +288 -0
- package/src/.github/skills/scientific-metabolomics-databases/SKILL.md +288 -0
- package/src/.github/skills/scientific-molecular-docking/SKILL.md +303 -0
- package/src/.github/skills/scientific-noncoding-rna/SKILL.md +262 -0
- package/src/.github/skills/scientific-pathway-enrichment/SKILL.md +449 -0
- package/src/.github/skills/scientific-pharmacology-targets/SKILL.md +323 -0
- package/src/.github/skills/scientific-protein-domain-family/SKILL.md +369 -0
- package/src/.github/skills/scientific-protein-interaction-network/SKILL.md +352 -0
- package/src/.github/skills/scientific-rare-disease-genetics/SKILL.md +327 -0
- package/src/.github/skills/scientific-structural-proteomics/SKILL.md +317 -0
- package/src/.github/skills/scientific-systematic-review/SKILL.md +361 -0
- package/src/.github/skills/scientific-variant-effect-prediction/SKILL.md +325 -0
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-clinical-reporting
|
|
3
|
+
description: |
|
|
4
|
+
臨床レポート自動生成スキル。検査結果サマリー (SOAP ノート)、バイオマーカー
|
|
5
|
+
プロファイルレポート、薬理ゲノミクスレポート、臨床試験要約を構造化テンプレート
|
|
6
|
+
(PDF/LaTeX/HTML) で出力。HL7 FHIR DiagnosticReport 形式にも対応。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Clinical Reporting
|
|
10
|
+
|
|
11
|
+
臨床データから構造化レポートを自動生成するパイプラインを提供する。
|
|
12
|
+
|
|
13
|
+
## When to Use
|
|
14
|
+
|
|
15
|
+
- 検査結果を SOAP ノート形式でまとめるとき
|
|
16
|
+
- バイオマーカープロファイルレポートを作成するとき
|
|
17
|
+
- ファーマコゲノミクスレポート (CPIC ガイドライン準拠) が必要なとき
|
|
18
|
+
- 臨床試験の CSR (Clinical Study Report) サマリーを生成するとき
|
|
19
|
+
- HL7 FHIR DiagnosticReport 形式で出力するとき
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Quick Start
|
|
24
|
+
|
|
25
|
+
## 1. SOAP ノート生成
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
import json
|
|
29
|
+
from datetime import datetime
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def generate_soap_note(patient_data, findings, assessment, plan):
    """
    Build a SOAP-format (Subjective / Objective / Assessment / Plan) note.

    Parameters:
        patient_data: dict — e.g. {"id": "...", "age": 45, "sex": "M", ...};
            only "id", "age" and "sex" are copied into the note.
        findings: dict — e.g. {"subjective": [...], "objective": [...]};
            also read for "chief_complaint", "vitals", "lab_results",
            "imaging", "differential", "follow_up" and "referrals".
        assessment: list — assessments / diagnoses.
        plan: list — treatment plan items.

    Any argument passed as None is treated as empty instead of raising.

    Returns:
        dict with keys "report_type", "generated_at", "patient",
        "S", "O", "A" and "P".
    """
    # Normalise missing inputs up front so the .get() and len() calls
    # below cannot fail on None.
    patient_data = patient_data or {}
    findings = findings or {}
    assessment = [] if assessment is None else assessment
    plan = [] if plan is None else plan

    soap = {
        "report_type": "SOAP_Note",
        "generated_at": datetime.now().isoformat(),
        "patient": {
            "id": patient_data.get("id", "ANON"),
            "age": patient_data.get("age"),
            "sex": patient_data.get("sex"),
        },
        "S": {  # Subjective
            "chief_complaint": findings.get("chief_complaint", ""),
            "history": findings.get("subjective", []),
        },
        "O": {  # Objective
            "vitals": findings.get("vitals", {}),
            "lab_results": findings.get("lab_results", []),
            "imaging": findings.get("imaging", []),
            "physical_exam": findings.get("objective", []),
        },
        "A": {  # Assessment
            "diagnoses": assessment,
            "differential": findings.get("differential", []),
        },
        "P": {  # Plan
            "treatment": plan,
            "follow_up": findings.get("follow_up", ""),
            "referrals": findings.get("referrals", []),
        },
    }

    print(f"SOAP note: patient={soap['patient']['id']}, "
          f"diagnoses={len(assessment)}, plans={len(plan)}")
    return soap
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## 2. バイオマーカープロファイルレポート
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
import pandas as pd
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def biomarker_profile_report(biomarkers_df, reference_ranges=None):
    """
    Build a biomarker profile report that flags out-of-range values.

    Parameters:
        biomarkers_df: DataFrame — columns: [marker, value, unit, specimen].
            "value" must be convertible with float().
        reference_ranges: dict — {"marker": {"low": x, "high": y, "unit": "..."}}.
            When None, a small built-in table of tumour markers is used.

    Returns:
        dict with keys "report_type", "total_markers", "abnormal_count",
        "results" (one record per input row) and "summary".

    Notes:
        A marker with no entry in ``reference_ranges`` is reported with
        status "normal" and reference "?-?", because nothing can be
        compared; treat such rows as unevaluated.
    """
    if reference_ranges is None:
        reference_ranges = {
            "CEA": {"low": 0, "high": 5.0, "unit": "ng/mL"},
            "AFP": {"low": 0, "high": 10.0, "unit": "ng/mL"},
            "CA19-9": {"low": 0, "high": 37.0, "unit": "U/mL"},
            "CA125": {"low": 0, "high": 35.0, "unit": "U/mL"},
            "PSA": {"low": 0, "high": 4.0, "unit": "ng/mL"},
            "HER2": {"low": 0, "high": 1, "unit": "IHC score"},
            "Ki-67": {"low": 0, "high": 14, "unit": "%"},
            "PD-L1 TPS": {"low": 0, "high": 1, "unit": "%"},
        }

    results = []
    for _, row in biomarkers_df.iterrows():
        marker = row["marker"]
        value = float(row["value"])
        ref = reference_ranges.get(marker, {})

        status = "normal"
        if ref:
            # A missing bound defaults to +/- infinity, i.e. is never violated.
            if value > ref.get("high", float("inf")):
                status = "HIGH"
            elif value < ref.get("low", float("-inf")):
                status = "LOW"

        results.append({
            "marker": marker,
            "value": value,
            # Prefer the unit given with the measurement; fall back to the
            # reference table's unit when the frame has no "unit" column.
            "unit": row.get("unit", ref.get("unit", "")),
            "reference": f"{ref.get('low', '?')}-{ref.get('high', '?')}",
            "status": status,
        })

    # Fix the column set explicitly: with zero rows, pd.DataFrame([]) has
    # no columns and the "status" lookup below would raise KeyError.
    report_df = pd.DataFrame(
        results, columns=["marker", "value", "unit", "reference", "status"]
    )
    abnormal = report_df[report_df["status"] != "normal"]

    report = {
        "report_type": "Biomarker_Profile",
        "total_markers": len(report_df),
        "abnormal_count": len(abnormal),
        "results": report_df.to_dict("records"),
        "summary": (
            f"{len(abnormal)} of {len(report_df)} markers outside reference range"
            if len(abnormal) > 0
            else "All markers within reference range"
        ),
    }

    print(f"Biomarker profile: {len(report_df)} markers, "
          f"{len(abnormal)} abnormal")
    return report
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## 3. ファーマコゲノミクスレポート
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
def pharmacogenomics_report(genotypes, medications):
    """
    Build a pharmacogenomics report from a simplified CPIC-style lookup.

    Parameters:
        genotypes: dict — {"CYP2D6": "*1/*4", "CYP2C19": "*1/*2", ...}
        medications: list — ["codeine", "clopidogrel", ...]; drug names
            are matched case-insensitively.

    Returns:
        dict with keys "report_type", "genotypes_tested",
        "medications_queried" and "recommendations" (one entry per drug).

    Notes:
        The tables below are a deliberately small illustration, not the
        full CPIC guidelines. "Standard dosing" is emitted only when a
        phenotype was actually resolved and the table lists no special
        action for it. Drugs missing from the table, and genes with a
        missing or unrecognised genotype, get an explicit "no
        recommendation" message instead.
    """
    # Genotype (diplotype) -> phenotype mapping (simplified).
    cpic_phenotypes = {
        "CYP2D6": {
            "*1/*1": "Normal Metabolizer",
            "*1/*4": "Intermediate Metabolizer",
            "*4/*4": "Poor Metabolizer",
            "*1/*2xN": "Ultrarapid Metabolizer",
        },
        "CYP2C19": {
            "*1/*1": "Normal Metabolizer",
            "*1/*2": "Intermediate Metabolizer",
            "*2/*2": "Poor Metabolizer",
            "*1/*17": "Rapid Metabolizer",
            "*17/*17": "Ultrarapid Metabolizer",
        },
    }

    # Recommended action per drug and phenotype (simplified).
    drug_gene_map = {
        "codeine": {"gene": "CYP2D6", "action": {
            "Poor Metabolizer": "AVOID — use alternative analgesic",
            "Ultrarapid Metabolizer": "AVOID — toxicity risk",
            "Intermediate Metabolizer": "Use with caution, consider alternative",
        }},
        "clopidogrel": {"gene": "CYP2C19", "action": {
            "Poor Metabolizer": "Use alternative antiplatelet (e.g., prasugrel)",
            "Intermediate Metabolizer": "Consider alternative antiplatelet",
        }},
    }

    recommendations = []
    for drug in medications:
        entry = drug_gene_map.get(str(drug).strip().lower())

        if entry is None:
            # No rule for this drug: say so rather than implying that
            # standard dosing has been verified.
            recommendations.append({
                "drug": drug,
                "gene": "Unknown",
                "genotype": "Unknown",
                "phenotype": "Indeterminate",
                "recommendation": "No recommendation — drug not in lookup table",
                "cpic_level": "N/A",
            })
            continue

        gene = entry["gene"]
        genotype = genotypes.get(gene, "Unknown")
        phenotype_map = cpic_phenotypes.get(gene, {})
        phenotype = phenotype_map.get(genotype)
        if phenotype is None and isinstance(genotype, str) and genotype.count("/") == 1:
            # Allele order within a diplotype carries no meaning, so
            # "*4/*1" must resolve the same way as "*1/*4".
            first, second = genotype.split("/")
            phenotype = phenotype_map.get(f"{second}/{first}")
        if phenotype is None:
            phenotype = "Indeterminate"

        if phenotype == "Indeterminate":
            action = "No recommendation — phenotype could not be determined"
        else:
            action = entry["action"].get(phenotype, "Standard dosing")

        recommendations.append({
            "drug": drug,
            "gene": gene,
            "genotype": genotype,
            "phenotype": phenotype,
            "recommendation": action,
            "cpic_level": "A",
        })

    report = {
        "report_type": "Pharmacogenomics",
        "genotypes_tested": genotypes,
        "medications_queried": medications,
        "recommendations": recommendations,
    }

    print(f"PGx report: {len(genotypes)} genes, {len(medications)} drugs, "
          f"{len(recommendations)} recommendations")
    return report
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## 4. 構造化レポート出力 (LaTeX/HTML)
|
|
215
|
+
|
|
216
|
+
```python
|
|
217
|
+
def export_clinical_report(report, output_format="html",
                           output_path="reports/clinical_report"):
    """
    Write a clinical report to disk as HTML, LaTeX or FHIR JSON.

    Parameters:
        report: dict — a SOAP, Biomarker or PGx report; its "report_type"
            entry selects the HTML layout.
        output_format: "html", "latex" or "fhir_json".
        output_path: str — destination path without extension; the parent
            directory is created when the path has one.

    Returns:
        str — path of the file that was written.

    Raises:
        ValueError: if ``output_format`` is not one of the supported values.

    Notes:
        Files are written as UTF-8. Values taken from ``report`` are
        HTML-escaped / LaTeX-escaped before being embedded in markup.
        The HTML body is only filled in for "SOAP_Note" and
        "Biomarker_Profile" reports; other types get the heading alone.
        The LaTeX and FHIR outputs are skeletons carrying the report type
        only.
    """
    import html
    import json
    import os
    from datetime import datetime

    supported = ("html", "latex", "fhir_json")
    if output_format not in supported:
        # Fail before touching the filesystem instead of reaching the
        # final print with `filepath` unbound.
        raise ValueError(
            f"Unsupported output_format {output_format!r}; "
            f"expected one of {supported}"
        )

    def _esc(value):
        # Escape text that is placed inside an HTML element.
        return html.escape(str(value), quote=False)

    # os.makedirs("") raises, so only create a directory when the path
    # actually has a directory component.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    report_type = report.get("report_type", "Clinical")

    if output_format == "html":
        filepath = f"{output_path}.html"
        html_parts = [
            "<!DOCTYPE html><html><head>",
            '<meta charset="utf-8">',
            f"<title>{_esc(report_type)} Report</title>",
            "<style>body{font-family:Arial;margin:2em;}"
            "table{border-collapse:collapse;width:100%;}"
            "td,th{border:1px solid #ddd;padding:8px;}</style>",
            "</head><body>",
            f"<h1>{_esc(report_type)} Report</h1>",
        ]

        if report_type == "SOAP_Note":
            for section in ["S", "O", "A", "P"]:
                section_json = json.dumps(
                    report.get(section, {}), indent=2, ensure_ascii=False
                )
                html_parts.append(f"<h2>{section}</h2>")
                html_parts.append(f"<pre>{_esc(section_json)}</pre>")

        elif report_type == "Biomarker_Profile":
            html_parts.append("<table><tr><th>Marker</th><th>Value</th>"
                              "<th>Reference</th><th>Status</th></tr>")
            for r in report.get("results", []):
                status_color = "red" if r["status"] != "normal" else "green"
                html_parts.append(
                    f"<tr><td>{_esc(r['marker'])}</td>"
                    f"<td>{_esc(r['value'])} {_esc(r['unit'])}</td>"
                    f"<td>{_esc(r['reference'])}</td>"
                    f"<td style='color:{status_color}'>{_esc(r['status'])}</td></tr>"
                )
            html_parts.append("</table>")

        html_parts.append("</body></html>")
        with open(filepath, "w", encoding="utf-8") as f:
            f.write("\n".join(html_parts))

    elif output_format == "fhir_json":
        filepath = f"{output_path}.fhir.json"
        fhir = {
            "resourceType": "DiagnosticReport",
            "status": "final",
            "category": [{"coding": [{"system": "http://terminology.hl7.org/CodeSystem/v2-0074",
                                      "code": "LAB"}]}],
            "code": {"text": report_type},
            # FHIR `instant` values carry a timezone offset, so the
            # fallback timestamp is made timezone-aware.
            "issued": report.get("generated_at",
                                 datetime.now().astimezone().isoformat()),
            "result": [],
        }
        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(fhir, f, indent=2)

    else:  # "latex"
        filepath = f"{output_path}.tex"
        # Characters such as "_" (as in "SOAP_Note") are special in LaTeX
        # and stop the document compiling unless escaped. Mapping one
        # character at a time avoids double-escaping.
        latex_specials = {
            "\\": r"\textbackslash{}",
            "&": r"\&",
            "%": r"\%",
            "$": r"\$",
            "#": r"\#",
            "_": r"\_",
            "{": r"\{",
            "}": r"\}",
            "~": r"\textasciitilde{}",
            "^": r"\textasciicircum{}",
        }
        tex_type = "".join(latex_specials.get(ch, ch) for ch in str(report_type))
        with open(filepath, "w", encoding="utf-8") as f:
            f.write("\\documentclass{article}\n")
            f.write(f"\\title{{{tex_type} Report}}\n")
            f.write("\\begin{document}\n\\maketitle\n")
            f.write(f"Report type: {tex_type}\n")
            f.write("\\end{document}\n")

    print(f"Report exported: {filepath}")
    return filepath
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
## References
|
|
292
|
+
|
|
293
|
+
### Output Files
|
|
294
|
+
|
|
295
|
+
| ファイル | 形式 |
|
|
296
|
+
|---|---|
|
|
297
|
+
| `reports/soap_note.json` | JSON |
|
|
298
|
+
| `reports/biomarker_profile.json` | JSON |
|
|
299
|
+
| `reports/pgx_report.json` | JSON |
|
|
300
|
+
| `reports/clinical_report.html` | HTML |
|
|
301
|
+
| `reports/clinical_report.tex` | LaTeX |
|
|
302
|
+
| `reports/clinical_report.fhir.json` | FHIR JSON |
|
|
303
|
+
|
|
304
|
+
### 利用可能ツール
|
|
305
|
+
|
|
306
|
+
> 本スキルは ToolUniverse ツールに直接依存しない。
|
|
307
|
+
|
|
308
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
309
|
+
|---|---|---|
|
|
310
|
+
| — | — | — |
|
|
311
|
+
|
|
312
|
+
### 参照スキル
|
|
313
|
+
|
|
314
|
+
| スキル | 関連 |
|
|
315
|
+
|---|---|
|
|
316
|
+
| `scientific-variant-interpretation` | バリアント解釈レポート |
|
|
317
|
+
| `scientific-variant-effect-prediction` | バリアント病原性スコア |
|
|
318
|
+
| `scientific-pharmacogenomics` | PGx ガイドライン |
|
|
319
|
+
| `scientific-precision-oncology` | 精密腫瘍学レポート |
|
|
320
|
+
| `scientific-disease-research` | 疾患情報統合 |
|
|
321
|
+
|
|
322
|
+
### 依存パッケージ
|
|
323
|
+
|
|
324
|
+
`pandas`, `json` (stdlib), `datetime` (stdlib), `os` (stdlib)
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-compound-screening
|
|
3
|
+
description: |
|
|
4
|
+
化合物スクリーニングスキル。ZINC データベースを活用した購入可能化合物検索、
|
|
5
|
+
SMILES/名前ベースの類似性検索、カタログフィルタリング、
|
|
6
|
+
バーチャルスクリーニング前処理パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Compound Screening
|
|
10
|
+
|
|
11
|
+
ZINC データベースを活用した化合物ライブラリ検索・
|
|
12
|
+
バーチャルスクリーニング前処理パイプラインを提供する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- 購入可能な化合物ライブラリを検索するとき
|
|
17
|
+
- SMILES 構造式から類似化合物を探すとき
|
|
18
|
+
- 化合物名からデータベースレコードを取得するとき
|
|
19
|
+
- ベンダーカタログの絞り込みを行うとき
|
|
20
|
+
- バーチャルスクリーニング用の化合物セットを準備するとき
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Quick Start
|
|
25
|
+
|
|
26
|
+
## 1. ZINC 化合物名検索
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
import requests
|
|
30
|
+
import pandas as pd
|
|
31
|
+
|
|
32
|
+
# Base URL that every ZINC request in this skill is built from.
ZINC_API = "https://zinc15.docking.org"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def zinc_search_by_name(name, max_results=20, timeout=30):
    """
    Search the ZINC database by compound name.

    Parameters:
        name: str — compound name (e.g., "aspirin")
        max_results: int — maximum results
        timeout: float — seconds to wait for the server before giving up

    Returns:
        DataFrame with columns zinc_id, name, smiles, mwt, logp and
        purchasable (present even when nothing matched).

    Raises:
        requests.HTTPError: on a non-2xx response.
        requests.Timeout: if the server does not answer within ``timeout``.

    ToolUniverse:
        ZINC_search_by_name(name=name)
    """
    # Without a timeout, requests waits indefinitely on a stalled server.
    resp = requests.get(
        f"{ZINC_API}/substances/search",
        params={"q": name, "count": max_results},
        timeout=timeout,
    )
    resp.raise_for_status()
    # NOTE(review): assumes the endpoint answers with a JSON array of
    # objects — confirm against the ZINC15 API.
    data = resp.json()

    results = [
        {
            "zinc_id": item.get("zinc_id", ""),
            "name": item.get("name", ""),
            "smiles": item.get("smiles", ""),
            "mwt": item.get("mwt", ""),
            "logp": item.get("logp", ""),
            "purchasable": item.get("purchasability", ""),
        }
        for item in data
    ]

    # Explicit columns keep the frame's schema stable for an empty result.
    df = pd.DataFrame(
        results,
        columns=["zinc_id", "name", "smiles", "mwt", "logp", "purchasable"],
    )
    print(f"ZINC search '{name}': {len(df)} compounds")
    return df
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## 2. ZINC SMILES 類似性検索
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
def zinc_search_by_smiles(smiles, similarity=0.7, max_results=20, timeout=30):
    """
    Run a ZINC similarity search for a SMILES structure.

    Parameters:
        smiles: str — SMILES string
        similarity: float — Tanimoto similarity threshold (0-1)
        max_results: int — maximum results
        timeout: float — seconds to wait for the server before giving up

    Returns:
        DataFrame with columns zinc_id, smiles, similarity, mwt, logp and
        purchasable (present even when nothing matched).

    Raises:
        requests.HTTPError: on a non-2xx response.
        requests.Timeout: if the server does not answer within ``timeout``.

    ToolUniverse:
        ZINC_search_by_smiles(smiles=smiles)
    """
    query = {
        "smiles": smiles,
        "similarity": similarity,
        "count": max_results,
    }
    # Without a timeout, requests waits indefinitely on a stalled server.
    resp = requests.get(
        f"{ZINC_API}/substances/search", params=query, timeout=timeout
    )
    resp.raise_for_status()
    # NOTE(review): assumes the endpoint answers with a JSON array of
    # objects — confirm against the ZINC15 API.
    data = resp.json()

    results = [
        {
            "zinc_id": item.get("zinc_id", ""),
            "smiles": item.get("smiles", ""),
            "similarity": item.get("similarity", ""),
            "mwt": item.get("mwt", ""),
            "logp": item.get("logp", ""),
            "purchasable": item.get("purchasability", ""),
        }
        for item in data
    ]

    # Explicit columns keep the frame's schema stable for an empty result.
    df = pd.DataFrame(
        results,
        columns=["zinc_id", "smiles", "similarity", "mwt", "logp", "purchasable"],
    )
    print(f"ZINC SMILES search: {len(df)} similar compounds "
          f"(threshold={similarity})")
    return df
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## 3. ZINC 化合物詳細取得
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
def zinc_get_substance(zinc_id, timeout=30):
    """
    Fetch the full record of one compound by its ZINC ID.

    Parameters:
        zinc_id: str — ZINC ID (e.g., "ZINC000000000001")
        timeout: float — seconds to wait for the server before giving up

    Returns:
        tuple (info, data) — ``info`` is a dict of selected fields
        (identifiers and physico-chemical properties, "" when absent);
        ``data`` is the unmodified JSON payload.

    Raises:
        requests.HTTPError: on a non-2xx response.
        requests.Timeout: if the server does not answer within ``timeout``.

    ToolUniverse:
        ZINC_get_substance(zinc_id=zinc_id)
    """
    # Without a timeout, requests waits indefinitely on a stalled server.
    resp = requests.get(f"{ZINC_API}/substances/{zinc_id}.json", timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # Fields copied under their original key.
    copied_fields = (
        "zinc_id", "name", "smiles", "inchikey", "mwt", "logp",
        "num_rotatable_bonds", "num_hba", "num_hbd", "tpsa",
    )
    info = {field: data.get(field, "") for field in copied_fields}
    # The payload's "purchasability" is exposed as "purchasable".
    info["purchasable"] = data.get("purchasability", "")

    print(f"ZINC {zinc_id}: {info['name']} (MW={info['mwt']})")
    return info, data
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## 4. ZINC カタログ一覧
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
def zinc_get_catalogs(timeout=30):
    """
    List the catalogs (vendors) available in ZINC.

    Parameters:
        timeout: float — seconds to wait for the server before giving up

    Returns:
        DataFrame with columns catalog_name, short_name, num_substances
        and url (present even when the list is empty).

    Raises:
        requests.HTTPError: on a non-2xx response.
        requests.Timeout: if the server does not answer within ``timeout``.

    ToolUniverse:
        ZINC_get_catalogs()
    """
    # Without a timeout, requests waits indefinitely on a stalled server.
    resp = requests.get(f"{ZINC_API}/catalogs.json", timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    results = [
        {
            "catalog_name": cat.get("name", ""),
            "short_name": cat.get("short_name", ""),
            "num_substances": cat.get("num_substances", 0),
            "url": cat.get("url", ""),
        }
        for cat in data
    ]

    # Explicit columns keep the frame's schema stable for an empty result.
    df = pd.DataFrame(
        results,
        columns=["catalog_name", "short_name", "num_substances", "url"],
    )
    print(f"ZINC catalogs: {len(df)} vendors")
    return df
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## 5. バーチャルスクリーニング前処理パイプライン
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
def virtual_screening_prep(query_smiles, lipinski=True, max_compounds=100,
                           similarity=0.6):
    """
    Prepare a compound set for virtual screening.

    Searches ZINC for compounds similar to ``query_smiles``, optionally
    applies a drug-likeness filter, and sorts by descending similarity.

    Parameters:
        query_smiles: str — SMILES string of the query structure
        lipinski: bool — when True, keep only rows with MW <= 500 and
            logP <= 5. These are only two of Lipinski's Rule of Five
            criteria; the H-bond donor / acceptor limits are not checked
            because the search result carries no such columns.
        max_compounds: int — maximum number of compounds requested
        similarity: float — Tanimoto similarity threshold for the search

    Returns:
        DataFrame of candidate compounds (empty if the search found none).

    ToolUniverse (cross-tool):
        ZINC_search_by_smiles(smiles=query_smiles) → ZINC_get_substance(zinc_id)
    """
    # Step 1: Similar compound search
    df = zinc_search_by_smiles(query_smiles, similarity=similarity,
                               max_results=max_compounds)

    if df.empty:
        print("No similar compounds found")
        return df

    # Step 2: Lipinski filter
    if lipinski:
        df["mwt"] = pd.to_numeric(df["mwt"], errors="coerce")
        df["logp"] = pd.to_numeric(df["logp"], errors="coerce")
        before = len(df)
        # Rows whose MW or logP could not be parsed become NaN and are
        # dropped here, since NaN fails both comparisons.
        # .copy() detaches the result from the original frame so the
        # column assignment in step 3 does not raise
        # SettingWithCopyWarning.
        df = df[(df["mwt"] <= 500) & (df["logp"] <= 5)].copy()
        print(f"Lipinski filter: {before} → {len(df)} compounds")

    # Step 3: Sort by similarity
    df["similarity"] = pd.to_numeric(df["similarity"], errors="coerce")
    df = df.sort_values("similarity", ascending=False)

    print(f"VS prep: {len(df)} compounds ready for screening")
    return df
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
## References
|
|
213
|
+
|
|
214
|
+
### Output Files
|
|
215
|
+
|
|
216
|
+
| ファイル | 形式 |
|
|
217
|
+
|---|---|
|
|
218
|
+
| `results/zinc_search.csv` | CSV |
|
|
219
|
+
| `results/zinc_similar.csv` | CSV |
|
|
220
|
+
| `results/zinc_substance.json` | JSON |
|
|
221
|
+
| `results/zinc_catalogs.csv` | CSV |
|
|
222
|
+
| `results/vs_library.csv` | CSV |
|
|
223
|
+
|
|
224
|
+
### 利用可能ツール
|
|
225
|
+
|
|
226
|
+
| カテゴリ | 主要ツール | 用途 |
|
|
227
|
+
|---|---|---|
|
|
228
|
+
| ZINC | `ZINC_search_by_name` | 化合物名検索 |
|
|
229
|
+
| ZINC | `ZINC_search_by_smiles` | SMILES 類似性検索 |
|
|
230
|
+
| ZINC | `ZINC_get_substance` | 化合物詳細 |
|
|
231
|
+
| ZINC | `ZINC_get_catalogs` | カタログ一覧 |
|
|
232
|
+
|
|
233
|
+
### 参照スキル
|
|
234
|
+
|
|
235
|
+
| スキル | 関連 |
|
|
236
|
+
|---|---|
|
|
237
|
+
| `scientific-compound-similarity` | 化合物類似性 |
|
|
238
|
+
| `scientific-pharmacology-targets` | 薬理学ターゲット |
|
|
239
|
+
| `scientific-molecular-docking` | 分子ドッキング |
|
|
240
|
+
| `scientific-drug-target-interaction` | DTI 解析 |
|
|
241
|
+
| `scientific-admet-toxicity` | ADMET 毒性 |
|
|
242
|
+
|
|
243
|
+
### 依存パッケージ
|
|
244
|
+
|
|
245
|
+
`requests`, `pandas`
|