@nahisaho/satori 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -30
- package/package.json +1 -1
- package/src/.github/skills/scientific-advanced-imaging/SKILL.md +382 -0
- package/src/.github/skills/scientific-chembl-assay-mining/SKILL.md +509 -0
- package/src/.github/skills/scientific-data-submission/SKILL.md +357 -0
- package/src/.github/skills/scientific-deep-chemistry/SKILL.md +350 -0
- package/src/.github/skills/scientific-ensembl-genomics/SKILL.md +378 -0
- package/src/.github/skills/scientific-expression-comparison/SKILL.md +303 -0
- package/src/.github/skills/scientific-gpu-singlecell/SKILL.md +296 -0
- package/src/.github/skills/scientific-marine-ecology/SKILL.md +429 -0
- package/src/.github/skills/scientific-md-simulation/SKILL.md +315 -0
- package/src/.github/skills/scientific-model-organism-db/SKILL.md +329 -0
- package/src/.github/skills/scientific-nci60-screening/SKILL.md +307 -0
- package/src/.github/skills/scientific-perturbation-analysis/SKILL.md +297 -0
- package/src/.github/skills/scientific-plant-biology/SKILL.md +321 -0
- package/src/.github/skills/scientific-rrna-taxonomy/SKILL.md +379 -0
- package/src/.github/skills/scientific-scatac-signac/SKILL.md +300 -0
- package/src/.github/skills/scientific-scvi-integration/SKILL.md +344 -0
- package/src/.github/skills/scientific-string-network-api/SKILL.md +376 -0
- package/src/.github/skills/scientific-toxicology-env/SKILL.md +309 -0
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-md-simulation
|
|
3
|
+
description: |
|
|
4
|
+
分子動力学シミュレーション解析スキル。MDAnalysis によるトラジェクトリ解析・
|
|
5
|
+
RMSD/RMSF/Rg 時系列指標・水素結合解析・二次構造変化追跡・
|
|
6
|
+
OpenFF Toolkit力場パラメータ化・溶媒和自由エネルギー推定パイプライン。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific MD Simulation
|
|
10
|
+
|
|
11
|
+
MDAnalysis と OpenFF Toolkit を活用した分子動力学 (MD) シミュレーション
|
|
12
|
+
解析パイプラインを提供する。トラジェクトリ読込みから構造指標計算、
|
|
13
|
+
水素結合解析、力場パラメータ化まで統合。
|
|
14
|
+
|
|
15
|
+
## When to Use
|
|
16
|
+
|
|
17
|
+
- MD トラジェクトリ (DCD/XTC/TRR/GRO) を解析するとき
|
|
18
|
+
- RMSD/RMSF/Radius of Gyration 時系列を計算するとき
|
|
19
|
+
- 水素結合パターンを解析するとき
|
|
20
|
+
- OpenFF で小分子の力場パラメータを自動生成するとき
|
|
21
|
+
- タンパク質-リガンド複合体の安定性を評価するとき
|
|
22
|
+
- 溶媒接触表面積 (SASA) を計算するとき
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
## 1. トラジェクトリ読込み & 基本情報
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import MDAnalysis as mda
|
|
32
|
+
import numpy as np
|
|
33
|
+
import pandas as pd
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def load_trajectory(topology, trajectory):
    """
    Load an MD trajectory and summarise its basic properties.

    Parameters:
        topology: str — topology file (PSF/PDB/GRO/PRMTOP)
        trajectory: str — trajectory file (DCD/XTC/TRR)

    Returns:
        tuple — (u, info): the ``mda.Universe`` built from both files and a
        dict holding the atom/residue/segment/frame counts, the trajectory
        ``dt`` (stored as ``dt_ps``), ``n_frames * dt / 1000`` (stored as
        ``total_time_ns``) and the file-format labels of both inputs.

    K-Dense: mdanalysis
    """
    import os

    def _format_of(path):
        # Look only at the final path component so that a dotted directory
        # name ("/run.1/traj") or an extension-less file is not reported as
        # a format; os.fspath also lets path-like objects through.
        filename = os.path.basename(os.fspath(path))
        _, dot, extension = filename.rpartition(".")
        return extension if dot else ""

    u = mda.Universe(topology, trajectory)
    n_frames = u.trajectory.n_frames
    dt = u.trajectory.dt

    info = {
        "n_atoms": u.atoms.n_atoms,
        "n_residues": u.residues.n_residues,
        "n_segments": u.segments.n_segments,
        "n_frames": n_frames,
        "dt_ps": dt,
        "total_time_ns": n_frames * dt / 1000,
        "topology_format": _format_of(topology),
        "trajectory_format": _format_of(trajectory),
    }

    print(f"Loaded: {info['n_atoms']} atoms, {info['n_frames']} frames, "
          f"{info['total_time_ns']:.1f} ns")
    return u, info
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## 2. RMSD 解析
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from MDAnalysis.analysis.rms import RMSD
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def compute_rmsd(universe, selection="backbone", ref_frame=0):
    """
    Compute the RMSD (Root Mean Square Deviation) time series.

    The same universe is passed as both the mobile and the reference
    structure; ``ref_frame`` picks the reference frame within it.

    Parameters:
        universe: mda.Universe — MD universe
        selection: str — atom selection string
        ref_frame: int — index of the reference frame

    Returns:
        pd.DataFrame — columns frame, time_ps, rmsd_A and time_ns.
    """
    analysis = RMSD(universe, universe, select=selection, ref_frame=ref_frame)
    analysis.run()

    # Columns 0, 1 and 2 of the result table are read as frame, time and RMSD.
    table = analysis.results.rmsd
    frame_col, time_col, rmsd_col = table[:, 0], table[:, 1], table[:, 2]

    df = pd.DataFrame({
        "frame": frame_col.astype(int),
        "time_ps": time_col,
        "rmsd_A": rmsd_col,
    })
    df["time_ns"] = df["time_ps"] / 1000

    rmsd_series = df["rmsd_A"]
    print(f"RMSD ({selection}): mean={rmsd_series.mean():.2f} Å, "
          f"max={rmsd_series.max():.2f} Å")
    return df
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## 3. RMSF 解析
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from MDAnalysis.analysis.rms import RMSF as RMSFAnalysis
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def compute_rmsf(universe, selection="name CA"):
    """
    Compute the per-atom RMSF (Root Mean Square Fluctuation) of a selection.

    No structural alignment is performed inside this function.
    NOTE(review): confirm the trajectory is already aligned before calling.

    Parameters:
        universe: mda.Universe — MD universe
        selection: str — atom selection (usually C-alpha atoms)

    Returns:
        pd.DataFrame — columns resid, resname, rmsf_A and a boolean
        ``flexible`` flag set where rmsf_A exceeds mean + 2 * std.
    """
    selected = universe.select_atoms(selection)
    analysis = RMSFAnalysis(selected).run()

    df = pd.DataFrame({
        "resid": selected.resids,
        "resname": selected.resnames,
        "rmsf_A": analysis.results.rmsf,
    })

    # Identify flexible regions: anything above mean + 2 standard deviations.
    fluctuation = df["rmsf_A"]
    cutoff = fluctuation.mean() + 2 * fluctuation.std()
    df["flexible"] = fluctuation > cutoff

    n_flexible = df["flexible"].sum()
    print(f"RMSF: mean={fluctuation.mean():.2f} Å, "
          f"flexible residues={n_flexible}")
    return df
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## 4. Radius of Gyration
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
def compute_radius_of_gyration(universe, selection="protein"):
    """
    Compute the radius of gyration (Rg) time series of a selection.

    Parameters:
        universe: mda.Universe — MD universe
        selection: str — atom selection

    Returns:
        pd.DataFrame — one row per trajectory frame with the columns
        frame, time_ns (``ts.time / 1000``) and rg_A. The columns and their
        dtypes are present even when the trajectory yields no frames.
    """
    group = universe.select_atoms(selection)

    frames, times_ns, rg_values = [], [], []
    for ts in universe.trajectory:
        # Rg is evaluated once per frame while the trajectory is iterated.
        rg_values.append(group.radius_of_gyration())
        frames.append(ts.frame)
        times_ns.append(ts.time / 1000)

    # Build the frame column-wise with explicit dtypes so that an empty
    # trajectory still produces the expected schema instead of a column-less
    # DataFrame (which made the summary below fail with KeyError).
    df = pd.DataFrame({
        "frame": pd.Series(frames, dtype="int64"),
        "time_ns": pd.Series(times_ns, dtype="float64"),
        "rg_A": pd.Series(rg_values, dtype="float64"),
    })
    print(f"Rg: mean={df['rg_A'].mean():.2f} Å, "
          f"std={df['rg_A'].std():.2f} Å")
    return df
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
## 5. 水素結合解析
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
from MDAnalysis.analysis.hydrogenbonds import HydrogenBondAnalysis
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def hydrogen_bond_analysis(universe, donor_sel="protein", acceptor_sel="protein",
                           d_a_cutoff=3.0, angle_cutoff=150):
    """
    Run a hydrogen-bond analysis over the trajectory.

    Parameters:
        universe: mda.Universe — MD universe
        donor_sel: str — donor selection
        acceptor_sel: str — acceptor selection
        d_a_cutoff: float — donor-acceptor distance cutoff (Å)
        angle_cutoff: float — D-H-A angle cutoff (degrees)

    Returns:
        tuple — (hbonds, df_counts): the analysis object after ``run()`` and
        a DataFrame with one row per analysed frame (time_ps, n_hbonds).

    NOTE(review): the default "protein" selections pass every protein atom as
    a donor/acceptor candidate; confirm whether narrower selections are
    intended.
    """
    hbonds = HydrogenBondAnalysis(
        universe,
        donors_sel=donor_sel,
        acceptors_sel=acceptor_sel,
        d_a_cutoff=d_a_cutoff,
        d_h_a_angle_cutoff=angle_cutoff,
    )
    hbonds.run()

    # Number of hydrogen bonds per frame. count_by_time() returns a 1-D array
    # (one count per analysed frame), so it cannot be indexed as a 2-D
    # (time, count) table; the matching time stamps live in hbonds.times.
    counts = hbonds.count_by_time()
    df_counts = pd.DataFrame({
        "time_ps": hbonds.times,
        "n_hbonds": counts.astype(int),
    })

    # NOTE(review): the value printed as "total unique" is the row count of
    # results.hbonds — presumably one row per detected bond per frame; verify
    # before interpreting it as a count of distinct bonds.
    print(f"H-bonds: mean={df_counts['n_hbonds'].mean():.1f}/frame, "
          f"total unique={len(hbonds.results.hbonds)}")
    return hbonds, df_counts
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
## 6. SASA (溶媒接触表面積)
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
from MDAnalysis.analysis.sasa import SASA
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def compute_sasa(universe, selection="protein"):
    """
    Compute the Solvent Accessible Surface Area (SASA) of a selection.

    NOTE(review): relies on the ``SASA`` class imported from
    ``MDAnalysis.analysis.sasa`` and on its ``results.area`` attribute;
    confirm that this import path exists in the installed MDAnalysis version.

    Parameters:
        universe: mda.Universe
        selection: str — atom selection

    Returns:
        pd.DataFrame — columns frame (0-based position in the result array)
        and sasa_A2.
    """
    selected = universe.select_atoms(selection)
    analysis = SASA(selected)
    analysis.run()

    areas = analysis.results.area
    df = pd.DataFrame({
        "frame": range(len(areas)),
        "sasa_A2": areas,
    })

    area_series = df["sasa_A2"]
    print(f"SASA: mean={area_series.mean():.1f} Ų, "
          f"std={area_series.std():.1f} Ų")
    return df
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
## 7. OpenFF 力場パラメータ化
|
|
222
|
+
|
|
223
|
+
```python
|
|
224
|
+
def parameterize_with_openff(smiles, force_field="openff-2.1.0"):
    """
    Generate force-field parameters for a ligand with the OpenFF Toolkit.

    Parameters:
        smiles: str — ligand SMILES
        force_field: str — OpenFF force-field version, given with or without
            the ".offxml" suffix (e.g. "openff-2.1.0" or "openff-2.1.0.offxml")

    Returns:
        tuple — (interchange, result): the ``Interchange`` built from the
        force field and the molecule's topology, and a summary dict (smiles,
        force_field as passed in, atom/bond/conformer counts, partial_charges).

    NOTE(review): ``partial_charges`` is read from the molecule, on which this
    function never assigns charges, so it may be None — confirm.

    K-Dense: openff
    """
    from openff.toolkit import Molecule, ForceField
    from openff.interchange import Interchange

    mol = Molecule.from_smiles(smiles)
    mol.generate_conformers(n_conformers=1)

    # Append the file suffix only when it is missing, so that callers passing
    # a full file name do not end up requesting "<name>.offxml.offxml".
    if force_field.endswith(".offxml"):
        offxml_name = force_field
    else:
        offxml_name = f"{force_field}.offxml"

    ff = ForceField(offxml_name)
    topology = mol.to_topology()
    interchange = Interchange.from_smirnoff(ff, topology)

    result = {
        "smiles": smiles,
        "force_field": force_field,
        "n_atoms": mol.n_atoms,
        "n_bonds": mol.n_bonds,
        "n_conformers": mol.n_conformers,
        "partial_charges": mol.partial_charges,
    }

    print(f"OpenFF parameterized: {smiles} ({mol.n_atoms} atoms, "
          f"FF={force_field})")
    return interchange, result
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
## 8. 統合 MD 解析パイプライン
|
|
259
|
+
|
|
260
|
+
```python
|
|
261
|
+
def md_analysis_pipeline(topology, trajectory, selection="protein"):
    """
    Integrated MD trajectory analysis pipeline.

    Pipeline:
        load → RMSD → RMSF → Rg → H-bonds → summary

    ``selection`` is forwarded only to the Rg step; RMSD uses "backbone",
    RMSF uses "name CA" and the H-bond step runs with its own defaults.

    Returns:
        dict — the system info plus rounded summary statistics.
    """
    u, system_info = load_trajectory(topology, trajectory)

    rmsd_df = compute_rmsd(u, "backbone")
    rmsf_df = compute_rmsf(u, "name CA")
    rg_df = compute_radius_of_gyration(u, selection)
    _hbonds, hbond_counts = hydrogen_bond_analysis(u)

    rmsd_values = rmsd_df["rmsd_A"]
    summary = {
        "system": system_info,
        "rmsd_mean_A": round(rmsd_values.mean(), 2),
        "rmsd_std_A": round(rmsd_values.std(), 2),
        "rmsf_mean_A": round(rmsf_df["rmsf_A"].mean(), 2),
        "n_flexible_residues": int(rmsf_df["flexible"].sum()),
        "rg_mean_A": round(rg_df["rg_A"].mean(), 2),
        "hbonds_mean_per_frame": round(hbond_counts["n_hbonds"].mean(), 1),
    }

    print("\n=== MD Analysis Summary ===")
    # Nested dicts (the system info) are left out of the printed overview.
    scalar_items = ((key, value) for key, value in summary.items()
                    if not isinstance(value, dict))
    for key, value in scalar_items:
        print(f" {key}: {value}")

    return summary
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
---
|
|
295
|
+
|
|
296
|
+
## パイプライン統合
|
|
297
|
+
|
|
298
|
+
```
|
|
299
|
+
molecular-docking ──→ md-simulation ──→ admet-pharmacokinetics
|
|
300
|
+
(ドッキングポーズ) (MD 安定性評価) (PK パラメータ推定)
|
|
301
|
+
│ │ ↓
|
|
302
|
+
protein-structure ──┘ │ drug-target-profiling
|
|
303
|
+
(PDB/AlphaFold) ↓ (候補評価)
|
|
304
|
+
computational-materials
|
|
305
|
+
(pymatgen/VASP 連携)
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
## パイプライン出力
|
|
309
|
+
|
|
310
|
+
| ファイル | 説明 | 次スキル |
|
|
311
|
+
|---------|------|---------|
|
|
312
|
+
| `results/rmsd_timeseries.csv` | RMSD 時系列 | → publication-figures |
|
|
313
|
+
| `results/rmsf_per_residue.csv` | RMSF 残基別 | → protein-structure-analysis |
|
|
314
|
+
| `results/hbond_analysis.csv` | 水素結合解析 | → molecular-docking |
|
|
315
|
+
| `results/md_summary.json` | 統合サマリ | → admet-pharmacokinetics |
|
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-model-organism-db
|
|
3
|
+
description: |
|
|
4
|
+
モデル生物データベース統合スキル。FlyBase (ショウジョウバエ)、
|
|
5
|
+
WormBase (線虫)、ZFIN (ゼブラフィッシュ)、RGD (ラット)、
|
|
6
|
+
MGI (マウス) の REST API を統合した
|
|
7
|
+
モデル生物遺伝子・表現型・疾患モデル横断検索パイプライン。
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Scientific Model Organism Database
|
|
11
|
+
|
|
12
|
+
主要 5 モデル生物データベース (FlyBase / WormBase / ZFIN / RGD / MGI) を
|
|
13
|
+
統合した遺伝子・表現型・疾患モデル横断検索パイプラインを提供する。
|
|
14
|
+
|
|
15
|
+
## When to Use
|
|
16
|
+
|
|
17
|
+
- ヒト遺伝子のモデル生物オルソログを検索するとき
|
|
18
|
+
- モデル生物の表現型データを疾患研究に活用するとき
|
|
19
|
+
- 遺伝子改変動物モデルの表現型情報を取得するとき
|
|
20
|
+
- 複数のモデル生物間で機能保存性を比較するとき
|
|
21
|
+
- IMPC (既存スキル) を補完してラット/魚/ハエ/線虫データが必要なとき
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
## 1. MGI (Mouse Genome Informatics) 遺伝子検索
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import requests
|
|
31
|
+
import pandas as pd
|
|
32
|
+
|
|
33
|
+
# Base URL of the MGI API; requests go over HTTPS like the other endpoints
# in this skill.
MGI_API = "https://www.informatics.jax.org/api"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def search_mgi_gene(query, limit=20, *, timeout=30):
    """
    Search MGI for mouse genes.

    NOTE(review): the endpoint path and response keys are used as given;
    verify them against the live MGI service.

    Parameters:
        query: str — gene name or symbol
        limit: int — maximum number of rows returned
        timeout: float — seconds to wait for the HTTP response

    Returns:
        pd.DataFrame — columns mgi_id, symbol, name, chromosome,
        feature_type and organism (present even when nothing matched).

    Raises:
        requests.HTTPError — when the response status is an error
            (``raise_for_status``).
    """
    url = f"{MGI_API}/gene/search"
    params = {"query": query, "limit": limit}
    # Without a timeout a stalled server would block the caller indefinitely.
    resp = requests.get(url, params=params, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # "results" may be missing or null; both are treated as "no hits".
    genes = data.get("results") or []
    rows = [
        {
            "mgi_id": gene.get("mgiId"),
            "symbol": gene.get("symbol"),
            "name": gene.get("name"),
            "chromosome": gene.get("chromosome"),
            "feature_type": gene.get("featureType"),
            "organism": "Mus musculus",
        }
        for gene in genes[:limit]
    ]

    # Explicit columns keep the schema stable for an empty result.
    df = pd.DataFrame(rows, columns=[
        "mgi_id", "symbol", "name", "chromosome", "feature_type", "organism",
    ])
    print(f"MGI search '{query}': {len(df)} genes")
    return df
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## 2. RGD (Rat Genome Database) 遺伝子検索
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
# Base URL of the RGD REST web service; search_rgd_gene appends
# "/genes/<query>/<species_key>" to it.
RGD_API = "https://rest.rgd.mcw.edu/rgdws"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def search_rgd_gene(query, species="rat", *, timeout=30):
    """
    Search RGD for a gene by symbol.

    Parameters:
        query: str — gene symbol
        species: str — "rat", "mouse" or "human"; any other value falls back
            to "rat"
        timeout: float — seconds to wait for the HTTP response

    Returns:
        pd.DataFrame — columns rgd_id, symbol, name, chromosome, type and
        organism (the species that was actually queried).

    Raises:
        requests.HTTPError — when the response status is an error
            (``raise_for_status``).
    """
    from urllib.parse import quote

    species_map = {"rat": 3, "mouse": 2, "human": 1}
    # Unknown species are still queried as rat, but the rows are now labelled
    # with the species really used instead of the unrecognised input.
    resolved_species = species if species in species_map else "rat"
    species_key = species_map[resolved_species]

    # The symbol becomes a URL path segment, so reserved characters are escaped.
    url = f"{RGD_API}/genes/{quote(str(query), safe='')}/{species_key}"
    resp = requests.get(url, headers={"Accept": "application/json"},
                        timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # A single JSON object is wrapped into a list; a null payload is treated
    # as "no hits".
    if isinstance(data, dict):
        data = [data]
    elif data is None:
        data = []

    rows = [
        {
            "rgd_id": gene.get("rgdId"),
            "symbol": gene.get("symbol"),
            "name": gene.get("name"),
            "chromosome": gene.get("chromosome"),
            "type": gene.get("type"),
            "organism": resolved_species,
        }
        for gene in data
    ]

    df = pd.DataFrame(rows, columns=[
        "rgd_id", "symbol", "name", "chromosome", "type", "organism",
    ])
    print(f"RGD search '{query}': {len(df)} genes ({resolved_species})")
    return df
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## 3. ZFIN (Zebrafish Information Network)
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
# Base URL of the ZFIN API; search_zfin_gene appends "/marker/search" to it.
ZFIN_API = "https://zfin.org/action/api"
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def search_zfin_gene(query, limit=20, *, timeout=30):
    """
    Search ZFIN for zebrafish genes.

    NOTE(review): the endpoint path and response keys are used as given;
    verify them against the live ZFIN service.

    Parameters:
        query: str — gene name or symbol
        limit: int — maximum number of rows returned
        timeout: float — seconds to wait for the HTTP response

    Returns:
        pd.DataFrame — columns zfin_id, symbol, name, type and organism
        (present even when nothing matched).

    Raises:
        requests.HTTPError — when the response status is an error
            (``raise_for_status``).
    """
    url = f"{ZFIN_API}/marker/search"
    params = {"name": query, "limit": limit, "type": "gene"}
    # Without a timeout a stalled server would block the caller indefinitely.
    resp = requests.get(url, params=params, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # "results" may be missing or null; both are treated as "no hits".
    genes = data.get("results") or []
    rows = [
        {
            "zfin_id": gene.get("zdbID"),
            "symbol": gene.get("abbreviation"),
            "name": gene.get("name"),
            "type": gene.get("markerType"),
            "organism": "Danio rerio",
        }
        for gene in genes[:limit]
    ]

    # Explicit columns keep the schema stable for an empty result.
    df = pd.DataFrame(rows, columns=[
        "zfin_id", "symbol", "name", "type", "organism",
    ])
    print(f"ZFIN search '{query}': {len(df)} genes")
    return df
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## 4. FlyBase (Drosophila)
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
# Base URL of the FlyBase API; search_flybase_gene appends
# "/gene/search/<query>" to it.
FLYBASE_API = "https://api.flybase.org/api/v1.0"
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def search_flybase_gene(query, limit=20, *, timeout=30):
    """
    Search FlyBase for Drosophila genes.

    NOTE(review): the endpoint path and response keys are used as given;
    verify them against the live FlyBase service.

    Parameters:
        query: str — gene name or symbol
        limit: int — maximum number of rows returned
        timeout: float — seconds to wait for the HTTP response

    Returns:
        pd.DataFrame — columns flybase_id, symbol, name, chromosome and
        organism (present even when nothing matched).

    Raises:
        requests.HTTPError — when the response status is an error
            (``raise_for_status``).
    """
    from urllib.parse import quote

    # The query becomes a URL path segment, so reserved characters are escaped.
    url = f"{FLYBASE_API}/gene/search/{quote(str(query), safe='')}"
    params = {"limit": limit}
    resp = requests.get(url, params=params, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # Every nesting level may be missing or null; treat both as empty.
    genes = (data.get("resultset") or {}).get("result") or []
    rows = [
        {
            "flybase_id": gene.get("id"),
            "symbol": gene.get("symbol"),
            "name": gene.get("name"),
            "chromosome": (gene.get("location") or {}).get("chromosome"),
            "organism": "Drosophila melanogaster",
        }
        for gene in genes[:limit]
    ]

    # Explicit columns keep the schema stable for an empty result.
    df = pd.DataFrame(rows, columns=[
        "flybase_id", "symbol", "name", "chromosome", "organism",
    ])
    print(f"FlyBase search '{query}': {len(df)} genes")
    return df
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## 5. WormBase (C. elegans)
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
# Base URL of the WormBase REST API; search_wormbase_gene appends
# "/field/gene/<query>/overview" to it.
WORMBASE_API = "https://wormbase.org/rest"
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def search_wormbase_gene(query, limit=20, *, timeout=30):
    """
    Look up a C. elegans gene overview on WormBase.

    NOTE(review): the "/field/gene/<query>/overview" path and the
    "overview" → "name" → "data" response layout are used as given; verify
    them against the live WormBase REST service (it may expect a WBGene ID
    rather than a symbol).

    Parameters:
        query: str — gene name or symbol
        limit: int — accepted for signature parity with the other search
            functions; not used by this lookup
        timeout: float — seconds to wait for the HTTP response

    Returns:
        dict — wormbase_id, symbol, concise_description and organism.

    Raises:
        requests.HTTPError — when the response status is an error
            (``raise_for_status``).
    """
    from urllib.parse import quote

    # The query becomes a URL path segment, so reserved characters are escaped.
    url = f"{WORMBASE_API}/field/gene/{quote(str(query), safe='')}/overview"
    resp = requests.get(url, headers={"Accept": "application/json"},
                        timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # Any nested object may be missing or null; fall back to empty dicts so a
    # sparse record yields None fields instead of an AttributeError.
    overview = data.get("overview") or {}
    name_data = (overview.get("name") or {}).get("data") or {}
    description = (overview.get("concise_description") or {}).get("data", "")

    info = {
        "wormbase_id": name_data.get("id"),
        "symbol": name_data.get("label"),
        "concise_description": description,
        "organism": "Caenorhabditis elegans",
    }

    print(f"WormBase: {info['symbol']} ({info['wormbase_id']})")
    return info
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
## 6. モデル生物横断オルソログ検索
|
|
212
|
+
|
|
213
|
+
```python
|
|
214
|
+
def cross_species_ortholog_search(human_gene):
    """
    Look up a human gene symbol across the five model-organism databases.

    Each database is searched with the symbol itself (lower-cased for ZFIN
    and WormBase) and the first hit, if any, is recorded. A failure in one
    database is printed and does not stop the remaining lookups.

    Parameters:
        human_gene: str — human gene symbol (e.g. "TP53")

    Pipeline:
        MGI → RGD → ZFIN → FlyBase → WormBase

    Returns:
        pd.DataFrame — one row per database with a hit
        (organism, db, symbol, id).
    """
    def _top_hit(frame, id_column, convert=None):
        # First row of a search result as (symbol, id), or None when empty.
        if frame.empty:
            return None
        first = frame.iloc[0]
        symbol = first["symbol"]
        identifier = first[id_column]
        if convert is not None:
            identifier = convert(identifier)
        return symbol, identifier

    def _worm_hit(record):
        # WormBase returns a single dict; it counts as a hit only with an ID.
        if not record.get("wormbase_id"):
            return None
        return record["symbol"], record["wormbase_id"]

    # (organism label, database label, lookup). Each lookup runs inside the
    # try block below so that any error stays confined to its own database.
    lookups = (
        ("Mouse", "MGI",
         lambda: _top_hit(search_mgi_gene(human_gene, limit=3), "mgi_id")),
        ("Rat", "RGD",
         lambda: _top_hit(search_rgd_gene(human_gene, "rat"), "rgd_id", str)),
        ("Zebrafish", "ZFIN",
         lambda: _top_hit(search_zfin_gene(human_gene.lower(), limit=3),
                          "zfin_id")),
        ("Drosophila", "FlyBase",
         lambda: _top_hit(search_flybase_gene(human_gene, limit=3),
                          "flybase_id")),
        ("C. elegans", "WormBase",
         lambda: _worm_hit(search_wormbase_gene(human_gene.lower()))),
    )

    results = []
    for organism, db, lookup in lookups:
        try:
            hit = lookup()
            if hit is not None:
                symbol, identifier = hit
                results.append({"organism": organism, "db": db,
                                "symbol": symbol,
                                "id": identifier})
        except Exception as e:
            print(f"{db} error: {e}")

    df = pd.DataFrame(results)
    print(f"\nOrthologs of {human_gene}: {len(df)} model organisms")
    return df
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
## 7. 表現型データ統合
|
|
282
|
+
|
|
283
|
+
```python
|
|
284
|
+
def get_mgi_phenotypes(mgi_id, *, timeout=30):
    """
    Fetch the phenotype annotations of an MGI gene.

    NOTE(review): the endpoint path and response keys are used as given;
    verify them against the live MGI service.

    Parameters:
        mgi_id: str — MGI ID (e.g. "MGI:98834")
        timeout: float — seconds to wait for the HTTP response

    Returns:
        pd.DataFrame — columns mp_id, mp_term, allele_symbol and
        genetic_background (present even when there are no annotations).

    Raises:
        requests.HTTPError — when the response status is an error
            (``raise_for_status``).
    """
    url = f"{MGI_API}/gene/{mgi_id}/phenotypes"
    # Without a timeout a stalled server would block the caller indefinitely.
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # "phenotypes" may be missing or null; both are treated as "none".
    phenotypes = data.get("phenotypes") or []
    rows = [
        {
            "mp_id": pheno.get("mpId"),
            "mp_term": pheno.get("mpTerm"),
            "allele_symbol": pheno.get("alleleSymbol"),
            "genetic_background": pheno.get("geneticBackground"),
        }
        for pheno in phenotypes
    ]

    # Explicit columns keep the schema stable for an empty result.
    df = pd.DataFrame(rows, columns=[
        "mp_id", "mp_term", "allele_symbol", "genetic_background",
    ])
    print(f"Phenotypes for {mgi_id}: {len(df)} MP annotations")
    return df
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
---
|
|
311
|
+
|
|
312
|
+
## パイプライン統合
|
|
313
|
+
|
|
314
|
+
```
|
|
315
|
+
ensembl-genomics ──→ model-organism-db ──→ disease-research
|
|
316
|
+
(オルソログ ID) (表現型データ) (疾患モデル)
|
|
317
|
+
│ │ ↓
|
|
318
|
+
biothings-idmapping ──┘ ↓ rare-disease-genetics
|
|
319
|
+
(ID マッピング) phylogenetics (OMIM/Orphanet)
|
|
320
|
+
(種間系統解析)
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
## パイプライン出力
|
|
324
|
+
|
|
325
|
+
| ファイル | 説明 | 次スキル |
|
|
326
|
+
|---------|------|---------|
|
|
327
|
+
| `results/model_orthologs.csv` | モデル生物オルソログ | → ensembl-genomics |
|
|
328
|
+
| `results/mgi_phenotypes.csv` | マウス表現型 | → disease-research |
|
|
329
|
+
| `results/cross_species.json` | 横断比較結果 | → phylogenetics |
|