@nahisaho/satori 0.21.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -23
- package/package.json +1 -1
- package/src/.github/skills/scientific-clinical-pharmacology/SKILL.md +361 -0
- package/src/.github/skills/scientific-clinical-standards/SKILL.md +444 -0
- package/src/.github/skills/scientific-crispr-design/SKILL.md +369 -0
- package/src/.github/skills/scientific-environmental-ecology/SKILL.md +5 -0
- package/src/.github/skills/scientific-epidemiology-public-health/SKILL.md +5 -0
- package/src/.github/skills/scientific-epigenomics-chromatin/SKILL.md +5 -0
- package/src/.github/skills/scientific-glycomics/SKILL.md +274 -0
- package/src/.github/skills/scientific-immunoinformatics/SKILL.md +9 -0
- package/src/.github/skills/scientific-lipidomics/SKILL.md +284 -0
- package/src/.github/skills/scientific-metabolomics/SKILL.md +3 -0
- package/src/.github/skills/scientific-metagenome-assembled-genomes/SKILL.md +299 -0
- package/src/.github/skills/scientific-model-organism-db/SKILL.md +8 -0
- package/src/.github/skills/scientific-public-health-data/SKILL.md +11 -0
- package/src/.github/skills/scientific-systems-biology/SKILL.md +11 -0
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-crispr-design
|
|
3
|
+
description: |
|
|
4
|
+
CRISPR gRNA 設計スキル。Cas9/Cas12a PAM 配列検索・
|
|
5
|
+
オフターゲットスコアリング (CFD/MIT)・
|
|
6
|
+
CRISPRscan/Rule Set 2 活性予測・検証プライマー設計・
|
|
7
|
+
sgRNA スクリーニングライブラリ構築パイプライン。
|
|
8
|
+
TU 外スキル (Python ライブラリ + ローカル解析)。
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Scientific CRISPR Design
|
|
12
|
+
|
|
13
|
+
CRISPR gRNA 設計・オフターゲット評価・活性予測を統合した
|
|
14
|
+
効率的なガイド RNA 選択パイプラインを提供する。
|
|
15
|
+
|
|
16
|
+
## When to Use
|
|
17
|
+
|
|
18
|
+
- CRISPR-Cas9/Cas12a の gRNA を設計するとき
|
|
19
|
+
- PAM 配列検索とガイド候補の列挙を行うとき
|
|
20
|
+
- オフターゲットスコア (CFD/MIT) で安全性を評価するとき
|
|
21
|
+
- gRNA 活性スコア (CRISPRscan/Rule Set 2) で効率を予測するとき
|
|
22
|
+
- CRISPR スクリーニングライブラリを構築するとき
|
|
23
|
+
- 検証用 PCR プライマーを設計するとき
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
## 1. PAM 配列検索・gRNA 候補列挙
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import re
|
|
33
|
+
import pandas as pd
|
|
34
|
+
from Bio import SeqIO
|
|
35
|
+
from Bio.Seq import Seq
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# PAM パターン定義
|
|
39
|
+
# Per-nuclease search settings read by find_grna_candidates:
#   "pam"       - PAM motif written with IUPAC nucleotide codes
#   "guide_len" - number of nucleotides extracted as the guide
#   "pam_side"  - "3prime": the guide is taken immediately before the PAM;
#                 "5prime": the guide is taken immediately after the PAM
PAM_PATTERNS = {
    "SpCas9": {"pam": "NGG", "guide_len": 20,
               "pam_side": "3prime"},
    "SaCas9": {"pam": "NNGRRT", "guide_len": 21,
               "pam_side": "3prime"},
    "Cas12a": {"pam": "TTTV", "guide_len": 23,
               "pam_side": "5prime"},
    "xCas9": {"pam": "NG", "guide_len": 20,
              "pam_side": "3prime"},
}
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def iupac_to_regex(pam):
    """Convert an IUPAC nucleotide motif into a regular-expression string.

    Every ambiguity code listed below is expanded to the matching
    character class; any other character is copied through unchanged.
    """
    expansion = str.maketrans({
        "N": "[ACGT]",
        "R": "[AG]",
        "Y": "[CT]",
        "S": "[GC]",
        "W": "[AT]",
        "K": "[GT]",
        "M": "[AC]",
        "B": "[CGT]",
        "D": "[AGT]",
        "H": "[ACT]",
        "V": "[ACG]",
    })
    return pam.translate(expansion)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def find_grna_candidates(sequence, cas_type="SpCas9",
                         strand="both"):
    """
    Enumerate gRNA candidates that sit next to a PAM in a target sequence.

    Parameters:
        sequence: str — target DNA sequence (upper-cased before scanning)
        cas_type: str — key into PAM_PATTERNS; an unknown key raises KeyError
        strand: str — "sense", "antisense" or "both"; any other value
            results in nothing being scanned

    Returns:
        pd.DataFrame — one row per candidate with the columns guide, pam,
        position, strand, gc_content and length, restricted to guides whose
        GC fraction lies in [0.30, 0.70]. When no PAM-adjacent guide is
        found at all, the frame has no columns.

    Note:
        `position` is the PAM start index inside the string that was
        scanned. For "-" rows that string is the reverse complement, so the
        index is NOT a coordinate on `sequence`.
    """
    settings = PAM_PATTERNS[cas_type]
    pam_pattern = iupac_to_regex(settings["pam"])
    guide_len = settings["guide_len"]
    pam_on_3prime = settings["pam_side"] == "3prime"
    pam_len = len(settings["pam"])
    target = sequence.upper()

    # Zero-width lookahead so that overlapping PAM occurrences are all found.
    pam_scan = re.compile(f"(?=({pam_pattern}))")

    templates = []
    if strand in ("sense", "both"):
        templates.append(("+", target))
    if strand in ("antisense", "both"):
        templates.append(
            ("-", str(Seq(target).reverse_complement())))

    records = []
    for label, template in templates:
        for hit in pam_scan.finditer(template):
            pam_pos = hit.start()
            if pam_on_3prime:
                # Guide lies immediately upstream of the PAM.
                lo, hi = pam_pos - guide_len, pam_pos
                if lo < 0:
                    continue
            else:
                # Guide lies immediately downstream of the PAM.
                lo = pam_pos + pam_len
                hi = lo + guide_len
                if hi > len(template):
                    continue

            protospacer = template[lo:hi]
            if len(protospacer) != guide_len:
                continue

            gc_fraction = (protospacer.count("G")
                           + protospacer.count("C")) / guide_len
            records.append({
                "guide": protospacer,
                "pam": hit.group(1),
                "position": pam_pos,
                "strand": label,
                "gc_content": round(gc_fraction, 3),
                "length": guide_len,
            })

    df = pd.DataFrame(records)

    # Keep only guides with 30-70% GC (both bounds inclusive).
    if not df.empty:
        gc_ok = (df["gc_content"] >= 0.30) & (df["gc_content"] <= 0.70)
        df = df[gc_ok]

    print(f"CRISPR {cas_type}: "
          f"{len(df)} gRNA candidates "
          f"(GC 30-70%)")
    return df.reset_index(drop=True)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## 2. オフターゲットスコアリング
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
import numpy as np
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# CFD スコア簡易実装 (Doench 2016)
|
|
137
|
+
def cfd_score(guide, off_target):
    """
    Simplified CFD (Cutting Frequency Determination)-style score.

    Starts at 1.0 and, for every position where the two sequences differ,
    multiplies by (1 - penalty) using a fixed per-position penalty table.
    Positions are 1-based; positions missing from the table use 0.5.
    Only the overlapping prefix of the two strings is compared, and the
    comparison is case-sensitive.

    Parameters:
        guide: str — gRNA sequence (20 nt)
        off_target: str — off-target site sequence

    Returns:
        float — the product rounded to 4 decimals
    """
    # Per-position mismatch penalty (simplified table).
    position_penalty = {
        1: 0.0, 2: 0.0, 3: 0.014, 4: 0.0,
        5: 0.0, 6: 0.395, 7: 0.317, 8: 0.0,
        9: 0.389, 10: 0.079, 11: 0.445,
        12: 0.508, 13: 0.613, 14: 0.851,
        15: 0.732, 16: 0.828, 17: 0.615,
        18: 0.804, 19: 0.685, 20: 0.583,
    }

    product = 1.0
    paired = zip(guide, off_target)
    for pos, (guide_base, site_base) in enumerate(paired, start=1):
        if guide_base == site_base:
            continue
        product *= (1.0 - position_penalty.get(pos, 0.5))

    return round(product, 4)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def score_off_targets(guide, genome_fasta,
                      max_mismatches=4):
    """
    Scan a FASTA file for NGG-adjacent sites that resemble a guide.

    Every window of len(guide) bases that is directly followed by a
    three-base [ACGT]GG PAM is compared with the guide position by
    position; windows with at most `max_mismatches` differences are kept
    and scored with cfd_score.

    Only the sequence as written in the FASTA records is scanned (no
    reverse complement), and a perfect match — e.g. the intended target —
    is reported like any other site.

    Parameters:
        guide: str — gRNA sequence (upper-cased before comparison)
        genome_fasta: str — reference genome, parsed with SeqIO as "fasta"
        max_mismatches: int — maximum number of mismatches to report

    Returns:
        pd.DataFrame — columns chrom, position, site, pam, mismatches,
        cfd_score, sorted by cfd_score descending. The columns are present
        even when no site qualifies.
    """
    columns = ["chrom", "position", "site", "pam",
               "mismatches", "cfd_score"]
    results = []
    gl = len(guide)
    guide_upper = guide.upper()

    for record in SeqIO.parse(
            genome_fasta, "fasta"):
        seq = str(record.seq).upper()
        # The last valid start index is len(seq) - gl - 3 (guide plus a
        # 3-base PAM must fit), so the range end is one past that.
        for i in range(len(seq) - gl - 3 + 1):
            pam = seq[i + gl:i + gl + 3]
            # Same test as the pattern [ACGT]GG on a 3-base string.
            if pam[1:] != "GG" or pam[0] not in "ACGT":
                continue

            site = seq[i:i + gl]
            mm = sum(1 for a, b in
                     zip(guide_upper, site)
                     if a != b)
            if mm <= max_mismatches:
                results.append({
                    "chrom": record.id,
                    "position": i,
                    "site": site,
                    "pam": pam,
                    "mismatches": mm,
                    "cfd_score": cfd_score(
                        guide_upper, site),
                })

    # Explicit columns keep sort_values from raising KeyError when
    # nothing matched.
    df = pd.DataFrame(results, columns=columns)
    df = df.sort_values("cfd_score",
                        ascending=False)
    print(f"Off-target: {len(df)} sites "
          f"(≤{max_mismatches} mm)")
    return df
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
## 3. gRNA 活性予測
|
|
212
|
+
|
|
213
|
+
```python
|
|
214
|
+
def rule_set2_score(guide_30mer):
    """
    Heuristic on-target activity score (simplified, Rule Set 2-inspired).

    Parameters:
        guide_30mer: str — 30 nt context
            (4 nt upstream + 20 nt guide + 3 nt PAM + 3 nt downstream)

    Returns:
        A number clamped to [0, 1] and rounded to 3 decimals. If the input
        is not exactly 30 characters long a warning is printed and 0.0 is
        returned.
    """
    context = guide_30mer.upper()
    if len(context) != 30:
        print(f"Warning: expected 30nt, "
              f"got {len(context)}")
        return 0.0

    # Characters 5-24 of the context are the 20 nt guide.
    protospacer = context[4:24]
    gc_fraction = sum(protospacer.count(base) for base in "GC") / 20

    # Baseline, then additive adjustments.
    score = 0.5

    # GC content: penalise the extremes, reward the 40-70% window.
    if gc_fraction < 0.30 or gc_fraction > 0.80:
        score -= 0.2
    elif 0.40 <= gc_fraction <= 0.70:
        score += 0.1

    # Preferred bases near the PAM end of the guide.
    if protospacer.endswith("G"):
        score += 0.05
    if protospacer[16] == "C":  # fourth base from the guide's 3' end
        score += 0.03

    # Avoid poly-T runs (Pol III termination signal).
    if protospacer.find("TTTT") != -1:
        score -= 0.3

    return round(max(0, min(1, score)), 3)
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def rank_grnas(candidates_df, genome_fasta=None):
    """
    Rank gRNA candidates by a composite of activity, specificity and GC.

    Parameters:
        candidates_df: pd.DataFrame — gRNA candidates; must provide the
            columns "guide" and "gc_content" when it has rows
        genome_fasta: str | None — reference genome for off-target
            analysis; when falsy, specificity is set to 1.0 for every row

    Returns:
        pd.DataFrame — a copy of the input with the added columns
        activity_score, specificity and composite_score, sorted by
        composite_score descending. An empty input yields an empty frame
        that still carries the three score columns.
    """
    df = candidates_df.copy()

    # An empty candidate table (possibly without any columns) would fail
    # on df["guide"] / .iloc[0] below, so return early.
    if df.empty:
        for column in ("activity_score", "specificity",
                       "composite_score"):
            df[column] = pd.Series(dtype=float)
        print("gRNA ranking: no candidates")
        return df

    def _as_30mer(g):
        # No genomic context is available here, so a 20 nt guide is
        # wrapped in placeholder flanks; other lengths are right-padded
        # with "N" up to 30 characters.
        if len(g) == 20:
            return "AAAA" + g + "GGGNNN"
        return g.ljust(30, "N")

    # Activity score
    df["activity_score"] = df["guide"].apply(
        lambda g: rule_set2_score(_as_30mer(g)))

    # Off-target specificity (only when a genome is supplied)
    if genome_fasta:
        ot_scores = []
        for guide in df["guide"]:
            ot = score_off_targets(
                guide, genome_fasta, 3)
            # 1 / (1 + number of reported sites); 1.0 when none reported.
            specificity = (
                1.0 / (1.0 + len(ot))
                if not ot.empty else 1.0)
            ot_scores.append(round(specificity, 3))
        df["specificity"] = ot_scores
    else:
        df["specificity"] = 1.0

    # Composite score: weighted sum; the GC term is clipped to [0.4, 0.6].
    df["composite_score"] = (
        df["activity_score"] * 0.5
        + df["specificity"] * 0.3
        + df["gc_content"].clip(0.4, 0.6) * 0.2
    ).round(3)

    df = df.sort_values("composite_score",
                        ascending=False)
    print(f"gRNA ranking: top score = "
          f"{df['composite_score'].iloc[0]}")
    return df
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
## 4. sgRNA ライブラリ構築
|
|
301
|
+
|
|
302
|
+
```python
|
|
303
|
+
def build_sgrna_library(gene_list,
                        genome_fasta,
                        guides_per_gene=4,
                        cas_type="SpCas9"):
    """
    Build an sgRNA screening library from per-gene target sequences.

    For each gene, candidates are enumerated with find_grna_candidates,
    ranked with rank_grnas, and the top `guides_per_gene` rows are kept.

    Parameters:
        gene_list: list[dict] — genes to target, e.g.
            [{"gene": "TP53", "sequence": "ATCG..."}]
        genome_fasta: str — reference genome, forwarded to rank_grnas for
            off-target scoring; pass None (or "") to skip that step
        guides_per_gene: int — number of guides kept per gene
        cas_type: str — nuclease key understood by find_grna_candidates

    Returns:
        pd.DataFrame — columns gene, guide, position, strand, gc_content,
        activity, composite. The columns are present even when no guide
        was selected.
    """
    columns = ["gene", "guide", "position", "strand",
               "gc_content", "activity", "composite"]
    library = []

    for gene_info in gene_list:
        gene = gene_info["gene"]
        seq = gene_info["sequence"]

        candidates = find_grna_candidates(
            seq, cas_type)

        if candidates.empty:
            print(f" {gene}: no candidates")
            continue

        # Forward the genome so that the specificity term of the
        # composite score is actually computed.
        ranked = rank_grnas(candidates, genome_fasta)
        top = ranked.head(guides_per_gene)

        for _, row in top.iterrows():
            library.append({
                "gene": gene,
                "guide": row["guide"],
                "position": row["position"],
                "strand": row["strand"],
                "gc_content": row["gc_content"],
                "activity": row["activity_score"],
                "composite": row["composite_score"],
            })

    # Explicit columns keep df["gene"] valid for an empty library.
    df = pd.DataFrame(library, columns=columns)
    n_genes = df["gene"].nunique()
    print(f"Library: {len(df)} sgRNAs for "
          f"{n_genes} genes")
    return df
|
|
349
|
+
```
|
|
350
|
+
|
|
351
|
+
---
|
|
352
|
+
|
|
353
|
+
## パイプライン統合
|
|
354
|
+
|
|
355
|
+
```
|
|
356
|
+
genome-sequence-tools → crispr-design → perturbation-analysis
|
|
357
|
+
(ゲノム配列取得) (gRNA 設計) (摂動実験解析)
|
|
358
|
+
│ │ ↓
|
|
359
|
+
variant-effect-prediction ─┘ functional-genomics
|
|
360
|
+
(変異影響予測) (機能ゲノミクス)
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
## パイプライン出力
|
|
364
|
+
|
|
365
|
+
| ファイル | 説明 | 次スキル |
|
|
366
|
+
|---------|------|---------|
|
|
367
|
+
| `grna_candidates.csv` | gRNA 候補リスト | → ランキング |
|
|
368
|
+
| `off_target_report.csv` | オフターゲット評価 | → 安全性確認 |
|
|
369
|
+
| `sgrna_library.csv` | sgRNA ライブラリ | → perturbation-analysis |
|
|
@@ -4,6 +4,11 @@ description: |
|
|
|
4
4
|
環境科学・生態学解析スキル。種分布モデリング(SDM / MaxEnt)・
|
|
5
5
|
生物多様性指標(α/β/γ 多様性)・群集構造解析(NMDS/CCA/RDA)・
|
|
6
6
|
生態学的ニッチモデリング・保全優先順位評価・OBIS/GBIF データ統合パイプライン。
|
|
7
|
+
ToolUniverse 連携: gbif。
|
|
8
|
+
tu_tools:
|
|
9
|
+
- key: gbif
|
|
10
|
+
name: GBIF
|
|
11
|
+
description: 地球規模生物多様性情報ファシリティ
|
|
7
12
|
---
|
|
8
13
|
|
|
9
14
|
# Scientific Environmental Ecology
|
|
@@ -5,6 +5,11 @@ description: |
|
|
|
5
5
|
リスク指標(RR/OR/HR/NNT)・標準化死亡比(SMR)・年齢調整率・
|
|
6
6
|
空間疫学(GIS / 空間クラスタリング)・因果推論ダイアグラム(DAG)・
|
|
7
7
|
WHO/CDC/EU 公衆衛生データ統合パイプライン。
|
|
8
|
+
ToolUniverse 連携: who_gho。
|
|
9
|
+
tu_tools:
|
|
10
|
+
- key: who_gho
|
|
11
|
+
name: WHO GHO
|
|
12
|
+
description: WHO Global Health Observatory 健康統計 API
|
|
8
13
|
---
|
|
9
14
|
|
|
10
15
|
# Scientific Epidemiology & Public Health
|
|
@@ -6,6 +6,11 @@ description: |
|
|
|
6
6
|
ヒストン修飾クロマチン状態モデリング (ChromHMM)、Hi-C 接触マップ・TAD 検出、
|
|
7
7
|
転写因子結合サイト予測 (モチーフ濃縮)、差次結合解析 (DiffBind) を統合した
|
|
8
8
|
計算エピゲノミクスパイプライン。ChIP-Atlas 43 万+実験との連携対応。
|
|
9
|
+
ToolUniverse 連携: chipatlas。
|
|
10
|
+
tu_tools:
|
|
11
|
+
- key: chipatlas
|
|
12
|
+
name: ChIP-Atlas
|
|
13
|
+
description: ChIP-Atlas エピゲノミクスエンリッチメント解析 (43万+実験)
|
|
9
14
|
---
|
|
10
15
|
|
|
11
16
|
# Scientific Epigenomics & Chromatin Biology
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-glycomics
|
|
3
|
+
description: |
|
|
4
|
+
糖鎖構造解析スキル。GlyConnect / GlyGen / GlyCosmos
|
|
5
|
+
糖鎖データベース統合検索・糖鎖構造描画・糖タンパク質
|
|
6
|
+
グリコシル化部位予測・レクチンバインディング・
|
|
7
|
+
糖鎖マスフラグメンテーション解析パイプライン。
|
|
8
|
+
TU 外スキル (直接 REST API + Python ライブラリ)。
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Scientific Glycomics
|
|
12
|
+
|
|
13
|
+
GlyConnect / GlyGen / GlyCosmos 糖鎖データベースを統合した
|
|
14
|
+
糖鎖構造解析・糖タンパク質グリコサイト予測・レクチン特異性・
|
|
15
|
+
糖鎖 MS フラグメンテーション解析パイプラインを提供する。
|
|
16
|
+
|
|
17
|
+
## When to Use
|
|
18
|
+
|
|
19
|
+
- 糖鎖構造を GlyTouCan ID から検索・描画するとき
|
|
20
|
+
- タンパク質のグリコシル化部位を予測・マッピングするとき
|
|
21
|
+
- GlyGen/GlyConnect で糖鎖-タンパク質関連を検索するとき
|
|
22
|
+
- 糖鎖マススペクトルのフラグメンテーション解析を行うとき
|
|
23
|
+
- レクチン-糖鎖結合特異性を調査するとき
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
## 1. GlyGen 糖鎖検索
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import requests
|
|
33
|
+
import pandas as pd
|
|
34
|
+
|
|
35
|
+
# Base URL that the GlyGen request helpers in this file prefix to their
# endpoint paths.
GLYGEN_API = "https://api.glygen.org"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def glygen_glycan_search(glycan_type=None,
                         mass_range=None):
    """
    GlyGen — glycan search.

    POSTs a JSON query to {GLYGEN_API}/glycan/search and tabulates the
    "results" list of the JSON response.

    Parameters:
        glycan_type: str | None — glycan type
            ("N-linked", "O-linked", "GAG", ...); omitted when falsy
        mass_range: tuple | None — (min_mass, max_mass); omitted when falsy

    Returns:
        pd.DataFrame — one row per result with glytoucan_ac, mass,
        glycan_type and composition (missing fields get "" or 0).

    Raises:
        requests.HTTPError — via raise_for_status() on an error response
    """
    payload = {}
    if glycan_type:
        payload["glycan_type"] = glycan_type
    if mass_range:
        lower, upper = mass_range[0], mass_range[1]
        payload["mass"] = {"min": lower, "max": upper}

    response = requests.post(
        f"{GLYGEN_API}/glycan/search", json=payload, timeout=30)
    response.raise_for_status()
    hits = response.json().get("results", [])

    # (field name, fallback when the field is absent from a result)
    fields = (
        ("glytoucan_ac", ""),
        ("mass", 0),
        ("glycan_type", ""),
        ("composition", ""),
    )
    table = pd.DataFrame([
        {key: hit.get(key, fallback) for key, fallback in fields}
        for hit in hits
    ])
    print(f"GlyGen search: {len(table)} glycans found")
    return table
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def glygen_glycan_detail(glytoucan_ac):
    """
    GlyGen — fetch detail for one glycan.

    GETs {GLYGEN_API}/glycan/detail/<accession> and condenses the JSON
    response into a flat summary.

    Parameters:
        glytoucan_ac: str — GlyTouCan accession

    Returns:
        dict — glytoucan_ac, mass, glycan_type, iupac, glycoct, species
        (list of the "name" of each species entry) and proteins (number of
        entries under "glycoprotein").

    Raises:
        requests.HTTPError — via raise_for_status() on an error response
    """
    response = requests.get(
        f"{GLYGEN_API}/glycan/detail/{glytoucan_ac}", timeout=30)
    response.raise_for_status()
    record = response.json()

    info = {}
    # Scalar fields copied as-is, each with its fallback value.
    for key, fallback in (("glytoucan_ac", ""), ("mass", 0),
                          ("glycan_type", ""), ("iupac", ""),
                          ("glycoct", "")):
        info[key] = record.get(key, fallback)
    info["species"] = [entry.get("name", "")
                       for entry in record.get("species", [])]
    info["proteins"] = len(record.get("glycoprotein", []))

    print(f"GlyGen: {glytoucan_ac} → "
          f"type={info['glycan_type']}, "
          f"mass={info['mass']:.1f}, "
          f"proteins={info['proteins']}")
    return info
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## 2. 糖タンパク質グリコサイト検索
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
def glygen_protein_glycosylation(uniprot_ac):
    """
    GlyGen — fetch the glycosylation sites of one protein.

    GETs {GLYGEN_API}/protein/detail/<accession> and tabulates the
    "glycosylation" list of the JSON response.

    Parameters:
        uniprot_ac: str — UniProt accession

    Returns:
        pd.DataFrame — one row per site with position, type, glytoucan_ac,
        residue and evidence (missing fields get 0 or "").

    Raises:
        requests.HTTPError — via raise_for_status() on an error response
    """
    response = requests.get(
        f"{GLYGEN_API}/protein/detail/{uniprot_ac}", timeout=30)
    response.raise_for_status()
    entries = response.json().get("glycosylation", [])

    # (field name, fallback when the field is absent from a site entry)
    fields = (
        ("position", 0),
        ("type", ""),
        ("glytoucan_ac", ""),
        ("residue", ""),
        ("evidence", ""),
    )
    table = pd.DataFrame([
        {key: entry.get(key, fallback) for key, fallback in fields}
        for entry in entries
    ])
    print(f"GlyGen glycosites: {uniprot_ac} → "
          f"{len(table)} sites")
    return table
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## 3. 糖鎖 MS フラグメンテーション
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
import numpy as np
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def glycan_fragmentation(composition,
                         ion_type="[M+Na]+"):
    """
    Predict neutral-loss fragment m/z values for a glycan composition.

    The precursor m/z is water + sum(residue mass * count) + adduct mass.
    For every known residue type and every i in 1..count, one fragment is
    emitted at precursor m/z minus i residue masses. Residue names missing
    from the mass table are ignored.

    Parameters:
        composition: dict — residue counts, e.g.
            {"Hex": 5, "HexNAc": 4, "Fuc": 1, "NeuAc": 2}
        ion_type: str — one of "[M+Na]+", "[M+H]+", "[M+K]+", "[M-H]-";
            any other value falls back to the sodium adduct mass

    Returns:
        pd.DataFrame — columns type, mz, loss, sorted by mz descending.
        The columns are present even when no fragment is produced (empty
        composition or only unknown residues).
    """
    monosaccharide_mass = {
        "Hex": 162.0528,
        "HexNAc": 203.0794,
        "Fuc": 146.0579,
        "NeuAc": 291.0954,
        "NeuGc": 307.0903,
        "Pent": 132.0423,
    }

    adducts = {
        "[M+Na]+": 22.9892,
        "[M+H]+": 1.0073,
        "[M+K]+": 38.9632,
        "[M-H]-": -1.0073,
    }

    known = [(sugar, count) for sugar, count in composition.items()
             if sugar in monosaccharide_mass]

    total_mass = 18.0106  # water
    for sugar, count in known:
        total_mass += (monosaccharide_mass[sugar]
                       * count)

    adduct = adducts.get(ion_type, 22.9892)
    precursor_mz = total_mass + adduct

    # Y-type fragments (reducing end)
    fragments = []
    for sugar, count in known:
        for i in range(1, count + 1):
            loss = monosaccharide_mass[sugar] * i
            frag_mz = precursor_mz - loss
            fragments.append({
                "type": f"Y (loss {i}x{sugar})",
                "mz": round(frag_mz, 4),
                "loss": round(loss, 4),
            })

    # Explicit columns keep sort_values from raising KeyError when the
    # fragment list is empty.
    df = pd.DataFrame(
        fragments, columns=["type", "mz", "loss"]).sort_values(
        "mz", ascending=False)
    print(f"Glycan fragmentation: "
          f"precursor={precursor_mz:.4f}, "
          f"{len(df)} fragments")
    return df
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
## 4. 糖鎖解析統合パイプライン
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
def glycomics_pipeline(uniprot_ids,
                       output_dir="results"):
    """
    Integrated glycomics pipeline: glycosites per protein, then glycan
    details for the glycans found at those sites.

    Writes glycosites.csv (when at least one protein was queried) and
    glycan_details.csv (when at least one glycan detail was fetched) into
    `output_dir`, which is created if needed.

    Parameters:
        uniprot_ids: list[str] — UniProt accessions to query
        output_dir: str — output directory

    Returns:
        dict — {"sites": DataFrame of all glycosites with an added
        "protein" column, "glycans": DataFrame of fetched glycan details};
        either frame is empty when there was nothing to collect.

    Note:
        HTTP errors raised by the GlyGen helpers are not caught here, so a
        single failing request aborts the run.
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 1) Glycosite mapping
    all_sites = []
    for uid in uniprot_ids:
        sites = glygen_protein_glycosylation(uid)
        sites["protein"] = uid
        all_sites.append(sites)

    sites_df = pd.DataFrame()
    if all_sites:
        sites_df = pd.concat(all_sites,
                             ignore_index=True)
        sites_df.to_csv(
            output_dir / "glycosites.csv",
            index=False)

    # 2) Glycan detail lookup
    unique_glycans = set()
    for df in all_sites:
        if not df.empty:
            unique_glycans.update(
                df["glytoucan_ac"].dropna().unique())

    # Drop blank accessions BEFORE applying the 50-item cap so they do not
    # consume slots, and sort so the selected subset is reproducible.
    accessions = sorted(
        (gac for gac in unique_glycans if gac), key=str)[:50]

    glycan_details = []
    for gac in accessions:
        detail = glygen_glycan_detail(gac)
        if detail:
            glycan_details.append(detail)

    glycans_df = pd.DataFrame(glycan_details)
    if glycan_details:
        glycans_df.to_csv(
            output_dir / "glycan_details.csv",
            index=False)

    print(f"Glycomics pipeline → {output_dir}")
    return {"sites": sites_df, "glycans": glycans_df}
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
---
|
|
258
|
+
|
|
259
|
+
## パイプライン統合
|
|
260
|
+
|
|
261
|
+
```
|
|
262
|
+
proteomics-mass-spectrometry → glycomics → pathway-enrichment
|
|
263
|
+
(LC-MS/MS PTM 同定) (糖鎖構造) (糖鎖パスウェイ)
|
|
264
|
+
│ │ ↓
|
|
265
|
+
protein-structure-analysis ────┘ immunoinformatics
|
|
266
|
+
(糖鎖結合サイト構造) (抗体グリコシル化)
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
## パイプライン出力
|
|
270
|
+
|
|
271
|
+
| ファイル | 説明 | 次スキル |
|
|
272
|
+
|---------|------|---------|
|
|
273
|
+
| `results/glycosites.csv` | グリコシル化部位 | → protein-structure-analysis |
|
|
274
|
+
| `results/glycan_details.csv` | 糖鎖詳細 | → pathway-enrichment |
|
|
@@ -9,6 +9,15 @@ tu_tools:
|
|
|
9
9
|
- key: iedb
|
|
10
10
|
name: IEDB
|
|
11
11
|
description: 免疫エピトープデータベース
|
|
12
|
+
- key: imgt
|
|
13
|
+
name: IMGT
|
|
14
|
+
description: 国際免疫遺伝学情報システム
|
|
15
|
+
- key: sabdab
|
|
16
|
+
name: SAbDab
|
|
17
|
+
description: 構造抗体データベース
|
|
18
|
+
- key: therasabdab
|
|
19
|
+
name: TheraSAbDab
|
|
20
|
+
description: 治療用抗体構造データベース
|
|
12
21
|
---
|
|
13
22
|
|
|
14
23
|
# Scientific Immunoinformatics
|