@nahisaho/satori 0.20.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/README.md +70 -39
  2. package/package.json +1 -1
  3. package/src/.github/skills/scientific-biothings-idmapping/SKILL.md +4 -0
  4. package/src/.github/skills/scientific-cellxgene-census/SKILL.md +257 -0
  5. package/src/.github/skills/scientific-clingen-curation/SKILL.md +258 -0
  6. package/src/.github/skills/scientific-clinical-nlp/SKILL.md +250 -0
  7. package/src/.github/skills/scientific-clinical-pharmacology/SKILL.md +361 -0
  8. package/src/.github/skills/scientific-clinical-standards/SKILL.md +444 -0
  9. package/src/.github/skills/scientific-crispr-design/SKILL.md +369 -0
  10. package/src/.github/skills/scientific-drug-repurposing/SKILL.md +4 -0
  11. package/src/.github/skills/scientific-environmental-ecology/SKILL.md +5 -0
  12. package/src/.github/skills/scientific-epidemiology-public-health/SKILL.md +5 -0
  13. package/src/.github/skills/scientific-epigenomics-chromatin/SKILL.md +5 -0
  14. package/src/.github/skills/scientific-glycomics/SKILL.md +274 -0
  15. package/src/.github/skills/scientific-gtex-tissue-expression/SKILL.md +5 -2
  16. package/src/.github/skills/scientific-hgnc-nomenclature/SKILL.md +282 -0
  17. package/src/.github/skills/scientific-human-cell-atlas/SKILL.md +3 -0
  18. package/src/.github/skills/scientific-human-protein-atlas/SKILL.md +4 -0
  19. package/src/.github/skills/scientific-immunoinformatics/SKILL.md +9 -0
  20. package/src/.github/skills/scientific-lipidomics/SKILL.md +284 -0
  21. package/src/.github/skills/scientific-metabolomics/SKILL.md +3 -0
  22. package/src/.github/skills/scientific-metabolomics-network/SKILL.md +311 -0
  23. package/src/.github/skills/scientific-metagenome-assembled-genomes/SKILL.md +299 -0
  24. package/src/.github/skills/scientific-model-organism-db/SKILL.md +8 -0
  25. package/src/.github/skills/scientific-pharmacogenomics/SKILL.md +4 -0
  26. package/src/.github/skills/scientific-pharos-targets/SKILL.md +276 -0
  27. package/src/.github/skills/scientific-protein-structure-analysis/SKILL.md +4 -0
  28. package/src/.github/skills/scientific-public-health-data/SKILL.md +11 -0
  29. package/src/.github/skills/scientific-systems-biology/SKILL.md +11 -0
  30. package/src/.github/skills/scientific-variant-effect-prediction/SKILL.md +7 -0
@@ -0,0 +1,369 @@
1
+ ---
2
+ name: scientific-crispr-design
3
+ description: |
4
+ CRISPR gRNA 設計スキル。Cas9/Cas12a PAM 配列検索・
5
+ オフターゲットスコアリング (CFD/MIT)・
6
+ CRISPRscan/Rule Set 2 活性予測・検証プライマー設計・
7
+ sgRNA スクリーニングライブラリ構築パイプライン。
8
+ TU 外スキル (Python ライブラリ + ローカル解析)。
9
+ ---
10
+
11
+ # Scientific CRISPR Design
12
+
13
+ CRISPR gRNA 設計・オフターゲット評価・活性予測を統合した
14
+ 効率的なガイド RNA 選択パイプラインを提供する。
15
+
16
+ ## When to Use
17
+
18
+ - CRISPR-Cas9/Cas12a の gRNA を設計するとき
19
+ - PAM 配列検索とガイド候補の列挙を行うとき
20
+ - オフターゲットスコア (CFD/MIT) で安全性を評価するとき
21
+ - gRNA 活性スコア (CRISPRscan/Rule Set 2) で効率を予測するとき
22
+ - CRISPR スクリーニングライブラリを構築するとき
23
+ - 検証用 PCR プライマーを設計するとき
24
+
25
+ ---
26
+
27
+ ## Quick Start
28
+
29
+ ## 1. PAM 配列検索・gRNA 候補列挙
30
+
31
+ ```python
32
+ import re
33
+ import pandas as pd
34
+ from Bio import SeqIO
35
+ from Bio.Seq import Seq
36
+
37
+
38
# PAM definitions per nuclease: the IUPAC PAM motif, the guide length in
# nucleotides, and the side of the guide on which the PAM is located.
PAM_PATTERNS = dict(
    SpCas9=dict(pam="NGG", guide_len=20, pam_side="3prime"),
    SaCas9=dict(pam="NNGRRT", guide_len=21, pam_side="3prime"),
    Cas12a=dict(pam="TTTV", guide_len=23, pam_side="5prime"),
    xCas9=dict(pam="NG", guide_len=20, pam_side="3prime"),
)
49
+
50
+
51
def iupac_to_regex(pam):
    """Convert an IUPAC nucleotide motif into a regular-expression string.

    Parameters:
        pam: str — motif written with IUPAC codes (e.g. "NGG")

    Returns:
        str — the motif with each ambiguity code (N, R, Y, S, W, K, M, B,
        D, H, V) expanded to a character class; every other character is
        copied through unchanged.
    """
    ambiguity_bases = (
        ("N", "ACGT"), ("R", "AG"), ("Y", "CT"),
        ("S", "GC"), ("W", "AT"), ("K", "GT"),
        ("M", "AC"), ("B", "CGT"), ("D", "AGT"),
        ("H", "ACT"), ("V", "ACG"),
    )
    char_class = {code: f"[{bases}]" for code, bases in ambiguity_bases}

    pieces = []
    for symbol in pam:
        pieces.append(char_class.get(symbol, symbol))
    return "".join(pieces)
60
+
61
+
62
def find_grna_candidates(sequence, cas_type="SpCas9",
                         strand="both"):
    """Enumerate gRNA candidates adjacent to PAM sites in a target sequence.

    Parameters:
        sequence: str — target DNA sequence (upper-cased before scanning)
        cas_type: str — key of PAM_PATTERNS selecting the PAM motif,
            guide length and PAM side
        strand: str — "sense", "antisense" or "both"; any other value
            scans nothing

    Returns:
        pd.DataFrame — one row per candidate with columns guide, pam,
        position, strand ("+" or "-"), gc_content and length, restricted
        to candidates whose rounded GC fraction lies in [0.30, 0.70].

    Note:
        "position" is the 0-based index of the PAM start within the string
        that was scanned. For "-" rows that string is the reverse
        complement of the input, so the index is NOT an input coordinate.
    """
    settings = PAM_PATTERNS[cas_type]
    pam_regex = iupac_to_regex(settings["pam"])
    guide_len = settings["guide_len"]
    pam_is_3prime = settings["pam_side"] == "3prime"
    pam_width = len(settings["pam"])
    target = sequence.upper()

    # Zero-width lookahead so that overlapping PAM occurrences are all found.
    pam_scanner = re.compile(f"(?=({pam_regex}))")

    def _collect(template, label):
        """Return candidate records for one strand string."""
        found = []
        for hit in pam_scanner.finditer(template):
            pam_at = hit.start()
            if pam_is_3prime:
                # Guide sits immediately upstream of the PAM.
                lo, hi = pam_at - guide_len, pam_at
                if lo < 0:
                    continue
            else:
                # Guide sits immediately downstream of the PAM.
                lo = pam_at + pam_width
                hi = lo + guide_len
                if hi > len(template):
                    continue

            protospacer = template[lo:hi]
            if len(protospacer) != guide_len:
                continue

            gc_fraction = (protospacer.count("G")
                           + protospacer.count("C")) / guide_len
            found.append({
                "guide": protospacer,
                "pam": hit.group(1),
                "position": pam_at,
                "strand": label,
                "gc_content": round(gc_fraction, 3),
                "length": guide_len,
            })
        return found

    records = []
    if strand in ("sense", "both"):
        records.extend(_collect(target, "+"))
    if strand in ("antisense", "both"):
        revcomp = str(Seq(target).reverse_complement())
        records.extend(_collect(revcomp, "-"))

    table = pd.DataFrame(records)

    # Keep only guides with 30-70% GC (bounds inclusive).
    if not table.empty:
        gc_column = table["gc_content"]
        table = table[(gc_column >= 0.30) & (gc_column <= 0.70)]

    print(f"CRISPR {cas_type}: "
          f"{len(table)} gRNA candidates "
          f"(GC 30-70%)")
    return table.reset_index(drop=True)
128
+ ```
129
+
130
+ ## 2. オフターゲットスコアリング
131
+
132
+ ```python
133
+ import numpy as np
134
+
135
+
136
# Simplified CFD score implementation (Doench 2016)
def cfd_score(guide, off_target):
    """Compute a simplified CFD (Cutting Frequency Determination) score.

    Parameters:
        guide: str — gRNA sequence (20 nt expected)
        off_target: str — candidate off-target site

    Returns:
        float — product of (1 - penalty) over all mismatching positions,
        rounded to 4 decimals; 1.0 when there is no mismatch.

    The two strings are compared position by position up to the shorter
    length and the comparison is case-sensitive. Positions beyond the
    20-entry table use a penalty of 0.5. Several table entries are 0.0, so
    a mismatch at those positions leaves the score unchanged.
    """
    # Per-position mismatch penalties (simplified), positions 1..20.
    penalties_by_position = (
        0.0, 0.0, 0.014, 0.0,
        0.0, 0.395, 0.317, 0.0,
        0.389, 0.079, 0.445,
        0.508, 0.613, 0.851,
        0.732, 0.828, 0.615,
        0.804, 0.685, 0.583,
    )
    penalty_table = dict(enumerate(penalties_by_position, start=1))

    remaining = 1.0
    paired = zip(guide, off_target)
    for position, (guide_base, site_base) in enumerate(paired, start=1):
        if guide_base == site_base:
            continue
        remaining *= (1.0 - penalty_table.get(position, 0.5))

    return round(remaining, 4)
164
+
165
+
166
def score_off_targets(guide, genome_fasta,
                      max_mismatches=4):
    """Scan a FASTA reference for NGG-adjacent sites resembling ``guide``.

    Parameters:
        guide: str — gRNA sequence (upper-cased before comparison)
        genome_fasta: str — path to the reference genome in FASTA format
        max_mismatches: int — maximum number of mismatches to report

    Returns:
        pd.DataFrame — columns chrom, position, site, pam, mismatches and
        cfd_score, sorted by cfd_score in descending order. The frame has
        these columns even when no site is found.

    Notes:
        * Only the sequence as stored in each record is scanned; the
          reverse complement is not searched.
        * The PAM check is hard-coded to [ACGT]GG, whatever nuclease the
          guide was designed for.
        * Every window of every record is visited in pure Python, so the
          run time grows with the total reference length.
    """
    columns = ["chrom", "position", "site", "pam",
               "mismatches", "cfd_score"]
    pam_width = 3
    pam_pattern = re.compile("[ACGT]GG")

    hits = []
    guide_len = len(guide)
    query = guide.upper()

    for record in SeqIO.parse(genome_fasta, "fasta"):
        seq = str(record.seq).upper()
        # "+ 1" includes the final window whose PAM ends exactly at the end
        # of the record; without it the last candidate site is skipped.
        n_windows = len(seq) - guide_len - pam_width + 1
        for start in range(n_windows):
            site_end = start + guide_len
            pam = seq[site_end:site_end + pam_width]
            if not pam_pattern.match(pam):
                continue

            site = seq[start:site_end]
            mismatches = sum(1 for a, b in zip(query, site) if a != b)
            if mismatches <= max_mismatches:
                hits.append({
                    "chrom": record.id,
                    "position": start,
                    "site": site,
                    "pam": pam,
                    "mismatches": mismatches,
                    "cfd_score": cfd_score(query, site),
                })

    # Explicit columns keep sort_values() from raising KeyError when
    # nothing matched and the frame would otherwise have no columns.
    df = pd.DataFrame(hits, columns=columns)
    df = df.sort_values("cfd_score",
                        ascending=False)
    print(f"Off-target: {len(df)} sites "
          f"(≤{max_mismatches} mm)")
    return df
209
+ ```
210
+
211
+ ## 3. gRNA 活性予測
212
+
213
+ ```python
214
def rule_set2_score(guide_30mer):
    """Heuristic gRNA activity score (simplified take on Rule Set 2).

    Parameters:
        guide_30mer: str — 30 nt context
            (4 nt upstream + 20 nt guide + 3 nt PAM + 3 nt downstream)

    Returns:
        Score clamped to the range 0..1 and rounded to 3 decimals. An
        input that is not exactly 30 nt long prints a warning and yields
        0.0.

    Scoring starts at 0.5 and is adjusted by the guide's GC fraction, by
    the bases at guide positions -1 and -4, and by the presence of a TTTT
    run.
    """
    context = guide_30mer.upper()
    if len(context) != 30:
        print(f"Warning: expected 30nt, "
              f"got {len(context)}")
        return 0.0

    protospacer = context[4:24]
    gc_fraction = sum(protospacer.count(base) for base in "GC") / 20

    # Baseline before adjustments (simplified, not position-weighted).
    activity = 0.5

    # GC content: bonus inside 40-70%, penalty below 30% or above 80%.
    if gc_fraction < 0.30 or gc_fraction > 0.80:
        activity -= 0.2
    elif 0.40 <= gc_fraction <= 0.70:
        activity += 0.1

    # Preferred bases close to the PAM.
    if protospacer.endswith("G"):
        activity += 0.05
    if protospacer[-4] == "C":
        activity += 0.03

    # Avoid poly-T runs (Pol III termination).
    if protospacer.find("TTTT") != -1:
        activity -= 0.3

    capped = min(1, activity)
    floored = max(0, capped)
    return round(floored, 3)
253
+
254
+
255
def rank_grnas(candidates_df, genome_fasta=None):
    """Rank gRNA candidates by a composite activity/specificity/GC score.

    Parameters:
        candidates_df: pd.DataFrame — candidates with at least the columns
            "guide" and "gc_content"
        genome_fasta: str | None — reference genome for the off-target
            scan; when falsy, every guide gets specificity 1.0

    Returns:
        pd.DataFrame — copy of the input with activity_score, specificity
        and composite_score added, sorted by composite_score descending.
        An empty input returns an empty copy carrying the three score
        columns instead of raising.

    Notes:
        * The 30-mer scored for activity is synthetic: 20 nt guides are
          wrapped as "AAAA" + guide + "GGGNNN", other lengths are
          right-padded with "N" to 30 characters. The real flanking
          sequence is not used.
        * Specificity is 1 / (1 + number of sites reported by
          score_off_targets with max_mismatches=3).
        * composite = 0.5 * activity + 0.3 * specificity
          + 0.2 * gc_content clipped to [0.4, 0.6].
    """
    df = candidates_df.copy()

    # Nothing to rank: an empty frame may lack the "guide" column, and
    # .iloc[0] below would fail, so return early with the score columns.
    if df.empty:
        for column in ("activity_score", "specificity",
                       "composite_score"):
            df[column] = pd.Series(dtype=float)
        print("gRNA ranking: no candidates")
        return df

    def _activity(guide):
        """Score one guide using a synthetic 30 nt context."""
        if len(guide) == 20:
            context = "AAAA" + guide + "GGGNNN"
        else:
            context = guide.ljust(30, "N")
        return rule_set2_score(context)

    df["activity_score"] = df["guide"].apply(_activity)

    # Off-target specificity (only when a genome is supplied).
    if genome_fasta:
        specificities = []
        for guide in df["guide"]:
            off_targets = score_off_targets(guide, genome_fasta, 3)
            # Zero hits gives 1 / (1 + 0) == 1.0, so no special case needed.
            specificities.append(
                round(1.0 / (1.0 + len(off_targets)), 3))
        df["specificity"] = specificities
    else:
        df["specificity"] = 1.0

    # Composite score.
    df["composite_score"] = (
        df["activity_score"] * 0.5
        + df["specificity"] * 0.3
        + df["gc_content"].clip(0.4, 0.6) * 0.2
    ).round(3)

    df = df.sort_values("composite_score",
                        ascending=False)
    print(f"gRNA ranking: top score = "
          f"{df['composite_score'].iloc[0]}")
    return df
298
+ ```
299
+
300
+ ## 4. sgRNA ライブラリ構築
301
+
302
+ ```python
303
def build_sgrna_library(gene_list,
                        genome_fasta,
                        guides_per_gene=4,
                        cas_type="SpCas9"):
    """Build an sgRNA screening library from per-gene target sequences.

    Parameters:
        gene_list: list[dict] — genes to target, e.g.
            [{"gene": "TP53", "sequence": "ATCG..."}]
        genome_fasta: str — reference genome
        guides_per_gene: int — number of top-ranked guides kept per gene
        cas_type: str — Cas type passed to find_grna_candidates

    Returns:
        pd.DataFrame — columns gene, guide, position, strand, gc_content,
        activity and composite. The frame has these columns even when no
        gene yields a guide.

    NOTE(review): ``genome_fasta`` is accepted but never used here;
    rank_grnas is called without it, so no off-target scan takes place and
    every guide is ranked with specificity 1.0. Confirm whether that is
    intended before wiring it through, since a genome-wide scan per guide
    is expensive.
    """
    columns = ["gene", "guide", "position", "strand",
               "gc_content", "activity", "composite"]
    library = []

    for gene_info in gene_list:
        gene = gene_info["gene"]
        seq = gene_info["sequence"]

        candidates = find_grna_candidates(
            seq, cas_type)

        if candidates.empty:
            print(f" {gene}: no candidates")
            continue

        ranked = rank_grnas(candidates)
        top = ranked.head(guides_per_gene)

        for _, row in top.iterrows():
            library.append({
                "gene": gene,
                "guide": row["guide"],
                "position": row["position"],
                "strand": row["strand"],
                "gc_content": row["gc_content"],
                "activity": row["activity_score"],
                "composite": row["composite_score"],
            })

    # Explicit columns keep df["gene"] valid when the library is empty
    # (empty gene_list, or no gene produced a candidate).
    df = pd.DataFrame(library, columns=columns)
    n_genes = df["gene"].nunique()
    print(f"Library: {len(df)} sgRNAs for "
          f"{n_genes} genes")
    return df
349
+ ```
350
+
351
+ ---
352
+
353
+ ## パイプライン統合
354
+
355
+ ```
356
+ genome-sequence-tools → crispr-design → perturbation-analysis
357
+ (ゲノム配列取得) (gRNA 設計) (摂動実験解析)
358
+ │ │ ↓
359
+ variant-effect-prediction ─┘ functional-genomics
360
+ (変異影響予測) (機能ゲノミクス)
361
+ ```
362
+
363
+ ## パイプライン出力
364
+
365
+ | ファイル | 説明 | 次スキル |
366
+ |---------|------|---------|
367
+ | `grna_candidates.csv` | gRNA 候補リスト | → ランキング |
368
+ | `off_target_report.csv` | オフターゲット評価 | → 安全性確認 |
369
+ | `sgrna_library.csv` | sgRNA ライブラリ | → perturbation-analysis |
@@ -5,6 +5,10 @@ description: |
5
5
  パラダイムに準拠し、7 つの戦略(ターゲット型、化合物型、疾患駆動型、メカニズム型、
6
6
  ネットワーク型、表現型、構造型)で候補を体系的に探索。
7
7
  「ドラッグリポジショニングして」「既存薬の新規適応を探して」で発火。
8
+ tu_tools:
9
+ - key: pharos
10
+ name: Pharos
11
+ description: IDG Pharos/TCRD ターゲットナレッジベース
8
12
  ---
9
13
 
10
14
  # Scientific Drug Repurposing
@@ -4,6 +4,11 @@ description: |
4
4
  環境科学・生態学解析スキル。種分布モデリング(SDM / MaxEnt)・
5
5
  生物多様性指標(α/β/γ 多様性)・群集構造解析(NMDS/CCA/RDA)・
6
6
  生態学的ニッチモデリング・保全優先順位評価・OBIS/GBIF データ統合パイプライン。
7
+ ToolUniverse 連携: gbif。
8
+ tu_tools:
9
+ - key: gbif
10
+ name: GBIF
11
+ description: 地球規模生物多様性情報ファシリティ
7
12
  ---
8
13
 
9
14
  # Scientific Environmental Ecology
@@ -5,6 +5,11 @@ description: |
5
5
  リスク指標(RR/OR/HR/NNT)・標準化死亡比(SMR)・年齢調整率・
6
6
  空間疫学(GIS / 空間クラスタリング)・因果推論ダイアグラム(DAG)・
7
7
  WHO/CDC/EU 公衆衛生データ統合パイプライン。
8
+ ToolUniverse 連携: who_gho。
9
+ tu_tools:
10
+ - key: who_gho
11
+ name: WHO GHO
12
+ description: WHO Global Health Observatory 健康統計 API
8
13
  ---
9
14
 
10
15
  # Scientific Epidemiology & Public Health
@@ -6,6 +6,11 @@ description: |
6
6
  ヒストン修飾クロマチン状態モデリング (ChromHMM)、Hi-C 接触マップ・TAD 検出、
7
7
  転写因子結合サイト予測 (モチーフ濃縮)、差次結合解析 (DiffBind) を統合した
8
8
  計算エピゲノミクスパイプライン。ChIP-Atlas 43 万+実験との連携対応。
9
+ ToolUniverse 連携: chipatlas。
10
+ tu_tools:
11
+ - key: chipatlas
12
+ name: ChIP-Atlas
13
+ description: ChIP-Atlas エピゲノミクスエンリッチメント解析 (43万+実験)
9
14
  ---
10
15
 
11
16
  # Scientific Epigenomics & Chromatin Biology
@@ -0,0 +1,274 @@
1
+ ---
2
+ name: scientific-glycomics
3
+ description: |
4
+ 糖鎖構造解析スキル。GlyConnect / GlyGen / GlyCosmos
5
+ 糖鎖データベース統合検索・糖鎖構造描画・糖タンパク質
6
+ グリコシル化部位予測・レクチンバインディング・
7
+ 糖鎖マスフラグメンテーション解析パイプライン。
8
+ TU 外スキル (直接 REST API + Python ライブラリ)。
9
+ ---
10
+
11
+ # Scientific Glycomics
12
+
13
+ GlyConnect / GlyGen / GlyCosmos 糖鎖データベースを統合した
14
+ 糖鎖構造解析・糖タンパク質グリコサイト予測・レクチン特異性・
15
+ 糖鎖 MS フラグメンテーション解析パイプラインを提供する。
16
+
17
+ ## When to Use
18
+
19
+ - 糖鎖構造を GlyTouCan ID から検索・描画するとき
20
+ - タンパク質のグリコシル化部位を予測・マッピングするとき
21
+ - GlyGen/GlyConnect で糖鎖-タンパク質関連を検索するとき
22
+ - 糖鎖マススペクトルのフラグメンテーション解析を行うとき
23
+ - レクチン-糖鎖結合特異性を調査するとき
24
+
25
+ ---
26
+
27
+ ## Quick Start
28
+
29
+ ## 1. GlyGen 糖鎖検索
30
+
31
+ ```python
32
+ import requests
33
+ import pandas as pd
34
+
35
+ GLYGEN_API = "https://api.glygen.org"
36
+
37
+
38
def glygen_glycan_search(glycan_type=None,
                         mass_range=None):
    """Search glycans through the GlyGen API.

    Parameters:
        glycan_type: str | None — glycan type
            ("N-linked", "O-linked", "GAG", ...)
        mass_range: tuple | None — (min_mass, max_mass)

    Returns:
        pd.DataFrame — one row per entry of the response's "results" list
        with columns glytoucan_ac, mass, glycan_type and composition.

    Raises:
        requests.HTTPError — on a non-success HTTP status.

    The request is a POST of a JSON body to {GLYGEN_API}/glycan/search with
    a 30 second timeout. Filters that are falsy are left out of the body.
    """
    payload = {}
    if glycan_type:
        payload["glycan_type"] = glycan_type
    if mass_range:
        lower, upper = mass_range[0], mass_range[1]
        payload["mass"] = {"min": lower, "max": upper}

    response = requests.post(
        f"{GLYGEN_API}/glycan/search", json=payload, timeout=30)
    response.raise_for_status()
    hits = response.json().get("results", [])

    # (field name, default used when the field is missing)
    wanted = (
        ("glytoucan_ac", ""),
        ("mass", 0),
        ("glycan_type", ""),
        ("composition", ""),
    )
    table = pd.DataFrame([
        {field: hit.get(field, default) for field, default in wanted}
        for hit in hits
    ])
    print(f"GlyGen search: {len(table)} glycans found")
    return table
75
+
76
+
77
def glygen_glycan_detail(glytoucan_ac):
    """Fetch detail information for one glycan from the GlyGen API.

    Parameters:
        glytoucan_ac: str — GlyTouCan accession

    Returns:
        dict — keys glytoucan_ac, mass, glycan_type, iupac, glycoct,
        species (list of the "name" values of the response's "species"
        entries) and proteins (length of the response's "glycoprotein"
        list).

    Raises:
        requests.HTTPError — on a non-success HTTP status.

    The summary line formats "mass" with ``:.1f``, so it assumes the value
    is numeric — TODO confirm against the API response.
    """
    response = requests.get(
        f"{GLYGEN_API}/glycan/detail/{glytoucan_ac}", timeout=30)
    response.raise_for_status()
    payload = response.json()

    # Scalar fields first, in the order they appear in the result dict.
    scalar_defaults = (
        ("glytoucan_ac", ""),
        ("mass", 0),
        ("glycan_type", ""),
        ("iupac", ""),
        ("glycoct", ""),
    )
    info = {field: payload.get(field, default)
            for field, default in scalar_defaults}
    info["species"] = [entry.get("name", "")
                       for entry in payload.get("species", [])]
    info["proteins"] = len(payload.get("glycoprotein", []))

    print(f"GlyGen: {glytoucan_ac} → "
          f"type={info['glycan_type']}, "
          f"mass={info['mass']:.1f}, "
          f"proteins={info['proteins']}")
    return info
105
+ ```
106
+
107
+ ## 2. 糖タンパク質グリコサイト検索
108
+
109
+ ```python
110
def glygen_protein_glycosylation(uniprot_ac):
    """Fetch the glycosylation sites of a protein from the GlyGen API.

    Parameters:
        uniprot_ac: str — UniProt accession

    Returns:
        pd.DataFrame — one row per entry of the response's "glycosylation"
        list with columns position, type, glytoucan_ac, residue and
        evidence; a frame without columns when the list is empty or absent.

    Raises:
        requests.HTTPError — on a non-success HTTP status.
    """
    response = requests.get(
        f"{GLYGEN_API}/protein/detail/{uniprot_ac}", timeout=30)
    response.raise_for_status()
    entries = response.json().get("glycosylation", [])

    # (field name, default used when the field is missing)
    wanted = (
        ("position", 0),
        ("type", ""),
        ("glytoucan_ac", ""),
        ("residue", ""),
        ("evidence", ""),
    )
    table = pd.DataFrame([
        {field: entry.get(field, default) for field, default in wanted}
        for entry in entries
    ])
    print(f"GlyGen glycosites: {uniprot_ac} → "
          f"{len(table)} sites")
    return table
138
+ ```
139
+
140
+ ## 3. 糖鎖 MS フラグメンテーション
141
+
142
+ ```python
143
+ import numpy as np
144
+
145
+
146
def glycan_fragmentation(composition,
                         ion_type="[M+Na]+"):
    """Predict glycan MS fragment m/z values from a composition.

    Parameters:
        composition: dict — residue counts, e.g.
            {"Hex": 5, "HexNAc": 4, "Fuc": 1, "NeuAc": 2}
        ion_type: str — ion species: "[M+Na]+", "[M+H]+", "[M+K]+" or
            "[M-H]-"

    Returns:
        pd.DataFrame — columns type, mz and loss, sorted by mz descending.
        The frame has these columns even when no fragment is generated
        (empty composition, or only unknown residues).

    Notes:
        * Residues missing from the mass table are silently ignored.
        * An unrecognised ``ion_type`` silently falls back to the sodium
          adduct mass (22.9892).
        * Each fragment is the precursor minus i copies of ONE residue
          type (i = 1..count); combined losses of different residues are
          not enumerated.
    """
    monosaccharide_mass = {
        "Hex": 162.0528,
        "HexNAc": 203.0794,
        "Fuc": 146.0579,
        "NeuAc": 291.0954,
        "NeuGc": 307.0903,
        "Pent": 132.0423,
    }

    adducts = {
        "[M+Na]+": 22.9892,
        "[M+H]+": 1.0073,
        "[M+K]+": 38.9632,
        "[M-H]-": -1.0073,
    }

    # Only residues with a known mass contribute to anything below.
    known = [(sugar, count) for sugar, count in composition.items()
             if sugar in monosaccharide_mass]

    total_mass = 18.0106  # water
    for sugar, count in known:
        total_mass += monosaccharide_mass[sugar] * count

    adduct = adducts.get(ion_type, 22.9892)
    precursor_mz = total_mass + adduct

    # Y-type fragments (reducing end)
    fragments = []
    for sugar, count in known:
        for i in range(1, count + 1):
            loss = monosaccharide_mass[sugar] * i
            frag_mz = precursor_mz - loss
            fragments.append({
                "type": f"Y (loss {i}x{sugar})",
                "mz": round(frag_mz, 4),
                "loss": round(loss, 4),
            })

    # Explicit columns keep sort_values("mz") from raising KeyError when
    # the fragment list is empty.
    df = pd.DataFrame(
        fragments, columns=["type", "mz", "loss"]).sort_values(
        "mz", ascending=False)
    print(f"Glycan fragmentation: "
          f"precursor={precursor_mz:.4f}, "
          f"{len(df)} fragments")
    return df
202
+ ```
203
+
204
+ ## 4. 糖鎖解析統合パイプライン
205
+
206
+ ```python
207
def glycomics_pipeline(uniprot_ids,
                       output_dir="results"):
    """Run the integrated glycomics pipeline for a list of proteins.

    Parameters:
        uniprot_ids: list[str] — UniProt IDs
        output_dir: str — output directory (created if missing)

    Returns:
        dict — "sites": combined glycosylation-site table with an added
        "protein" column; "glycans": table of fetched glycan details. Each
        value is an empty DataFrame when there is nothing to report.

    Side effects:
        Writes glycosites.csv (when at least one protein was queried) and
        glycan_details.csv (when at least one glycan detail was fetched)
        into ``output_dir``.

    At most 50 distinct glycan accessions are looked up. They are taken in
    sorted order so the selection is reproducible between runs; slicing
    the raw set would pick an arbitrary subset.
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    max_glycans = 50

    # 1) Glycosite mapping
    all_sites = []
    for uid in uniprot_ids:
        sites = glygen_protein_glycosylation(uid)
        sites["protein"] = uid
        all_sites.append(sites)

    sites_df = pd.DataFrame()
    if all_sites:
        sites_df = pd.concat(all_sites,
                             ignore_index=True)
        sites_df.to_csv(
            output_dir / "glycosites.csv",
            index=False)

    # 2) Glycan detail lookup
    unique_glycans = set()
    for df in all_sites:
        if not df.empty:
            unique_glycans.update(
                df["glytoucan_ac"].dropna().unique())

    # Drop blank accessions before capping so they do not use up slots.
    accessions = sorted(
        (gac for gac in unique_glycans if gac), key=str)[:max_glycans]
    glycan_details = [glygen_glycan_detail(gac) for gac in accessions]

    glycans_df = pd.DataFrame()
    if glycan_details:
        glycans_df = pd.DataFrame(glycan_details)
        glycans_df.to_csv(
            output_dir / "glycan_details.csv",
            index=False)

    print(f"Glycomics pipeline → {output_dir}")
    # "glycans" is an additional key; "sites" is unchanged for callers.
    return {"sites": sites_df, "glycans": glycans_df}
255
+ ```
256
+
257
+ ---
258
+
259
+ ## パイプライン統合
260
+
261
+ ```
262
+ proteomics-mass-spectrometry → glycomics → pathway-enrichment
263
+ (LC-MS/MS PTM 同定) (糖鎖構造) (糖鎖パスウェイ)
264
+ │ │ ↓
265
+ protein-structure-analysis ────┘ immunoinformatics
266
+ (糖鎖結合サイト構造) (抗体グリコシル化)
267
+ ```
268
+
269
+ ## パイプライン出力
270
+
271
+ | ファイル | 説明 | 次スキル |
272
+ |---------|------|---------|
273
+ | `results/glycosites.csv` | グリコシル化部位 | → protein-structure-analysis |
274
+ | `results/glycan_details.csv` | 糖鎖詳細 | → pathway-enrichment |
@@ -3,8 +3,11 @@ name: scientific-gtex-tissue-expression
3
3
  description: |
4
4
  GTEx 組織発現スキル。GTEx Portal REST API v2 による
5
5
  組織特異的遺伝子発現パターン解析・eQTL ルックアップ・
6
- 多組織比較。直接 API (ToolUniverse 非連携)
7
- tu_tools: []
6
+ 多組織比較。ToolUniverse 連携: gtex_v2
7
+ tu_tools:
8
+ - key: gtex_v2
9
+ name: GTEx v2
10
+ description: GTEx Portal REST API v2 組織特異的発現・eQTL
8
11
  ---
9
12
 
10
13
  # Scientific GTEx Tissue Expression