@nahisaho/satori 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/README.md +138 -2
  2. package/package.json +1 -1
  3. package/src/.github/skills/scientific-admet-pharmacokinetics/SKILL.md +14 -0
  4. package/src/.github/skills/scientific-bioinformatics/SKILL.md +13 -0
  5. package/src/.github/skills/scientific-cheminformatics/SKILL.md +13 -0
  6. package/src/.github/skills/scientific-citation-checker/SKILL.md +12 -0
  7. package/src/.github/skills/scientific-clinical-decision-support/SKILL.md +14 -0
  8. package/src/.github/skills/scientific-deep-research/SKILL.md +15 -0
  9. package/src/.github/skills/scientific-disease-research/SKILL.md +14 -0
  10. package/src/.github/skills/scientific-drug-repurposing/SKILL.md +14 -0
  11. package/src/.github/skills/scientific-drug-target-profiling/SKILL.md +14 -0
  12. package/src/.github/skills/scientific-environmental-ecology/SKILL.md +295 -0
  13. package/src/.github/skills/scientific-epidemiology-public-health/SKILL.md +332 -0
  14. package/src/.github/skills/scientific-grant-writing/SKILL.md +12 -0
  15. package/src/.github/skills/scientific-graph-neural-networks/SKILL.md +12 -0
  16. package/src/.github/skills/scientific-immunoinformatics/SKILL.md +341 -0
  17. package/src/.github/skills/scientific-infectious-disease/SKILL.md +342 -0
  18. package/src/.github/skills/scientific-meta-analysis/SKILL.md +11 -0
  19. package/src/.github/skills/scientific-metabolomics/SKILL.md +13 -0
  20. package/src/.github/skills/scientific-microbiome-metagenomics/SKILL.md +349 -0
  21. package/src/.github/skills/scientific-multi-omics/SKILL.md +13 -0
  22. package/src/.github/skills/scientific-network-analysis/SKILL.md +13 -0
  23. package/src/.github/skills/scientific-pharmacovigilance/SKILL.md +15 -0
  24. package/src/.github/skills/scientific-population-genetics/SKILL.md +336 -0
  25. package/src/.github/skills/scientific-precision-oncology/SKILL.md +14 -0
  26. package/src/.github/skills/scientific-protein-design/SKILL.md +13 -0
  27. package/src/.github/skills/scientific-protein-structure-analysis/SKILL.md +13 -0
  28. package/src/.github/skills/scientific-sequence-analysis/SKILL.md +13 -0
  29. package/src/.github/skills/scientific-single-cell-genomics/SKILL.md +361 -0
  30. package/src/.github/skills/scientific-spatial-transcriptomics/SKILL.md +281 -0
  31. package/src/.github/skills/scientific-survival-clinical/SKILL.md +12 -0
  32. package/src/.github/skills/scientific-systems-biology/SKILL.md +310 -0
  33. package/src/.github/skills/scientific-text-mining-nlp/SKILL.md +358 -0
  34. package/src/.github/skills/scientific-variant-interpretation/SKILL.md +14 -0
@@ -0,0 +1,341 @@
1
+ ---
2
+ name: scientific-immunoinformatics
3
+ description: |
4
+ 免疫情報学スキル。エピトープ予測(MHC-I/II バインディング)・
5
+ T 細胞/B 細胞エピトープマッピング・抗体構造解析(CDR ループ)・
6
+ 免疫レパトア解析(TCR/BCR クロノタイプ)・ワクチン候補設計・
7
+ IEDB/IMGT/SAbDab データベース統合パイプライン。
8
+ ---
9
+
10
+ # Scientific Immunoinformatics
11
+
12
+ 免疫情報学(Immunoinformatics)に特化した解析パイプラインを提供する。
13
+ エピトープ予測、MHC 結合親和性推定、抗体配列・構造解析、
14
+ 免疫レパトア多様性解析、ワクチン候補優先順位付けを体系的に扱う。
15
+
16
+ ## When to Use
17
+
18
+ - ペプチド-MHC 結合親和性を予測するとき
19
+ - T 細胞 / B 細胞エピトープを同定・マッピングするとき
20
+ - TCR / BCR レパトア(クロノタイプ)多様性を解析するとき
21
+ - 抗体 CDR ループの構造モデリングを行うとき
22
+ - ワクチン候補アンチゲンの優先順位付けを行うとき
23
+
24
+ ---
25
+
26
+ ## Quick Start
27
+
28
+ ## 1. MHC-I バインディング予測
29
+
30
+ ```python
31
+ import numpy as np
32
+ import pandas as pd
33
+
34
def predict_mhc_binding(peptides, alleles, method="netmhcpan"):
    """Predict MHC class I binding for every peptide/allele combination.

    Each (peptide, allele) pair is scored with MHCflurry's
    ``Class1PresentationPredictor`` and classified by predicted affinity:

    - ``"Strong"``:     affinity < 50 nM
    - ``"Weak"``:       affinity < 500 nM
    - ``"Non-binder"``: everything else

    Parameters:
        peptides: Iterable of peptide sequences (8-14 mers are typical).
        alleles: Iterable of HLA allele names,
            e.g. ``["HLA-A*02:01", "HLA-B*07:02"]``.
        method: Accepted for interface compatibility only. This
            implementation always uses MHCflurry, whatever value is passed
            (including the default ``"netmhcpan"``).

    Returns:
        pandas.DataFrame with one row per (peptide, allele) pair and the
        columns ``peptide``, ``allele``, ``affinity_nM``, ``percentile_rank``,
        ``processing_score``, ``presentation_score`` and ``binding_level``.
        The frame is empty (but has all columns) when either input is empty.
    """
    columns = [
        "peptide",
        "allele",
        "affinity_nM",
        "percentile_rank",
        "processing_score",
        "presentation_score",
    ]
    peptides = list(peptides)
    alleles = list(alleles)

    results = []
    if peptides and alleles:
        # Import and load lazily: the predictor is expensive to load and is
        # pointless when there is nothing to predict.
        from mhcflurry import Class1PresentationPredictor

        predictor = Class1PresentationPredictor.load()

        for peptide in peptides:
            for allele in alleles:
                pred = predictor.predict(peptides=[peptide], alleles=[allele],
                                         verbose=0)
                results.append({
                    "peptide": peptide,
                    "allele": allele,
                    "affinity_nM": pred["affinity"].values[0],
                    "percentile_rank": pred["affinity_percentile"].values[0],
                    "processing_score": pred["processing_score"].values[0],
                    "presentation_score": pred["presentation_score"].values[0],
                })

    # Passing `columns` keeps the schema stable even with zero rows, so the
    # classification below never hits a missing "affinity_nM" column.
    df = pd.DataFrame(results, columns=columns)
    df["binding_level"] = np.where(
        df["affinity_nM"] < 50, "Strong",
        np.where(df["affinity_nM"] < 500, "Weak", "Non-binder")
    )

    n_strong = (df["binding_level"] == "Strong").sum()
    n_weak = (df["binding_level"] == "Weak").sum()
    print(f" MHC-I: {n_strong} strong + {n_weak} weak binders / {len(df)} predictions")
    return df
78
+ ```
79
+
80
+ ## 2. B 細胞エピトープ予測
81
+
82
+ ```python
83
def predict_bcell_epitopes(sequence, window_size=20, threshold=0.5):
    """Score sliding windows of a protein as candidate linear B-cell epitopes.

    For every window start position the score is the mean of two
    Biopython-derived profiles at that position:

    1. a windowed hydrophilicity profile (``ProteinAnalysis.protein_scale``
       with the Hopp-Woods scale ``ProtParamData.hw``), and
    2. the flexibility profile (``ProteinAnalysis.flexibility``).

    Windows whose score exceeds ``threshold`` are reported.

    NOTE(review): the earlier description of this snippet mentioned BepiPred,
    Parker hydrophilicity, Emini accessibility and Chou-Fasman turns. None of
    those are computed here; Hopp-Woods is used because it is the
    hydrophilicity scale Biopython ships. Confirm this substitution (and the
    default threshold) is acceptable for your use case.

    Parameters:
        sequence: Protein sequence (``str`` or any object supporting ``len``,
            slicing and ``str()``).
        window_size: Length of the sliding window, must be >= 1.
        threshold: Minimum score for a window to be reported.

    Returns:
        pandas.DataFrame with columns ``start`` (1-based), ``end``
        (inclusive), ``sequence`` and ``score``; empty when no window passes
        or the sequence is shorter than ``window_size``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size!r}")

    columns = ["start", "end", "sequence", "score"]
    epitopes = []
    n_windows = len(sequence) - window_size + 1

    if n_windows > 0:
        from Bio.SeqUtils import ProtParamData
        from Bio.SeqUtils.ProtParam import ProteinAnalysis

        pa = ProteinAnalysis(str(sequence))

        # protein_scale expects a residue -> value dict, not a scale name.
        hydrophilicity = pa.protein_scale(ProtParamData.hw, window_size)
        flexibility = pa.flexibility()

        for i in range(n_windows):
            window = sequence[i:i + window_size]
            # The two profiles can differ in length; positions past the end
            # of a profile contribute 0.
            score = np.mean([
                hydrophilicity[i] if i < len(hydrophilicity) else 0,
                flexibility[i] if i < len(flexibility) else 0,
            ])
            if score > threshold:
                epitopes.append({
                    "start": i + 1,
                    "end": i + window_size,
                    "sequence": window,
                    "score": score,
                })

    df = pd.DataFrame(epitopes, columns=columns)
    print(f" B-cell epitopes: {len(df)} predicted (threshold={threshold})")
    return df
126
+ ```
127
+
128
+ ## 3. TCR/BCR レパトア解析
129
+
130
+ ```python
131
def repertoire_analysis(clonotype_df, chain="TRB",
                        clone_col="cdr3_aa", count_col="clone_count"):
    """Compute diversity metrics for a TCR/BCR clonotype table.

    Metrics (p_i is the frequency of clonotype i, N the number of rows):

    - Shannon entropy: H = -sum(p_i * log2(p_i))
    - Simpson index:   D = 1 - sum(p_i ** 2)
    - Clonality:       1 - H / log2(N)  (0 when N <= 1)
    - Chao1:           S_obs + f1**2 / (2 * f2); when there are no
      doubletons (f2 == 0) the bias-corrected form
      S_obs + f1 * (f1 - 1) / 2 is used instead
    - Gini coefficient of the frequency distribution

    Parameters:
        clonotype_df: pandas.DataFrame with one row per clonotype.
        chain: Receptor chain label (e.g. TRA, TRB, IGH, IGL, IGK); it is
            only echoed in the result and the log line.
        clone_col: Name of the clonotype identifier column. Currently unused
            by the computation; kept for interface compatibility.
        count_col: Name of the column holding per-clonotype counts.

    Returns:
        ``(metrics, top10)`` where ``metrics`` is a dict of the values above
        plus ``n_clonotypes``, ``total_cells`` and ``top1_frequency`` (the
        frequency of the most abundant clonotype), and ``top10`` holds the
        ten rows with the largest counts. For an empty table, or one whose
        counts sum to zero, all frequency-based metrics are 0 and ``top10``
        is empty.
    """
    from scipy.stats import entropy

    counts = clonotype_df[count_col].to_numpy()
    n_clones = len(counts)
    total = counts.sum() if n_clones else 0

    if total > 0:
        freqs = counts / total

        H = entropy(freqs, base=2)
        D = 1 - np.sum(freqs ** 2)
        clonality = 1 - H / np.log2(n_clones) if n_clones > 1 else 0

        # Gini from the rank-weighted sum of ascending frequencies.
        sorted_freqs = np.sort(freqs)
        ranks = np.arange(1, n_clones + 1)
        gini = (2 * np.sum(ranks * sorted_freqs)
                / (n_clones * np.sum(sorted_freqs))) - (n_clones + 1) / n_clones

        # Use the maximum, not row 0: the input is not guaranteed to be
        # sorted by abundance.
        top1 = freqs.max()
        top10 = clonotype_df.nlargest(10, count_col)
    else:
        # No cells observed: avoid dividing by zero and report neutral values.
        H = D = clonality = gini = top1 = 0.0
        top10 = clonotype_df.iloc[0:0]

    # Chao1 richness estimate.
    f1 = int(np.sum(counts == 1))  # singletons
    f2 = int(np.sum(counts == 2))  # doubletons
    if f2 > 0:
        chao1 = n_clones + (f1 ** 2) / (2 * f2)
    else:
        chao1 = n_clones + f1 * (f1 - 1) / 2

    metrics = {
        "chain": chain,
        "n_clonotypes": n_clones,
        "total_cells": int(total),
        "shannon_entropy": round(H, 4),
        "simpson_index": round(D, 4),
        "clonality": round(clonality, 4),
        "chao1": round(chao1, 1),
        "gini": round(gini, 4),
        "top1_frequency": round(top1, 4),
    }

    print(f" Repertoire ({chain}): {n_clones} clonotypes, "
          f"Shannon={H:.3f}, Clonality={clonality:.3f}")
    return metrics, top10
189
+ ```
190
+
191
+ ## 4. 抗体構造解析
192
+
193
+ ```python
194
def antibody_structure_analysis(vh_seq, vl_seq, numbering="imgt"):
    """Number an antibody VH/VL pair with ANARCI and extract its CDR loops.

    Steps performed:
        1. Number both chains with ANARCI under the requested scheme.
        2. Extract CDR-H1/H2/H3 and CDR-L1/L2/L3 through ``extract_region``.
        3. Report chain lengths and the CDR-H3 length.

    Somatic hypermutation rate estimation and humanization scoring are not
    implemented here; both would need a germline database.

    CDR boundaries:
        - ``numbering="imgt"``: the IMGT unique-numbering delimitations,
          identical for heavy and light chains: CDR1 27-38, CDR2 56-65,
          CDR3 105-117.
        - any other scheme: the position table this snippet originally
          shipped (H1 26-33, H2 51-57, H3 93-102, L1 27-32, L2 50-52,
          L3 89-97). NOTE(review): these look Chothia/Kabat-like but match
          neither exactly; confirm them before relying on non-IMGT output.

    Parameters:
        vh_seq: Heavy-chain variable domain amino-acid sequence.
        vl_seq: Light-chain variable domain amino-acid sequence.
        numbering: ANARCI scheme name (e.g. "imgt", "kabat", "chothia").

    Returns:
        dict with keys ``cdrs`` (CDR name -> value returned by
        ``extract_region``), ``vh_length``, ``vl_length``, ``cdr_h3_length``
        and ``numbering``.

    NOTE(review): ``extract_region`` is not defined in this snippet. It is
    presumed to take (ANARCI result, start, end) and return the residues in
    that numbered range; verify against its actual definition.
    """
    from anarci import anarci

    # Numbering
    vh_numbered = anarci([("VH", vh_seq)], scheme=numbering)
    vl_numbered = anarci([("VL", vl_seq)], scheme=numbering)

    imgt_regions = {
        "CDR-H1": (27, 38), "CDR-H2": (56, 65), "CDR-H3": (105, 117),
        "CDR-L1": (27, 38), "CDR-L2": (56, 65), "CDR-L3": (105, 117),
    }
    # Boundaries originally hard-coded by this snippet; kept for non-IMGT
    # schemes so their output is unchanged.
    fallback_regions = {
        "CDR-H1": (26, 33), "CDR-H2": (51, 57), "CDR-H3": (93, 102),
        "CDR-L1": (27, 32), "CDR-L2": (50, 52), "CDR-L3": (89, 97),
    }
    if str(numbering).lower() == "imgt":
        cdr_regions = imgt_regions
    else:
        cdr_regions = fallback_regions

    cdrs = {}
    for name, (start, end) in cdr_regions.items():
        # Heavy-chain CDR names contain "H" ("CDR-H1"); light-chain ones do not.
        chain_data = vh_numbered if "H" in name else vl_numbered
        cdrs[name] = extract_region(chain_data, start, end)

    result = {
        "cdrs": cdrs,
        "vh_length": len(vh_seq),
        "vl_length": len(vl_seq),
        "cdr_h3_length": len(cdrs.get("CDR-H3", "")),
        "numbering": numbering,
    }

    print(f" Antibody: CDR-H3 length={result['cdr_h3_length']}, "
          f"scheme={numbering}")
    return result
245
+ ```
246
+
247
+ ## 5. ワクチン候補優先順位付け
248
+
249
+ ```python
250
def vaccine_candidate_ranking(antigens_df, weights=None):
    """Rank vaccine candidate antigens by a weighted multi-criteria score.

    Every criterion named in ``weights`` that exists as a column is min-max
    normalized to [0, 1] and stored as ``<criterion>_norm``; a column whose
    values are all equal is normalized to 1.0. The composite score is
    ``sum(w_i * normalized_score_i)``; criteria missing from the table
    contribute 0.

    Default criteria and weights: antigenicity 0.25, mhc_coverage 0.25,
    conservation 0.20, surface_accessibility 0.15, non_allergenicity 0.10,
    non_toxicity 0.05.

    Parameters:
        antigens_df: pandas.DataFrame with one row per antigen and one
            numeric column per criterion. It is not modified.
        weights: Optional mapping of criterion column name -> weight.

    Returns:
        A new DataFrame with the ``*_norm`` columns and ``composite_score``
        added, sorted by ``composite_score`` in descending order.
    """
    if weights is None:
        weights = {
            "antigenicity": 0.25,
            "mhc_coverage": 0.25,
            "conservation": 0.20,
            "surface_accessibility": 0.15,
            "non_allergenicity": 0.10,
            "non_toxicity": 0.05,
        }

    # Work on a copy so the caller's DataFrame does not silently gain columns.
    ranked = antigens_df.copy()

    # Min-max normalization
    for col in weights:
        if col in ranked.columns:
            min_val = ranked[col].min()
            max_val = ranked[col].max()
            if max_val > min_val:
                ranked[f"{col}_norm"] = (ranked[col] - min_val) / (max_val - min_val)
            else:
                ranked[f"{col}_norm"] = 1.0

    # Composite score
    ranked["composite_score"] = sum(
        w * ranked.get(f"{col}_norm", 0)
        for col, w in weights.items()
    )

    ranked = ranked.sort_values("composite_score", ascending=False)
    print(f" Vaccine candidates: {len(ranked)} antigens ranked")
    return ranked
293
+ ```
294
+
295
+ ## References
296
+
297
+ ### Output Files
298
+
299
+ | ファイル | 形式 |
300
+ |---|---|
301
+ | `results/mhc_binding_predictions.csv` | CSV |
302
+ | `results/bcell_epitopes.csv` | CSV |
303
+ | `results/repertoire_diversity.json` | JSON |
304
+ | `results/antibody_structure.json` | JSON |
305
+ | `results/vaccine_candidates_ranked.csv` | CSV |
306
+ | `figures/epitope_map.png` | PNG |
307
+ | `figures/repertoire_clonality.png` | PNG |
308
+
309
+ ### 利用可能ツール
310
+
311
+ > [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
312
+
313
+ | カテゴリ | 主要ツール | 用途 |
314
+ |---|---|---|
315
+ | IEDB | `iedb_search_epitopes` | エピトープ検索 |
316
+ | IEDB | `iedb_get_epitope_mhc` | エピトープ-MHC 結合データ |
317
+ | IEDB | `iedb_search_bcell` | B 細胞エピトープ検索 |
318
+ | IEDB | `iedb_search_mhc` | MHC アレル検索 |
319
+ | IEDB | `iedb_search_antigens` | 抗原検索 |
320
+ | IMGT | `IMGT_get_gene_info` | 免疫遺伝子情報 |
321
+ | IMGT | `IMGT_get_sequence` | 免疫グロブリン配列取得 |
322
+ | IMGT | `IMGT_search_genes` | 免疫遺伝子検索 |
323
+ | SAbDab | `SAbDab_search_structures` | 抗体構造検索 |
324
+ | SAbDab | `SAbDab_get_structure` | 抗体構造取得 |
325
+ | TheraSAbDab | `TheraSAbDab_search_therapeutics` | 治療用抗体検索 |
326
+ | TheraSAbDab | `TheraSAbDab_search_by_target` | 標的別治療用抗体 |
327
+ | UniProt | `UniProt_get_entry_by_accession` | タンパク質情報取得 |
328
+
329
+ ### 参照スキル
330
+
331
+ | スキル | 連携内容 |
332
+ |---|---|
333
+ | [scientific-sequence-analysis](../scientific-sequence-analysis/SKILL.md) | 配列アライメント・保存性解析 |
334
+ | [scientific-protein-structure-analysis](../scientific-protein-structure-analysis/SKILL.md) | 抗体 3D 構造解析 |
335
+ | [scientific-protein-design](../scientific-protein-design/SKILL.md) | 抗体エンジニアリング |
336
+ | [scientific-variant-interpretation](../scientific-variant-interpretation/SKILL.md) | HLA タイピング・バリアント解釈 |
337
+ | [scientific-single-cell-genomics](../scientific-single-cell-genomics/SKILL.md) | 免疫細胞サブタイプ解析 |
338
+
339
+ #### 依存パッケージ
340
+
341
+ - mhcflurry, anarci, biopython, immcantation, scirpy, numpy, pandas, scipy
@@ -0,0 +1,342 @@
1
+ ---
2
+ name: scientific-infectious-disease
3
+ description: |
4
+ 感染症ゲノミクス・疫学スキル。病原体ゲノム解析(SNP/系統樹)・
5
+ AMR(薬剤耐性)遺伝子検出・分子疫学(MLST/cgMLST)・
6
+ アウトブレイク調査トレーシング・疫学的 SIR/SEIR コンパートメントモデル・
7
+ 伝播ネットワーク推定パイプライン。
8
+ ---
9
+
10
+ # Scientific Infectious Disease Genomics
11
+
12
+ 病原体ゲノミクスと感染症疫学の統合解析パイプラインを提供する。
13
+ 病原体配列タイピング、系統解析、薬剤耐性遺伝子検出、
14
+ アウトブレイク伝播推定、数理疫学モデルを体系的に扱う。
15
+
16
+ ## When to Use
17
+
18
+ - 病原体の全ゲノムシーケンスデータの解析が必要なとき
19
+ - 薬剤耐性(AMR)遺伝子を検出・分類するとき
20
+ - 分子疫学タイピング(MLST, cgMLST, SNP)を行うとき
21
+ - アウトブレイクの伝播経路を推定するとき
22
+ - SIR / SEIR 等のコンパートメントモデルで感染拡大をシミュレーションするとき
23
+
24
+ ---
25
+
26
+ ## Quick Start
27
+
28
+ ## 1. 病原体ゲノム前処理
29
+
30
+ ```python
31
+ import numpy as np
32
+ import pandas as pd
33
+
34
def pathogen_qc_pipeline(fastq_r1, fastq_r2, reference_genome,
                         min_depth=30, min_coverage=0.95):
    """Run a paired-end pathogen WGS preprocessing pipeline via external tools.

    Steps (each is a shell command; a non-zero exit status of a command
    raises ``subprocess.CalledProcessError``):
        1. fastp    - read QC and adapter trimming
        2. bwa-mem2 - reference mapping, then ``samtools fixmate -m`` and
                      coordinate sort
        3. samtools markdup / index - duplicate marking
        4. freebayes - variant calling into ``variants.vcf``
        5. samtools depth - mean depth and breadth of coverage

    The QC thresholds are reported, not enforced: a warning is printed when
    mean depth < ``min_depth`` or the fraction of positions with depth > 0
    is < ``min_coverage``, and the function still returns normally.

    Intermediate and output files (``trim_R1.fq.gz``, ``aligned.bam``,
    ``dedup.bam``, ``variants.vcf``, ...) are written to the current working
    directory.

    Parameters:
        fastq_r1: Path to the forward-read FASTQ file.
        fastq_r2: Path to the reverse-read FASTQ file.
        reference_genome: Path to the reference FASTA.
        min_depth: Minimum acceptable mean depth (default 30x).
        min_coverage: Minimum acceptable breadth of coverage (default 0.95).

    Returns:
        The string ``"variants.vcf"``.
    """
    import shlex
    import subprocess

    # Quote user-supplied paths: the commands go through a shell because of
    # the pipes and the redirection.
    r1 = shlex.quote(str(fastq_r1))
    r2 = shlex.quote(str(fastq_r2))
    ref = shlex.quote(str(reference_genome))

    cmds = [
        # QC + trimming
        f"fastp -i {r1} -I {r2} -o trim_R1.fq.gz -O trim_R2.fq.gz "
        f"--json qc_report.json",
        # Mapping. fixmate -m adds the mate-score tags that samtools markdup
        # needs; it runs on the name-grouped aligner output, before sorting.
        f"bwa-mem2 mem -t 8 {ref} trim_R1.fq.gz trim_R2.fq.gz | "
        f"samtools fixmate -m - - | "
        f"samtools sort -@ 4 -o aligned.bam -",
        # Mark duplicates
        "samtools markdup aligned.bam dedup.bam",
        "samtools index dedup.bam",
        # Variant calling
        f"freebayes -f {ref} dedup.bam > variants.vcf",
    ]

    for cmd in cmds:
        subprocess.run(cmd, shell=True, check=True)

    # Coverage stats: prints "<mean depth> <fraction of positions covered>".
    depth_cmd = (
        "samtools depth -a dedup.bam | "
        "awk '{sum += $3; n++; if ($3 > 0) covered++} "
        "END {if (n > 0) print sum / n, covered / n; else print 0, 0}'"
    )
    proc = subprocess.run(depth_cmd, shell=True, check=True,
                          capture_output=True, text=True)
    stats = proc.stdout.split()
    if len(stats) == 2:
        mean_depth, coverage = float(stats[0]), float(stats[1])
        print(f" QC: mean depth={mean_depth:.1f}x, coverage={coverage:.3f}")
        if mean_depth < min_depth:
            print(f" QC WARNING: mean depth {mean_depth:.1f}x < {min_depth}x")
        if coverage < min_coverage:
            print(f" QC WARNING: coverage {coverage:.3f} < {min_coverage}")
    else:
        print(" QC WARNING: could not parse depth statistics")

    print(f" Pipeline complete: variants.vcf generated")
    return "variants.vcf"
73
+ ```
74
+
75
+ ## 2. AMR 遺伝子検出
76
+
77
+ ```python
78
def detect_amr_genes(assembly_fasta, database="resfinder"):
    """Detect antimicrobial resistance (AMR) genes in an assembly.

    Supported backends:
        - ``"resfinder"``: runs ``python -m resfinder`` with ``--acquired``
          and ``--point``, then parses
          ``resfinder_results/ResFinder_results_tab.txt`` with
          ``parse_resfinder_output``.
        - ``"card"``: runs ``rgi main`` and loads ``rgi_results.txt``.

    AMRFinderPlus is not implemented here.

    Parameters:
        assembly_fasta: Path to the assembled genome FASTA.
        database: ``"resfinder"`` or ``"card"``.

    Returns:
        A list of dicts for ``"resfinder"``, or a pandas.DataFrame for
        ``"card"``.

    Raises:
        ValueError: If ``database`` is not a supported backend.
        subprocess.CalledProcessError: If the external tool exits non-zero.
    """
    import subprocess

    supported = ("resfinder", "card")
    # Fail fast with a clear message; previously an unknown backend fell
    # through and crashed on an unbound local after doing nothing.
    if database not in supported:
        raise ValueError(
            f"Unsupported AMR database {database!r}; expected one of {supported}"
        )

    # Commands are argument lists (no shell) so paths containing spaces or
    # shell metacharacters are passed through verbatim.
    if database == "resfinder":
        cmd = ["python", "-m", "resfinder", "-ifa", str(assembly_fasta),
               "--acquired", "--point", "-o", "resfinder_results/"]
        subprocess.run(cmd, check=True)

        with open("resfinder_results/ResFinder_results_tab.txt") as f:
            lines = f.readlines()
        results = parse_resfinder_output(lines)
    else:  # "card"
        cmd = ["rgi", "main", "-i", str(assembly_fasta), "-o", "rgi_results",
               "-t", "contig", "-a", "BLAST"]
        subprocess.run(cmd, check=True)
        results = pd.read_csv("rgi_results.txt", sep="\t")

    n_genes = len(results)
    print(f" AMR: {n_genes} resistance genes detected ({database})")
    return results
112
+
113
+
114
def parse_resfinder_output(lines):
    """Parse the lines of a tab-separated ResFinder result table.

    The first line is treated as the header. Columns are located by header
    name when present (case-insensitive): ``Resistance gene``, ``Identity``,
    ``Coverage`` and ``Phenotype``. Any of these that is missing from the
    header falls back to the fixed positions this parser originally used
    (gene=0, identity=1, coverage=2, phenotype=5).

    NOTE(review): the header names follow what ResFinder 4.x is understood
    to emit in ``ResFinder_results_tab.txt``; confirm against the ResFinder
    version in use.

    Rows that are too short, or whose identity/coverage fields are not
    numeric (blank lines, repeated headers, "No hit found" markers), are
    skipped. A missing or empty phenotype is reported as ``"Unknown"``.

    Parameters:
        lines: Sequence of text lines, header first.

    Returns:
        List of dicts with keys ``gene`` (str), ``identity`` (float),
        ``coverage`` (float) and ``phenotype`` (str).
    """
    if not lines:
        return []

    header = [name.strip().lower()
              for name in lines[0].rstrip("\r\n").split("\t")]

    def column(name, default):
        # Prefer the position announced by the header; else the legacy one.
        return header.index(name) if name in header else default

    gene_idx = column("resistance gene", 0)
    identity_idx = column("identity", 1)
    coverage_idx = column("coverage", 2)
    phenotype_idx = column("phenotype", 5)
    required = max(gene_idx, identity_idx, coverage_idx) + 1

    results = []
    for line in lines[1:]:
        # Strip only the line terminator so empty trailing fields stay put.
        fields = line.rstrip("\r\n").split("\t")
        if len(fields) < required:
            continue
        try:
            identity = float(fields[identity_idx])
            coverage = float(fields[coverage_idx])
        except ValueError:
            # Not a data row.
            continue

        phenotype = ""
        if len(fields) > phenotype_idx:
            phenotype = fields[phenotype_idx].strip()

        results.append({
            "gene": fields[gene_idx].strip(),
            "identity": identity,
            "coverage": coverage,
            "phenotype": phenotype or "Unknown",
        })
    return results
127
+ ```
128
+
129
+ ## 3. 分子疫学タイピング
130
+
131
+ ```python
132
def molecular_typing(assembly_fasta, organism, scheme="mlst"):
    """Run molecular epidemiology typing on an assembly.

    Supported schemes:
        - ``"mlst"``: runs the ``mlst`` tool with ``--scheme <organism>`` and
          parses its tab-separated output line (file, scheme, ST, alleles).
        - ``"cgmlst"``: runs ``chewbbaca AlleleCall`` against ``schema/`` and
          writes into ``cgmlst_results/``.

    Whole-genome MLST (``"wgmlst"``) is not implemented here.

    NOTE(review): the chewBBACA executable name and its expected ``-i``
    input are reproduced as originally written; confirm them against the
    installed chewBBACA version.

    Parameters:
        assembly_fasta: Path to the assembled genome FASTA.
        organism: Scheme name passed to ``mlst --scheme``.
        scheme: ``"mlst"`` or ``"cgmlst"``.

    Returns:
        For ``"mlst"``: dict with keys ``file``, ``scheme``, ``ST`` and
        ``alleles``. For ``"cgmlst"``: dict with keys ``scheme`` and
        ``results_dir``.

    Raises:
        ValueError: If ``scheme`` is not supported.
        subprocess.CalledProcessError: If the external tool exits non-zero.
        RuntimeError: If ``mlst`` produces no parsable result line.
    """
    import subprocess

    supported = ("mlst", "cgmlst")
    if scheme not in supported:
        raise ValueError(
            f"Unsupported typing scheme {scheme!r}; expected one of {supported}"
        )

    if scheme == "mlst":
        cmd = ["mlst", str(assembly_fasta), "--scheme", str(organism)]
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)

        # One assembly is submitted, so only the first output line is parsed.
        output_lines = result.stdout.strip().splitlines()
        fields = output_lines[0].split("\t") if output_lines else []
        if len(fields) < 3:
            raise RuntimeError(
                f"Unexpected mlst output for {assembly_fasta!r}: {result.stdout!r}"
            )
        typing = {
            "file": fields[0],
            "scheme": fields[1],
            "ST": fields[2],
            "alleles": fields[3:],
        }
    else:  # "cgmlst"
        cmd = ["chewbbaca", "AlleleCall", "-i", str(assembly_fasta),
               "-g", "schema/", "-o", "cgmlst_results/"]
        subprocess.run(cmd, check=True)
        typing = {"scheme": "cgMLST", "results_dir": "cgmlst_results/"}

    print(f" Typing: ST={typing.get('ST', 'N/A')} ({scheme})")
    return typing
164
+ ```
165
+
166
+ ## 4. 系統解析・伝播推定
167
+
168
+ ```python
169
def phylogenetic_analysis(alignment_fasta, method="iqtree", model="GTR+G"):
    """Infer a maximum-likelihood tree from an alignment with IQ-TREE 2.

    Runs ``iqtree2`` with 1000 ultrafast bootstrap replicates (``-bb``) and
    1000 SH-aLRT replicates (``-alrt``), then loads the resulting
    ``<alignment_fasta>.treefile`` with Biopython.

    Only ``method="iqtree"`` is implemented; RAxML-NG and BEAST 2 are not.

    Parameters:
        alignment_fasta: Path to the multiple sequence alignment.
        method: Inference backend; must be ``"iqtree"``.
        model: Substitution model passed to ``iqtree2 -m``.

    Returns:
        The tree parsed by ``Bio.Phylo.read`` from the Newick tree file.

    Raises:
        ValueError: If ``method`` is not ``"iqtree"``.
        subprocess.CalledProcessError: If ``iqtree2`` exits non-zero.
    """
    # Validate before importing or running anything; previously an
    # unsupported method crashed on an unbound local at the return.
    if method != "iqtree":
        raise ValueError(
            f"Unsupported phylogenetic method {method!r}; "
            f"only 'iqtree' is implemented"
        )

    import subprocess
    from Bio import Phylo

    # Argument list (no shell) so the alignment path is passed verbatim.
    cmd = ["iqtree2", "-s", str(alignment_fasta), "-m", str(model),
           "-bb", "1000", "-alrt", "1000", "-nt", "AUTO"]
    subprocess.run(cmd, check=True)
    tree = Phylo.read(f"{alignment_fasta}.treefile", "newick")

    return tree
192
+
193
+
194
def transmission_network(snp_matrix, max_snp_distance=10):
    """Build a putative transmission network from pairwise SNP distances.

    Two samples are linked when their SNP distance is at most
    ``max_snp_distance``. The function then derives a minimum spanning tree
    of the linked graph and its connected components.

    Parameters:
        snp_matrix: Square table indexed by sample name, read positionally
            with ``.iloc[i, j]``; only the upper triangle (i < j) is used.
        max_snp_distance: Largest SNP distance that still links two samples.

    Returns:
        ``(G, mst, clusters)``: the thresholded graph, the result of
        ``networkx.minimum_spanning_tree`` on it, and a list of
        connected-component node sets.
    """
    import networkx as nx

    sample_ids = snp_matrix.index.tolist()
    n_samples = len(sample_ids)

    graph = nx.Graph()
    graph.add_nodes_from(sample_ids)

    # Visit each unordered pair once, in row-major upper-triangle order.
    for row in range(n_samples):
        for col in range(row + 1, n_samples):
            distance = snp_matrix.iloc[row, col]
            if distance <= max_snp_distance:
                graph.add_edge(sample_ids[row], sample_ids[col],
                               weight=distance, snp_distance=distance)

    spanning_tree = nx.minimum_spanning_tree(graph)
    components = list(nx.connected_components(graph))

    print(f" Transmission: {graph.number_of_edges()} links, "
          f"{len(components)} clusters")
    return graph, spanning_tree, components
226
+ ```
227
+
228
+ ## 5. SIR / SEIR コンパートメントモデル
229
+
230
+ ```python
231
+ from scipy.integrate import odeint
232
+
233
def sir_model(y, t, beta, gamma, N):
    """Right-hand side of the SIR compartment model.

        dS/dt = -beta * S * I / N
        dI/dt =  beta * S * I / N - gamma * I
        dR/dt =  gamma * I

    Parameters:
        y: Current state ``(S, I, R)``.
        t: Time; unused by the equations but part of the signature.
        beta: Transmission rate.
        gamma: Recovery rate.
        N: Population size used to scale the infection term.

    Returns:
        ``[dS/dt, dI/dt, dR/dt]``.
    """
    susceptible, infectious, _recovered = y
    new_infections = beta * susceptible * infectious / N
    recoveries = gamma * infectious
    return [-new_infections, new_infections - recoveries, recoveries]
248
+
249
+
250
def seir_model(y, t, beta, sigma, gamma, N):
    """Right-hand side of the SEIR compartment model (with a latent stage).

        dS/dt = -beta * S * I / N
        dE/dt =  beta * S * I / N - sigma * E
        dI/dt =  sigma * E - gamma * I
        dR/dt =  gamma * I

    Parameters:
        y: Current state ``(S, E, I, R)``.
        t: Time; unused by the equations but part of the signature.
        beta: Transmission rate.
        sigma: Rate of leaving the exposed compartment
            (1 / incubation period).
        gamma: Recovery rate.
        N: Population size used to scale the infection term.

    Returns:
        ``[dS/dt, dE/dt, dI/dt, dR/dt]``.
    """
    susceptible, exposed, infectious, _recovered = y
    new_exposures = beta * susceptible * infectious / N
    onsets = sigma * exposed
    recoveries = gamma * infectious
    return [
        -new_exposures,
        new_exposures - onsets,
        onsets - recoveries,
        recoveries,
    ]
267
+
268
+
269
def run_epidemic_simulation(model="SIR", N=1e6, I0=10, R0=2.5,
                            gamma=1/10, sigma=1/5, days=180):
    """Simulate an epidemic with an SIR or SEIR compartment model.

    The transmission rate is derived as ``beta = R0 * gamma`` and the ODE
    system is integrated with ``scipy.integrate.odeint`` on ``int(days * 10)``
    evenly spaced time points over ``[0, days]``.

    Parameters:
        model: ``"SIR"`` or ``"SEIR"``.
        N: Total population size.
        I0: Initial number of infectious individuals. For SEIR the exposed
            compartment starts at 0.
        R0: Basic reproduction number.
        gamma: Recovery rate (1 / infectious period).
        sigma: Rate of leaving the exposed compartment
            (1 / latent period); SEIR only.
        days: Simulated duration in days.

    Returns:
        pandas.DataFrame with one column per compartment plus ``t``.

    Raises:
        ValueError: If ``model`` is unknown or ``days`` yields fewer than
            two time points.
    """
    if model not in ("SIR", "SEIR"):
        raise ValueError(f"Unknown model {model!r}; expected 'SIR' or 'SEIR'")

    # linspace needs an integer sample count; coercing lets float `days`
    # (e.g. 180.0) work too.
    n_points = int(days * 10)
    if n_points < 2:
        raise ValueError(f"days={days!r} yields too few time points to integrate")

    beta = R0 * gamma
    t = np.linspace(0, days, n_points)

    if model == "SIR":
        y0 = [N - I0, I0, 0]
        sol = odeint(sir_model, y0, t, args=(beta, gamma, N))
        df = pd.DataFrame(sol, columns=["S", "I", "R"])
    else:  # "SEIR"
        y0 = [N - I0, 0, I0, 0]
        sol = odeint(seir_model, y0, t, args=(beta, sigma, gamma, N))
        df = pd.DataFrame(sol, columns=["S", "E", "I", "R"])

    df["t"] = t
    peak_I = df["I"].max()
    peak_day = df.loc[df["I"].idxmax(), "t"]

    print(f" {model}: R₀={R0:.1f}, peak infection={peak_I:.0f} at day {peak_day:.0f}")
    return df
298
+ ```
299
+
300
+ ## References
301
+
302
+ ### Output Files
303
+
304
+ | ファイル | 形式 |
305
+ |---|---|
306
+ | `results/amr_genes.csv` | CSV |
307
+ | `results/mlst_typing.json` | JSON |
308
+ | `results/snp_matrix.csv` | CSV |
309
+ | `results/transmission_network.json` | JSON |
310
+ | `results/epidemic_simulation.csv` | CSV |
311
+ | `figures/phylogenetic_tree.png` | PNG |
312
+ | `figures/transmission_network.png` | PNG |
313
+ | `figures/epidemic_curves.png` | PNG |
314
+
315
+ ### 利用可能ツール
316
+
317
+ > [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
318
+
319
+ | カテゴリ | 主要ツール | 用途 |
320
+ |---|---|---|
321
+ | EUHealthInfo | `euhealthinfo_search_infectious_diseases` | 感染症サーベイランスデータ |
322
+ | EUHealthInfo | `euhealthinfo_search_surveillance` | 疫学サーベイランス |
323
+ | CDC | `cdc_data_search_datasets` | CDC データセット検索 |
324
+ | CDC | `cdc_data_get_dataset` | CDC データ取得 |
325
+ | NCBI | `BLAST_nucleotide_search` | 病原体配列同定 |
326
+ | NCBI | `NCBI_get_sequence` | ゲノム配列取得 |
327
+ | PubMed | `PubMed_search_articles` | 感染症文献検索 |
328
+ | ClinicalTrials | `search_clinical_trials` | 感染症治療臨床試験 |
329
+
330
+ ### 参照スキル
331
+
332
+ | スキル | 連携内容 |
333
+ |---|---|
334
+ | [scientific-sequence-analysis](../scientific-sequence-analysis/SKILL.md) | 配列アライメント・BLAST |
335
+ | [scientific-bioinformatics](../scientific-bioinformatics/SKILL.md) | ゲノムアノテーション |
336
+ | [scientific-network-analysis](../scientific-network-analysis/SKILL.md) | 伝播ネットワーク可視化 |
337
+ | [scientific-survival-clinical](../scientific-survival-clinical/SKILL.md) | 感染症アウトカム解析 |
338
+ | [scientific-bayesian-statistics](../scientific-bayesian-statistics/SKILL.md) | ベイズ系統年代学 |
339
+
340
+ #### 依存パッケージ
341
+
342
+ - biopython, ete3, scipy, networkx, numpy, pandas; 外部 CLI ツール(標準ライブラリ subprocess 経由で実行): fastp, bwa-mem2, samtools, freebayes, iqtree2, mlst, chewbbaca, resfinder, rgi
@@ -344,6 +344,17 @@ def cumulative_meta_analysis(studies_df, sort_by="year", model="random"):
344
344
  | `figures/funnel_plot.png` | PNG |
345
345
  | `figures/cumulative_meta.png` | PNG |
346
346
 
347
+ ### 利用可能ツール
348
+
349
+ > [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
350
+
351
+ | カテゴリ | 主要ツール | 用途 |
352
+ |---|---|---|
353
+ | PubMed | `PubMed_search_articles` | メタアナリシス対象文献検索 |
354
+ | PubMed | `PubMed_get_article` | 論文メタデータ取得 |
355
+ | EuropePMC | `EuropePMC_search_articles` | ヨーロッパ文献検索 |
356
+ | Crossref | `Crossref_search_works` | 出版情報検索 |
357
+
347
358
  #### 依存パッケージ
348
359
 
349
360
  ```