@nahisaho/satori 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. package/README.md +188 -39
  2. package/package.json +1 -1
  3. package/src/.github/skills/scientific-clinical-trials-analytics/SKILL.md +340 -0
  4. package/src/.github/skills/scientific-computational-materials/SKILL.md +353 -0
  5. package/src/.github/skills/scientific-environmental-ecology/SKILL.md +295 -0
  6. package/src/.github/skills/scientific-epidemiology-public-health/SKILL.md +332 -0
  7. package/src/.github/skills/scientific-epigenomics-chromatin/SKILL.md +567 -0
  8. package/src/.github/skills/scientific-gene-expression-transcriptomics/SKILL.md +330 -0
  9. package/src/.github/skills/scientific-immunoinformatics/SKILL.md +341 -0
  10. package/src/.github/skills/scientific-infectious-disease/SKILL.md +342 -0
  11. package/src/.github/skills/scientific-lab-data-management/SKILL.md +334 -0
  12. package/src/.github/skills/scientific-microbiome-metagenomics/SKILL.md +349 -0
  13. package/src/.github/skills/scientific-neuroscience-electrophysiology/SKILL.md +400 -0
  14. package/src/.github/skills/scientific-pharmacogenomics/SKILL.md +342 -0
  15. package/src/.github/skills/scientific-population-genetics/SKILL.md +336 -0
  16. package/src/.github/skills/scientific-proteomics-mass-spectrometry/SKILL.md +401 -0
  17. package/src/.github/skills/scientific-regulatory-science/SKILL.md +256 -0
  18. package/src/.github/skills/scientific-scientific-schematics/SKILL.md +336 -0
  19. package/src/.github/skills/scientific-single-cell-genomics/SKILL.md +361 -0
  20. package/src/.github/skills/scientific-spatial-transcriptomics/SKILL.md +281 -0
  21. package/src/.github/skills/scientific-systems-biology/SKILL.md +310 -0
  22. package/src/.github/skills/scientific-text-mining-nlp/SKILL.md +358 -0
@@ -0,0 +1,342 @@
1
+ ---
2
+ name: scientific-pharmacogenomics
3
+ description: |
4
+ ファーマコゲノミクス (薬理ゲノム学) 解析スキル。PharmGKB/ClinPGx による
5
+ 遺伝子-薬物相互作用照会、CPIC ガイドライン取得・解釈、Star アレル分類、
6
+ 代謝酵素表現型判定 (PM/IM/NM/RM/UM)、FDA 薬理ゲノムバイオマーカー、
7
+ 投与量レコメンデーション、PGx レポート生成を統合した
8
+ 個別化薬物療法支援パイプライン。
9
+ ---
10
+
11
+ # Scientific Pharmacogenomics
12
+
13
+ 遺伝子型に基づく薬物選択・用量調整を支援する
14
+ ファーマコゲノミクスパイプライン。CPIC/DPWG ガイドライン・PharmGKB・
15
+ FDA PGx バイオマーカーのデータベース照会と解釈を統合。
16
+
17
+ ## When to Use
18
+
19
+ - 遺伝子型に基づく薬物選択・投与量調整が必要なとき
20
+ - CPIC/DPWG ガイドラインの系統的照会が必要なとき
21
+ - Star アレル分類・代謝酵素表現型 (PM/IM/NM/RM/UM) を判定するとき
22
+ - FDA 承認ファーマコゲノミクスバイオマーカーを確認するとき
23
+ - 個別化薬物療法レポートを生成するとき
24
+
25
+ ---
26
+
27
+ ## Quick Start
28
+
29
+ ## 1. 遺伝子-薬物相互作用照会
30
+
31
+ ```python
32
+ import pandas as pd
33
+ import json
34
+
35
+
36
def query_gene_drug_interactions(gene_symbol, data_source="PharmGKB"):
    """
    Describe the gene-drug interaction lookup for one pharmacogene.

    This is a template: it logs the request and returns the annotation
    categories to look up. It does not contact any database itself.

    Major PGx genes and example drugs:
        - CYP2D6: codeine, tamoxifen, tramadol
        - CYP2C19: clopidogrel, escitalopram, omeprazole
        - CYP2C9: warfarin, phenytoin
        - CYP3A5: tacrolimus
        - DPYD: fluorouracil, capecitabine
        - TPMT/NUDT15: azathioprine, 6-MP
        - UGT1A1: irinotecan
        - SLCO1B1: simvastatin
        - HLA-B: abacavir (*57:01), carbamazepine (*15:02)
        - VKORC1: warfarin

    Parameters:
        gene_symbol: gene to query (e.g., "CYP2D6")
        data_source: name of the source; only used in the log line

    Returns:
        dict with the queried gene and a short description of each
        annotation category.
    """
    print(f" Querying {data_source} for gene: {gene_symbol}")

    # Main PharmGKB annotation categories, in the order they are reported.
    return dict(
        gene=gene_symbol,
        clinical_annotations="Tier 1A-3",
        drug_labels="FDA/EMA/PMDA/HCSC",
        guideline_annotations="CPIC/DPWG/CPNDS",
        variant_annotations="Clinical significance",
        pathway_annotations="PK/PD pathways",
    )
65
+
66
+
67
def get_cpic_guidelines(gene_symbol=None, drug_name=None):
    """
    Build a CPIC guideline request for a gene and/or a drug.

    This is a template: it logs the request and returns a placeholder
    record whose level is "pending"; no guideline is actually fetched.

    CPIC levels as summarised by this skill:
        - Level A: strong evidence mandating a prescribing change
        - Level A/B: evidence recommending a prescribing change
        - Level B: evidence that should be considered
        - Level C: informational only
        - Level D: no useful data

    Parameters:
        gene_symbol: optional gene symbol to query
        drug_name: optional drug name to query

    Returns:
        dict with keys "gene", "drug" and "level".
    """
    print(" Querying CPIC guidelines:")
    # Only the criteria that were actually supplied are echoed.
    for label, value in (("Gene", gene_symbol), ("Drug", drug_name)):
        if value:
            print(f" {label}: {value}")

    pending_request = {"gene": gene_symbol, "drug": drug_name}
    pending_request["level"] = "pending"
    return pending_request
85
+ ```
86
+
87
+ ## 2. Star アレル分類・表現型判定
88
+
89
+ ```python
90
+ import pandas as pd
91
+ import numpy as np
92
+
93
+
94
def star_allele_annotation(gene, genotype_variants):
    """
    Star-allele annotation step of the pipeline.

    Intended as a PharmCAT (Pharmacogenomics Clinical Annotation Tool)
    compatible star-allele call -> phenotype conversion. This template
    logs the request and returns an example CYP2D6 allele table; the
    supplied variants are only counted, not interpreted.

    Parameters:
        gene: target gene (e.g., "CYP2D6")
        genotype_variants: {rsID: genotype} dictionary

    Returns:
        dict mapping each star allele to its "function" label and
        "activity_score".
    """
    # Example CYP2D6 star alleles: (allele, function, activity score).
    example_rows = (
        ("*1", "Normal", 1.0),
        ("*2", "Normal", 1.0),
        ("*3", "No function", 0.0),
        ("*4", "No function", 0.0),
        ("*5", "No function (gene deletion)", 0.0),
        ("*6", "No function", 0.0),
        ("*9", "Decreased", 0.5),
        ("*10", "Decreased", 0.25),
        ("*17", "Decreased", 0.5),
        ("*41", "Decreased", 0.5),
    )
    allele_table = {}
    for star, function_label, score in example_rows:
        allele_table[star] = {"function": function_label,
                              "activity_score": score}

    n_variants = len(genotype_variants)
    print(f" Gene: {gene}")
    print(f" Variants provided: {n_variants}")
    print(" Reference: PharmCAT + PharmVar")

    return allele_table
124
+
125
+
126
def determine_metabolizer_phenotype(gene, diplotype, activity_scores):
    """
    Assign a metabolizer phenotype from the diplotype's activity score (AS).

    Phenotype classes (AS = sum of the two allele scores):
        - PM (Poor Metabolizer): AS = 0
        - IM (Intermediate Metabolizer): 0 < AS < 1.25
        - NM (Normal Metabolizer): 1.25 <= AS <= 2.25
        - RM (Rapid Metabolizer): 2.25 < AS <= 3.0 (CYP2C19 only)
        - UM (Ultra-rapid Metabolizer): AS > 3.0, or AS > 2.25 for genes
          other than CYP2C19, which have no RM class

    Parameters:
        gene: gene symbol (e.g., "CYP2D6")
        diplotype: diplotype label, reported as-is (e.g., "*1/*4")
        activity_scores: pair (score of allele 1, score of allele 2)

    Returns:
        dict with keys "gene", "diplotype", "activity_score", "phenotype".

    Raises:
        ValueError: if activity_scores is not a pair or contains a
            negative score.
    """
    as1, as2 = activity_scores
    if as1 < 0 or as2 < 0:
        # A negative score has no biological meaning; previously it was
        # silently reported as an intermediate metabolizer.
        raise ValueError(f"activity scores must be non-negative: {activity_scores!r}")
    total_as = as1 + as2

    if total_as == 0:
        phenotype = "PM (Poor Metabolizer)"
    elif total_as < 1.25:
        phenotype = "IM (Intermediate Metabolizer)"
    elif total_as <= 2.25:
        phenotype = "NM (Normal Metabolizer)"
    elif total_as <= 3.0 and gene == "CYP2C19":
        # The RM class is only defined for CYP2C19 (see the table above);
        # every other gene goes straight from NM to UM.
        phenotype = "RM (Rapid Metabolizer)"
    else:
        phenotype = "UM (Ultra-rapid Metabolizer)"

    print(f" Gene: {gene}")
    print(f" Diplotype: {diplotype}")
    print(f" Activity Score: {as1} + {as2} = {total_as}")
    print(f" Phenotype: {phenotype}")

    # CPIC dosing warnings for CYP2D6 poor metabolizers
    if gene == "CYP2D6" and phenotype.startswith("PM"):
        print(" ⚠ CPIC: コデイン禁忌 (モルフィンへの変換不可)")
        print(" ⚠ CPIC: タモキシフェン → 代替薬を推奨")

    return {"gene": gene, "diplotype": diplotype,
            "activity_score": total_as, "phenotype": phenotype}
163
+ ```
164
+
165
+ ## 3. FDA PGx バイオマーカー照会
166
+
167
+ ```python
168
+ import pandas as pd
169
+
170
+
171
def query_fda_pgx_biomarkers(drug_name=None, gene_name=None,
                             biomarker_type=None):
    """
    Look up example FDA pharmacogenomic biomarkers.

    FDA PGx labeling categories used in the example table:
        - Required: testing required (e.g., HLA-B*57:01 for abacavir)
        - Recommended: testing recommended
        - Actionable: label contains PGx information
        - Informative: reference information only

    More than 300 gene-drug pairs appear in FDA labels; only a handful of
    key examples are embedded here.

    Parameters:
        drug_name: optional case-insensitive substring of the drug name
        gene_name: optional case-insensitive substring of the gene/allele
        biomarker_type: optional case-insensitive substring of the
            labeling category

    Returns:
        pandas.DataFrame with columns gene, drug, action, recommendation,
        restricted to the rows matching every supplied criterion (all
        rows when no criterion is given).
    """
    print(" Querying FDA Pharmacogenomic Biomarkers:")
    if drug_name:
        print(f" Drug: {drug_name}")
    if gene_name:
        print(f" Gene: {gene_name}")
    if biomarker_type:
        print(f" Type: {biomarker_type}")

    # Key FDA biomarker examples
    key_biomarkers = [
        {"gene": "HLA-B*57:01", "drug": "Abacavir", "action": "Required",
         "recommendation": "HLA-B*57:01 陽性 → 禁忌 (過敏反応リスク)"},
        {"gene": "CYP2C19", "drug": "Clopidogrel", "action": "Actionable",
         "recommendation": "PM → 代替抗血小板薬 (ticagrelor/prasugrel)"},
        {"gene": "DPYD", "drug": "5-FU/Capecitabine", "action": "Recommended",
         "recommendation": "PM → 用量 50% 減量 or 代替薬"},
        {"gene": "UGT1A1*28", "drug": "Irinotecan", "action": "Recommended",
         "recommendation": "TA7/TA7 → 初回投与量減量"},
        {"gene": "TPMT/NUDT15", "drug": "Azathioprine", "action": "Recommended",
         "recommendation": "PM → 10%用量 or 代替薬"},
    ]
    table = pd.DataFrame(key_biomarkers)

    # The criteria used to be logged but never applied, so every call
    # returned the full table.
    # NOTE(review): biomarker_type is assumed to refer to the labeling
    # category stored in the "action" column — confirm.
    criteria = (("drug", drug_name), ("gene", gene_name),
                ("action", biomarker_type))
    for column, needle in criteria:
        if needle:
            # regex=False: allele names such as "*57:01" contain regex
            # metacharacters and must be matched literally.
            hits = table[column].str.contains(str(needle), case=False,
                                              regex=False)
            table = table[hits]

    return table.reset_index(drop=True)
207
+
208
+
209
def pgx_dosing_recommendation(gene, phenotype, drug):
    """
    Produce a CPIC dosing recommendation for a gene/phenotype/drug triple.

    Only an example table (CYP2C19 x clopidogrel) is embedded.

    Parameters:
        gene: gene symbol (matched case-insensitively)
        phenotype: phenotype label whose first space-separated token is
            the class code, e.g. "PM (Poor Metabolizer)" or just "PM"
        drug: drug name (matched case-insensitively)

    Returns:
        dict with keys gene, phenotype, drug, recommendation, source and
        evidence_level. When the triple is not in the table the
        recommendation is the "no guideline information" message and
        evidence_level is None.
    """
    no_guideline = "ガイドライン情報なし"
    standard_dose = "標準用量 75 mg/day"
    alternative = "代替抗血小板薬を推奨 (ticagrelor/prasugrel)"

    # Example CPIC dosing table (CYP2C19 x Clopidogrel)
    cpic_table = {
        ("CYP2C19", "UM", "Clopidogrel"): standard_dose,
        ("CYP2C19", "RM", "Clopidogrel"): standard_dose,
        ("CYP2C19", "NM", "Clopidogrel"): standard_dose,
        ("CYP2C19", "IM", "Clopidogrel"): alternative,
        ("CYP2C19", "PM", "Clopidogrel"): alternative,
    }
    # Case-folded index so "clopidogrel" or "cyp2c19" still finds its row.
    folded_table = {
        (g.casefold(), code, d.casefold()): text
        for (g, code, d), text in cpic_table.items()
    }

    phenotype_code = phenotype.split(" ")[0].upper()
    key = (gene.strip().casefold(), phenotype_code, drug.strip().casefold())
    recommendation = folded_table.get(key, no_guideline)
    found = key in folded_table

    result = {
        "gene": gene,
        "phenotype": phenotype,
        "drug": drug,
        "recommendation": recommendation,
        "source": "CPIC",
        # Previously "Level A" was reported even when no guideline matched,
        # which attached a strong evidence grade to a non-recommendation.
        "evidence_level": "Level A" if found else None,
    }

    print(" PGx Dosing Recommendation:")
    print(f" Gene: {gene} | Phenotype: {phenotype}")
    print(f" Drug: {drug}")
    print(f" Recommendation: {recommendation}")

    return result
240
+ ```
241
+
242
+ ## 4. PGx レポート生成
243
+
244
+ ```python
245
+ import json
246
+ import pandas as pd
247
+ from datetime import datetime
248
+
249
+
250
def generate_pgx_report(patient_results, output_file="results/pgx_report.json"):
    """
    Build a PGx report and write it to disk as JSON.

    The report contains:
        - the patient identifier ("anonymous" when absent)
        - one entry per tested gene (gene, diplotype, phenotype, activity
          score)
        - actionable findings: every gene whose phenotype label contains
          "PM" or "UM"
        - an (empty) "drug_recommendations" list
          NOTE(review): nothing populates this list yet — confirm whether
          recommendations should be copied from patient_results.

    Parameters:
        patient_results: dict with optional "patient_id" and
            "gene_results" (list of dicts with "gene", "diplotype",
            "phenotype" and optionally "activity_score")
        output_file: destination path; parent directories are created

    Returns:
        the report dict that was written.

    Raises:
        KeyError: if a gene result lacks "gene", "diplotype" or
            "phenotype".
    """
    import os

    # dirname is "" for a bare filename, and os.makedirs("") raises.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    report = {
        "report_type": "Pharmacogenomics Report",
        "generated_at": datetime.now().isoformat(),
        "patient_id": patient_results.get("patient_id", "anonymous"),
        "genes_tested": [],
        "actionable_findings": [],
        "drug_recommendations": [],
    }

    for gene_result in patient_results.get("gene_results", []):
        phenotype = gene_result["phenotype"]
        report["genes_tested"].append({
            "gene": gene_result["gene"],
            "diplotype": gene_result["diplotype"],
            "phenotype": phenotype,
            "activity_score": gene_result.get("activity_score"),
        })

        # Extract actionable findings (poor / ultra-rapid metabolizers)
        if "PM" in phenotype or "UM" in phenotype:
            report["actionable_findings"].append({
                "gene": gene_result["gene"],
                "phenotype": phenotype,
                "clinical_significance": "Actionable — 投与量調整/代替薬検討を推奨",
            })

    # ensure_ascii=False emits raw non-ASCII text, so the encoding must be
    # pinned instead of inherited from the platform locale.
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    print(f" PGx Report generated: {output_file}")
    print(f" Genes tested: {len(report['genes_tested'])}")
    print(f" Actionable findings: {len(report['actionable_findings'])}")

    return report
298
+ ```
299
+
300
+ ## References
301
+
302
+ ### Output Files
303
+
304
+ | ファイル | 形式 |
305
+ |---|---|
306
+ | `results/pgx_report.json` | JSON |
307
+ | `results/gene_drug_interactions.csv` | CSV |
308
+ | `results/star_allele_calls.csv` | CSV |
309
+ | `results/dosing_recommendations.csv` | CSV |
310
+ | `figures/pgx_phenotype_summary.png` | PNG |
311
+
312
+ ### 利用可能ツール
313
+
314
+ > [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
315
+
316
+ | カテゴリ | 主要ツール | 用途 |
317
+ |---|---|---|
318
+ | PharmGKB | `PharmGKB_search_drugs` | 薬物検索 (遺伝子関連) |
319
+ | PharmGKB | `PharmGKB_search_genes` | PGx 遺伝子検索 |
320
+ | PharmGKB | `PharmGKB_get_drug_details` | 薬物詳細 (クロスリファレンス) |
321
+ | PharmGKB | `PharmGKB_get_gene_details` | 遺伝子詳細 (アレル情報) |
322
+ | PharmGKB | `PharmGKB_get_clinical_annotations` | 遺伝子-薬物臨床アノテーション |
323
+ | PharmGKB | `PharmGKB_get_dosing_guidelines` | CPIC/DPWG 用量ガイドライン |
324
+ | PharmGKB | `PharmGKB_search_variants` | 遺伝的変異検索 |
325
+ | FDA | `fda_pharmacogenomic_biomarkers` | FDA PGx バイオマーカー一覧 |
326
+ | FDA | `FDA_get_pharmacogenomics_info_by_drug_name` | 薬物名で PGx 情報取得 |
327
+ | FDA | `FDA_get_drug_name_by_pharmacogenomics` | PGx から薬物名取得 |
328
+ | OpenTargets | `OpenTargets_drug_pharmacogenomics_data` | 薬物 PGx データ |
329
+
330
+ ### 参照スキル
331
+
332
+ | スキル | 関連 |
333
+ |---|---|
334
+ | `scientific-variant-interpretation` | ACMG/AMP バリアント解釈 |
335
+ | `scientific-pharmacovigilance` | 市販後安全性監視 |
336
+ | `scientific-clinical-decision-support` | 臨床意思決定 |
337
+ | `scientific-precision-oncology` | 腫瘍 PGx (OncoKB) |
338
+ | `scientific-population-genetics` | 集団間アレル頻度差 |
339
+
340
+ ### 依存パッケージ
341
+
342
+ `pandas`, `numpy`(`json` は Python 標準ライブラリ)
@@ -0,0 +1,336 @@
1
+ ---
2
+ name: scientific-population-genetics
3
+ description: |
4
+ 集団遺伝学解析スキル。アレル頻度解析・Hardy-Weinberg 平衡検定・
5
+ 集団構造解析(PCA / ADMIXTURE)・Fst 分化指標・選択圧検出(iHS / XP-EHH)・
6
+ 連鎖不平衡(LD)解析・GWAS Catalog / gnomAD データ統合パイプライン。
7
+ ---
8
+
9
+ # Scientific Population Genetics
10
+
11
+ 集団遺伝学に特化した解析パイプラインを提供する。
12
+ アレル頻度、集団構造、遺伝的分化、自然選択シグナル、
13
+ 連鎖不平衡の解析を体系的に扱い、GWAS Catalog・gnomAD との統合を支援する。
14
+
15
+ ## When to Use
16
+
17
+ - アレル頻度分布や Hardy-Weinberg 平衡を検定するとき
18
+ - 集団構造(PCA / ADMIXTURE / STRUCTURE)を解析するとき
19
+ - 集団間の遺伝的分化(Fst)を評価するとき
20
+ - 自然選択シグナル(iHS / Tajima's D / XP-EHH)を検出するとき
21
+ - GWAS 関連バリアントの集団遺伝学的解釈を行うとき
22
+
23
+ ---
24
+
25
+ ## Quick Start
26
+
27
+ ## 1. QC・アレル頻度解析
28
+
29
+ ```python
30
+ import numpy as np
31
+ import pandas as pd
32
+
33
def genotype_qc(plink_prefix, mind=0.02, geno=0.02, maf=0.01,
                hwe_p=1e-6):
    """
    Genotype QC pipeline driven by the PLINK 2 command line.

    Filtering criteria passed to plink2:
        - --mind: per-sample missing rate <= mind
        - --geno: per-SNP missing rate <= geno
        - --maf: minor allele frequency >= maf
        - --hwe: Hardy-Weinberg p >= hwe_p

    Additional QC mentioned by the skill but not performed here:
        - sex-mismatch check
        - removal of relatives from the IBD estimates (pi-hat > 0.25)
        - PCA outlier removal

    Parameters:
        plink_prefix: prefix of the input .bed/.bim/.fam fileset
        mind, geno, maf, hwe_p: thresholds forwarded to plink2

    Returns:
        prefix of the QC-filtered fileset ("<plink_prefix>_qc").

    Raises:
        subprocess.CalledProcessError: if any plink2 step exits non-zero.
    """
    import subprocess

    prefix = str(plink_prefix)
    qc_prefix = f"{prefix}_qc"
    prune_prefix = f"{prefix}_prune"

    # Commands are passed as argument lists (no shell), so prefixes that
    # contain spaces or shell metacharacters are handled verbatim.

    # Step 1: missingness / MAF / HWE filters
    subprocess.run(
        ["plink2", "--bfile", prefix,
         "--mind", str(mind), "--geno", str(geno), "--maf", str(maf),
         "--hwe", str(hwe_p),
         "--make-bed", "--out", qc_prefix],
        check=True)

    # Step 2a: LD pruning to obtain approximately independent markers
    subprocess.run(
        ["plink2", "--bfile", qc_prefix,
         "--indep-pairwise", "50", "5", "0.2", "--out", prune_prefix],
        check=True)

    # Step 2b: IBD estimation (relative detection) on the pruned markers.
    # NOTE(review): --genome is a PLINK 1.9 option; PLINK 2 provides
    # --make-king-table / --king-cutoff for relatedness — confirm which
    # binary this step is meant to run against.
    subprocess.run(
        ["plink2", "--bfile", qc_prefix,
         "--extract", f"{prune_prefix}.prune.in",
         "--genome", "--out", f"{prefix}_ibd"],
        check=True)

    return qc_prefix
69
+
70
+
71
def allele_frequency_stats(genotype_matrix, populations):
    """
    Per-population allele-frequency statistics for every SNP.

    Genotypes are read as allele dosages: 0 = AA, 1 = Aa, 2 = aa. Any
    other value (NaN, or codes such as -1) is treated as missing.

    Reported metrics:
        - p: frequency of allele A, (2*n_AA + n_Aa) / (2*n)
        - MAF: minor allele frequency, min(p, 1 - p)
        - Het_obs: observed heterozygosity, n_Aa / n
        - Het_exp: expected heterozygosity, 2pq
        - HWE_p: chi-square Hardy-Weinberg test (1 df),
            H0: f(AA) = p^2, f(Aa) = 2pq, f(aa) = q^2;
            set to 1.0 when an expected count is zero (monomorphic SNP)

    Parameters:
        genotype_matrix: DataFrame, rows = samples, columns = SNPs
        populations: DataFrame with a "population" column, indexed by
            the same sample labels as genotype_matrix

    Returns:
        DataFrame with one row per (population, SNP). A SNP with no called
        genotype in a population gets NaN for every statistic.
    """
    from scipy.stats import chi2

    nan = float("nan")
    results = []
    for pop in populations["population"].unique():
        pop_idx = populations[populations["population"] == pop].index
        geno_pop = genotype_matrix.loc[pop_idx]

        for snp in geno_pop.columns:
            counts = geno_pop[snp].value_counts()
            n_0 = int(counts.get(0, 0))  # AA
            n_1 = int(counts.get(1, 0))  # Aa
            n_2 = int(counts.get(2, 0))  # aa
            # Count only recognised genotypes; summing every observed
            # value would let missing-data codes inflate n.
            n = n_0 + n_1 + n_2

            if n == 0:
                results.append({
                    "snp": snp, "population": pop,
                    "MAF": nan, "p": nan,
                    "Het_obs": nan, "Het_exp": nan,
                    "HWE_p": nan,
                })
                continue

            p = (2 * n_0 + n_1) / (2 * n)
            q = 1 - p
            maf = min(p, q)

            # HWE test
            exp_0 = n * p**2
            exp_1 = n * 2 * p * q
            exp_2 = n * q**2
            if exp_0 > 0 and exp_1 > 0 and exp_2 > 0:
                chi2_stat = ((n_0 - exp_0)**2 / exp_0
                             + (n_1 - exp_1)**2 / exp_1
                             + (n_2 - exp_2)**2 / exp_2)
                # sf() keeps precision for tiny p-values where
                # 1 - cdf() would round to zero.
                hwe_p = float(chi2.sf(chi2_stat, df=1))
            else:
                hwe_p = 1.0

            het_obs = n_1 / n
            het_exp = 2 * p * q

            results.append({
                "snp": snp, "population": pop,
                "MAF": round(maf, 4), "p": round(p, 4),
                "Het_obs": round(het_obs, 4), "Het_exp": round(het_exp, 4),
                "HWE_p": round(hwe_p, 6),
            })

    return pd.DataFrame(results)
122
+ ```
123
+
124
+ ## 2. 集団構造解析
125
+
126
+ ```python
127
def population_structure(plink_prefix, n_components=10, method="pca"):
    """
    Population-structure analysis through external command-line tools.

    method:
        - "pca": principal component analysis with plink2 --pca, to
          visualise genetic differences between populations in 2D/3D
        - "admixture": ADMIXTURE ancestry proportions per individual;
          K = 2..10 is tried and the K with the lowest cross-validation
          error is selected

    PCA on genotypes:
        with X the (n_samples x n_snps) standardised genotype matrix, the
        sample covariance matrix C = X Xᵀ / n_snps is eigendecomposed.

    Parameters:
        plink_prefix: prefix of the .bed/.bim/.fam fileset
        n_components: number of principal components (PCA only)
        method: "pca" or "admixture"

    Returns:
        "pca": (eigenvec DataFrame, var_explained Series). The ratios are
            relative to the sum of the reported eigenvalues only.
        "admixture": (Q DataFrame for the best K, {K: CV error}, best K).

    Raises:
        ValueError: if method is not "pca" or "admixture".
        RuntimeError: if no ADMIXTURE run reported a CV error.
        subprocess.CalledProcessError: if plink2 exits non-zero.
    """
    if method not in ("pca", "admixture"):
        # Previously an unknown method fell through and returned None.
        raise ValueError(
            f"unknown method {method!r}; expected 'pca' or 'admixture'")

    import os
    import subprocess

    import pandas as pd

    prefix = str(plink_prefix)

    if method == "pca":
        pca_prefix = f"{prefix}_pca"
        # Argument list (no shell) so unusual characters in the prefix
        # are passed through verbatim.
        subprocess.run(
            ["plink2", "--bfile", prefix,
             "--pca", str(n_components), "--out", pca_prefix],
            check=True)

        eigenvec = pd.read_csv(f"{pca_prefix}.eigenvec", sep="\t")
        eigenval = pd.read_csv(f"{pca_prefix}.eigenval", header=None)
        var_explained = eigenval[0] / eigenval[0].sum()

        pc1 = var_explained.iloc[0]
        # With n_components=1 there is no second component to report.
        pc2 = var_explained.iloc[1] if len(var_explained) > 1 else float("nan")
        print(f" PCA: PC1={pc1:.3f}, PC2={pc2:.3f}")
        return eigenvec, var_explained

    cv_errors = {}
    for K in range(2, 11):
        # A failing K is skipped rather than aborting the whole scan.
        result = subprocess.run(
            ["admixture", "--cv", f"{prefix}.bed", str(K)],
            capture_output=True, text=True)
        # Extract the CV error, e.g. "CV error (K=3): 0.52305"
        for line in result.stdout.split("\n"):
            if "CV error" in line:
                cv_errors[K] = float(line.rsplit(":", 1)[1])

    if not cv_errors:
        raise RuntimeError(
            "ADMIXTURE produced no CV error for any K; check that the "
            "'admixture' binary is installed and the input is valid")

    best_K = min(cv_errors, key=cv_errors.get)
    # ADMIXTURE writes <basename>.<K>.Q into the current working
    # directory, not next to the input file.
    q_path = f"{os.path.basename(prefix)}.{best_K}.Q"
    Q = pd.read_csv(q_path, sep=" ", header=None)

    print(f" ADMIXTURE: best K={best_K} (CV error={cv_errors[best_K]:.4f})")
    return Q, cv_errors, best_K
169
+ ```
170
+
171
+ ## 3. 遺伝的分化(Fst)
172
+
173
+ ```python
174
def calculate_fst(genotype_matrix, populations, method="weir_cockerham"):
    """
    Estimate the genetic differentiation index Fst between populations.

    Weir-Cockerham (1984) variance-component definition:
        F_ST = s2_a / (s2_a + s2_b + s2_w)
        s2_a: variance between populations
        s2_b: variance between individuals within populations
        s2_w: variance within individuals (between alleles)

    The code uses the allele-frequency form
        Fst = (MSP - MSG) / (MSP + (nc - 1) * MSG)
    with MSP / MSG the between / within population mean squares and nc
    the sample-size correction term.

    Interpretation:
        Fst = 0: no differentiation (panmixia)
        0 < Fst < 0.05: little differentiation
        0.05 <= Fst < 0.15: moderate differentiation
        0.15 <= Fst < 0.25: great differentiation
        Fst >= 0.25: very great differentiation

    genome-wide Fst: weighted average over SNPs, computed as the ratio of
        summed numerators to summed denominators
    per-SNP Fst: for detecting local adaptation signals; negative
        estimates are clipped to 0

    Parameters:
        genotype_matrix: DataFrame, rows = samples, columns = SNPs, values
            0/1/2 (NaN = missing)
        populations: DataFrame with a "population" column, indexed by the
            same sample labels
        method: only "weir_cockerham" is implemented

    Returns:
        (fst_df, genome_fst): per-SNP DataFrame with columns snp, Fst and
        p_bar, and the genome-wide estimate.

    Raises:
        ValueError: for an unsupported method or fewer than two
            populations.
    """
    import pandas as pd

    if method != "weir_cockerham":
        raise ValueError(
            f"unsupported method {method!r}; only 'weir_cockerham' is implemented")

    pop_labels = populations["population"]
    unique_pops = pop_labels.unique()
    if len(unique_pops) < 2:
        raise ValueError("Fst requires at least two populations")

    # Sample labels per population do not depend on the SNP: build once.
    members = {pop: pop_labels[pop_labels == pop].index for pop in unique_pops}

    fst_per_snp = []
    numerator_sum = 0.0
    denominator_sum = 0.0
    for snp in genotype_matrix.columns:
        # Per-population allele frequencies; populations without any
        # called genotype at this SNP are left out.
        pop_freqs = {}
        pop_sizes = {}
        for pop, idx in members.items():
            geno = genotype_matrix.loc[idx, snp].dropna()
            size = len(geno)
            if size == 0:
                continue
            pop_freqs[pop] = float(
                (2 * (geno == 0).sum() + (geno == 1).sum()) / (2 * size))
            pop_sizes[pop] = size

        n_pops = len(pop_sizes)
        n_total = sum(pop_sizes.values())
        if n_total:
            p_bar = sum(pop_freqs[k] * pop_sizes[k] for k in pop_sizes) / n_total
        else:
            p_bar = float("nan")

        numerator = 0.0
        denominator = 0.0
        # Both mean squares need positive degrees of freedom.
        if n_pops >= 2 and n_total > n_pops:
            MSP = sum(pop_sizes[k] * (pop_freqs[k] - p_bar)**2
                      for k in pop_sizes) / (n_pops - 1)
            MSG = sum(pop_sizes[k] * pop_freqs[k] * (1 - pop_freqs[k])
                      for k in pop_sizes) / (n_total - n_pops)
            nc = (n_total - sum(n**2 for n in pop_sizes.values()) / n_total) / (n_pops - 1)
            numerator = MSP - MSG
            denominator = MSP + (nc - 1) * MSG

        if denominator > 0:
            fst = numerator / denominator
            numerator_sum += numerator
            denominator_sum += denominator
        else:
            fst = 0

        fst_per_snp.append({"snp": snp, "Fst": max(fst, 0), "p_bar": p_bar})

    fst_df = pd.DataFrame(fst_per_snp, columns=["snp", "Fst", "p_bar"])
    # Ratio of sums weights each SNP by its denominator; averaging the
    # clipped per-SNP ratios over-weights low-information SNPs and is
    # biased upward by the clipping.
    if denominator_sum > 0:
        genome_fst = max(numerator_sum / denominator_sum, 0.0)
    else:
        genome_fst = 0.0

    print(f" Fst: genome-wide={genome_fst:.4f}, "
          f"max per-SNP={fst_df['Fst'].max():.4f}")
    return fst_df, genome_fst
235
+ ```
236
+
237
+ ## 4. 自然選択シグナル検出
238
+
239
+ ```python
240
def _orient_haplotypes(haplotypes, n_variants, variant_axis):
    """Return haplotypes with variants on variant_axis, transposing only when the input clearly uses the other layout."""
    import numpy as np

    other_axis = 1 - variant_axis
    if (haplotypes.shape[variant_axis] != n_variants
            and haplotypes.shape[other_axis] == n_variants):
        return np.ascontiguousarray(haplotypes.T)
    return haplotypes


def selection_scan(haplotype_matrix, positions, method="ihs"):
    """
    Scan for signals of natural selection.

    method:
        - "ihs": Integrated Haplotype Score — local positive selection;
          |iHS| > 2 marks a candidate
        - "tajima_d": Tajima's D neutrality test in sliding windows
          (50 kb windows, 10 kb step, windows with more than 5 SNPs)
            D > 0: balancing selection or population contraction
            D < 0: positive selection or population expansion
            D ~ 0: neutral evolution
        - "xpehh": Cross-Population EHH — listed by the skill but not
          implemented in this function

    iHS:
        for each SNP the Extended Haplotype Homozygosity (EHH) of the
        derived and ancestral alleles is compared,
        iHS = ln(iHH_A / iHH_D), then standardised (here: one genome-wide
        z-score, not binned by allele frequency).

    Parameters:
        haplotype_matrix: 2-D 0/1 array of haplotypes. Either orientation
            is accepted when the axis lengths make it unambiguous;
            otherwise "tajima_d" assumes (n_haplotypes, n_variants) and
            "ihs" assumes (n_variants, n_haplotypes).
        positions: sorted variant positions, one per variant
        method: "ihs" or "tajima_d"

    Returns:
        "tajima_d": DataFrame with columns start, end, D, n_snps.
        "ihs": array of standardised iHS values, one per variant.

    Raises:
        ValueError: for an unknown method.
        NotImplementedError: for "xpehh".
    """
    # Validate before importing scikit-allel; previously an unsupported
    # method silently returned None.
    if method not in ("ihs", "tajima_d", "xpehh"):
        raise ValueError(
            f"unknown method {method!r}; expected 'ihs', 'tajima_d' or 'xpehh'")
    if method == "xpehh":
        raise NotImplementedError(
            "xpehh needs haplotypes from two populations and is not "
            "implemented by selection_scan")

    import numpy as np
    import pandas as pd

    positions = np.asarray(positions)
    haplotypes = np.asarray(haplotype_matrix)
    n_variants = positions.shape[0]

    if method == "tajima_d":
        # Sliding-window Tajima's D
        import allel

        haps = _orient_haplotypes(haplotypes, n_variants, variant_axis=1)
        n_haplotypes = haps.shape[0]

        D_values = []
        window_size = 50000
        step = 10000

        for start in range(0, int(positions[-1]), step):
            end = start + window_size
            mask = (positions >= start) & (positions < end)
            if mask.sum() > 5:
                alt_counts = haps[:, mask].sum(axis=0)
                # Tajima's D needs the count of every allele at each
                # site: column 0 = reference, column 1 = alternate.
                ac = allel.AlleleCountsArray(
                    np.column_stack([n_haplotypes - alt_counts, alt_counts]))
                D = allel.tajima_d(ac)
                D_values.append({"start": start, "end": end, "D": D,
                                 "n_snps": mask.sum()})

        df = pd.DataFrame(D_values, columns=["start", "end", "D", "n_snps"])
        print(f" Tajima's D: mean={df['D'].mean():.3f}, "
              f"range=[{df['D'].min():.3f}, {df['D'].max():.3f}]")
        return df

    import allel

    # allel.ihs expects variants on the first axis.
    haps = _orient_haplotypes(haplotypes, n_variants, variant_axis=0)
    ihs = allel.ihs(haps, positions)
    # Standardise
    ihs_std = (ihs - np.nanmean(ihs)) / np.nanstd(ihs)

    n_sig = np.sum(np.abs(ihs_std) > 2)
    print(f" iHS: {n_sig} candidate regions (|iHS|>2)")
    return ihs_std
291
+ ```
292
+
293
+ ## References
294
+
295
+ ### Output Files
296
+
297
+ | ファイル | 形式 |
298
+ |---|---|
299
+ | `results/allele_frequencies.csv` | CSV |
300
+ | `results/pca_eigenvec.csv` | CSV |
301
+ | `results/admixture_Q.csv` | CSV |
302
+ | `results/fst_per_snp.csv` | CSV |
303
+ | `results/selection_scan.csv` | CSV |
304
+ | `figures/pca_populations.png` | PNG |
305
+ | `figures/admixture_barplot.png` | PNG |
306
+ | `figures/manhattan_fst.png` | PNG |
307
+
308
+ ### 利用可能ツール
309
+
310
+ > [ToolUniverse](https://github.com/mims-harvard/ToolUniverse) SMCP 経由で利用可能な外部ツール。
311
+
312
+ | カテゴリ | 主要ツール | 用途 |
313
+ |---|---|---|
314
+ | gnomAD | `gnomad_get_variant` | バリアント集団頻度 |
315
+ | gnomAD | `gnomad_get_gene_constraints` | 遺伝子制約指標 |
316
+ | gnomAD | `gnomad_get_region` | 領域別バリアント |
317
+ | gnomAD | `gnomad_search_variants` | バリアント検索 |
318
+ | GWAS | `GWAS_search_associations_by_gene` | 遺伝子別 GWAS 関連 |
319
+ | GWAS | `gwas_search_studies` | GWAS 研究検索 |
320
+ | GWAS | `gwas_get_variants_for_trait` | 形質別バリアント |
321
+ | GWAS | `gwas_get_associations_for_snp` | SNP 別関連 |
322
+ | GWAS | `gwas_get_snps_for_gene` | 遺伝子近傍 SNP |
323
+
324
+ ### 参照スキル
325
+
326
+ | スキル | 連携内容 |
327
+ |---|---|
328
+ | [scientific-variant-interpretation](../scientific-variant-interpretation/SKILL.md) | バリアント臨床解釈 |
329
+ | [scientific-bioinformatics](../scientific-bioinformatics/SKILL.md) | ゲノムアノテーション |
330
+ | [scientific-disease-research](../scientific-disease-research/SKILL.md) | 疾患-遺伝子関連 |
331
+ | [scientific-statistical-testing](../scientific-statistical-testing/SKILL.md) | 統計検定 |
332
+ | [scientific-pca-tsne](../scientific-pca-tsne/SKILL.md) | 次元削減 |
333
+
334
+ ### 依存パッケージ
335
+
336
+ - scikit-allel, plink2, admixture, pandas, numpy, scipy