@nahisaho/satori 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/LICENCE +0 -0
  2. package/README.md +191 -0
  3. package/bin/satori.js +95 -0
  4. package/package.json +29 -0
  5. package/src/.github/skills/scientific-academic-writing/SKILL.md +361 -0
  6. package/src/.github/skills/scientific-academic-writing/assets/acs_article.md +199 -0
  7. package/src/.github/skills/scientific-academic-writing/assets/elsevier_article.md +244 -0
  8. package/src/.github/skills/scientific-academic-writing/assets/ieee_transactions.md +212 -0
  9. package/src/.github/skills/scientific-academic-writing/assets/imrad_standard.md +181 -0
  10. package/src/.github/skills/scientific-academic-writing/assets/nature_article.md +179 -0
  11. package/src/.github/skills/scientific-academic-writing/assets/qiita_technical_article.md +385 -0
  12. package/src/.github/skills/scientific-academic-writing/assets/science_research_article.md +169 -0
  13. package/src/.github/skills/scientific-bioinformatics/SKILL.md +220 -0
  14. package/src/.github/skills/scientific-biosignal-processing/SKILL.md +357 -0
  15. package/src/.github/skills/scientific-causal-inference/SKILL.md +347 -0
  16. package/src/.github/skills/scientific-cheminformatics/SKILL.md +196 -0
  17. package/src/.github/skills/scientific-data-preprocessing/SKILL.md +413 -0
  18. package/src/.github/skills/scientific-data-simulation/SKILL.md +244 -0
  19. package/src/.github/skills/scientific-doe/SKILL.md +360 -0
  20. package/src/.github/skills/scientific-eda-correlation/SKILL.md +141 -0
  21. package/src/.github/skills/scientific-feature-importance/SKILL.md +208 -0
  22. package/src/.github/skills/scientific-image-analysis/SKILL.md +310 -0
  23. package/src/.github/skills/scientific-materials-characterization/SKILL.md +368 -0
  24. package/src/.github/skills/scientific-meta-analysis/SKILL.md +352 -0
  25. package/src/.github/skills/scientific-metabolomics/SKILL.md +326 -0
  26. package/src/.github/skills/scientific-ml-classification/SKILL.md +265 -0
  27. package/src/.github/skills/scientific-ml-regression/SKILL.md +215 -0
  28. package/src/.github/skills/scientific-multi-omics/SKILL.md +303 -0
  29. package/src/.github/skills/scientific-network-analysis/SKILL.md +257 -0
  30. package/src/.github/skills/scientific-pca-tsne/SKILL.md +235 -0
  31. package/src/.github/skills/scientific-pipeline-scaffold/SKILL.md +331 -0
  32. package/src/.github/skills/scientific-process-optimization/SKILL.md +215 -0
  33. package/src/.github/skills/scientific-publication-figures/SKILL.md +208 -0
  34. package/src/.github/skills/scientific-sequence-analysis/SKILL.md +389 -0
  35. package/src/.github/skills/scientific-spectral-signal/SKILL.md +227 -0
  36. package/src/.github/skills/scientific-statistical-testing/SKILL.md +240 -0
  37. package/src/.github/skills/scientific-survival-clinical/SKILL.md +239 -0
  38. package/src/.github/skills/scientific-time-series/SKILL.md +291 -0
@@ -0,0 +1,240 @@
1
+ ---
2
+ name: scientific-statistical-testing
3
+ description: |
4
+ 統計検定・多重比較・エンリッチメント解析のスキル。t検定、カイ二乗検定、ANOVA、
5
+ Bonferroni/BH 補正、Fisher 正確検定、ベイズ推論を行う際に使用。
6
+ Scientific Skills Exp-03, 04, 06, 07 で確立したパターン。
7
+ ---
8
+
9
+ # Scientific Statistical Testing & Enrichment Analysis
10
+
11
+ 仮説検定、多重比較補正、エンリッチメント解析のための統計パイプラインスキル。
12
+ 頻度論的検定とベイズ推論の両方のアプローチを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - 2 群間の有意差を検定したいとき(t 検定、Mann-Whitney U)
17
+ - 多群間の比較(ANOVA、Kruskal-Wallis)
18
+ - 多重比較の補正(Bonferroni、Benjamini-Hochberg)
19
+ - パスウェイエンリッチメント解析(Fisher 正確検定)
20
+ - ベイズ推論(Beta-Binomial 共役モデル)
21
+
22
+ ## Quick Start
23
+
24
+ ## 標準パイプライン
25
+
26
+ ### 1. 2 群間検定
27
+
28
+ ```python
29
+ from scipy import stats
30
+ import numpy as np
31
+ import pandas as pd
32
+
33
def two_group_test(group1, group2, test="auto", alternative="two-sided"):
    """
    Compare two independent samples and report the result with an effect size.

    Parameters
    ----------
    group1, group2 : sequences of numeric observations.
    test : "auto", "ttest" or "mannwhitney". With "auto", Shapiro-Wilk is run
        on each group and Welch's t-test is selected only when both p-values
        exceed 0.05; otherwise Mann-Whitney U is used.
    alternative : forwarded unchanged to the SciPy test function.

    Returns
    -------
    dict with keys "test", "statistic", "p_value", "cohens_d" and
    "effect_size" ("large" for |d| > 0.8, "medium" for |d| > 0.5, otherwise
    "small").

    Raises
    ------
    ValueError
        If ``test`` is not one of the supported names.
    """
    if test == "auto":
        # Groups above 5000 observations skip Shapiro-Wilk and are assigned
        # p = 0.05, which fails the "> 0.05" check and routes to Mann-Whitney U.
        _, p1 = stats.shapiro(group1) if len(group1) <= 5000 else (0, 0.05)
        _, p2 = stats.shapiro(group2) if len(group2) <= 5000 else (0, 0.05)
        test = "ttest" if (p1 > 0.05 and p2 > 0.05) else "mannwhitney"

    if test == "ttest":
        # equal_var=False is what makes this Welch's test; SciPy's default is
        # the pooled-variance Student test, which would contradict test_name.
        stat, pval = stats.ttest_ind(group1, group2, equal_var=False,
                                     alternative=alternative)
        test_name = "Welch's t-test"
    elif test == "mannwhitney":
        stat, pval = stats.mannwhitneyu(group1, group2, alternative=alternative)
        test_name = "Mann-Whitney U"
    else:
        raise ValueError(f"Unknown test: {test}")

    # Cohen's d using the root of the mean of the two sample variances.
    pooled_std = np.sqrt((np.var(group1, ddof=1) + np.var(group2, ddof=1)) / 2)
    cohens_d = (np.mean(group1) - np.mean(group2)) / pooled_std if pooled_std > 0 else 0

    return {
        "test": test_name,
        "statistic": stat,
        "p_value": pval,
        "cohens_d": cohens_d,
        "effect_size": ("large" if abs(cohens_d) > 0.8 else
                        "medium" if abs(cohens_d) > 0.5 else "small"),
    }
65
+ ```
66
+
67
+ ### 2. 多重比較補正
68
+
69
+ ```python
70
+ from statsmodels.stats.multitest import multipletests
71
+
72
def multiple_testing_correction(p_values, method="fdr_bh", alpha=0.05):
    """
    Adjust a collection of p-values for multiple comparisons.

    ``method`` is handed to statsmodels' ``multipletests``; the names used in
    this document are 'bonferroni', 'fdr_bh' (Benjamini-Hochberg) and 'holm'.

    Returns the pair ``(reject, p_corrected)``; the remaining two values
    returned by ``multipletests`` are discarded.
    """
    outcome = multipletests(p_values, alpha=alpha, method=method)
    rejected, adjusted, _sidak_alpha, _bonf_alpha = outcome
    return rejected, adjusted
79
+ ```
80
+
81
+ ### 3. ANOVA / Kruskal-Wallis
82
+
83
+ ```python
84
def multi_group_test(groups, test="auto"):
    """
    Compare three or more independent groups.

    Parameters
    ----------
    groups : list of array-likes, one per group.
    test : "auto", "anova" or "kruskal". With "auto", one-way ANOVA is used
        when every screened group passes Shapiro-Wilk (p > 0.05), otherwise
        Kruskal-Wallis.

    Returns
    -------
    dict with keys "test", "statistic" and "p_value".

    Raises
    ------
    ValueError
        If ``test`` is not one of the supported names.
    """
    if test == "auto":
        # The normality screen only runs when it can influence the choice, so
        # an explicit test still works on groups too small for Shapiro-Wilk.
        # Groups with more than 5000 observations are left out of the screen.
        normal = all(stats.shapiro(g)[1] > 0.05 for g in groups if len(g) <= 5000)
        test = "anova" if normal else "kruskal"

    if test == "anova":
        stat, pval = stats.f_oneway(*groups)
        test_name = "One-way ANOVA"
    elif test == "kruskal":
        stat, pval = stats.kruskal(*groups)
        test_name = "Kruskal-Wallis"
    else:
        # Same error style as the two-group helper; previously this fell
        # through to an UnboundLocalError on test_name.
        raise ValueError(f"Unknown test: {test}")

    return {"test": test_name, "statistic": stat, "p_value": pval}
103
+ ```
104
+
105
+ ### 4. Fisher 正確検定パスウェイエンリッチメント(Exp-04, 07)
106
+
107
+ ```python
108
def pathway_enrichment(deg_list, pathway_dict, background_size,
                       method="fisher", correction="fdr_bh"):
    """
    Over-representation analysis of pathways using a one-sided Fisher exact test.

    Parameters
    ----------
    deg_list : iterable of differentially expressed gene identifiers.
    pathway_dict : mapping ``{pathway_name: [gene1, gene2, ...]}``.
    background_size : total number of genes in the background universe.
    method : accepted for interface compatibility; the body always uses the
        Fisher exact test and does not read this argument.
    correction : multiple-testing method forwarded to
        ``multiple_testing_correction``.

    Returns
    -------
    pandas.DataFrame sorted by raw p-value with columns Pathway, Overlap,
    Pathway_Size, Fold_Enrichment, p_value, Genes, p_adjusted, Significant.
    The same table is written to ``results/pathway_enrichment.csv``.

    Raises
    ------
    ValueError
        If ``background_size`` is smaller than the union of the DEG set and a
        pathway, which would make a contingency-table cell negative.
    """
    import os

    columns = ["Pathway", "Overlap", "Pathway_Size",
               "Fold_Enrichment", "p_value", "Genes"]
    results = []
    deg_set = set(deg_list)

    for pathway, genes in pathway_dict.items():
        gene_set = set(genes)
        overlap = deg_set & gene_set

        # 2x2 contingency table
        a = len(overlap)                  # DEG and in pathway
        b = len(deg_set) - a              # DEG, not in pathway
        c = len(gene_set) - a             # not DEG, in pathway
        d = background_size - a - b - c   # neither

        if d < 0:
            raise ValueError(
                f"background_size={background_size} is smaller than the union "
                f"of the DEG set and pathway '{pathway}'"
            )

        _, pval = stats.fisher_exact([[a, b], [c, d]], alternative="greater")
        fold_enrichment = (a / len(deg_set)) / (len(gene_set) / background_size) \
            if len(gene_set) > 0 and len(deg_set) > 0 else 0

        results.append({
            "Pathway": pathway,
            "Overlap": a,
            "Pathway_Size": len(gene_set),
            "Fold_Enrichment": fold_enrichment,
            "p_value": pval,
            "Genes": ", ".join(sorted(overlap)),
        })

    # Passing the column list keeps the schema stable even with no pathways,
    # so the sort below cannot fail on a missing "p_value" column.
    results_df = pd.DataFrame(results, columns=columns)

    # Multiple-testing correction
    if len(results_df) > 0:
        reject, p_adj = multiple_testing_correction(
            results_df["p_value"].values, method=correction
        )
        results_df["p_adjusted"] = p_adj
        results_df["Significant"] = reject
    else:
        results_df["p_adjusted"] = pd.Series(dtype=float)
        results_df["Significant"] = pd.Series(dtype=bool)

    results_df = results_df.sort_values("p_value")
    # Create the output directory so a fresh working directory does not
    # discard the computed table with an OSError.
    os.makedirs("results", exist_ok=True)
    results_df.to_csv("results/pathway_enrichment.csv", index=False)
    return results_df
155
+ ```
156
+
157
+ ### 5. ベイズ推論(Beta-Binomial, Exp-06 パターン)
158
+
159
+ ```python
160
def bayesian_beta_binomial(successes, trials, prior_alpha=1, prior_beta=1):
    """
    Posterior summary for a binomial proportion under a conjugate Beta prior.

    Parameters
    ----------
    successes : number of observed successes (scalar).
    trials : number of trials (scalar).
    prior_alpha, prior_beta : Beta prior parameters; the default Beta(1, 1)
        is the uniform prior.

    Returns
    -------
    dict with keys "posterior_mean", "posterior_std", "95%_CI" (equal-tailed
    2.5% / 97.5% quantiles), "MAP", "posterior_alpha" and "posterior_beta".

    Raises
    ------
    ValueError
        If a prior parameter is not positive or the counts are impossible
        (negative, or more successes than trials).
    """
    from scipy.stats import beta

    if prior_alpha <= 0 or prior_beta <= 0:
        raise ValueError("prior_alpha and prior_beta must be positive")
    if trials < 0 or not 0 <= successes <= trials:
        raise ValueError("counts must satisfy 0 <= successes <= trials")

    post_alpha = prior_alpha + successes
    post_beta = prior_beta + (trials - successes)
    posterior = beta(post_alpha, post_beta)

    if post_alpha > 1 and post_beta > 1:
        # Interior mode of a Beta density.
        map_estimate = (post_alpha - 1) / (post_alpha + post_beta - 2)
    elif post_alpha <= 1 < post_beta:
        # Density is non-increasing on [0, 1]: the mode sits at 0.
        map_estimate = 0.0
    elif post_beta <= 1 < post_alpha:
        # Density is non-decreasing on [0, 1]: the mode sits at 1.
        map_estimate = 1.0
    else:
        # Flat or U-shaped posterior has no unique mode; report the mean.
        map_estimate = posterior.mean()

    return {
        "posterior_mean": posterior.mean(),
        "posterior_std": posterior.std(),
        "95%_CI": (posterior.ppf(0.025), posterior.ppf(0.975)),
        "MAP": map_estimate,
        "posterior_alpha": post_alpha,
        "posterior_beta": post_beta,
    }
180
+ ```
181
+
182
+ ### 6. 生存解析(Kaplan-Meier + Log-rank 検定, Exp-03/06)
183
+
184
+ ```python
185
def survival_analysis(df, time_col, event_col, group_col):
    """
    Plot Kaplan-Meier curves per group and run a log-rank test for two groups.

    Requires the ``lifelines`` library.

    Parameters
    ----------
    df : DataFrame holding the survival data.
    time_col : column with follow-up durations.
    event_col : column with the event indicator passed as ``event_observed``.
    group_col : column whose distinct values define the groups.

    Returns
    -------
    The log-rank p-value when exactly two groups are present, otherwise None.
    The figure is saved to ``figures/kaplan_meier.png``.
    """
    import os

    from lifelines import KaplanMeierFitter
    from lifelines.statistics import logrank_test
    import matplotlib.pyplot as plt

    groups = df[group_col].unique()
    fig, ax = plt.subplots(figsize=(8, 6))
    kmf = KaplanMeierFitter()
    p_value = None

    try:
        for group in sorted(groups):
            mask = df[group_col] == group
            kmf.fit(df.loc[mask, time_col], event_observed=df.loc[mask, event_col],
                    label=str(group))
            kmf.plot_survival_function(ax=ax)

        # Log-rank test (only defined here for exactly two groups)
        if len(groups) == 2:
            g1 = df[df[group_col] == groups[0]]
            g2 = df[df[group_col] == groups[1]]
            lr = logrank_test(g1[time_col], g2[time_col],
                              event_observed_A=g1[event_col],
                              event_observed_B=g2[event_col])
            p_value = lr.p_value
            ax.text(0.7, 0.9, f"Log-rank p={lr.p_value:.4f}",
                    transform=ax.transAxes, fontsize=10)

        ax.set_xlabel("Time")
        ax.set_ylabel("Survival Probability")
        ax.set_title("Kaplan-Meier Survival Curves", fontweight="bold")
        fig.tight_layout()
        # Ensure the output directory exists before saving.
        os.makedirs("figures", exist_ok=True)
        fig.savefig("figures/kaplan_meier.png", dpi=300, bbox_inches="tight")
    finally:
        # Release the figure even when fitting or saving fails.
        plt.close(fig)

    return p_value
222
+ ```
223
+
224
+ ## References
225
+
226
+ ### Output Files
227
+
228
+ | ファイル | 形式 |
229
+ |---|---|
230
+ | `results/pathway_enrichment.csv` | CSV |
231
+ | `results/statistical_tests.csv` | CSV |
232
+ | `figures/kaplan_meier.png` | PNG |
233
+ | `figures/enrichment_dotplot.png` | PNG |
234
+
235
+ #### 参照実験
236
+
237
+ - **Exp-03**: Mann-Whitney U + Volcano Plot + 生存解析
238
+ - **Exp-04**: Fisher パスウェイエンリッチメント + Louvain コミュニティ
239
+ - **Exp-06**: 頻度論 + ベイズ推論 + 検出力分析
240
+ - **Exp-07**: Welch t 検定 + BH 補正 + PLS-DA VIP
@@ -0,0 +1,239 @@
1
+ ---
2
+ name: scientific-survival-clinical
3
+ description: |
4
+ 生存解析と臨床統計のスキル。Kaplan-Meier 曲線、Cox 比例ハザードモデル、Log-rank 検定、
5
+ 検出力分析、NNT/NNH 算出を行う際に使用。
6
+ Scientific Skills Exp-03, 06 で確立したパターン。
7
+ ---
8
+
9
+ # Scientific Survival & Clinical Statistics
10
+
11
+ 臨床試験データの統計解析パイプラインスキル。生存時間解析、検出力分析、
12
+ 安全性解析の標準ワークフローを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - 生存時間解析(Kaplan-Meier, Cox PH)を行いたいとき
17
+ - 臨床試験のサンプルサイズ・検出力を計算したいとき
18
+ - 有害事象の安全性解析(RR, OR, NNT, NNH)を行いたいとき
19
+ - ベイズ逐次更新による試験モニタリング
20
+
21
+ ## Quick Start
22
+
23
+ ## 標準パイプライン
24
+
25
+ ### 1. 検出力分析・サンプルサイズ算出
26
+
27
+ ```python
28
+ from statsmodels.stats.power import TTestIndPower
29
+ import numpy as np
30
+
31
def power_analysis(effect_size, alpha=0.05, power=0.80, ratio=1.0):
    """
    Power analysis for a two-sample t-test.

    Solves for the first group's sample size that reaches ``power`` at the
    given ``effect_size``, ``alpha`` and allocation ``ratio``, and evaluates
    the achieved power for first-group sizes 10, 20, ..., 490.

    Returns a dict with "required_n_per_group" (rounded up), the echoed
    "effect_size", "alpha" and "target_power", and "power_curve" holding the
    evaluated sample sizes and their powers.
    """
    solver = TTestIndPower()

    # Required sample size for the requested power (two-sided test).
    required_n = solver.solve_power(
        effect_size=effect_size,
        alpha=alpha,
        power=power,
        ratio=ratio,
        alternative="two-sided",
    )

    # Power curve: leaving `power` unset makes solve_power solve for it.
    sample_sizes = np.arange(10, 500, 10)
    achieved = []
    for nobs in sample_sizes:
        achieved.append(
            solver.solve_power(effect_size=effect_size, nobs1=nobs,
                               alpha=alpha, ratio=ratio)
        )

    report = {}
    report["required_n_per_group"] = int(np.ceil(required_n))
    report["effect_size"] = effect_size
    report["alpha"] = alpha
    report["target_power"] = power
    report["power_curve"] = {"n": sample_sizes.tolist(), "power": achieved}
    return report
55
+ ```
56
+
57
+ ### 2. Kaplan-Meier + Log-rank 検定
58
+
59
+ ```python
60
+ import matplotlib.pyplot as plt
61
+
62
def kaplan_meier_analysis(df, time_col, event_col, group_col,
                          figsize=(10, 7)):
    """
    Fit and plot Kaplan-Meier curves per group, with a two-group log-rank test.

    Parameters
    ----------
    df : DataFrame holding the survival data.
    time_col : column with follow-up durations.
    event_col : column with the event indicator passed as ``event_observed``.
    group_col : column whose distinct values define the groups.
    figsize : matplotlib figure size.

    Returns
    -------
    dict mapping each group value to ``{"median_survival", "n"}``; when there
    are exactly two groups the key "logrank_p" holds the log-rank p-value.
    The figure is saved to ``figures/kaplan_meier.png``.
    """
    import os

    from lifelines import KaplanMeierFitter
    from lifelines.statistics import logrank_test

    fig, ax = plt.subplots(figsize=figsize)
    try:
        groups = sorted(df[group_col].unique())
        results = {}

        kmf = KaplanMeierFitter()
        for group in groups:
            mask = df[group_col] == group
            kmf.fit(df.loc[mask, time_col],
                    event_observed=df.loc[mask, event_col],
                    label=str(group))
            kmf.plot_survival_function(ax=ax, ci_show=True)
            results[group] = {
                "median_survival": kmf.median_survival_time_,
                # Plain int so the summary serialises without NumPy types.
                "n": int(mask.sum()),
            }

        # Log-rank test (only for exactly two groups)
        if len(groups) == 2:
            g1 = df[df[group_col] == groups[0]]
            g2 = df[df[group_col] == groups[1]]
            lr = logrank_test(
                g1[time_col], g2[time_col],
                event_observed_A=g1[event_col],
                event_observed_B=g2[event_col]
            )
            results["logrank_p"] = lr.p_value
            ax.text(0.65, 0.85, f"Log-rank p = {lr.p_value:.4f}",
                    transform=ax.transAxes, fontsize=11,
                    bbox=dict(boxstyle="round,pad=0.3", facecolor="wheat"))

        ax.set_xlabel("Time", fontsize=12)
        ax.set_ylabel("Survival Probability", fontsize=12)
        ax.set_title("Kaplan-Meier Survival Curves", fontsize=14, fontweight="bold")
        ax.set_ylim(0, 1.05)
        fig.tight_layout()
        # Ensure the output directory exists before saving.
        os.makedirs("figures", exist_ok=True)
        fig.savefig("figures/kaplan_meier.png", dpi=300, bbox_inches="tight")
    finally:
        # Release the figure even when fitting or saving fails.
        plt.close(fig)
    return results
108
+ ```
109
+
110
+ ### 3. Cox 比例ハザードモデル
111
+
112
+ ```python
113
def cox_proportional_hazard(df, time_col, event_col, covariates):
    """
    Fit a Cox proportional hazards model and export its summary and a plot.

    Parameters
    ----------
    df : DataFrame holding the survival data.
    time_col : duration column.
    event_col : event indicator column.
    covariates : sequence of covariate column names (list, tuple, Index, ...).

    Rows with a missing value in any of the used columns are dropped before
    fitting.

    Returns
    -------
    ``(cph, summary)``: the fitted ``CoxPHFitter`` and its summary table. The
    summary is written to ``results/cox_ph_results.csv`` and the coefficient
    plot to ``figures/cox_ph_forest.png``.
    """
    import os

    from lifelines import CoxPHFitter

    # list() lets tuples and other sequences be concatenated with the two
    # mandatory columns.
    covariates = list(covariates)

    cph = CoxPHFitter()
    cox_df = df[[time_col, event_col] + covariates].dropna()
    cph.fit(cox_df, duration_col=time_col, event_col=event_col)

    # Model summary table
    summary = cph.summary
    os.makedirs("results", exist_ok=True)
    summary.to_csv("results/cox_ph_results.csv")

    # Forest-plot style visualisation
    fig, ax = plt.subplots(figsize=(10, max(4, len(covariates) * 0.8)))
    try:
        cph.plot(ax=ax)
        ax.set_title("Cox PH: Hazard Ratios", fontweight="bold")
        # NOTE(review): the reference line at x=0 assumes cph.plot draws
        # log hazard ratios (no effect at 0) - confirm against lifelines.
        ax.axvline(x=0, color="gray", linestyle="--")
        fig.tight_layout()
        os.makedirs("figures", exist_ok=True)
        fig.savefig("figures/cox_ph_forest.png", dpi=300, bbox_inches="tight")
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)

    return cph, summary
137
+ ```
138
+
139
+ ### 4. 安全性解析(RR, OR, NNT, NNH)
140
+
141
+ ```python
142
def safety_analysis(n_event_treatment, n_total_treatment,
                    n_event_control, n_total_control, event_name="AE"):
    """
    Compute safety metrics for an adverse event from a two-arm 2x2 table.

    Parameters
    ----------
    n_event_treatment, n_total_treatment : events and subjects, treatment arm.
    n_event_control, n_total_control : events and subjects, control arm.
    event_name : label stored under the "Event" key.

    Returns
    -------
    dict of formatted strings: "Event", "Rate_Treatment", "Rate_Control",
    "Relative_Risk", "Odds_Ratio", "ARD", plus either "NNH" (treatment rate
    above control) or "NNT" (otherwise). Ratios whose denominator is zero are
    reported as "inf", and as "nan" when the numerator is zero as well.

    Raises
    ------
    ValueError
        If an arm has no subjects or an event count lies outside
        ``[0, arm size]``.
    """
    for label, events, total in (
        ("treatment", n_event_treatment, n_total_treatment),
        ("control", n_event_control, n_total_control),
    ):
        if total <= 0:
            raise ValueError(f"{label} arm must contain at least one subject")
        if not 0 <= events <= total:
            raise ValueError(
                f"{label} event count must lie between 0 and the arm size"
            )

    p_t = n_event_treatment / n_total_treatment
    p_c = n_event_control / n_total_control

    # Relative risk; 0/0 is undefined rather than infinite.
    if p_c > 0:
        rr = p_t / p_c
    else:
        rr = np.inf if p_t > 0 else np.nan

    # Odds ratio as the cross-product ratio (a*d)/(b*c), which avoids the
    # intermediate infinite odds when a rate equals exactly 0 or 1.
    cross_num = n_event_treatment * (n_total_control - n_event_control)
    cross_den = (n_total_treatment - n_event_treatment) * n_event_control
    if cross_den > 0:
        odds_ratio = cross_num / cross_den
    else:
        odds_ratio = np.inf if cross_num > 0 else np.nan

    # NNT / NNH: reciprocal of the absolute risk difference.
    ard = abs(p_t - p_c)
    nnt_nnh = 1 / ard if ard > 0 else np.inf
    metric = "NNH" if p_t > p_c else "NNT"

    return {
        "Event": event_name,
        "Rate_Treatment": f"{p_t:.3f}",
        "Rate_Control": f"{p_c:.3f}",
        "Relative_Risk": f"{rr:.3f}",
        "Odds_Ratio": f"{odds_ratio:.3f}",
        "ARD": f"{ard:.3f}",
        metric: f"{nnt_nnh:.1f}",
    }
170
+ ```
171
+
172
+ ### 5. ベイズ逐次更新(Exp-06 パターン)
173
+
174
+ ```python
175
+ from scipy.stats import beta as beta_dist
176
+
177
def bayesian_sequential_update(successes_list, trials_list,
                               prior_alpha=1, prior_beta=1,
                               figsize=(12, 6)):
    """
    Visualise sequential Bayesian updating with a conjugate Beta-Binomial model.

    Parameters
    ----------
    successes_list, trials_list : CUMULATIVE successes and trials at each
        interim analysis, in order.
    prior_alpha, prior_beta : Beta prior parameters.
    figsize : matplotlib figure size.

    The left panel shows the posterior density at each interim look; the right
    panel shows the posterior mean with its equal-tailed 95% credible
    interval. The figure is saved to ``figures/bayesian_update.png`` and
    nothing is returned.
    """
    import os

    # Because the inputs are cumulative, every posterior is prior + totals
    # observed so far. Adding each look on top of the previous posterior would
    # count the earlier data twice and make the two panels disagree.
    posteriors = [beta_dist(prior_alpha + s, prior_beta + (n - s))
                  for s, n in zip(successes_list, trials_list)]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize)
    try:
        x = np.linspace(0, 1, 500)

        for i, (posterior, n) in enumerate(zip(posteriors, trials_list)):
            ax1.plot(x, posterior.pdf(x), linewidth=2,
                     label=f"Interim {i+1} (n={n})")

        ax1.set_xlabel("Response Rate")
        ax1.set_ylabel("Density")
        ax1.set_title("Bayesian Sequential Update", fontweight="bold")
        ax1.legend()

        # Credible-interval trajectory, from the same posteriors as the left
        # panel.
        ci_lower = [posterior.ppf(0.025) for posterior in posteriors]
        ci_upper = [posterior.ppf(0.975) for posterior in posteriors]
        means = [posterior.mean() for posterior in posteriors]

        ax2.fill_between(range(1, len(means) + 1), ci_lower, ci_upper,
                         alpha=0.3, color="steelblue")
        ax2.plot(range(1, len(means) + 1), means, "bo-", linewidth=2)
        ax2.set_xlabel("Interim Analysis")
        ax2.set_ylabel("Posterior Mean (95% CI)")
        ax2.set_title("Credible Interval Evolution", fontweight="bold")

        fig.tight_layout()
        # Ensure the output directory exists before saving.
        os.makedirs("figures", exist_ok=True)
        fig.savefig("figures/bayesian_update.png", dpi=300, bbox_inches="tight")
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)
222
+ ```
223
+
224
+ ## References
225
+
226
+ ### Output Files
227
+
228
+ | ファイル | 形式 |
229
+ |---|---|
230
+ | `results/cox_ph_results.csv` | CSV |
231
+ | `results/safety_analysis.csv` | CSV |
232
+ | `figures/kaplan_meier.png` | PNG |
233
+ | `figures/cox_ph_forest.png` | PNG |
234
+ | `figures/bayesian_update.png` | PNG |
235
+
236
+ #### 参照実験
237
+
238
+ - **Exp-03**: Kaplan-Meier + Cox PH(がん生存解析)
239
+ - **Exp-06**: Phase III RCT 統計解析(検出力、頻度論+ベイズ、安全性)