@nahisaho/satori 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/LICENCE +0 -0
  2. package/README.md +191 -0
  3. package/bin/satori.js +95 -0
  4. package/package.json +29 -0
  5. package/src/.github/skills/scientific-academic-writing/SKILL.md +361 -0
  6. package/src/.github/skills/scientific-academic-writing/assets/acs_article.md +199 -0
  7. package/src/.github/skills/scientific-academic-writing/assets/elsevier_article.md +244 -0
  8. package/src/.github/skills/scientific-academic-writing/assets/ieee_transactions.md +212 -0
  9. package/src/.github/skills/scientific-academic-writing/assets/imrad_standard.md +181 -0
  10. package/src/.github/skills/scientific-academic-writing/assets/nature_article.md +179 -0
  11. package/src/.github/skills/scientific-academic-writing/assets/qiita_technical_article.md +385 -0
  12. package/src/.github/skills/scientific-academic-writing/assets/science_research_article.md +169 -0
  13. package/src/.github/skills/scientific-bioinformatics/SKILL.md +220 -0
  14. package/src/.github/skills/scientific-biosignal-processing/SKILL.md +357 -0
  15. package/src/.github/skills/scientific-causal-inference/SKILL.md +347 -0
  16. package/src/.github/skills/scientific-cheminformatics/SKILL.md +196 -0
  17. package/src/.github/skills/scientific-data-preprocessing/SKILL.md +413 -0
  18. package/src/.github/skills/scientific-data-simulation/SKILL.md +244 -0
  19. package/src/.github/skills/scientific-doe/SKILL.md +360 -0
  20. package/src/.github/skills/scientific-eda-correlation/SKILL.md +141 -0
  21. package/src/.github/skills/scientific-feature-importance/SKILL.md +208 -0
  22. package/src/.github/skills/scientific-image-analysis/SKILL.md +310 -0
  23. package/src/.github/skills/scientific-materials-characterization/SKILL.md +368 -0
  24. package/src/.github/skills/scientific-meta-analysis/SKILL.md +352 -0
  25. package/src/.github/skills/scientific-metabolomics/SKILL.md +326 -0
  26. package/src/.github/skills/scientific-ml-classification/SKILL.md +265 -0
  27. package/src/.github/skills/scientific-ml-regression/SKILL.md +215 -0
  28. package/src/.github/skills/scientific-multi-omics/SKILL.md +303 -0
  29. package/src/.github/skills/scientific-network-analysis/SKILL.md +257 -0
  30. package/src/.github/skills/scientific-pca-tsne/SKILL.md +235 -0
  31. package/src/.github/skills/scientific-pipeline-scaffold/SKILL.md +331 -0
  32. package/src/.github/skills/scientific-process-optimization/SKILL.md +215 -0
  33. package/src/.github/skills/scientific-publication-figures/SKILL.md +208 -0
  34. package/src/.github/skills/scientific-sequence-analysis/SKILL.md +389 -0
  35. package/src/.github/skills/scientific-spectral-signal/SKILL.md +227 -0
  36. package/src/.github/skills/scientific-statistical-testing/SKILL.md +240 -0
  37. package/src/.github/skills/scientific-survival-clinical/SKILL.md +239 -0
  38. package/src/.github/skills/scientific-time-series/SKILL.md +291 -0
@@ -0,0 +1,347 @@
1
+ ---
2
+ name: scientific-causal-inference
3
+ description: |
4
+ 因果推論スキル。傾向スコアマッチング(PSM)、逆確率重み付け(IPW / IPTW)、
5
+ 操作変数法(2SLS)、差分の差分法(DID)、回帰不連続デザイン(RDD)、
6
+ DAG ベースの共変量選択(backdoor criterion)、感度分析テンプレートを提供。
7
+ ---
8
+
9
+ # Scientific Causal Inference
10
+
11
+ 観察データから因果効果を推定するための統計的手法パイプライン。
12
+ RCT が実施できない状況で交絡因子を調整し、因果的解釈を可能にする。
13
+
14
+ ## When to Use
15
+
16
+ - 観察データから因果効果(ATE / ATT)を推定したいとき
17
+ - 交絡因子の調整が必要なとき
18
+ - 傾向スコアによるマッチングや重み付けが必要なとき
19
+ - 自然実験データ(DID / RDD)を分析するとき
20
+ - DAG を描いて因果構造を明示化するとき
21
+
22
+ ---
23
+
24
+ ## Quick Start
25
+
26
+ ## 1. DAG(有向非巡回グラフ)の定義
27
+
28
+ ```python
29
+ import networkx as nx
30
+ import matplotlib.pyplot as plt
31
+
32
def define_causal_dag(edges, treatment, outcome, figsize=(10, 6)):
    """Build and visualize a causal DAG.

    Parameters:
        edges: list of (cause, effect) tuples
        treatment: treatment variable name (drawn in red)
        outcome: outcome variable name (drawn in teal)
        figsize: matplotlib figure size

    Returns:
        networkx.DiGraph of the causal structure.

    Example:
        edges = [("Age", "Treatment"), ("Age", "Outcome"),
                 ("Treatment", "Outcome"), ("Gender", "Treatment")]
    """
    graph = nx.DiGraph(edges)

    # Color each node by its causal role: treatment / outcome / covariate.
    role_colors = {treatment: "#FF6B6B", outcome: "#4ECDC4"}
    node_colors = [role_colors.get(node, "#95E1D3") for node in graph.nodes()]

    fig, ax = plt.subplots(figsize=figsize)
    # Fixed seed keeps the layout reproducible across runs.
    layout = nx.spring_layout(graph, k=2, seed=42)
    nx.draw(graph, layout, ax=ax, with_labels=True, node_color=node_colors,
            node_size=2000, font_size=11, font_weight="bold",
            edge_color="gray", arrows=True, arrowsize=20, width=2)
    ax.set_title("Causal DAG", fontweight="bold", fontsize=14)
    plt.tight_layout()
    plt.savefig("figures/causal_dag.png", dpi=300, bbox_inches="tight")
    plt.close()

    return graph
69
+
70
+
71
def identify_confounders(dag, treatment, outcome):
    """Return the covariate set to adjust for (backdoor criterion).

    Simplified heuristic: any parent of the treatment that also has a
    path to the outcome is flagged as a confounder to be adjusted.
    """
    return {
        parent
        for parent in dag.predecessors(treatment)
        if nx.has_path(dag, parent, outcome)
    }
85
+ ```
86
+
87
+ ## 2. 傾向スコアマッチング (PSM)
88
+
89
+ ```python
90
+ import numpy as np
91
+ import pandas as pd
92
+ from sklearn.linear_model import LogisticRegression
93
+ from scipy.spatial.distance import cdist
94
+
95
def propensity_score_matching(df, treatment_col, covariates, outcome_col,
                              caliper=0.2, n_matches=1):
    """
    Propensity score matching (nearest neighbour, with replacement).

    Steps:
        1. Estimate the propensity score P(T=1|X) by logistic regression
        2. Nearest-neighbour matching under a caliper constraint
        3. Covariate balance check (standardized mean differences)
        4. ATT estimation on the matched sample

    Parameters:
        df: input DataFrame (any index is supported)
        treatment_col: binary treatment column (0/1)
        covariates: list of covariate column names
        outcome_col: outcome column name
        caliper: matching tolerance, as a multiple of the propensity-score
            standard deviation
        n_matches: number of control matches per treated unit (1:k matching;
            default 1 reproduces classic 1:1 matching)

    Returns:
        dict with ATT, matched-pair count, balance table, propensity
        scores, and the matched (treated, control) index-label pairs.
    """
    # Step 1: propensity score estimation.
    X = df[covariates].values
    T = df[treatment_col].values

    lr = LogisticRegression(max_iter=1000, random_state=42)
    lr.fit(X, T)
    ps = lr.predict_proba(X)[:, 1]
    df = df.copy()
    df["propensity_score"] = ps

    # Step 2: matching.
    # BUG FIX: `ps` is a positional numpy array, so it must be indexed by
    # POSITION. The original indexed it with DataFrame index labels, which
    # silently breaks (or raises) whenever df has a non-default index.
    treated_pos = np.flatnonzero(T == 1)
    control_pos = np.flatnonzero(T == 0)
    treated_idx = df.index[treated_pos]
    control_idx = df.index[control_pos]

    ps_treated = ps[treated_pos]
    ps_control = ps[control_pos]

    caliper_val = caliper * np.std(ps)
    matched_pairs = []
    n_treated_matched = 0

    for i, t_idx in enumerate(treated_idx):
        distances = np.abs(ps_treated[i] - ps_control)
        within_caliper = np.where(distances <= caliper_val)[0]
        if len(within_caliper) > 0:
            # Take the n_matches closest controls inside the caliper
            # (controls may be reused across treated units: matching
            # with replacement).
            order = within_caliper[np.argsort(distances[within_caliper])]
            for best in order[:n_matches]:
                matched_pairs.append((t_idx, control_idx[best]))
            n_treated_matched += 1

    print(f" Matched {n_treated_matched} / {len(treated_idx)} treated units")

    # Step 3: balance check via standardized mean differences (SMD).
    matched_treated = df.loc[[p[0] for p in matched_pairs]]
    matched_control = df.loc[[p[1] for p in matched_pairs]]

    balance = []
    for cov in covariates:
        smd_before = _standardized_mean_diff(
            df[df[treatment_col] == 1][cov], df[df[treatment_col] == 0][cov])
        smd_after = _standardized_mean_diff(
            matched_treated[cov], matched_control[cov])
        balance.append({
            "covariate": cov,
            "SMD_before": smd_before,
            "SMD_after": smd_after,
            # Conventional balance threshold: |SMD| < 0.1.
            "balanced": abs(smd_after) < 0.1,
        })

    balance_df = pd.DataFrame(balance)

    # Step 4: ATT = mean outcome difference on the matched sample.
    att = matched_treated[outcome_col].mean() - matched_control[outcome_col].mean()

    return {
        "ATT": att,
        "n_matched": len(matched_pairs),
        "balance": balance_df,
        "propensity_scores": ps,
        "matched_pairs": matched_pairs,
    }
167
+
168
+
169
+ def _standardized_mean_diff(x1, x2):
170
+ """Standardized Mean Difference (SMD) = |μ1 - μ2| / sqrt((s1² + s2²)/2)"""
171
+ return abs(x1.mean() - x2.mean()) / np.sqrt((x1.var() + x2.var()) / 2 + 1e-10)
172
+ ```
173
+
174
+ ## 3. 逆確率重み付け (IPW / IPTW)
175
+
176
+ ```python
177
def inverse_probability_weighting(df, treatment_col, covariates, outcome_col):
    """
    Inverse probability of treatment weighting (IPTW) estimator.

    ATE = E[Y(1)] - E[Y(0)]
        = Σ (T·Y/PS) / Σ (T/PS) - Σ ((1-T)·Y/(1-PS)) / Σ ((1-T)/(1-PS))

    ATT weights controls by the odds PS/(1-PS) so the control sample
    mimics the covariate distribution of the treated.
    """
    X = df[covariates].values
    T = df[treatment_col].values
    Y = df[outcome_col].values

    # Propensity score estimation.
    lr = LogisticRegression(max_iter=1000, random_state=42)
    lr.fit(X, T)
    ps = lr.predict_proba(X)[:, 1]

    # Plain (unstabilized) IPW weights; epsilon avoids division by zero.
    # (The original comment claimed "stabilized weights", but no marginal
    # treatment probability is applied here.)
    w_treated = T / (ps + 1e-10)
    w_control = (1 - T) / (1 - ps + 1e-10)

    # ATE via the normalized (Hájek) estimator.
    E_Y1 = np.sum(w_treated * Y) / np.sum(w_treated)
    E_Y0 = np.sum(w_control * Y) / np.sum(w_control)
    ate = E_Y1 - E_Y0

    # ATT: observed treated mean minus odds-weighted control mean.
    # BUG FIX: the treated mean is Σ(T·Y)/Σ(T); the original used
    # np.mean(T*Y), which wrongly divides by the FULL sample size.
    odds_w = (1 - T) * ps / (1 - ps + 1e-10)
    att = np.sum(T * Y) / np.sum(T) - np.sum(odds_w * Y) / np.sum(odds_w)

    return {
        "ATE": ate,
        "ATT": att,
        "E_Y1": E_Y1,
        "E_Y0": E_Y0,
        "propensity_scores": ps,
        "weights_treated": w_treated,
        "weights_control": w_control,
    }
214
+ ```
215
+
216
+ ## 4. 差分の差分法 (DID)
217
+
218
+ ```python
219
+ import statsmodels.api as sm
220
+
221
def difference_in_differences(df, time_col, treatment_col, outcome_col,
                              covariates=None):
    """
    Difference-in-Differences (DID) estimator.

    Fits Y = β0 + β1·Post + β2·Treat + β3·(Post × Treat) + ε by OLS.
    The interaction coefficient β3 is the DID causal estimate.
    """
    data = df.copy()
    data["interaction"] = data[time_col] * data[treatment_col]

    regressors = [time_col, treatment_col, "interaction"]
    if covariates:
        regressors = regressors + covariates

    design = sm.add_constant(data[regressors])
    fit = sm.OLS(data[outcome_col], design).fit()

    return {
        "DID_estimate": fit.params["interaction"],
        "DID_se": fit.bse["interaction"],
        "DID_pvalue": fit.pvalues["interaction"],
        "DID_ci_95": fit.conf_int().loc["interaction"].tolist(),
        "model_summary": fit.summary2().tables[1],
    }
246
+ ```
247
+
248
+ ## 5. 回帰不連続デザイン (RDD)
249
+
250
+ ```python
251
def regression_discontinuity(df, running_var, outcome_col, cutoff,
                             bandwidth=None, kernel="triangular"):
    """
    Sharp regression discontinuity design (RDD).

    Estimates the treatment effect as the jump of a local linear
    regression at the cutoff of the running variable.

    Parameters:
        running_var: running (forcing) variable column name
        cutoff: threshold assigning treatment (running_var >= cutoff)
        bandwidth: estimation-window half-width; None uses a simple
            rule of thumb (1.5 × std of the centered running variable)
        kernel: "triangular" weights inside the window; any other value
            falls back to uniform weights
    """
    data = df.copy()
    data["centered"] = data[running_var] - cutoff
    data["treated"] = (data[running_var] >= cutoff).astype(int)

    if bandwidth is None:
        # NOTE: heuristic bandwidth, not the IK optimal bandwidth.
        bandwidth = 1.5 * data["centered"].std()

    # Restrict estimation to observations inside the bandwidth window.
    local = data[data["centered"].abs() <= bandwidth]

    # Kernel weights.
    if kernel == "triangular":
        w = 1 - np.abs(local["centered"]) / bandwidth
    else:
        w = np.ones(len(local))

    # Local linear regression allowing a slope change at the cutoff.
    design = sm.add_constant(local[["centered", "treated"]])
    design["interaction"] = local["centered"] * local["treated"]
    fit = sm.WLS(local[outcome_col], design, weights=w).fit()

    return {
        "RDD_estimate": fit.params["treated"],
        "RDD_se": fit.bse["treated"],
        "RDD_pvalue": fit.pvalues["treated"],
        "RDD_ci_95": fit.conf_int().loc["treated"].tolist(),
        "bandwidth": bandwidth,
        "n_in_bandwidth": len(local),
    }
292
+ ```
293
+
294
+ ## 6. 感度分析(Rosenbaum Bounds)
295
+
296
+ ```python
297
def rosenbaum_sensitivity(matched_outcomes_treated, matched_outcomes_control,
                          gamma_range=None):
    """
    Rosenbaum bounds sensitivity analysis for matched pairs.

    Assesses how robust the causal estimate is to a hidden confounder of
    strength Γ, using a normal approximation to the sign test.

    Γ = 1: no-hidden-bias assumption
    Γ > 1: a hidden confounder multiplies the treatment odds by up to Γ
    """
    from scipy.stats import norm

    if gamma_range is None:
        gamma_range = np.arange(1.0, 3.1, 0.1)

    pair_diffs = matched_outcomes_treated - matched_outcomes_control
    n_pairs = len(pair_diffs)
    # Sign-test statistic: count of pairs where the treated outcome is larger.
    t_obs = int(np.sum(pair_diffs > 0))

    rows = []
    for gamma in gamma_range:
        p_plus = gamma / (1 + gamma)  # worst-case probability of a positive sign
        mean_t = n_pairs * p_plus
        var_t = n_pairs * p_plus * (1 - p_plus)
        z_stat = (t_obs - mean_t) / np.sqrt(var_t + 1e-10)
        rows.append({
            "gamma": gamma,
            "z_statistic": z_stat,
            "p_value": 1 - norm.cdf(z_stat),
        })

    return pd.DataFrame(rows)
326
+ ```
327
+
328
+ ## References
329
+
330
+ ### Output Files
331
+
332
+ | ファイル | 形式 |
333
+ |---|---|
334
+ | `results/causal_estimates.csv` | CSV |
335
+ | `results/covariate_balance.csv` | CSV |
336
+ | `results/sensitivity_analysis.csv` | CSV |
337
+ | `figures/causal_dag.png` | PNG |
338
+ | `figures/propensity_distribution.png` | PNG |
339
+ | `figures/rdd_plot.png` | PNG |
340
+
341
+ #### 依存パッケージ
342
+
343
+ ```
344
+ statsmodels>=0.14
345
+ scikit-learn>=1.3
346
+ networkx>=3.0
347
+ ```
@@ -0,0 +1,196 @@
1
+ ---
2
+ name: scientific-cheminformatics
3
+ description: |
4
+ ケモインフォマティクス解析のスキル。RDKit を用いた分子記述子計算、Morgan フィンガープリント、
5
+ Tanimoto 類似度、構造アラート検出、Lipinski Rule of 5 評価を行う際に使用。
6
+ Scientific Skills Exp-02, 05 で確立したパターン。
7
+ ---
8
+
9
+ # Scientific Cheminformatics Analysis
10
+
11
+ RDKit を用いた分子解析パイプラインスキル。SMILES → 分子記述子 → SAR 解析 →
12
+ 毒性予測までの創薬ケモインフォマティクスワークフローを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - 化合物の物理化学的性質を算出したいとき
17
+ - SMILES 文字列から分子記述子を計算したいとき
18
+ - 化合物間の構造類似度を評価したいとき
19
+ - 構造活性相関(SAR)を解析したいとき
20
+ - 構造アラート(トキシコフォア)を検出したいとき
21
+ - Lipinski Rule of 5 / ドラッグライクネスを評価したいとき
22
+
23
+ ## Quick Start
24
+
25
+ ## 標準パイプライン
26
+
27
+ ### 1. SMILES → 分子オブジェクト変換
28
+
29
+ ```python
30
+ from rdkit import Chem
31
+ from rdkit.Chem import Descriptors, AllChem, QED, Lipinski
32
+ from rdkit.Chem.Scaffolds import MurckoScaffold
33
+ import pandas as pd
34
+ import numpy as np
35
+
36
def smiles_to_mol(smiles):
    """Convert a SMILES string into an RDKit molecule object.

    Raises:
        ValueError: if RDKit cannot parse the SMILES string.
    """
    molecule = Chem.MolFromSmiles(smiles)
    if molecule is None:
        raise ValueError(f"Invalid SMILES: {smiles}")
    return molecule
42
+ ```
43
+
44
+ ### 2. 分子記述子の一括計算
45
+
46
+ ```python
47
def calculate_descriptors(smiles_list, names=None):
    """
    Compute a standard panel of molecular descriptors for each SMILES.

    Unparseable SMILES are silently skipped.

    Returns:
        DataFrame with one row per valid molecule.
    """
    rows = []
    for idx, smiles in enumerate(smiles_list):
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            continue  # skip invalid SMILES

        rows.append({
            "Name": names[idx] if names else f"Mol_{idx}",
            "SMILES": smiles,
            "MW": Descriptors.MolWt(mol),
            "LogP": Descriptors.MolLogP(mol),
            "TPSA": Descriptors.TPSA(mol),
            "HBA": Descriptors.NumHAcceptors(mol),
            "HBD": Descriptors.NumHDonors(mol),
            "RotBonds": Descriptors.NumRotatableBonds(mol),
            "AromaticRings": Descriptors.NumAromaticRings(mol),
            "HeavyAtoms": mol.GetNumHeavyAtoms(),
            "QED": QED.qed(mol),
            "Fraction_CSP3": Descriptors.FractionCSP3(mol),
        })

    return pd.DataFrame(rows)
75
+ ```
76
+
77
+ ### 3. Morgan フィンガープリント & Tanimoto 類似度
78
+
79
+ ```python
80
+ from rdkit import DataStructs
81
+
82
def compute_fingerprints(smiles_list, radius=2, nBits=2048):
    """Generate Morgan (circular) fingerprints; invalid SMILES are skipped."""
    fingerprints = []
    for smiles in smiles_list:
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            continue
        fingerprints.append(
            AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits))
    return fingerprints
91
+
92
def tanimoto_similarity_matrix(fps, names=None):
    """
    Compute the pairwise Tanimoto similarity matrix.

    PERF: Tanimoto similarity is symmetric and the self-similarity is 1,
    so only the upper triangle is computed and mirrored — the original
    evaluated all n² pairs. Results are identical.

    Side effect: writes the matrix to results/tanimoto_similarity.csv.

    Returns:
        DataFrame indexed and labelled by molecule names.
    """
    n = len(fps)
    sim_matrix = np.eye(n)  # diagonal: Tanimoto(x, x) == 1
    for i in range(n):
        for j in range(i + 1, n):
            s = DataStructs.TanimotoSimilarity(fps[i], fps[j])
            sim_matrix[i, j] = sim_matrix[j, i] = s

    if names is None:
        names = [f"Mol_{i}" for i in range(n)]

    sim_df = pd.DataFrame(sim_matrix, index=names, columns=names)
    sim_df.to_csv("results/tanimoto_similarity.csv")
    return sim_df
106
+ ```
107
+
108
+ ### 4. Lipinski Rule of 5 評価
109
+
110
+ ```python
111
def lipinski_evaluation(desc_df):
    """Annotate a descriptor table with Lipinski Rule-of-5 compliance.

    Adds one boolean column per rule, a violation count (0-4), and an
    overall pass flag (at most one violation). The input DataFrame is
    not modified.
    """
    out = desc_df.copy()
    rules = {
        "Lipinski_MW": out["MW"] <= 500,
        "Lipinski_LogP": out["LogP"] <= 5,
        "Lipinski_HBA": out["HBA"] <= 10,
        "Lipinski_HBD": out["HBD"] <= 5,
    }
    for column, passed in rules.items():
        out[column] = passed

    # Violations = number of rules NOT satisfied.
    satisfied = sum(flags.astype(int) for flags in rules.values())
    out["Lipinski_Violations"] = 4 - satisfied
    out["Lipinski_Pass"] = out["Lipinski_Violations"] <= 1
    return out
126
+ ```
127
+
128
+ ### 5. 構造アラート(トキシコフォア)検出(Exp-05)
129
+
130
+ ```python
131
# SMARTS patterns for common structural alerts (toxicophores); used as the
# default alert set by detect_structural_alerts().
STRUCTURAL_ALERTS = {
    "Nitro": "[N+](=O)[O-]",            # nitro group
    "Epoxide": "C1OC1",                 # three-membered cyclic ether
    "Aldehyde": "[CH]=O",               # aldehyde carbonyl
    "Michael_Acceptor": "C=CC(=O)",     # alpha,beta-unsaturated carbonyl
    "Acyl_Halide": "C(=O)[F,Cl,Br,I]",  # reactive acyl halide
    "Aniline": "c1ccccc1N",             # aromatic amine
    "Hydrazine": "NN",                  # N-N single bond
    "Sulfonate": "S(=O)(=O)[O-]",       # sulfonate anion
}
141
+
142
def detect_structural_alerts(smiles_list, names=None, alerts=None):
    """
    Flag molecules matching toxicophore SMARTS patterns.

    Parameters:
        smiles_list: iterable of SMILES strings (unparseable ones are skipped)
        names: optional list of molecule names aligned with smiles_list
        alerts: mapping of alert name -> SMARTS (default: STRUCTURAL_ALERTS)

    Returns:
        DataFrame with one row per (molecule, matched alert) pair.

    Raises:
        ValueError: if an alert SMARTS pattern fails to compile.
    """
    if alerts is None:
        alerts = STRUCTURAL_ALERTS

    # PERF: compile each SMARTS once, instead of re-parsing every pattern
    # for every molecule as the original did. Also fail loudly on a bad
    # pattern instead of passing None into HasSubstructMatch.
    compiled = {}
    for alert_name, smarts in alerts.items():
        pattern = Chem.MolFromSmarts(smarts)
        if pattern is None:
            raise ValueError(f"Invalid SMARTS for alert '{alert_name}': {smarts}")
        compiled[alert_name] = (smarts, pattern)

    results = []
    for i, smi in enumerate(smiles_list):
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            continue  # skip unparseable SMILES

        name = names[i] if names else f"Mol_{i}"
        for alert_name, (smarts, pattern) in compiled.items():
            if mol.HasSubstructMatch(pattern):
                results.append({"Name": name, "SMILES": smi,
                                "Alert": alert_name, "SMARTS": smarts})

    return pd.DataFrame(results)
161
+ ```
162
+
163
+ ### 6. Murcko スキャフォールド解析
164
+
165
+ ```python
166
def scaffold_analysis(smiles_list, names=None):
    """Extract the Murcko scaffold of each parseable SMILES.

    Returns:
        DataFrame with Name, SMILES, and canonical scaffold SMILES columns.
    """
    records = []
    for idx, smiles in enumerate(smiles_list):
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            continue
        scaffold = MurckoScaffold.GetScaffoldForMol(mol)
        records.append({
            "Name": names[idx] if names else f"Mol_{idx}",
            "SMILES": smiles,
            "Scaffold": Chem.MolToSmiles(scaffold),
        })
    return pd.DataFrame(records)
179
+ ```
180
+
181
+ ## References
182
+
183
+ ### Output Files
184
+
185
+ | ファイル | 形式 |
186
+ |---|---|
187
+ | `results/molecular_properties.csv` | CSV |
188
+ | `results/tanimoto_similarity.csv` | CSV |
189
+ | `results/structural_alerts.csv` | CSV |
190
+ | `figures/chemical_space_pca.png` | PNG |
191
+ | `figures/similarity_heatmap.png` | PNG |
192
+
193
+ #### 参照実験
194
+
195
+ - **Exp-02**: EGFR 阻害剤 SAR 解析(記述子、Tanimoto、MCS、Scaffold)
196
+ - **Exp-05**: 毒性予測(構造アラート、Morgan FP 分類モデル)