@nahisaho/satori 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,280 @@
1
+ ---
2
+ name: scientific-reinforcement-learning
3
+ description: |
4
+ 強化学習スキル。Stable-Baselines3 による RL エージェント訓練、
5
+ Gymnasium 環境構築、PufferLib 大規模マルチエージェント、
6
+ 科学応用 (分子生成・実験最適化・ロボット制御) パイプライン。
7
+ ---
8
+
9
+ # Scientific Reinforcement Learning
10
+
11
+ Stable-Baselines3 / PufferLib / Gymnasium を活用した
12
+ 強化学習パイプラインを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - RL エージェントを訓練・評価するとき
17
+ - カスタム Gymnasium 環境を構築するとき
18
+ - 分子設計・創薬に RL を適用するとき
19
+ - 実験パラメータの逐次最適化に RL を使うとき
20
+ - マルチエージェント強化学習を実行するとき
21
+ - ロボティクス・ラボオートメーションの制御方策を学習するとき
22
+
23
+ ---
24
+
25
+ ## Quick Start
26
+
27
+ ## 1. Stable-Baselines3 基本訓練
28
+
29
+ ```python
30
+ import numpy as np
31
+ import gymnasium as gym
32
+ from stable_baselines3 import PPO, SAC, A2C, DQN
33
+ from stable_baselines3.common.evaluation import evaluate_policy
34
+ from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
35
+ from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback
36
+
37
+
38
def train_rl_agent(env_id, algorithm="PPO", total_timesteps=100_000,
                   n_envs=4, hyperparams=None):
    """
    Train a Stable-Baselines3 agent on a Gymnasium environment and evaluate it.

    Best models are written under ./models/best/, evaluation logs under
    ./logs/ and periodic checkpoints under ./models/checkpoints/.

    Parameters:
        env_id: str — Gymnasium environment ID (e.g., "CartPole-v1", "LunarLander-v3")
        algorithm: str — "PPO", "SAC", "A2C", "DQN"; any other value falls
            back to PPO and emits a warning
        total_timesteps: int — total number of training timesteps
        n_envs: int — number of vectorized training environments (>= 1)
        hyperparams: dict — overrides merged over the per-algorithm defaults

    Returns:
        tuple — (model, {"mean_reward": ..., "std_reward": ...}); the metrics
        come from 20 evaluation episodes run after training.

    Raises:
        ValueError — if n_envs is smaller than 1.

    K-Dense: stable-baselines3 — RL training framework
    """
    import warnings

    if n_envs < 1:
        raise ValueError(f"n_envs must be >= 1, got {n_envs}")

    algo_map = {"PPO": PPO, "SAC": SAC, "A2C": A2C, "DQN": DQN}
    if algorithm not in algo_map:
        # Keep the historical PPO fallback, but make it visible and make the
        # final log line report the algorithm that was actually trained.
        warnings.warn(
            f"Unknown algorithm {algorithm!r}; falling back to PPO.",
            stacklevel=2,
        )
        algorithm = "PPO"
    AlgoClass = algo_map[algorithm]

    # Vectorized environments
    env = DummyVecEnv([lambda: gym.make(env_id) for _ in range(n_envs)])

    # Default hyperparams per algorithm
    default_params = {
        "PPO": {"learning_rate": 3e-4, "n_steps": 2048, "batch_size": 64},
        "SAC": {"learning_rate": 3e-4, "buffer_size": 1_000_000},
        "A2C": {"learning_rate": 7e-4, "n_steps": 5},
        "DQN": {"learning_rate": 1e-4, "buffer_size": 100_000},
    }
    params = {**default_params[algorithm], **(hyperparams or {})}

    model = AlgoClass("MlpPolicy", env, verbose=1, **params)

    eval_env = gym.make(env_id)
    try:
        # Callback frequencies are counted in calls to the vectorized
        # env.step(), each of which advances n_envs timesteps; divide so the
        # 10k / 25k intervals stay expressed in environment timesteps.
        eval_callback = EvalCallback(
            eval_env, best_model_save_path="./models/best/",
            log_path="./logs/", eval_freq=max(10_000 // n_envs, 1),
        )
        checkpoint_callback = CheckpointCallback(
            save_freq=max(25_000 // n_envs, 1),
            save_path="./models/checkpoints/",
        )

        model.learn(
            total_timesteps=total_timesteps,
            callback=[eval_callback, checkpoint_callback],
        )

        # Evaluation
        mean_reward, std_reward = evaluate_policy(
            model, eval_env, n_eval_episodes=20,
        )
    finally:
        # The evaluation env is private to this function; release it even
        # when training or evaluation fails.
        eval_env.close()

    print(f"RL Training ({algorithm} on {env_id}): "
          f"reward = {mean_reward:.2f} ± {std_reward:.2f}")

    return model, {"mean_reward": mean_reward, "std_reward": std_reward}
92
+ ```
93
+
94
+ ## 2. カスタム Gymnasium 環境
95
+
96
+ ```python
97
class MoleculeDesignEnv(gym.Env):
    """
    Custom RL environment skeleton for molecule design.

    Intended design:
        State:  molecular fingerprint (Morgan FP)
        Action: add / remove / modify atoms and bonds
        Reward: drug-likeness score (QED) + predicted binding affinity

    NOTE: this is a scaffold. `step` does not yet modify `current_mol`, the
    observation is always an all-zero fingerprint, and the reward is a
    placeholder (0.0 while no molecule exists, otherwise a random draw).
    """
    metadata = {"render_modes": ["human"]}

    # Length of the fingerprint vector used as the observation
    FINGERPRINT_BITS = 2048

    def __init__(self, max_atoms=50, target_property="qed"):
        super().__init__()
        self.max_atoms = max_atoms
        self.target_property = target_property

        # Action space: discrete (add atom types, add bonds, remove)
        self.action_space = gym.spaces.Discrete(10)

        # Observation space: molecular fingerprint
        self.observation_space = gym.spaces.Box(
            low=0, high=1, shape=(self.FINGERPRINT_BITS,), dtype=np.float32,
        )

        self.current_mol = None
        self.step_count = 0

    def reset(self, seed=None, options=None):
        """Start a new episode from an empty molecule; returns (obs, info)."""
        # Seeds self.np_random so that episodes are reproducible
        super().reset(seed=seed)
        self.current_mol = None  # Start from scratch
        self.step_count = 0
        return self._get_observation(), {}

    def step(self, action):
        """Advance one design step; returns (obs, reward, terminated, truncated, info)."""
        self.step_count += 1

        # Placeholder: the action is not applied to the molecule yet
        reward = self._calculate_reward()
        # The episode ends once max_atoms steps have been taken
        terminated = self.step_count >= self.max_atoms
        truncated = False
        obs = self._get_observation()

        return obs, reward, terminated, truncated, {}

    def _calculate_reward(self):
        """Calculate reward based on molecular properties."""
        if self.current_mol is None:
            return 0.0
        # Placeholder for a QED score. Drawn from the environment's own
        # seeded generator (not the global np.random state) so that
        # reset(seed=...) makes rewards reproducible.
        return float(self.np_random.uniform(0, 1))

    def _get_observation(self):
        """Return the current fingerprint (all zeros in this skeleton)."""
        return np.zeros(self.FINGERPRINT_BITS, dtype=np.float32)
150
+
151
+
152
def train_molecule_designer(total_timesteps=50_000):
    """
    Train a PPO agent on MoleculeDesignEnv and print its evaluation reward.

    Parameters:
        total_timesteps: int — number of training timesteps

    Returns:
        The trained PPO model.
    """
    design_env = MoleculeDesignEnv()
    agent = PPO("MlpPolicy", design_env, verbose=1, learning_rate=1e-4)
    agent.learn(total_timesteps=total_timesteps)

    # Score the trained policy over 10 episodes on the same environment
    reward_mean, reward_std = evaluate_policy(
        agent, design_env, n_eval_episodes=10,
    )
    summary = f"Molecule Designer: reward = {reward_mean:.2f} ± {reward_std:.2f}"
    print(summary)
    return agent
161
+ ```
162
+
163
+ ## 3. PufferLib 大規模マルチエージェント
164
+
165
+ ```python
166
def setup_pufferlib_training(env_name, num_agents=8, algorithm="PPO"):
    """
    Build a PufferLib multi-agent RL training configuration.

    Parameters:
        env_name: str — PufferLib-compatible environment name
        num_agents: int — number of agents
        algorithm: str — "PPO", "IMPALA"

    Returns:
        dict — the training configuration, or None when PufferLib cannot
        be imported (an installation hint is printed in that case).

    K-Dense: pufferlib — Scalable multi-agent RL
    """
    # Availability probe: the configuration is only produced when PufferLib
    # and its environments module can be imported.
    try:
        import pufferlib
        import pufferlib.environments
    except ImportError:
        print("PufferLib not installed. Install with: pip install pufferlib")
        return None

    config = dict(
        env=env_name,
        num_agents=num_agents,
        algorithm=algorithm,
        total_timesteps=1_000_000,
        batch_size=256,
        learning_rate=2.5e-4,
        num_envs=16,
        num_steps=128,
    )
    print(f"PufferLib config: {config}")
    return config
197
+ ```
198
+
199
+ ## 4. 実験パラメータ逐次最適化
200
+
201
+ ```python
202
def rl_experiment_optimizer(parameter_ranges, objective_fn,
                            total_episodes=100, algorithm="PPO"):
    """
    Sequentially optimize experiment parameters with an RL agent.

    Each environment step proposes one parameter set (an action in [-1, 1]
    per parameter, rescaled to its range), evaluates `objective_fn` on it and
    uses the score as the reward. The environment never terminates, so
    `total_episodes` is the budget of objective evaluations.

    Parameters:
        parameter_ranges: dict — {param_name: (min, max)}
        objective_fn: callable — objective function (params dict → score)
        total_episodes: int — evaluation budget (>= 1). PPO needs a rollout
            of at least 2 steps and collects whole rollouts, so it may
            slightly exceed the budget.
        algorithm: str — "SAC" selects SAC; any other value selects PPO

    Returns:
        tuple — (best, history) where history is a list of
        {"params": {...}, "score": float} in evaluation order and best is
        the first record with the highest score.

    Raises:
        ValueError — if parameter_ranges is empty or total_episodes < 1.
    """
    param_names = list(parameter_ranges)
    n_params = len(param_names)
    if n_params == 0:
        raise ValueError("parameter_ranges must contain at least one parameter")
    if total_episodes < 1:
        raise ValueError(f"total_episodes must be >= 1, got {total_episodes}")

    class ExperimentEnv(gym.Env):
        def __init__(self):
            super().__init__()
            self.action_space = gym.spaces.Box(
                low=-1, high=1, shape=(n_params,), dtype=np.float32,
            )
            # Observation = previous action followed by its score
            self.observation_space = gym.spaces.Box(
                low=-np.inf, high=np.inf,
                shape=(n_params + 1,), dtype=np.float32,
            )
            self.best_score = -np.inf
            self.history = []

        def reset(self, seed=None, options=None):
            super().reset(seed=seed)
            self.current_params = np.zeros(n_params, dtype=np.float32)
            return np.zeros(n_params + 1, dtype=np.float32), {}

        def step(self, action):
            # Guard against out-of-range actions so parameters never leave
            # their declared (min, max) interval.
            action = np.clip(np.asarray(action, dtype=np.float32), -1.0, 1.0)

            # Scale action from [-1, 1] to parameter ranges
            params = {}
            for name, unit in zip(param_names, (action + 1.0) / 2.0):
                lo, hi = parameter_ranges[name]
                # Plain floats keep the history JSON-serializable
                params[name] = float(lo + unit * (hi - lo))

            score = float(objective_fn(params))
            self.history.append({"params": params, "score": score})
            self.best_score = max(self.best_score, score)

            obs = np.append(action, [score]).astype(np.float32)
            return obs, score, False, False, {}

    env = ExperimentEnv()
    if algorithm == "SAC":
        model = SAC("MlpPolicy", env, verbose=0)
    else:
        # PPO always collects a full rollout of n_steps before checking the
        # timestep budget; with the default n_steps=2048 a budget of 100
        # would trigger 2048 objective evaluations. Size the rollout to the
        # budget instead (PPO requires at least 2 steps per rollout).
        n_steps = max(2, min(2048, int(total_episodes)))
        model = PPO(
            "MlpPolicy", env, verbose=0,
            n_steps=n_steps, batch_size=min(64, n_steps),
        )
    model.learn(total_timesteps=total_episodes)

    best = max(env.history, key=lambda record: record["score"])
    print(f"RL Optimization: best score = {best['score']:.4f}")
    print(f" Best params: {best['params']}")
    return best, env.history
258
+ ```
259
+
260
+ ---
261
+
262
+ ## パイプライン出力
263
+
264
+ | 出力ファイル | 説明 | 連携先スキル |
265
+ |---|---|---|
266
+ | `models/rl_model.zip` | 訓練済み RL モデル | → deep-learning (モデル統合) |
267
+ | `results/rl_training_log.json` | 訓練曲線・メトリクス | → publication-figures |
268
+ | `results/rl_optimization.json` | 最適化パラメータ | → doe, process-optimization |
269
+ | `figures/rl_reward_curve.png` | 報酬曲線 | → presentation-design |
270
+
271
+ ## パイプライン統合
272
+
273
+ ```
274
+ doe ──→ reinforcement-learning ──→ lab-automation
275
+ (実験計画) (逐次最適化) (ロボット制御)
276
+
277
+ ├──→ drug-target-profiling (分子設計 RL)
278
+ ├──→ protein-design (構造最適化 RL)
279
+ └──→ deep-learning (DRL パイプライン)
280
+ ```
@@ -0,0 +1,277 @@
1
+ ---
2
+ name: scientific-symbolic-mathematics
3
+ description: |
4
+ 記号数学スキル。SymPy による解析的微積分・線形代数・微分方程式求解、
5
+ 記号式の LaTeX 変換、数値計算との統合、科学モデリング用
6
+ 記号計算パイプライン。
7
+ ---
8
+
9
+ # Scientific Symbolic Mathematics
10
+
11
+ SymPy を中心とした記号数学 (Computer Algebra System)
12
+ パイプラインを提供する。
13
+
14
+ ## When to Use
15
+
16
+ - 微分方程式を解析的に解くとき
17
+ - 数式の記号的微分・積分を行うとき
18
+ - 行列の固有値・固有ベクトルを記号的に求めるとき
19
+ - 科学モデルのパラメータに関する感度解析を記号的に行うとき
20
+ - 数式を LaTeX 形式に変換するとき
21
+ - 記号解と数値解を比較検証するとき
22
+
23
+ ---
24
+
25
+ ## Quick Start
26
+
27
+ ## 1. 微分方程式の解析解
28
+
29
+ ```python
30
+ import sympy as sp
31
+ from sympy import (
32
+ symbols, Function, Eq, dsolve, classify_ode,
33
+ exp, sin, cos, sqrt, pi, oo, integrate, diff,
34
+ Matrix, latex, simplify, factor, expand, solve,
35
+ Rational, Sum, Product, series,
36
+ )
37
+ import numpy as np
38
+
39
+
40
def solve_ode(ode_expr, dependent_var, independent_var, ics=None):
    """
    Solve an ordinary differential equation analytically.

    Parameters:
        ode_expr: sympy.Eq — ODE (e.g., Eq(f(x).diff(x, 2) + f(x), 0))
        dependent_var: sympy.Function — dependent variable (called with
            independent_var to form the unknown)
        independent_var: sympy.Symbol — independent variable
        ics: dict — initial conditions,
            e.g. {f(0): 1, f(x).diff(x).subs(x, 0): 0}

    Returns:
        The result of dsolve for the unknown function.

    K-Dense: sympy — Symbolic mathematics
    """
    unknown = dependent_var(independent_var)

    # Show the first three classifications reported for this equation
    hints = classify_ode(ode_expr, unknown)
    print(f"ODE classification: {hints[:3]}")

    solution = dsolve(ode_expr, unknown, ics=ics)

    # Plain-text form first, then the LaTeX rendering
    for label, rendered in (("Solution", solution), ("LaTeX", latex(solution))):
        print(f"{label}: {rendered}")
    return solution
62
+
63
+
64
# Example: damped harmonic oscillator, all symbols assumed positive
x, t, omega, gamma = symbols("x t omega gamma", positive=True)
f = Function("f")

# Equation of motion: f''(t) + 2γ f'(t) + ω² f(t) = 0
damped_ode = Eq(
    diff(f(t), t, 2) + 2 * gamma * diff(f(t), t) + omega**2 * f(t),
    0,
)
70
+ ```
71
+
72
+ ## 2. 記号的微積分
73
+
74
+ ```python
75
def symbolic_calculus(expr, var, operations=None):
    """
    Apply a selection of symbolic calculus operations to an expression.

    Parameters:
        expr: sympy expression — expression to operate on
        var: sympy.Symbol — variable
        operations: list — any of ["diff", "integrate", "series", "limit"];
            defaults to ["diff", "integrate"]

    Returns:
        dict — one entry per requested operation ("derivative", "integral",
        "series", "limit"), each of the form
        {"expr": result, "latex": LaTeX string}.
    """
    requested = ["diff", "integrate"] if operations is None else operations
    results = {}

    def _store(key, value):
        # Keep the raw expression next to its LaTeX rendering
        results[key] = {"expr": value, "latex": latex(value)}
        return value

    if "diff" in requested:
        derivative = _store("derivative", diff(expr, var))
        print(f"d/d{var}({expr}) = {derivative}")

    if "integrate" in requested:
        antiderivative = _store("integral", integrate(expr, var))
        print(f"∫{expr} d{var} = {antiderivative}")

    if "series" in requested:
        # Expansion about 0 with n=6
        expansion = _store("series", series(expr, var, 0, n=6))
        print(f"Taylor series: {expansion}")

    if "limit" in requested:
        from sympy import limit as sp_limit
        # Limit as var tends to +infinity
        at_infinity = _store("limit", sp_limit(expr, var, oo))
        print(f"lim({var}→∞) {expr} = {at_infinity}")

    return results
111
+ ```
112
+
113
+ ## 3. 線形代数 (記号的)
114
+
115
+ ```python
116
def symbolic_linear_algebra(matrix_data):
    """
    Symbolic linear algebra: determinant, eigen-decomposition,
    characteristic polynomial and (when it exists) the inverse.

    Parameters:
        matrix_data: list of lists — matrix entries (may contain symbols);
            the determinant and eigen computations require a square matrix

    Returns:
        dict with keys "determinant", "eigenvalues", "eigenvectors",
        "characteristic_polynomial" and, only when the matrix could be
        inverted, "inverse".
    """
    M = Matrix(matrix_data)
    print(f"Matrix ({M.rows}×{M.cols}):")
    sp.pprint(M)

    results = {}

    # Determinant
    det = M.det()
    results["determinant"] = {"expr": det, "latex": latex(det)}
    print(f"\nDeterminant: {det}")

    # Eigenvalues & eigenvectors
    eigenvals = M.eigenvals()
    results["eigenvalues"] = {
        str(value): multiplicity for value, multiplicity in eigenvals.items()
    }
    print(f"Eigenvalues: {eigenvals}")

    results["eigenvectors"] = [
        {"eigenvalue": str(value), "multiplicity": multiplicity,
         "vectors": [str(vec) for vec in basis]}
        for value, multiplicity, basis in M.eigenvects()
    ]

    # Characteristic polynomial
    lam = symbols("lambda")
    char_expr = M.charpoly(lam).as_expr()
    results["characteristic_polynomial"] = {
        "expr": str(char_expr),
        "latex": latex(char_expr),
    }
    print(f"Characteristic polynomial: {char_expr}")

    # Inverse (if nonsingular).
    # `det != 0` is a structural comparison, so an unsimplified expression
    # that is identically zero (e.g. sin(x)**2 + cos(x)**2 - 1) would pass it
    # and M.inv() would then raise. Simplify first; when zero-ness is still
    # undecidable, attempt the inversion and tolerate a singular matrix.
    if not simplify(det).is_zero:
        try:
            inv = M.inv()
        except ValueError:
            # SymPy's NonInvertibleMatrixError derives from ValueError
            print("Matrix is singular: inverse skipped")
        else:
            results["inverse"] = {"latex": latex(inv)}
            print(f"Inverse exists: {M.rows}×{M.cols}")

    return results
162
+ ```
163
+
164
+ ## 4. 科学モデリング (薬物動態学 PK モデル例)
165
+
166
+ ```python
167
def pk_compartment_model(n_compartments=1):
    """
    Solve a pharmacokinetic compartment model symbolically.

    Parameters:
        n_compartments: int — 1 (1-compartment) or 2 (2-compartment)

    Returns:
        dict of LaTeX strings.
        1-compartment: "model", "ode", "solution", "half_life", "auc_inf".
        2-compartment: "model", "system", "solution" (lists, one entry per
        equation). The 2-compartment solution is the general solution with
        free integration constants; no initial conditions are applied.

    Raises:
        ValueError — if n_compartments is neither 1 nor 2.
    """
    # Validate up front: previously any other value fell through both
    # branches and failed with UnboundLocalError at the return statement.
    if n_compartments not in (1, 2):
        raise ValueError(
            f"n_compartments must be 1 or 2, got {n_compartments!r}"
        )

    t = symbols("t", positive=True)

    if n_compartments == 1:
        # 1-compartment: dC/dt = -ke * C with C(0) = C0
        C = Function("C")
        ke, C0 = symbols("k_e C_0", positive=True)
        ode = Eq(C(t).diff(t), -ke * C(t))
        solution = dsolve(ode, C(t), ics={C(0): C0})

        # Half-life: time at which the concentration reaches C0 / 2
        t_half = sp.solve(Eq(solution.rhs, C0/2), t)[0]

        # AUC (0→∞)
        auc = integrate(solution.rhs, (t, 0, oo))

        result = {
            "model": "1-compartment IV bolus",
            "ode": latex(ode),
            "solution": latex(solution),
            "half_life": latex(t_half),
            "auc_inf": latex(auc),
        }
        print(f"PK 1-compartment: C(t) = {solution.rhs}")
        print(f" t½ = {t_half}")
        print(f" AUC(0→∞) = {auc}")

    else:
        # 2-compartment model: central (C1) and peripheral (C2) compartments
        C1, C2 = Function("C1"), Function("C2")
        k10, k12, k21 = symbols("k_10 k_12 k_21", positive=True)

        ode1 = Eq(C1(t).diff(t), -(k10 + k12)*C1(t) + k21*C2(t))
        ode2 = Eq(C2(t).diff(t), k12*C1(t) - k21*C2(t))

        system = [ode1, ode2]
        solution = sp.dsolve(system, [C1(t), C2(t)])

        result = {
            "model": "2-compartment IV bolus",
            "system": [latex(eq) for eq in system],
            "solution": [latex(sol) for sol in solution],
        }
        print("PK 2-compartment system defined")
        for sol in solution:
            print(f" {sol}")

    return result
221
+ ```
222
+
223
+ ## 5. LaTeX 数式エクスポート
224
+
225
+ ```python
226
def export_equations_latex(equations, output_file="equations.tex"):
    """
    Export symbolic expressions to a standalone LaTeX document.

    Each entry becomes a `% name` comment followed by an `equation`
    environment containing the LaTeX rendering of the expression.

    Parameters:
        equations: dict — {name: sympy_expr}
        output_file: str — output LaTeX path (written as UTF-8)

    Returns:
        str — output_file, unchanged.
    """
    lines = [
        r"\documentclass{article}",
        r"\usepackage{amsmath,amssymb}",
        r"\begin{document}",
        "",
    ]

    for name, expr in equations.items():
        # A line break inside the name would end the `%` comment early and
        # leak the remainder into the document body; keep it on one line.
        comment = str(name).replace("\r", " ").replace("\n", " ")
        lines.extend([
            f"% {comment}",
            r"\begin{equation}",
            f" {latex(expr)}",
            r"\end{equation}",
            "",
        ])

    lines.append(r"\end{document}")

    # Explicit UTF-8: equation names may be non-ASCII, and the platform
    # default encoding is not guaranteed to represent them.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))

    print(f"LaTeX exported: {output_file} ({len(equations)} equations)")
    return output_file
255
+ ```
256
+
257
+ ---
258
+
259
+ ## パイプライン出力
260
+
261
+ | 出力ファイル | 説明 | 連携先スキル |
262
+ |---|---|---|
263
+ | `results/symbolic_solutions.json` | 記号解 (LaTeX 形式) | → latex-formatter, academic-writing |
264
+ | `results/ode_solutions.json` | ODE 解析解 | → systems-biology, admet-pharmacokinetics |
265
+ | `equations.tex` | LaTeX 数式集 | → latex-formatter |
266
+ | `figures/symbolic_plot.png` | 記号解の可視化 | → publication-figures |
267
+
268
+ ## パイプライン統合
269
+
270
+ ```
271
+ systems-biology ──→ symbolic-mathematics ──→ latex-formatter
272
+ (SBML/ODE) (SymPy 解析解) (LaTeX 変換)
273
+
274
+ ├──→ admet-pharmacokinetics (PK モデル)
275
+ ├──→ bayesian-statistics (尤度導出)
276
+ └──→ computational-materials (バンド理論)
277
+ ```