@nahisaho/satori 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -16
- package/package.json +1 -1
- package/src/.github/skills/scientific-anomaly-detection/SKILL.md +296 -0
- package/src/.github/skills/scientific-causal-ml/SKILL.md +240 -0
- package/src/.github/skills/scientific-data-profiling/SKILL.md +247 -0
- package/src/.github/skills/scientific-geospatial-analysis/SKILL.md +274 -0
- package/src/.github/skills/scientific-model-monitoring/SKILL.md +247 -0
- package/src/.github/skills/scientific-network-visualization/SKILL.md +278 -0
- package/src/.github/skills/scientific-reproducible-reporting/SKILL.md +330 -0
- package/src/.github/skills/scientific-time-series-forecasting/SKILL.md +246 -0
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scientific-time-series-forecasting
|
|
3
|
+
description: |
|
|
4
|
+
ML 時系列予測スキル。Prophet/NeuralProphet・N-BEATS・
|
|
5
|
+
Temporal Fusion Transformer (TFT)・時系列特徴量エンジニアリング・
|
|
6
|
+
バックテスト・多段階予測・アンサンブル予測。
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Scientific Time Series Forecasting
|
|
10
|
+
|
|
11
|
+
深層学習・ML ベースの時系列予測パイプラインを提供し、
|
|
12
|
+
Prophet から Transformer まで最新手法を網羅する。
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- Prophet/NeuralProphet で季節性時系列を予測するとき
|
|
17
|
+
- 深層学習 (N-BEATS/TFT) で高精度予測するとき
|
|
18
|
+
- 時系列特徴量エンジニアリングでラグ・ローリング特徴を生成するとき
|
|
19
|
+
- バックテストで予測性能を厳密に評価するとき
|
|
20
|
+
- 複数モデルのアンサンブル予測をするとき
|
|
21
|
+
- 多変量・多段階予測をするとき
|
|
22
|
+
|
|
23
|
+
> **Note**: 古典時系列 (ARIMA/STL/FFT) は `scientific-time-series` を参照。
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
## 1. Prophet / NeuralProphet
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import numpy as np
|
|
33
|
+
import pandas as pd
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def prophet_forecast(df, date_col, value_col, periods=30,
                     freq="D", yearly=True, weekly=True,
                     changepoint_prior=0.05):
    """
    Fit a Prophet model and forecast ``periods`` steps past the data.

    Besides returning the forecast, this saves two figures to the current
    working directory: ``prophet_forecast.png`` and
    ``prophet_components.png``.

    Parameters:
        df: pd.DataFrame — time series data
        date_col: str — name of the date column
        value_col: str — name of the value column
        periods: int — number of future steps to forecast
        freq: str — step frequency ("D" / "H" / "M")
        yearly: bool — enable yearly seasonality
        weekly: bool — enable weekly seasonality
        changepoint_prior: float — passed to Prophet as
            ``changepoint_prior_scale``

    Returns:
        dict with keys ``forecast`` (Prophet prediction frame covering the
        history plus the future rows), ``model`` (the fitted model),
        ``mae`` and ``mape``. Both error metrics are computed only on rows
        that have an observed value, i.e. they describe the in-sample fit,
        not out-of-sample accuracy. ``mape`` is a percentage and skips rows
        whose observed value is zero (NaN when every observed value is
        zero).
    """
    from prophet import Prophet

    prophet_df = df[[date_col, value_col]].rename(
        columns={date_col: "ds", value_col: "y"})
    # Parse the dates up front: the forecast frame carries ``ds`` as
    # datetime64, and merging it with string dates fails on the dtype
    # mismatch.
    prophet_df["ds"] = pd.to_datetime(prophet_df["ds"])

    model = Prophet(
        yearly_seasonality=yearly,
        weekly_seasonality=weekly,
        changepoint_prior_scale=changepoint_prior)
    model.fit(prophet_df)

    future = model.make_future_dataframe(periods=periods, freq=freq)
    forecast = model.predict(future)

    # Evaluation: the left join keeps the future rows with y = NaN, which
    # are then dropped, so only historical rows are scored.
    merged = forecast.merge(prophet_df, on="ds", how="left")
    valid = merged.dropna(subset=["y"])
    abs_err = np.abs(valid["y"] - valid["yhat"])
    mae = np.mean(abs_err)
    # A percentage error is undefined where the actual value is zero;
    # excluding those rows avoids an inf/NaN MAPE.
    nonzero = valid["y"] != 0
    mape = np.mean(abs_err[nonzero] / np.abs(valid["y"][nonzero])) * 100

    fig1 = model.plot(forecast)
    fig1.savefig("prophet_forecast.png", dpi=150, bbox_inches="tight")

    fig2 = model.plot_components(forecast)
    fig2.savefig("prophet_components.png", dpi=150, bbox_inches="tight")

    print(f"Prophet: {periods} periods, MAE={mae:.4f}, MAPE={mape:.1f}%")
    return {"forecast": forecast, "model": model,
            "mae": mae, "mape": mape}
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def neuralprophet_forecast(df, date_col, value_col, periods=30,
                           n_lags=60, n_forecasts=30, freq="D",
                           learning_rate=0.01, epochs=100):
    """
    Fit a NeuralProphet model (autoregressive, AR-Net) and forecast.

    Besides returning the forecast, this saves the forecast plot to
    ``neuralprophet_forecast.png`` in the current working directory.

    Parameters:
        df: pd.DataFrame — time series data
        date_col: str — name of the date column
        value_col: str — name of the value column
        periods: int — number of future steps requested
        n_lags: int — number of autoregressive lags
        n_forecasts: int — number of steps predicted at once
        freq: str — data frequency passed to ``fit`` (default "D",
            the value that was previously hard-coded)
        learning_rate: float — optimiser learning rate (default 0.01)
        epochs: int — number of training epochs (default 100)

    Returns:
        dict with keys ``forecast`` (prediction frame including historic
        predictions), ``model`` (the fitted model) and ``metrics`` (the
        value returned by ``model.fit``).
    """
    from neuralprophet import NeuralProphet

    np_df = df[[date_col, value_col]].rename(
        columns={date_col: "ds", value_col: "y"})

    model = NeuralProphet(
        n_lags=n_lags, n_forecasts=n_forecasts,
        yearly_seasonality=True, weekly_seasonality=True,
        learning_rate=learning_rate, epochs=epochs)

    metrics = model.fit(np_df, freq=freq)

    # NOTE(review): with n_lags > 0 NeuralProphet may tie the future
    # horizon to n_forecasts rather than periods — confirm against the
    # installed version.
    future = model.make_future_dataframe(
        np_df, periods=periods, n_historic_predictions=True)
    forecast = model.predict(future)

    # NOTE(review): assumes model.plot returns an object with savefig
    # (matplotlib figure) — confirm the plotting backend in use.
    fig = model.plot(forecast)
    fig.savefig("neuralprophet_forecast.png", dpi=150, bbox_inches="tight")

    print(f"NeuralProphet: lags={n_lags}, forecasts={n_forecasts}")
    return {"forecast": forecast, "model": model, "metrics": metrics}
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## 2. 時系列特徴量エンジニアリング
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
def create_ts_features(df, date_col, value_col,
                       lags=None, rolling_windows=None):
    """
    Build calendar, cyclical, lag, rolling and difference features.

    Works on a copy of ``df``: the date column is parsed to datetime and
    the rows are sorted by it (original index labels are kept) before any
    feature is computed.

    Parameters:
        df: pd.DataFrame — time series data
        date_col: str — name of the date column
        value_col: str — name of the value column
        lags: list[int] | None — lag offsets in rows
            (defaults to [1, 3, 7, 14, 28])
        rolling_windows: list[int] | None — rolling window sizes in rows
            (defaults to [7, 14, 30])

    Returns:
        pd.DataFrame — the sorted copy with the feature columns appended.
    """
    lags = [1, 3, 7, 14, 28] if lags is None else lags
    rolling_windows = [7, 14, 30] if rolling_windows is None else rolling_windows

    features = df.copy()
    features[date_col] = pd.to_datetime(features[date_col])
    features = features.sort_values(date_col)

    # Calendar features
    for part in ("dayofweek", "dayofyear", "month", "quarter"):
        features[part] = getattr(features[date_col].dt, part)
    weekend_mask = features[date_col].dt.dayofweek >= 5
    features["is_weekend"] = weekend_mask.astype(int)

    # Cyclical encoding of day-of-year and day-of-week
    for suffix, source, period in (("day", "dayofyear", 365.25),
                                   ("week", "dayofweek", 7)):
        features[f"sin_{suffix}"] = np.sin(2 * np.pi * features[source] / period)
        features[f"cos_{suffix}"] = np.cos(2 * np.pi * features[source] / period)

    # Lag features (row offsets on the sorted data)
    for offset in lags:
        features[f"lag_{offset}"] = features[value_col].shift(offset)

    # Rolling statistics; each window ends at, and includes, the current row
    for size in rolling_windows:
        for stat in ("mean", "std", "min", "max"):
            roller = features[value_col].rolling(size)
            features[f"rolling_{stat}_{size}"] = getattr(roller, stat)()

    # Difference features
    for step in (1, 7):
        features[f"diff_{step}"] = features[value_col].diff(step)

    n_features = len(features.columns) - len(df.columns)
    print(f"TS Features: {n_features} features created "
          f"(lags={lags}, windows={rolling_windows})")
    return features
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def ts_backtest(df, date_col, value_col, model_fn,
                n_splits=5, horizon=30, gap=0):
    """
    Walk-forward (expanding-window) backtest of a forecasting function.

    The data is sorted by ``date_col``. Fold ``i`` trains on the first
    ``fold_size * (i + 1)`` rows, where
    ``fold_size = (len(df) - horizon) // n_splits``, skips ``gap`` rows and
    scores up to ``horizon`` rows after that (fewer when the data runs
    out). Folds whose test window would start past the end of the data are
    skipped.

    Parameters:
        df: pd.DataFrame — time series data
        date_col: str — name of the date column used for ordering
        value_col: str — name of the column holding observed values
        model_fn: callable — receives the training DataFrame and returns
            the predicted values for the steps that follow it (1-D
            array-like, or a single-column frame)
        n_splits: int — number of folds
        horizon: int — forecast horizon per fold, in rows
        gap: int — rows skipped between the training and test windows

    Returns:
        pd.DataFrame — one row per evaluated fold with columns ``fold``,
        ``train_size``, ``test_size``, ``mae``, ``rmse`` and ``mape``
        (percent).

    Raises:
        ValueError — if ``n_splits``/``horizon`` are not positive, ``gap``
            is negative, there are too few rows for the requested split,
            or ``model_fn`` returns an empty, multi-column or non-finite
            forecast (or the scored actual values are non-finite).
    """
    if n_splits < 1 or horizon < 1:
        raise ValueError("n_splits and horizon must be positive integers")
    if gap < 0:
        raise ValueError("gap must be non-negative")

    sorted_df = df.sort_values(date_col).reset_index(drop=True)
    n = len(sorted_df)
    fold_size = (n - horizon) // n_splits
    if fold_size < 1:
        # Otherwise the first fold would train on an empty frame.
        raise ValueError(
            f"Not enough rows ({n}) for n_splits={n_splits} "
            f"and horizon={horizon}")

    results = []

    for i in range(n_splits):
        train_end = fold_size * (i + 1)
        test_start = train_end + gap
        if test_start >= n:
            # The gap pushed the test window past the data; every later
            # fold starts even further out, so stop here.
            break
        test_end = min(test_start + horizon, n)

        train_df = sorted_df.iloc[:train_end]
        test_df = sorted_df.iloc[test_start:test_end]

        y_pred = np.asarray(model_fn(train_df), dtype=float)
        if y_pred.ndim == 2 and y_pred.shape[1] == 1:
            # Accept a single-column frame / column vector.
            y_pred = y_pred.ravel()
        if y_pred.ndim != 1:
            raise ValueError("model_fn must return a 1-D forecast")

        y_true = test_df[value_col].to_numpy(dtype=float)
        # Score only the steps present in both the forecast and the test
        # window.
        n_eval = min(len(y_true), len(y_pred))
        if n_eval == 0:
            raise ValueError("model_fn returned an empty forecast")
        y_true = y_true[:n_eval]
        y_pred = y_pred[:n_eval]
        if not (np.isfinite(y_true).all() and np.isfinite(y_pred).all()):
            raise ValueError("Backtest inputs contain NaN or infinite values")

        errors = y_true - y_pred
        mae = np.mean(np.abs(errors))
        rmse = np.sqrt(np.mean(errors ** 2))
        # The small epsilon guards against division by an exact zero.
        mape = np.mean(np.abs(errors / (y_true + 1e-10))) * 100

        results.append({
            "fold": i, "train_size": train_end,
            "test_size": test_end - test_start,
            "mae": mae, "rmse": rmse, "mape": mape})

    # Explicit columns keep the frame well-formed when no fold was run.
    results_df = pd.DataFrame(
        results,
        columns=["fold", "train_size", "test_size", "mae", "rmse", "mape"])
    # Report the number of folds actually evaluated, not the number asked for.
    print(f"Backtest ({len(results_df)} folds, h={horizon}): "
          f"MAE={results_df['mae'].mean():.4f} ± {results_df['mae'].std():.4f}")
    return results_df
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
---
|
|
229
|
+
|
|
230
|
+
## パイプライン統合
|
|
231
|
+
|
|
232
|
+
```
|
|
233
|
+
time-series → time-series-forecasting → model-monitoring
|
|
234
|
+
(古典解析) (ML 予測) (監視)
|
|
235
|
+
│ │ ↓
|
|
236
|
+
spectral-signal ────┘ anomaly-detection
|
|
237
|
+
(周波数解析) (異常検知)
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
## パイプライン出力
|
|
241
|
+
|
|
242
|
+
| ファイル | 説明 | 次スキル |
|
|
243
|
+
|---------|------|---------|
|
|
244
|
+
| `prophet_forecast.png` | Prophet 予測結果 | → presentation |
|
|
245
|
+
| `ts_features.csv` | 時系列特徴量 | → ml-regression |
|
|
246
|
+
| `backtest_results.csv` | バックテスト結果 | → model selection |
|