@nahisaho/satori 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,246 @@
1
+ ---
2
+ name: scientific-time-series-forecasting
3
+ description: |
4
+ ML 時系列予測スキル。Prophet/NeuralProphet・N-BEATS・
5
+ Temporal Fusion Transformer (TFT)・時系列特徴量エンジニアリング・
6
+ バックテスト・多段階予測・アンサンブル予測。
7
+ ---
8
+
9
+ # Scientific Time Series Forecasting
10
+
11
+ 深層学習・ML ベースの時系列予測パイプラインを提供し、
12
+ Prophet から Transformer まで最新手法を網羅する。
13
+
14
+ ## When to Use
15
+
16
+ - Prophet/NeuralProphet で季節性時系列を予測するとき
17
+ - 深層学習 (N-BEATS/TFT) で高精度予測するとき
18
+ - 時系列特徴量エンジニアリングでラグ・ローリング特徴を生成するとき
19
+ - バックテストで予測性能を厳密に評価するとき
20
+ - 複数モデルのアンサンブル予測をするとき
21
+ - 多変量・多段階予測をするとき
22
+
23
+ > **Note**: 古典時系列 (ARIMA/STL/FFT) は `scientific-time-series` を参照。
24
+
25
+ ---
26
+
27
+ ## Quick Start
28
+
29
+ ## 1. Prophet / NeuralProphet
30
+
31
+ ```python
32
+ import numpy as np
33
+ import pandas as pd
34
+
35
+
36
def prophet_forecast(df, date_col, value_col, periods=30,
                     freq="D", yearly=True, weekly=True,
                     changepoint_prior=0.05):
    """
    Fit a Prophet model and forecast ``periods`` steps past the history.

    Parameters:
        df: pd.DataFrame — time-series data
        date_col: str — name of the date column
        value_col: str — name of the value column
        periods: int — number of future steps to forecast
        freq: str — step frequency ("D" / "H" / "M")
        yearly: bool — enable yearly seasonality
        weekly: bool — enable weekly seasonality
        changepoint_prior: float — changepoint sensitivity
            (passed as ``changepoint_prior_scale``)

    Returns:
        dict with keys ``"forecast"`` (output of ``model.predict``),
        ``"model"`` (the fitted model), ``"mae"`` and ``"mape"``.
        The two error metrics are in-sample: they compare the fitted
        values against the same history the model was trained on.

    Side effects:
        Writes ``prophet_forecast.png`` and ``prophet_components.png`` to
        the current working directory and prints a one-line summary.
    """
    from prophet import Prophet

    prophet_df = df[[date_col, value_col]].rename(
        columns={date_col: "ds", value_col: "y"})
    # Normalise the date column up front: the merge below joins on "ds",
    # and pandas refuses to merge a datetime key with an object (string) key.
    prophet_df = prophet_df.assign(ds=pd.to_datetime(prophet_df["ds"]))

    model = Prophet(
        yearly_seasonality=yearly,
        weekly_seasonality=weekly,
        changepoint_prior_scale=changepoint_prior)
    model.fit(prophet_df)

    future = model.make_future_dataframe(periods=periods, freq=freq)
    forecast = model.predict(future)

    # In-sample evaluation: keep only the rows that have an observed value.
    merged = forecast.merge(prophet_df, on="ds", how="left")
    valid = merged.dropna(subset=["y"])
    abs_err = np.abs(valid["y"] - valid["yhat"])
    mae = float(abs_err.mean())
    # Floor the denominator so zero-valued observations cannot turn the
    # MAPE into inf/NaN (same 1e-10 floor that ts_backtest uses).
    mape = float((abs_err / np.maximum(np.abs(valid["y"]), 1e-10)).mean()) * 100

    fig1 = model.plot(forecast)
    fig1.savefig("prophet_forecast.png", dpi=150, bbox_inches="tight")

    fig2 = model.plot_components(forecast)
    fig2.savefig("prophet_components.png", dpi=150, bbox_inches="tight")

    print(f"Prophet: {periods} periods, MAE={mae:.4f}, MAPE={mape:.1f}%")
    return {"forecast": forecast, "model": model,
            "mae": mae, "mape": mape}
81
+
82
+
83
def neuralprophet_forecast(df, date_col, value_col, periods=30,
                           n_lags=60, n_forecasts=30, freq="D",
                           epochs=100, learning_rate=0.01,
                           yearly=True, weekly=True):
    """
    Fit a NeuralProphet (AR-Net) model and forecast past the history.

    Parameters:
        df: pd.DataFrame — time-series data
        date_col: str — name of the date column
        value_col: str — name of the value column
        periods: int — number of future steps to forecast
        n_lags: int — number of autoregressive lags
        n_forecasts: int — number of multi-step forecast steps
        freq: str — data frequency passed to ``fit`` (default "D")
        epochs: int — training epochs (default 100)
        learning_rate: float — optimiser learning rate (default 0.01)
        yearly: bool — enable yearly seasonality (default True)
        weekly: bool — enable weekly seasonality (default True)

    Returns:
        dict with keys ``"forecast"`` (output of ``model.predict``),
        ``"model"`` (the fitted model) and ``"metrics"`` (the value
        returned by ``model.fit``).

    Side effects:
        Writes ``neuralprophet_forecast.png`` to the current working
        directory and prints a one-line summary.
    """
    from neuralprophet import NeuralProphet

    np_df = df[[date_col, value_col]].rename(
        columns={date_col: "ds", value_col: "y"})
    # Parse dates once here so string-typed date columns are accepted.
    np_df = np_df.assign(ds=pd.to_datetime(np_df["ds"]))

    model = NeuralProphet(
        n_lags=n_lags, n_forecasts=n_forecasts,
        yearly_seasonality=yearly, weekly_seasonality=weekly,
        learning_rate=learning_rate, epochs=epochs)

    metrics = model.fit(np_df, freq=freq)

    future = model.make_future_dataframe(
        np_df, periods=periods, n_historic_predictions=True)
    forecast = model.predict(future)

    fig = model.plot(forecast)
    # NOTE(review): depending on the installed NeuralProphet version and its
    # plotting backend, plot() may return a non-Matplotlib figure (e.g.
    # Plotly) that has no savefig() — confirm against the pinned version.
    if hasattr(fig, "savefig"):
        fig.savefig("neuralprophet_forecast.png", dpi=150, bbox_inches="tight")
    elif hasattr(fig, "write_image"):
        fig.write_image("neuralprophet_forecast.png")

    print(f"NeuralProphet: lags={n_lags}, forecasts={n_forecasts}")
    return {"forecast": forecast, "model": model, "metrics": metrics}
116
+ ```
117
+
118
+ ## 2. 時系列特徴量エンジニアリング
119
+
120
+ ```python
121
def create_ts_features(df, date_col, value_col,
                       lags=None, rolling_windows=None, feature_shift=0):
    """
    Build calendar, cyclic, lag, rolling and difference features.

    The input frame is copied, its date column is parsed with
    ``pd.to_datetime`` and the rows are sorted by date before any feature
    is computed. The input ``df`` itself is not modified.

    Parameters:
        df: pd.DataFrame — time-series data
        date_col: str — name of the date column
        value_col: str — name of the value column
        lags: list[int] | None — lag steps (default [1, 3, 7, 14, 28])
        rolling_windows: list[int] | None — rolling window sizes
            (default [7, 14, 30])
        feature_shift: int — number of rows by which the value series is
            shifted before rolling and difference features are computed.
            With the default 0 those features include the current row's
            value (so ``lag_1 + diff_1`` equals the current value); pass 1
            to make them depend on strictly earlier rows only.

    Returns:
        pd.DataFrame — sorted copy of ``df`` with the feature columns added.

    Raises:
        ValueError: if ``feature_shift`` is negative.
    """
    if feature_shift < 0:
        raise ValueError("feature_shift must be >= 0")
    if lags is None:
        lags = [1, 3, 7, 14, 28]
    if rolling_windows is None:
        rolling_windows = [7, 14, 30]

    result = df.copy()
    result[date_col] = pd.to_datetime(result[date_col])
    result = result.sort_values(date_col)

    # Calendar features
    dates = result[date_col].dt
    result["dayofweek"] = dates.dayofweek
    result["dayofyear"] = dates.dayofyear
    result["month"] = dates.month
    result["quarter"] = dates.quarter
    result["is_weekend"] = (dates.dayofweek >= 5).astype(int)

    # Cyclic encoding of day-of-year and day-of-week
    year_angle = 2 * np.pi * result["dayofyear"] / 365.25
    week_angle = 2 * np.pi * result["dayofweek"] / 7
    result["sin_day"] = np.sin(year_angle)
    result["cos_day"] = np.cos(year_angle)
    result["sin_week"] = np.sin(week_angle)
    result["cos_week"] = np.cos(week_angle)

    values = result[value_col]

    # Lag features
    for lag in lags:
        result[f"lag_{lag}"] = values.shift(lag)

    # Series used for rolling / difference features; optionally shifted so
    # the current observation is excluded.
    base = values.shift(feature_shift) if feature_shift else values

    # Rolling statistics (one rolling object per window, reused four times)
    for window in rolling_windows:
        roller = base.rolling(window)
        result[f"rolling_mean_{window}"] = roller.mean()
        result[f"rolling_std_{window}"] = roller.std()
        result[f"rolling_min_{window}"] = roller.min()
        result[f"rolling_max_{window}"] = roller.max()

    # Difference features
    result["diff_1"] = base.diff(1)
    result["diff_7"] = base.diff(7)

    n_features = len(result.columns) - len(df.columns)
    print(f"TS Features: {n_features} features created "
          f"(lags={lags}, windows={rolling_windows})")
    return result
174
+
175
+
176
def ts_backtest(df, date_col, value_col, model_fn,
                n_splits=5, horizon=30, gap=0):
    """
    Walk-forward (expanding-window) backtest of a forecasting function.

    The data is sorted by ``date_col`` and split into ``n_splits`` folds.
    Fold ``i`` trains on the first ``fold_size * (i + 1)`` rows, skips
    ``gap`` rows, and scores the next ``horizon`` rows. ``fold_size`` is
    chosen so that every fold has a complete test window.

    Parameters:
        df: pd.DataFrame — time-series data
        date_col: str — name of the date column
        value_col: str — name of the value column
        model_fn: callable — receives the training DataFrame and returns
            the forecast as a 1-D array-like (list, ndarray, Series) or as
            a DataFrame with a ``"yhat"`` column; only the first
            ``horizon`` values are scored
        n_splits: int — number of folds
        horizon: int — forecast horizon (rows per test window)
        gap: int — rows skipped between the training and test windows

    Returns:
        pd.DataFrame — one row per fold with columns ``fold``,
        ``train_size``, ``test_size``, ``mae``, ``rmse`` and ``mape``.

    Raises:
        ValueError: if ``n_splits``/``horizon`` are not positive, ``gap`` is
            negative, the data is too short for the requested layout, or
            ``model_fn`` returns an empty forecast.
    """
    if n_splits < 1 or horizon < 1 or gap < 0:
        raise ValueError("n_splits and horizon must be >= 1 and gap >= 0")

    sorted_df = df.sort_values(date_col).reset_index(drop=True)
    n = len(sorted_df)
    # Reserve room for the gap as well as the horizon so the last fold's
    # test window still fits inside the data.
    fold_size = (n - horizon - gap) // n_splits
    if fold_size < 1:
        raise ValueError(
            f"Not enough rows ({n}) for n_splits={n_splits}, "
            f"horizon={horizon}, gap={gap}")

    eps = 1e-10  # floor for the MAPE denominator
    results = []

    for i in range(n_splits):
        train_end = fold_size * (i + 1)
        test_start = train_end + gap
        test_end = test_start + horizon

        train_df = sorted_df.iloc[:train_end]
        test_df = sorted_df.iloc[test_start:test_end]

        forecast = model_fn(train_df)
        if isinstance(forecast, pd.DataFrame) and "yhat" in forecast.columns:
            forecast = forecast["yhat"]
        # Compare by position, not by index label.
        y_pred = np.asarray(forecast, dtype=float).ravel()
        y_true = test_df[value_col].to_numpy(dtype=float)

        n_scored = min(len(y_true), len(y_pred))
        if n_scored == 0:
            raise ValueError(f"model_fn returned an empty forecast in fold {i}")
        y_true = y_true[:n_scored]
        y_pred = y_pred[:n_scored]

        errors = y_true - y_pred
        mae = float(np.mean(np.abs(errors)))
        rmse = float(np.sqrt(np.mean(errors ** 2)))
        # Divide by |y| (floored) so negative or zero actuals cannot flip
        # the sign of, or zero out, the denominator.
        mape = float(np.mean(
            np.abs(errors) / np.maximum(np.abs(y_true), eps))) * 100

        results.append({
            "fold": i, "train_size": train_end,
            "test_size": test_end - test_start,
            "mae": mae, "rmse": rmse, "mape": mape})

    results_df = pd.DataFrame(results)
    print(f"Backtest ({n_splits} folds, h={horizon}): "
          f"MAE={results_df['mae'].mean():.4f} ± {results_df['mae'].std():.4f}")
    return results_df
226
+ ```
227
+
228
+ ---
229
+
230
+ ## パイプライン統合
231
+
232
+ ```
233
+ time-series → time-series-forecasting → model-monitoring
234
+ (古典解析) (ML 予測) (監視)
235
+ │ │ ↓
236
+ spectral-signal ────┘ anomaly-detection
237
+ (周波数解析) (異常検知)
238
+ ```
239
+
240
+ ## パイプライン出力
241
+
242
+ | ファイル | 説明 | 次スキル |
243
+ |---------|------|---------|
244
+ | `prophet_forecast.png` | Prophet 予測結果 | → presentation |
245
+ | `ts_features.csv` | 時系列特徴量 (`create_ts_features` の戻り値を `to_csv` で保存) | → ml-regression |
246
+ | `backtest_results.csv` | バックテスト結果 (`ts_backtest` の戻り値を `to_csv` で保存) | → model selection |