randomstatsmodels 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- randomstatsmodels/__init__.py +39 -0
- randomstatsmodels/benchmarking/__init__.py +1 -0
- randomstatsmodels/benchmarking/benchmarking.py +304 -0
- randomstatsmodels/metrics/__init__.py +1 -0
- randomstatsmodels/metrics/metrics.py +27 -0
- randomstatsmodels/models/__init__.py +15 -0
- randomstatsmodels/models/model_utils.py +47 -0
- randomstatsmodels/models/models.py +3263 -0
- randomstatsmodels-0.1.0.dist-info/METADATA +89 -0
- randomstatsmodels-0.1.0.dist-info/RECORD +13 -0
- randomstatsmodels-0.1.0.dist-info/WHEEL +5 -0
- randomstatsmodels-0.1.0.dist-info/licenses/LICENSE +21 -0
- randomstatsmodels-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# randomstatsmodels/__init__.py
|
|
2
|
+
# This makes the folder a Python package.
|
|
3
|
+
|
|
4
|
+
from .metrics.metrics import mae, mape, smape, rmse
|
|
5
|
+
from .models.models import (
|
|
6
|
+
AutoHybridForecaster,
|
|
7
|
+
AutoKNN,
|
|
8
|
+
AutoMELD,
|
|
9
|
+
AutoNEO,
|
|
10
|
+
AutoPALF,
|
|
11
|
+
AutoThetaAR,
|
|
12
|
+
AutoPolymath,
|
|
13
|
+
AutoSeasonalAR,
|
|
14
|
+
AutoFourier,
|
|
15
|
+
AutoRollingMedian,
|
|
16
|
+
AutoTrimmedMean,
|
|
17
|
+
AutoWindow,
|
|
18
|
+
AutoRankInsertion,
|
|
19
|
+
)
|
|
20
|
+
from .benchmarking.benchmarking import benchmark_model, benchmark_models
|
|
21
|
+
|
|
22
|
+
__version__ = "0.1.0"

# Public API. Every name imported above is re-exported here; previously five
# model classes (AutoFourier, AutoRollingMedian, AutoTrimmedMean, AutoWindow,
# AutoRankInsertion) were imported but missing from __all__, so
# `from randomstatsmodels import *` silently dropped them.
__all__ = [
    "__version__",
    "mae",
    "mape",
    "smape",
    "rmse",
    "AutoHybridForecaster",
    "AutoKNN",
    "AutoMELD",
    "AutoNEO",
    "AutoPALF",
    "AutoThetaAR",
    "AutoPolymath",
    "AutoSeasonalAR",
    "AutoFourier",
    "AutoRollingMedian",
    "AutoTrimmedMean",
    "AutoWindow",
    "AutoRankInsertion",
    "benchmark_models",
    "benchmark_model",
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .benchmarking import benchmark_model, benchmark_models
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import math
|
|
3
|
+
import numpy as np
|
|
4
|
+
from ..metrics import mae, rmse, mape, smape
|
|
5
|
+
|
|
6
|
+
import time
|
|
7
|
+
import numpy as np
|
|
8
|
+
from ..metrics import mae, rmse, mape, smape
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def benchmark_model(model_class, data, iterations=1, h=7):
    """
    Benchmark the training + prediction speed of a time series model,
    and compute MAE, RMSE, MAPE, sMAPE on the last h points.

    The model is fit on ``data[:-h]`` and asked for an ``h``-step forecast,
    which is scored against the final ``h`` observations.

    Parameters
    ----------
    model_class : class
        The model class to initialize (e.g., AutoNEO). Must expose
        ``fit(data)`` and ``predict(h)``.
    data : array-like
        The time series data.
    iterations : int, default=1
        Number of times to run the benchmark.
    h : int, default=7
        Forecast horizon.

    Returns
    -------
    results : dict
        {
            "avg_total_time_s": float,
            "avg_fit_time_s": float,
            "avg_predict_time_s": float,
            "avg_mae": float,
            "avg_rmse": float,
            "avg_mape": float,
            "avg_smape": float,
            "per_iteration": [
                {
                    "fit_time_s": float,
                    "predict_time_s": float,
                    "total_time_s": float,
                    "mae": float,
                    "rmse": float,
                    "mape": float,
                    "smape": float
                },
                ...
            ]
        }

    Raises
    ------
    ValueError
        If ``len(data) <= h`` (nothing left to train on / score against).
    """
    data = np.asarray(data)
    # Validate with a real exception instead of assert (asserts vanish under -O).
    if len(data) <= h:
        raise ValueError("Data length must be greater than forecast horizon h.")

    per_iter = []
    for _ in range(iterations):
        model = model_class()  # fresh model each run
        model_name = model_class.__name__

        t0 = time.time()
        model.fit(data[:-h])
        fit_time = time.time() - t0

        t1 = time.time()
        y_pred = model.predict(h)
        predict_time = time.time() - t1

        total_time = fit_time + predict_time

        y_true = data[-h:]
        # Ensure shapes are compatible: flatten and truncate to horizon h.
        y_pred = np.asarray(y_pred).reshape(-1)[:h]

        # Special-case: AutoETS predict() output wraps the point forecast in a
        # mapping under "mean" — presumably [{'mean': array}, ...]; TODO confirm
        # against the AutoETS implementation.
        if model_name == "AutoETS":
            y_pred = y_pred[0]["mean"]

        iter_metrics = {
            "fit_time_s": fit_time,
            "predict_time_s": predict_time,
            "total_time_s": total_time,
            "mae": float(mae(y_true, y_pred)),
            "rmse": float(rmse(y_true, y_pred)),
            "mape": float(mape(y_true, y_pred)),
            "smape": float(smape(y_true, y_pred)),
        }
        per_iter.append(iter_metrics)

    # Averages across iterations.
    avg_total = float(np.mean([x["total_time_s"] for x in per_iter]))
    avg_fit = float(np.mean([x["fit_time_s"] for x in per_iter]))
    avg_predict = float(np.mean([x["predict_time_s"] for x in per_iter]))
    avg_mae_ = float(np.mean([x["mae"] for x in per_iter]))
    avg_rmse_ = float(np.mean([x["rmse"] for x in per_iter]))
    avg_mape_ = float(np.mean([x["mape"] for x in per_iter]))
    avg_smape_ = float(np.mean([x["smape"] for x in per_iter]))

    print(
        f"\nAverages over {iterations} runs --> "
        f"fit: {avg_fit:.4f}s | predict: {avg_predict:.4f}s | total: {avg_total:.4f}s | "
        f"MAE: {avg_mae_:.4f} | RMSE: {avg_rmse_:.4f} | MAPE: {avg_mape_:.4f} | sMAPE: {avg_smape_:.4f}"
    )

    return {
        "avg_total_time_s": round(avg_total, 2),
        "avg_fit_time_s": round(avg_fit, 2),
        "avg_predict_time_s": round(avg_predict, 2),
        "avg_mae": round(avg_mae_, 3),
        "avg_rmse": round(avg_rmse_, 3),
        "avg_mape": round(avg_mape_, 3),
        "avg_smape": round(avg_smape_, 3),
        "per_iteration": per_iter,
    }
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
import time
|
|
121
|
+
import numpy as np
|
|
122
|
+
from ..metrics import mae, rmse, mape, smape
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _coerce_forecast(yp, h, model_name):
|
|
126
|
+
"""
|
|
127
|
+
Coerce various model outputs to a 1D np.ndarray of length h.
|
|
128
|
+
Handles special cases like AutoETS structure.
|
|
129
|
+
"""
|
|
130
|
+
# Special-case: your AutoETS wrapper shape
|
|
131
|
+
if model_name == "AutoETS":
|
|
132
|
+
# expected like [{'mean': np.array([...])}, ...] or similar
|
|
133
|
+
try:
|
|
134
|
+
yp = yp["mean"]
|
|
135
|
+
|
|
136
|
+
except Exception:
|
|
137
|
+
pass
|
|
138
|
+
|
|
139
|
+
if yp.shape[0] != h:
|
|
140
|
+
raise ValueError(f"{model_name}.predict({h}) returned length {yp.shape[0]} (expected {h}).")
|
|
141
|
+
return yp
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _metrics_dict(y_true, y_pred):
    """Score one forecast against the truth; all four metrics as plain floats."""
    metric_fns = (("mae", mae), ("rmse", rmse), ("mape", mape), ("smape", smape))
    return {name: float(fn(y_true, y_pred)) for name, fn in metric_fns}
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _avg_block(per_iter):
|
|
154
|
+
return {
|
|
155
|
+
"avg_total_time_s": (
|
|
156
|
+
round(float(np.mean([x["total_time_s"] for x in per_iter])), 2)
|
|
157
|
+
if per_iter and "total_time_s" in per_iter[0]
|
|
158
|
+
else None
|
|
159
|
+
),
|
|
160
|
+
"avg_fit_time_s": (
|
|
161
|
+
round(float(np.mean([x["fit_time_s"] for x in per_iter])), 2)
|
|
162
|
+
if per_iter and "fit_time_s" in per_iter[0]
|
|
163
|
+
else None
|
|
164
|
+
),
|
|
165
|
+
"avg_predict_time_s": (
|
|
166
|
+
round(float(np.mean([x["predict_time_s"] for x in per_iter])), 2)
|
|
167
|
+
if per_iter and "predict_time_s" in per_iter[0]
|
|
168
|
+
else None
|
|
169
|
+
),
|
|
170
|
+
"avg_mae": round(float(np.mean([x["mae"] for x in per_iter])), 3) if per_iter else None,
|
|
171
|
+
"avg_rmse": round(float(np.mean([x["rmse"] for x in per_iter])), 3) if per_iter else None,
|
|
172
|
+
"avg_mape": round(float(np.mean([x["mape"] for x in per_iter])), 3) if per_iter else None,
|
|
173
|
+
"avg_smape": round(float(np.mean([x["smape"] for x in per_iter])), 3) if per_iter else None,
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _safe_fmt(x, fmt=".3f"):
|
|
178
|
+
if x is None:
|
|
179
|
+
return "—"
|
|
180
|
+
try:
|
|
181
|
+
if isinstance(x, float) and (math.isnan(x) or math.isinf(x)):
|
|
182
|
+
return "—"
|
|
183
|
+
return format(x, fmt)
|
|
184
|
+
except Exception:
|
|
185
|
+
return "—"
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def benchmark_models(
    model_classes,
    data,
    iterations=1,
    h=7,
    ensembles=("mean", "median"),
    exclude_from_ensemble=None,
):
    """
    Benchmark several forecasting models on one series, plus optional
    mean/median ensembles of their forecasts.

    Each model is fit on ``data[:-h]`` and asked for an ``h``-step forecast,
    scored against the last ``h`` observations. Models that raise or return
    NaNs are skipped (collected in the console summary at the end).

    Parameters
    ----------
    model_classes : iterable of class
        Model classes to benchmark; each must expose ``fit(data)`` and
        ``predict(h)``.
    data : array-like
        The time series data.
    iterations : int, default=1
        Number of fit/predict cycles per model.
    h : int, default=7
        Forecast horizon.
    ensembles : tuple of str, default=("mean", "median")
        Which ensemble combiners ("mean", "median") to evaluate over the
        per-iteration stacked forecasts; ensembles carry metrics only, no
        timings.
    exclude_from_ensemble : iterable of class or str, optional
        Models (classes or class names) kept in the per-model results but
        left out of the ensemble forecasts.

    Returns
    -------
    results : dict
        {
            "meta": {"iterations": int, "h": int, "n_models": int},
            "models": {name: {averaged timings/metrics, "per_iteration": [...]}},
            "ensembles": {"mean"/"median": {averaged metrics, "per_iteration": [...]}},
        }
        Models with no successful iteration are omitted from "models".
    """
    data = np.asarray(data)
    assert len(data) > h, "Data length must be greater than forecast horizon h."
    y_true = data[-h:]
    model_classes = list(model_classes)

    if exclude_from_ensemble is None:
        exclude_from_ensemble = []
    # Accept either classes or bare names in the exclusion list.
    exclude_names = {(cls.__name__ if not isinstance(cls, str) else cls) for cls in exclude_from_ensemble}

    results = {
        "meta": {"iterations": iterations, "h": h, "n_models": len(model_classes)},
        "models": {},
        "ensembles": {},
    }

    # Prepare per-model storage
    per_model_iters = {cls.__name__: [] for cls in model_classes}
    failed_models = set()

    # Per-iteration: collect predictions for ensembles
    ens_iters_preds = []  # list per iteration: 2D array [n_models_used x h]

    for i in range(iterations):
        iter_preds = []
        for cls in model_classes:
            model_name = cls.__name__
            try:
                # fresh model each run; fit time includes construction
                t0 = time.time()
                model = cls()
                model.fit(data[:-h])
                fit_time = time.time() - t0

                t1 = time.time()
                y_pred = model.predict(h)
                predict_time = time.time() - t1

                y_pred = _coerce_forecast(y_pred, h, model_name)
                # NaN forecasts would poison metrics and ensembles: skip model.
                if np.isnan(y_pred).any():
                    print(f"Skipping model {model_name}: NaN in predictions")
                    failed_models.add(model_name)
                    continue

                total_time = fit_time + predict_time

                # metrics
                m = _metrics_dict(y_true, y_pred)

                per_model_iters[model_name].append(
                    {
                        "fit_time_s": float(fit_time),
                        "predict_time_s": float(predict_time),
                        "total_time_s": float(total_time),
                        **m,
                    }
                )

                # include in ensemble only if not excluded
                if model_name not in exclude_names:
                    iter_preds.append(y_pred)

            except Exception as e:
                # Deliberate best-effort: a broken model must not abort the
                # whole benchmark run.
                print(f"Skipping model {model_name}: {e}")
                failed_models.add(model_name)
                continue

        # Store stacked predictions for ensembles this iteration
        if iter_preds:
            ens_iters_preds.append(np.vstack(iter_preds))  # shape: (n_used_models, h)

    # Aggregate per-model (only include models with at least one valid iteration)
    for model_name, per_iter in per_model_iters.items():
        if not per_iter:
            # Do not include empty models to avoid None in summary formatting
            continue
        results["models"][model_name] = {**_avg_block(per_iter), "per_iteration": per_iter}

    # Compute ensembles (metrics only; no timing)
    valid_ens = set([e.lower() for e in ensembles]) if ensembles else set()
    for ens_type in ("mean", "median"):
        if ens_type in valid_ens and ens_iters_preds:
            per_iter_metrics = []
            for stacked in ens_iters_preds:
                if stacked.size == 0:
                    continue
                # nan-aware reductions: a NaN from one model should not wipe
                # out the whole ensemble forecast for that step.
                if ens_type == "mean":
                    y_ens = np.nanmean(stacked, axis=0)
                else:  # median
                    y_ens = np.nanmedian(stacked, axis=0)
                per_iter_metrics.append(_metrics_dict(y_true, y_ens))

            if per_iter_metrics:
                results["ensembles"][ens_type] = {
                    "avg_mae": round(float(np.mean([x["mae"] for x in per_iter_metrics])), 3),
                    "avg_rmse": round(float(np.mean([x["rmse"] for x in per_iter_metrics])), 3),
                    "avg_mape": round(float(np.mean([x["mape"] for x in per_iter_metrics])), 3),
                    "avg_smape": round(float(np.mean([x["smape"] for x in per_iter_metrics])), 3),
                    "per_iteration": per_iter_metrics,
                }

    # Optional: brief console summary (use safe formatting)
    print(f"\nBenchmark over {iterations} runs (h={h})")
    if failed_models:
        print(f"(Some models were skipped due to errors/NaNs: {sorted(failed_models)})")

    return results
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .metrics import mae, rmse, mape, smape
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def mae(y_true, y_pred):
    """Mean absolute error between two equal-length sequences."""
    err = np.asarray(y_true, float) - np.asarray(y_pred, float)
    return np.abs(err).mean()
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def rmse(y_true, y_pred):
    """Root mean squared error between two equal-length sequences."""
    err = np.asarray(y_true, float) - np.asarray(y_pred, float)
    return np.sqrt(np.square(err).mean())
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def mape(y_true, y_pred, epsilon=1e-8):
    """Mean absolute percentage error (in %); |y_true| is floored at
    epsilon so zero actuals cannot divide by zero."""
    actual = np.asarray(y_true, float)
    forecast = np.asarray(y_pred, float)
    safe_actual = np.maximum(np.abs(actual), epsilon)
    return 100.0 * np.mean(np.abs(actual - forecast) / safe_actual)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def smape(y_true, y_pred, epsilon=1e-8):
    """Symmetric MAPE (in %): |error| over the mean of the absolute
    actual/forecast, floored at epsilon to avoid division by zero."""
    actual = np.asarray(y_true, float)
    forecast = np.asarray(y_pred, float)
    half_sum = (np.abs(actual) + np.abs(forecast)) / 2.0
    return 100.0 * np.mean(np.abs(actual - forecast) / np.maximum(half_sum, epsilon))
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def _weighted_quantile(values, weights, q):
|
|
5
|
+
values = np.asarray(values, float)
|
|
6
|
+
weights = np.asarray(weights, float)
|
|
7
|
+
srt = np.argsort(values)
|
|
8
|
+
v, w = values[srt], weights[srt]
|
|
9
|
+
cw = np.cumsum(w) / np.sum(w)
|
|
10
|
+
idx = np.searchsorted(cw, q, side="left")
|
|
11
|
+
idx = np.clip(idx, 0, len(v) - 1)
|
|
12
|
+
return float(v[idx])
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _golden_section_minimize(f, a, b, tol=1e-6, max_iter=200):
|
|
16
|
+
phi = (1 + 5**0.5) / 2
|
|
17
|
+
invphi = 1 / phi
|
|
18
|
+
c = b - invphi * (b - a)
|
|
19
|
+
d = a + invphi * (b - a)
|
|
20
|
+
fc = f(c)
|
|
21
|
+
fd = f(d)
|
|
22
|
+
for _ in range(max_iter):
|
|
23
|
+
if abs(b - a) < tol:
|
|
24
|
+
break
|
|
25
|
+
if fc < fd:
|
|
26
|
+
b, d, fd = d, c, fc
|
|
27
|
+
c = b - invphi * (b - a)
|
|
28
|
+
fc = f(c)
|
|
29
|
+
else:
|
|
30
|
+
a, c, fc = c, d, fd
|
|
31
|
+
d = a + invphi * (b - a)
|
|
32
|
+
fd = f(d)
|
|
33
|
+
return (a + b) / 2
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _penalty_value(r, kind="l2", delta=1.0, tau=0.5):
|
|
37
|
+
if kind == "l2":
|
|
38
|
+
return 0.5 * r * r
|
|
39
|
+
elif kind == "l1":
|
|
40
|
+
return np.abs(r)
|
|
41
|
+
elif kind == "huber":
|
|
42
|
+
a = np.abs(r)
|
|
43
|
+
return np.where(a <= delta, 0.5 * r * r, delta * (a - 0.5 * delta))
|
|
44
|
+
elif kind == "pinball":
|
|
45
|
+
return np.where(r >= 0, tau * r, (tau - 1.0) * r)
|
|
46
|
+
else:
|
|
47
|
+
raise ValueError("Unknown penalty")
|