randomstatsmodels 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,39 @@
1
+ # randomstatsmodels/__init__.py
2
+ # This makes the folder a Python package.
3
+
4
+ from .metrics.metrics import mae, mape, smape, rmse
5
+ from .models.models import (
6
+ AutoHybridForecaster,
7
+ AutoKNN,
8
+ AutoMELD,
9
+ AutoNEO,
10
+ AutoPALF,
11
+ AutoThetaAR,
12
+ AutoPolymath,
13
+ AutoSeasonalAR,
14
+ AutoFourier,
15
+ AutoRollingMedian,
16
+ AutoTrimmedMean,
17
+ AutoWindow,
18
+ AutoRankInsertion,
19
+ )
20
+ from .benchmarking.benchmarking import benchmark_model, benchmark_models
21
+
22
__version__ = "0.1.0"

# Public API of the package. Every name imported above is listed here so that
# ``from randomstatsmodels import *`` exposes all metrics, forecasters and
# benchmarking helpers (previously five of the imported forecasters were missing).
__all__ = [
    "__version__",
    # metrics
    "mae",
    "mape",
    "smape",
    "rmse",
    # forecasters
    "AutoHybridForecaster",
    "AutoKNN",
    "AutoMELD",
    "AutoNEO",
    "AutoPALF",
    "AutoThetaAR",
    "AutoPolymath",
    "AutoSeasonalAR",
    "AutoFourier",
    "AutoRollingMedian",
    "AutoTrimmedMean",
    "AutoWindow",
    "AutoRankInsertion",
    # benchmarking
    "benchmark_models",
    "benchmark_model",
]
@@ -0,0 +1 @@
1
+ from .benchmarking import benchmark_model, benchmark_models
@@ -0,0 +1,304 @@
1
+ import time
2
+ import math
3
+ import numpy as np
4
+ from ..metrics import mae, rmse, mape, smape
5
+
6
+ import time
7
+ import numpy as np
8
+ from ..metrics import mae, rmse, mape, smape
9
+
10
+
11
def benchmark_model(model_class, data, iterations=1, h=7):
    """
    Benchmark the fit + predict speed of one time series model and score it
    on a holdout made of the last ``h`` points.

    On every iteration a fresh ``model_class()`` is fitted on ``data[:-h]``,
    asked for ``predict(h)``, and the forecast is compared with ``data[-h:]``
    using MAE, RMSE, MAPE and sMAPE. A summary line is printed at the end.

    Parameters
    ----------
    model_class : class
        The model class to instantiate with no arguments (e.g., AutoNEO).
        Instances must provide ``fit(series)`` and ``predict(h)``.
    data : array-like
        The time series data.
    iterations : int, default=1
        Number of times to run the benchmark.
    h : int, default=7
        Forecast horizon (also the holdout size).

    Returns
    -------
    results : dict
        {
            "avg_total_time_s": float,    # rounded to 2 decimals
            "avg_fit_time_s": float,      # rounded to 2 decimals
            "avg_predict_time_s": float,  # rounded to 2 decimals
            "avg_mae": float,             # rounded to 3 decimals
            "avg_rmse": float,            # rounded to 3 decimals
            "avg_mape": float,            # rounded to 3 decimals
            "avg_smape": float,           # rounded to 3 decimals
            "per_iteration": [
                {
                    "fit_time_s": float,
                    "predict_time_s": float,
                    "total_time_s": float,
                    "mae": float,
                    "rmse": float,
                    "mape": float,
                    "smape": float
                },
                ...
            ]
        }

    Raises
    ------
    ValueError
        If ``h`` or ``iterations`` is smaller than 1, if ``data`` is not
        longer than ``h``, or if the model returns fewer than ``h`` values.
    """
    data = np.asarray(data)
    # Explicit checks instead of ``assert`` so validation survives ``python -O``.
    if h < 1:
        # h == 0 would make data[:-h] empty and data[-h:] the whole series.
        raise ValueError("Forecast horizon h must be a positive integer.")
    if len(data) <= h:
        raise ValueError("Data length must be greater than forecast horizon h.")
    if iterations < 1:
        raise ValueError("iterations must be a positive integer.")

    model_name = model_class.__name__
    train, y_true = data[:-h], data[-h:]

    per_iter = []
    for _ in range(iterations):
        model = model_class()  # fresh model each run

        # perf_counter is monotonic and high resolution, unlike wall-clock time.time().
        t0 = time.perf_counter()
        model.fit(train)
        fit_time = time.perf_counter() - t0

        t1 = time.perf_counter()
        y_pred = model.predict(h)
        predict_time = time.perf_counter() - t1

        total_time = fit_time + predict_time

        if model_name == "AutoETS":
            # AutoETS wrappers hand back {"mean": ...} or [{"mean": ...}, ...];
            # unwrap before converting to an array.
            try:
                y_pred = y_pred["mean"]
            except (KeyError, IndexError, TypeError, ValueError):
                try:
                    y_pred = y_pred[0]["mean"]
                except (KeyError, IndexError, TypeError, ValueError):
                    pass  # already a plain forecast

        # Ensure shapes compatible: flat float vector, truncated to the horizon.
        y_pred = np.asarray(y_pred, dtype=float).reshape(-1)
        if y_pred.shape[0] < h:
            # A short forecast would otherwise broadcast (length 1) or crash later.
            raise ValueError(
                f"{model_name}.predict({h}) returned length {y_pred.shape[0]} (expected {h})."
            )
        y_pred = y_pred[:h]

        per_iter.append(
            {
                "fit_time_s": fit_time,
                "predict_time_s": predict_time,
                "total_time_s": total_time,
                "mae": float(mae(y_true, y_pred)),
                "rmse": float(rmse(y_true, y_pred)),
                "mape": float(mape(y_true, y_pred)),
                "smape": float(smape(y_true, y_pred)),
            }
        )

    def _average(key):
        return float(np.mean([row[key] for row in per_iter]))

    # Averages
    avg_total = _average("total_time_s")
    avg_fit = _average("fit_time_s")
    avg_predict = _average("predict_time_s")
    avg_mae_ = _average("mae")
    avg_rmse_ = _average("rmse")
    avg_mape_ = _average("mape")
    avg_smape_ = _average("smape")

    print(
        f"\nAverages over {iterations} runs --> "
        f"fit: {avg_fit:.4f}s | predict: {avg_predict:.4f}s | total: {avg_total:.4f}s | "
        f"MAE: {avg_mae_:.4f} | RMSE: {avg_rmse_:.4f} | MAPE: {avg_mape_:.4f} | sMAPE: {avg_smape_:.4f}"
    )

    return {
        "avg_total_time_s": round(avg_total, 2),
        "avg_fit_time_s": round(avg_fit, 2),
        "avg_predict_time_s": round(avg_predict, 2),
        "avg_mae": round(avg_mae_, 3),
        "avg_rmse": round(avg_rmse_, 3),
        "avg_mape": round(avg_mape_, 3),
        "avg_smape": round(avg_smape_, 3),
        "per_iteration": per_iter,
    }
118
+
119
+
120
+ import time
121
+ import numpy as np
122
+ from ..metrics import mae, rmse, mape, smape
123
+
124
+
125
+ def _coerce_forecast(yp, h, model_name):
126
+ """
127
+ Coerce various model outputs to a 1D np.ndarray of length h.
128
+ Handles special cases like AutoETS structure.
129
+ """
130
+ # Special-case: your AutoETS wrapper shape
131
+ if model_name == "AutoETS":
132
+ # expected like [{'mean': np.array([...])}, ...] or similar
133
+ try:
134
+ yp = yp["mean"]
135
+
136
+ except Exception:
137
+ pass
138
+
139
+ if yp.shape[0] != h:
140
+ raise ValueError(f"{model_name}.predict({h}) returned length {yp.shape[0]} (expected {h}).")
141
+ return yp
142
+
143
+
144
def _metrics_dict(y_true, y_pred):
    """Score ``y_pred`` against ``y_true`` and return the four metrics as plain floats.

    The returned dict has the keys ``"mae"``, ``"rmse"``, ``"mape"`` and
    ``"smape"``, in that order.
    """
    scorers = (
        ("mae", mae),
        ("rmse", rmse),
        ("mape", mape),
        ("smape", smape),
    )
    return {label: float(score(y_true, y_pred)) for label, score in scorers}
151
+
152
+
153
+ def _avg_block(per_iter):
154
+ return {
155
+ "avg_total_time_s": (
156
+ round(float(np.mean([x["total_time_s"] for x in per_iter])), 2)
157
+ if per_iter and "total_time_s" in per_iter[0]
158
+ else None
159
+ ),
160
+ "avg_fit_time_s": (
161
+ round(float(np.mean([x["fit_time_s"] for x in per_iter])), 2)
162
+ if per_iter and "fit_time_s" in per_iter[0]
163
+ else None
164
+ ),
165
+ "avg_predict_time_s": (
166
+ round(float(np.mean([x["predict_time_s"] for x in per_iter])), 2)
167
+ if per_iter and "predict_time_s" in per_iter[0]
168
+ else None
169
+ ),
170
+ "avg_mae": round(float(np.mean([x["mae"] for x in per_iter])), 3) if per_iter else None,
171
+ "avg_rmse": round(float(np.mean([x["rmse"] for x in per_iter])), 3) if per_iter else None,
172
+ "avg_mape": round(float(np.mean([x["mape"] for x in per_iter])), 3) if per_iter else None,
173
+ "avg_smape": round(float(np.mean([x["smape"] for x in per_iter])), 3) if per_iter else None,
174
+ }
175
+
176
+
177
+ def _safe_fmt(x, fmt=".3f"):
178
+ if x is None:
179
+ return "—"
180
+ try:
181
+ if isinstance(x, float) and (math.isnan(x) or math.isinf(x)):
182
+ return "—"
183
+ return format(x, fmt)
184
+ except Exception:
185
+ return "—"
186
+
187
+
188
def benchmark_models(
    model_classes,
    data,
    iterations=1,
    h=7,
    ensembles=("mean", "median"),
    exclude_from_ensemble=None,
):
    """
    Benchmark several time series models on the same holdout and score
    simple ensembles of their forecasts.

    The last ``h`` points of ``data`` are held out. On every iteration each
    class in ``model_classes`` is instantiated with no arguments, fitted on
    ``data[:-h]`` and asked for ``predict(h)``. A model that raises, or whose
    forecast contains NaN, is skipped for that iteration and reported in the
    console summary; it never aborts the whole benchmark.

    Parameters
    ----------
    model_classes : iterable of class
        Model classes exposing ``fit(series)`` and ``predict(h)``. Results are
        keyed by ``cls.__name__``.
    data : array-like
        The time series data.
    iterations : int, default=1
        Number of times every model is fitted and scored.
    h : int, default=7
        Forecast horizon (also the holdout size).
    ensembles : str or iterable of str, default=("mean", "median")
        Which ensembles to score; only "mean" and "median" are recognised
        (case-insensitive). A falsy value disables ensembles.
    exclude_from_ensemble : str, class, or iterable of those, optional
        Models (classes or class names) that are benchmarked individually
        but left out of the ensembles.

    Returns
    -------
    results : dict
        {
            "meta": {"iterations": int, "h": int, "n_models": int},
            "models": {
                <model name>: {
                    "avg_total_time_s", "avg_fit_time_s", "avg_predict_time_s",
                    "avg_mae", "avg_rmse", "avg_mape", "avg_smape",
                    "per_iteration": [ {timings and metrics}, ... ]
                },
                ...   # only models with at least one valid iteration
            },
            "ensembles": {
                "mean" / "median": {
                    "avg_mae", "avg_rmse", "avg_mape", "avg_smape",
                    "per_iteration": [ {metrics}, ... ]   # no timings
                }
            }
        }

    Raises
    ------
    ValueError
        If ``h`` is smaller than 1 or ``data`` is not longer than ``h``.
    """
    data = np.asarray(data)
    # Explicit checks instead of ``assert`` so validation survives ``python -O``.
    if h < 1:
        # h == 0 would make data[:-h] empty and data[-h:] the whole series.
        raise ValueError("Forecast horizon h must be a positive integer.")
    if len(data) <= h:
        raise ValueError("Data length must be greater than forecast horizon h.")
    train, y_true = data[:-h], data[-h:]
    model_classes = list(model_classes)

    # A bare string or class would otherwise be iterated element by element.
    if exclude_from_ensemble is None:
        exclude_from_ensemble = []
    elif isinstance(exclude_from_ensemble, (str, type)):
        exclude_from_ensemble = [exclude_from_ensemble]
    exclude_names = {
        item if isinstance(item, str) else item.__name__ for item in exclude_from_ensemble
    }

    if not ensembles:
        requested_ens = set()
    elif isinstance(ensembles, str):
        requested_ens = {ensembles.lower()}
    else:
        requested_ens = {name.lower() for name in ensembles}

    results = {
        "meta": {"iterations": iterations, "h": h, "n_models": len(model_classes)},
        "models": {},
        "ensembles": {},
    }

    # Prepare per-model storage
    per_model_iters = {cls.__name__: [] for cls in model_classes}
    failed_models = set()

    # Per-iteration: collect predictions for ensembles
    ens_iters_preds = []  # list per iteration: 2D array [n_models_used x h]

    for _ in range(iterations):
        iter_preds = []
        for cls in model_classes:
            model_name = cls.__name__
            try:
                # Fresh model each run; construction is kept out of the fit
                # timing so the numbers are comparable with benchmark_model.
                model = cls()

                t0 = time.perf_counter()
                model.fit(train)
                fit_time = time.perf_counter() - t0

                t1 = time.perf_counter()
                y_pred = model.predict(h)
                predict_time = time.perf_counter() - t1

                y_pred = _coerce_forecast(y_pred, h, model_name)
                if np.isnan(y_pred).any():
                    print(f"Skipping model {model_name}: NaN in predictions")
                    failed_models.add(model_name)
                    continue

                per_model_iters[model_name].append(
                    {
                        "fit_time_s": float(fit_time),
                        "predict_time_s": float(predict_time),
                        "total_time_s": float(fit_time + predict_time),
                        **_metrics_dict(y_true, y_pred),
                    }
                )

                # include in ensemble only if not excluded
                if model_name not in exclude_names:
                    iter_preds.append(y_pred)

            except Exception as e:
                # Deliberately broad: one broken model must not abort the
                # benchmark of the others. The failure is reported, not hidden.
                print(f"Skipping model {model_name}: {e}")
                failed_models.add(model_name)
                continue

        # Store stacked predictions for ensembles this iteration
        if iter_preds:
            ens_iters_preds.append(np.vstack(iter_preds))  # shape: (n_used_models, h)

    # Aggregate per-model (only include models with at least one valid iteration)
    for model_name, per_iter in per_model_iters.items():
        if not per_iter:
            # Do not include empty models to avoid None in summary formatting
            continue
        results["models"][model_name] = {**_avg_block(per_iter), "per_iteration": per_iter}

    # Compute ensembles (metrics only; no timing)
    aggregators = {"mean": np.nanmean, "median": np.nanmedian}
    for ens_type in ("mean", "median"):
        if ens_type not in requested_ens or not ens_iters_preds:
            continue
        combine = aggregators[ens_type]
        per_iter_metrics = [
            _metrics_dict(y_true, combine(stacked, axis=0))
            for stacked in ens_iters_preds
            if stacked.size != 0
        ]
        if not per_iter_metrics:
            continue
        results["ensembles"][ens_type] = {
            "avg_mae": round(float(np.mean([x["mae"] for x in per_iter_metrics])), 3),
            "avg_rmse": round(float(np.mean([x["rmse"] for x in per_iter_metrics])), 3),
            "avg_mape": round(float(np.mean([x["mape"] for x in per_iter_metrics])), 3),
            "avg_smape": round(float(np.mean([x["smape"] for x in per_iter_metrics])), 3),
            "per_iteration": per_iter_metrics,
        }

    # Brief console summary
    print(f"\nBenchmark over {iterations} runs (h={h})")
    if failed_models:
        print(f"(Some models were skipped due to errors/NaNs: {sorted(failed_models)})")

    return results
@@ -0,0 +1 @@
1
+ from .metrics import mae, rmse, mape, smape
@@ -0,0 +1,27 @@
1
+ import numpy as np
2
+
3
+
4
def mae(y_true, y_pred):
    """Return the mean absolute error between ``y_true`` and ``y_pred``.

    Both inputs are converted to float arrays before the element-wise
    absolute differences are averaged.
    """
    actual = np.asarray(y_true, float)
    forecast = np.asarray(y_pred, float)
    abs_err = np.abs(actual - forecast)
    return np.mean(abs_err)
8
+
9
+
10
def rmse(y_true, y_pred):
    """Return the root mean squared error between ``y_true`` and ``y_pred``.

    Both inputs are converted to float arrays; the squared differences are
    averaged and the square root of that mean is returned.
    """
    actual = np.asarray(y_true, float)
    forecast = np.asarray(y_pred, float)
    residual = actual - forecast
    mean_sq = np.mean(residual ** 2)
    return np.sqrt(mean_sq)
14
+
15
+
16
def mape(y_true, y_pred, epsilon=1e-8):
    """Return the mean absolute percentage error, in percent.

    Each error is divided by ``|y_true|``, floored at ``epsilon`` so that
    zero actual values do not cause a division by zero.
    """
    actual = np.asarray(y_true, float)
    forecast = np.asarray(y_pred, float)
    scale = np.maximum(np.abs(actual), epsilon)
    relative = np.abs((actual - forecast) / scale)
    return np.mean(relative) * 100.0
21
+
22
+
23
def smape(y_true, y_pred, epsilon=1e-8):
    """Return the symmetric mean absolute percentage error, in percent.

    Each absolute error is divided by the average of ``|y_true|`` and
    ``|y_pred|``, floored at ``epsilon`` to avoid dividing by zero.
    """
    actual = np.asarray(y_true, float)
    forecast = np.asarray(y_pred, float)
    half_sum = (np.abs(actual) + np.abs(forecast)) / 2.0
    scale = np.maximum(half_sum, epsilon)
    return np.mean(np.abs(actual - forecast) / scale) * 100.0
@@ -0,0 +1,15 @@
1
+ from .models import (
2
+ AutoHybridForecaster,
3
+ AutoKNN,
4
+ AutoMELD,
5
+ AutoNEO,
6
+ AutoPALF,
7
+ AutoThetaAR,
8
+ AutoPolymath,
9
+ AutoSeasonalAR,
10
+ AutoFourier,
11
+ AutoRollingMedian,
12
+ AutoTrimmedMean,
13
+ AutoWindow,
14
+ AutoRankInsertion,
15
+ )
@@ -0,0 +1,47 @@
1
+ import numpy as np
2
+
3
+
4
+ def _weighted_quantile(values, weights, q):
5
+ values = np.asarray(values, float)
6
+ weights = np.asarray(weights, float)
7
+ srt = np.argsort(values)
8
+ v, w = values[srt], weights[srt]
9
+ cw = np.cumsum(w) / np.sum(w)
10
+ idx = np.searchsorted(cw, q, side="left")
11
+ idx = np.clip(idx, 0, len(v) - 1)
12
+ return float(v[idx])
13
+
14
+
15
+ def _golden_section_minimize(f, a, b, tol=1e-6, max_iter=200):
16
+ phi = (1 + 5**0.5) / 2
17
+ invphi = 1 / phi
18
+ c = b - invphi * (b - a)
19
+ d = a + invphi * (b - a)
20
+ fc = f(c)
21
+ fd = f(d)
22
+ for _ in range(max_iter):
23
+ if abs(b - a) < tol:
24
+ break
25
+ if fc < fd:
26
+ b, d, fd = d, c, fc
27
+ c = b - invphi * (b - a)
28
+ fc = f(c)
29
+ else:
30
+ a, c, fc = c, d, fd
31
+ d = a + invphi * (b - a)
32
+ fd = f(d)
33
+ return (a + b) / 2
34
+
35
+
36
+ def _penalty_value(r, kind="l2", delta=1.0, tau=0.5):
37
+ if kind == "l2":
38
+ return 0.5 * r * r
39
+ elif kind == "l1":
40
+ return np.abs(r)
41
+ elif kind == "huber":
42
+ a = np.abs(r)
43
+ return np.where(a <= delta, 0.5 * r * r, delta * (a - 0.5 * delta))
44
+ elif kind == "pinball":
45
+ return np.where(r >= 0, tau * r, (tau - 1.0) * r)
46
+ else:
47
+ raise ValueError("Unknown penalty")