autogluon.timeseries 1.2.1b20250422__py3-none-any.whl → 1.2.1b20250424__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/timeseries/dataset/ts_dataframe.py +18 -3
- autogluon/timeseries/learner.py +0 -4
- autogluon/timeseries/metrics/__init__.py +1 -30
- autogluon/timeseries/metrics/abstract.py +0 -10
- autogluon/timeseries/metrics/point.py +41 -131
- autogluon/timeseries/metrics/quantile.py +15 -36
- autogluon/timeseries/models/abstract/__init__.py +2 -2
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +178 -129
- autogluon/timeseries/models/chronos/model.py +3 -2
- autogluon/timeseries/models/ensemble/__init__.py +3 -2
- autogluon/timeseries/models/ensemble/abstract.py +139 -0
- autogluon/timeseries/models/ensemble/basic.py +88 -0
- autogluon/timeseries/models/ensemble/{greedy_ensemble.py → greedy.py} +67 -61
- autogluon/timeseries/models/presets.py +0 -4
- autogluon/timeseries/predictor.py +51 -26
- autogluon/timeseries/trainer.py +35 -27
- autogluon/timeseries/utils/features.py +4 -1
- autogluon/timeseries/utils/warning_filters.py +1 -1
- autogluon/timeseries/version.py +1 -1
- {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/METADATA +5 -4
- {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/RECORD +28 -27
- autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -86
- /autogluon.timeseries-1.2.1b20250422-py3.9-nspkg.pth → /autogluon.timeseries-1.2.1b20250424-py3.9-nspkg.pth +0 -0
- {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/LICENSE +0 -0
- {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/NOTICE +0 -0
- {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/WHEEL +0 -0
- {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.2.1b20250422.dist-info → autogluon.timeseries-1.2.1b20250424.dist-info}/zip-safe +0 -0
autogluon/timeseries/models/ensemble/{greedy_ensemble.py → greedy.py}
RENAMED
@@ -9,9 +9,10 @@ import autogluon.core as ag
 from autogluon.core.models.greedy_ensemble.ensemble_selection import EnsembleSelection
 from autogluon.timeseries import TimeSeriesDataFrame
 from autogluon.timeseries.metrics import TimeSeriesScorer
-from autogluon.timeseries.models.ensemble import AbstractTimeSeriesEnsembleModel
 from autogluon.timeseries.utils.datetime import get_seasonality
 
+from .abstract import AbstractWeightedTimeSeriesEnsembleModel
+
 logger = logging.getLogger(__name__)
 
 
@@ -24,16 +25,15 @@ class TimeSeriesEnsembleSelection(EnsembleSelection):
         sorted_initialization: bool = False,
         bagging: bool = False,
         tie_breaker: str = "random",
-        random_state: np.random.RandomState = None,
+        random_state: Optional[np.random.RandomState] = None,
         prediction_length: int = 1,
         target: str = "target",
-        eval_metric_seasonal_period:
-        horizon_weight: Optional[np.ndarray] = None,
+        eval_metric_seasonal_period: int = 1,
         **kwargs,
     ):
         super().__init__(
             ensemble_size=ensemble_size,
-            metric=metric,
+            metric=metric, # type: ignore
            problem_type=problem_type,
            sorted_initialization=sorted_initialization,
            bagging=bagging,
@@ -44,14 +44,33 @@ class TimeSeriesEnsembleSelection(EnsembleSelection):
         self.prediction_length = prediction_length
         self.target = target
         self.eval_metric_seasonal_period = eval_metric_seasonal_period
-        self.
+        self.metric: TimeSeriesScorer
 
-
+        self.dummy_pred_per_window = []
+        self.scorer_per_window = []
+
+        self.dummy_pred_per_window: Optional[List[TimeSeriesDataFrame]]
+        self.scorer_per_window: Optional[List[TimeSeriesScorer]]
+        self.data_future_per_window: Optional[List[TimeSeriesDataFrame]]
+
+    def fit( # type: ignore
         self,
-        predictions: List[List[TimeSeriesDataFrame]],
+        predictions: List[List[TimeSeriesDataFrame]],
         labels: List[TimeSeriesDataFrame],
-        time_limit: Optional[
-
+        time_limit: Optional[float] = None,
+    ):
+        return super().fit(
+            predictions=predictions, # type: ignore
+            labels=labels, # type: ignore
+            time_limit=time_limit,
+        )
+
+    def _fit( # type: ignore
+        self,
+        predictions: List[List[TimeSeriesDataFrame]],
+        labels: List[TimeSeriesDataFrame],
+        time_limit: Optional[float] = None,
+        sample_weight: Optional[List[float]] = None,
     ):
         # Stack predictions for each model into a 3d tensor of shape [num_val_windows, num_rows, num_cols]
         stacked_predictions = [np.stack(preds) for preds in predictions]
@@ -77,26 +96,33 @@ class TimeSeriesEnsembleSelection(EnsembleSelection):
 
         super()._fit(
             predictions=stacked_predictions,
-            labels=data_future,
+            labels=data_future, # type: ignore
             time_limit=time_limit,
         )
         self.dummy_pred_per_window = None
         self.evaluator_per_window = None
         self.data_future_per_window = None
 
-    def _calculate_regret(
+    def _calculate_regret( # type: ignore
+        self,
+        y_true,
+        y_pred_proba,
+        metric: TimeSeriesScorer,
+        sample_weight=None,
+    ):
         # Compute average score across all validation windows
         total_score = 0.0
+
+        assert self.data_future_per_window is not None
+        assert self.dummy_pred_per_window is not None
+        assert self.scorer_per_window is not None
+
         for window_idx, data_future in enumerate(self.data_future_per_window):
             dummy_pred = self.dummy_pred_per_window[window_idx]
             dummy_pred[list(dummy_pred.columns)] = y_pred_proba[window_idx]
             # We use scorer.compute_metric instead of scorer.score to avoid repeated calls to scorer.save_past_metrics
             metric_value = self.scorer_per_window[window_idx].compute_metric(
-                data_future,
-                dummy_pred,
-                target=self.target,
-                prediction_length=self.prediction_length,
-                horizon_weight=self.horizon_weight,
+                data_future, dummy_pred, target=self.target
             )
             total_score += metric.sign * metric_value
         avg_score = total_score / len(self.data_future_per_window)
@@ -104,32 +130,46 @@ class TimeSeriesEnsembleSelection(EnsembleSelection):
         return -avg_score
 
 
-class
-    """Constructs a weighted ensemble using the greedy Ensemble Selection algorithm
+class GreedyEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
+    """Constructs a weighted ensemble using the greedy Ensemble Selection algorithm by
+    Caruana et al. [Car2004]
+
+    Other Parameters
+    ----------------
+    ensemble_size: int, default = 100
+        Number of models (with replacement) to include in the ensemble.
+
+    References
+    ----------
+    .. [Car2024] Caruana, Rich, et al. "Ensemble selection from libraries of models."
+        Proceedings of the twenty-first international conference on Machine learning. 2004.
+    """
 
-    def __init__(self, name: Optional[str] = None,
+    def __init__(self, name: Optional[str] = None, **kwargs):
         if name is None:
+            # FIXME: the name here is kept for backward compatibility. it will be called
+            # GreedyEnsemble in v1.4 once ensemble choices are exposed
            name = "WeightedEnsemble"
         super().__init__(name=name, **kwargs)
-        self.ensemble_size = ensemble_size
-        self.model_to_weight: Dict[str, float] = {}
 
-    def
+    def _get_default_hyperparameters(self) -> Dict:
+        return {"ensemble_size": 100}
+
+    def _fit(
         self,
         predictions_per_window: Dict[str, List[TimeSeriesDataFrame]],
         data_per_window: List[TimeSeriesDataFrame],
-
-
+        model_scores: Optional[Dict[str, float]] = None,
+        time_limit: Optional[float] = None,
     ):
         if self.eval_metric_seasonal_period is None:
             self.eval_metric_seasonal_period = get_seasonality(self.freq)
         ensemble_selection = TimeSeriesEnsembleSelection(
-            ensemble_size=self.ensemble_size,
+            ensemble_size=self.get_hyperparameters()["ensemble_size"],
             metric=self.eval_metric,
             prediction_length=self.prediction_length,
             target=self.target,
             eval_metric_seasonal_period=self.eval_metric_seasonal_period,
-            horizon_weight=self.horizon_weight,
         )
         ensemble_selection.fit(
             predictions=list(predictions_per_window.values()),
@@ -143,37 +183,3 @@ class TimeSeriesGreedyEnsemble(AbstractTimeSeriesEnsembleModel):
 
         weights_for_printing = {model: round(weight, 2) for model, weight in self.model_to_weight.items()}
         logger.info(f"\tEnsemble weights: {pprint.pformat(weights_for_printing, width=200)}")
-
-    @property
-    def model_names(self) -> List[str]:
-        return list(self.model_to_weight.keys())
-
-    @property
-    def model_weights(self) -> np.ndarray:
-        return np.array(list(self.model_to_weight.values()), dtype=np.float64)
-
-    def predict(self, data: Dict[str, Optional[TimeSeriesDataFrame]], **kwargs) -> TimeSeriesDataFrame:
-        if not set(self.model_names).issubset(set(data.keys())):
-            raise ValueError(
-                f"Set of models given for prediction in {self.name} differ from those provided during initialization."
-            )
-        for model_name, model_pred in data.items():
-            if model_pred is None:
-                raise RuntimeError(f"{self.name} cannot predict because base model {model_name} failed.")
-
-        # Make sure that all predictions have same shape
-        assert len(set(pred.shape for pred in data.values())) == 1
-
-        return sum(data[model_name] * weight for model_name, weight in self.model_to_weight.items())
-
-    def get_info(self) -> dict:
-        info = super().get_info()
-        info["model_weights"] = self.model_to_weight
-        return info
-
-    def remap_base_models(self, model_refit_map: Dict[str, str]) -> None:
-        updated_weights = {}
-        for model, weight in self.model_to_weight.items():
-            model_full_name = model_refit_map.get(model, model)
-            updated_weights[model_full_name] = weight
-        self.model_to_weight = updated_weights
autogluon/timeseries/models/presets.py
CHANGED
@@ -4,8 +4,6 @@ import re
 from collections import defaultdict
 from typing import Any, Dict, List, Optional, Type, Union
 
-import numpy as np
-
 from autogluon.common import space
 from autogluon.core import constants
 from autogluon.timeseries.metrics import TimeSeriesScorer
@@ -186,7 +184,6 @@ def get_preset_models(
     path: str,
     eval_metric: Union[str, TimeSeriesScorer],
     eval_metric_seasonal_period: Optional[int],
-    horizon_weight: Optional[np.ndarray],
     hyperparameters: Union[str, Dict, None],
     hyperparameter_tune: bool,
     covariate_metadata: CovariateMetadata,
@@ -265,7 +262,6 @@ def get_preset_models(
             eval_metric=eval_metric,
             eval_metric_seasonal_period=eval_metric_seasonal_period,
             covariate_metadata=covariate_metadata,
-            horizon_weight=horizon_weight,
             hyperparameters=model_hps,
             **kwargs,
         )
autogluon/timeseries/predictor.py
CHANGED
@@ -24,7 +24,7 @@ from autogluon.timeseries import __version__ as current_ag_version
 from autogluon.timeseries.configs import TIMESERIES_PRESETS_CONFIGS
 from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TimeSeriesDataFrame
 from autogluon.timeseries.learner import TimeSeriesLearner
-from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
+from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
 from autogluon.timeseries.splitter import ExpandingWindowSplitter
 from autogluon.timeseries.trainer import TimeSeriesTrainer
 from autogluon.timeseries.utils.forecast import make_future_data_frame
@@ -93,14 +93,6 @@ class TimeSeriesPredictor:
     eval_metric_seasonal_period : int, optional
         Seasonal period used to compute some evaluation metrics such as mean absolute scaled error (MASE). Defaults to
         ``None``, in which case the seasonal period is computed based on the data frequency.
-    horizon_weight : List[float], optional
-        Weight assigned to each time step in the forecast horizon when computing the `eval_metric`. If provided, this
-        must be a list with `prediction_length` non-negative values, where at least some values are greater than zero.
-        AutoGluon will automatically normalize the weights so that they sum up to `prediction_length`. By default, all
-        time steps in the forecast horizon have the same weight, which is equivalent to setting `horizon_weight = [1] * prediction_length`.
-
-        This parameter only affects model selection and ensemble construction; it has no effect on the loss function of
-        the individual forecasting models.
     known_covariates_names: List[str], optional
         Names of the covariates that are known in advance for all time steps in the forecast horizon. These are also
         known as dynamic features, exogenous variables, additional regressors or related time series. Examples of such
@@ -152,7 +144,6 @@ class TimeSeriesPredictor:
         freq: Optional[str] = None,
         eval_metric: Union[str, TimeSeriesScorer, None] = None,
         eval_metric_seasonal_period: Optional[int] = None,
-        horizon_weight: list[float] | None = None,
         path: Optional[Union[str, Path]] = None,
         verbosity: int = 2,
         log_to_file: bool = True,
@@ -198,7 +189,6 @@ class TimeSeriesPredictor:
         self.freq = std_freq
         self.eval_metric = check_get_evaluation_metric(eval_metric)
         self.eval_metric_seasonal_period = eval_metric_seasonal_period
-        self.horizon_weight = check_get_horizon_weight(horizon_weight, prediction_length=self.prediction_length)
         if quantile_levels is None:
             quantile_levels = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
         self.quantile_levels = sorted(quantile_levels)
@@ -206,7 +196,6 @@ class TimeSeriesPredictor:
             path_context=self.path,
             eval_metric=eval_metric,
             eval_metric_seasonal_period=eval_metric_seasonal_period,
-            horizon_weight=self.horizon_weight,
             target=self.target,
             known_covariates_names=self.known_covariates_names,
             prediction_length=self.prediction_length,
@@ -316,15 +305,32 @@ class TimeSeriesPredictor:
             df = df.convert_frequency(freq=self.freq)
         return df
 
-    def
-
-
+    def _check_and_prepare_data_frame_for_evaluation(
+        self, data: TimeSeriesDataFrame, cutoff: Optional[int] = None, name: str = "data"
+    ) -> TimeSeriesDataFrame:
+        """
+        Make sure that provided evaluation data includes both historical and future time series values.
+        Slices the dataframe based on cutoff, if needed.
+        """
+        cutoff = -1 * self.prediction_length if cutoff is None else cutoff
+        if not (isinstance(cutoff, int) and cutoff <= -self.prediction_length):
+            raise ValueError(f"`cutoff` should be a negative integer <= -prediction_length, got: {cutoff=}")
+
+        expected_length = -cutoff
+
+        if data.num_timesteps_per_item().min() <= expected_length:
+            var_name = "-cutoff" if expected_length > self.prediction_length else "prediction_length"
             raise ValueError(
-                f"Cannot reserve last
+                f"Cannot reserve last {expected_length} time steps for evaluation in some "
                 f"time series in {name}. Please make sure that {name} includes both historical and future data, and that"
-                f"all time series have length >
+                f"all time series have length > {var_name} (at least {expected_length + 1})"
             )
 
+        if cutoff < -self.prediction_length:
+            data = data.slice_by_timestep(None, cutoff + self.prediction_length)
+
+        return data
+
     def _get_dataset_stats(self, data: TimeSeriesDataFrame) -> str:
         ts_lengths = data.num_timesteps_per_item()
         median_length = ts_lengths.median()
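The new `_check_and_prepare_data_frame_for_evaluation` helper validates the negative `cutoff` and, when it lies further back than `-prediction_length`, slices the data so that the scored window always has exactly `prediction_length` steps. A minimal sketch of the index arithmetic on a plain Python list, purely illustrative (the `evaluation_window` helper is hypothetical, not AutoGluon code), assuming `prediction_length = 3`:

prediction_length = 3
series = list(range(10))  # time steps 0..9 of one series

def evaluation_window(series, prediction_length, cutoff=None):
    # Default cutoff: hold out the last `prediction_length` steps.
    cutoff = -prediction_length if cutoff is None else cutoff
    assert isinstance(cutoff, int) and cutoff <= -prediction_length
    end = cutoff + prediction_length
    history = series[:cutoff]                       # everything before the cutoff
    scored = series[cutoff:end if end != 0 else None]  # the prediction_length scored steps
    return history, scored

print(evaluation_window(series, prediction_length))             # history 0..6, scored 7..9
print(evaluation_window(series, prediction_length, cutoff=-6))  # history 0..3, scored 4..6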
@@ -712,7 +718,7 @@ class TimeSeriesPredictor:
 
         if tuning_data is not None:
             tuning_data = self._check_and_prepare_data_frame(tuning_data, name="tuning_data")
-            self.
+            tuning_data = self._check_and_prepare_data_frame_for_evaluation(tuning_data, name="tuning_data")
             logger.info(f"Provided tuning_data has {self._get_dataset_stats(tuning_data)}")
             # TODO: Use num_val_windows to perform multi-window backtests on tuning_data
             if num_val_windows > 0:
@@ -857,6 +863,7 @@ class TimeSeriesPredictor:
         data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str],
         model: Optional[str] = None,
         metrics: Optional[Union[str, TimeSeriesScorer, List[Union[str, TimeSeriesScorer]]]] = None,
+        cutoff: Optional[int] = None,
         display: bool = False,
         use_cache: bool = True,
     ) -> Dict[str, float]:
@@ -874,11 +881,13 @@ class TimeSeriesPredictor:
         Parameters
         ----------
         data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]
-            The data to evaluate the best model on.
-            ``data`` will be held out for prediction and forecast accuracy will
+            The data to evaluate the best model on. If a ``cutoff`` is not provided, the last ``prediction_length``
+            time steps of each time series in ``data`` will be held out for prediction and forecast accuracy will
+            be calculated on these time steps. When a ``cutoff`` is provided, the ``-cutoff``-th to the
+            ``-cutoff + prediction_length``-th time steps of each time series are used for evaluation.
 
             Must include both historical and future data (i.e., length of all time series in ``data`` must be at least
-            ``prediction_length + 1``).
+            ``prediction_length + 1``, if ``cutoff`` is not provided, ``-cutoff + 1`` otherwise).
 
             The names and dtypes of columns and static features in ``data`` must match the ``train_data`` used to train
             the predictor.
@@ -891,6 +900,11 @@ class TimeSeriesPredictor:
         metrics : str, TimeSeriesScorer or List[Union[str, TimeSeriesScorer]], optional
             Metric or a list of metrics to compute scores with. Defaults to ``self.eval_metric``. Supports both
             metric names as strings and custom metrics based on TimeSeriesScorer.
+        cutoff : int, optional
+            A *negative* integer less than or equal to ``-1 * prediction_length`` denoting the time step in ``data``
+            where the forecast evaluation starts, i.e., time series are evaluated from the ``-cutoff``-th to the
+            ``-cutoff + prediction_length``-th time step. Defaults to ``-1 * prediction_length``, using the last
+            ``prediction_length`` time steps of each time series for evaluation.
         display : bool, default = False
             If True, the scores will be printed.
         use_cache : bool, default = True
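The docstring additions above describe how the new ``cutoff`` argument shifts the evaluation window. A short usage sketch under stated assumptions (``train_data`` and ``test_data`` are pre-existing TimeSeriesDataFrame objects; the horizon of 48 is arbitrary):

from autogluon.timeseries import TimeSeriesPredictor

predictor = TimeSeriesPredictor(prediction_length=48)
predictor.fit(train_data)

# Default behaviour: score the last 48 time steps of each series.
scores_last_window = predictor.evaluate(test_data)

# New in this version (per the diff above): evaluate an earlier window,
# the 48 steps starting 96 steps from the end of each series.
scores_earlier_window = predictor.evaluate(test_data, cutoff=-96)

# leaderboard() accepts the same cutoff argument in this release.
lb = predictor.leaderboard(test_data, cutoff=-96)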
@@ -904,8 +918,10 @@ class TimeSeriesPredictor:
         will have their signs flipped to obey this convention. For example, negative MAPE values will be reported.
         To get the ``eval_metric`` score, do ``output[predictor.eval_metric.name]``.
         """
+
         data = self._check_and_prepare_data_frame(data)
-        self.
+        data = self._check_and_prepare_data_frame_for_evaluation(data, cutoff=cutoff)
+
         scores_dict = self._learner.evaluate(data, model=model, metrics=metrics, use_cache=use_cache)
         if display:
             logger.info("Evaluations on test data:")
@@ -1021,7 +1037,7 @@ class TimeSeriesPredictor:
         """
         if data is not None:
             data = self._check_and_prepare_data_frame(data)
-            self.
+            data = self._check_and_prepare_data_frame_for_evaluation(data)
 
         fi_df = self._learner.get_feature_importance(
             data=data,
@@ -1199,6 +1215,7 @@ class TimeSeriesPredictor:
     def leaderboard(
         self,
         data: Optional[Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]] = None,
+        cutoff: Optional[int] = None,
         extra_info: bool = False,
         extra_metrics: Optional[List[Union[str, TimeSeriesScorer]]] = None,
         display: bool = False,
@@ -1227,13 +1244,19 @@ class TimeSeriesPredictor:
         ----------
         data : Union[TimeSeriesDataFrame, pd.DataFrame, Path, str], optional
             dataset used for additional evaluation. Must include both historical and future data (i.e., length of all
-            time series in ``data`` must be at least ``prediction_length + 1``
+            time series in ``data`` must be at least ``prediction_length + 1``, if ``cutoff`` is not provided,
+            ``-cutoff + 1`` otherwise).
 
             The names and dtypes of columns and static features in ``data`` must match the ``train_data`` used to train
             the predictor.
 
             If provided data is a `pandas.DataFrame`, AutoGluon will attempt to convert it to a `TimeSeriesDataFrame`.
             If a `str` or a `Path` is provided, AutoGluon will attempt to load this file.
+        cutoff : int, optional
+            A *negative* integer less than or equal to ``-1 * prediction_length`` denoting the time step in ``data``
+            where the forecast evaluation starts, i.e., time series are evaluated from the ``-cutoff``-th to the
+            ``-cutoff + prediction_length``-th time step. Defaults to ``-1 * prediction_length``, using the last
+            ``prediction_length`` time steps of each time series for evaluation.
         extra_info : bool, default = False
             If True, the leaderboard will contain an additional column `hyperparameters` with the hyperparameters used
             by each model during training. An empty dictionary `{}` means that the model was trained with default
@@ -1269,10 +1292,12 @@ class TimeSeriesPredictor:
                 raise TypeError(f"TimeSeriesPredictor.leaderboard() got an unexpected keyword argument '{key}'")
         if data is None and extra_metrics is not None:
             raise ValueError("`extra_metrics` is only valid when `data` is specified.")
+        if data is None and cutoff is not None:
+            raise ValueError("`cutoff` is only valid when `data` is specified.")
 
         if data is not None:
             data = self._check_and_prepare_data_frame(data)
-            self.
+            data = self._check_and_prepare_data_frame_for_evaluation(data, cutoff=cutoff)
 
         leaderboard = self._learner.leaderboard(
             data, extra_info=extra_info, extra_metrics=extra_metrics, use_cache=use_cache
@@ -1441,7 +1466,7 @@ class TimeSeriesPredictor:
             return cast(TimeSeriesDataFrame, ts_df[[self.target]])
 
         test_data = self._check_and_prepare_data_frame(test_data)
-        self.
+        test_data = self._check_and_prepare_data_frame_for_evaluation(test_data, name="test_data")
         test_data = self._learner.feature_generator.transform(test_data)
 
         trainer = self._trainer
autogluon/timeseries/trainer.py
CHANGED
@@ -19,8 +19,8 @@ from autogluon.core.utils.loaders import load_pkl
 from autogluon.core.utils.savers import save_pkl
 from autogluon.timeseries import TimeSeriesDataFrame
 from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
-from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
-from autogluon.timeseries.models.ensemble import AbstractTimeSeriesEnsembleModel,
+from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel, TimeSeriesModelBase
+from autogluon.timeseries.models.ensemble import AbstractTimeSeriesEnsembleModel, GreedyEnsemble
 from autogluon.timeseries.models.multi_window import MultiWindowBacktestingModel
 from autogluon.timeseries.models.presets import contains_searchspace, get_preset_models
 from autogluon.timeseries.splitter import AbstractWindowSplitter, ExpandingWindowSplitter
@@ -34,7 +34,7 @@ from autogluon.timeseries.utils.warning_filters import disable_tqdm, warning_fil
 logger = logging.getLogger("autogluon.timeseries.trainer")
 
 
-class TimeSeriesTrainer(AbstractTrainer[
+class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
     _cached_predictions_filename = "cached_predictions.pkl"
 
     max_rel_importance_score: float = 1e5
@@ -47,7 +47,6 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
         prediction_length: int = 1,
         eval_metric: Union[str, TimeSeriesScorer, None] = None,
         eval_metric_seasonal_period: Optional[int] = None,
-        horizon_weight: Optional[np.ndarray] = None,
         save_data: bool = True,
         skip_model_selection: bool = False,
         enable_ensemble: bool = True,
@@ -74,12 +73,12 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
         # Ensemble cannot be fit if val_scores are not computed
         self.enable_ensemble = enable_ensemble and not skip_model_selection
         if ensemble_model_type is None:
-            ensemble_model_type =
+            ensemble_model_type = GreedyEnsemble
         else:
             logger.warning(
                 "Using a custom `ensemble_model_type` is experimental functionality that may break in future versions."
             )
-        self.ensemble_model_type = ensemble_model_type
+        self.ensemble_model_type: Type[AbstractTimeSeriesEnsembleModel] = ensemble_model_type
 
         self.verbosity = verbosity
 
@@ -89,7 +88,6 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
 
         self.eval_metric: TimeSeriesScorer = check_get_evaluation_metric(eval_metric)
         self.eval_metric_seasonal_period = eval_metric_seasonal_period
-        self.horizon_weight = horizon_weight
         if val_splitter is None:
             val_splitter = ExpandingWindowSplitter(prediction_length=self.prediction_length)
         assert isinstance(val_splitter, AbstractWindowSplitter), "val_splitter must be of type AbstractWindowSplitter"
@@ -147,7 +145,7 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
 
     def _add_model(
         self,
-        model:
+        model: TimeSeriesModelBase,
         base_models: Optional[List[str]] = None,
     ):
         """Add a model to the model graph of the trainer. If the model is an ensemble, also add
@@ -155,7 +153,7 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
 
         Parameters
         ----------
-        model :
+        model : TimeSeriesModelBase
            The model to be added to the model graph.
         base_models : List[str], optional, default None
            If the model is an ensemble, the list of base model names that are included in the ensemble.
@@ -444,6 +442,8 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
         num_base_models = len(models)
         model_names_trained = []
         for i, model in enumerate(models):
+            assert isinstance(model, AbstractTimeSeriesModel)
+
             if time_limit is None:
                 time_left = None
                 time_left_for_model = None
@@ -560,20 +560,24 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
         return ensemble_name
 
     def fit_ensemble(
-        self,
+        self,
+        data_per_window: List[TimeSeriesDataFrame],
+        model_names: List[str],
+        time_limit: Optional[float] = None,
     ) -> str:
         logger.info("Fitting simple weighted ensemble.")
 
-
+        predictions_per_window: Dict[str, List[TimeSeriesDataFrame]] = {}
+        base_model_scores = self.get_models_attribute_dict(attribute="val_score", models=self.get_model_names(0))
+
         for model_name in model_names:
-
+            predictions_per_window[model_name] = self._get_model_oof_predictions(model_name=model_name)
 
         time_start = time.time()
         ensemble = self.ensemble_model_type(
             name=self._get_ensemble_model_name(),
             eval_metric=self.eval_metric,
             eval_metric_seasonal_period=self.eval_metric_seasonal_period,
-            horizon_weight=self.horizon_weight,
             target=self.target,
             prediction_length=self.prediction_length,
             path=self.path,
@@ -582,7 +586,12 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
             covariate_metadata=self.covariate_metadata,
         )
         with warning_filter():
-            ensemble.
+            ensemble.fit(
+                predictions_per_window=predictions_per_window,
+                data_per_window=data_per_window,
+                model_scores=base_model_scores,
+                time_limit=time_limit,
+            )
         ensemble.fit_time = time.time() - time_start
 
         predict_time = 0
@@ -592,7 +601,7 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
 
         score_per_fold = []
         for window_idx, data in enumerate(data_per_window):
-            predictions = ensemble.predict({n:
+            predictions = ensemble.predict({n: predictions_per_window[n][window_idx] for n in ensemble.model_names})
             score_per_fold.append(self._score_with_predictions(data, predictions))
         ensemble.val_score = float(np.mean(score_per_fold, dtype=np.float64))
 
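As the removed TimeSeriesGreedyEnsemble.predict earlier in this diff shows, the ensemble's forecast is simply a weight-blend of identically shaped per-model prediction frames, and fit_ensemble above feeds it one such frame per base model for each validation window. A standalone sketch with plain pandas frames, illustrative only (the model names, columns, and weights below are made up):

import pandas as pd

# Hypothetical per-model forecasts for the same items/timestamps and columns.
preds = {
    "DeepAR": pd.DataFrame({"mean": [10.0, 11.0], "0.9": [12.0, 13.0]}),
    "ETS": pd.DataFrame({"mean": [9.0, 10.0], "0.9": [11.0, 12.0]}),
}
weights = {"DeepAR": 0.75, "ETS": 0.25}  # e.g. learned by greedy ensemble selection

# Weighted sum of the prediction frames, mirroring
# sum(data[name] * weight for name, weight in model_to_weight.items()).
ensemble_pred = sum(preds[name] * w for name, w in weights.items())
print(ensemble_pred)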
@@ -737,7 +746,7 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
         return unpersisted_models
 
     def _get_model_for_prediction(
-        self, model: Optional[Union[str,
+        self, model: Optional[Union[str, TimeSeriesModelBase]] = None, verbose: bool = True
     ) -> str:
         """Given an optional identifier or model object, return the name of the model with which to predict.
 
@@ -754,7 +763,7 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
             )
             return self.model_best
         else:
-            if isinstance(model,
+            if isinstance(model, TimeSeriesModelBase):
                 return model.name
             else:
                 if model not in self.get_model_names():
@@ -765,7 +774,7 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
         self,
         data: TimeSeriesDataFrame,
         known_covariates: Optional[TimeSeriesDataFrame] = None,
-        model: Optional[Union[str,
+        model: Optional[Union[str, TimeSeriesModelBase]] = None,
         use_cache: bool = True,
         random_seed: Optional[int] = None,
     ) -> TimeSeriesDataFrame:
@@ -796,13 +805,12 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
             prediction_length=self.prediction_length,
             target=self.target,
             seasonal_period=self.eval_metric_seasonal_period,
-            horizon_weight=self.horizon_weight,
         )
 
     def score(
         self,
         data: TimeSeriesDataFrame,
-        model: Optional[Union[str,
+        model: Optional[Union[str, TimeSeriesModelBase]] = None,
         metric: Union[str, TimeSeriesScorer, None] = None,
         use_cache: bool = True,
     ) -> float:
@@ -813,7 +821,7 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
     def evaluate(
         self,
         data: TimeSeriesDataFrame,
-        model: Optional[Union[str,
+        model: Optional[Union[str, TimeSeriesModelBase]] = None,
         metrics: Optional[Union[str, TimeSeriesScorer, List[Union[str, TimeSeriesScorer]]]] = None,
         use_cache: bool = True,
     ) -> Dict[str, float]:
@@ -835,7 +843,7 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
         self,
         data: TimeSeriesDataFrame,
         features: List[str],
-        model: Optional[Union[str,
+        model: Optional[Union[str, TimeSeriesModelBase]] = None,
         metric: Optional[Union[str, TimeSeriesScorer]] = None,
         time_limit: Optional[float] = None,
         method: Literal["naive", "permutation"] = "permutation",
@@ -938,7 +946,7 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
 
         return importance_df
 
-    def _model_uses_feature(self, model: Union[str,
+    def _model_uses_feature(self, model: Union[str, TimeSeriesModelBase], feature: str) -> bool:
         """Check if the given model uses the given feature."""
         models_with_ancestors = set(self.get_minimum_model_set(model))
 
@@ -980,7 +988,7 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
 
     def _predict_model(
         self,
-        model: Union[str,
+        model: Union[str, TimeSeriesModelBase],
         data: TimeSeriesDataFrame,
         model_pred_dict: Dict[str, Optional[TimeSeriesDataFrame]],
         known_covariates: Optional[TimeSeriesDataFrame] = None,
@@ -996,7 +1004,7 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
 
     def _get_inputs_to_model(
         self,
-        model: Union[str,
+        model: Union[str, TimeSeriesModelBase],
         data: TimeSeriesDataFrame,
         model_pred_dict: Dict[str, Optional[TimeSeriesDataFrame]],
     ) -> Union[TimeSeriesDataFrame, Dict[str, Optional[TimeSeriesDataFrame]]]:
@@ -1188,6 +1196,7 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
             model_name = model.name
             if model._get_tags()["can_refit_full"]:
                 model_full = model.convert_to_refit_full_template()
+                assert isinstance(model_full, AbstractTimeSeriesModel)
                 logger.info(f"Fitting model: {model_full.name}")
                 models_trained = self._train_and_save(
                     train_data=refit_full_data,
@@ -1253,12 +1262,11 @@ class TimeSeriesTrainer(AbstractTrainer[AbstractTimeSeriesModel]):
         freq: Optional[str] = None,
         excluded_model_types: Optional[List[str]] = None,
         hyperparameter_tune: bool = False,
-    ) -> List[
+    ) -> List[TimeSeriesModelBase]:
         return get_preset_models(
             path=self.path,
             eval_metric=self.eval_metric,
             eval_metric_seasonal_period=self.eval_metric_seasonal_period,
-            horizon_weight=self.horizon_weight,
             prediction_length=self.prediction_length,
             freq=freq,
             hyperparameters=hyperparameters,