autogluon.timeseries 1.4.1b20250820__py3-none-any.whl → 1.4.1b20250901__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/timeseries/configs/__init__.py +3 -2
- autogluon/timeseries/configs/hyperparameter_presets.py +62 -0
- autogluon/timeseries/configs/predictor_presets.py +84 -0
- autogluon/timeseries/dataset/ts_dataframe.py +9 -9
- autogluon/timeseries/learner.py +14 -14
- autogluon/timeseries/metrics/__init__.py +5 -5
- autogluon/timeseries/metrics/abstract.py +11 -12
- autogluon/timeseries/models/__init__.py +2 -0
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +39 -41
- autogluon/timeseries/models/abstract/tunable.py +6 -6
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +30 -30
- autogluon/timeseries/models/autogluon_tabular/per_step.py +12 -12
- autogluon/timeseries/models/chronos/model.py +10 -10
- autogluon/timeseries/models/chronos/pipeline/base.py +8 -8
- autogluon/timeseries/models/chronos/pipeline/chronos.py +12 -12
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +12 -12
- autogluon/timeseries/models/chronos/pipeline/utils.py +12 -12
- autogluon/timeseries/models/ensemble/abstract.py +19 -19
- autogluon/timeseries/models/ensemble/basic.py +8 -8
- autogluon/timeseries/models/ensemble/greedy.py +13 -13
- autogluon/timeseries/models/gluonts/abstract.py +24 -24
- autogluon/timeseries/models/gluonts/dataset.py +2 -2
- autogluon/timeseries/models/gluonts/models.py +7 -7
- autogluon/timeseries/models/local/abstract_local_model.py +12 -12
- autogluon/timeseries/models/local/statsforecast.py +11 -11
- autogluon/timeseries/models/multi_window/multi_window_model.py +33 -22
- autogluon/timeseries/models/registry.py +3 -3
- autogluon/timeseries/predictor.py +37 -37
- autogluon/timeseries/regressor.py +13 -13
- autogluon/timeseries/splitter.py +6 -6
- autogluon/timeseries/trainer/__init__.py +3 -0
- autogluon/timeseries/trainer/model_set_builder.py +256 -0
- autogluon/timeseries/trainer/prediction_cache.py +149 -0
- autogluon/timeseries/{trainer.py → trainer/trainer.py} +72 -128
- autogluon/timeseries/transforms/covariate_scaler.py +3 -3
- autogluon/timeseries/transforms/target_scaler.py +7 -7
- autogluon/timeseries/utils/datetime/lags.py +2 -2
- autogluon/timeseries/utils/datetime/time_features.py +2 -2
- autogluon/timeseries/utils/features.py +32 -32
- autogluon/timeseries/version.py +1 -1
- {autogluon.timeseries-1.4.1b20250820.dist-info → autogluon.timeseries-1.4.1b20250901.dist-info}/METADATA +5 -5
- autogluon.timeseries-1.4.1b20250901.dist-info/RECORD +75 -0
- autogluon/timeseries/configs/presets_configs.py +0 -79
- autogluon/timeseries/models/presets.py +0 -280
- autogluon.timeseries-1.4.1b20250820.dist-info/RECORD +0 -72
- /autogluon.timeseries-1.4.1b20250820-py3.9-nspkg.pth → /autogluon.timeseries-1.4.1b20250901-py3.9-nspkg.pth +0 -0
- {autogluon.timeseries-1.4.1b20250820.dist-info → autogluon.timeseries-1.4.1b20250901.dist-info}/LICENSE +0 -0
- {autogluon.timeseries-1.4.1b20250820.dist-info → autogluon.timeseries-1.4.1b20250901.dist-info}/NOTICE +0 -0
- {autogluon.timeseries-1.4.1b20250820.dist-info → autogluon.timeseries-1.4.1b20250901.dist-info}/WHEEL +0 -0
- {autogluon.timeseries-1.4.1b20250820.dist-info → autogluon.timeseries-1.4.1b20250901.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.4.1b20250820.dist-info → autogluon.timeseries-1.4.1b20250901.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.4.1b20250820.dist-info → autogluon.timeseries-1.4.1b20250901.dist-info}/zip-safe +0 -0
autogluon/timeseries/predictor.py
CHANGED
@@ -5,7 +5,7 @@ import os
 import pprint
 import time
 from pathlib import Path
-from typing import Any,
+from typing import Any, Literal, Optional, Type, Union, cast
 
 import numpy as np
 import pandas as pd
@@ -21,7 +21,7 @@ from autogluon.core.utils.decorators import apply_presets
 from autogluon.core.utils.loaders import load_pkl, load_str
 from autogluon.core.utils.savers import save_pkl, save_str
 from autogluon.timeseries import __version__ as current_ag_version
-from autogluon.timeseries.configs import
+from autogluon.timeseries.configs import get_predictor_presets
 from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TimeSeriesDataFrame
 from autogluon.timeseries.learner import TimeSeriesLearner
 from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
@@ -93,7 +93,7 @@ class TimeSeriesPredictor:
     eval_metric_seasonal_period : int, optional
         Seasonal period used to compute some evaluation metrics such as mean absolute scaled error (MASE). Defaults to
         ``None``, in which case the seasonal period is computed based on the data frequency.
-    horizon_weight :
+    horizon_weight : list[float], optional
         Weight assigned to each time step in the forecast horizon when computing the ``eval_metric``. If provided, this
         must be a list with ``prediction_length`` non-negative values, where at least some values are greater than zero.
         AutoGluon will automatically normalize the weights so that they sum up to ``prediction_length``. By default, all
@@ -101,7 +101,7 @@ class TimeSeriesPredictor:
 
         This parameter only affects model selection and ensemble construction; it has no effect on the loss function of
         the individual forecasting models.
-    known_covariates_names:
+    known_covariates_names: list[str], optional
         Names of the covariates that are known in advance for all time steps in the forecast horizon. These are also
         known as dynamic features, exogenous variables, additional regressors or related time series. Examples of such
         covariates include holidays, promotions or weather forecasts.
@@ -111,7 +111,7 @@ class TimeSeriesPredictor:
         - :meth:`~autogluon.timeseries.TimeSeriesPredictor.fit`, :meth:`~autogluon.timeseries.TimeSeriesPredictor.evaluate`, and :meth:`~autogluon.timeseries.TimeSeriesPredictor.leaderboard` will expect a dataframe with columns listed in ``known_covariates_names`` (in addition to the ``target`` column).
         - :meth:`~autogluon.timeseries.TimeSeriesPredictor.predict` will expect an additional keyword argument ``known_covariates`` containing the future values of the known covariates in ``TimeSeriesDataFrame`` format.
 
-    quantile_levels :
+    quantile_levels : list[float], optional
         List of increasing decimals that specifies which quantiles should be estimated when making distributional
         forecasts. Defaults to ``[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]``.
     path : str or pathlib.Path, optional
@@ -147,17 +147,17 @@ class TimeSeriesPredictor:
     def __init__(
         self,
         target: Optional[str] = None,
-        known_covariates_names: Optional[
+        known_covariates_names: Optional[list[str]] = None,
         prediction_length: int = 1,
         freq: Optional[str] = None,
         eval_metric: Union[str, TimeSeriesScorer, None] = None,
         eval_metric_seasonal_period: Optional[int] = None,
-        horizon_weight: Optional[
+        horizon_weight: Optional[list[float]] = None,
         path: Optional[Union[str, Path]] = None,
         verbosity: int = 2,
         log_to_file: bool = True,
         log_file_path: Union[str, Path] = "auto",
-        quantile_levels: Optional[
+        quantile_levels: Optional[list[float]] = None,
         cache_predictions: bool = True,
         label: Optional[str] = None,
         **kwargs,
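The annotations above replace truncated generics with built-in ``list`` types. As a quick illustration, a minimal constructor call exercising the newly annotated parameters (the target and covariate names below are hypothetical):

```python
from autogluon.timeseries import TimeSeriesPredictor

predictor = TimeSeriesPredictor(
    target="demand",                              # hypothetical target column
    prediction_length=48,
    known_covariates_names=["holiday", "promo"],  # list[str]
    quantile_levels=[0.1, 0.5, 0.9],              # list[float]
    horizon_weight=[1.0] * 40 + [2.0] * 8,        # list[float]; one weight per horizon step,
)                                                 # renormalized to sum to prediction_length
```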
@@ -432,16 +432,16 @@ class TimeSeriesPredictor:
         )
         return train_data
 
-    @apply_presets(
+    @apply_presets(get_predictor_presets())
     def fit(
         self,
         train_data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str],
         tuning_data: Optional[Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]] = None,
         time_limit: Optional[int] = None,
         presets: Optional[str] = None,
-        hyperparameters: Optional[Union[str,
-        hyperparameter_tune_kwargs: Optional[Union[str,
-        excluded_model_types: Optional[
+        hyperparameters: Optional[Union[str, dict[Union[str, Type], Any]]] = None,
+        hyperparameter_tune_kwargs: Optional[Union[str, dict]] = None,
+        excluded_model_types: Optional[list[str]] = None,
         num_val_windows: int = 1,
         val_step_size: Optional[int] = None,
         refit_every_n_windows: Optional[int] = 1,
@@ -614,7 +614,7 @@ class TimeSeriesPredictor:
                     "scheduler": "local",
                 },
             )
-        excluded_model_types:
+        excluded_model_types: list[str], optional
            Banned subset of model types to avoid training during ``fit()``, even if present in ``hyperparameters``.
            For example, the following code will train all models included in the ``high_quality`` presets except ``DeepAR``::
 
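The docstring's example code block is cut off at this hunk boundary; based on the sentence above, it presumably resembles the following sketch:

```python
predictor = TimeSeriesPredictor(prediction_length=48)
predictor.fit(
    train_data,                       # assumed TimeSeriesDataFrame
    presets="high_quality",
    excluded_model_types=["DeepAR"],  # skip DeepAR even though the preset includes it
)
```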
@@ -779,7 +779,7 @@ class TimeSeriesPredictor:
         self.save()
         return self
 
-    def model_names(self) ->
+    def model_names(self) -> list[str]:
         """Returns the list of model names trained by this predictor object."""
         return self._trainer.get_model_names()
 
@@ -872,11 +872,11 @@ class TimeSeriesPredictor:
         self,
         data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str],
         model: Optional[str] = None,
-        metrics: Optional[Union[str, TimeSeriesScorer,
+        metrics: Optional[Union[str, TimeSeriesScorer, list[Union[str, TimeSeriesScorer]]]] = None,
         cutoff: Optional[int] = None,
         display: bool = False,
         use_cache: bool = True,
-    ) ->
+    ) -> dict[str, float]:
         """Evaluate the forecast accuracy for given dataset.
 
         This method measures the forecast accuracy using the last ``self.prediction_length`` time steps of each time
@@ -907,7 +907,7 @@ class TimeSeriesPredictor:
         model : str, optional
             Name of the model that you would like to evaluate. By default, the best model during training
             (with highest validation score) will be used.
-        metrics : str, TimeSeriesScorer or
+        metrics : str, TimeSeriesScorer or list[Union[str, TimeSeriesScorer]], optional
             Metric or a list of metrics to compute scores with. Defaults to ``self.eval_metric``. Supports both
             metric names as strings and custom metrics based on TimeSeriesScorer.
         cutoff : int, optional
@@ -923,7 +923,7 @@ class TimeSeriesPredictor:
 
         Returns
         -------
-        scores_dict :
+        scores_dict : dict[str, float]
             Dictionary where keys = metrics, values = performance along each metric. For consistency, error metrics
             will have their signs flipped to obey this convention. For example, negative MAPE values will be reported.
             To get the ``eval_metric`` score, do ``output[predictor.eval_metric.name]``.
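A short sketch of the newly annotated ``dict[str, float]`` return value (``test_data`` is assumed; scores shown are illustrative):

```python
scores = predictor.evaluate(test_data, metrics=["MASE", "WQL"])
# Error metrics are sign-flipped so that higher is always better,
# e.g. {"MASE": -0.92, "WQL": -0.11}
```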
@@ -943,7 +943,7 @@ class TimeSeriesPredictor:
         data: Optional[Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]] = None,
         model: Optional[str] = None,
         metric: Optional[Union[str, TimeSeriesScorer]] = None,
-        features: Optional[
+        features: Optional[list[str]] = None,
         time_limit: Optional[float] = None,
         method: Literal["naive", "permutation"] = "permutation",
         subsample_size: int = 50,
@@ -990,7 +990,7 @@ class TimeSeriesPredictor:
         metric : str or TimeSeriesScorer, optional
             Metric to be used for computing feature importance. If None, the ``eval_metric`` specified during initialization of
             the ``TimeSeriesPredictor`` will be used.
-        features :
+        features : list[str], optional
             List of feature names that feature importances are calculated for and returned. By default, all feature importances
             will be returned.
         method : {"permutation", "naive"}, default = "permutation"
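For illustration, a hedged call using the now-typed ``features`` parameter (covariate names reuse the hypothetical constructor sketch above):

```python
fi = predictor.feature_importance(
    test_data,
    features=["holiday", "promo"],  # hypothetical covariate names
    method="permutation",
    subsample_size=50,              # items sampled for the permutation estimate
)
```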
@@ -1168,7 +1168,7 @@ class TimeSeriesPredictor:
         self._learner = tmp_learner
         self._save_version_file()
 
-    def info(self) ->
+    def info(self) -> dict[str, Any]:
         """Returns a dictionary of objects each describing an attribute of the training process and trained models."""
         return self._learner.get_info(include_model_info=True)
 
@@ -1182,8 +1182,8 @@ class TimeSeriesPredictor:
         return self._trainer.get_model_best()
 
     def persist(
-        self, models: Union[Literal["all", "best"],
-    ) ->
+        self, models: Union[Literal["all", "best"], list[str]] = "best", with_ancestors: bool = True
+    ) -> list[str]:
         """Persist models in memory for reduced inference latency. This is particularly important if the models are being used for online
         inference where low latency is critical. If models are not persisted in memory, they are loaded from disk every time they are
         asked to make predictions. This is especially cumbersome for large deep learning based models which have to be loaded into
@@ -1203,12 +1203,12 @@ class TimeSeriesPredictor:
 
         Returns
         -------
-        list_of_models :
+        list_of_models : list[str]
             List of persisted model names.
         """
         return self._learner.persist_trainer(models=models, with_ancestors=with_ancestors)
 
-    def unpersist(self) ->
+    def unpersist(self) -> list[str]:
         """Unpersist models in memory for reduced memory usage. If models are not persisted in memory, they are loaded from
         disk every time they are asked to make predictions.
 
@@ -1217,7 +1217,7 @@ class TimeSeriesPredictor:
 
         Returns
        -------
-        list_of_models :
+        list_of_models : list[str]
             List of unpersisted model names.
         """
         return self._learner.unpersist_trainer()
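A minimal usage sketch of the persist/unpersist pair (``recent_data`` is an assumed ``TimeSeriesDataFrame``):

```python
persisted: list[str] = predictor.persist(models="best", with_ancestors=True)
forecast = predictor.predict(recent_data)  # served from memory, no disk reload
predictor.unpersist()                      # release the memory afterwards
```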
@@ -1227,7 +1227,7 @@ class TimeSeriesPredictor:
         data: Optional[Union[TimeSeriesDataFrame, pd.DataFrame, Path, str]] = None,
         cutoff: Optional[int] = None,
         extra_info: bool = False,
-        extra_metrics: Optional[
+        extra_metrics: Optional[list[Union[str, TimeSeriesScorer]]] = None,
         display: bool = False,
         use_cache: bool = True,
         **kwargs,
@@ -1271,7 +1271,7 @@ class TimeSeriesPredictor:
             If True, the leaderboard will contain an additional column ``hyperparameters`` with the hyperparameters used
             by each model during training. An empty dictionary ``{}`` means that the model was trained with default
             hyperparameters.
-        extra_metrics :
+        extra_metrics : list[Union[str, TimeSeriesScorer]], optional
             A list of metrics to calculate scores for and include in the output DataFrame.
 
             Only valid when ``data`` is specified. The scores refer to the scores on ``data`` (same data as used to
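A hedged sketch combining ``extra_metrics`` with the ``extra_info`` flag visible in the signature above:

```python
lb = predictor.leaderboard(
    test_data,
    extra_metrics=["WAPE", "RMSE"],  # one extra score column per metric
    extra_info=True,                 # adds the ``hyperparameters`` column
)
```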
@@ -1355,7 +1355,7 @@ class TimeSeriesPredictor:
         data = self._check_and_prepare_data_frame(data)
         return make_future_data_frame(data, prediction_length=self.prediction_length, freq=self.freq)
 
-    def fit_summary(self, verbosity: int = 1) ->
+    def fit_summary(self, verbosity: int = 1) -> dict[str, Any]:
         """Output summary of information about models produced during
         :meth:`~autogluon.timeseries.TimeSeriesPredictor.fit`.
 
@@ -1366,7 +1366,7 @@ class TimeSeriesPredictor:
 
         Returns
         -------
-        summary_dict :
+        summary_dict : dict[str, Any]
             Dict containing various detailed information. We do not recommend directly printing this dict as it may
             be very large.
         """
@@ -1405,7 +1405,7 @@ class TimeSeriesPredictor:
         print("****************** End of fit() summary ******************")
         return results
 
-    def refit_full(self, model: str = "all", set_best_to_refit_full: bool = True) ->
+    def refit_full(self, model: str = "all", set_best_to_refit_full: bool = True) -> dict[str, str]:
         """Retrain model on all of the data (training + validation).
 
         This method can only be used if no ``tuning_data`` was passed to :meth:`~autogluon.timeseries.TimeSeriesPredictor.fit`.
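Sketch of the newly annotated ``dict[str, str]`` return value (the mapping shown is illustrative; the ``_FULL`` suffix appears in the hunk below):

```python
refit_map = predictor.refit_full(model="all", set_best_to_refit_full=True)
# Maps each original model to its refit counterpart, e.g. {"DeepAR": "DeepAR_FULL"}
```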
@@ -1483,7 +1483,7 @@ class TimeSeriesPredictor:
         train_data = trainer.load_train_data()
         val_data = trainer.load_val_data()
         base_model_names = trainer.get_model_names(level=0)
-        pred_proba_dict_val:
+        pred_proba_dict_val: dict[str, list[TimeSeriesDataFrame]] = {
             model_name: trainer._get_model_oof_predictions(model_name)
             for model_name in base_model_names
             if "_FULL" not in model_name
@@ -1497,7 +1497,7 @@ class TimeSeriesPredictor:
             base_model_names, data=past_data, known_covariates=known_covariates
         )
 
-        y_val:
+        y_val: list[TimeSeriesDataFrame] = [
             select_target(df) for df in trainer._get_ensemble_oof_data(train_data=train_data, val_data=val_data)
         ]
         y_test: TimeSeriesDataFrame = select_target(test_data)
@@ -1520,8 +1520,8 @@ class TimeSeriesPredictor:
         self,
         data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str],
         predictions: Optional[TimeSeriesDataFrame] = None,
-        quantile_levels: Optional[
-        item_ids: Optional[
+        quantile_levels: Optional[list[float]] = None,
+        item_ids: Optional[list[Union[str, int]]] = None,
         max_num_item_ids: int = 8,
         max_history_length: Optional[int] = None,
         point_forecast_column: Optional[str] = None,
@@ -1535,10 +1535,10 @@ class TimeSeriesPredictor:
             Observed time series data.
         predictions : TimeSeriesDataFrame, optional
             Predictions generated by calling :meth:`~autogluon.timeseries.TimeSeriesPredictor.predict`.
-        quantile_levels :
+        quantile_levels : list[float], optional
             Quantile levels for which to plot the prediction intervals. Defaults to lowest & highest quantile levels
             available in ``predictions``.
-        item_ids :
+        item_ids : list[Union[str, int]], optional
             If provided, plots will only be generated for time series with these item IDs. By default (if set to
             ``None``), item IDs are selected randomly. In either case, plots are generated for at most
             ``max_num_item_ids`` time series.
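A hedged plotting sketch using the now-typed parameters (item IDs are hypothetical):

```python
predictions = predictor.predict(test_data)
predictor.plot(
    test_data,
    predictions,
    quantile_levels=[0.1, 0.9],       # shade the outermost prediction interval
    item_ids=["store_1", "store_2"],  # hypothetical item IDs
    max_history_length=200,
)
```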
autogluon/timeseries/regressor.py
CHANGED
@@ -1,6 +1,6 @@
 import logging
 import time
-from typing import Any,
+from typing import Any, Optional, Protocol, Union, overload, runtime_checkable
 
 import numpy as np
 import pandas as pd
@@ -40,42 +40,42 @@ class GlobalCovariateRegressor(CovariateRegressor):
 
     Parameters
     ----------
-    model_name
+    model_name
         Name of the tabular regression model. See ``autogluon.tabular.registry.ag_model_registry`` or
         `the documentation <https://auto.gluon.ai/stable/api/autogluon.tabular.models.html>`_ for the list of available
         tabular models.
-    model_hyperparameters
+    model_hyperparameters
         Hyperparameters passed to the tabular regression model.
-    eval_metric
+    eval_metric
         Metric provided as ``eval_metric`` to the tabular regression model. Must be compatible with `problem_type="regression"`.
-    refit_during_predict
+    refit_during_predict
         If True, the model will be re-trained every time ``fit_transform`` is called. If False, the model will only be
         trained the first time that ``fit_transform`` is called, and future calls to ``fit_transform`` will only perform a
         ``transform``.
-    max_num_samples
+    max_num_samples
         If not None, training dataset passed to regression model will contain at most this many rows.
-    covariate_metadata
+    covariate_metadata
         Metadata object describing the covariates available in the dataset.
-    target
+    target
         Name of the target column.
-    validation_fraction
+    validation_fraction
         Fraction of observations that are reserved as the validation set during training (starting from the end of each
         time series).
-    fit_time_fraction
+    fit_time_fraction
         The fraction of the time_limit that will be reserved for model training. The remainder (1 - fit_time_fraction)
         will be reserved for prediction.
 
         If the estimated prediction time exceeds ``(1 - fit_time_fraction) * time_limit``, the regressor will be disabled.
-    include_static_features
+    include_static_features
         If True, static features will be included as features for the regressor.
-    include_item_id
+    include_item_id
         If True, item_id will be included as a categorical feature for the regressor.
     """
 
     def __init__(
         self,
         model_name: str = "CAT",
-        model_hyperparameters: Optional[
+        model_hyperparameters: Optional[dict[str, Any]] = None,
         eval_metric: str = "mean_absolute_error",
         refit_during_predict: bool = False,
         max_num_samples: Optional[int] = 500_000,
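``GlobalCovariateRegressor`` is normally configured through a forecasting model's ``covariate_regressor`` hyperparameter rather than instantiated directly; a hedged sketch of that usage (the model choice and values are assumptions, not taken from this diff):

```python
predictor.fit(
    train_data,
    hyperparameters={
        "TemporalFusionTransformer": {
            # Regress the target on covariates with a CatBoost ("CAT") tabular model;
            # the forecaster is then fit on the residuals.
            "covariate_regressor": {"model_name": "CAT", "model_hyperparameters": {"iterations": 500}},
        },
    },
)
```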
autogluon/timeseries/splitter.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Iterator, Optional
+from typing import Iterator, Optional
 
 from .dataset.ts_dataframe import TimeSeriesDataFrame
 
@@ -13,7 +13,7 @@ class AbstractWindowSplitter:
         self.prediction_length = prediction_length
         self.num_val_windows = num_val_windows
 
-    def split(self, data: TimeSeriesDataFrame) -> Iterator[
+    def split(self, data: TimeSeriesDataFrame) -> Iterator[tuple[TimeSeriesDataFrame, TimeSeriesDataFrame]]:
         raise NotImplementedError
 
 
@@ -33,11 +33,11 @@ class ExpandingWindowSplitter(AbstractWindowSplitter):
 
     Parameters
     ----------
-    prediction_length
+    prediction_length
         Length of the forecast horizon.
-    num_val_windows
+    num_val_windows
         Number of windows to generate from each time series in the dataset.
-    val_step_size
+    val_step_size
         The end of each subsequent window is moved this many time steps forward.
     """
 
@@ -47,7 +47,7 @@ class ExpandingWindowSplitter(AbstractWindowSplitter):
             val_step_size = prediction_length
         self.val_step_size = val_step_size
 
-    def split(self, data: TimeSeriesDataFrame) -> Iterator[
+    def split(self, data: TimeSeriesDataFrame) -> Iterator[tuple[TimeSeriesDataFrame, TimeSeriesDataFrame]]:
         """Generate train and validation folds for a time series dataset."""
         for window_idx in range(1, self.num_val_windows + 1):
             val_end = -(self.num_val_windows - window_idx) * self.val_step_size
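Usage sketch of the splitter with its new ``Iterator[tuple[...]]`` annotation (``train_data`` is an assumed ``TimeSeriesDataFrame``):

```python
from autogluon.timeseries.splitter import ExpandingWindowSplitter

# By default each window's end moves forward by ``prediction_length`` steps
# (``val_step_size`` falls back to ``prediction_length``).
splitter = ExpandingWindowSplitter(prediction_length=24, num_val_windows=3)
for train_fold, val_fold in splitter.split(train_data):
    print(len(train_fold), len(val_fold))  # each fold is a TimeSeriesDataFrame pair
```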
autogluon/timeseries/trainer/model_set_builder.py
ADDED
@@ -0,0 +1,256 @@
+import copy
+import logging
+import re
+from collections import defaultdict
+from typing import Any, Optional, Type, Union
+
+from autogluon.common import space
+from autogluon.core import constants
+from autogluon.timeseries.configs import get_hyperparameter_presets
+from autogluon.timeseries.metrics import TimeSeriesScorer
+from autogluon.timeseries.models import ModelRegistry
+from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
+from autogluon.timeseries.models.multi_window import MultiWindowBacktestingModel
+from autogluon.timeseries.utils.features import CovariateMetadata
+
+logger = logging.getLogger(__name__)
+
+
+ModelKey = Union[str, Type[AbstractTimeSeriesModel]]
+ModelHyperparameters = dict[str, Any]
+TrainerHyperparameterSpec = dict[ModelKey, list[ModelHyperparameters]]
+
+
+class TrainableModelSetBuilder:
+    """Responsible for building a list of model objects, in priority order, that will be trained by the
+    Trainer."""
+
+    VALID_AG_ARGS_KEYS = {
+        "name",
+        "name_prefix",
+        "name_suffix",
+    }
+
+    def __init__(
+        self,
+        path: str,
+        freq: Optional[str],
+        prediction_length: int,
+        eval_metric: TimeSeriesScorer,
+        target: str,
+        quantile_levels: list[float],
+        covariate_metadata: CovariateMetadata,
+        multi_window: bool,
+    ):
+        self.path = path
+        self.freq = freq
+        self.prediction_length = prediction_length
+        self.eval_metric = eval_metric
+        self.target = target
+        self.quantile_levels = quantile_levels
+        self.covariate_metadata = covariate_metadata
+        self.multi_window = multi_window
+
+    def get_model_set(
+        self,
+        hyperparameters: Union[str, dict, None],
+        hyperparameter_tune: bool,
+        excluded_model_types: Optional[list[str]],
+        banned_model_names: Optional[list[str]] = None,
+    ) -> list[AbstractTimeSeriesModel]:
+        """Resolve hyperparameters and create the requested list of models"""
+        models = []
+        banned_model_names = [] if banned_model_names is None else banned_model_names.copy()
+
+        # resolve and normalize hyperparameters
+        model_hp_map: TrainerHyperparameterSpec = HyperparameterBuilder(
+            hyperparameters=hyperparameters,
+            hyperparameter_tune=hyperparameter_tune,
+            excluded_model_types=excluded_model_types,
+        ).get_hyperparameters()
+
+        for k in model_hp_map.keys():
+            if isinstance(k, type) and not issubclass(k, AbstractTimeSeriesModel):
+                raise ValueError(f"Custom model type {k} must inherit from `AbstractTimeSeriesModel`.")
+
+        model_priority_list = sorted(
+            model_hp_map.keys(), key=lambda x: ModelRegistry.get_model_priority(x), reverse=True
+        )
+
+        for model_key in model_priority_list:
+            model_type = self._get_model_type(model_key)
+
+            for model_hps in model_hp_map[model_key]:
+                ag_args = model_hps.pop(constants.AG_ARGS, {})
+
+                for key in ag_args:
+                    if key not in self.VALID_AG_ARGS_KEYS:
+                        raise ValueError(
+                            f"Model {model_type} received unknown ag_args key: {key} (valid keys {self.VALID_AG_ARGS_KEYS})"
+                        )
+                model_name_base = self._get_model_name(ag_args, model_type)
+
+                model_type_kwargs: dict[str, Any] = dict(
+                    name=model_name_base,
+                    hyperparameters=model_hps,
+                    **self._get_default_model_init_kwargs(),
+                )
+
+                # add models while preventing name collisions
+                model = model_type(**model_type_kwargs)
+                model_type_kwargs.pop("name", None)
+
+                increment = 1
+                while model.name in banned_model_names:
+                    increment += 1
+                    model = model_type(name=f"{model_name_base}_{increment}", **model_type_kwargs)
+
+                if self.multi_window:
+                    model = MultiWindowBacktestingModel(model_base=model, name=model.name, **model_type_kwargs)  # type: ignore
+
+                banned_model_names.append(model.name)
+                models.append(model)
+
+        return models
+
+    def _get_model_type(self, model: ModelKey) -> Type[AbstractTimeSeriesModel]:
+        if isinstance(model, str):
+            model_type: Type[AbstractTimeSeriesModel] = ModelRegistry.get_model_class(model)
+        elif isinstance(model, type):
+            model_type = model
+        else:
+            raise ValueError(
+                f"Keys of the `hyperparameters` dictionary must be strings or types, received {type(model)}."
+            )
+
+        return model_type
+
+    def _get_default_model_init_kwargs(self) -> dict[str, Any]:
+        return dict(
+            path=self.path,
+            freq=self.freq,
+            prediction_length=self.prediction_length,
+            eval_metric=self.eval_metric,
+            target=self.target,
+            quantile_levels=self.quantile_levels,
+            covariate_metadata=self.covariate_metadata,
+        )
+
+    def _get_model_name(self, ag_args: dict[str, Any], model_type: Type[AbstractTimeSeriesModel]) -> str:
+        name = ag_args.get("name")
+        if name is None:
+            name_stem = re.sub(r"Model$", "", model_type.__name__)
+            name_prefix = ag_args.get("name_prefix", "")
+            name_suffix = ag_args.get("name_suffix", "")
+            name = name_prefix + name_stem + name_suffix
+        return name
+
+
+class HyperparameterBuilder:
+    """Given user hyperparameter specifications, this class resolves them against presets, removes
+    excluded model types and canonicalizes the hyperparameter specification.
+    """
+
+    def __init__(
+        self,
+        hyperparameters: Union[str, dict, None],
+        hyperparameter_tune: bool,
+        excluded_model_types: Optional[list[str]],
+    ):
+        self.hyperparameters = hyperparameters
+        self.hyperparameter_tune = hyperparameter_tune
+        self.excluded_model_types = excluded_model_types
+
+    def get_hyperparameters(self) -> TrainerHyperparameterSpec:
+        hyperparameter_dict = {}
+        hp_presets = get_hyperparameter_presets()
+
+        if self.hyperparameters is None:
+            hyperparameter_dict = hp_presets["default"]
+        elif isinstance(self.hyperparameters, str):
+            try:
+                hyperparameter_dict = hp_presets[self.hyperparameters]
+            except KeyError:
+                raise ValueError(f"{self.hyperparameters} is not a valid preset.")
+        elif isinstance(self.hyperparameters, dict):
+            hyperparameter_dict = copy.deepcopy(self.hyperparameters)
+        else:
+            raise ValueError(
+                f"hyperparameters must be a dict, a string or None (received {type(self.hyperparameters)}). "
+                f"Please see the documentation for TimeSeriesPredictor.fit"
+            )
+
+        return self._check_and_clean_hyperparameters(hyperparameter_dict)  # type: ignore
+
+    def _check_and_clean_hyperparameters(
+        self,
+        hyperparameters: dict[ModelKey, Union[ModelHyperparameters, list[ModelHyperparameters]]],
+    ) -> TrainerHyperparameterSpec:
+        """Convert the hyperparameters dictionary to a unified format:
+        - Remove 'Model' suffix from model names, if present
+        - Make sure that each value in the hyperparameters dict is a list with model configurations
+        - Checks if hyperparameters contain searchspaces
+        """
+        excluded_models = self._get_excluded_models()
+        hyperparameters_clean = defaultdict(list)
+        for model_name, model_hyperparameters in hyperparameters.items():
+            # Handle model names ending with "Model", e.g., "DeepARModel" is mapped to "DeepAR"
+            if isinstance(model_name, str):
+                model_name = self._normalize_model_type_name(model_name)
+                if model_name in excluded_models:
+                    logger.info(
+                        f"\tFound '{model_name}' model in `hyperparameters`, but '{model_name}' "
+                        "is present in `excluded_model_types` and will be removed."
+                    )
+                    continue
+            if not isinstance(model_hyperparameters, list):
+                model_hyperparameters = [model_hyperparameters]
+            hyperparameters_clean[model_name].extend(model_hyperparameters)
+
+        self._verify_searchspaces(hyperparameters_clean)
+
+        return dict(hyperparameters_clean)
+
+    def _get_excluded_models(self) -> set[str]:
+        excluded_models = set()
+        if self.excluded_model_types is not None and len(self.excluded_model_types) > 0:
+            if not isinstance(self.excluded_model_types, list):
+                raise ValueError(f"`excluded_model_types` must be a list, received {type(self.excluded_model_types)}")
+            logger.info(f"Excluded model types: {self.excluded_model_types}")
+            for model in self.excluded_model_types:
+                if not isinstance(model, str):
+                    raise ValueError(f"Each entry in `excluded_model_types` must be a string, received {type(model)}")
+                excluded_models.add(self._normalize_model_type_name(model))
+        return excluded_models
+
+    @staticmethod
+    def _normalize_model_type_name(model_name: str) -> str:
+        return model_name.removesuffix("Model")
+
+    def _verify_searchspaces(self, hyperparameters: dict[str, list[ModelHyperparameters]]):
+        if self.hyperparameter_tune:
+            for model, model_hps_list in hyperparameters.items():
+                for model_hps in model_hps_list:
+                    if contains_searchspace(model_hps):
+                        return
+
+            raise ValueError(
+                "Hyperparameter tuning specified, but no model contains a hyperparameter search space. "
+                "Please disable hyperparameter tuning with `hyperparameter_tune_kwargs=None` or provide a search space "
+                "for at least one model."
+            )
+        else:
+            for model, model_hps_list in hyperparameters.items():
+                for model_hps in model_hps_list:
+                    if contains_searchspace(model_hps):
+                        raise ValueError(
+                            f"Hyperparameter tuning not specified, so hyperparameters must have fixed values. "
+                            f"However, for model {model} hyperparameters {model_hps} contain a search space."
+                        )
+
+
+def contains_searchspace(model_hyperparameters: ModelHyperparameters) -> bool:
+    for hp_value in model_hyperparameters.values():
+        if isinstance(hp_value, space.Space):
+            return True
+    return False
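To make the canonicalization performed by ``HyperparameterBuilder`` concrete, a small illustrative sketch (model names and values are hypothetical):

```python
from autogluon.common import space

raw = {
    "DeepARModel": {"hidden_size": 40},    # "Model" suffix is stripped -> "DeepAR"
    "ETS": [{}, {"seasonal_period": 24}],  # lists are kept as multiple configs
}
# Canonical form produced by _check_and_clean_hyperparameters:
# {"DeepAR": [{"hidden_size": 40}], "ETS": [{}, {"seasonal_period": 24}]}

# With hyperparameter_tune=False, a search space raises a ValueError:
bad = {"DeepAR": {"hidden_size": space.Int(20, 100)}}
```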