autogluon.timeseries 1.2.1b20250424__py3-none-any.whl → 1.2.1b20250426__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/timeseries/dataset/ts_dataframe.py +9 -2
- autogluon/timeseries/learner.py +1 -4
- autogluon/timeseries/metrics/__init__.py +36 -8
- autogluon/timeseries/metrics/abstract.py +77 -7
- autogluon/timeseries/metrics/point.py +136 -47
- autogluon/timeseries/metrics/quantile.py +42 -17
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +7 -20
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +106 -66
- autogluon/timeseries/models/autogluon_tabular/transforms.py +15 -10
- autogluon/timeseries/models/ensemble/greedy.py +8 -7
- autogluon/timeseries/models/local/abstract_local_model.py +43 -36
- autogluon/timeseries/models/multi_window/multi_window_model.py +1 -1
- autogluon/timeseries/models/presets.py +0 -2
- autogluon/timeseries/predictor.py +37 -29
- autogluon/timeseries/trainer.py +23 -16
- autogluon/timeseries/version.py +1 -1
- {autogluon.timeseries-1.2.1b20250424.dist-info → autogluon.timeseries-1.2.1b20250426.dist-info}/METADATA +5 -5
- {autogluon.timeseries-1.2.1b20250424.dist-info → autogluon.timeseries-1.2.1b20250426.dist-info}/RECORD +25 -25
- /autogluon.timeseries-1.2.1b20250424-py3.9-nspkg.pth → /autogluon.timeseries-1.2.1b20250426-py3.9-nspkg.pth +0 -0
- {autogluon.timeseries-1.2.1b20250424.dist-info → autogluon.timeseries-1.2.1b20250426.dist-info}/LICENSE +0 -0
- {autogluon.timeseries-1.2.1b20250424.dist-info → autogluon.timeseries-1.2.1b20250426.dist-info}/NOTICE +0 -0
- {autogluon.timeseries-1.2.1b20250424.dist-info → autogluon.timeseries-1.2.1b20250426.dist-info}/WHEEL +0 -0
- {autogluon.timeseries-1.2.1b20250424.dist-info → autogluon.timeseries-1.2.1b20250426.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.2.1b20250424.dist-info → autogluon.timeseries-1.2.1b20250426.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.2.1b20250424.dist-info → autogluon.timeseries-1.2.1b20250426.dist-info}/zip-safe +0 -0
autogluon/timeseries/models/autogluon_tabular/transforms.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Literal,
+from typing import Literal, Union

 import numpy as np
 import pandas as pd
@@ -13,29 +13,32 @@ from autogluon.timeseries.dataset.ts_dataframe import (
     TIMESTAMP,
     TimeSeriesDataFrame,
 )
-from autogluon.timeseries.transforms.target_scaler import
+from autogluon.timeseries.transforms.target_scaler import TargetScaler, get_target_scaler

 from .utils import MLF_ITEMID, MLF_TIMESTAMP


 class MLForecastScaler(BaseTargetTransform):
-    def __init__(self, scaler_type: Literal["standard", "
+    def __init__(self, scaler_type: Literal["standard", "min_max", "mean_abs", "robust"]):
         # For backward compatibility
-        self.scaler_type = scaler_type
-        self.ag_scaler:
+        self.scaler_type: Literal["standard", "min_max", "mean_abs", "robust"] = scaler_type
+        self.ag_scaler: TargetScaler

     def _df_to_tsdf(self, df: pd.DataFrame) -> TimeSeriesDataFrame:
-        return
+        return TimeSeriesDataFrame(
+            df.rename(columns={self.id_col: ITEMID, self.time_col: TIMESTAMP}).set_index([ITEMID, TIMESTAMP])
+        )

     def _tsdf_to_df(self, ts_df: TimeSeriesDataFrame) -> pd.DataFrame:
         return pd.DataFrame(ts_df).reset_index().rename(columns={ITEMID: self.id_col, TIMESTAMP: self.time_col})

-    def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
+    def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:  # type: ignore
         self.ag_scaler = get_target_scaler(name=self.scaler_type, target=self.target_col)
-        transformed = self.ag_scaler.fit_transform(self._df_to_tsdf(df))
+        transformed = self.ag_scaler.fit_transform(self._df_to_tsdf(df))
         return self._tsdf_to_df(transformed)

-    def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
+    def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:  # type: ignore
+        assert self.ag_scaler is not None
         transformed = self.ag_scaler.inverse_transform(self._df_to_tsdf(df))
         return self._tsdf_to_df(transformed)

@@ -46,7 +49,9 @@ def apply_inverse_transform(
 ) -> pd.DataFrame:
     """Apply inverse transformation to a dataframe, converting to GroupedArray if necessary"""
     if isinstance(transform, BaseTargetTransform):
-
+        inverse_transformed = transform.inverse_transform(df=df)
+        assert isinstance(inverse_transformed, pd.DataFrame)
+        return inverse_transformed
     elif isinstance(transform, _BaseGroupedArrayTargetTransform):
         indptr = np.concatenate([[0], df[MLF_ITEMID].value_counts().cumsum()])
         assignment = {}
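(Aside: a minimal sketch of the round-trip that `_df_to_tsdf` / `_tsdf_to_df` implement, using plain pandas; the `unique_id`/`ds` column names are illustrative stand-ins for the id/time columns MLForecast passes in, not taken from the diff above.)

    import pandas as pd

    ITEMID, TIMESTAMP = "item_id", "timestamp"

    # Long-format frame as MLForecast would pass it (column names assumed here)
    df = pd.DataFrame({
        "unique_id": ["A", "A", "B", "B"],
        "ds": list(pd.date_range("2024-01-01", periods=2)) * 2,
        "y": [1.0, 2.0, 10.0, 20.0],
    })

    # _df_to_tsdf direction: rename to AutoGluon's column names, index by (item, time)
    tsdf = df.rename(columns={"unique_id": ITEMID, "ds": TIMESTAMP}).set_index([ITEMID, TIMESTAMP])

    # _tsdf_to_df direction: back to the flat frame with the original column names
    restored = tsdf.reset_index().rename(columns={ITEMID: "unique_id", TIMESTAMP: "ds"})
    assert restored.equals(df)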
autogluon/timeseries/models/ensemble/greedy.py
CHANGED
@@ -28,7 +28,6 @@ class TimeSeriesEnsembleSelection(EnsembleSelection):
         random_state: Optional[np.random.RandomState] = None,
         prediction_length: int = 1,
         target: str = "target",
-        eval_metric_seasonal_period: int = 1,
         **kwargs,
     ):
         super().__init__(
@@ -43,7 +42,6 @@ class TimeSeriesEnsembleSelection(EnsembleSelection):
         )
         self.prediction_length = prediction_length
         self.target = target
-        self.eval_metric_seasonal_period = eval_metric_seasonal_period
         self.metric: TimeSeriesScorer

         self.dummy_pred_per_window = []
@@ -79,6 +77,10 @@ class TimeSeriesEnsembleSelection(EnsembleSelection):
         self.scorer_per_window = []
         self.data_future_per_window = []

+        seasonal_period = self.metric.seasonal_period
+        if seasonal_period is None:
+            seasonal_period = get_seasonality(labels[0].freq)
+
         for window_idx, data in enumerate(labels):
             dummy_pred = copy.deepcopy(predictions[0][window_idx])
             # This should never happen; sanity check to make sure that all predictions have the same index
@@ -90,7 +92,7 @@ class TimeSeriesEnsembleSelection(EnsembleSelection):
             # Split the observed time series once to avoid repeated computations inside the evaluator
             data_past = data.slice_by_timestep(None, -self.prediction_length)
             data_future = data.slice_by_timestep(-self.prediction_length, None)
-            scorer.save_past_metrics(data_past, target=self.target, seasonal_period=
+            scorer.save_past_metrics(data_past, target=self.target, seasonal_period=seasonal_period)
             self.scorer_per_window.append(scorer)
             self.data_future_per_window.append(data_future)

@@ -122,7 +124,9 @@ class TimeSeriesEnsembleSelection(EnsembleSelection):
             dummy_pred[list(dummy_pred.columns)] = y_pred_proba[window_idx]
             # We use scorer.compute_metric instead of scorer.score to avoid repeated calls to scorer.save_past_metrics
             metric_value = self.scorer_per_window[window_idx].compute_metric(
-                data_future,
+                data_future,
+                dummy_pred,
+                target=self.target,
             )
             total_score += metric.sign * metric_value
         avg_score = total_score / len(self.data_future_per_window)
@@ -162,14 +166,11 @@ class GreedyEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
         model_scores: Optional[Dict[str, float]] = None,
         time_limit: Optional[float] = None,
     ):
-        if self.eval_metric_seasonal_period is None:
-            self.eval_metric_seasonal_period = get_seasonality(self.freq)
         ensemble_selection = TimeSeriesEnsembleSelection(
             ensemble_size=self.get_hyperparameters()["ensemble_size"],
             metric=self.eval_metric,
             prediction_length=self.prediction_length,
             target=self.target,
-            eval_metric_seasonal_period=self.eval_metric_seasonal_period,
         )
         ensemble_selection.fit(
             predictions=list(predictions_per_window.values()),
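(Aside: the net effect of these greedy.py hunks is that the seasonal period now travels with the metric object and is only derived from the data frequency as a fallback. A minimal sketch of that resolution, with an illustrative stand-in for `get_seasonality`:)

    from typing import Optional

    # Illustrative subset of frequency -> seasonal period defaults; the real mapping
    # lives in autogluon.timeseries.utils.datetime.get_seasonality.
    DEFAULT_SEASONALITY = {"H": 24, "D": 7, "W": 1, "M": 12, "Q": 4}

    def resolve_seasonal_period(metric_period: Optional[int], freq: str) -> int:
        # Prefer the period configured on the scorer; fall back to the frequency default.
        if metric_period is not None:
            return metric_period
        return DEFAULT_SEASONALITY.get(freq, 1)

    assert resolve_seasonal_period(None, "D") == 7   # derived from frequency
    assert resolve_seasonal_period(12, "D") == 12    # explicit setting wins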
autogluon/timeseries/models/local/abstract_local_model.py
CHANGED
@@ -10,6 +10,7 @@ from scipy.stats import norm

 from autogluon.core.utils.exceptions import TimeLimitExceeded
 from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TimeSeriesDataFrame
+from autogluon.timeseries.metrics import TimeSeriesScorer
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
 from autogluon.timeseries.utils.datetime import get_seasonality
 from autogluon.timeseries.utils.warning_filters import warning_filter
@@ -30,8 +31,6 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
    ----------
    allowed_local_model_args : List[str]
        Argument that can be passed to the underlying local model.
-    default_n_jobs : Union[int, float]
-        Default number of CPU cores used to train models. If float, this fraction of CPU cores will be used.
    default_max_ts_length : Optional[int]
        If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
        This significantly speeds up fitting and usually leads to no change in accuracy.
@@ -41,7 +40,6 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
     """

     allowed_local_model_args: List[str] = []
-    default_n_jobs: Union[int, float] = AG_DEFAULT_N_JOBS
     default_max_ts_length: Optional[int] = 2500
     default_max_time_limit_ratio = 1.0
     init_time_in_seconds: int = 0
@@ -52,26 +50,10 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
         prediction_length: int = 1,
         path: Optional[str] = None,
         name: Optional[str] = None,
-        eval_metric: str = None,
-        hyperparameters: Dict[str, Any] = None,
+        eval_metric: Union[str, TimeSeriesScorer, None] = None,
+        hyperparameters: Optional[Dict[str, Any]] = None,
         **kwargs,  # noqa
     ):
-        if hyperparameters is None:
-            hyperparameters = {}
-        else:
-            hyperparameters = hyperparameters.copy()
-        # TODO: Replace with 'num_cpus' argument passed to fit (after predictor API is changed)
-        n_jobs = hyperparameters.pop("n_jobs", self.default_n_jobs)
-        if isinstance(n_jobs, float) and 0 < n_jobs <= 1:
-            self.n_jobs = max(int(cpu_count() * n_jobs), 1)
-        elif isinstance(n_jobs, int):
-            self.n_jobs = n_jobs
-        else:
-            raise ValueError(f"n_jobs must be a float between 0 and 1 or an integer (received n_jobs = {n_jobs})")
-        # Default values, potentially overridden inside _fit()
-        self.use_fallback_model = hyperparameters.pop("use_fallback_model", True)
-        self.max_ts_length = hyperparameters.pop("max_ts_length", self.default_max_ts_length)
-
         super().__init__(
             path=path,
             freq=freq,
@@ -82,9 +64,9 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
             **kwargs,
         )

-        self._local_model_args: Dict[str, Any]
-        self._seasonal_period:
-        self._dummy_forecast:
+        self._local_model_args: Dict[str, Any]
+        self._seasonal_period: int
+        self._dummy_forecast: pd.DataFrame

     @property
     def allowed_hyperparameters(self) -> List[str]:
@@ -105,19 +87,32 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
             data = data.fill_missing_values()
         return data, known_covariates

+    def _get_default_hyperparameters(self) -> dict:
+        return {
+            "n_jobs": AG_DEFAULT_N_JOBS,
+            "use_fallback_model": True,
+            "max_ts_length": self.default_max_ts_length,
+        }
+
+    @staticmethod
+    def _compute_n_jobs(n_jobs: Union[int, float]) -> int:
+        if isinstance(n_jobs, float) and 0 < n_jobs <= 1:
+            return max(int(cpu_count() * n_jobs), 1)
+        elif isinstance(n_jobs, int):
+            return n_jobs
+        else:
+            raise ValueError(f"n_jobs must be a float between 0 and 1 or an integer (received n_jobs = {n_jobs})")
+
     def _fit(self, train_data: TimeSeriesDataFrame, time_limit: Optional[int] = None, **kwargs):
         self._check_fit_params()

         if time_limit is not None and time_limit < self.init_time_in_seconds:
             raise TimeLimitExceeded

-        # Initialize parameters passed to each local model
-        raw_local_model_args = self.get_hyperparameters().copy()
-
         unused_local_model_args = []
         local_model_args = {}
         # TODO: Move filtering logic to AbstractTimeSeriesModel
-        for key, value in
+        for key, value in self.get_hyperparameters().items():
             if key in self.allowed_local_model_args:
                 local_model_args[key] = value
             elif key in self.allowed_hyperparameters:
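(Aside: `_compute_n_jobs` preserves the old constructor's semantics: a float in (0, 1] means a fraction of available CPU cores, an int is used as-is. A standalone sketch, using `multiprocessing.cpu_count` as a stand-in for whichever `cpu_count` the module imports:)

    from multiprocessing import cpu_count  # stand-in for the cpu_count used in the module
    from typing import Union

    def compute_n_jobs(n_jobs: Union[int, float]) -> int:
        if isinstance(n_jobs, float) and 0 < n_jobs <= 1:
            return max(int(cpu_count() * n_jobs), 1)  # fraction of cores, at least 1
        elif isinstance(n_jobs, int):
            return n_jobs  # explicit worker count passes through unchanged
        else:
            raise ValueError(f"n_jobs must be a float between 0 and 1 or an integer (received n_jobs = {n_jobs})")

    # On an 8-core machine: 0.5 -> 4, 1.0 -> 8, and the int 3 -> 3.
    print(compute_n_jobs(0.5), compute_n_jobs(1.0), compute_n_jobs(3))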
@@ -151,9 +146,11 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
         return local_model_args

     def _predict(self, data: TimeSeriesDataFrame, **kwargs) -> TimeSeriesDataFrame:
-
-
-
+        model_params = self.get_hyperparameters()
+        max_ts_length = model_params["max_ts_length"]
+        if max_ts_length is not None:
+            logger.debug(f"Shortening all time series to at most {max_ts_length}")
+            data = data.groupby(level=ITEMID, sort=False).tail(max_ts_length)

         df = pd.DataFrame(data).reset_index(level=ITEMID)
         all_series = (ts for _, ts in df.groupby(by=ITEMID, as_index=False, sort=False)[self.target])
@@ -161,15 +158,20 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
         # timeout ensures that no individual job takes longer than time_limit
         # TODO: a job started late may still exceed time_limit - how to prevent that?
         time_limit = kwargs.get("time_limit")
-
+        # TODO: Take into account num_cpus once the TimeSeriesPredictor API is updated
+        n_jobs = self._compute_n_jobs(model_params["n_jobs"])
+        timeout = None if n_jobs == 1 else time_limit
         # end_time ensures that no new jobs are started after time_limit is exceeded
         end_time = None if time_limit is None else time.time() + time_limit
-        executor = Parallel(
+        executor = Parallel(n_jobs=n_jobs, timeout=timeout)

         try:
             with warning_filter():
                 predictions_with_flags = executor(
-                    delayed(self._predict_wrapper)(
+                    delayed(self._predict_wrapper)(
+                        ts, use_fallback_model=model_params["use_fallback_model"], end_time=end_time
+                    )
+                    for ts in all_series
                 )
         except TimeoutError:
             raise TimeLimitExceeded
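(Aside: the `timeout = None if n_jobs == 1 else time_limit` guard reflects a joblib detail: `Parallel`'s per-result `timeout` only applies when tasks run on worker processes or threads, not in the sequential `n_jobs=1` mode. A minimal sketch of the pattern with a hypothetical `slow_task`:)

    import multiprocessing
    import time
    from joblib import Parallel, delayed

    def slow_task(seconds: float) -> float:  # hypothetical workload
        time.sleep(seconds)
        return seconds

    n_jobs, time_limit = 2, 5.0
    timeout = None if n_jobs == 1 else time_limit  # sequential mode does not support timeout
    executor = Parallel(n_jobs=n_jobs, timeout=timeout)
    try:
        results = executor(delayed(slow_task)(s) for s in [0.1, 0.2])
        print(results)  # [0.1, 0.2]
    except (TimeoutError, multiprocessing.TimeoutError):
        # Raised if collecting any single result exceeds `timeout`
        print("time limit exceeded")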
@@ -185,7 +187,12 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
         predictions_df.index = self.get_forecast_horizon_index(data)
         return TimeSeriesDataFrame(predictions_df)

-    def _predict_wrapper(
+    def _predict_wrapper(
+        self,
+        time_series: pd.Series,
+        use_fallback_model: bool,
+        end_time: Optional[float] = None,
+    ) -> Tuple[pd.DataFrame, bool]:
         if end_time is not None and time.time() >= end_time:
             raise TimeLimitExceeded

@@ -201,7 +208,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
             if not np.isfinite(result.values).all():
                 raise RuntimeError("Forecast contains NaN or Inf values.")
         except Exception:
-            if
+            if use_fallback_model:
                 result = seasonal_naive_forecast(
                     target=time_series.values.ravel(),
                     prediction_length=self.prediction_length,
autogluon/timeseries/models/multi_window/multi_window_model.py
CHANGED
@@ -215,7 +215,7 @@ class MultiWindowBacktestingModel(AbstractTimeSeriesModel):
     def _get_search_space(self):
         return self.model_base._get_search_space()

-    def _initialize_transforms_and_regressor(self
+    def _initialize_transforms_and_regressor(self) -> None:
         # Do not initialize the target_scaler and covariate_regressor in the multi window model!
         self.target_scaler = None
         self.covariate_scaler = None
autogluon/timeseries/models/presets.py
CHANGED
@@ -183,7 +183,6 @@ def get_preset_models(
     prediction_length: int,
     path: str,
     eval_metric: Union[str, TimeSeriesScorer],
-    eval_metric_seasonal_period: Optional[int],
     hyperparameters: Union[str, Dict, None],
     hyperparameter_tune: bool,
     covariate_metadata: CovariateMetadata,
@@ -260,7 +259,6 @@ def get_preset_models(
             freq=freq,
             prediction_length=prediction_length,
             eval_metric=eval_metric,
-            eval_metric_seasonal_period=eval_metric_seasonal_period,
             covariate_metadata=covariate_metadata,
             hyperparameters=model_hps,
             **kwargs,
autogluon/timeseries/predictor.py
CHANGED
@@ -93,6 +93,14 @@ class TimeSeriesPredictor:
     eval_metric_seasonal_period : int, optional
         Seasonal period used to compute some evaluation metrics such as mean absolute scaled error (MASE). Defaults to
         ``None``, in which case the seasonal period is computed based on the data frequency.
+    horizon_weight : List[float], optional
+        Weight assigned to each time step in the forecast horizon when computing the ``eval_metric``. If provided, this
+        must be a list with ``prediction_length`` non-negative values, where at least some values are greater than zero.
+        AutoGluon will automatically normalize the weights so that they sum up to ``prediction_length``. By default, all
+        time steps in the forecast horizon have the same weight, which is equivalent to setting ``horizon_weight = [1] * prediction_length``.
+
+        This parameter only affects model selection and ensemble construction; it has no effect on the loss function of
+        the individual forecasting models.
     known_covariates_names: List[str], optional
         Names of the covariates that are known in advance for all time steps in the forecast horizon. These are also
         known as dynamic features, exogenous variables, additional regressors or related time series. Examples of such
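(Aside: the normalization described in the new `horizon_weight` docstring, non-negative weights rescaled to sum to `prediction_length`, can be sketched as follows; the helper name is hypothetical, and the real validation lives in the metrics module:)

    import numpy as np

    def normalize_horizon_weight(horizon_weight, prediction_length: int) -> np.ndarray:
        w = np.asarray(horizon_weight, dtype=float)
        assert w.shape == (prediction_length,), "one weight per forecast step"
        assert (w >= 0).all() and w.sum() > 0, "non-negative, not all zero"
        # Rescale so the weights sum to prediction_length, as the docstring states
        return w * prediction_length / w.sum()

    # Emphasize the first half of a 4-step horizon; the result sums to 4.
    print(normalize_horizon_weight([2, 2, 1, 1], prediction_length=4))
    # [1.3333... 1.3333... 0.6666... 0.6666...]

A predictor created as `TimeSeriesPredictor(prediction_length=4, horizon_weight=[2, 2, 1, 1])` would then select models using the re-weighted metric, per the new constructor argument in the hunks below.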
@@ -107,7 +115,7 @@ class TimeSeriesPredictor:
         List of increasing decimals that specifies which quantiles should be estimated when making distributional
         forecasts. Defaults to ``[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]``.
     path : str or pathlib.Path, optional
-        Path to the directory where models and intermediate outputs will be saved. Defaults to a timestamped folder
+        Path to the local directory where models and intermediate outputs will be saved. Defaults to a timestamped folder
         ``AutogluonModels/ag-[TIMESTAMP]`` that will be created in the working directory.
     verbosity : int, default = 2
         Verbosity levels range from 0 to 4 and control how much information is printed to stdout. Higher levels
@@ -144,6 +152,7 @@ class TimeSeriesPredictor:
         freq: Optional[str] = None,
         eval_metric: Union[str, TimeSeriesScorer, None] = None,
         eval_metric_seasonal_period: Optional[int] = None,
+        horizon_weight: Optional[List[float]] = None,
         path: Optional[Union[str, Path]] = None,
         verbosity: int = 2,
         log_to_file: bool = True,
@@ -156,6 +165,11 @@ class TimeSeriesPredictor:
         self.verbosity = verbosity
         set_logger_verbosity(self.verbosity, logger=logger)
         self.path = setup_outputdir(path)
+        if self.path.lower().startswith("s3://"):
+            logger.warning(
+                "Warning: S3 paths are not supported for the `path` argument in TimeSeriesPredictor. "
+                "Use a local path and upload the trained predictor to S3 manually if needed"
+            )
         self._setup_log_to_file(log_to_file=log_to_file, log_file_path=log_file_path)

         self.cache_predictions = cache_predictions
@@ -187,15 +201,18 @@ class TimeSeriesPredictor:
         if std_freq != str(self.freq):
             logger.info(f"Frequency '{self.freq}' stored as '{std_freq}'")
             self.freq = std_freq
-        self.eval_metric = check_get_evaluation_metric(
-
+        self.eval_metric: TimeSeriesScorer = check_get_evaluation_metric(
+            eval_metric,
+            prediction_length=prediction_length,
+            seasonal_period=eval_metric_seasonal_period,
+            horizon_weight=horizon_weight,
+        )
         if quantile_levels is None:
             quantile_levels = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
         self.quantile_levels = sorted(quantile_levels)
         self._learner: TimeSeriesLearner = self._learner_type(
             path_context=self.path,
-            eval_metric=eval_metric,
-            eval_metric_seasonal_period=eval_metric_seasonal_period,
+            eval_metric=self.eval_metric,
             target=self.target,
             known_covariates_names=self.known_covariates_names,
             prediction_length=self.prediction_length,
@@ -395,12 +412,12 @@ class TimeSeriesPredictor:
                 f"\tRemoving {len(too_short_items)} short time series from train_data. Only series with length "
                 f">= {min_length} will be used for training."
             )
-            train_data = train_data.query("item_id not in @too_short_items")
+            train_data = train_data.query("item_id not in @too_short_items")

         all_nan_items = train_data.item_ids[train_data[self.target].isna().groupby(ITEMID, sort=False).all()]
         if len(all_nan_items) > 0:
             logger.info(f"\tRemoving {len(all_nan_items)} time series consisting of only NaN values from train_data.")
-            train_data = train_data.query("item_id not in @all_nan_items")
+            train_data = train_data.query("item_id not in @all_nan_items")

         if len(too_short_items) or len(all_nan_items):
             logger.info(f"\tAfter filtering, train_data has {self._get_dataset_stats(train_data)}")
@@ -494,33 +511,22 @@ class TimeSeriesPredictor:

         Available presets:

-        - ``"fast_training"``:
-
-        - ``"
-        - ``"high_quality"``: All ML models available in AutoGluon + additional statistical models (``NPTS``, ``AutoETS``,
-          ``DynamicOptimizedTheta``). Much more accurate than ``medium_quality``, but takes longer to train.
+        - ``"fast_training"``: Simple statistical and tree-based ML models. These models are fast to train but may not be very accurate.
+        - ``"medium_quality"``: Same models as above, plus deep learning models ``TemporalFusionTransformer`` and Chronos-Bolt (small). Produces good forecasts with reasonable training time.
+        - ``"high_quality"``: A mix of multiple DL, ML and statistical forecasting models available in AutoGluon that offers the best forecast accuracy. Much more accurate than ``medium_quality``, but takes longer to train.
         - ``"best_quality"``: Same models as in ``"high_quality"``, but performs validation with multiple backtests. Usually better than ``high_quality``, but takes even longer to train.

-        Available presets with the
+        Available presets with the `Chronos-Bolt <https://github.com/amazon-science/chronos-forecasting>`_ model:

         - ``"bolt_{model_size}"``: where model size is one of ``tiny,mini,small,base``. Uses the Chronos-Bolt pretrained model for zero-shot forecasting.
           See the documentation for ``ChronosModel`` or see `Hugging Face <https://huggingface.co/collections/amazon/chronos-models-65f1791d630a8d57cb718444>`_ for more information.

-
-
+        Exact definitions of these presets can be found in the source code
+        [`1 <https://github.com/autogluon/autogluon/blob/stable/timeseries/src/autogluon/timeseries/configs/presets_configs.py>`_,
+        `2 <https://github.com/autogluon/autogluon/blob/stable/timeseries/src/autogluon/timeseries/models/presets.py>`_].

-
-
-          Note that a GPU is required for model sizes ``small``, ``base`` and ``large``.
-        - ``"chronos"``: alias for ``"chronos_small"``.
-        - ``"chronos_ensemble"``: builds an ensemble of seasonal naive, tree-based and deep learning models with fast inference
-          and ``"chronos_small"``.
-        - ``"chronos_large_ensemble"``: builds an ensemble of seasonal naive, tree-based and deep learning models
-          with fast inference and ``"chronos_large"``.
-
-        Details for these presets can be found in ``autogluon/timeseries/configs/presets_configs.py``. If not
-        provided, user-provided values for ``hyperparameters`` and ``hyperparameter_tune_kwargs`` will be used
-        (defaulting to their default values specified below).
+        If no ``presets`` are selected, user-provided values for ``hyperparameters`` will be used (defaulting to their
+        default values specified below).
     hyperparameters : str or dict, optional
         Determines what models are trained and what hyperparameters are used by each model.

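(Aside: a minimal usage example for the presets described above; the CSV path is hypothetical:)

    from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

    # Long-format data with item_id / timestamp / target columns (path is illustrative)
    train_data = TimeSeriesDataFrame.from_path("train.csv")

    predictor = TimeSeriesPredictor(prediction_length=48, eval_metric="MASE")
    predictor.fit(train_data, presets="medium_quality", time_limit=600)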
@@ -684,7 +690,8 @@ class TimeSeriesPredictor:
             target=self.target,
             known_covariates_names=self.known_covariates_names,
             eval_metric=self.eval_metric,
-            eval_metric_seasonal_period=self.
+            eval_metric_seasonal_period=self.eval_metric.seasonal_period,
+            horizon_weight=self.eval_metric.horizon_weight,
             quantile_levels=self.quantile_levels,
             freq=self.freq,
             time_limit=time_limit,
@@ -1500,7 +1507,8 @@ class TimeSeriesPredictor:
             target=self.target,
             prediction_length=self.prediction_length,
             eval_metric=self.eval_metric.name,
-            eval_metric_seasonal_period=self.
+            eval_metric_seasonal_period=self.eval_metric.seasonal_period,
+            horizon_weight=self.eval_metric.horizon_weight,
             quantile_levels=self.quantile_levels,
         )
         return simulation_dict
autogluon/timeseries/trainer.py
CHANGED
@@ -46,7 +46,6 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
         path: str,
         prediction_length: int = 1,
         eval_metric: Union[str, TimeSeriesScorer, None] = None,
-        eval_metric_seasonal_period: Optional[int] = None,
         save_data: bool = True,
         skip_model_selection: bool = False,
         enable_ensemble: bool = True,
@@ -86,8 +85,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
         #: self.refit_single_full() and self.refit_full().
         self.model_refit_map = {}

-        self.eval_metric
-        self.eval_metric_seasonal_period = eval_metric_seasonal_period
+        self.eval_metric = check_get_evaluation_metric(eval_metric, prediction_length=prediction_length)
         if val_splitter is None:
             val_splitter = ExpandingWindowSplitter(prediction_length=self.prediction_length)
         assert isinstance(val_splitter, AbstractWindowSplitter), "val_splitter must be of type AbstractWindowSplitter"
@@ -577,7 +575,6 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
         ensemble = self.ensemble_model_type(
             name=self._get_ensemble_model_name(),
             eval_metric=self.eval_metric,
-            eval_metric_seasonal_period=self.eval_metric_seasonal_period,
             target=self.target,
             prediction_length=self.prediction_length,
             path=self.path,
@@ -791,6 +788,17 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
             raise ValueError(f"Model {model_name} failed to predict. Please check the model's logs.")
         return predictions

+    def _get_eval_metric(self, metric: Union[str, TimeSeriesScorer, None]) -> TimeSeriesScorer:
+        if metric is None:
+            return self.eval_metric
+        else:
+            return check_get_evaluation_metric(
+                metric,
+                prediction_length=self.prediction_length,
+                seasonal_period=self.eval_metric.seasonal_period,
+                horizon_weight=self.eval_metric.horizon_weight,
+            )
+
     def _score_with_predictions(
         self,
         data: TimeSeriesDataFrame,
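(Aside: the contract of the new `_get_eval_metric` helper: `None` returns the trainer's configured scorer, while a per-call override inherits the trainer-level `seasonal_period` and `horizon_weight` instead of fresh defaults. A self-contained sketch with a minimal stand-in scorer:)

    from typing import List, Optional, Union

    class Scorer:  # minimal stand-in for TimeSeriesScorer
        def __init__(self, name: str, seasonal_period: Optional[int], horizon_weight: Optional[List[float]]):
            self.name, self.seasonal_period, self.horizon_weight = name, seasonal_period, horizon_weight

    def get_eval_metric(trainer_metric: Scorer, metric: Union[str, Scorer, None]) -> Scorer:
        if metric is None:
            return trainer_metric  # no override: reuse the trainer's scorer
        name = metric if isinstance(metric, str) else metric.name
        # Override: new scorer, but period and weights are inherited from the trainer
        return Scorer(name, trainer_metric.seasonal_period, trainer_metric.horizon_weight)

    default = Scorer("WQL", seasonal_period=24, horizon_weight=None)
    assert get_eval_metric(default, None) is default
    assert get_eval_metric(default, "MASE").seasonal_period == 24  # inherited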
@@ -798,13 +806,11 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
         metric: Union[str, TimeSeriesScorer, None] = None,
     ) -> float:
         """Compute the score measuring how well the predictions align with the data."""
-
-        return eval_metric.score(
+        return self._get_eval_metric(metric).score(
             data=data,
             predictions=predictions,
             prediction_length=self.prediction_length,
             target=self.target,
-            seasonal_period=self.eval_metric_seasonal_period,
         )

     def score(
@@ -814,7 +820,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
         metric: Union[str, TimeSeriesScorer, None] = None,
         use_cache: bool = True,
     ) -> float:
-        eval_metric = self.
+        eval_metric = self._get_eval_metric(metric)
         scores_dict = self.evaluate(data=data, model=model, metrics=[eval_metric], use_cache=use_cache)
         return scores_dict[eval_metric.name]

@@ -833,7 +839,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
         metrics_ = [metrics] if not isinstance(metrics, list) else metrics
         scores_dict = {}
         for metric in metrics_:
-            eval_metric = self.
+            eval_metric = self._get_eval_metric(metric)
             scores_dict[eval_metric.name] = self._score_with_predictions(
                 data=data, predictions=predictions, metric=eval_metric
             )
@@ -855,7 +861,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
         confidence_level: float = 0.99,
     ) -> pd.DataFrame:
         assert method in ["naive", "permutation"], f"Invalid feature importance method {method}."
-
+        eval_metric = self._get_eval_metric(metric)

         logger.info("Computing feature importance")

@@ -902,11 +908,13 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
         for n in range(num_iterations):
             if subsample_size < data.num_items:
                 item_ids_sampled = data.item_ids.to_series().sample(subsample_size)  # noqa
-                data_sample: TimeSeriesDataFrame = data.query("item_id in @item_ids_sampled")
+                data_sample: TimeSeriesDataFrame = data.query("item_id in @item_ids_sampled")
             else:
                 data_sample = data

-            base_score = self.evaluate(data=data_sample, model=model, metrics=
+            base_score = self.evaluate(data=data_sample, model=model, metrics=eval_metric, use_cache=False)[
+                eval_metric.name
+            ]

             for feature in features:
                 # override importance for unused features
|
|
914
922
|
continue
|
915
923
|
else:
|
916
924
|
data_sample_replaced = importance_transform.transform(data_sample, feature_name=feature)
|
917
|
-
score = self.evaluate(
|
918
|
-
|
919
|
-
]
|
925
|
+
score = self.evaluate(
|
926
|
+
data=data_sample_replaced, model=model, metrics=eval_metric, use_cache=False
|
927
|
+
)[eval_metric.name]
|
920
928
|
|
921
929
|
importance = base_score - score
|
922
930
|
if relative_scores:
|
@@ -1266,7 +1274,6 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
1266
1274
|
return get_preset_models(
|
1267
1275
|
path=self.path,
|
1268
1276
|
eval_metric=self.eval_metric,
|
1269
|
-
eval_metric_seasonal_period=self.eval_metric_seasonal_period,
|
1270
1277
|
prediction_length=self.prediction_length,
|
1271
1278
|
freq=freq,
|
1272
1279
|
hyperparameters=hyperparameters,
|
autogluon/timeseries/version.py
CHANGED
autogluon.timeseries-1.2.1b20250426.dist-info/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: autogluon.timeseries
-Version: 1.2.1b20250424
+Version: 1.2.1b20250426
 Summary: Fast and Accurate ML in 3 Lines of Code
 Home-page: https://github.com/autogluon/autogluon
 Author: AutoGluon Community
@@ -55,10 +55,10 @@ Requires-Dist: fugue>=0.9.0
 Requires-Dist: tqdm<5,>=4.38
 Requires-Dist: orjson~=3.9
 Requires-Dist: tensorboard<3,>=2.9
-Requires-Dist: autogluon.core[raytune]==1.2.1b20250424
-Requires-Dist: autogluon.common==1.2.1b20250424
-Requires-Dist: autogluon.features==1.2.1b20250424
-Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.2.1b20250424
+Requires-Dist: autogluon.core[raytune]==1.2.1b20250426
+Requires-Dist: autogluon.common==1.2.1b20250426
+Requires-Dist: autogluon.features==1.2.1b20250426
+Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.2.1b20250426
 Provides-Extra: all
 Provides-Extra: chronos-onnx
 Requires-Dist: optimum[onnxruntime]<1.23,>=1.17; extra == "chronos-onnx"