autogluon.timeseries 1.4.1b20251016__py3-none-any.whl → 1.4.1b20251218__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of autogluon.timeseries has been flagged as potentially problematic; details are available on the registry page.
- autogluon/timeseries/configs/hyperparameter_presets.py +7 -21
- autogluon/timeseries/configs/predictor_presets.py +23 -39
- autogluon/timeseries/dataset/ts_dataframe.py +97 -86
- autogluon/timeseries/learner.py +70 -35
- autogluon/timeseries/metrics/__init__.py +4 -4
- autogluon/timeseries/metrics/abstract.py +8 -8
- autogluon/timeseries/metrics/point.py +9 -9
- autogluon/timeseries/metrics/quantile.py +5 -5
- autogluon/timeseries/metrics/utils.py +4 -4
- autogluon/timeseries/models/__init__.py +2 -1
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +52 -39
- autogluon/timeseries/models/abstract/model_trial.py +2 -1
- autogluon/timeseries/models/abstract/tunable.py +8 -8
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +58 -62
- autogluon/timeseries/models/autogluon_tabular/per_step.py +26 -15
- autogluon/timeseries/models/autogluon_tabular/transforms.py +11 -9
- autogluon/timeseries/models/chronos/__init__.py +2 -1
- autogluon/timeseries/models/chronos/chronos2.py +395 -0
- autogluon/timeseries/models/chronos/model.py +126 -88
- autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +69 -37
- autogluon/timeseries/models/ensemble/__init__.py +36 -2
- autogluon/timeseries/models/ensemble/abstract.py +14 -46
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +240 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +185 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
- autogluon/timeseries/models/ensemble/{greedy.py → ensemble_selection.py} +41 -61
- autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +45 -0
- autogluon/timeseries/models/ensemble/{basic.py → weighted/basic.py} +25 -22
- autogluon/timeseries/models/ensemble/weighted/greedy.py +62 -0
- autogluon/timeseries/models/gluonts/abstract.py +32 -31
- autogluon/timeseries/models/gluonts/dataset.py +11 -11
- autogluon/timeseries/models/gluonts/models.py +0 -7
- autogluon/timeseries/models/local/__init__.py +0 -7
- autogluon/timeseries/models/local/abstract_local_model.py +15 -18
- autogluon/timeseries/models/local/naive.py +2 -2
- autogluon/timeseries/models/local/npts.py +7 -1
- autogluon/timeseries/models/local/statsforecast.py +12 -12
- autogluon/timeseries/models/multi_window/multi_window_model.py +39 -24
- autogluon/timeseries/models/registry.py +3 -4
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +3 -4
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +6 -6
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +4 -9
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +2 -3
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +10 -10
- autogluon/timeseries/models/toto/_internal/dataset.py +2 -2
- autogluon/timeseries/models/toto/_internal/forecaster.py +8 -8
- autogluon/timeseries/models/toto/dataloader.py +4 -4
- autogluon/timeseries/models/toto/hf_pretrained_model.py +97 -16
- autogluon/timeseries/models/toto/model.py +35 -20
- autogluon/timeseries/predictor.py +527 -155
- autogluon/timeseries/regressor.py +27 -30
- autogluon/timeseries/splitter.py +3 -27
- autogluon/timeseries/trainer/ensemble_composer.py +444 -0
- autogluon/timeseries/trainer/model_set_builder.py +9 -9
- autogluon/timeseries/trainer/prediction_cache.py +16 -16
- autogluon/timeseries/trainer/trainer.py +300 -278
- autogluon/timeseries/trainer/utils.py +17 -0
- autogluon/timeseries/transforms/covariate_scaler.py +8 -8
- autogluon/timeseries/transforms/target_scaler.py +15 -15
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/lags.py +1 -3
- autogluon/timeseries/utils/datetime/seasonality.py +1 -3
- autogluon/timeseries/utils/features.py +31 -14
- autogluon/timeseries/utils/forecast.py +6 -7
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/version.py +1 -1
- autogluon.timeseries-1.4.1b20251218-py3.11-nspkg.pth +1 -0
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/METADATA +39 -27
- autogluon_timeseries-1.4.1b20251218.dist-info/RECORD +103 -0
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/WHEEL +1 -1
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -10
- autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
- autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -544
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -580
- autogluon.timeseries-1.4.1b20251016-py3.9-nspkg.pth +0 -1
- autogluon.timeseries-1.4.1b20251016.dist-info/RECORD +0 -90
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/zip-safe +0 -0
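Two changes recur throughout the hunks below: Optional[...]-style annotations are rewritten as PEP 604 unions (X | None), and the module-level index constants ITEMID/TIMESTAMP from ts_dataframe are replaced by attributes on TimeSeriesDataFrame itself. Note also that removed (-) lines end abruptly in places because the registry's diff rendering truncates long removed lines; they are kept exactly as shown. A minimal sketch of the new attribute access, assuming the published wheel is installed:

    import pandas as pd
    from autogluon.timeseries import TimeSeriesDataFrame

    df = pd.DataFrame({
        "item_id": ["A", "A", "B"],
        "timestamp": pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-01"]),
        "target": [1.0, 2.0, 3.0],
    })
    ts_df = TimeSeriesDataFrame.from_data_frame(df)
    # Was: from autogluon.timeseries.dataset.ts_dataframe import ITEMID
    item_ids = ts_df.index.get_level_values(TimeSeriesDataFrame.ITEMID)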
autogluon/timeseries/models/abstract/tunable.py (+8 -8)

@@ -5,7 +5,7 @@ import os
 import time
 from abc import ABC, abstractmethod
 from contextlib import nullcontext
-from typing import Any
+from typing import Any

 from typing_extensions import Self

@@ -37,12 +37,12 @@ class TimeSeriesTunable(Tunable, ABC):
     def hyperparameter_tune(
         self,
         train_data: TimeSeriesDataFrame,
-        val_data:
+        val_data: TimeSeriesDataFrame | None,
         val_splitter: Any = None,
-        default_num_trials:
-        refit_every_n_windows:
-        hyperparameter_tune_kwargs:
-        time_limit:
+        default_num_trials: int | None = 1,
+        refit_every_n_windows: int | None = 1,
+        hyperparameter_tune_kwargs: str | dict = "auto",
+        time_limit: float | None = None,
     ) -> tuple[dict[str, Any], Any]:
         hpo_executor = self._get_default_hpo_executor()
         hpo_executor.initialize(
@@ -144,13 +144,13 @@ class TimeSeriesTunable(Tunable, ABC):
         """
         return None

-    def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str,
+    def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str, int | float]:
         return {
             "num_cpus": 1,
         }

     def _save_with_data(
-        self, train_data: TimeSeriesDataFrame, val_data:
+        self, train_data: TimeSeriesDataFrame, val_data: TimeSeriesDataFrame | None
     ) -> tuple[str, str]:
         self.path = os.path.abspath(self.path)
         self.path_root = self.path.rsplit(self.name, 1)[0]
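The annotation style adopted in these signatures (`int | None`, `str | dict`) is PEP 604 built-in union syntax, which evaluates at runtime only on Python 3.10+; this is consistent with the namespace-package stub in the file list moving from py3.9-nspkg.pth to py3.11-nspkg.pth. A trivial runnable check of the syntax, mirroring the hunk above:

    # Runs on Python >= 3.10; raises TypeError at definition time on older interpreters.
    def get_minimum_resources(is_gpu_available: bool = False) -> dict[str, int | float]:
        return {"num_cpus": 1}

    assert get_minimum_resources() == {"num_cpus": 1}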
autogluon/timeseries/models/autogluon_tabular/mlforecast.py (+58 -62)

@@ -3,7 +3,7 @@ import logging
 import math
 import time
 import warnings
-from typing import Any, Callable, Collection,
+from typing import Any, Callable, Collection, Type

 import numpy as np
 import pandas as pd
@@ -13,7 +13,7 @@ import autogluon.core as ag
 from autogluon.core.models import AbstractModel as AbstractTabularModel
 from autogluon.features import AutoMLPipelineFeatureGenerator
 from autogluon.tabular.registry import ag_model_registry
-from autogluon.timeseries.dataset
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.metrics.abstract import TimeSeriesScorer
 from autogluon.timeseries.metrics.utils import in_sample_squared_seasonal_error
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
@@ -33,7 +33,7 @@ logger = logging.getLogger(__name__)
 class TabularModel(BaseEstimator):
     """A scikit-learn compatible wrapper for arbitrary autogluon.tabular models"""

-    def __init__(self, model_class: Type[AbstractTabularModel], model_kwargs:
+    def __init__(self, model_class: Type[AbstractTabularModel], model_kwargs: dict | None = None):
         self.model_class = model_class
         self.model_kwargs = {} if model_kwargs is None else model_kwargs
         self.feature_pipeline = AutoMLPipelineFeatureGenerator(verbosity=0)
@@ -63,12 +63,12 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):

     def __init__(
         self,
-        freq:
+        freq: str | None = None,
         prediction_length: int = 1,
-        path:
-        name:
-        eval_metric:
-        hyperparameters:
+        path: str | None = None,
+        name: str | None = None,
+        eval_metric: str | TimeSeriesScorer | None = None,
+        hyperparameters: dict[str, Any] | None = None,
         **kwargs,
     ):
         super().__init__(
@@ -84,13 +84,13 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         from mlforecast.target_transforms import BaseTargetTransform

         self._sum_of_differences: int = 0  # number of time steps removed from each series by differencing
-        self._max_ts_length:
+        self._max_ts_length: int | None = None
         self._target_lags: np.ndarray
         self._date_features: list[Callable]
         self._mlf: MLForecast
-        self._scaler:
+        self._scaler: BaseTargetTransform | None = None
         self._residuals_std_per_item: pd.Series
-        self._train_target_median:
+        self._train_target_median: float | None = None
         self._non_boolean_real_covariates: list[str] = []

     def _initialize_transforms_and_regressor(self):
@@ -114,13 +114,15 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
     def preprocess(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
+        known_covariates: TimeSeriesDataFrame | None = None,
         is_train: bool = False,
         **kwargs,
-    ) -> tuple[TimeSeriesDataFrame,
+    ) -> tuple[TimeSeriesDataFrame, TimeSeriesDataFrame | None]:
         if is_train:
             # All-NaN series are removed; partially-NaN series in train_data are handled inside _generate_train_val_dfs
-            all_nan_items = data.item_ids[
+            all_nan_items = data.item_ids[
+                data[self.target].isna().groupby(TimeSeriesDataFrame.ITEMID, sort=False).all()
+            ]
             if len(all_nan_items):
                 data = data.query("item_id not in @all_nan_items")
         else:
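The reflowed `all_nan_items` expression above drops an item only when every target value in that series is NaN. A standalone pandas sketch of the same groupby-all pattern (the toy index and values are illustrative, not from the package):

    import numpy as np
    import pandas as pd

    idx = pd.MultiIndex.from_tuples(
        [("A", 0), ("A", 1), ("B", 0), ("B", 1)], names=["item_id", "timestamp"]
    )
    target = pd.Series([np.nan, np.nan, np.nan, 1.0], index=idx)
    all_nan = target.isna().groupby("item_id", sort=False).all()
    print(all_nan[all_nan].index.tolist())  # ['A'] -> only fully-NaN items are dropped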
@@ -130,31 +132,6 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
             data[self.target] = data[self.target].fillna(value=self._train_target_median)
         return data, known_covariates

-    def _process_deprecated_hyperparameters(self, model_params: dict[str, Any]) -> dict[str, Any]:
-        if "tabular_hyperparameters" in model_params:
-            logger.warning(
-                f"Hyperparameter 'tabular_hyperparameters' for {self.name} is deprecated and will be removed in v1.5. "
-                "Please use 'model_name' to specify the tabular model alias and 'model_hyperparameters' "
-                "to provide the tabular model hyperparameters."
-            )
-            tabular_hyperparameters = model_params.pop("tabular_hyperparameters")
-            if len(tabular_hyperparameters) == 1:
-                # We can automatically convert the hyperparameters if only one model is used
-                model_params["model_name"] = list(tabular_hyperparameters.keys())[0]
-                model_params["model_hyperparameters"] = tabular_hyperparameters[model_params["model_name"]]
-            else:
-                raise ValueError(
-                    f"Provided 'tabular_hyperparameters' {tabular_hyperparameters} cannot be automatically converted "
-                    f"to the new 'model_name' and 'model_hyperparameters' API for {self.name}."
-                )
-        if "tabular_fit_kwargs" in model_params:
-            logger.warning(
-                f"Hyperparameters 'tabular_fit_kwargs' for {self.name} is deprecated and is ignored by the model. "
-                "Please use 'model_name' to specify the tabular model alias and 'model_hyperparameters' "
-                "to provide the tabular model hyperparameters."
-            )
-        return model_params
-
     def _get_default_hyperparameters(self) -> dict[str, Any]:
         return {
             "max_num_items": 20_000,
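The deleted `_process_deprecated_hyperparameters` was the compatibility shim that translated the pre-1.5 `tabular_hyperparameters` format into the `model_name`/`model_hyperparameters` pair (and warned that `tabular_fit_kwargs` was ignored). With it removed, configurations must use the new keys directly. An illustrative before/after with hypothetical values, following the mapping the deleted code performed:

    # Old format, previously auto-converted when it contained exactly one model:
    old = {"tabular_hyperparameters": {"GBM": {"num_boost_round": 100}}}
    # New format expected going forward:
    new = {"model_name": "GBM", "model_hyperparameters": {"num_boost_round": 100}}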
@@ -182,7 +159,11 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         date_features = model_params.get("date_features")
         if date_features is None:
             date_features = get_time_features_for_frequency(self.freq)
-
+        known_covariates = self.covariate_metadata.known_covariates
+        conflicting = [f.__name__ for f in date_features if f.__name__ in known_covariates]
+        if conflicting:
+            logger.info(f"\tRemoved automatic date_features {conflicting} since they clash with known_covariates")
+        self._date_features = [f for f in date_features if f.__name__ not in known_covariates]

         target_transforms = []
         differences = model_params.get("differences")
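The lines added above filter out automatically generated date features whose names collide with user-provided known covariates, matching on each callable's `__name__`. A standalone sketch of that filter (the toy feature functions are illustrative, not the package's own):

    def month(timestamps):
        return timestamps.month

    def day_of_week(timestamps):
        return timestamps.dayofweek

    date_features = [month, day_of_week]
    known_covariates = ["month"]  # the user already supplies a "month" column

    conflicting = [f.__name__ for f in date_features if f.__name__ in known_covariates]
    date_features = [f for f in date_features if f.__name__ not in known_covariates]
    print(conflicting, [f.__name__ for f in date_features])  # ['month'] ['day_of_week']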
@@ -235,7 +216,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         return mlforecast_df.groupby(MLF_ITEMID, as_index=False, sort=False).tail(max_length)

     def _generate_train_val_dfs(
-        self, data: TimeSeriesDataFrame, max_num_items:
+        self, data: TimeSeriesDataFrame, max_num_items: int | None = None, max_num_samples: int | None = None
     ) -> tuple[pd.DataFrame, pd.DataFrame]:
         # Exclude items that are too short for chosen differences - otherwise exception will be raised
         if self._sum_of_differences > 0:
@@ -289,7 +270,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
     def _to_mlforecast_df(
         self,
         data: TimeSeriesDataFrame,
-        static_features:
+        static_features: pd.DataFrame | None,
         include_target: bool = True,
     ) -> pd.DataFrame:
         """Convert TimeSeriesDataFrame to a format expected by MLForecast methods `predict` and `preprocess`.
@@ -298,18 +279,28 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         """
         # TODO: Add support for past_covariates
         selected_columns = self.covariate_metadata.known_covariates.copy()
-        column_name_mapping = {ITEMID: MLF_ITEMID, TIMESTAMP: MLF_TIMESTAMP}
+        column_name_mapping = {TimeSeriesDataFrame.ITEMID: MLF_ITEMID, TimeSeriesDataFrame.TIMESTAMP: MLF_TIMESTAMP}
         if include_target:
             selected_columns += [self.target]
             column_name_mapping[self.target] = MLF_TARGET

         df = pd.DataFrame(data)[selected_columns].reset_index()
         if static_features is not None:
-            df = pd.merge(
+            df = pd.merge(
+                df, static_features, how="left", on=TimeSeriesDataFrame.ITEMID, suffixes=(None, "_static_feat")
+            )

         for col in self._non_boolean_real_covariates:
             # Normalize non-boolean features using mean_abs scaling
-            df[f"__scaled_{col}"] =
+            df[f"__scaled_{col}"] = (
+                df[col]
+                / df[col]
+                .abs()
+                .groupby(df[TimeSeriesDataFrame.ITEMID])
+                .mean()
+                .reindex(df[TimeSeriesDataFrame.ITEMID])
+                .values
+            )

         # Convert float64 to float32 to reduce memory usage
         float64_cols = list(df.select_dtypes(include="float64"))
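The reflowed `__scaled_{col}` expression above implements per-item mean-absolute scaling: each covariate value is divided by the mean absolute value of that covariate within its own series. The same computation in standalone pandas, with toy data:

    import pandas as pd

    df = pd.DataFrame({
        "item_id": ["A", "A", "B", "B"],
        "price": [10.0, -30.0, 2.0, 2.0],
    })
    scale = df["price"].abs().groupby(df["item_id"]).mean().reindex(df["item_id"]).values
    df["__scaled_price"] = df["price"] / scale
    print(df["__scaled_price"].tolist())  # [0.5, -1.5, 1.0, 1.0]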
@@ -321,10 +312,10 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
     def _fit(
         self,
         train_data: TimeSeriesDataFrame,
-        val_data:
-        time_limit:
-        num_cpus:
-        num_gpus:
+        val_data: TimeSeriesDataFrame | None = None,
+        time_limit: float | None = None,
+        num_cpus: int | None = None,
+        num_gpus: int | None = None,
         verbosity: int = 2,
         **kwargs,
     ) -> None:
@@ -338,7 +329,6 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
             if not set(train_data[col].unique()) == set([0, 1]):
                 self._non_boolean_real_covariates.append(col)
         model_params = self.get_hyperparameters()
-        model_params = self._process_deprecated_hyperparameters(model_params)

         mlforecast_init_args = self._get_mlforecast_init_args(train_data, model_params)
         assert self.freq is not None
@@ -399,8 +389,8 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
     def _remove_short_ts_and_generate_fallback_forecast(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
-    ) -> tuple[TimeSeriesDataFrame,
+        known_covariates: TimeSeriesDataFrame | None = None,
+    ) -> tuple[TimeSeriesDataFrame, TimeSeriesDataFrame, TimeSeriesDataFrame | None]:
         """Remove series that are too short for chosen differencing from data and generate naive forecast for them.

         Returns
@@ -496,7 +486,7 @@ class DirectTabularModel(AbstractMLForecastModel):
     lags : list[int], default = None
         Lags of the target that will be used as features for predictions. If None, will be determined automatically
         based on the frequency of the data.
-    date_features : list[
+    date_features : list[str | Callable], default = None
         Features computed from the dates. Can be pandas date attributes or functions that will take the dates as input.
         If None, will be determined automatically based on the frequency of the data.
     differences : list[int], default = []
@@ -558,7 +548,7 @@ class DirectTabularModel(AbstractMLForecastModel):
     def _predict(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
+        known_covariates: TimeSeriesDataFrame | None = None,
         **kwargs,
     ) -> TimeSeriesDataFrame:
         from .transforms import apply_inverse_transform
@@ -612,17 +602,19 @@ class DirectTabularModel(AbstractMLForecastModel):
             predictions, repeated_item_ids=predictions[MLF_ITEMID], past_target=data[self.target]
         )
         predictions_tsdf: TimeSeriesDataFrame = TimeSeriesDataFrame(
-            predictions.rename(
+            predictions.rename(
+                columns={MLF_ITEMID: TimeSeriesDataFrame.ITEMID, MLF_TIMESTAMP: TimeSeriesDataFrame.TIMESTAMP}
+            )
         )

         if forecast_for_short_series is not None:
             predictions_tsdf = pd.concat([predictions_tsdf, forecast_for_short_series])  # type: ignore
-        predictions_tsdf = predictions_tsdf.reindex(original_item_id_order, level=ITEMID)
+        predictions_tsdf = predictions_tsdf.reindex(original_item_id_order, level=TimeSeriesDataFrame.ITEMID)

         return predictions_tsdf

     def _postprocess_predictions(
-        self, predictions:
+        self, predictions: np.ndarray | pd.Series, repeated_item_ids: pd.Series
     ) -> pd.DataFrame:
         if self.is_quantile_model:
             predictions_df = pd.DataFrame(predictions, columns=[str(q) for q in self.quantile_levels])
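In `_postprocess_predictions` above, quantile models emit one forecast column per quantile level, with the level stringified as the column name. A standalone illustration of that naming:

    import numpy as np
    import pandas as pd

    quantile_levels = [0.1, 0.5, 0.9]
    predictions = np.zeros((2, len(quantile_levels)))  # (time steps, quantiles)
    predictions_df = pd.DataFrame(predictions, columns=[str(q) for q in quantile_levels])
    print(predictions_df.columns.tolist())  # ['0.1', '0.5', '0.9']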
@@ -676,7 +668,7 @@ class RecursiveTabularModel(AbstractMLForecastModel):
     lags : list[int], default = None
         Lags of the target that will be used as features for predictions. If None, will be determined automatically
         based on the frequency of the data.
-    date_features : list[
+    date_features : list[str | Callable], default = None
         Features computed from the dates. Can be pandas date attributes or functions that will take the dates as input.
         If None, will be determined automatically based on the frequency of the data.
     differences : list[int], default = None
@@ -714,7 +706,7 @@ class RecursiveTabularModel(AbstractMLForecastModel):
     def _predict(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
+        known_covariates: TimeSeriesDataFrame | None = None,
         **kwargs,
     ) -> TimeSeriesDataFrame:
         original_item_id_order = data.item_ids
@@ -745,16 +737,20 @@ class RecursiveTabularModel(AbstractMLForecastModel):
             X_df=X_df,
         )
         assert isinstance(raw_predictions, pd.DataFrame)
-        raw_predictions = raw_predictions.rename(
+        raw_predictions = raw_predictions.rename(
+            columns={MLF_ITEMID: TimeSeriesDataFrame.ITEMID, MLF_TIMESTAMP: TimeSeriesDataFrame.TIMESTAMP}
+        )

         predictions: TimeSeriesDataFrame = TimeSeriesDataFrame(
             self._add_gaussian_quantiles(
-                raw_predictions,
+                raw_predictions,
+                repeated_item_ids=raw_predictions[TimeSeriesDataFrame.ITEMID],
+                past_target=data[self.target],
             )
         )
         if forecast_for_short_series is not None:
             predictions = pd.concat([predictions, forecast_for_short_series])  # type: ignore
-        return predictions.reindex(original_item_id_order, level=ITEMID)
+        return predictions.reindex(original_item_id_order, level=TimeSeriesDataFrame.ITEMID)

     def _create_tabular_model(self, model_name: str, model_hyperparameters: dict[str, Any]) -> TabularModel:
         model_class = ag_model_registry.key_to_cls(model_name)
autogluon/timeseries/models/autogluon_tabular/per_step.py (+26 -15)

@@ -2,7 +2,7 @@ import logging
 import math
 import os
 import time
-from typing import Any, Callable, Literal,
+from typing import Any, Callable, Literal, Type

 import numpy as np
 import pandas as pd
@@ -17,7 +17,6 @@ from autogluon.core.constants import QUANTILE, REGRESSION
 from autogluon.tabular.models import AbstractModel as AbstractTabularModel
 from autogluon.tabular.registry import ag_model_registry
 from autogluon.timeseries import TimeSeriesDataFrame
-from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TIMESTAMP
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
 from autogluon.timeseries.utils.datetime import get_lags_for_frequency, get_time_features_for_frequency
 from autogluon.timeseries.utils.warning_filters import set_loggers_level, warning_filter
@@ -57,7 +56,7 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
         Seasonal lags of the target used as features. Unlike trailing lags, seasonal lags are not shifted
         but filtered by availability: model for step ``h`` uses ``[lag for lag in seasonal_lags if lag > h]``.
         If None, determined automatically based on data frequency.
-    date_features : list[
+    date_features : list[str | Callable], default = None
         Features computed from the dates. Can be pandas date attributes or functions that will take the dates as input.
         If None, will be determined automatically based on the frequency of the data.
     target_scaler : {"standard", "mean_abs", "min_max", "robust", None}, default = "mean_abs"
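The docstring above specifies the per-step lag availability rule: the model fitted for forecast step `h` keeps only the seasonal lags strictly greater than `h`. As a one-liner:

    seasonal_lags = [7, 14, 28]
    h = 7
    print([lag for lag in seasonal_lags if lag > h])  # [14, 28]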
@@ -95,7 +94,7 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
         self._model_cls: Type[AbstractTabularModel]
         self._n_jobs: int
         self._non_boolean_real_covariates: list[str] = []
-        self._max_ts_length:
+        self._max_ts_length: int | None = None

     @property
     def allowed_hyperparameters(self) -> list[str]:
@@ -115,7 +114,11 @@ class PerStepTabularModel(AbstractTimeSeriesModel):

     @property
     def _ag_to_nixtla(self) -> dict:
-        return {
+        return {
+            self.target: MLF_TARGET,
+            TimeSeriesDataFrame.ITEMID: MLF_ITEMID,
+            TimeSeriesDataFrame.TIMESTAMP: MLF_TIMESTAMP,
+        }

     def _get_default_hyperparameters(self):
         return {
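`_ag_to_nixtla` above maps AutoGluon's column names to the MLForecast naming scheme via the `MLF_*` constants imported from the module's utils. A standalone sketch; the concrete values "unique_id", "ds", "y" are Nixtla's usual convention and are assumed here only to make the example runnable:

    import pandas as pd

    MLF_ITEMID, MLF_TIMESTAMP, MLF_TARGET = "unique_id", "ds", "y"  # assumed values
    ag_to_nixtla = {"target": MLF_TARGET, "item_id": MLF_ITEMID, "timestamp": MLF_TIMESTAMP}

    df = pd.DataFrame({"item_id": ["A"], "timestamp": [pd.Timestamp("2024-01-01")], "target": [1.0]})
    print(df.rename(columns=ag_to_nixtla).columns.tolist())  # ['unique_id', 'ds', 'y']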
@@ -137,11 +140,11 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
         model_hyperparameters: dict,
         problem_type: Literal["quantile", "regression"],
         eval_metric: str,
-        validation_fraction:
+        validation_fraction: float | None,
         quantile_levels: list[float],
         lags: list[int],
         date_features: list[Callable],
-        time_limit:
+        time_limit: float | None,
         num_cpus: int,
         verbosity: int,
     ) -> str:
@@ -234,7 +237,7 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
     def preprocess(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
+        known_covariates: TimeSeriesDataFrame | None = None,
         is_train: bool = False,
         **kwargs,
     ):
@@ -246,7 +249,7 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
                 self._non_boolean_real_covariates.append(col)

         if len(self._non_boolean_real_covariates) > 0:
-            item_ids = data.index.get_level_values(level=ITEMID)
+            item_ids = data.index.get_level_values(level=TimeSeriesDataFrame.ITEMID)
             scale_per_column: dict[str, pd.Series] = {}
             columns_grouped = data[self._non_boolean_real_covariates].abs().groupby(item_ids)
             for col in self._non_boolean_real_covariates:
@@ -260,7 +263,7 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
         return data, known_covariates

     def _get_train_df(
-        self, train_data: TimeSeriesDataFrame, max_num_items:
+        self, train_data: TimeSeriesDataFrame, max_num_items: int | None, max_num_samples: int | None
     ) -> pd.DataFrame:
         if max_num_items is not None and train_data.num_items > max_num_items:
             items_to_keep = train_data.item_ids.to_series().sample(n=int(max_num_items))  # noqa: F841
@@ -277,7 +280,11 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
         train_df = train_data.to_data_frame().reset_index()
         if train_data.static_features is not None:
             train_df = pd.merge(
-                left=train_df,
+                left=train_df,
+                right=train_data.static_features,
+                left_on=TimeSeriesDataFrame.ITEMID,
+                right_index=True,
+                how="left",
             )
         train_df = train_df.rename(columns=self._ag_to_nixtla)
         train_df = train_df.assign(**{MLF_TARGET: train_df[MLF_TARGET].fillna(float("inf"))})
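The expanded `pd.merge` above joins static features onto the long-format training frame: static features are indexed by item_id, so the join matches the item column on the left against the static frame's index. A standalone sketch with toy frames:

    import pandas as pd

    train_df = pd.DataFrame({"item_id": ["A", "B"], "target": [1.0, 2.0]})
    static_features = pd.DataFrame(
        {"category": ["x", "y"]}, index=pd.Index(["A", "B"], name="item_id")
    )
    merged = pd.merge(
        left=train_df, right=static_features, left_on="item_id", right_index=True, how="left"
    )
    print(merged.columns.tolist())  # ['item_id', 'target', 'category']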
@@ -298,8 +305,10 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
     def _fit(
         self,
         train_data: TimeSeriesDataFrame,
-        val_data:
-        time_limit:
+        val_data: TimeSeriesDataFrame | None = None,
+        time_limit: float | None = None,
+        num_cpus: int | None = None,
+        num_gpus: int | None = None,
         verbosity: int = 2,
         **kwargs,
     ) -> None:
@@ -448,7 +457,7 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
     def _predict(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
+        known_covariates: TimeSeriesDataFrame | None = None,
         **kwargs,
     ) -> TimeSeriesDataFrame:
         if known_covariates is not None:
@@ -462,7 +471,9 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
             full_df = full_df.slice_by_timestep(-(self._max_ts_length + self.prediction_length), None)
         full_df = full_df.to_data_frame().reset_index()
         if data.static_features is not None:
-            full_df = pd.merge(
+            full_df = pd.merge(
+                full_df, data.static_features, left_on=TimeSeriesDataFrame.ITEMID, right_index=True, how="left"
+            )

         full_df = (
             full_df.rename(columns=self._ag_to_nixtla)
autogluon/timeseries/models/autogluon_tabular/transforms.py (+11 -9)

@@ -1,4 +1,4 @@
-from typing import Literal
+from typing import Literal

 import numpy as np
 import pandas as pd
@@ -8,11 +8,7 @@ from mlforecast.target_transforms import (
     _BaseGroupedArrayTargetTransform,
 )

-from autogluon.timeseries.dataset
-    ITEMID,
-    TIMESTAMP,
-    TimeSeriesDataFrame,
-)
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.transforms.target_scaler import TargetScaler, get_target_scaler

 from .utils import MLF_ITEMID, MLF_TIMESTAMP
@@ -26,11 +22,17 @@ class MLForecastScaler(BaseTargetTransform):

     def _df_to_tsdf(self, df: pd.DataFrame) -> TimeSeriesDataFrame:
         return TimeSeriesDataFrame(
-            df.rename(
+            df.rename(
+                columns={self.id_col: TimeSeriesDataFrame.ITEMID, self.time_col: TimeSeriesDataFrame.TIMESTAMP}
+            ).set_index([TimeSeriesDataFrame.ITEMID, TimeSeriesDataFrame.TIMESTAMP])
         )

     def _tsdf_to_df(self, ts_df: TimeSeriesDataFrame) -> pd.DataFrame:
-        return
+        return (
+            pd.DataFrame(ts_df)
+            .reset_index()
+            .rename(columns={TimeSeriesDataFrame.ITEMID: self.id_col, TimeSeriesDataFrame.TIMESTAMP: self.time_col})
+        )

     def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:  # type: ignore
         self.ag_scaler = get_target_scaler(name=self.scaler_type, target=self.target_col)
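The two reflowed conversion helpers above are exact inverses: `_df_to_tsdf` renames the scaler's id/time columns to AutoGluon's and sets them as the index, and `_tsdf_to_df` undoes both steps. The round trip in plain pandas, with the Nixtla-style column names assumed as in the earlier sketch:

    import pandas as pd

    long_df = pd.DataFrame({"unique_id": ["A"], "ds": [pd.Timestamp("2024-01-01")], "y": [1.0]})
    indexed = long_df.rename(columns={"unique_id": "item_id", "ds": "timestamp"}).set_index(
        ["item_id", "timestamp"]
    )
    restored = indexed.reset_index().rename(columns={"item_id": "unique_id", "timestamp": "ds"})
    assert restored.equals(long_df)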
@@ -45,7 +47,7 @@ class MLForecastScaler(BaseTargetTransform):

 def apply_inverse_transform(
     df: pd.DataFrame,
-    transform:
+    transform: _BaseGroupedArrayTargetTransform | BaseTargetTransform,
 ) -> pd.DataFrame:
     """Apply inverse transformation to a dataframe, converting to GroupedArray if necessary"""
     if isinstance(transform, BaseTargetTransform):
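The union annotation above pairs with an isinstance dispatch, as the docstring describes: `BaseTargetTransform` instances operate on the dataframe directly, while grouped-array transforms need a conversion first. A minimal structural sketch of that pattern (stub classes standing in for mlforecast's, not the package's implementation):

    class BaseTargetTransform:  # stub for mlforecast's BaseTargetTransform
        pass

    class GroupedArrayTransform:  # stub for _BaseGroupedArrayTargetTransform
        pass

    def apply_inverse_transform(df, transform: GroupedArrayTransform | BaseTargetTransform):
        if isinstance(transform, BaseTargetTransform):
            return "inverse-transform the dataframe directly"
        return "convert to GroupedArray, inverse-transform, convert back"

    print(apply_inverse_transform(None, BaseTargetTransform()))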
|