autogluon.timeseries 1.4.1b20250830__py3-none-any.whl → 1.4.1b20251116__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/timeseries/dataset/ts_dataframe.py +66 -53
- autogluon/timeseries/learner.py +5 -4
- autogluon/timeseries/metrics/quantile.py +1 -1
- autogluon/timeseries/metrics/utils.py +4 -4
- autogluon/timeseries/models/__init__.py +2 -0
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +28 -36
- autogluon/timeseries/models/autogluon_tabular/per_step.py +14 -5
- autogluon/timeseries/models/autogluon_tabular/transforms.py +9 -7
- autogluon/timeseries/models/chronos/model.py +104 -68
- autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +64 -32
- autogluon/timeseries/models/ensemble/__init__.py +29 -2
- autogluon/timeseries/models/ensemble/abstract.py +1 -37
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +247 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +50 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +10 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +87 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +133 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +141 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +41 -0
- autogluon/timeseries/models/ensemble/{basic.py → weighted/basic.py} +0 -10
- autogluon/timeseries/models/gluonts/abstract.py +2 -2
- autogluon/timeseries/models/gluonts/dataset.py +2 -2
- autogluon/timeseries/models/local/abstract_local_model.py +2 -2
- autogluon/timeseries/models/multi_window/multi_window_model.py +1 -1
- autogluon/timeseries/models/toto/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
- autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +197 -0
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
- autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
- autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +94 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +306 -0
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
- autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
- autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
- autogluon/timeseries/models/toto/dataloader.py +108 -0
- autogluon/timeseries/models/toto/hf_pretrained_model.py +119 -0
- autogluon/timeseries/models/toto/model.py +236 -0
- autogluon/timeseries/predictor.py +10 -26
- autogluon/timeseries/regressor.py +9 -7
- autogluon/timeseries/splitter.py +1 -25
- autogluon/timeseries/trainer/ensemble_composer.py +250 -0
- autogluon/timeseries/trainer/trainer.py +124 -193
- autogluon/timeseries/trainer/utils.py +18 -0
- autogluon/timeseries/transforms/covariate_scaler.py +1 -1
- autogluon/timeseries/transforms/target_scaler.py +7 -7
- autogluon/timeseries/utils/features.py +9 -5
- autogluon/timeseries/utils/forecast.py +5 -5
- autogluon/timeseries/version.py +1 -1
- autogluon.timeseries-1.4.1b20251116-py3.9-nspkg.pth +1 -0
- {autogluon.timeseries-1.4.1b20250830.dist-info → autogluon_timeseries-1.4.1b20251116.dist-info}/METADATA +28 -13
- autogluon_timeseries-1.4.1b20251116.dist-info/RECORD +96 -0
- {autogluon.timeseries-1.4.1b20250830.dist-info → autogluon_timeseries-1.4.1b20251116.dist-info}/WHEEL +1 -1
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -10
- autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
- autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -544
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -530
- autogluon.timeseries-1.4.1b20250830-py3.9-nspkg.pth +0 -1
- autogluon.timeseries-1.4.1b20250830.dist-info/RECORD +0 -75
- /autogluon/timeseries/models/ensemble/{greedy.py → weighted/greedy.py} +0 -0
- {autogluon.timeseries-1.4.1b20250830.dist-info → autogluon_timeseries-1.4.1b20251116.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.4.1b20250830.dist-info → autogluon_timeseries-1.4.1b20251116.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.4.1b20250830.dist-info → autogluon_timeseries-1.4.1b20251116.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.4.1b20250830.dist-info → autogluon_timeseries-1.4.1b20251116.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.4.1b20250830.dist-info → autogluon_timeseries-1.4.1b20251116.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Optional, Sequence, Union
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import pandas as pd
|
|
7
|
+
from typing_extensions import Self
|
|
8
|
+
|
|
9
|
+
from autogluon.common.loaders import load_pkl
|
|
10
|
+
from autogluon.timeseries import TimeSeriesDataFrame
|
|
11
|
+
from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
|
|
12
|
+
from autogluon.timeseries.utils.features import CovariateMetadata
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from ._internal import TotoForecaster
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class TotoModel(AbstractTimeSeriesModel):
    """Toto (Time-Series-Optimized Transformer for Observability) [CohenKhwajaetal2025]_ pretrained time series forecasting model.

    Toto is a 151M parameter model trained on over 1T data points from DataDog's internal observability systems, as well as
    the GIFT-eval pretrain, Chronos pretraining, and synthetically generated time series corpora. It is a decoder-only
    architecture that autoregressively outputs parametric distribution forecasts. More details can be found on
    `Hugging Face <https://huggingface.co/Datadog/Toto-Open-Base-1.0>`_ and `GitHub <https://github.com/DataDog/toto>`_.

    The AutoGluon implementation of Toto is a port of the original implementation. AutoGluon supports Toto for
    **inference only**, i.e., the model will not be trained or fine-tuned on the provided training data. Toto is optimized
    for easy maintenance with the rest of the AutoGluon model zoo, and does not feature some important optimizations such
    as xformers and flash-attention available in the original model repository. The AutoGluon implementation of Toto
    requires a CUDA-compatible GPU.

    References
    ----------
    .. [CohenKhwajaetal2025] Cohen, Ben, Khwaja, Emaad et al.
        "This Time is Different: An Observability Perspective on Time Series Foundation Models."
        https://arxiv.org/abs/2505.14766


    Other Parameters
    ----------------
    model_path : str, default = "Datadog/Toto-Open-Base-1.0"
        Model path used for the model, i.e., a HuggingFace transformers ``name_or_path``. Can be a
        compatible model name on HuggingFace Hub or a local path to a model directory.
    batch_size : int, default = 24
        Size of batches used during inference.
    num_samples : int, default = 256
        Number of samples used during inference.
    device : str, default = "cuda"
        Device to use for inference. Toto requires a CUDA-compatible GPU to run.
    context_length : int or None, default = 4096
        The context length to use in the model. Shorter context lengths will decrease model accuracy, but result
        in faster inference.
    compile_model : bool, default = True
        Whether to compile the model using torch.compile() for faster inference. May increase initial loading time
        but can provide speedups during inference.
    """

    default_model_path: str = "Datadog/Toto-Open-Base-1.0"

    def __init__(
        self,
        path: Optional[str] = None,
        name: Optional[str] = None,
        hyperparameters: Optional[dict[str, Any]] = None,
        freq: Optional[str] = None,
        prediction_length: int = 1,
        covariate_metadata: Optional[CovariateMetadata] = None,
        target: str = "target",
        quantile_levels: Sequence[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
        eval_metric: Any = None,
    ):
        """Store the pretrained model location and forward all other arguments to the base class."""
        hyperparameters = hyperparameters if hyperparameters is not None else {}

        # Resolved before the base-class constructor runs; falls back to the class-level default.
        self.model_path = hyperparameters.get("model_path", self.default_model_path)

        super().__init__(
            path=path,
            name=name,
            hyperparameters=hyperparameters,
            freq=freq,
            prediction_length=prediction_length,
            covariate_metadata=covariate_metadata,
            target=target,
            quantile_levels=quantile_levels,
            eval_metric=eval_metric,
        )

        # Created lazily by ``load_forecaster``.
        self._forecaster: Optional[TotoForecaster] = None

    def save(self, path: Optional[str] = None, verbose: bool = True) -> str:
        """Save the model without the loaded forecaster and return the save path as a string.

        The forecaster is detached for the duration of the parent ``save`` call and re-attached
        afterwards, even if saving raises, so a failed save does not drop the loaded model.
        """
        forecaster = self._forecaster
        self._forecaster = None
        try:
            path = super().save(path=path, verbose=verbose)
        finally:
            self._forecaster = forecaster

        return str(path)

    @classmethod
    def load(cls, path: str, reset_paths: bool = True, load_oof: bool = False, verbose: bool = True) -> Self:
        """Load the pickled model object stored under ``path``.

        ``load_oof`` is accepted for signature compatibility but is not used by this implementation.
        The forecaster itself is not restored here; it is created on demand by ``load_forecaster``.
        """
        model = load_pkl.load(path=os.path.join(path, cls.model_file_name), verbose=verbose)
        if reset_paths:
            model.set_contexts(path)

        return model

    def _is_gpu_available(self) -> bool:
        """Return ``torch.cuda.is_available()``; torch is imported lazily."""
        import torch.cuda

        return torch.cuda.is_available()

    def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str, Union[int, float]]:
        """Return the minimum resources: one CPU and one GPU, regardless of ``is_gpu_available``."""
        return {"num_cpus": 1, "num_gpus": 1}

    def load_forecaster(self):
        """Load the pretrained Toto model and wrap it in a ``TotoForecaster``.

        Raises
        ------
        RuntimeError
            If no CUDA-compatible GPU is detected.
        """
        from ._internal import TotoForecaster
        from .hf_pretrained_model import TotoConfig, TotoPretrainedModel

        if not self._is_gpu_available():
            raise RuntimeError(
                f"{self.name} requires a GPU to run, but no GPU was detected. "
                "Please make sure that you are using a computer with a CUDA-compatible GPU and "
                "`import torch; torch.cuda.is_available()` returns `True`."
            )

        hyperparameters = self.get_hyperparameters()
        pretrained_model = TotoPretrainedModel.from_pretrained(
            self.model_path,
            config=TotoConfig.from_pretrained(self.model_path),
            device_map=hyperparameters["device"],
        )

        if hyperparameters["compile_model"]:
            pretrained_model.model.compile()

        self._forecaster = TotoForecaster(model=pretrained_model.model)

    def persist(self) -> Self:
        """Ensure the forecaster is loaded in memory and return ``self``."""
        if self._forecaster is None:
            self.load_forecaster()
        return self

    def _get_default_hyperparameters(self) -> dict:
        """Return default values for the inference hyperparameters."""
        return {
            "batch_size": 24,
            "num_samples": 256,
            "device": "cuda",
            "context_length": 4096,
            "compile_model": True,
        }

    @property
    def allowed_hyperparameters(self) -> list[str]:
        """Hyperparameter names accepted by the base class plus the Toto-specific ones."""
        return super().allowed_hyperparameters + [
            "model_path",
            "batch_size",
            "num_samples",
            "device",
            "context_length",
            "compile_model",
        ]

    def _more_tags(self) -> dict:
        """Return model tags: NaNs are allowed, and neither train nor validation data is used."""
        return {
            "allow_nan": True,
            "can_use_train_data": False,
            "can_use_val_data": False,
        }

    def _fit(
        self,
        train_data: TimeSeriesDataFrame,
        val_data: Optional[TimeSeriesDataFrame] = None,
        time_limit: Optional[float] = None,
        num_cpus: Optional[int] = None,
        num_gpus: Optional[int] = None,
        verbosity: int = 2,
        **kwargs,
    ) -> None:
        """Validate fit parameters and load the pretrained forecaster; no training is performed."""
        self._check_fit_params()
        self.load_forecaster()

    def _predict(
        self, data: TimeSeriesDataFrame, known_covariates: Optional[TimeSeriesDataFrame] = None, **kwargs
    ) -> TimeSeriesDataFrame:
        """Forecast ``prediction_length`` steps for every item in ``data``.

        Missing values in ``data`` are filled with the ``"auto"`` strategy before inference.
        ``known_covariates`` is not used. An optional ``time_limit`` keyword argument is
        forwarded to the data loader.

        Returns
        -------
        TimeSeriesDataFrame
            Frame with a ``"mean"`` column followed by one column per quantile level
            (named ``str(q)``), indexed by the forecast horizon of ``data``.
        """
        import torch

        from .dataloader import TotoDataLoader, TotoInferenceDataset

        hyperparameters = self.get_hyperparameters()

        if self._forecaster is None:
            self.load_forecaster()
            assert self._forecaster, "Toto model failed to load"
        device = self._forecaster.model.device

        dataset = TotoInferenceDataset(
            target_df=data.fill_missing_values("auto"),
            max_context_length=hyperparameters["context_length"],
        )
        loader = TotoDataLoader(
            dataset,
            freq=self.freq,
            batch_size=hyperparameters["batch_size"],
            time_limit=kwargs.get("time_limit"),
            device=device,
        )

        batch_means, batch_quantiles = [], []
        with torch.inference_mode():
            for masked_timeseries in loader:
                forecast = self._forecaster.forecast(
                    masked_timeseries,
                    prediction_length=self.prediction_length,
                    num_samples=hyperparameters["num_samples"],
                    samples_per_batch=32,
                )

                batch_means.append(forecast.mean.cpu().numpy())
                # Quantile levels are stacked on axis 0, then moved to the last axis after
                # dropping axis 2 (presumably a singleton variate axis -- TODO confirm).
                qs = np.array([forecast.quantile(q).cpu().numpy() for q in self.quantile_levels])
                batch_quantiles.append(qs.squeeze(2).transpose(1, 2, 0))

        # Flatten to one row per forecast step: a mean column next to the quantile columns.
        df = pd.DataFrame(
            np.concatenate(
                [
                    np.concatenate(batch_means, axis=0).reshape(-1, 1),
                    np.concatenate(batch_quantiles, axis=0).reshape(-1, len(self.quantile_levels)),
                ],
                axis=1,
            ),
            columns=["mean"] + [str(q) for q in self.quantile_levels],
            index=self.get_forecast_horizon_index(data),
        )

        return TimeSeriesDataFrame(df)
|
|
@@ -22,10 +22,9 @@ from autogluon.core.utils.loaders import load_pkl, load_str
|
|
|
22
22
|
from autogluon.core.utils.savers import save_pkl, save_str
|
|
23
23
|
from autogluon.timeseries import __version__ as current_ag_version
|
|
24
24
|
from autogluon.timeseries.configs import get_predictor_presets
|
|
25
|
-
from autogluon.timeseries.dataset
|
|
25
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
26
26
|
from autogluon.timeseries.learner import TimeSeriesLearner
|
|
27
27
|
from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
|
|
28
|
-
from autogluon.timeseries.splitter import ExpandingWindowSplitter
|
|
29
28
|
from autogluon.timeseries.trainer import TimeSeriesTrainer
|
|
30
29
|
from autogluon.timeseries.utils.forecast import make_future_data_frame
|
|
31
30
|
|
|
@@ -221,20 +220,6 @@ class TimeSeriesPredictor:
|
|
|
221
220
|
ensemble_model_type=kwargs.pop("ensemble_model_type", None),
|
|
222
221
|
)
|
|
223
222
|
|
|
224
|
-
if "ignore_time_index" in kwargs:
|
|
225
|
-
raise TypeError(
|
|
226
|
-
"`ignore_time_index` argument to TimeSeriesPredictor.__init__() has been deprecated.\n"
|
|
227
|
-
"If your data has irregular timestamps, please either 1) specify the desired regular frequency when "
|
|
228
|
-
"creating the predictor as `TimeSeriesPredictor(freq=...)` or 2) manually convert timestamps to "
|
|
229
|
-
"regular frequency with `data.convert_frequency(freq=...)`."
|
|
230
|
-
)
|
|
231
|
-
for k in ["learner_type", "learner_kwargs"]:
|
|
232
|
-
if k in kwargs:
|
|
233
|
-
val = kwargs.pop(k)
|
|
234
|
-
logger.warning(
|
|
235
|
-
f"Passing `{k}` to TimeSeriesPredictor has been deprecated and will be removed in v1.4. "
|
|
236
|
-
f"The provided value {val} will be ignored."
|
|
237
|
-
)
|
|
238
223
|
if len(kwargs) > 0:
|
|
239
224
|
for key in kwargs:
|
|
240
225
|
raise TypeError(f"TimeSeriesPredictor.__init__() got an unexpected keyword argument '{key}'")
|
|
@@ -417,7 +402,9 @@ class TimeSeriesPredictor:
|
|
|
417
402
|
)
|
|
418
403
|
train_data = train_data.query("item_id not in @too_short_items")
|
|
419
404
|
|
|
420
|
-
all_nan_items = train_data.item_ids[
|
|
405
|
+
all_nan_items = train_data.item_ids[
|
|
406
|
+
train_data[self.target].isna().groupby(TimeSeriesDataFrame.ITEMID, sort=False).all()
|
|
407
|
+
]
|
|
421
408
|
if len(all_nan_items) > 0:
|
|
422
409
|
logger.info(f"\tRemoving {len(all_nan_items)} time series consisting of only NaN values from train_data.")
|
|
423
410
|
train_data = train_data.query("item_id not in @all_nan_items")
|
|
@@ -751,10 +738,6 @@ class TimeSeriesPredictor:
|
|
|
751
738
|
train_data, num_val_windows=num_val_windows, val_step_size=val_step_size
|
|
752
739
|
)
|
|
753
740
|
|
|
754
|
-
val_splitter = ExpandingWindowSplitter(
|
|
755
|
-
prediction_length=self.prediction_length, num_val_windows=num_val_windows, val_step_size=val_step_size
|
|
756
|
-
)
|
|
757
|
-
|
|
758
741
|
time_left = None if time_limit is None else time_limit - (time.time() - time_start)
|
|
759
742
|
self._learner.fit(
|
|
760
743
|
train_data=train_data,
|
|
@@ -764,7 +747,8 @@ class TimeSeriesPredictor:
|
|
|
764
747
|
excluded_model_types=excluded_model_types,
|
|
765
748
|
time_limit=time_left,
|
|
766
749
|
verbosity=verbosity,
|
|
767
|
-
|
|
750
|
+
num_val_windows=num_val_windows,
|
|
751
|
+
val_step_size=val_step_size,
|
|
768
752
|
refit_every_n_windows=refit_every_n_windows,
|
|
769
753
|
skip_model_selection=skip_model_selection,
|
|
770
754
|
enable_ensemble=enable_ensemble,
|
|
@@ -866,7 +850,7 @@ class TimeSeriesPredictor:
|
|
|
866
850
|
use_cache=use_cache,
|
|
867
851
|
random_seed=random_seed,
|
|
868
852
|
)
|
|
869
|
-
return cast(TimeSeriesDataFrame, predictions.reindex(original_item_id_order, level=ITEMID))
|
|
853
|
+
return cast(TimeSeriesDataFrame, predictions.reindex(original_item_id_order, level=TimeSeriesDataFrame.ITEMID))
|
|
870
854
|
|
|
871
855
|
def evaluate(
|
|
872
856
|
self,
|
|
@@ -1498,7 +1482,7 @@ class TimeSeriesPredictor:
|
|
|
1498
1482
|
)
|
|
1499
1483
|
|
|
1500
1484
|
y_val: list[TimeSeriesDataFrame] = [
|
|
1501
|
-
select_target(df) for df in trainer.
|
|
1485
|
+
select_target(df) for df in trainer._get_validation_windows(train_data=train_data, val_data=val_data)
|
|
1502
1486
|
]
|
|
1503
1487
|
y_test: TimeSeriesDataFrame = select_target(test_data)
|
|
1504
1488
|
|
|
@@ -1621,7 +1605,7 @@ class TimeSeriesPredictor:
|
|
|
1621
1605
|
for q in quantile_levels:
|
|
1622
1606
|
ax.fill_between(forecast.index, point_forecast, forecast[str(q)], color="C1", alpha=0.2)
|
|
1623
1607
|
if len(axes) > len(item_ids):
|
|
1624
|
-
axes[len(item_ids)].set_axis_off()
|
|
1625
|
-
handles, labels = axes[0].get_legend_handles_labels()
|
|
1608
|
+
axes[len(item_ids)].set_axis_off() # type: ignore
|
|
1609
|
+
handles, labels = axes[0].get_legend_handles_labels() # type: ignore
|
|
1626
1610
|
fig.legend(handles, labels, bbox_to_anchor=(0.5, 0.0), ncols=len(handles))
|
|
1627
1611
|
return fig
|
|
@@ -7,7 +7,7 @@ import pandas as pd
|
|
|
7
7
|
|
|
8
8
|
from autogluon.core.models import AbstractModel
|
|
9
9
|
from autogluon.tabular.registry import ag_model_registry as tabular_ag_model_registry
|
|
10
|
-
from autogluon.timeseries.dataset
|
|
10
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
11
11
|
from autogluon.timeseries.utils.features import CovariateMetadata
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
@@ -119,9 +119,9 @@ class GlobalCovariateRegressor(CovariateRegressor):
|
|
|
119
119
|
median_ts_length = data.num_timesteps_per_item().median()
|
|
120
120
|
features_to_drop = [self.target]
|
|
121
121
|
if not self.include_item_id:
|
|
122
|
-
features_to_drop += [ITEMID]
|
|
122
|
+
features_to_drop += [TimeSeriesDataFrame.ITEMID]
|
|
123
123
|
if self.validation_fraction is not None:
|
|
124
|
-
grouped_df = tabular_df.groupby(ITEMID, observed=False, sort=False)
|
|
124
|
+
grouped_df = tabular_df.groupby(TimeSeriesDataFrame.ITEMID, observed=False, sort=False)
|
|
125
125
|
val_size = max(int(self.validation_fraction * median_ts_length), 1)
|
|
126
126
|
train_df = self._subsample_df(grouped_df.head(-val_size))
|
|
127
127
|
val_df = self._subsample_df(grouped_df.tail(val_size))
|
|
@@ -201,7 +201,7 @@ class GlobalCovariateRegressor(CovariateRegressor):
|
|
|
201
201
|
assert self.model is not None, "CovariateRegressor must be fit before calling predict."
|
|
202
202
|
tabular_df = self._get_tabular_df(data, static_features=static_features)
|
|
203
203
|
if not self.include_item_id:
|
|
204
|
-
tabular_df = tabular_df.drop(columns=[ITEMID])
|
|
204
|
+
tabular_df = tabular_df.drop(columns=[TimeSeriesDataFrame.ITEMID])
|
|
205
205
|
return self.model.predict(X=tabular_df)
|
|
206
206
|
|
|
207
207
|
def _get_tabular_df(
|
|
@@ -211,12 +211,14 @@ class GlobalCovariateRegressor(CovariateRegressor):
|
|
|
211
211
|
include_target: bool = False,
|
|
212
212
|
) -> pd.DataFrame:
|
|
213
213
|
"""Construct a tabular dataframe from known covariates and static features."""
|
|
214
|
-
available_columns = [ITEMID] + self.covariate_metadata.known_covariates
|
|
214
|
+
available_columns = [TimeSeriesDataFrame.ITEMID] + self.covariate_metadata.known_covariates
|
|
215
215
|
if include_target:
|
|
216
216
|
available_columns += [self.target]
|
|
217
|
-
tabular_df =
|
|
217
|
+
tabular_df = (
|
|
218
|
+
pd.DataFrame(data).reset_index()[available_columns].astype({TimeSeriesDataFrame.ITEMID: "category"})
|
|
219
|
+
)
|
|
218
220
|
if static_features is not None and self.include_static_features:
|
|
219
|
-
tabular_df = pd.merge(tabular_df, static_features, on=ITEMID)
|
|
221
|
+
tabular_df = pd.merge(tabular_df, static_features, on=TimeSeriesDataFrame.ITEMID)
|
|
220
222
|
return tabular_df
|
|
221
223
|
|
|
222
224
|
def _subsample_df(self, df: pd.DataFrame) -> pd.DataFrame:
|
autogluon/timeseries/splitter.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from typing import Iterator, Optional
|
|
2
2
|
|
|
3
|
-
from .dataset
|
|
3
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
4
4
|
|
|
5
5
|
__all__ = [
|
|
6
6
|
"AbstractWindowSplitter",
|
|
@@ -57,27 +57,3 @@ class ExpandingWindowSplitter(AbstractWindowSplitter):
|
|
|
57
57
|
train_data = data.slice_by_timestep(None, train_end)
|
|
58
58
|
val_data = data.slice_by_timestep(None, val_end)
|
|
59
59
|
yield train_data, val_data
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
class AbstractTimeSeriesSplitter:
|
|
63
|
-
def __init__(self, *args, **kwargs):
|
|
64
|
-
raise ValueError(
|
|
65
|
-
"`AbstractTimeSeriesSplitter` has been deprecated. "
|
|
66
|
-
"Please use `autogluon.timeseries.splitter.ExpandingWindowSplitter` instead."
|
|
67
|
-
)
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
class MultiWindowSplitter(AbstractTimeSeriesSplitter):
|
|
71
|
-
def __init__(self, *args, **kwargs):
|
|
72
|
-
raise ValueError(
|
|
73
|
-
"`MultiWindowSplitter` has been deprecated. "
|
|
74
|
-
"Please use `autogluon.timeseries.splitter.ExpandingWindowSplitter` instead."
|
|
75
|
-
)
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
class LastWindowSplitter(MultiWindowSplitter):
|
|
79
|
-
def __init__(self, *args, **kwargs):
|
|
80
|
-
raise ValueError(
|
|
81
|
-
"`LastWindowSplitter` has been deprecated. "
|
|
82
|
-
"Please use `autogluon.timeseries.splitter.ExpandingWindowSplitter` instead."
|
|
83
|
-
)
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import time
|
|
4
|
+
import traceback
|
|
5
|
+
from typing import Iterator, Optional
|
|
6
|
+
|
|
7
|
+
import networkx as nx
|
|
8
|
+
import numpy as np
|
|
9
|
+
from typing_extensions import Self
|
|
10
|
+
|
|
11
|
+
from autogluon.timeseries import TimeSeriesDataFrame
|
|
12
|
+
from autogluon.timeseries.metrics import TimeSeriesScorer
|
|
13
|
+
from autogluon.timeseries.models.ensemble import AbstractTimeSeriesEnsembleModel, get_ensemble_class
|
|
14
|
+
from autogluon.timeseries.splitter import AbstractWindowSplitter
|
|
15
|
+
from autogluon.timeseries.utils.warning_filters import warning_filter
|
|
16
|
+
|
|
17
|
+
from .utils import log_scores_and_times
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger("autogluon.timeseries.trainer")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class EnsembleComposer:
    """Helper class for TimeSeriesTrainer to build multi-layer stack ensembles."""

    def __init__(
        self,
        path,
        prediction_length: int,
        eval_metric: TimeSeriesScorer,
        target: str,
        quantile_levels: list[float],
        model_graph: nx.DiGraph,
        ensemble_hyperparameters: dict,
        window_splitter: AbstractWindowSplitter,
    ):
        """Store the configuration and derive a fresh graph holding only the base models.

        Every node name in the incoming ``model_graph`` (base models and any existing ensembles)
        is recorded in ``banned_model_names`` so that new ensembles never reuse those names.
        """
        self.eval_metric = eval_metric
        self.path = path
        self.prediction_length = prediction_length
        self.target = target
        self.quantile_levels = quantile_levels

        self.ensemble_hyperparameters = ensemble_hyperparameters

        self.window_splitter = window_splitter

        self.banned_model_names = list(model_graph.nodes)
        self.model_graph = self._get_base_model_graph(source_graph=model_graph)

    @staticmethod
    def _get_base_model_graph(source_graph: nx.DiGraph) -> nx.DiGraph:
        """Return a model graph by copying only base models (nodes without predecessors)
        This ensures we start fresh for ensemble building.
        """
        rootset = EnsembleComposer._get_rootset(source_graph)

        dst_graph = nx.DiGraph()
        for node in rootset:
            # Node attributes are copied over; edges are not.
            dst_graph.add_node(node, **source_graph.nodes[node])

        return dst_graph

    @staticmethod
    def _get_rootset(graph: nx.DiGraph) -> list[str]:
        """Return the nodes of ``graph`` that have no incoming edges."""
        return [n for n in graph.nodes if graph.in_degree(n) == 0]

    def iter_ensembles(self) -> Iterator[tuple[int, AbstractTimeSeriesEnsembleModel, list[str]]]:
        """Iterate over trained ensemble models, layer by layer.

        Yields
        ------
        layer_ix
            The layer index of the ensemble.
        model
            The ensemble model object
        base_model_names
            The names of the base models that are part of the ensemble.
        """
        rootset = self._get_rootset(self.model_graph)

        for layer_ix, layer in enumerate(nx.traversal.bfs_layers(self.model_graph, rootset)):
            if layer_ix == 0:  # we don't need base models
                continue

            for model_name in layer:
                attrs = self.model_graph.nodes[model_name]
                model_path = os.path.join(self.path, *attrs["path"])
                model = attrs["type"].load(path=model_path)

                yield (
                    layer_ix,
                    model,
                    list(self.model_graph.predecessors(model_name)),
                )

    def fit(
        self,
        train_data: TimeSeriesDataFrame,
        val_data: Optional[TimeSeriesDataFrame] = None,
        time_limit: Optional[float] = None,
    ) -> Self:
        """Fit every configured ensemble on the base models' out-of-fold predictions.

        Each successfully fitted ensemble is scored on the validation windows, saved to disk and
        then registered in ``model_graph``. An ensemble that raises is logged and skipped.

        Parameters
        ----------
        train_data
            Training data; split into validation windows when ``val_data`` is None.
        val_data
            Optional validation data, used as the single validation window when provided.
        time_limit
            Total time budget in seconds shared by all ensembles, or None for no limit.

        Returns
        -------
        Self
            This composer, to allow chaining.
        """
        fit_start = time.monotonic()
        base_model_scores = {k: self.model_graph.nodes[k]["val_score"] for k in self.model_graph.nodes}
        model_names = list(base_model_scores.keys())

        if not self._can_fit_ensemble(time_limit, len(model_names)):
            return self

        logger.info(f"Fitting {len(self.ensemble_hyperparameters)} ensemble(s).")

        # get target and base model prediction data for ensemble training
        data_per_window = self._get_validation_windows(train_data=train_data, val_data=val_data)
        predictions_per_window = self._get_base_model_predictions(model_names)

        for ensemble_name, ensemble_hp_dict in self.ensemble_hyperparameters.items():
            # The budget is shared: each ensemble only gets what is left of ``time_limit``.
            time_left = None if time_limit is None else time_limit - (time.monotonic() - fit_start)
            if time_left is not None and time_left <= 0:
                logger.info("Not fitting remaining ensemble(s) due to lack of time remaining.")
                break

            try:
                time_start = time.monotonic()
                ensemble_class = get_ensemble_class(ensemble_name)
                ensemble = ensemble_class(
                    eval_metric=self.eval_metric,
                    target=self.target,
                    prediction_length=self.prediction_length,
                    path=self.path,
                    freq=data_per_window[0].freq,
                    quantile_levels=self.quantile_levels,
                    hyperparameters=ensemble_hp_dict,
                )
                # update name to prevent name collisions
                ensemble.name = self._get_ensemble_model_name(ensemble.name)

                with warning_filter():
                    ensemble.fit(
                        predictions_per_window=predictions_per_window,
                        data_per_window=data_per_window,
                        model_scores=base_model_scores,
                        time_limit=time_left,
                    )
                ensemble.fit_time = time.monotonic() - time_start

                score_per_fold = []
                for window_idx, data in enumerate(data_per_window):
                    predictions = ensemble.predict(
                        {n: predictions_per_window[n][window_idx] for n in ensemble.model_names}
                    )
                    score_per_fold.append(self.eval_metric.score(data, predictions, self.target))
                ensemble.val_score = float(np.mean(score_per_fold, dtype=np.float64))

                # TODO: add ensemble's own time to predict_time
                ensemble.predict_time = self._calculate_base_models_predict_time(ensemble.model_names)

                log_scores_and_times(
                    ensemble.val_score,
                    ensemble.fit_time,
                    ensemble.predict_time,
                    eval_metric_name=self.eval_metric.name_with_sign,
                )

                # Save the ensemble model to disk before registering it, so a failed save
                # cannot leave a graph node that points at a model that was never written.
                ensemble.save()

                self._add_model(ensemble, base_models=ensemble.model_names)
            except Exception as err:  # noqa
                logger.error(
                    f"\tWarning: Exception caused {ensemble_name} to fail during training... Skipping this model."
                )
                logger.error(f"\t{err}")
                logger.debug(traceback.format_exc())

        return self

    def _add_model(self, model, base_models: list[str]):
        """Register ``model`` as a graph node with an edge from each of its base models."""
        self.model_graph.add_node(
            model.name,
            # Stored relative to ``self.path`` as a list of path components.
            path=os.path.relpath(model.path, self.path).split(os.sep),
            type=type(model),
            fit_time=model.fit_time,
            predict_time=model.predict_time,
            val_score=model.val_score,
        )
        for base_model in base_models:
            self.model_graph.add_edge(base_model, model.name)

    def _can_fit_ensemble(
        self,
        time_limit: Optional[float],
        num_models_available_for_ensemble: int,
    ) -> bool:
        """Return False (and log why) when no time is left or fewer than two models are available."""
        if time_limit is not None and time_limit <= 0:
            logger.info(f"Not fitting ensemble due to lack of time remaining. Time left: {time_limit:.1f} seconds")
            return False

        if num_models_available_for_ensemble <= 1:
            logger.info(
                "Not fitting ensemble as "
                + (
                    "no models were successfully trained."
                    if not num_models_available_for_ensemble
                    else "only 1 model was trained."
                )
            )
            return False

        return True

    def _get_validation_windows(
        self, train_data: TimeSeriesDataFrame, val_data: Optional[TimeSeriesDataFrame]
    ) -> list[TimeSeriesDataFrame]:
        """Return the validation folds: splitter output on ``train_data``, or just ``[val_data]``."""
        # TODO: update for window/stack-layer logic and refit logic
        if val_data is None:
            return [val_fold for _, val_fold in self.window_splitter.split(train_data)]
        else:
            return [val_data]

    def _get_ensemble_model_name(self, name: str) -> str:
        """Revise name for an ensemble model, ensuring we don't have name collisions"""
        base_name = name
        increment = 1
        # The first collision yields the suffix ``_2``.
        while name in self.banned_model_names:
            increment += 1
            name = f"{base_name}_{increment}"
        return name

    def _get_base_model_predictions(self, model_names: list[str]) -> dict[str, list[TimeSeriesDataFrame]]:
        """Get base model predictions for ensemble training / inference."""
        # TODO: update for window/stack-layer logic and refit logic
        predictions_per_window = {}

        for model_name in model_names:
            model_attrs = self.model_graph.nodes[model_name]

            model_path = os.path.join(self.path, *model_attrs["path"])
            model_type = model_attrs["type"]

            predictions_per_window[model_name] = model_type.load_oof_predictions(path=model_path)

        return predictions_per_window

    def _calculate_base_models_predict_time(self, model_names: list[str]) -> float:
        """Calculate ensemble predict time as sum of base model predict times."""
        return sum(self.model_graph.nodes[name]["predict_time"] for name in model_names)
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def validate_ensemble_hyperparameters(hyperparameters) -> dict:
    """Validate ensemble hyperparameters dict.

    Parameters
    ----------
    hyperparameters
        Mapping from ensemble name to that ensemble's hyperparameter dict.

    Returns
    -------
    dict
        The same ``hyperparameters`` object, unchanged.

    Raises
    ------
    ValueError
        If ``hyperparameters`` or any of its values is not a dict. Unknown ensemble names are
        rejected by ``get_ensemble_class``.
    """
    if not isinstance(hyperparameters, dict):
        raise ValueError(f"ensemble_hyperparameters must be dict, got {type(hyperparameters)}")

    # Validate all ensemble names are known
    for ensemble_name, ensemble_hyperparameters in hyperparameters.items():
        get_ensemble_class(ensemble_name)  # Will raise if unknown
        # Raised explicitly rather than asserted, so the check survives ``python -O``.
        if not isinstance(ensemble_hyperparameters, dict):
            raise ValueError(
                f"Hyperparameters for ensemble '{ensemble_name}' must be dict, "
                f"got {type(ensemble_hyperparameters)}"
            )
    return hyperparameters
|