autogluon.timeseries 1.2.1b20250224__py3-none-any.whl → 1.4.1b20251215__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of autogluon.timeseries has been flagged as potentially problematic.
- autogluon/timeseries/configs/__init__.py +3 -2
- autogluon/timeseries/configs/hyperparameter_presets.py +62 -0
- autogluon/timeseries/configs/predictor_presets.py +106 -0
- autogluon/timeseries/dataset/ts_dataframe.py +256 -141
- autogluon/timeseries/learner.py +86 -52
- autogluon/timeseries/metrics/__init__.py +42 -8
- autogluon/timeseries/metrics/abstract.py +89 -19
- autogluon/timeseries/metrics/point.py +142 -53
- autogluon/timeseries/metrics/quantile.py +46 -21
- autogluon/timeseries/metrics/utils.py +4 -4
- autogluon/timeseries/models/__init__.py +8 -2
- autogluon/timeseries/models/abstract/__init__.py +2 -2
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +361 -592
- autogluon/timeseries/models/abstract/model_trial.py +2 -1
- autogluon/timeseries/models/abstract/tunable.py +189 -0
- autogluon/timeseries/models/autogluon_tabular/__init__.py +2 -0
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +282 -194
- autogluon/timeseries/models/autogluon_tabular/per_step.py +513 -0
- autogluon/timeseries/models/autogluon_tabular/transforms.py +25 -18
- autogluon/timeseries/models/chronos/__init__.py +2 -1
- autogluon/timeseries/models/chronos/chronos2.py +361 -0
- autogluon/timeseries/models/chronos/model.py +219 -138
- autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +81 -50
- autogluon/timeseries/models/ensemble/__init__.py +37 -2
- autogluon/timeseries/models/ensemble/abstract.py +107 -0
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +240 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +185 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
- autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
- autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +45 -0
- autogluon/timeseries/models/ensemble/weighted/basic.py +91 -0
- autogluon/timeseries/models/ensemble/weighted/greedy.py +62 -0
- autogluon/timeseries/models/gluonts/__init__.py +1 -1
- autogluon/timeseries/models/gluonts/{abstract_gluonts.py → abstract.py} +148 -208
- autogluon/timeseries/models/gluonts/dataset.py +109 -0
- autogluon/timeseries/models/gluonts/{torch/models.py → models.py} +38 -22
- autogluon/timeseries/models/local/__init__.py +0 -7
- autogluon/timeseries/models/local/abstract_local_model.py +71 -74
- autogluon/timeseries/models/local/naive.py +13 -9
- autogluon/timeseries/models/local/npts.py +9 -2
- autogluon/timeseries/models/local/statsforecast.py +52 -36
- autogluon/timeseries/models/multi_window/multi_window_model.py +65 -45
- autogluon/timeseries/models/registry.py +64 -0
- autogluon/timeseries/models/toto/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
- autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
- autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
- autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
- autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
- autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
- autogluon/timeseries/models/toto/dataloader.py +108 -0
- autogluon/timeseries/models/toto/hf_pretrained_model.py +200 -0
- autogluon/timeseries/models/toto/model.py +249 -0
- autogluon/timeseries/predictor.py +685 -297
- autogluon/timeseries/regressor.py +94 -44
- autogluon/timeseries/splitter.py +8 -32
- autogluon/timeseries/trainer/__init__.py +3 -0
- autogluon/timeseries/trainer/ensemble_composer.py +444 -0
- autogluon/timeseries/trainer/model_set_builder.py +256 -0
- autogluon/timeseries/trainer/prediction_cache.py +149 -0
- autogluon/timeseries/{trainer.py → trainer/trainer.py} +387 -390
- autogluon/timeseries/trainer/utils.py +17 -0
- autogluon/timeseries/transforms/__init__.py +2 -13
- autogluon/timeseries/transforms/covariate_scaler.py +34 -40
- autogluon/timeseries/transforms/target_scaler.py +37 -20
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/lags.py +3 -5
- autogluon/timeseries/utils/datetime/seasonality.py +1 -3
- autogluon/timeseries/utils/datetime/time_features.py +2 -2
- autogluon/timeseries/utils/features.py +70 -47
- autogluon/timeseries/utils/forecast.py +19 -14
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/utils/warning_filters.py +4 -2
- autogluon/timeseries/version.py +1 -1
- autogluon.timeseries-1.4.1b20251215-py3.11-nspkg.pth +1 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/METADATA +49 -36
- autogluon_timeseries-1.4.1b20251215.dist-info/RECORD +103 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/WHEEL +1 -1
- autogluon/timeseries/configs/presets_configs.py +0 -79
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -11
- autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
- autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -585
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -518
- autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -78
- autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -170
- autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
- autogluon/timeseries/models/presets.py +0 -360
- autogluon.timeseries-1.2.1b20250224-py3.9-nspkg.pth +0 -1
- autogluon.timeseries-1.2.1b20250224.dist-info/RECORD +0 -68
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/zip-safe +0 -0
autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py}

@@ -1,26 +1,23 @@
 import logging
-import os
-import re
 import time
 from itertools import chain, cycle
-from pathlib import Path
-from typing import TYPE_CHECKING, Callable, Iterable, Iterator, List, Literal, Optional
+from typing import TYPE_CHECKING, Callable, Iterable, Iterator, Literal
 
 import numpy as np
 import torch
+from chronos.chronos_bolt import ChronosBoltModelForForecasting, ResidualBlock
 from gluonts.dataset.field_names import FieldName
 from gluonts.transform import ExpectedNumInstanceSampler, InstanceSplitter, ValidationSplitSampler
 from torch.utils.data import IterableDataset
 from transformers import TrainerCallback
 
-from autogluon.common.loaders.load_s3 import download, list_bucket_prefix_suffix_contains_s3
 from autogluon.core.utils.exceptions import TimeLimitExceeded
-from autogluon.timeseries.dataset import TimeSeriesDataFrame
-from autogluon.timeseries.models.gluonts.abstract_gluonts import SimpleGluonTSDataset
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
+from autogluon.timeseries.models.gluonts.dataset import SimpleGluonTSDataset
 
 if TYPE_CHECKING:
     # TODO: fix the underlying reason for this circular import, the pipeline should handle tokenization
-    from autogluon.timeseries.models.chronos.pipeline.chronos import ChronosTokenizer
+    from chronos import ChronosTokenizer
 
 
 logger = logging.getLogger("autogluon.timeseries.models.chronos")
@@ -73,19 +70,19 @@ class ChronosFineTuningDataset(IterableDataset):
 
     Parameters
     ----------
-    target_df
+    target_df
         The ``TimeSeriesDataFrame`` to be converted
-    target_column
+    target_column
         The name of the column which contains the target time series, by default "target"
-    context_length
+    context_length
         The length of the historical context
-    prediction_length
+    prediction_length
         The prediction_length, i.e., length of label or target
-    tokenizer
+    tokenizer
         When a ``ChronosTokenizer`` object is provided, data will be converted into the
         HuggingFace format accepted by the original Chronos models using this ``ChronosTokenizer``.
         If None, data will be converted into the format accepted by ChronosBolt models.
-    mode
+    mode
         When ``training``, random slices from the time series will be returned for training purposes.
         If ``validation``, the last slice of each time series returned in the original order.
     """

@@ -96,7 +93,7 @@ class ChronosFineTuningDataset(IterableDataset):
         target_column: str = "target",
         context_length: int = 512,
         prediction_length: int = 64,
-        tokenizer: Optional["ChronosTokenizer"] = None,
+        tokenizer: "ChronosTokenizer | None" = None,
         mode: Literal["training", "validation"] = "training",
     ) -> None:
         super().__init__()

@@ -132,11 +129,11 @@ class ChronosFineTuningDataset(IterableDataset):
     def _create_training_data(self, data: Iterable[dict]):
         data = chain.from_iterable(cycle([data]))
         split_transform = self._create_instance_splitter("training")
-        data = split_transform.apply(data, is_train=True)
+        data = split_transform.apply(data, is_train=True)  # type: ignore
         return data
 
     def _create_validation_data(self, data: Iterable[dict]):
-        data = self._create_instance_splitter("validation").apply(data, is_train=False)
+        data = self._create_instance_splitter("validation").apply(data, is_train=False)  # type: ignore
         return data
 
     def to_chronos_format(self, entry: dict) -> dict:

@@ -145,7 +142,7 @@ class ChronosFineTuningDataset(IterableDataset):
 
         Parameters
         ----------
-        entry
+        entry
             time series data entry in GluonTS format with ``past_target`` and ``future_target`` keys
 
         Returns

@@ -172,7 +169,7 @@ class ChronosFineTuningDataset(IterableDataset):
 
         Parameters
         ----------
-        entry
+        entry
             time series data entry in GluonTS format with ``past_target`` and ``future_target`` keys
 
         Returns

@@ -190,17 +187,19 @@ class ChronosFineTuningDataset(IterableDataset):
             iterable = self._create_training_data(self.gluonts_dataset)
         elif self.mode == "validation":
             iterable = self._create_validation_data(self.gluonts_dataset)
+        else:
+            raise ValueError(f"Unknown mode {self.mode}")
 
         format_transform_fn = self.to_chronos_format if self.tokenizer is not None else self.to_chronos_bolt_format
         for entry in iterable:
             yield format_transform_fn(entry)
 
-    def shuffle(self, shuffle_buffer_size: Optional[int] = None):
+    def shuffle(self, shuffle_buffer_size: int | None = None):
         """Returns a (pseudo) shuffled version of this iterable dataset.
 
         Parameters
         ----------
-        shuffle_buffer_size
+        shuffle_buffer_size
             The shuffle buffer size used for pseudo shuffling
         """
         assert shuffle_buffer_size is None or shuffle_buffer_size >= 0
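``shuffle`` hands the dataset off to a ``PseudoShuffledIterableDataset`` (its construction appears in the next hunk). As a rough, standalone illustration of what buffer-based pseudo shuffling does, here is a minimal sketch; all names below are illustrative, not package code:

import random
from typing import Iterable, Iterator, TypeVar

T = TypeVar("T")


def pseudo_shuffle(source: Iterable[T], buffer_size: int, seed: int = 0) -> Iterator[T]:
    """Yield items in approximately random order using a fixed-size buffer.

    Unlike a full shuffle, this never materializes the whole (possibly infinite)
    stream: at most ``buffer_size`` items are held in memory at once.
    """
    rng = random.Random(seed)
    buffer: list[T] = []
    for item in source:
        buffer.append(item)
        if len(buffer) >= buffer_size:
            # Swap a random element to the end, then pop it in O(1).
            idx = rng.randrange(len(buffer))
            buffer[idx], buffer[-1] = buffer[-1], buffer[idx]
            yield buffer.pop()
    rng.shuffle(buffer)
    yield from buffer  # drain the remainder once the stream is exhausted


print(list(pseudo_shuffle(range(10), buffer_size=4)))  # all 10 items, locally shuffled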
@@ -209,7 +208,7 @@ class ChronosFineTuningDataset(IterableDataset):
             return PseudoShuffledIterableDataset(self, shuffle_buffer_size)
 
 
-def left_pad_and_stack_1D(tensors: List[torch.Tensor]) -> torch.Tensor:
+def left_pad_and_stack_1D(tensors: list[torch.Tensor]) -> torch.Tensor:
     max_len = max(len(c) for c in tensors)
     padded = []
     for c in tensors:

@@ -220,27 +219,6 @@ def left_pad_and_stack_1D(tensors: List[torch.Tensor]) -> torch.Tensor:
     return torch.stack(padded)
 
 
-def cache_model_from_s3(s3_uri: str, force=False):
-    if re.match("^s3://([^/]+)/(.*?([^/]+)/?)$", s3_uri) is None:
-        raise ValueError(f"Not a valid S3 URI: {s3_uri}")
-
-    # we expect the prefix to point to a "directory" on S3
-    if not s3_uri.endswith("/"):
-        s3_uri += "/"
-
-    cache_home = Path(os.environ.get("XDG_CACHE_HOME") or Path.home() / ".cache")
-    bucket, prefix = s3_uri.replace("s3://", "").split("/", 1)
-    bucket_cache_path = cache_home / "autogluon" / "timeseries" / bucket
-
-    for obj_path in list_bucket_prefix_suffix_contains_s3(bucket=bucket, prefix=prefix):
-        destination_path = bucket_cache_path / obj_path
-        if not force and destination_path.exists():
-            continue
-        download(bucket, obj_path, local_path=str(destination_path))
-
-    return str(bucket_cache_path / prefix)
-
-
 class ChronosInferenceDataset:
     """A container for time series datasets that implements the ``torch.utils.data.Dataset`` interface"""
 

@@ -255,8 +233,7 @@ class ChronosInferenceDataset:
         self.target_array = target_df[target_column].to_numpy(dtype=np.float32)
 
         # store pointer to start:end of each time series
-        cum_sizes = target_df.num_timesteps_per_item().values.cumsum()
-        self.indptr = np.append(0, cum_sizes).astype(np.int32)
+        self.indptr = target_df.get_indptr()
 
     def __len__(self):
         return len(self.indptr) - 1  # noqa

@@ -281,7 +258,7 @@ class ChronosInferenceDataLoader(torch.utils.data.DataLoader):
         self.callback: Callable = kwargs.pop("on_batch", lambda: None)
         super().__init__(*args, **kwargs)
 
-    def __iter__(self):
+    def __iter__(self):  # type: ignore
         for item in super().__iter__():
            yield item
         self.callback()
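The ``ChronosInferenceDataset`` hunk replaces the hand-built index pointer (``np.append(0, cum_sizes)``) with ``TimeSeriesDataFrame.get_indptr()``. Both produce a CSR-style ``indptr`` that delimits each ragged series inside one flat array, which is also why ``__len__`` returns ``len(self.indptr) - 1``. A self-contained numpy illustration (not package code):

import numpy as np

# Three series of lengths 4, 2, and 3, stored back to back in one flat array.
lengths = np.array([4, 2, 3])
values = np.arange(lengths.sum(), dtype=np.float32)

# CSR-style index pointer: indptr[i]:indptr[i + 1] bounds series i.
indptr = np.append(0, lengths.cumsum()).astype(np.int32)  # [0, 4, 6, 9]

for i in range(len(indptr) - 1):  # len(indptr) - 1 == number of series
    print(i, values[indptr[i] : indptr[i + 1]])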
@@ -298,13 +275,13 @@ class EvaluateAndSaveFinalStepCallback(TrainerCallback):
 
 
 class TimeLimitCallback(TrainerCallback):
-    def __init__(self, time_limit: int):
+    def __init__(self, time_limit: float):
         """
         Callback to stop training once a specified time has elapsed.
 
         Parameters
         ----------
-        time_limit
+        time_limit
             maximum time allowed for training in seconds.
         """
         self.time_limit = time_limit

@@ -322,12 +299,13 @@ class TimeLimitCallback(TrainerCallback):
 
 class LoggerCallback(TrainerCallback):
     def on_log(self, args, state, control, logs=None, **kwargs):
-        logs.pop("total_flos", None)
+        if logs:
+            logs.pop("total_flos", None)
         if state.is_local_process_zero:
             logger.info(logs)
 
 
-def timeout_callback(seconds: Optional[float]) -> Callable:
+def timeout_callback(seconds: float | None) -> Callable:
     """Return a callback object that raises an exception if time limit is exceeded."""
     start_time = time.monotonic()
 

@@ -336,3 +314,56 @@ def timeout_callback(seconds: Optional[float]) -> Callable:
             raise TimeLimitExceeded
 
     return callback
+
+
+def update_output_quantiles(model: ChronosBoltModelForForecasting, new_quantiles: list[float]) -> None:
+    """In-place updates model's output layer to support only the specified new quantiles by copying
+    weights from closest existing quantiles.
+    """
+    old_quantiles = model.chronos_config.quantiles
+    new_quantiles = sorted(new_quantiles)
+
+    if new_quantiles == old_quantiles:
+        return
+
+    model.chronos_config.quantiles = new_quantiles
+    model.num_quantiles = len(new_quantiles)
+    model.register_buffer("quantiles", torch.tensor(new_quantiles, dtype=model.dtype), persistent=False)
+
+    old_output_layer = model.output_patch_embedding
+    new_output_layer = ResidualBlock(
+        in_dim=model.config.d_model,
+        h_dim=model.config.d_ff,
+        out_dim=len(new_quantiles) * model.chronos_config.prediction_length,
+        act_fn_name=model.config.dense_act_fn,
+        dropout_p=model.config.dropout_rate,
+    )
+
+    # hidden_layer is shared across all quantiles
+    new_output_layer.hidden_layer.weight.data.copy_(old_output_layer.hidden_layer.weight.data)
+    if old_output_layer.hidden_layer.bias is not None:
+        new_output_layer.hidden_layer.bias.data.copy_(old_output_layer.hidden_layer.bias.data)
+
+    def copy_quantile_weights(src_idx: int, dst_idx: int):
+        """Copy weights for one quantile from src_idx to dst_idx"""
+        prediction_length = model.chronos_config.prediction_length
+        src_start, src_end = src_idx * prediction_length, (src_idx + 1) * prediction_length
+        dst_start, dst_end = dst_idx * prediction_length, (dst_idx + 1) * prediction_length
+
+        for layer_name in ["output_layer", "residual_layer"]:
+            old_layer_attr = getattr(old_output_layer, layer_name)
+            new_layer_attr = getattr(new_output_layer, layer_name)
+
+            new_layer_attr.weight[dst_start:dst_end] = old_layer_attr.weight[src_start:src_end]
+            if old_layer_attr.bias is not None:
+                new_layer_attr.bias[dst_start:dst_end] = old_layer_attr.bias[src_start:src_end]
+
+    with torch.no_grad():
+        for new_idx, new_q in enumerate(new_quantiles):
+            closest_q = min(old_quantiles, key=lambda x: abs(x - new_q))
+            closest_idx = old_quantiles.index(closest_q)
+            copy_quantile_weights(closest_idx, new_idx)
+
+    model.output_patch_embedding = new_output_layer
+    model.config.chronos_config["quantiles"] = new_quantiles
+    model.chronos_config.quantiles = new_quantiles
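The weight-copying logic in ``update_output_quantiles`` initializes each head of the rebuilt output layer from the closest existing quantile head. A standalone sketch of just that index mapping, with illustrative values and no torch required:

# Each new quantile head is initialized from the nearest old head.
old_quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
new_quantiles = sorted([0.05, 0.5, 0.95])

for new_idx, new_q in enumerate(new_quantiles):
    closest_q = min(old_quantiles, key=lambda x: abs(x - new_q))
    closest_idx = old_quantiles.index(closest_q)
    print(f"new q={new_q} (head {new_idx}) copies weights from old q={closest_q} (head {closest_idx})")
# new q=0.05 (head 0) copies weights from old q=0.1 (head 0)
# new q=0.5 (head 1) copies weights from old q=0.5 (head 4)
# new q=0.95 (head 2) copies weights from old q=0.9 (head 8)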
autogluon/timeseries/models/ensemble/__init__.py

@@ -1,2 +1,37 @@
-from .abstract_timeseries_ensemble import AbstractTimeSeriesEnsembleModel
-from .greedy_ensemble import TimeSeriesGreedyEnsemble
+from .abstract import AbstractTimeSeriesEnsembleModel
+from .array_based import LinearStackerEnsemble, MedianEnsemble, PerQuantileTabularEnsemble, TabularEnsemble
+from .per_item_greedy import PerItemGreedyEnsemble
+from .weighted import GreedyEnsemble, PerformanceWeightedEnsemble, SimpleAverageEnsemble
+
+
+def get_ensemble_class(name: str):
+    mapping = {
+        "Greedy": GreedyEnsemble,
+        "PerItemGreedy": PerItemGreedyEnsemble,
+        "PerformanceWeighted": PerformanceWeightedEnsemble,
+        "SimpleAverage": SimpleAverageEnsemble,
+        "Weighted": GreedyEnsemble,  # old alias for this model
+        "Median": MedianEnsemble,
+        "Tabular": TabularEnsemble,
+        "PerQuantileTabular": PerQuantileTabularEnsemble,
+        "LinearStacker": LinearStackerEnsemble,
+    }
+
+    name_clean = name.removesuffix("Ensemble")
+    if name_clean not in mapping:
+        raise ValueError(f"Unknown ensemble type: {name}. Available: {list(mapping.keys())}")
+    return mapping[name_clean]
+
+
+__all__ = [
+    "AbstractTimeSeriesEnsembleModel",
+    "GreedyEnsemble",
+    "LinearStackerEnsemble",
+    "MedianEnsemble",
+    "PerformanceWeightedEnsemble",
+    "PerItemGreedyEnsemble",
+    "PerQuantileTabularEnsemble",
+    "SimpleAverageEnsemble",
+    "TabularEnsemble",
+    "get_ensemble_class",
+]
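A usage sketch for the resolver above (assuming this version of the package is installed): the ``removesuffix`` call makes spellings with and without the ``Ensemble`` suffix equivalent, and the legacy ``"Weighted"`` alias still resolves to ``GreedyEnsemble``.

from autogluon.timeseries.models.ensemble import GreedyEnsemble, get_ensemble_class

assert get_ensemble_class("Greedy") is get_ensemble_class("GreedyEnsemble")
assert get_ensemble_class("Weighted") is GreedyEnsemble  # backward-compatible alias

try:
    get_ensemble_class("DoesNotExist")
except ValueError as err:
    print(err)  # Unknown ensemble type: DoesNotExist. Available: ['Greedy', ...]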
autogluon/timeseries/models/ensemble/abstract.py

@@ -0,0 +1,107 @@
+import logging
+from abc import ABC, abstractmethod
+
+from typing_extensions import final
+
+from autogluon.core.utils.exceptions import TimeLimitExceeded
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
+from autogluon.timeseries.models.abstract import TimeSeriesModelBase
+
+logger = logging.getLogger(__name__)
+
+
+class AbstractTimeSeriesEnsembleModel(TimeSeriesModelBase, ABC):
+    """Abstract base class for time series ensemble models that combine predictions from multiple base models.
+
+    Ensemble training process operates on validation predictions from base models rather than raw time series
+    data. This allows the ensemble to learn optimal combination strategies based on each model's performance
+    across different validation windows and time series patterns.
+    """
+
+    @property
+    @abstractmethod
+    def model_names(self) -> list[str]:
+        """Names of base models included in the ensemble."""
+        pass
+
+    @final
+    def fit(
+        self,
+        predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
+        data_per_window: list[TimeSeriesDataFrame],
+        model_scores: dict[str, float] | None = None,
+        time_limit: float | None = None,
+    ):
+        """Fit ensemble model given predictions of candidate base models and the true data.
+
+        Parameters
+        ----------
+        predictions_per_window
+            Dictionary that maps the names of component models to their respective predictions for each validation
+            window.
+        data_per_window
+            Observed ground truth data used to train the ensemble for each validation window. Each entry in the list
+            includes both the forecast horizon (for which the predictions are given in ``predictions``), as well as the
+            "history".
+        model_scores
+            Scores (higher is better) for the models that will constitute the ensemble.
+        time_limit
+            Maximum allowed time for training in seconds.
+        """
+        if time_limit is not None and time_limit <= 0:
+            logger.warning(
+                f"\tWarning: Model has no time left to train, skipping model... (Time Left = {round(time_limit, 1)}s)"
+            )
+            raise TimeLimitExceeded
+        if isinstance(data_per_window, TimeSeriesDataFrame):
+            raise ValueError("When fitting ensemble, ``data`` should contain ground truth for each validation window")
+        num_val_windows = len(data_per_window)
+        for model, preds in predictions_per_window.items():
+            if len(preds) != num_val_windows:
+                raise ValueError(f"For model {model} predictions are unavailable for some validation windows")
+        self._fit(
+            predictions_per_window=predictions_per_window,
+            data_per_window=data_per_window,
+            model_scores=model_scores,
+            time_limit=time_limit,
+        )
+        return self
+
+    def _fit(
+        self,
+        predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
+        data_per_window: list[TimeSeriesDataFrame],
+        model_scores: dict[str, float] | None = None,
+        time_limit: float | None = None,
+    ) -> None:
+        """Private method for ``fit``. See ``fit`` for documentation of arguments. Apart from the model
+        training logic, ``fit`` additionally implements other logic such as keeping track of the time limit.
+        """
+        raise NotImplementedError
+
+    @final
+    def predict(self, data: dict[str, TimeSeriesDataFrame], **kwargs) -> TimeSeriesDataFrame:
+        if not set(self.model_names).issubset(set(data.keys())):
+            raise ValueError(
+                f"Set of models given for prediction in {self.name} differ from those provided during initialization."
+            )
+        for model_name, model_pred in data.items():
+            if model_pred is None:
+                raise RuntimeError(f"{self.name} cannot predict because base model {model_name} failed.")
+
+        # Make sure that all predictions have same shape
+        assert len(set(pred.shape for pred in data.values())) == 1
+
+        return self._predict(data=data, **kwargs)
+
+    @abstractmethod
+    def _predict(self, data: dict[str, TimeSeriesDataFrame], **kwargs) -> TimeSeriesDataFrame:
+        pass
+
+    @abstractmethod
+    def remap_base_models(self, model_refit_map: dict[str, str]) -> None:
+        """Update names of the base models based on the mapping in model_refit_map.
+
+        This method should be called after performing refit_full to point to the refitted base models, if necessary.
+        """
+        pass
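To make the template-method contract above concrete, here is a hypothetical minimal subclass. It is only a sketch: the package's real implementations (e.g. ``SimpleAverageEnsemble`` under ``weighted/`` in the file list) differ in detail, and the base class may require additional constructor arguments.

from autogluon.timeseries.dataset import TimeSeriesDataFrame
from autogluon.timeseries.models.ensemble import AbstractTimeSeriesEnsembleModel


class ToyAverageEnsemble(AbstractTimeSeriesEnsembleModel):
    """Hypothetical ensemble that averages all base model predictions elementwise."""

    def _fit(self, predictions_per_window, data_per_window, model_scores=None, time_limit=None) -> None:
        # No learned parameters: only remember which base models were provided.
        self._model_names = list(predictions_per_window.keys())

    @property
    def model_names(self) -> list[str]:
        return getattr(self, "_model_names", [])

    def _predict(self, data: dict[str, TimeSeriesDataFrame], **kwargs) -> TimeSeriesDataFrame:
        # predict() has already verified that all prediction frames share one shape,
        # so an elementwise mean across base models is well defined.
        preds = [data[name] for name in self.model_names]
        output = preds[0].copy()
        output[output.columns] = sum(p[output.columns].to_numpy() for p in preds) / len(preds)
        return output

    def remap_base_models(self, model_refit_map: dict[str, str]) -> None:
        self._model_names = [model_refit_map.get(n, n) for n in self._model_names]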
autogluon/timeseries/models/ensemble/array_based/abstract.py

@@ -0,0 +1,240 @@
+from abc import ABC, abstractmethod
+from typing import Any, Sequence
+
+import numpy as np
+
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
+from autogluon.timeseries.metrics.abstract import TimeSeriesScorer
+from autogluon.timeseries.utils.features import CovariateMetadata
+
+from ..abstract import AbstractTimeSeriesEnsembleModel
+from .regressor import EnsembleRegressor
+
+
+class ArrayBasedTimeSeriesEnsembleModel(AbstractTimeSeriesEnsembleModel, ABC):
+    """Abstract base class for ensemble models that operate on multi-dimensional arrays of base model predictions.
+
+    Array-based ensembles convert time series predictions into structured numpy arrays for efficient processing
+    and enable sophisticated combination strategies beyond simple weighted averaging. Array-based ensembles also
+    support isotonization in quantile forecasts--ensuring quantile crossing does not occur. They also have built-in
+    failed model detection and filtering capabilities.
+
+    Other Parameters
+    ----------------
+    isotonization : str, default = "sort"
+        The isotonization method to use (i.e. the algorithm to prevent quantile non-crossing).
+        Currently only "sort" is supported.
+    detect_and_ignore_failures : bool, default = True
+        Whether to detect and ignore "failed models", defined as models which have a loss that is larger
+        than 10x the median loss of all the models. This can be very important for the regression-based
+        ensembles, as moving the weight from such a "failed model" to zero can require a long training
+        time.
+    """
+
+    def __init__(
+        self,
+        path: str | None = None,
+        name: str | None = None,
+        hyperparameters: dict[str, Any] | None = None,
+        freq: str | None = None,
+        prediction_length: int = 1,
+        covariate_metadata: CovariateMetadata | None = None,
+        target: str = "target",
+        quantile_levels: Sequence[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
+        eval_metric: str | TimeSeriesScorer | None = None,
+    ):
+        super().__init__(
+            path=path,
+            name=name,
+            hyperparameters=hyperparameters,
+            freq=freq,
+            prediction_length=prediction_length,
+            covariate_metadata=covariate_metadata,
+            target=target,
+            quantile_levels=quantile_levels,
+            eval_metric=eval_metric,
+        )
+        self.ensemble_regressor: EnsembleRegressor | None = None
+        self._model_names: list[str] = []
+
+    def _get_default_hyperparameters(self) -> dict[str, Any]:
+        return {
+            "isotonization": "sort",
+            "detect_and_ignore_failures": True,
+        }
+
+    @staticmethod
+    def to_array(df: TimeSeriesDataFrame) -> np.ndarray:
+        """Given a TimeSeriesDataFrame object, return a single array composing the values contained
+        in the data frame.
+
+        Parameters
+        ----------
+        df
+            TimeSeriesDataFrame to convert to an array. Must contain exactly ``prediction_length``
+            values for each item. The columns of ``df`` can correspond to ground truth values
+            or predictions (in which case, these will be the mean or quantile forecasts).
+
+        Returns
+        -------
+        array
+            of shape (num_items, prediction_length, num_outputs).
+        """
+        assert df.index.is_monotonic_increasing
+        array = df.to_numpy()
+        num_items = df.num_items
+        shape = (
+            num_items,
+            df.shape[0] // num_items,  # timesteps per item
+            df.shape[1],  # num_outputs
+        )
+        return array.reshape(shape)
+
+    def _get_base_model_predictions(
+        self,
+        predictions_per_window: dict[str, list[TimeSeriesDataFrame]] | dict[str, TimeSeriesDataFrame],
+    ) -> tuple[np.ndarray, np.ndarray]:
+        """Given a mapping from model names to a list of data frames representing
+        their predictions per window, return a multidimensional array representation.
+
+        Parameters
+        ----------
+        predictions_per_window
+            A dictionary with list[TimeSeriesDataFrame] values, where each TimeSeriesDataFrame
+            contains predictions for the window in question. If the dictionary values are
+            TimeSeriesDataFrame, they will be treated like a single window.
+
+        Returns
+        -------
+        base_model_mean_predictions
+            Array of shape (num_windows, num_items, prediction_length, 1, num_models)
+        base_model_quantile_predictions
+            Array of shape (num_windows, num_items, prediction_length, num_quantiles, num_models)
+        """
+
+        if not predictions_per_window:
+            raise ValueError("No base model predictions are provided.")
+
+        first_prediction = list(predictions_per_window.values())[0]
+        if isinstance(first_prediction, TimeSeriesDataFrame):
+            predictions_per_window = {k: [v] for k, v in predictions_per_window.items()}  # type: ignore
+
+        predictions = {
+            model_name: [self.to_array(window) for window in windows]  # type: ignore
+            for model_name, windows in predictions_per_window.items()
+        }
+        base_model_predictions = np.stack([x for x in predictions.values()], axis=-1)
+
+        return base_model_predictions[:, :, :, :1, :], base_model_predictions[:, :, :, 1:, :]
+
+    def _isotonize(self, prediction_array: np.ndarray) -> np.ndarray:
+        """Apply isotonization to ensure quantile non-crossing.
+
+        Parameters
+        ----------
+        prediction_array
+            Array of shape (num_windows, num_items, prediction_length, num_quantiles)
+
+        Returns
+        -------
+        isotonized_array
+            Array with same shape but quantiles sorted along last dimension
+        """
+        isotonization = self.get_hyperparameter("isotonization")
+        if isotonization == "sort":
+            return np.sort(prediction_array, axis=-1)
+        return prediction_array
+
+    def _fit(
+        self,
+        predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
+        data_per_window: list[TimeSeriesDataFrame],
+        model_scores: dict[str, float] | None = None,
+        time_limit: float | None = None,
+    ) -> None:
+        # process inputs
+        filtered_predictions = self._filter_failed_models(predictions_per_window, model_scores)
+        base_model_mean_predictions, base_model_quantile_predictions = self._get_base_model_predictions(
+            filtered_predictions
+        )
+
+        # process labels
+        ground_truth_per_window = [y.slice_by_timestep(-self.prediction_length, None) for y in data_per_window]
+        labels = np.stack(
+            [self.to_array(gt) for gt in ground_truth_per_window], axis=0
+        )  # (num_windows, num_items, prediction_length, 1)
+
+        self._model_names = list(filtered_predictions.keys())
+        self.ensemble_regressor = self._get_ensemble_regressor()
+        self.ensemble_regressor.fit(
+            base_model_mean_predictions=base_model_mean_predictions,
+            base_model_quantile_predictions=base_model_quantile_predictions,
+            labels=labels,
+            time_limit=time_limit,
+        )
+
+    @abstractmethod
+    def _get_ensemble_regressor(self) -> EnsembleRegressor:
+        pass
+
+    def _predict(self, data: dict[str, TimeSeriesDataFrame], **kwargs) -> TimeSeriesDataFrame:
+        if self.ensemble_regressor is None:
+            if not self._model_names:
+                raise ValueError("Ensemble model has not been fitted yet.")
+            # Try to recreate the regressor (for loaded models)
+            self.ensemble_regressor = self._get_ensemble_regressor()
+
+        input_data = {}
+        for m in self.model_names:
+            assert m in data, f"Predictions for model {m} not provided during ensemble prediction."
+            input_data[m] = data[m]
+
+        base_model_mean_predictions, base_model_quantile_predictions = self._get_base_model_predictions(input_data)
+
+        mean_predictions, quantile_predictions = self.ensemble_regressor.predict(
+            base_model_mean_predictions=base_model_mean_predictions,
+            base_model_quantile_predictions=base_model_quantile_predictions,
+        )
+
+        quantile_predictions = self._isotonize(quantile_predictions)
+        prediction_array = np.concatenate([mean_predictions, quantile_predictions], axis=-1)
+
+        output = list(input_data.values())[0].copy()
+        num_folds, num_items, num_timesteps, num_outputs = prediction_array.shape
+        assert (num_folds, num_timesteps) == (1, self.prediction_length)
+        assert len(output.columns) == num_outputs
+
+        output[output.columns] = prediction_array.reshape((num_items * num_timesteps, num_outputs))
+
+        return output
+
+    @property
+    def model_names(self) -> list[str]:
+        return self._model_names
+
+    def remap_base_models(self, model_refit_map: dict[str, str]) -> None:
+        """Update names of the base models based on the mapping in model_refit_map."""
+        self._model_names = [model_refit_map.get(name, name) for name in self._model_names]
+
+    def _filter_failed_models(
+        self,
+        predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
+        model_scores: dict[str, float] | None,
+    ) -> dict[str, list[TimeSeriesDataFrame]]:
+        """Filter out failed models based on detect_and_ignore_failures setting."""
+        if not self.get_hyperparameter("detect_and_ignore_failures"):
+            return predictions_per_window
+
+        if model_scores is None or len(model_scores) == 0:
+            return predictions_per_window
+
+        valid_scores = {k: v for k, v in model_scores.items() if np.isfinite(v)}
+        if len(valid_scores) == 0:
+            raise ValueError("All models have NaN scores. At least one model must run successfully to fit an ensemble")
+
+        losses = {k: -v for k, v in valid_scores.items()}
+        median_loss = np.nanmedian(list(losses.values()))
+        threshold = 10 * median_loss
+        good_models = {k for k, loss in losses.items() if loss <= threshold}
+
+        return {k: v for k, v in predictions_per_window.items() if k in good_models}
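A pure-numpy sketch of the array plumbing above (illustrative shapes and random data, not package code): per-model arrays shaped like ``to_array`` output are stacked along a trailing model axis, the mean column is split from the quantile columns as in ``_get_base_model_predictions``, and quantiles are sorted as ``_isotonize`` does with its default ``"sort"`` setting.

import numpy as np

num_windows, num_items, prediction_length = 2, 3, 4
num_quantiles, num_models = 9, 5
rng = np.random.default_rng(0)

# One (windows, items, horizon, 1 + num_quantiles) array per base model,
# i.e. the shape to_array() yields for each model's prediction frame.
per_model = [
    rng.normal(size=(num_windows, num_items, prediction_length, 1 + num_quantiles))
    for _ in range(num_models)
]

stacked = np.stack(per_model, axis=-1)  # (..., 1 + num_quantiles, num_models)
mean_preds = stacked[:, :, :, :1, :]  # mean forecasts: column 0
quantile_preds = stacked[:, :, :, 1:, :]  # quantile forecasts: columns 1..

# Combine models with a simple average, then sort along the quantile axis
# so the resulting quantile forecasts cannot cross.
combined = quantile_preds.mean(axis=-1)  # (num_windows, num_items, horizon, num_quantiles)
isotonized = np.sort(combined, axis=-1)
assert (np.diff(isotonized, axis=-1) >= 0).all()  # monotone in the quantile level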