autogluon.timeseries 1.3.2b20250712__py3-none-any.whl → 1.4.1b20251116__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/timeseries/configs/__init__.py +3 -2
- autogluon/timeseries/configs/hyperparameter_presets.py +62 -0
- autogluon/timeseries/configs/predictor_presets.py +84 -0
- autogluon/timeseries/dataset/ts_dataframe.py +98 -72
- autogluon/timeseries/learner.py +19 -18
- autogluon/timeseries/metrics/__init__.py +5 -5
- autogluon/timeseries/metrics/abstract.py +17 -17
- autogluon/timeseries/metrics/point.py +1 -1
- autogluon/timeseries/metrics/quantile.py +2 -2
- autogluon/timeseries/metrics/utils.py +4 -4
- autogluon/timeseries/models/__init__.py +4 -0
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +52 -75
- autogluon/timeseries/models/abstract/tunable.py +6 -6
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +72 -76
- autogluon/timeseries/models/autogluon_tabular/per_step.py +104 -46
- autogluon/timeseries/models/autogluon_tabular/transforms.py +9 -7
- autogluon/timeseries/models/chronos/model.py +115 -78
- autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +76 -44
- autogluon/timeseries/models/ensemble/__init__.py +29 -2
- autogluon/timeseries/models/ensemble/abstract.py +16 -52
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +247 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +50 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +10 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +87 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +133 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +141 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +41 -0
- autogluon/timeseries/models/ensemble/{basic.py → weighted/basic.py} +8 -18
- autogluon/timeseries/models/ensemble/{greedy.py → weighted/greedy.py} +13 -13
- autogluon/timeseries/models/gluonts/abstract.py +26 -26
- autogluon/timeseries/models/gluonts/dataset.py +4 -4
- autogluon/timeseries/models/gluonts/models.py +27 -12
- autogluon/timeseries/models/local/abstract_local_model.py +14 -14
- autogluon/timeseries/models/local/naive.py +4 -0
- autogluon/timeseries/models/local/npts.py +1 -0
- autogluon/timeseries/models/local/statsforecast.py +30 -14
- autogluon/timeseries/models/multi_window/multi_window_model.py +34 -23
- autogluon/timeseries/models/registry.py +65 -0
- autogluon/timeseries/models/toto/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
- autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +197 -0
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
- autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
- autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +94 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +306 -0
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
- autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
- autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
- autogluon/timeseries/models/toto/dataloader.py +108 -0
- autogluon/timeseries/models/toto/hf_pretrained_model.py +119 -0
- autogluon/timeseries/models/toto/model.py +236 -0
- autogluon/timeseries/predictor.py +94 -107
- autogluon/timeseries/regressor.py +31 -27
- autogluon/timeseries/splitter.py +7 -31
- autogluon/timeseries/trainer/__init__.py +3 -0
- autogluon/timeseries/trainer/ensemble_composer.py +250 -0
- autogluon/timeseries/trainer/model_set_builder.py +256 -0
- autogluon/timeseries/trainer/prediction_cache.py +149 -0
- autogluon/timeseries/{trainer.py → trainer/trainer.py} +182 -307
- autogluon/timeseries/trainer/utils.py +18 -0
- autogluon/timeseries/transforms/covariate_scaler.py +4 -4
- autogluon/timeseries/transforms/target_scaler.py +14 -14
- autogluon/timeseries/utils/datetime/lags.py +2 -2
- autogluon/timeseries/utils/datetime/time_features.py +2 -2
- autogluon/timeseries/utils/features.py +41 -37
- autogluon/timeseries/utils/forecast.py +5 -5
- autogluon/timeseries/utils/warning_filters.py +3 -1
- autogluon/timeseries/version.py +1 -1
- autogluon.timeseries-1.4.1b20251116-py3.9-nspkg.pth +1 -0
- {autogluon.timeseries-1.3.2b20250712.dist-info → autogluon_timeseries-1.4.1b20251116.dist-info}/METADATA +32 -17
- autogluon_timeseries-1.4.1b20251116.dist-info/RECORD +96 -0
- {autogluon.timeseries-1.3.2b20250712.dist-info → autogluon_timeseries-1.4.1b20251116.dist-info}/WHEEL +1 -1
- autogluon/timeseries/configs/presets_configs.py +0 -79
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -10
- autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
- autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -544
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -530
- autogluon/timeseries/models/presets.py +0 -358
- autogluon.timeseries-1.3.2b20250712-py3.9-nspkg.pth +0 -1
- autogluon.timeseries-1.3.2b20250712.dist-info/RECORD +0 -71
- {autogluon.timeseries-1.3.2b20250712.dist-info → autogluon_timeseries-1.4.1b20251116.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.3.2b20250712.dist-info → autogluon_timeseries-1.4.1b20251116.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.3.2b20250712.dist-info → autogluon_timeseries-1.4.1b20251116.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.3.2b20250712.dist-info → autogluon_timeseries-1.4.1b20251116.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.3.2b20250712.dist-info → autogluon_timeseries-1.4.1b20251116.dist-info}/zip-safe +0 -0
autogluon/timeseries/models/chronos/model.py

@@ -3,14 +3,15 @@ import os
 import shutil
 import warnings
 from pathlib import Path
-from typing import Any,
+from typing import Any, Optional, Union

 import numpy as np
 import pandas as pd
+from typing_extensions import Self

 from autogluon.common.loaders import load_pkl
 from autogluon.common.space import Space
-from autogluon.timeseries.dataset
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
 from autogluon.timeseries.utils.warning_filters import disable_duplicate_logs, warning_filter

@@ -81,72 +82,92 @@ MODEL_ALIASES = {


 class ChronosModel(AbstractTimeSeriesModel):
-    """Chronos [Ansari2024]_ pretrained time series forecasting models which can be used for zero-shot
-    in a task-specific manner.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    """Chronos [Ansari2024]_ pretrained time series forecasting models which can be used for zero-shot
+    forecasting or fine-tuned in a task-specific manner.
+
+    Models can be based on the original
+    `Chronos <https://github.com/amazon-science/chronos-forecasting/blob/main/src/chronos/chronos.py>`_
+    implementation, as well as a newer family of
+    `Chronos-Bolt <https://github.com/amazon-science/chronos-forecasting/blob/main/src/chronos/chronos_bolt.py>`_
+    models capable of much faster inference.
+
+    The original Chronos is a family of pretrained models, based on the T5 family, with number of
+    parameters ranging between 8M and 710M. The full collection of Chronos models is available on
+    `Hugging Face <https://huggingface.co/collections/amazon/chronos-models-65f1791d630a8d57cb718444>`_.
+
+    For Chronos (original) ``small``, ``base``, and ``large`` variants a GPU is required to
+    perform inference efficiently. Chronos takes a minimalistic approach to pretraining time series
+    models, by discretizing time series data directly into bins which are treated as tokens,
+    effectively performing regression by classification. This results in a simple and flexible
+    framework for using any language model in the context of time series forecasting.
+    See [Ansari2024]_ for more information.
+
+    The newer Chronos-Bolt variants enable much faster inference by first "patching" the time series.
+    The resulting time series is then fed into a T5 model for forecasting. The Chronos-Bolt variants
+    are capable of much faster inference, and can all run on CPUs.
+
+    Both Chronos and Chronos-Bolt variants can be fine-tuned by setting ``fine_tune=True`` and selecting
+    appropriate fine-tuning parameters such as the learning rate (``fine_tune_lr``) and max steps
+    (``fine_tune_steps``).

     References
     ----------
     .. [Ansari2024] Ansari, Abdul Fatir, Stella, Lorenzo et al.
         "Chronos: Learning the Language of Time Series."
-
+        Transactions on Machine Learning Research (2024).
+        https://openreview.net/forum?id=gerNCVqqtR


     Other Parameters
     ----------------
-    model_path: str, default = "autogluon/chronos-bolt-small"
+    model_path : str, default = "autogluon/chronos-bolt-small"
         Model path used for the model, i.e., a HuggingFace transformers ``name_or_path``. Can be a
         compatible model name on HuggingFace Hub or a local path to a model directory. Original
         Chronos models (i.e., ``autogluon/chronos-t5-{model_size}``) can be specified with aliases
         ``tiny``, ``mini`` , ``small``, ``base``, and ``large``. Chronos-Bolt models can be specified
         with ``bolt_tiny``, ``bolt_mini``, ``bolt_small``, and ``bolt_base``.
     batch_size : int, default = 256
-        Size of batches used during inference.
-
-
-
+        Size of batches used during inference.
+
+        The default ``batch_size`` is selected based on the model type. Chronos (original) models use a
+        ``batch_size`` of 16, except Chronos (Large) which uses 8.
+
+        For Chronos-Bolt models the ``batch_size`` is set to 256. However, ``batch_size`` is reduced by
+        a factor of 4 when the prediction horizon is greater than the model's
+        default prediction length.
     num_samples : int, default = 20
         Number of samples used during inference, only used for the original Chronos models
     device : str, default = None
-        Device to use for inference (and fine-tuning, if enabled). If None, model will use the GPU if
-        For larger Chronos model sizes ``small``, ``base``, and ``large``; inference will fail
-
-
+        Device to use for inference (and fine-tuning, if enabled). If None, model will use the GPU if
+        available. For larger Chronos model sizes ``small``, ``base``, and ``large``; inference will fail
+        if no GPU is available.
+
+        For Chronos-Bolt models, inference can be performed on the CPU. Although fine-tuning the smaller
+        Chronos models (``tiny`` and ``mini``) and all Chronos-Bolt is allowed on the CPU, we recommend
+        using a GPU for faster fine-tuning.
     context_length : int or None, default = None
-        The context length to use in the model.
-
-
-        the model
-
-
-
-
-
-
+        The context length to use in the model.
+
+        Shorter context lengths will decrease model accuracy, but result in faster inference. If None,
+        the model will infer context length from the data set length at inference time, but cap it at a
+        maximum of 2048.
+
+        Note that this is only the context length used to pass data into the model. Individual model
+        implementations may have different context lengths specified in their configuration, and may
+        truncate the context further. For example, original Chronos models have a context length of 512,
+        but Chronos-Bolt models handle contexts up to 2048.
+    torch_dtype : torch.dtype or {"auto", "bfloat16", "float32"}, default = "auto"
+        Torch data type for model weights, provided to ``from_pretrained`` method of Hugging Face
+        AutoModels. If original Chronos models are specified and the model size is ``small``, ``base``,
+        or ``large``, the ``torch_dtype`` will be set to ``bfloat16`` to enable inference on GPUs.
     data_loader_num_workers : int, default = 0
-        Number of worker processes to be used in the data loader. See documentation on
-        for more information.
+        Number of worker processes to be used in the data loader. See documentation on
+        ``torch.utils.data.DataLoader`` for more information.
     fine_tune : bool, default = False
         If True, the pretrained model will be fine-tuned
-    fine_tune_lr: float, default = 1e-5
-        The learning rate used for fine-tuning. This default is suitable for Chronos-Bolt models; for
-        Chronos models, we recommend using a higher learning rate such as ``1e-4
+    fine_tune_lr : float, default = 1e-5
+        The learning rate used for fine-tuning. This default is suitable for Chronos-Bolt models; for
+        the original Chronos models, we recommend using a higher learning rate such as ``1e-4``.
     fine_tune_steps : int, default = 1000
         The number of gradient update steps to fine-tune for
     fine_tune_batch_size : int, default = 32

@@ -162,12 +183,12 @@ class ChronosModel(AbstractTimeSeriesModel):
         during fine-tuning. If None, the entire validation dataset will be used.
     fine_tune_trainer_kwargs : dict, optional
         Extra keyword arguments passed to ``transformers.TrainingArguments``
-    keep_transformers_logs: bool, default = False
+    keep_transformers_logs : bool, default = False
         If True, the logs generated by transformers will NOT be removed after fine-tuning
     """

-
-    default_num_samples: int = 20
+    ag_priority = 55
+    default_num_samples: int = 20  # default number of samples for prediction
     default_model_path = "autogluon/chronos-bolt-small"
     default_max_time_limit_ratio = 0.8
     maximum_context_length = 2048
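The settings documented under "Other Parameters" above are supplied per model through the predictor's ``hyperparameters`` argument. A minimal sketch, assuming the public ``autogluon.timeseries`` API; the tiny synthetic dataset below is purely illustrative:

```python
import pandas as pd
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

# Illustrative long-format data: one item, hourly timestamps, integer targets.
df = pd.DataFrame(
    {
        "item_id": ["A"] * 100,
        "timestamp": pd.date_range("2024-01-01", periods=100, freq="h"),
        "target": range(100),
    }
)
train_data = TimeSeriesDataFrame.from_data_frame(df, id_column="item_id", timestamp_column="timestamp")

predictor = TimeSeriesPredictor(prediction_length=8).fit(
    train_data,
    hyperparameters={
        "Chronos": {
            "model_path": "bolt_small",  # alias resolved via MODEL_ALIASES
            "fine_tune": True,           # defaults to False (zero-shot)
            "fine_tune_lr": 1e-5,        # default suited to Chronos-Bolt
            "fine_tune_steps": 1000,
        }
    },
)
predictions = predictor.predict(train_data)
```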
@@ -180,13 +201,13 @@ class ChronosModel(AbstractTimeSeriesModel):
         path: Optional[str] = None,
         name: Optional[str] = None,
         eval_metric: Optional[str] = None,
-        hyperparameters: Optional[
+        hyperparameters: Optional[dict[str, Any]] = None,
         **kwargs,  # noqa
     ):
         hyperparameters = hyperparameters if hyperparameters is not None else {}

-        model_path_input = hyperparameters.get("model_path", self.default_model_path)
-        self.model_path = MODEL_ALIASES.get(model_path_input, model_path_input)
+        model_path_input: str = hyperparameters.get("model_path", self.default_model_path)
+        self.model_path: str = MODEL_ALIASES.get(model_path_input, model_path_input)

         name = name if name is not None else "Chronos"
         if not isinstance(model_path_input, Space):

@@ -216,7 +237,7 @@ class ChronosModel(AbstractTimeSeriesModel):
         return str(path)

     @classmethod
-    def load(cls, path: str, reset_paths: bool = True, verbose: bool = True) ->
+    def load(cls, path: str, reset_paths: bool = True, load_oof: bool = False, verbose: bool = True) -> Self:
         model = load_pkl.load(path=os.path.join(path, cls.model_file_name), verbose=verbose)
         if reset_paths:
             model.set_contexts(path)

@@ -241,7 +262,7 @@ class ChronosModel(AbstractTimeSeriesModel):
         return self._model_pipeline

     @property
-    def ag_default_config(self) ->
+    def ag_default_config(self) -> dict[str, Any]:
         """The default configuration of the model used by AutoGluon if the model is one of those
         defined in MODEL_CONFIGS. For now, these are ``autogluon/chronos-t5-*`` family of models.
         """

@@ -271,15 +292,15 @@ class ChronosModel(AbstractTimeSeriesModel):
         """
         return self.ag_default_config.get("default_torch_dtype", "auto")

-    def get_minimum_resources(self, is_gpu_available: bool = False) ->
-        minimum_resources:
+    def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str, Union[int, float]]:
+        minimum_resources: dict[str, Union[int, float]] = {"num_cpus": 1}
         # if GPU is available, we train with 1 GPU per trial
         if is_gpu_available:
             minimum_resources["num_gpus"] = self.min_num_gpus
         return minimum_resources

     def load_model_pipeline(self, is_training: bool = False):
-        from
+        from chronos import BaseChronosPipeline

         gpu_available = self._is_gpu_available()


@@ -292,6 +313,7 @@ class ChronosModel(AbstractTimeSeriesModel):

         device = self.device or ("cuda" if gpu_available else "cpu")

+        assert self.model_path is not None
         pipeline = BaseChronosPipeline.from_pretrained(
             self.model_path,
             device_map=device,

@@ -322,7 +344,7 @@ class ChronosModel(AbstractTimeSeriesModel):

         return init_args.copy()

-    def _get_default_hyperparameters(self) ->
+    def _get_default_hyperparameters(self) -> dict:
         return {
             "batch_size": self.default_batch_size,
             "num_samples": self.default_num_samples,

@@ -416,19 +438,23 @@ class ChronosModel(AbstractTimeSeriesModel):
         self,
         train_data: TimeSeriesDataFrame,
         val_data: Optional[TimeSeriesDataFrame] = None,
-        time_limit: Optional[
+        time_limit: Optional[float] = None,
+        num_cpus: Optional[int] = None,
+        num_gpus: Optional[int] = None,
+        verbosity: int = 2,
         **kwargs,
     ) -> None:
         import transformers
+        from chronos import ChronosBoltPipeline, ChronosPipeline
         from packaging import version
         from transformers.trainer import PrinterCallback, Trainer, TrainingArguments

-        from .
-        from .pipeline.utils import (
+        from .utils import (
             ChronosFineTuningDataset,
             EvaluateAndSaveFinalStepCallback,
             LoggerCallback,
             TimeLimitCallback,
+            update_output_quantiles,
         )

         # TODO: Add support for fine-tuning models with context_length longer than the pretrained model

@@ -481,9 +507,12 @@ class ChronosModel(AbstractTimeSeriesModel):

             if self.prediction_length != fine_tune_prediction_length:
                 logger.debug(
-                    f"\
+                    f"\tChronos-Bolt models can only be fine-tuned with a maximum prediction_length of {model_prediction_length}. "
                     f"Fine-tuning prediction_length has been changed to {fine_tune_prediction_length}."
                 )
+            if self.quantile_levels != self.model_pipeline.quantiles:
+                update_output_quantiles(self.model_pipeline.model, self.quantile_levels)
+                logger.info(f"\tChronos-Bolt will be fine-tuned with quantile_levels={self.quantile_levels}")
         else:
             raise ValueError(f"Unsupported model pipeline: {type(self.model_pipeline)}")


@@ -513,7 +542,7 @@ class ChronosModel(AbstractTimeSeriesModel):
             # transformers changed the argument name from `evaluation_strategy` to `eval_strategy`
             fine_tune_trainer_kwargs["eval_strategy"] = fine_tune_trainer_kwargs.pop("evaluation_strategy")

-        training_args = TrainingArguments(**fine_tune_trainer_kwargs, **pipeline_specific_trainer_kwargs)
+        training_args = TrainingArguments(**fine_tune_trainer_kwargs, **pipeline_specific_trainer_kwargs)  # type: ignore
         tokenizer_train_dataset = ChronosFineTuningDataset(
             target_df=train_data,
             target_column=self.target,

@@ -529,6 +558,7 @@ class ChronosModel(AbstractTimeSeriesModel):
         if time_limit is not None:
             callbacks.append(TimeLimitCallback(time_limit=time_limit))

+        tokenizer_val_dataset: Optional[ChronosFineTuningDataset] = None
         if val_data is not None:
             callbacks.append(EvaluateAndSaveFinalStepCallback())
             # evaluate on a randomly-sampled subset

@@ -544,6 +574,7 @@ class ChronosModel(AbstractTimeSeriesModel):
             )
             val_data = val_data.loc[eval_items]

+            assert isinstance(val_data, TimeSeriesDataFrame)
             tokenizer_val_dataset = ChronosFineTuningDataset(
                 target_df=val_data,
                 target_column=self.target,

@@ -557,7 +588,7 @@ class ChronosModel(AbstractTimeSeriesModel):
             model=self.model_pipeline.inner_model,
             args=training_args,
             train_dataset=tokenizer_train_dataset,
-            eval_dataset=tokenizer_val_dataset
+            eval_dataset=tokenizer_val_dataset,
             callbacks=callbacks,
         )


@@ -590,7 +621,7 @@ class ChronosModel(AbstractTimeSeriesModel):
         num_workers: int = 0,
         time_limit: Optional[float] = None,
     ):
-        from .
+        from .utils import ChronosInferenceDataLoader, ChronosInferenceDataset, timeout_callback

         chronos_dataset = ChronosInferenceDataset(
             target_df=data,

@@ -619,7 +650,7 @@ class ChronosModel(AbstractTimeSeriesModel):
         known_covariates: Optional[TimeSeriesDataFrame] = None,
         **kwargs,
     ) -> TimeSeriesDataFrame:
-        from
+        from chronos import ChronosBoltPipeline, ChronosPipeline

         # We defer initialization of the model pipeline. i.e., the model is only loaded to device memory
         # during inference. We also infer the maximum length of the time series in the inference data set

@@ -631,20 +662,26 @@ class ChronosModel(AbstractTimeSeriesModel):
         # (according to its config.json file) of 512, it will further truncate the series during inference.
         context_length = self._get_context_length(data)

+        extra_predict_kwargs = (
+            {"num_samples": self.num_samples} if isinstance(self.model_pipeline, ChronosPipeline) else {}
+        )
+
+        # adapt batch size for Chronos bolt if requested prediction length is longer than model prediction length
+        batch_size = self.batch_size
+        model_prediction_length = None
+        if isinstance(self.model_pipeline, ChronosBoltPipeline):
+            model_prediction_length = self.model_pipeline.model.config.chronos_config.get("prediction_length")
+        if model_prediction_length and self.prediction_length > model_prediction_length:
+            batch_size = max(1, batch_size // 4)
+            logger.debug(
+                f"\tThe prediction_length {self.prediction_length} exceeds model's prediction_length {model_prediction_length}. "
+                f"The inference batch_size has been reduced from {self.batch_size} to {batch_size} to avoid OOM errors."
+            )
+
         with warning_filter(all_warnings=True):
             import torch

             self.model_pipeline.model.eval()
-            batch_size = self.batch_size
-            if (
-                isinstance(self.model_pipeline, ChronosBoltPipeline)
-                and self.prediction_length > self.model_pipeline.model_prediction_length
-            ):
-                batch_size = max(1, batch_size // 4)
-                logger.debug(
-                    f"\tThe prediction_length {self.prediction_length} exceeds model's prediction_length {self.model_pipeline.model_prediction_length}. "
-                    f"The inference batch_size has been reduced from {self.batch_size} to {batch_size} to avoid OOM errors."
-                )

             inference_data_loader = self._get_inference_data_loader(
                 data=data,
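For reference, the adaptation introduced above shrinks the configured ``batch_size`` by a factor of 4 whenever the requested horizon exceeds the model's native prediction length, per the log message in the hunk. A standalone sketch of just that rule (the helper name is illustrative, not part of the package):

```python
def adapted_batch_size(batch_size: int, prediction_length: int, model_prediction_length: int) -> int:
    """Mirror of the reduction rule shown in the diff above (illustrative only)."""
    if prediction_length > model_prediction_length:
        # e.g. the Chronos-Bolt default of 256 drops to 64 for long horizons
        return max(1, batch_size // 4)
    return batch_size


assert adapted_batch_size(256, prediction_length=96, model_prediction_length=64) == 64
assert adapted_batch_size(256, prediction_length=48, model_prediction_length=64) == 256
```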
@@ -662,7 +699,7 @@ class ChronosModel(AbstractTimeSeriesModel):
                         batch,
                         prediction_length=self.prediction_length,
                         quantile_levels=self.quantile_levels,
-
+                        **extra_predict_kwargs,
                     )
                 except torch.OutOfMemoryError as ex:
                     logger.error(

@@ -687,7 +724,7 @@ class ChronosModel(AbstractTimeSeriesModel):

         return TimeSeriesDataFrame(df)

-    def _more_tags(self) ->
+    def _more_tags(self) -> dict:
         do_fine_tune = self.get_hyperparameters()["fine_tune"]
         return {
             "allow_nan": True,
autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py}

@@ -1,26 +1,23 @@
 import logging
-import os
-import re
 import time
 from itertools import chain, cycle
-from
-from typing import TYPE_CHECKING, Callable, Iterable, Iterator, List, Literal, Optional
+from typing import TYPE_CHECKING, Callable, Iterable, Iterator, Literal, Optional

 import numpy as np
 import torch
+from chronos.chronos_bolt import ChronosBoltModelForForecasting, ResidualBlock
 from gluonts.dataset.field_names import FieldName
 from gluonts.transform import ExpectedNumInstanceSampler, InstanceSplitter, ValidationSplitSampler
 from torch.utils.data import IterableDataset
 from transformers import TrainerCallback

-from autogluon.common.loaders.load_s3 import download, list_bucket_prefix_suffix_contains_s3
 from autogluon.core.utils.exceptions import TimeLimitExceeded
-from autogluon.timeseries.dataset
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.models.gluonts.dataset import SimpleGluonTSDataset

 if TYPE_CHECKING:
     # TODO: fix the underlying reason for this circular import, the pipeline should handle tokenization
-    from
+    from chronos import ChronosTokenizer


 logger = logging.getLogger("autogluon.timeseries.models.chronos")

@@ -73,19 +70,19 @@ class ChronosFineTuningDataset(IterableDataset):

     Parameters
     ----------
-    target_df
+    target_df
         The ``TimeSeriesDataFrame`` to be converted
-    target_column
+    target_column
         The name of the column which contains the target time series, by default "target"
-    context_length
+    context_length
         The length of the historical context
-    prediction_length
+    prediction_length
         The prediction_length, i.e., length of label or target
-    tokenizer
+    tokenizer
         When a ``ChronosTokenizer`` object is provided, data will be converted into the
         HuggingFace format accepted by the original Chronos models using this ``ChronosTokenizer``.
         If None, data will be converted into the format accepted by ChronosBolt models.
-    mode
+    mode
         When ``training``, random slices from the time series will be returned for training purposes.
         If ``validation``, the last slice of each time series returned in the original order.
     """

@@ -132,11 +129,11 @@ class ChronosFineTuningDataset(IterableDataset):
     def _create_training_data(self, data: Iterable[dict]):
         data = chain.from_iterable(cycle([data]))
         split_transform = self._create_instance_splitter("training")
-        data = split_transform.apply(data, is_train=True)
+        data = split_transform.apply(data, is_train=True)  # type: ignore
         return data

     def _create_validation_data(self, data: Iterable[dict]):
-        data = self._create_instance_splitter("validation").apply(data, is_train=False)
+        data = self._create_instance_splitter("validation").apply(data, is_train=False)  # type: ignore
         return data

     def to_chronos_format(self, entry: dict) -> dict:

@@ -145,7 +142,7 @@ class ChronosFineTuningDataset(IterableDataset):

         Parameters
         ----------
-        entry
+        entry
             time series data entry in GluonTS format with ``past_target`` and ``future_target`` keys

         Returns

@@ -172,7 +169,7 @@ class ChronosFineTuningDataset(IterableDataset):

         Parameters
         ----------
-        entry
+        entry
             time series data entry in GluonTS format with ``past_target`` and ``future_target`` keys

         Returns

@@ -190,6 +187,8 @@ class ChronosFineTuningDataset(IterableDataset):
             iterable = self._create_training_data(self.gluonts_dataset)
         elif self.mode == "validation":
             iterable = self._create_validation_data(self.gluonts_dataset)
+        else:
+            raise ValueError(f"Unknown mode {self.mode}")

         format_transform_fn = self.to_chronos_format if self.tokenizer is not None else self.to_chronos_bolt_format
         for entry in iterable:

@@ -200,7 +199,7 @@ class ChronosFineTuningDataset(IterableDataset):

         Parameters
         ----------
-        shuffle_buffer_size
+        shuffle_buffer_size
             The shuffle buffer size used for pseudo shuffling
         """
         assert shuffle_buffer_size is None or shuffle_buffer_size >= 0

@@ -209,7 +208,7 @@ class ChronosFineTuningDataset(IterableDataset):
         return PseudoShuffledIterableDataset(self, shuffle_buffer_size)


-def left_pad_and_stack_1D(tensors:
+def left_pad_and_stack_1D(tensors: list[torch.Tensor]) -> torch.Tensor:
     max_len = max(len(c) for c in tensors)
     padded = []
     for c in tensors:

@@ -220,27 +219,6 @@ def left_pad_and_stack_1D(tensors: List[torch.Tensor]) -> torch.Tensor:
     return torch.stack(padded)


-def cache_model_from_s3(s3_uri: str, force=False):
-    if re.match("^s3://([^/]+)/(.*?([^/]+)/?)$", s3_uri) is None:
-        raise ValueError(f"Not a valid S3 URI: {s3_uri}")
-
-    # we expect the prefix to point to a "directory" on S3
-    if not s3_uri.endswith("/"):
-        s3_uri += "/"
-
-    cache_home = Path(os.environ.get("XDG_CACHE_HOME") or Path.home() / ".cache")
-    bucket, prefix = s3_uri.replace("s3://", "").split("/", 1)
-    bucket_cache_path = cache_home / "autogluon" / "timeseries" / bucket
-
-    for obj_path in list_bucket_prefix_suffix_contains_s3(bucket=bucket, prefix=prefix):
-        destination_path = bucket_cache_path / obj_path
-        if not force and destination_path.exists():
-            continue
-        download(bucket, obj_path, local_path=str(destination_path))
-
-    return str(bucket_cache_path / prefix)
-
-
 class ChronosInferenceDataset:
     """A container for time series datasets that implements the ``torch.utils.data.Dataset`` interface"""


@@ -280,7 +258,7 @@ class ChronosInferenceDataLoader(torch.utils.data.DataLoader):
         self.callback: Callable = kwargs.pop("on_batch", lambda: None)
         super().__init__(*args, **kwargs)

-    def __iter__(self):
+    def __iter__(self):  # type: ignore
         for item in super().__iter__():
             yield item
         self.callback()

@@ -297,13 +275,13 @@ class EvaluateAndSaveFinalStepCallback(TrainerCallback):


 class TimeLimitCallback(TrainerCallback):
-    def __init__(self, time_limit:
+    def __init__(self, time_limit: float):
         """
         Callback to stop training once a specified time has elapsed.

         Parameters
         ----------
-        time_limit
+        time_limit
             maximum time allowed for training in seconds.
         """
         self.time_limit = time_limit

@@ -321,7 +299,8 @@ class TimeLimitCallback(TrainerCallback):

 class LoggerCallback(TrainerCallback):
     def on_log(self, args, state, control, logs=None, **kwargs):
-        logs
+        if logs:
+            logs.pop("total_flos", None)
         if state.is_local_process_zero:
             logger.info(logs)


@@ -335,3 +314,56 @@ def timeout_callback(seconds: Optional[float]) -> Callable:
             raise TimeLimitExceeded

     return callback
+
+
+def update_output_quantiles(model: ChronosBoltModelForForecasting, new_quantiles: list[float]) -> None:
+    """In-place updates model's output layer to support only the specified new quantiles by copying
+    weights from closest existing quantiles.
+    """
+    old_quantiles = model.chronos_config.quantiles
+    new_quantiles = sorted(new_quantiles)
+
+    if new_quantiles == old_quantiles:
+        return
+
+    model.chronos_config.quantiles = new_quantiles
+    model.num_quantiles = len(new_quantiles)
+    model.register_buffer("quantiles", torch.tensor(new_quantiles, dtype=model.dtype), persistent=False)
+
+    old_output_layer = model.output_patch_embedding
+    new_output_layer = ResidualBlock(
+        in_dim=model.config.d_model,
+        h_dim=model.config.d_ff,
+        out_dim=len(new_quantiles) * model.chronos_config.prediction_length,
+        act_fn_name=model.config.dense_act_fn,
+        dropout_p=model.config.dropout_rate,
+    )
+
+    # hidden_layer is shared across all quantiles
+    new_output_layer.hidden_layer.weight.data.copy_(old_output_layer.hidden_layer.weight.data)
+    if old_output_layer.hidden_layer.bias is not None:
+        new_output_layer.hidden_layer.bias.data.copy_(old_output_layer.hidden_layer.bias.data)
+
+    def copy_quantile_weights(src_idx: int, dst_idx: int):
+        """Copy weights for one quantile from src_idx to dst_idx"""
+        prediction_length = model.chronos_config.prediction_length
+        src_start, src_end = src_idx * prediction_length, (src_idx + 1) * prediction_length
+        dst_start, dst_end = dst_idx * prediction_length, (dst_idx + 1) * prediction_length
+
+        for layer_name in ["output_layer", "residual_layer"]:
+            old_layer_attr = getattr(old_output_layer, layer_name)
+            new_layer_attr = getattr(new_output_layer, layer_name)
+
+            new_layer_attr.weight[dst_start:dst_end] = old_layer_attr.weight[src_start:src_end]
+            if old_layer_attr.bias is not None:
+                new_layer_attr.bias[dst_start:dst_end] = old_layer_attr.bias[src_start:src_end]
+
+    with torch.no_grad():
+        for new_idx, new_q in enumerate(new_quantiles):
+            closest_q = min(old_quantiles, key=lambda x: abs(x - new_q))
+            closest_idx = old_quantiles.index(closest_q)
+            copy_quantile_weights(closest_idx, new_idx)
+
+    model.output_patch_embedding = new_output_layer
+    model.config.chronos_config["quantiles"] = new_quantiles
+    model.chronos_config.quantiles = new_quantiles
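``update_output_quantiles`` above reuses the pretrained output head by mapping every requested quantile to the closest pretrained one before copying its weights. A small standalone sketch of just that mapping (the nine-decile pretrained levels are an assumption for illustration):

```python
# Assumed pretrained quantile levels (Chronos-Bolt typically ships nine deciles).
old_quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
new_quantiles = [0.05, 0.5, 0.95]

# Same nearest-neighbour rule as in update_output_quantiles: each new quantile
# takes over the output weights of the closest existing quantile.
mapping = {
    new_q: old_quantiles.index(min(old_quantiles, key=lambda q: abs(q - new_q)))
    for new_q in new_quantiles
}
print(mapping)  # {0.05: 0, 0.5: 4, 0.95: 8} -> 0.05 reuses 0.1, 0.5 reuses 0.5, 0.95 reuses 0.9
```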
autogluon/timeseries/models/ensemble/__init__.py

@@ -1,3 +1,30 @@
 from .abstract import AbstractTimeSeriesEnsembleModel
-from .
-from .
+from .array_based import MedianEnsemble, PerQuantileTabularEnsemble, TabularEnsemble
+from .weighted import GreedyEnsemble, PerformanceWeightedEnsemble, SimpleAverageEnsemble
+
+
+def get_ensemble_class(name: str):
+    mapping = {
+        "GreedyEnsemble": GreedyEnsemble,
+        "PerformanceWeightedEnsemble": PerformanceWeightedEnsemble,
+        "SimpleAverageEnsemble": SimpleAverageEnsemble,
+        "WeightedEnsemble": GreedyEnsemble,  # old alias for this model
+        "MedianEnsemble": MedianEnsemble,
+        "TabularEnsemble": TabularEnsemble,
+        "PerQuantileTabularEnsemble": PerQuantileTabularEnsemble,
+    }
+    if name not in mapping:
+        raise ValueError(f"Unknown ensemble type: {name}. Available: {list(mapping.keys())}")
+    return mapping[name]
+
+
+__all__ = [
+    "AbstractTimeSeriesEnsembleModel",
+    "GreedyEnsemble",
+    "MedianEnsemble",
+    "PerformanceWeightedEnsemble",
+    "PerQuantileTabularEnsemble",
+    "SimpleAverageEnsemble",
+    "TabularEnsemble",
+    "get_ensemble_class",
+]
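A short usage sketch of the new ``get_ensemble_class`` lookup added above (import path taken from the file list at the top of this diff):

```python
from autogluon.timeseries.models.ensemble import get_ensemble_class

# "WeightedEnsemble" is kept as a backwards-compatible alias for GreedyEnsemble.
ensemble_cls = get_ensemble_class("WeightedEnsemble")
print(ensemble_cls.__name__)  # GreedyEnsemble

try:
    get_ensemble_class("DoesNotExist")
except ValueError as err:
    print(err)  # message lists the available ensemble names
```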