autogluon.timeseries 1.4.1b20250907__py3-none-any.whl → 1.5.1b20260122__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of autogluon.timeseries might be problematic.
- autogluon/timeseries/configs/hyperparameter_presets.py +13 -28
- autogluon/timeseries/configs/predictor_presets.py +23 -39
- autogluon/timeseries/dataset/ts_dataframe.py +97 -86
- autogluon/timeseries/learner.py +70 -35
- autogluon/timeseries/metrics/__init__.py +4 -4
- autogluon/timeseries/metrics/abstract.py +8 -8
- autogluon/timeseries/metrics/point.py +9 -9
- autogluon/timeseries/metrics/quantile.py +5 -5
- autogluon/timeseries/metrics/utils.py +4 -4
- autogluon/timeseries/models/__init__.py +4 -1
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +52 -50
- autogluon/timeseries/models/abstract/model_trial.py +2 -1
- autogluon/timeseries/models/abstract/tunable.py +8 -8
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +58 -62
- autogluon/timeseries/models/autogluon_tabular/per_step.py +27 -16
- autogluon/timeseries/models/autogluon_tabular/transforms.py +11 -9
- autogluon/timeseries/models/chronos/__init__.py +2 -1
- autogluon/timeseries/models/chronos/chronos2.py +395 -0
- autogluon/timeseries/models/chronos/model.py +127 -89
- autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +69 -37
- autogluon/timeseries/models/ensemble/__init__.py +36 -2
- autogluon/timeseries/models/ensemble/abstract.py +14 -46
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +240 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +185 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
- autogluon/timeseries/models/ensemble/{greedy.py → ensemble_selection.py} +41 -61
- autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +45 -0
- autogluon/timeseries/models/ensemble/{basic.py → weighted/basic.py} +25 -22
- autogluon/timeseries/models/ensemble/weighted/greedy.py +64 -0
- autogluon/timeseries/models/gluonts/abstract.py +32 -31
- autogluon/timeseries/models/gluonts/dataset.py +11 -11
- autogluon/timeseries/models/gluonts/models.py +0 -7
- autogluon/timeseries/models/local/__init__.py +0 -7
- autogluon/timeseries/models/local/abstract_local_model.py +15 -18
- autogluon/timeseries/models/local/naive.py +2 -2
- autogluon/timeseries/models/local/npts.py +7 -1
- autogluon/timeseries/models/local/statsforecast.py +13 -13
- autogluon/timeseries/models/multi_window/multi_window_model.py +39 -24
- autogluon/timeseries/models/registry.py +3 -4
- autogluon/timeseries/models/toto/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
- autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
- autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
- autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
- autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
- autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
- autogluon/timeseries/models/toto/dataloader.py +108 -0
- autogluon/timeseries/models/toto/hf_pretrained_model.py +200 -0
- autogluon/timeseries/models/toto/model.py +249 -0
- autogluon/timeseries/predictor.py +541 -162
- autogluon/timeseries/regressor.py +27 -30
- autogluon/timeseries/splitter.py +3 -27
- autogluon/timeseries/trainer/ensemble_composer.py +444 -0
- autogluon/timeseries/trainer/model_set_builder.py +9 -9
- autogluon/timeseries/trainer/prediction_cache.py +16 -16
- autogluon/timeseries/trainer/trainer.py +300 -279
- autogluon/timeseries/trainer/utils.py +17 -0
- autogluon/timeseries/transforms/covariate_scaler.py +8 -8
- autogluon/timeseries/transforms/target_scaler.py +15 -15
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/lags.py +1 -3
- autogluon/timeseries/utils/datetime/seasonality.py +1 -3
- autogluon/timeseries/utils/features.py +31 -14
- autogluon/timeseries/utils/forecast.py +6 -7
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/version.py +1 -1
- autogluon.timeseries-1.5.1b20260122-py3.11-nspkg.pth +1 -0
- {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.5.1b20260122.dist-info}/METADATA +39 -22
- autogluon_timeseries-1.5.1b20260122.dist-info/RECORD +103 -0
- {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.5.1b20260122.dist-info}/WHEEL +1 -1
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -10
- autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
- autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -544
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -580
- autogluon.timeseries-1.4.1b20250907-py3.9-nspkg.pth +0 -1
- autogluon.timeseries-1.4.1b20250907.dist-info/RECORD +0 -75
- {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.5.1b20260122.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.5.1b20260122.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.5.1b20260122.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.5.1b20260122.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.5.1b20260122.dist-info}/zip-safe +0 -0
autogluon/timeseries/models/chronos/model.py

@@ -3,14 +3,15 @@ import os
 import shutil
 import warnings
 from pathlib import Path
-from typing import Any
+from typing import Any

 import numpy as np
 import pandas as pd
+from typing_extensions import Self

 from autogluon.common.loaders import load_pkl
 from autogluon.common.space import Space
-from autogluon.timeseries.dataset
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
 from autogluon.timeseries.utils.warning_filters import disable_duplicate_logs, warning_filter

@@ -81,31 +82,40 @@ MODEL_ALIASES = {


 class ChronosModel(AbstractTimeSeriesModel):
-"""Chronos [Ansari2024]_ pretrained time series forecasting models which can be used for zero-shot
-in a task-specific manner.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+"""Chronos [Ansari2024]_ pretrained time series forecasting models which can be used for zero-shot
+forecasting or fine-tuned in a task-specific manner.
+
+Models can be based on the original
+`Chronos <https://github.com/amazon-science/chronos-forecasting/blob/main/src/chronos/chronos.py>`_
+implementation, as well as a newer family of
+`Chronos-Bolt <https://github.com/amazon-science/chronos-forecasting/blob/main/src/chronos/chronos_bolt.py>`_
+models capable of much faster inference.
+
+The original Chronos is a family of pretrained models, based on the T5 family, with number of
+parameters ranging between 8M and 710M. The full collection of Chronos models is available on
+`Hugging Face <https://huggingface.co/collections/amazon/chronos-models-65f1791d630a8d57cb718444>`_.
+
+For Chronos (original) ``small``, ``base``, and ``large`` variants a GPU is required to
+perform inference efficiently. Chronos takes a minimalistic approach to pretraining time series
+models, by discretizing time series data directly into bins which are treated as tokens,
+effectively performing regression by classification. This results in a simple and flexible
+framework for using any language model in the context of time series forecasting.
+See [Ansari2024]_ for more information.
+
+The newer Chronos-Bolt variants enable much faster inference by first "patching" the time series.
+The resulting time series is then fed into a T5 model for forecasting. The Chronos-Bolt variants
+are capable of much faster inference, and can all run on CPUs.
+
+Both Chronos and Chronos-Bolt variants can be fine-tuned by setting ``fine_tune=True`` and selecting
+appropriate fine-tuning parameters such as the learning rate (``fine_tune_lr``) and max steps
+(``fine_tune_steps``).

 References
 ----------
 .. [Ansari2024] Ansari, Abdul Fatir, Stella, Lorenzo et al.
 "Chronos: Learning the Language of Time Series."
-
+Transactions on Machine Learning Research (2024).
+https://openreview.net/forum?id=gerNCVqqtR


 Other Parameters
@@ -117,36 +127,47 @@ class ChronosModel(AbstractTimeSeriesModel):
 ``tiny``, ``mini`` , ``small``, ``base``, and ``large``. Chronos-Bolt models can be specified
 with ``bolt_tiny``, ``bolt_mini``, ``bolt_small``, and ``bolt_base``.
 batch_size : int, default = 256
-Size of batches used during inference.
-
-
-
+Size of batches used during inference.
+
+The default ``batch_size`` is selected based on the model type. Chronos (original) models use a
+``batch_size`` of 16, except Chronos (Large) which uses 8.
+
+For Chronos-Bolt models the ``batch_size`` is set to 256. However, ``batch_size`` is reduced by
+a factor of 4 when the prediction horizon is greater than the model's
+default prediction length.
 num_samples : int, default = 20
 Number of samples used during inference, only used for the original Chronos models
 device : str, default = None
-Device to use for inference (and fine-tuning, if enabled). If None, model will use the GPU if
-For larger Chronos model sizes ``small``, ``base``, and ``large``; inference will fail
-
-
+Device to use for inference (and fine-tuning, if enabled). If None, model will use the GPU if
+available. For larger Chronos model sizes ``small``, ``base``, and ``large``; inference will fail
+if no GPU is available.
+
+For Chronos-Bolt models, inference can be performed on the CPU. Although fine-tuning the smaller
+Chronos models (``tiny`` and ``mini``) and all Chronos-Bolt is allowed on the CPU, we recommend
+using a GPU for faster fine-tuning.
 context_length : int or None, default = None
-The context length to use in the model.
-
-
-the model
-
-
-
-
-
-
+The context length to use in the model.
+
+Shorter context lengths will decrease model accuracy, but result in faster inference. If None,
+the model will infer context length from the data set length at inference time, but cap it at a
+maximum of 2048.
+
+Note that this is only the context length used to pass data into the model. Individual model
+implementations may have different context lengths specified in their configuration, and may
+truncate the context further. For example, original Chronos models have a context length of 512,
+but Chronos-Bolt models handle contexts up to 2048.
+torch_dtype : torch.dtype or {"auto", "bfloat16", "float32"}, default = "auto"
+Torch data type for model weights, provided to ``from_pretrained`` method of Hugging Face
+AutoModels. If original Chronos models are specified and the model size is ``small``, ``base``,
+or ``large``, the ``torch_dtype`` will be set to ``bfloat16`` to enable inference on GPUs.
 data_loader_num_workers : int, default = 0
-Number of worker processes to be used in the data loader. See documentation on
-for more information.
+Number of worker processes to be used in the data loader. See documentation on
+``torch.utils.data.DataLoader`` for more information.
 fine_tune : bool, default = False
 If True, the pretrained model will be fine-tuned
 fine_tune_lr : float, default = 1e-5
-The learning rate used for fine-tuning. This default is suitable for Chronos-Bolt models; for
-Chronos models, we recommend using a higher learning rate such as ``1e-4
+The learning rate used for fine-tuning. This default is suitable for Chronos-Bolt models; for
+the original Chronos models, we recommend using a higher learning rate such as ``1e-4``.
 fine_tune_steps : int, default = 1000
 The number of gradient update steps to fine-tune for
 fine_tune_batch_size : int, default = 32
@@ -155,8 +176,9 @@ class ChronosModel(AbstractTimeSeriesModel):
 The size of the shuffle buffer to shuffle the data during fine-tuning. If None, shuffling will
 be turned off.
 eval_during_fine_tune : bool, default = False
-If True, validation will be performed during fine-tuning to select the best checkpoint.
-
+If True, validation will be performed during fine-tuning to select the best checkpoint. Setting this
+argument to True may result in slower fine-tuning. This parameter is ignored if ``skip_model_selection=True``
+in ``TimeSeriesPredictor.fit``.
 fine_tune_eval_max_items : int, default = 256
 The maximum number of randomly-sampled time series to use from the validation set for evaluation
 during fine-tuning. If None, the entire validation dataset will be used.
@@ -164,11 +186,12 @@ class ChronosModel(AbstractTimeSeriesModel):
 Extra keyword arguments passed to ``transformers.TrainingArguments``
 keep_transformers_logs : bool, default = False
 If True, the logs generated by transformers will NOT be removed after fine-tuning
+revision : str, default = None
+Model revision to use (branch name or commit hash). If None, the default branch (usually "main") is used.
 """

 ag_priority = 55
-# default number of samples for prediction
-default_num_samples: int = 20
+default_num_samples: int = 20  # default number of samples for prediction
 default_model_path = "autogluon/chronos-bolt-small"
 default_max_time_limit_ratio = 0.8
 maximum_context_length = 2048
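The docstring above documents the user-facing hyperparameters, including the new ``revision`` option. For reference, a minimal sketch of how these hyperparameters are typically passed through AutoGluon; ``train_data`` is assumed to be a TimeSeriesDataFrame and all values are illustrative, not defaults recommended by this release.

    # Hedged sketch: passing the hyperparameters documented above to the Chronos model.
    from autogluon.timeseries import TimeSeriesPredictor

    predictor = TimeSeriesPredictor(prediction_length=48).fit(
        train_data,
        hyperparameters={
            "Chronos": {
                "model_path": "bolt_small",   # alias for autogluon/chronos-bolt-small
                "fine_tune": True,
                "fine_tune_lr": 1e-5,
                "fine_tune_steps": 1000,
                "revision": None,             # new in this release: pin a branch or commit hash
            }
        },
    )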
@@ -176,18 +199,18 @@ class ChronosModel(AbstractTimeSeriesModel):

 def __init__(
 self,
-freq:
+freq: str | None = None,
 prediction_length: int = 1,
-path:
-name:
-eval_metric:
-hyperparameters:
+path: str | None = None,
+name: str | None = None,
+eval_metric: str | None = None,
+hyperparameters: dict[str, Any] | None = None,
 **kwargs,  # noqa
 ):
 hyperparameters = hyperparameters if hyperparameters is not None else {}

-model_path_input = hyperparameters.get("model_path", self.default_model_path)
-self.model_path = MODEL_ALIASES.get(model_path_input, model_path_input)
+model_path_input: str = hyperparameters.get("model_path", self.default_model_path)
+self.model_path: str = MODEL_ALIASES.get(model_path_input, model_path_input)

 name = name if name is not None else "Chronos"
 if not isinstance(model_path_input, Space):
@@ -206,9 +229,9 @@ class ChronosModel(AbstractTimeSeriesModel):
 **kwargs,
 )

-self._model_pipeline:
+self._model_pipeline: Any | None = None  # of type BaseChronosPipeline

-def save(self, path:
+def save(self, path: str | None = None, verbose: bool = True) -> str:
 pipeline = self._model_pipeline
 self._model_pipeline = None
 path = super().save(path=path, verbose=verbose)
@@ -217,7 +240,7 @@ class ChronosModel(AbstractTimeSeriesModel):
 return str(path)

 @classmethod
-def load(cls, path: str, reset_paths: bool = True, verbose: bool = True) ->
+def load(cls, path: str, reset_paths: bool = True, load_oof: bool = False, verbose: bool = True) -> Self:
 model = load_pkl.load(path=os.path.join(path, cls.model_file_name), verbose=verbose)
 if reset_paths:
 model.set_contexts(path)
@@ -272,15 +295,15 @@ class ChronosModel(AbstractTimeSeriesModel):
 """
 return self.ag_default_config.get("default_torch_dtype", "auto")

-def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str,
-minimum_resources: dict[str,
+def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str, int | float]:
+minimum_resources: dict[str, int | float] = {"num_cpus": 1}
 # if GPU is available, we train with 1 GPU per trial
 if is_gpu_available:
 minimum_resources["num_gpus"] = self.min_num_gpus
 return minimum_resources

 def load_model_pipeline(self, is_training: bool = False):
-from
+from chronos import BaseChronosPipeline

 gpu_available = self._is_gpu_available()

@@ -291,12 +314,14 @@ class ChronosModel(AbstractTimeSeriesModel):
 "`import torch; torch.cuda.is_available()` returns `True`."
 )

-device = self.device or
+device = (self.device or "cuda") if gpu_available else "cpu"

+assert self.model_path is not None
 pipeline = BaseChronosPipeline.from_pretrained(
 self.model_path,
 device_map=device,
 torch_dtype=self.torch_dtype,
+revision=self.get_hyperparameter("revision"),
 )

 self._model_pipeline = pipeline
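``load_model_pipeline`` above now forwards the ``revision`` hyperparameter to ``BaseChronosPipeline.from_pretrained``. A standalone sketch of the equivalent call, assuming the ``chronos`` package is installed; the argument values mirror the diff above and are illustrative only.

    # Hedged sketch mirroring the call in load_model_pipeline above; values are assumptions.
    import torch
    from chronos import BaseChronosPipeline

    pipeline = BaseChronosPipeline.from_pretrained(
        "autogluon/chronos-bolt-small",                              # ChronosModel.default_model_path
        device_map="cuda" if torch.cuda.is_available() else "cpu",
        torch_dtype="auto",
        revision=None,                                               # e.g. a branch name or commit hash
    )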
@@ -339,6 +364,7 @@ class ChronosModel(AbstractTimeSeriesModel):
 "eval_during_fine_tune": False,
 "fine_tune_eval_max_items": 256,
 "fine_tune_shuffle_buffer_size": 10_000,
+"revision": None,
 }

 @property
@@ -360,6 +386,7 @@ class ChronosModel(AbstractTimeSeriesModel):
 "fine_tune_eval_max_items",
 "fine_tune_trainer_kwargs",
 "keep_transformers_logs",
+"revision",
 ]

 def _get_fine_tune_trainer_kwargs(self, init_args, eval_during_fine_tune: bool):
@@ -416,20 +443,24 @@ class ChronosModel(AbstractTimeSeriesModel):
 def _fit(
 self,
 train_data: TimeSeriesDataFrame,
-val_data:
-time_limit:
+val_data: TimeSeriesDataFrame | None = None,
+time_limit: float | None = None,
+num_cpus: int | None = None,
+num_gpus: int | None = None,
+verbosity: int = 2,
 **kwargs,
 ) -> None:
 import transformers
+from chronos import ChronosBoltPipeline, ChronosPipeline
 from packaging import version
 from transformers.trainer import PrinterCallback, Trainer, TrainingArguments

-from .
-from .pipeline.utils import (
+from .utils import (
 ChronosFineTuningDataset,
 EvaluateAndSaveFinalStepCallback,
 LoggerCallback,
 TimeLimitCallback,
+update_output_quantiles,
 )

 # TODO: Add support for fine-tuning models with context_length longer than the pretrained model
@@ -437,11 +468,10 @@ class ChronosModel(AbstractTimeSeriesModel):
 # verbosity < 3: all logs and warnings from transformers will be suppressed
 # verbosity >= 3: progress bar and loss logs will be logged
 # verbosity 4: everything will be logged
-verbosity = kwargs.get("verbosity", 2)
 for logger_name in logging.root.manager.loggerDict:
 if "transformers" in logger_name:
 transformers_logger = logging.getLogger(logger_name)
-transformers_logger.setLevel(logging.ERROR if verbosity <= 3 else logging.
+transformers_logger.setLevel(logging.ERROR if verbosity <= 3 else logging.WARNING)

 self._check_fit_params()
 self._log_unused_hyperparameters()
@@ -486,7 +516,7 @@ class ChronosModel(AbstractTimeSeriesModel):
 f"Fine-tuning prediction_length has been changed to {fine_tune_prediction_length}."
 )
 if self.quantile_levels != self.model_pipeline.quantiles:
-self.model_pipeline.model
+update_output_quantiles(self.model_pipeline.model, self.quantile_levels)
 logger.info(f"\tChronos-Bolt will be fine-tuned with quantile_levels={self.quantile_levels}")
 else:
 raise ValueError(f"Unsupported model pipeline: {type(self.model_pipeline)}")
@@ -517,7 +547,7 @@ class ChronosModel(AbstractTimeSeriesModel):
 # transformers changed the argument name from `evaluation_strategy` to `eval_strategy`
 fine_tune_trainer_kwargs["eval_strategy"] = fine_tune_trainer_kwargs.pop("evaluation_strategy")

-training_args = TrainingArguments(**fine_tune_trainer_kwargs, **pipeline_specific_trainer_kwargs)
+training_args = TrainingArguments(**fine_tune_trainer_kwargs, **pipeline_specific_trainer_kwargs)  # type: ignore
 tokenizer_train_dataset = ChronosFineTuningDataset(
 target_df=train_data,
 target_column=self.target,
@@ -533,6 +563,7 @@ class ChronosModel(AbstractTimeSeriesModel):
 if time_limit is not None:
 callbacks.append(TimeLimitCallback(time_limit=time_limit))

+tokenizer_val_dataset: ChronosFineTuningDataset | None = None
 if val_data is not None:
 callbacks.append(EvaluateAndSaveFinalStepCallback())
 # evaluate on a randomly-sampled subset
@@ -548,6 +579,7 @@ class ChronosModel(AbstractTimeSeriesModel):
 )
 val_data = val_data.loc[eval_items]

+assert isinstance(val_data, TimeSeriesDataFrame)
 tokenizer_val_dataset = ChronosFineTuningDataset(
 target_df=val_data,
 target_column=self.target,
@@ -561,7 +593,7 @@ class ChronosModel(AbstractTimeSeriesModel):
 model=self.model_pipeline.inner_model,
 args=training_args,
 train_dataset=tokenizer_train_dataset,
-eval_dataset=tokenizer_val_dataset
+eval_dataset=tokenizer_val_dataset,
 callbacks=callbacks,
 )

@@ -572,7 +604,7 @@ class ChronosModel(AbstractTimeSeriesModel):
 if verbosity >= 3:
 logger.warning(
 "Transformers logging is turned on during fine-tuning. Note that losses reported by transformers "
-"
+"do not correspond to those specified via `eval_metric`."
 )
 trainer.add_callback(LoggerCallback())

@@ -592,9 +624,9 @@ class ChronosModel(AbstractTimeSeriesModel):
 context_length: int,
 batch_size: int,
 num_workers: int = 0,
-time_limit:
+time_limit: float | None = None,
 ):
-from .
+from .utils import ChronosInferenceDataLoader, ChronosInferenceDataset, timeout_callback

 chronos_dataset = ChronosInferenceDataset(
 target_df=data,
@@ -607,7 +639,7 @@ class ChronosModel(AbstractTimeSeriesModel):
 batch_size=batch_size,
 shuffle=False,
 num_workers=num_workers,
-
+after_batch=timeout_callback(seconds=time_limit),
 )

 def _get_context_length(self, data: TimeSeriesDataFrame) -> int:
@@ -620,10 +652,10 @@ class ChronosModel(AbstractTimeSeriesModel):
 def _predict(
 self,
 data: TimeSeriesDataFrame,
-known_covariates:
+known_covariates: TimeSeriesDataFrame | None = None,
 **kwargs,
 ) -> TimeSeriesDataFrame:
-from
+from chronos import ChronosBoltPipeline, ChronosPipeline

 # We defer initialization of the model pipeline. i.e., the model is only loaded to device memory
 # during inference. We also infer the maximum length of the time series in the inference data set
@@ -635,20 +667,26 @@ class ChronosModel(AbstractTimeSeriesModel):
 # (according to its config.json file) of 512, it will further truncate the series during inference.
 context_length = self._get_context_length(data)

+extra_predict_kwargs = (
+{"num_samples": self.num_samples} if isinstance(self.model_pipeline, ChronosPipeline) else {}
+)
+
+# adapt batch size for Chronos bolt if requested prediction length is longer than model prediction length
+batch_size = self.batch_size
+model_prediction_length = None
+if isinstance(self.model_pipeline, ChronosBoltPipeline):
+model_prediction_length = self.model_pipeline.model.config.chronos_config.get("prediction_length")
+if model_prediction_length and self.prediction_length > model_prediction_length:
+batch_size = max(1, batch_size // 4)
+logger.debug(
+f"\tThe prediction_length {self.prediction_length} exceeds model's prediction_length {model_prediction_length}. "
+f"The inference batch_size has been reduced from {self.batch_size} to {batch_size} to avoid OOM errors."
+)
+
 with warning_filter(all_warnings=True):
 import torch

 self.model_pipeline.model.eval()
-batch_size = self.batch_size
-if (
-isinstance(self.model_pipeline, ChronosBoltPipeline)
-and self.prediction_length > self.model_pipeline.model_prediction_length
-):
-batch_size = max(1, batch_size // 4)
-logger.debug(
-f"\tThe prediction_length {self.prediction_length} exceeds model's prediction_length {self.model_pipeline.model_prediction_length}. "
-f"The inference batch_size has been reduced from {self.batch_size} to {batch_size} to avoid OOM errors."
-)

 inference_data_loader = self._get_inference_data_loader(
 data=data,
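The ``_predict`` hunk above moves the Chronos-Bolt batch-size adaptation out of the ``warning_filter`` block and reads the model's own prediction length from its config; the rule itself is unchanged. A small numeric sketch of that rule, with all numbers assumed for illustration:

    # Hedged sketch of the batch-size rule shown above; all numbers are assumptions.
    default_batch_size = 256        # Chronos-Bolt default batch_size
    model_prediction_length = 64    # read from the model's config.json
    requested_prediction_length = 96

    batch_size = default_batch_size
    if requested_prediction_length > model_prediction_length:
        batch_size = max(1, batch_size // 4)   # 256 -> 64 to avoid OOM on long horizons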
@@ -666,7 +704,7 @@ class ChronosModel(AbstractTimeSeriesModel):
 batch,
 prediction_length=self.prediction_length,
 quantile_levels=self.quantile_levels,
-
+**extra_predict_kwargs,
 )
 except torch.OutOfMemoryError as ex:
 logger.error(
@@ -692,7 +730,7 @@ class ChronosModel(AbstractTimeSeriesModel):
 return TimeSeriesDataFrame(df)

 def _more_tags(self) -> dict:
-do_fine_tune = self.
+do_fine_tune = self.get_hyperparameter("fine_tune")
 return {
 "allow_nan": True,
 "can_use_train_data": do_fine_tune,

autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py}

@@ -1,26 +1,23 @@
 import logging
-import os
-import re
 import time
 from itertools import chain, cycle
-from
-from typing import TYPE_CHECKING, Callable, Iterable, Iterator, Literal, Optional
+from typing import TYPE_CHECKING, Callable, Iterable, Iterator, Literal

 import numpy as np
 import torch
+from chronos.chronos_bolt import ChronosBoltModelForForecasting, ResidualBlock
 from gluonts.dataset.field_names import FieldName
 from gluonts.transform import ExpectedNumInstanceSampler, InstanceSplitter, ValidationSplitSampler
 from torch.utils.data import IterableDataset
 from transformers import TrainerCallback

-from autogluon.common.loaders.load_s3 import download, list_bucket_prefix_suffix_contains_s3
 from autogluon.core.utils.exceptions import TimeLimitExceeded
-from autogluon.timeseries.dataset
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.models.gluonts.dataset import SimpleGluonTSDataset

 if TYPE_CHECKING:
 # TODO: fix the underlying reason for this circular import, the pipeline should handle tokenization
-from
+from chronos import ChronosTokenizer


 logger = logging.getLogger("autogluon.timeseries.models.chronos")
@@ -96,7 +93,7 @@ class ChronosFineTuningDataset(IterableDataset):
 target_column: str = "target",
 context_length: int = 512,
 prediction_length: int = 64,
-tokenizer:
+tokenizer: "ChronosTokenizer | None" = None,
 mode: Literal["training", "validation"] = "training",
 ) -> None:
 super().__init__()
@@ -132,11 +129,11 @@ class ChronosFineTuningDataset(IterableDataset):
 def _create_training_data(self, data: Iterable[dict]):
 data = chain.from_iterable(cycle([data]))
 split_transform = self._create_instance_splitter("training")
-data = split_transform.apply(data, is_train=True)
+data = split_transform.apply(data, is_train=True)  # type: ignore
 return data

 def _create_validation_data(self, data: Iterable[dict]):
-data = self._create_instance_splitter("validation").apply(data, is_train=False)
+data = self._create_instance_splitter("validation").apply(data, is_train=False)  # type: ignore
 return data

 def to_chronos_format(self, entry: dict) -> dict:
@@ -190,12 +187,14 @@ class ChronosFineTuningDataset(IterableDataset):
 iterable = self._create_training_data(self.gluonts_dataset)
 elif self.mode == "validation":
 iterable = self._create_validation_data(self.gluonts_dataset)
+else:
+raise ValueError(f"Unknown mode {self.mode}")

 format_transform_fn = self.to_chronos_format if self.tokenizer is not None else self.to_chronos_bolt_format
 for entry in iterable:
 yield format_transform_fn(entry)

-def shuffle(self, shuffle_buffer_size:
+def shuffle(self, shuffle_buffer_size: int | None = None):
 """Returns a (pseudo) shuffled version of this iterable dataset.

 Parameters
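The ``ChronosFineTuningDataset`` changes above add an explicit error for unknown modes and a ``shuffle_buffer_size: int | None`` signature. A sketch of how such a dataset is assembled for fine-tuning, mirroring the ``_fit`` call site in model.py; the argument values are assumptions.

    # Hedged sketch of assembling the fine-tuning dataset; argument values are assumptions.
    from autogluon.timeseries.models.chronos.utils import ChronosFineTuningDataset

    train_dataset = ChronosFineTuningDataset(
        target_df=train_data,     # a TimeSeriesDataFrame
        target_column="target",
        context_length=2048,
        prediction_length=64,
        tokenizer=None,           # None -> Chronos-Bolt format; pass a ChronosTokenizer for original Chronos
        mode="training",
    ).shuffle(shuffle_buffer_size=10_000)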
@@ -220,27 +219,6 @@ def left_pad_and_stack_1D(tensors: list[torch.Tensor]) -> torch.Tensor:
 return torch.stack(padded)


-def cache_model_from_s3(s3_uri: str, force=False):
-if re.match("^s3://([^/]+)/(.*?([^/]+)/?)$", s3_uri) is None:
-raise ValueError(f"Not a valid S3 URI: {s3_uri}")
-
-# we expect the prefix to point to a "directory" on S3
-if not s3_uri.endswith("/"):
-s3_uri += "/"
-
-cache_home = Path(os.environ.get("XDG_CACHE_HOME") or Path.home() / ".cache")
-bucket, prefix = s3_uri.replace("s3://", "").split("/", 1)
-bucket_cache_path = cache_home / "autogluon" / "timeseries" / bucket
-
-for obj_path in list_bucket_prefix_suffix_contains_s3(bucket=bucket, prefix=prefix):
-destination_path = bucket_cache_path / obj_path
-if not force and destination_path.exists():
-continue
-download(bucket, obj_path, local_path=str(destination_path))
-
-return str(bucket_cache_path / prefix)
-
-
 class ChronosInferenceDataset:
 """A container for time series datasets that implements the ``torch.utils.data.Dataset`` interface"""

@@ -277,10 +255,10 @@ class ChronosInferenceDataset:

 class ChronosInferenceDataLoader(torch.utils.data.DataLoader):
 def __init__(self, *args, **kwargs):
-self.callback: Callable = kwargs.pop("
+self.callback: Callable = kwargs.pop("after_batch", lambda: None)
 super().__init__(*args, **kwargs)

-def __iter__(self):
+def __iter__(self):  # type: ignore
 for item in super().__iter__():
 yield item
 self.callback()
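``ChronosInferenceDataLoader`` above renames its callback keyword to ``after_batch``; paired with ``timeout_callback``, the loader yields batches as usual and the callback raises ``TimeLimitExceeded`` once the time budget is spent. A sketch of that pairing, matching ``_get_inference_data_loader`` in model.py; argument values are assumptions.

    # Hedged sketch of pairing the loader with timeout_callback; values are assumptions.
    from autogluon.timeseries.models.chronos.utils import (
        ChronosInferenceDataLoader,
        ChronosInferenceDataset,
        timeout_callback,
    )

    dataset = ChronosInferenceDataset(target_df=data, context_length=2048, target_column="target")
    loader = ChronosInferenceDataLoader(
        dataset,
        batch_size=256,
        shuffle=False,
        num_workers=0,
        after_batch=timeout_callback(seconds=60.0),  # raises TimeLimitExceeded after ~60 s
    )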
@@ -297,7 +275,7 @@ class EvaluateAndSaveFinalStepCallback(TrainerCallback):


 class TimeLimitCallback(TrainerCallback):
-def __init__(self, time_limit:
+def __init__(self, time_limit: float):
 """
 Callback to stop training once a specified time has elapsed.

@@ -321,12 +299,13 @@ class TimeLimitCallback(TrainerCallback):

 class LoggerCallback(TrainerCallback):
 def on_log(self, args, state, control, logs=None, **kwargs):
-logs
+if logs:
+logs.pop("total_flos", None)
 if state.is_local_process_zero:
 logger.info(logs)


-def timeout_callback(seconds:
+def timeout_callback(seconds: float | None) -> Callable:
 """Return a callback object that raises an exception if time limit is exceeded."""
 start_time = time.monotonic()

@@ -335,3 +314,56 @@ def timeout_callback(seconds: Optional[float]) -> Callable:
 raise TimeLimitExceeded

 return callback
+
+
+def update_output_quantiles(model: ChronosBoltModelForForecasting, new_quantiles: list[float]) -> None:
+"""In-place updates model's output layer to support only the specified new quantiles by copying
+weights from closest existing quantiles.
+"""
+old_quantiles = model.chronos_config.quantiles
+new_quantiles = sorted(new_quantiles)
+
+if new_quantiles == old_quantiles:
+return
+
+model.chronos_config.quantiles = new_quantiles
+model.num_quantiles = len(new_quantiles)
+model.register_buffer("quantiles", torch.tensor(new_quantiles, dtype=model.dtype), persistent=False)
+
+old_output_layer = model.output_patch_embedding
+new_output_layer = ResidualBlock(
+in_dim=model.config.d_model,
+h_dim=model.config.d_ff,
+out_dim=len(new_quantiles) * model.chronos_config.prediction_length,
+act_fn_name=model.config.dense_act_fn,
+dropout_p=model.config.dropout_rate,
+)
+
+# hidden_layer is shared across all quantiles
+new_output_layer.hidden_layer.weight.data.copy_(old_output_layer.hidden_layer.weight.data)
+if old_output_layer.hidden_layer.bias is not None:
+new_output_layer.hidden_layer.bias.data.copy_(old_output_layer.hidden_layer.bias.data)
+
+def copy_quantile_weights(src_idx: int, dst_idx: int):
+"""Copy weights for one quantile from src_idx to dst_idx"""
+prediction_length = model.chronos_config.prediction_length
+src_start, src_end = src_idx * prediction_length, (src_idx + 1) * prediction_length
+dst_start, dst_end = dst_idx * prediction_length, (dst_idx + 1) * prediction_length
+
+for layer_name in ["output_layer", "residual_layer"]:
+old_layer_attr = getattr(old_output_layer, layer_name)
+new_layer_attr = getattr(new_output_layer, layer_name)
+
+new_layer_attr.weight[dst_start:dst_end] = old_layer_attr.weight[src_start:src_end]
+if old_layer_attr.bias is not None:
+new_layer_attr.bias[dst_start:dst_end] = old_layer_attr.bias[src_start:src_end]
+
+with torch.no_grad():
+for new_idx, new_q in enumerate(new_quantiles):
+closest_q = min(old_quantiles, key=lambda x: abs(x - new_q))
+closest_idx = old_quantiles.index(closest_q)
+copy_quantile_weights(closest_idx, new_idx)
+
+model.output_patch_embedding = new_output_layer
+model.config.chronos_config["quantiles"] = new_quantiles
+model.chronos_config.quantiles = new_quantiles
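
The new ``update_output_quantiles`` helper initializes each requested quantile head from the closest pretrained one. A small sketch of that mapping; the quantile values below are illustrative, not taken from any particular checkpoint.

    # Hedged sketch of the closest-quantile mapping performed by update_output_quantiles.
    old_quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]   # typical Chronos-Bolt heads (assumption)
    new_quantiles = [0.05, 0.5, 0.95]
    mapping = {q: min(old_quantiles, key=lambda old: abs(old - q)) for q in new_quantiles}
    # {0.05: 0.1, 0.5: 0.5, 0.95: 0.9} -- each new head copies weights from its closest old head

    # Call site, as used by ChronosModel._fit for a Chronos-Bolt pipeline:
    # update_output_quantiles(model_pipeline.model, new_quantiles)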
|