autogluon.timeseries 1.4.1b20251115__py3-none-any.whl → 1.5.0b20251221__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/timeseries/configs/hyperparameter_presets.py +13 -28
- autogluon/timeseries/configs/predictor_presets.py +23 -39
- autogluon/timeseries/dataset/ts_dataframe.py +32 -34
- autogluon/timeseries/learner.py +67 -33
- autogluon/timeseries/metrics/__init__.py +4 -4
- autogluon/timeseries/metrics/abstract.py +8 -8
- autogluon/timeseries/metrics/point.py +9 -9
- autogluon/timeseries/metrics/quantile.py +4 -4
- autogluon/timeseries/models/__init__.py +2 -1
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +52 -50
- autogluon/timeseries/models/abstract/model_trial.py +2 -1
- autogluon/timeseries/models/abstract/tunable.py +8 -8
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +30 -26
- autogluon/timeseries/models/autogluon_tabular/per_step.py +13 -11
- autogluon/timeseries/models/autogluon_tabular/transforms.py +2 -2
- autogluon/timeseries/models/chronos/__init__.py +2 -1
- autogluon/timeseries/models/chronos/chronos2.py +395 -0
- autogluon/timeseries/models/chronos/model.py +30 -25
- autogluon/timeseries/models/chronos/utils.py +5 -5
- autogluon/timeseries/models/ensemble/__init__.py +17 -10
- autogluon/timeseries/models/ensemble/abstract.py +13 -9
- autogluon/timeseries/models/ensemble/array_based/__init__.py +2 -2
- autogluon/timeseries/models/ensemble/array_based/abstract.py +24 -31
- autogluon/timeseries/models/ensemble/array_based/models.py +146 -11
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +2 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +6 -5
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +44 -83
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +21 -55
- autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
- autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +7 -3
- autogluon/timeseries/models/ensemble/weighted/basic.py +26 -13
- autogluon/timeseries/models/ensemble/weighted/greedy.py +21 -144
- autogluon/timeseries/models/gluonts/abstract.py +30 -29
- autogluon/timeseries/models/gluonts/dataset.py +9 -9
- autogluon/timeseries/models/gluonts/models.py +0 -7
- autogluon/timeseries/models/local/__init__.py +0 -7
- autogluon/timeseries/models/local/abstract_local_model.py +13 -16
- autogluon/timeseries/models/local/naive.py +2 -2
- autogluon/timeseries/models/local/npts.py +7 -1
- autogluon/timeseries/models/local/statsforecast.py +13 -13
- autogluon/timeseries/models/multi_window/multi_window_model.py +38 -23
- autogluon/timeseries/models/registry.py +3 -4
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +3 -4
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +6 -6
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +4 -9
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +2 -3
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +10 -10
- autogluon/timeseries/models/toto/_internal/dataset.py +2 -2
- autogluon/timeseries/models/toto/_internal/forecaster.py +8 -8
- autogluon/timeseries/models/toto/dataloader.py +4 -4
- autogluon/timeseries/models/toto/hf_pretrained_model.py +97 -16
- autogluon/timeseries/models/toto/model.py +30 -17
- autogluon/timeseries/predictor.py +531 -136
- autogluon/timeseries/regressor.py +18 -23
- autogluon/timeseries/splitter.py +2 -2
- autogluon/timeseries/trainer/ensemble_composer.py +323 -129
- autogluon/timeseries/trainer/model_set_builder.py +9 -9
- autogluon/timeseries/trainer/prediction_cache.py +16 -16
- autogluon/timeseries/trainer/trainer.py +235 -145
- autogluon/timeseries/trainer/utils.py +3 -4
- autogluon/timeseries/transforms/covariate_scaler.py +7 -7
- autogluon/timeseries/transforms/target_scaler.py +8 -8
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/lags.py +1 -3
- autogluon/timeseries/utils/datetime/seasonality.py +1 -3
- autogluon/timeseries/utils/features.py +22 -9
- autogluon/timeseries/utils/forecast.py +1 -2
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/version.py +1 -1
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/METADATA +23 -21
- autogluon_timeseries-1.5.0b20251221.dist-info/RECORD +103 -0
- autogluon_timeseries-1.4.1b20251115.dist-info/RECORD +0 -96
- /autogluon.timeseries-1.4.1b20251115-py3.9-nspkg.pth → /autogluon.timeseries-1.5.0b20251221-py3.11-nspkg.pth +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/WHEEL +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/licenses/LICENSE +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/licenses/NOTICE +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/namespace_packages.txt +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/top_level.txt +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/zip-safe +0 -0
autogluon/timeseries/models/chronos/chronos2.py (new file, +395)
@@ -0,0 +1,395 @@
+import logging
+import os
+from typing import Any
+
+import numpy as np
+import pandas as pd
+from typing_extensions import Self
+
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
+from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
+
+logger = logging.getLogger(__name__)
+
+
+class Chronos2Model(AbstractTimeSeriesModel):
+    """Chronos-2 pretrained time series forecasting model [Ansari2025]_, which provides strong zero-shot forecasting
+    capability natively taking advantage of covariates. The model can also be fine-tuned in a task specific manner.
+
+    This implementation wraps the original implementation in the `chronos-forecasting`
+    `library <https://github.com/amazon-science/chronos-forecasting/blob/main/src/chronos/chronos2/pipeline.py>`_ .
+
+    Chronos-2 can be used both on GPU and CPU. However, we recommend using a GPU for faster inference and fine-tuning.
+
+    Chronos-2 variants can be fine-tuned by setting ``fine_tune=True`` and selecting appropriate fine-tuning parameters
+    such as the learning rate (``fine_tune_lr``) and max steps (``fine_tune_steps``). By default, a low-rank adapter (LoRA)
+    will be used for fine-tuning.
+
+    References
+    ----------
+    .. [Ansari2025] Ansari, Abdul Fatir, Shchur, Oleksandr, Kuken, Jaris et al.
+        "Chronos-2: From Univariate to Universal Forecasting." (2025).
+        https://arxiv.org/abs/2510.15821
+
+    Other Parameters
+    ----------------
+    model_path : str, default = "autogluon/chronos-2"
+        Model path used for the model, i.e., a Hugging Face transformers ``name_or_path``. Can be a
+        compatible model name on Hugging Face Hub or a local path to a model directory.
+    batch_size : int, default = 256
+        Size of batches used during inference.
+    device : str, default = None
+        Device to use for inference (and fine-tuning, if enabled). If None, model will use the GPU if
+        available.
+    cross_learning : bool, default = True
+        If True, the cross-learning mode of Chronos-2 is enabled. This means that the model will make joint
+        predictions across time series in a batch, by default True
+        Note: Enabling this mode makes the results sensitive to the ``batch_size`` used.
+    context_length : int or None, default = None
+        The context length to use for inference. If None, the model will use its default context length
+        of 8192. Shorter context lengths may reduce accuracy, but result in faster inference.
+    fine_tune : bool, default = False
+        If True, the pretrained model will be fine-tuned.
+    fine_tune_mode : str, default = "lora"
+        Fine-tuning mode, either "full" for full fine-tuning or "lora" for Low Rank Adaptation (LoRA).
+        LoRA is faster and uses less memory.
+    fine_tune_lr : float, default = 1e-5
+        The learning rate used for fine-tuning. When using full fine-tuning, a lower learning rate such as 1e-6
+        is recommended.
+    fine_tune_steps : int, default = 1000
+        The number of gradient update steps to fine-tune for.
+    fine_tune_batch_size : int, default = 32
+        The batch size to use for fine-tuning.
+    fine_tune_context_length : int, default = 2048
+        The maximum context_length to use for fine-tuning
+    eval_during_fine_tune : bool, default = False
+        If True, validation will be performed during fine-tuning to select the best checkpoint. Setting this
+        argument to True may result in slower fine-tuning. This parameter is ignored if ``skip_model_selection=True``
+        in ``TimeSeriesPredictor.fit``.
+    fine_tune_eval_max_items : int, default = 256
+        The maximum number of randomly-sampled time series to use from the validation set for evaluation
+        during fine-tuning. If None, the entire validation dataset will be used.
+    fine_tune_lora_config : dict, optional
+        Configuration for LoRA fine-tuning when ``fine_tune_mode="lora"``. If None and LoRA is enabled,
+        a default configuration will be used. Example: ``{"r": 8, "lora_alpha": 16}``.
+    fine_tune_trainer_kwargs : dict, optional
+        Extra keyword arguments passed to ``transformers.TrainingArguments``
+    revision : str, default = None
+        Model revision to use (branch name or commit hash). If None, the default branch (usually "main") is used.
+    disable_known_covariates : bool, default = False
+        If True, known covariates won't be used by the model even if they are present in the dataset.
+    disable_past_covariates : bool, default = False
+        If True, past covariates won't be used by the model even if they are present in the dataset.
+    """
+
+    ag_model_aliases = ["Chronos-2"]
+    ag_priority = 75
+    fine_tuned_ckpt_name: str = "fine-tuned-ckpt"
+
+    _supports_known_covariates = True
+    _supports_past_covariates = True
+
+    def __init__(
+        self,
+        freq: str | None = None,
+        prediction_length: int = 1,
+        path: str | None = None,
+        name: str | None = None,
+        eval_metric: str | None = None,
+        hyperparameters: dict[str, Any] | None = None,
+        **kwargs,
+    ):
+        super().__init__(
+            path=path,
+            freq=freq,
+            prediction_length=prediction_length,
+            name=name,
+            eval_metric=eval_metric,
+            hyperparameters=hyperparameters,
+            **kwargs,
+        )
+        self._is_fine_tuned: bool = False
+        self._model_pipeline = None
+
+    @property
+    def model_path(self) -> str:
+        default_model_path = self.get_hyperparameter("model_path")
+
+        if self._is_fine_tuned:
+            model_path = os.path.join(self.path, self.fine_tuned_ckpt_name)
+            if not os.path.exists(model_path):
+                raise ValueError("Cannot find finetuned checkpoint for Chronos-2.")
+            else:
+                return model_path
+
+        return default_model_path
+
+    def save(self, path: str | None = None, verbose: bool = True) -> str:
+        pipeline = self._model_pipeline
+        self._model_pipeline = None
+        path = super().save(path=path, verbose=verbose)
+        self._model_pipeline = pipeline
+
+        return str(path)
+
+    def _fit(
+        self,
+        train_data: TimeSeriesDataFrame,
+        val_data: TimeSeriesDataFrame | None = None,
+        time_limit: float | None = None,
+        num_cpus: int | None = None,
+        num_gpus: int | None = None,
+        verbosity: int = 2,
+        **kwargs,
+    ) -> None:
+        self._check_fit_params()
+        self._log_unused_hyperparameters()
+        self.load_model_pipeline()
+
+        # NOTE: This must be placed after load_model_pipeline to ensure that the loggers are available in loggerDict
+        self._update_transformers_loggers(logging.ERROR if verbosity <= 3 else logging.WARNING)
+
+        if self.get_hyperparameter("fine_tune"):
+            self._fine_tune(train_data, val_data, time_limit=time_limit, verbosity=verbosity)
+
+    def get_hyperparameters(self) -> dict:
+        """Gets params that are passed to the inner model."""
+        init_args = super().get_hyperparameters()
+
+        fine_tune_trainer_kwargs = dict(disable_tqdm=True)
+        user_fine_tune_trainer_kwargs = init_args.get("fine_tune_trainer_kwargs", {})
+        fine_tune_trainer_kwargs.update(user_fine_tune_trainer_kwargs)
+        init_args["fine_tune_trainer_kwargs"] = fine_tune_trainer_kwargs
+
+        return init_args.copy()
+
+    def _get_default_hyperparameters(self) -> dict:
+        return {
+            "model_path": "autogluon/chronos-2",
+            "batch_size": 256,
+            "device": None,
+            "cross_learning": True,
+            "context_length": None,
+            "fine_tune": False,
+            "fine_tune_mode": "lora",
+            "fine_tune_lr": 1e-5,
+            "fine_tune_steps": 1000,
+            "fine_tune_batch_size": 32,
+            "fine_tune_context_length": 2048,
+            "eval_during_fine_tune": False,
+            "fine_tune_eval_max_items": 256,
+            "fine_tune_lora_config": None,
+            "revision": None,
+            "disable_known_covariates": False,
+            "disable_past_covariates": False,
+        }
+
+    @property
+    def allowed_hyperparameters(self) -> list[str]:
+        return super().allowed_hyperparameters + [
+            "model_path",
+            "batch_size",
+            "device",
+            "cross_learning",
+            "context_length",
+            "fine_tune",
+            "fine_tune_mode",
+            "fine_tune_lr",
+            "fine_tune_steps",
+            "fine_tune_batch_size",
+            "fine_tune_context_length",
+            "eval_during_fine_tune",
+            "fine_tune_eval_max_items",
+            "fine_tune_lora_config",
+            "fine_tune_trainer_kwargs",
+            "revision",
+            "disable_known_covariates",
+            "disable_past_covariates",
+        ]
+
+    def _remove_disabled_covariates(
+        self, past_df: pd.DataFrame, future_df: pd.DataFrame | None
+    ) -> tuple[pd.DataFrame, pd.DataFrame | None]:
+        """Remove covariates from dataframes based on disable flags."""
+        cols_to_remove = []
+        if self.get_hyperparameter("disable_past_covariates"):
+            cols_to_remove.extend(self.covariate_metadata.past_covariates)
+        if self.get_hyperparameter("disable_known_covariates"):
+            cols_to_remove.extend(self.covariate_metadata.known_covariates)
+            future_df = None
+
+        if cols_to_remove:
+            past_df = past_df.drop(columns=cols_to_remove)
+
+        return past_df, future_df
+
+    def _predict(
+        self,
+        data: TimeSeriesDataFrame,
+        known_covariates: TimeSeriesDataFrame | None = None,
+        **kwargs,
+    ) -> TimeSeriesDataFrame:
+        from .utils import timeout_callback
+
+        if self._model_pipeline is None:
+            self.load_model_pipeline()
+        assert self._model_pipeline is not None
+
+        if max(data.num_timesteps_per_item()) < 3:
+            # If all time series have length 2 or less, we prepend 2 dummy timesteps to the first series
+            first_item_id = data.index.get_level_values(0)[0]
+            dummy_timestamps = pd.date_range(end=data.loc[first_item_id].index[0], periods=3, freq=self.freq)[:-1]
+            full_time_index_first_item = data.loc[first_item_id].index.union(dummy_timestamps)
+            new_index = (
+                pd.MultiIndex.from_product([[first_item_id], full_time_index_first_item], names=data.index.names)
+            ).union(data.index)
+            context_df = data.reindex(new_index).reset_index()
+        else:
+            context_df = data.reset_index().to_data_frame()
+
+        batch_size = self.get_hyperparameter("batch_size")
+        cross_learning = self.get_hyperparameter("cross_learning")
+        context_length = self.get_hyperparameter("context_length")
+        future_df = known_covariates.reset_index().to_data_frame() if known_covariates is not None else None
+        time_limit = kwargs.get("time_limit")
+
+        context_df, future_df = self._remove_disabled_covariates(context_df, future_df)
+
+        forecast_df = self._model_pipeline.predict_df(
+            df=context_df,
+            future_df=future_df,
+            target=self.target,
+            prediction_length=self.prediction_length,
+            quantile_levels=self.quantile_levels,
+            context_length=context_length,
+            batch_size=batch_size,
+            validate_inputs=False,
+            cross_learning=cross_learning,
+            after_batch=timeout_callback(time_limit),
+        )
+
+        forecast_df = forecast_df.rename(columns={"predictions": "mean"}).drop(columns="target_name")
+
+        return TimeSeriesDataFrame(forecast_df)
+
+    def load_model_pipeline(self):
+        from chronos.chronos2.pipeline import Chronos2Pipeline
+
+        device = (self.get_hyperparameter("device") or "cuda") if self._is_gpu_available() else "cpu"
+
+        assert self.model_path is not None
+        pipeline = Chronos2Pipeline.from_pretrained(
+            self.model_path,
+            device_map=device,
+            revision=self.get_hyperparameter("revision"),
+        )
+
+        self._model_pipeline = pipeline
+
+    def persist(self) -> Self:
+        self.load_model_pipeline()
+        return self
+
+    def _update_transformers_loggers(self, log_level: int):
+        for logger_name in logging.root.manager.loggerDict:
+            if "transformers" in logger_name:
+                transformers_logger = logging.getLogger(logger_name)
+                transformers_logger.setLevel(log_level)
+
+    def _fine_tune(
+        self,
+        train_data: TimeSeriesDataFrame,
+        val_data: TimeSeriesDataFrame | None,
+        time_limit: float | None = None,
+        verbosity: int = 2,
+    ):
+        from chronos.df_utils import convert_df_input_to_list_of_dicts_input
+
+        from .utils import LoggerCallback, TimeLimitCallback
+
+        def convert_data(df: TimeSeriesDataFrame):
+            past_df = df.reset_index().to_data_frame()
+            past_df, _ = self._remove_disabled_covariates(past_df, None)
+
+            inputs, _, _ = convert_df_input_to_list_of_dicts_input(
+                df=past_df,
+                future_df=None,
+                target_columns=[self.target],
+                prediction_length=self.prediction_length,
+                validate_inputs=False,
+            )
+
+            # The above utility will only split the dataframe into target and past_covariates, where past_covariates contains
+            # past values of both past-only and known-future covariates. We need to add future_covariates to enable fine-tuning
+            # with known covariates by indicating which covariates are known in the future.
+            if not self.get_hyperparameter("disable_known_covariates"):
+                known_covariates = self.covariate_metadata.known_covariates
+                if len(known_covariates) > 0:
+                    for input_dict in inputs:
+                        # NOTE: the covariates are empty because the actual values are not used
+                        # This only indicates which covariates are known in the future
+                        input_dict["future_covariates"] = {name: np.array([]) for name in known_covariates}
+
+            return inputs
+
+        assert self._model_pipeline is not None
+        hyperparameters = self.get_hyperparameters()
+
+        callbacks = []
+        if time_limit is not None:
+            callbacks.append(TimeLimitCallback(time_limit=time_limit))
+
+        val_inputs = None
+        if val_data is not None and hyperparameters["eval_during_fine_tune"]:
+            # evaluate on a randomly-sampled subset
+            fine_tune_eval_max_items = (
+                min(val_data.num_items, hyperparameters["fine_tune_eval_max_items"])
+                if hyperparameters["fine_tune_eval_max_items"] is not None
+                else val_data.num_items
+            )
+
+            if fine_tune_eval_max_items < val_data.num_items:
+                eval_items = np.random.choice(val_data.item_ids.values, size=fine_tune_eval_max_items, replace=False)  # noqa: F841
+                val_data = val_data.query("item_id in @eval_items")
+
+            assert isinstance(val_data, TimeSeriesDataFrame)
+            val_inputs = convert_data(val_data)
+
+        if verbosity >= 3:
+            logger.warning(
+                "Transformers logging is turned on during fine-tuning. Note that losses reported by transformers "
+                "do not correspond to those specified via `eval_metric`."
+            )
+            callbacks.append(LoggerCallback())
+
+        self._model_pipeline = self._model_pipeline.fit(
+            inputs=convert_data(train_data),
+            prediction_length=self.prediction_length,
+            validation_inputs=val_inputs,
+            finetune_mode=hyperparameters["fine_tune_mode"],
+            lora_config=hyperparameters["fine_tune_lora_config"],
+            context_length=hyperparameters["fine_tune_context_length"],
+            learning_rate=hyperparameters["fine_tune_lr"],
+            num_steps=hyperparameters["fine_tune_steps"],
+            batch_size=hyperparameters["fine_tune_batch_size"],
+            output_dir=self.path,
+            finetuned_ckpt_name=self.fine_tuned_ckpt_name,
+            callbacks=callbacks,
+            remove_printer_callback=True,
+            min_past=1,
+            **hyperparameters["fine_tune_trainer_kwargs"],
+        )
+        self._is_fine_tuned = True
+
+    def _more_tags(self) -> dict[str, Any]:
+        do_fine_tune = self.get_hyperparameter("fine_tune")
+        return {
+            "allow_nan": True,
+            "can_use_train_data": do_fine_tune,
+            "can_use_val_data": do_fine_tune,
+        }
+
+    def _is_gpu_available(self) -> bool:
+        import torch.cuda
+
+        return torch.cuda.is_available()
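For orientation, the new Chronos2Model registers under the alias "Chronos-2" and reads the hyperparameters documented in its docstring above. Below is a minimal sketch of selecting it through the standard TimeSeriesPredictor interface; the synthetic dataset, the specific hyperparameter values, and the assumption that the "Chronos-2" alias is accepted as a hyperparameters key are all illustrative rather than taken from this diff.

import pandas as pd
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

# Tiny synthetic dataset: one item, hourly frequency.
df = pd.DataFrame(
    {
        "item_id": ["A"] * 200,
        "timestamp": pd.date_range("2024-01-01", periods=200, freq="h"),
        "target": range(200),
    }
)
train_data = TimeSeriesDataFrame.from_data_frame(df, id_column="item_id", timestamp_column="timestamp")

predictor = TimeSeriesPredictor(prediction_length=24).fit(
    train_data,
    hyperparameters={
        "Chronos-2": {               # alias from ag_model_aliases above
            "fine_tune": True,       # LoRA is the default fine_tune_mode
            "fine_tune_steps": 500,  # illustrative value
        }
    },
)
predictions = predictor.predict(train_data)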
autogluon/timeseries/models/chronos/model.py (+30 -25)
@@ -3,7 +3,7 @@ import os
 import shutil
 import warnings
 from pathlib import Path
-from typing import Any
+from typing import Any

 import numpy as np
 import pandas as pd
@@ -176,8 +176,9 @@ class ChronosModel(AbstractTimeSeriesModel):
         The size of the shuffle buffer to shuffle the data during fine-tuning. If None, shuffling will
         be turned off.
     eval_during_fine_tune : bool, default = False
-        If True, validation will be performed during fine-tuning to select the best checkpoint.
-
+        If True, validation will be performed during fine-tuning to select the best checkpoint. Setting this
+        argument to True may result in slower fine-tuning. This parameter is ignored if ``skip_model_selection=True``
+        in ``TimeSeriesPredictor.fit``.
     fine_tune_eval_max_items : int, default = 256
         The maximum number of randomly-sampled time series to use from the validation set for evaluation
         during fine-tuning. If None, the entire validation dataset will be used.
@@ -185,6 +186,8 @@ class ChronosModel(AbstractTimeSeriesModel):
         Extra keyword arguments passed to ``transformers.TrainingArguments``
     keep_transformers_logs : bool, default = False
         If True, the logs generated by transformers will NOT be removed after fine-tuning
+    revision : str, default = None
+        Model revision to use (branch name or commit hash). If None, the default branch (usually "main") is used.
     """

     ag_priority = 55
@@ -196,12 +199,12 @@ class ChronosModel(AbstractTimeSeriesModel):

     def __init__(
         self,
-        freq:
+        freq: str | None = None,
         prediction_length: int = 1,
-        path:
-        name:
-        eval_metric:
-        hyperparameters:
+        path: str | None = None,
+        name: str | None = None,
+        eval_metric: str | None = None,
+        hyperparameters: dict[str, Any] | None = None,
         **kwargs,  # noqa
     ):
         hyperparameters = hyperparameters if hyperparameters is not None else {}
@@ -226,9 +229,9 @@ class ChronosModel(AbstractTimeSeriesModel):
             **kwargs,
         )

-        self._model_pipeline:
+        self._model_pipeline: Any | None = None  # of type BaseChronosPipeline

-    def save(self, path:
+    def save(self, path: str | None = None, verbose: bool = True) -> str:
         pipeline = self._model_pipeline
         self._model_pipeline = None
         path = super().save(path=path, verbose=verbose)
@@ -292,8 +295,8 @@ class ChronosModel(AbstractTimeSeriesModel):
         """
         return self.ag_default_config.get("default_torch_dtype", "auto")

-    def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str,
-        minimum_resources: dict[str,
+    def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str, int | float]:
+        minimum_resources: dict[str, int | float] = {"num_cpus": 1}
         # if GPU is available, we train with 1 GPU per trial
         if is_gpu_available:
             minimum_resources["num_gpus"] = self.min_num_gpus
@@ -311,13 +314,14 @@ class ChronosModel(AbstractTimeSeriesModel):
                 "`import torch; torch.cuda.is_available()` returns `True`."
             )

-        device = self.device or
+        device = (self.device or "cuda") if gpu_available else "cpu"

         assert self.model_path is not None
         pipeline = BaseChronosPipeline.from_pretrained(
             self.model_path,
             device_map=device,
             torch_dtype=self.torch_dtype,
+            revision=self.get_hyperparameter("revision"),
         )

         self._model_pipeline = pipeline
@@ -360,6 +364,7 @@ class ChronosModel(AbstractTimeSeriesModel):
             "eval_during_fine_tune": False,
             "fine_tune_eval_max_items": 256,
             "fine_tune_shuffle_buffer_size": 10_000,
+            "revision": None,
         }

     @property
@@ -381,6 +386,7 @@ class ChronosModel(AbstractTimeSeriesModel):
             "fine_tune_eval_max_items",
             "fine_tune_trainer_kwargs",
             "keep_transformers_logs",
+            "revision",
         ]

     def _get_fine_tune_trainer_kwargs(self, init_args, eval_during_fine_tune: bool):
@@ -437,10 +443,10 @@ class ChronosModel(AbstractTimeSeriesModel):
     def _fit(
         self,
         train_data: TimeSeriesDataFrame,
-        val_data:
-        time_limit:
-        num_cpus:
-        num_gpus:
+        val_data: TimeSeriesDataFrame | None = None,
+        time_limit: float | None = None,
+        num_cpus: int | None = None,
+        num_gpus: int | None = None,
         verbosity: int = 2,
         **kwargs,
     ) -> None:
@@ -462,11 +468,10 @@ class ChronosModel(AbstractTimeSeriesModel):
         # verbosity < 3: all logs and warnings from transformers will be suppressed
         # verbosity >= 3: progress bar and loss logs will be logged
         # verbosity 4: everything will be logged
-        verbosity = kwargs.get("verbosity", 2)
         for logger_name in logging.root.manager.loggerDict:
             if "transformers" in logger_name:
                 transformers_logger = logging.getLogger(logger_name)
-                transformers_logger.setLevel(logging.ERROR if verbosity <= 3 else logging.
+                transformers_logger.setLevel(logging.ERROR if verbosity <= 3 else logging.WARNING)

         self._check_fit_params()
         self._log_unused_hyperparameters()
@@ -558,7 +563,7 @@ class ChronosModel(AbstractTimeSeriesModel):
         if time_limit is not None:
             callbacks.append(TimeLimitCallback(time_limit=time_limit))

-        tokenizer_val_dataset:
+        tokenizer_val_dataset: ChronosFineTuningDataset | None = None
         if val_data is not None:
             callbacks.append(EvaluateAndSaveFinalStepCallback())
             # evaluate on a randomly-sampled subset
@@ -599,7 +604,7 @@ class ChronosModel(AbstractTimeSeriesModel):
         if verbosity >= 3:
             logger.warning(
                 "Transformers logging is turned on during fine-tuning. Note that losses reported by transformers "
-                "
+                "do not correspond to those specified via `eval_metric`."
             )
             trainer.add_callback(LoggerCallback())

@@ -619,7 +624,7 @@ class ChronosModel(AbstractTimeSeriesModel):
         context_length: int,
         batch_size: int,
         num_workers: int = 0,
-        time_limit:
+        time_limit: float | None = None,
     ):
         from .utils import ChronosInferenceDataLoader, ChronosInferenceDataset, timeout_callback

@@ -634,7 +639,7 @@ class ChronosModel(AbstractTimeSeriesModel):
             batch_size=batch_size,
             shuffle=False,
             num_workers=num_workers,
-
+            after_batch=timeout_callback(seconds=time_limit),
         )

     def _get_context_length(self, data: TimeSeriesDataFrame) -> int:
@@ -647,7 +652,7 @@ class ChronosModel(AbstractTimeSeriesModel):
     def _predict(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
+        known_covariates: TimeSeriesDataFrame | None = None,
         **kwargs,
     ) -> TimeSeriesDataFrame:
         from chronos import ChronosBoltPipeline, ChronosPipeline
@@ -725,7 +730,7 @@ class ChronosModel(AbstractTimeSeriesModel):
         return TimeSeriesDataFrame(df)

     def _more_tags(self) -> dict:
-        do_fine_tune = self.
+        do_fine_tune = self.get_hyperparameter("fine_tune")
         return {
             "allow_nan": True,
             "can_use_train_data": do_fine_tune,
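The model.py changes above thread a new "revision" hyperparameter into BaseChronosPipeline.from_pretrained. A short sketch of pinning a revision for the existing Chronos model follows; the model path shown is a publicly available Chronos-Bolt checkpoint, and train_data is assumed to be a TimeSeriesDataFrame prepared as in the earlier sketch.

from autogluon.timeseries import TimeSeriesPredictor

# Sketch: pin the Hugging Face revision (branch or commit hash) used to load the
# Chronos checkpoint. Leaving "revision" as None keeps the default branch.
predictor = TimeSeriesPredictor(prediction_length=24)
predictor.fit(
    train_data,
    hyperparameters={
        "Chronos": {
            "model_path": "autogluon/chronos-bolt-small",
            "revision": "main",
        }
    },
)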
autogluon/timeseries/models/chronos/utils.py (+5 -5)
@@ -1,7 +1,7 @@
 import logging
 import time
 from itertools import chain, cycle
-from typing import TYPE_CHECKING, Callable, Iterable, Iterator, Literal
+from typing import TYPE_CHECKING, Callable, Iterable, Iterator, Literal

 import numpy as np
 import torch
@@ -93,7 +93,7 @@ class ChronosFineTuningDataset(IterableDataset):
         target_column: str = "target",
         context_length: int = 512,
         prediction_length: int = 64,
-        tokenizer:
+        tokenizer: "ChronosTokenizer | None" = None,
         mode: Literal["training", "validation"] = "training",
     ) -> None:
         super().__init__()
@@ -194,7 +194,7 @@ class ChronosFineTuningDataset(IterableDataset):
         for entry in iterable:
             yield format_transform_fn(entry)

-    def shuffle(self, shuffle_buffer_size:
+    def shuffle(self, shuffle_buffer_size: int | None = None):
         """Returns a (pseudo) shuffled version of this iterable dataset.

         Parameters
@@ -255,7 +255,7 @@ class ChronosInferenceDataset:

 class ChronosInferenceDataLoader(torch.utils.data.DataLoader):
     def __init__(self, *args, **kwargs):
-        self.callback: Callable = kwargs.pop("
+        self.callback: Callable = kwargs.pop("after_batch", lambda: None)
         super().__init__(*args, **kwargs)

     def __iter__(self):  # type: ignore
@@ -305,7 +305,7 @@ class LoggerCallback(TrainerCallback):
         logger.info(logs)


-def timeout_callback(seconds:
+def timeout_callback(seconds: float | None) -> Callable:
     """Return a callback object that raises an exception if time limit is exceeded."""
     start_time = time.monotonic()

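The utils.py changes rename the data loader's per-batch hook to "after_batch" and give timeout_callback an explicit signature. The sketch below illustrates the general pattern: the loader pops an "after_batch" callable and invokes it after every batch, and timeout_callback returns a closure that raises once the elapsed time exceeds the limit. The exception type used here (TimeoutError) is an assumption; the real implementation raises AutoGluon's own time-limit exception.

import time

def timeout_callback(seconds):
    """Return a callback that raises once the time limit is exceeded (sketch)."""
    start_time = time.monotonic()

    def callback() -> None:
        if seconds is not None and time.monotonic() - start_time > seconds:
            raise TimeoutError(f"Time limit of {seconds} seconds exceeded")

    return callback

after_batch = timeout_callback(seconds=0.5)
for _ in range(10):
    time.sleep(0.01)  # stand-in for running inference on one batch
    after_batch()     # would raise once more than 0.5 seconds have elapsed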
autogluon/timeseries/models/ensemble/__init__.py (+17 -10)
@@ -1,28 +1,35 @@
 from .abstract import AbstractTimeSeriesEnsembleModel
-from .array_based import MedianEnsemble, PerQuantileTabularEnsemble, TabularEnsemble
+from .array_based import LinearStackerEnsemble, MedianEnsemble, PerQuantileTabularEnsemble, TabularEnsemble
+from .per_item_greedy import PerItemGreedyEnsemble
 from .weighted import GreedyEnsemble, PerformanceWeightedEnsemble, SimpleAverageEnsemble


 def get_ensemble_class(name: str):
     mapping = {
-        "
-        "
-        "
-        "
-        "
-        "
-        "
+        "Greedy": GreedyEnsemble,
+        "PerItemGreedy": PerItemGreedyEnsemble,
+        "PerformanceWeighted": PerformanceWeightedEnsemble,
+        "SimpleAverage": SimpleAverageEnsemble,
+        "Weighted": GreedyEnsemble,  # old alias for this model
+        "Median": MedianEnsemble,
+        "Tabular": TabularEnsemble,
+        "PerQuantileTabular": PerQuantileTabularEnsemble,
+        "LinearStacker": LinearStackerEnsemble,
     }
-
+
+    name_clean = name.removesuffix("Ensemble")
+    if name_clean not in mapping:
         raise ValueError(f"Unknown ensemble type: {name}. Available: {list(mapping.keys())}")
-    return mapping[
+    return mapping[name_clean]


 __all__ = [
     "AbstractTimeSeriesEnsembleModel",
     "GreedyEnsemble",
+    "LinearStackerEnsemble",
     "MedianEnsemble",
     "PerformanceWeightedEnsemble",
+    "PerItemGreedyEnsemble",
     "PerQuantileTabularEnsemble",
     "SimpleAverageEnsemble",
     "TabularEnsemble",