autogluon.timeseries 1.1.2b20241113.tar.gz → 1.1.2b20241114.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/PKG-INFO +1 -1
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/chronos/model.py +269 -12
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/chronos/pipeline/base.py +14 -1
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/chronos/pipeline/chronos.py +86 -19
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +8 -1
- autogluon.timeseries-1.1.2b20241114/src/autogluon/timeseries/models/chronos/pipeline/utils.py +338 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/gluonts/abstract_gluonts.py +33 -22
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/gluonts/torch/models.py +39 -27
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/version.py +1 -1
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon.timeseries.egg-info/PKG-INFO +1 -1
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon.timeseries.egg-info/requires.txt +3 -3
- autogluon.timeseries-1.1.2b20241113/src/autogluon/timeseries/models/chronos/pipeline/utils.py +0 -102
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/setup.cfg +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/setup.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/configs/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/configs/presets_configs.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/dataset/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/dataset/ts_dataframe.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/evaluator.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/learner.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/metrics/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/metrics/abstract.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/metrics/point.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/metrics/quantile.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/metrics/utils.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/abstract/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/abstract/abstract_timeseries_model.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/abstract/model_trial.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/autogluon_tabular/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/autogluon_tabular/mlforecast.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/autogluon_tabular/transforms.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/autogluon_tabular/utils.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/chronos/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/ensemble/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/gluonts/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/local/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/local/abstract_local_model.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/local/naive.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/local/npts.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/local/statsforecast.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/multi_window/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/multi_window/multi_window_model.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/presets.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/predictor.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/regressor.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/splitter.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/trainer/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/trainer/abstract_trainer.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/trainer/auto_trainer.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/transforms/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/transforms/scaler.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/utils/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/utils/datetime/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/utils/datetime/base.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/utils/datetime/lags.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/utils/datetime/seasonality.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/utils/datetime/time_features.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/utils/features.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/utils/forecast.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/utils/warning_filters.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon.timeseries.egg-info/SOURCES.txt +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon.timeseries.egg-info/dependency_links.txt +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon.timeseries.egg-info/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon.timeseries.egg-info/top_level.txt +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon.timeseries.egg-info/zip-safe +0 -0
src/autogluon/timeseries/models/chronos/model.py

@@ -1,5 +1,8 @@
 import logging
 import os
+import shutil
+import time
+from pathlib import Path
 from typing import Any, Dict, Literal, Optional, Union
 
 import numpy as np
@@ -72,9 +75,10 @@ MODEL_ALIASES = {
 
 
 class ChronosModel(AbstractTimeSeriesModel):
-    """Chronos pretrained time series forecasting models
+    """Chronos [Ansari2024]_ pretrained time series forecasting models which can be used for zero-shot forecasting or fine-tuned
+    in a task-specific manner. Models can be based on the original
     `ChronosModel <https://github.com/amazon-science/chronos-forecasting/blob/main/src/chronos/chronos.py>`_ implementation,
-    as well as a newer family of Chronos-Bolt models
+    as well as a newer family of Chronos-Bolt models capable of much faster inference.
 
     The original Chronos is a family of pretrained models, based on the T5 family, with number of parameters ranging between
     8M and 710M. The full collection of Chronos models is available on
@@ -88,6 +92,9 @@ class ChronosModel(AbstractTimeSeriesModel):
     time series is then fed into a T5 model for forecasting. The Chronos-Bolt variants are capable of much faster inference,
     and can all run on CPUs. Chronos-Bolt models are also available on Hugging Face <https://huggingface.co/autogluon/>`_.
 
+    Both Chronos and Chronos-Bolt variants can be fine-tuned by setting ``fine_tune=True`` and selecting appropriate
+    fine-tuning parameters such as the learning rate (``fine_tune_lr``) and max steps (``fine_tune_steps``).
+
     References
     ----------
     .. [Ansari2024] Ansari, Abdul Fatir, Stella, Lorenzo et al.
@@ -108,8 +115,8 @@ class ChronosModel(AbstractTimeSeriesModel):
     num_samples : int, default = 20
         Number of samples used during inference
     device : str, default = None
-        Device to use for inference. If None, model will use the GPU if available.
-        `small`, `base`, and `large`; inference will fail if no GPU is available.
+        Device to use for inference (and fine-tuning, if enabled). If None, model will use the GPU if available.
+        For larger model sizes `small`, `base`, and `large`; inference will fail if no GPU is available.
     context_length : int or None, default = None
         The context length to use in the model. Shorter context lengths will decrease model accuracy, but result
         in faster inference. If None, the model will infer context length from the data set length at inference
@@ -129,12 +136,34 @@ class ChronosModel(AbstractTimeSeriesModel):
     data_loader_num_workers : int, default = 0
         Number of worker processes to be used in the data loader. See documentation on ``torch.utils.data.DataLoader``
         for more information.
+    fine_tune : bool, default = False
+        If True, the pretrained model will be fine-tuned
+    fine_tune_lr: float, default = 0.0001
+        The learning rate used for fine-tuning
+    fine_tune_steps : int, default = 5000
+        The number of gradient update steps to fine-tune for
+    fine_tune_batch_size : int, default = 16
+        The batch size to use for fine-tuning
+    fine_tune_shuffle_buffer_size : int, default = 10000
+        The size of the shuffle buffer to shuffle the data during fine-tuning. If None, shuffling will
+        be turned off.
+    eval_during_fine_tune : bool, default = False
+        If True, validation will be performed during fine-tuning to select the best checkpoint.
+        Setting this argument to True may result in slower fine-tuning.
+    fine_tune_eval_max_items : int, default = 256
+        The maximum number of randomly-sampled time series to use from the validation set for evaluation
+        during fine-tuning. If None, the entire validation dataset will be used.
+    fine_tune_trainer_kwargs : dict, optional
+        Extra keyword arguments passed to ``transformers.TrainingArguments``
+    keep_transformers_logs: bool, default = False
+        If True, the logs generated by transformers will NOT be removed after fine-tuning
     """
 
     # default number of samples for prediction
     default_num_samples: int = 20
     default_model_path = "autogluon/chronos-t5-small"
     maximum_context_length = 2048
+    fine_tuned_ckpt_name: str = "fine-tuned-ckpt"
 
     def __init__(
         self,
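For context on how the hyperparameters documented above are consumed, here is a minimal usage sketch (not part of the diff) that passes them through TimeSeriesPredictor; the file name and parameter values are illustrative assumptions.

from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

# Hypothetical long-format dataset with item_id / timestamp / target columns.
train_data = TimeSeriesDataFrame.from_path("train.csv")

predictor = TimeSeriesPredictor(prediction_length=48).fit(
    train_data,
    hyperparameters={
        "Chronos": {
            "model_path": "autogluon/chronos-t5-small",
            "fine_tune": True,            # enable the fine-tuning path added in this diff
            "fine_tune_lr": 1e-4,
            "fine_tune_steps": 1000,
        }
    },
)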
@@ -202,6 +231,12 @@ class ChronosModel(AbstractTimeSeriesModel):
         model = load_pkl.load(path=os.path.join(path, cls.model_file_name), verbose=verbose)
         if reset_paths:
             model.set_contexts(path)
+
+        fine_tune_ckpt_path = Path(model.path) / cls.fine_tuned_ckpt_name
+        if fine_tune_ckpt_path.exists():
+            logger.debug(f"Fine-tuned checkpoint exists, setting model_path to {fine_tune_ckpt_path}")
+            model.model_path = fine_tune_ckpt_path
+
         return model
 
     def _is_gpu_available(self) -> bool:
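A rough sketch (not from the diff) of what the checkpoint lookup added to load() amounts to; the directory name below is hypothetical, only "fine-tuned-ckpt" comes from the class attribute above.

from pathlib import Path

model_dir = Path("AutogluonModels/ag-example/models/Chronos")  # hypothetical saved-model directory
fine_tune_ckpt_path = model_dir / "fine-tuned-ckpt"            # cls.fine_tuned_ckpt_name

if fine_tune_ckpt_path.exists():
    # the loaded model reads weights from the local fine-tuned checkpoint
    # instead of the original Hugging Face model_path
    model_path = fine_tune_ckpt_path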
@@ -245,7 +280,7 @@ class ChronosModel(AbstractTimeSeriesModel):
             minimum_resources["num_gpus"] = self.min_num_gpus
         return minimum_resources
 
-    def load_model_pipeline(self):
+    def load_model_pipeline(self, is_training: bool = False):
         from .pipeline import BaseChronosPipeline
 
         gpu_available = self._is_gpu_available()
@@ -262,8 +297,9 @@ class ChronosModel(AbstractTimeSeriesModel):
         pipeline = BaseChronosPipeline.from_pretrained(
             self.model_path,
             device_map=device,
+            # optimization cannot be used during fine-tuning
+            optimization_strategy=None if is_training else self.optimization_strategy,
             torch_dtype=self.torch_dtype,
-            optimization_strategy=self.optimization_strategy,
         )
 
         self.model_pipeline = pipeline
@@ -272,6 +308,59 @@ class ChronosModel(AbstractTimeSeriesModel):
         self.load_model_pipeline()
         return self
 
+    def _has_tf32(self):
+        import torch.cuda
+
+        return torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
+
+    def _get_model_params(self) -> dict:
+        """Gets params that are passed to the inner model."""
+        init_args = super()._get_model_params().copy()
+
+        init_args.setdefault("fine_tune", False)
+        init_args.setdefault("keep_transformers_logs", False)
+        init_args.setdefault("fine_tune_lr", 1e-4)
+        init_args.setdefault("fine_tune_steps", 5000)
+        init_args.setdefault("fine_tune_batch_size", self.default_batch_size)
+        init_args.setdefault("eval_during_fine_tune", False)
+        init_args.setdefault("fine_tune_eval_max_items", 256)
+        init_args.setdefault("fine_tune_shuffle_buffer_size", 10_000)
+
+        eval_during_fine_tune = init_args["eval_during_fine_tune"]
+        output_dir = Path(self.path) / "transformers_logs"
+        fine_tune_trainer_kwargs = dict(
+            output_dir=str(output_dir),
+            per_device_train_batch_size=init_args["fine_tune_batch_size"],
+            per_device_eval_batch_size=init_args["fine_tune_batch_size"],
+            learning_rate=init_args["fine_tune_lr"],
+            lr_scheduler_type="linear",
+            warmup_ratio=0.0,
+            optim="adamw_torch_fused",
+            logging_dir=str(output_dir),
+            logging_strategy="steps",
+            logging_steps=100,
+            report_to="none",
+            max_steps=init_args["fine_tune_steps"],
+            gradient_accumulation_steps=1,
+            dataloader_num_workers=self.data_loader_num_workers,
+            tf32=self._has_tf32(),
+            save_only_model=True,
+            prediction_loss_only=True,
+            save_total_limit=1,
+            save_strategy="steps" if eval_during_fine_tune else "no",
+            save_steps=100 if eval_during_fine_tune else None,
+            evaluation_strategy="steps" if eval_during_fine_tune else "no",
+            eval_steps=100 if eval_during_fine_tune else None,
+            load_best_model_at_end=True if eval_during_fine_tune else False,
+            metric_for_best_model="eval_loss" if eval_during_fine_tune else None,
+        )
+        user_fine_tune_trainer_kwargs = init_args.get("fine_tune_trainer_kwargs", {})
+        fine_tune_trainer_kwargs.update(user_fine_tune_trainer_kwargs)
+
+        init_args["fine_tune_trainer_kwargs"] = fine_tune_trainer_kwargs
+
+        return init_args
+
     def _fit(
         self,
         train_data: TimeSeriesDataFrame,
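The fine_tune_trainer_kwargs handling in _get_model_params above is a plain dict merge in which user-supplied keys override the defaults; a small self-contained illustration with made-up values:

defaults = {"learning_rate": 1e-4, "max_steps": 5000, "report_to": "none"}
user_fine_tune_trainer_kwargs = {"max_steps": 1000, "warmup_ratio": 0.1}

defaults.update(user_fine_tune_trainer_kwargs)  # same pattern as fine_tune_trainer_kwargs.update(...)
assert defaults == {"learning_rate": 1e-4, "max_steps": 1000, "report_to": "none", "warmup_ratio": 0.1}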
@@ -279,8 +368,171 @@ class ChronosModel(AbstractTimeSeriesModel):
         time_limit: int = None,
         **kwargs,
     ) -> None:
+        from transformers.trainer import PrinterCallback, Trainer, TrainingArguments
+
+        from .pipeline import ChronosBoltPipeline, ChronosPipeline
+        from .pipeline.utils import (
+            ChronosFineTuningDataset,
+            EvaluateAndSaveFinalStepCallback,
+            LoggerCallback,
+            TimeLimitCallback,
+        )
+
+        # TODO: Add support for fine-tuning models with context_length longer than the pretrained model
+
+        # verbosity < 3: all logs and warnings from transformers will be suppressed
+        # verbosity >= 3: progress bar and loss logs will be logged
+        # verbosity 4: everything will be logged
+        verbosity = kwargs.get("verbosity", 2)
+        for logger_name in logging.root.manager.loggerDict:
+            if "transformers" in logger_name:
+                transformers_logger = logging.getLogger(logger_name)
+                transformers_logger.setLevel(logging.ERROR if verbosity <= 3 else logging.INFO)
+
         self._check_fit_params()
-
+
+        fine_tune_args = self._get_model_params()
+        do_fine_tune = fine_tune_args["fine_tune"]
+
+        if do_fine_tune:
+            assert train_data is not None, "train_data cannot be None when fine_tune=True"
+
+        eval_during_fine_tune = val_data is not None and fine_tune_args["eval_during_fine_tune"]
+
+        start_time = time.monotonic()
+        if do_fine_tune:
+            context_length = self._get_context_length(train_data)
+            # load model pipeline to device memory
+            self.load_model_pipeline(is_training=True)
+
+            fine_tune_prediction_length = self.prediction_length
+            model_prediction_length = self.model_pipeline.inner_model.config.chronos_config["prediction_length"]
+
+            if isinstance(self.model_pipeline, ChronosPipeline):
+                pipeline_specific_trainer_kwargs = {}
+
+                # Update prediction_length of the model
+                # NOTE: We only do this for ChronosPipeline because the prediction length of ChronosBolt models
+                # is fixed due to direct multistep forecasting setup
+                self.model_pipeline.model.config.prediction_length = fine_tune_prediction_length
+                self.model_pipeline.inner_model.config.chronos_config["prediction_length"] = (
+                    fine_tune_prediction_length
+                )
+
+            elif isinstance(self.model_pipeline, ChronosBoltPipeline):
+                # custom label_names is needed for validation to work with ChronosBolt models
+                pipeline_specific_trainer_kwargs = dict(label_names=["target"])
+
+                # truncate prediction_length if it goes beyond ChronosBolt's prediction_length
+                fine_tune_prediction_length = min(model_prediction_length, self.prediction_length)
+
+                if self.prediction_length != fine_tune_prediction_length:
+                    logger.debug(
+                        f"ChronosBolt models can only be fine-tuned with a maximum prediction_length of {model_prediction_length}. "
+                        f"Fine-tuning prediction_length has been changed to {fine_tune_prediction_length}."
+                    )
+
+            fine_tune_trainer_kwargs = fine_tune_args["fine_tune_trainer_kwargs"]
+            fine_tune_trainer_kwargs["disable_tqdm"] = fine_tune_trainer_kwargs.get("disable_tqdm", (verbosity < 3))
+            fine_tune_trainer_kwargs["use_cpu"] = str(self.model_pipeline.inner_model.device) == "cpu"
+            output_dir = Path(fine_tune_trainer_kwargs["output_dir"])
+
+            if not eval_during_fine_tune:
+                # turn off eval-related trainer args
+                fine_tune_trainer_kwargs["evaluation_strategy"] = "no"
+                fine_tune_trainer_kwargs["eval_steps"] = None
+                fine_tune_trainer_kwargs["load_best_model_at_end"] = False
+                fine_tune_trainer_kwargs["metric_for_best_model"] = None
+
+            training_args = TrainingArguments(**fine_tune_trainer_kwargs, **pipeline_specific_trainer_kwargs)
+            tokenizer_train_dataset = ChronosFineTuningDataset(
+                target_df=train_data,
+                target_column=self.target,
+                context_length=context_length,
+                prediction_length=fine_tune_prediction_length,
+                # if tokenizer exists, then the data is returned in the HF-style format accepted by
+                # the original Chronos models otherwise the data is returned in ChronosBolt's format
+                tokenizer=getattr(self.model_pipeline, "tokenizer", None),
+                mode="training",
+            ).shuffle(fine_tune_args["fine_tune_shuffle_buffer_size"])
+
+            callbacks = []
+            if time_limit is not None:
+                callbacks.append(TimeLimitCallback(time_limit=time_limit))
+
+            if val_data is not None:
+                callbacks.append(EvaluateAndSaveFinalStepCallback())
+                # evaluate on a randomly-sampled subset
+                fine_tune_eval_max_items = (
+                    min(val_data.num_items, fine_tune_args["fine_tune_eval_max_items"])
+                    if fine_tune_args["fine_tune_eval_max_items"] is not None
+                    else val_data.num_items
+                )
+
+                if fine_tune_eval_max_items < val_data.num_items:
+                    eval_items = np.random.choice(
+                        val_data.item_ids.values, size=fine_tune_eval_max_items, replace=False
+                    )
+                    val_data = val_data.loc[eval_items]
+
+                tokenizer_val_dataset = ChronosFineTuningDataset(
+                    target_df=val_data,
+                    target_column=self.target,
+                    context_length=context_length,
+                    prediction_length=fine_tune_prediction_length,
+                    tokenizer=getattr(self.model_pipeline, "tokenizer", None),
+                    mode="validation",
+                )
+
+            trainer = Trainer(
+                model=self.model_pipeline.inner_model,
+                args=training_args,
+                train_dataset=tokenizer_train_dataset,
+                eval_dataset=tokenizer_val_dataset if val_data is not None else None,
+                callbacks=callbacks,
+            )
+
+            # remove PrinterCallback from callbacks which logs to the console via a print() call,
+            # so it cannot be handled by setting the log level
+            trainer.pop_callback(PrinterCallback)
+
+            if verbosity >= 3:
+                logger.warning(
+                    "Transformers logging is turned on during fine-tuning. Note that losses reported by transformers "
+                    "may not correspond to those specified via `eval_metric`."
+                )
+                trainer.add_callback(LoggerCallback())
+
+            if val_data is not None:
+                # evaluate once before training
+                zero_shot_eval_loss = trainer.evaluate()["eval_loss"]
+
+            trainer.train()
+
+            if eval_during_fine_tune:
+                # get the best eval_loss logged during fine-tuning
+                log_history_df = pd.DataFrame(trainer.state.log_history)
+                best_train_eval_loss = log_history_df["eval_loss"].min()
+            elif val_data is not None:
+                # evaluate at the end of fine-tuning
+                best_train_eval_loss = trainer.evaluate()["eval_loss"]
+
+            if val_data is None or best_train_eval_loss <= zero_shot_eval_loss:
+                fine_tuned_ckpt_path = Path(self.path) / self.fine_tuned_ckpt_name
+                logger.info(f"Saving fine-tuned model to {fine_tuned_ckpt_path}")
+                self.model_pipeline.inner_model.save_pretrained(Path(self.path) / self.fine_tuned_ckpt_name)
+            else:
+                # Reset the model to its pretrained state
+                logger.info("Validation loss worsened after fine-tuning. Reverting to the pretrained model.")
+                self.model_pipeline = None
+                self.load_model_pipeline(is_training=False)
+
+            if not fine_tune_args["keep_transformers_logs"]:
+                logger.debug(f"Removing transformers_logs directory {output_dir}")
+                shutil.rmtree(output_dir)
+
+        if time_limit is not None:
+            self.time_limit = time_limit - (time.monotonic() - start_time)  # inference time budget
 
     def _get_inference_data_loader(
         self,
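Condensing the tail of _fit above: fine-tuned weights are kept only if they do not hurt the validation loss, and whatever time remains is budgeted for inference. A sketch with hypothetical names and numbers:

import time

def should_keep_fine_tuned(has_val_data: bool, best_train_eval_loss: float, zero_shot_eval_loss: float) -> bool:
    # without validation data the fine-tuned checkpoint is always saved; otherwise it must
    # match or beat the zero-shot eval_loss, or the pretrained pipeline is reloaded instead
    return (not has_val_data) or best_train_eval_loss <= zero_shot_eval_loss

start_time = time.monotonic()
# ... fine-tuning would run here ...
time_limit = 600.0  # hypothetical overall budget in seconds
remaining_inference_budget = time_limit - (time.monotonic() - start_time)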
@@ -305,6 +557,13 @@ class ChronosModel(AbstractTimeSeriesModel):
             on_batch=timeout_callback(seconds=time_limit),
         )
 
+    def _get_context_length(self, data: TimeSeriesDataFrame) -> int:
+        context_length = self.context_length or min(
+            data.num_timesteps_per_item().max(),
+            self.maximum_context_length,
+        )
+        return context_length
+
     def _predict(
         self,
         data: TimeSeriesDataFrame,
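The new _get_context_length helper returns the user-specified context length if one is set, otherwise the longest series length capped at maximum_context_length; a worked example with made-up numbers:

maximum_context_length = 2048   # ChronosModel.maximum_context_length
user_context_length = None      # self.context_length, unset by the user
longest_series_length = 3500    # data.num_timesteps_per_item().max()

context_length = user_context_length or min(longest_series_length, maximum_context_length)
assert context_length == 2048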
@@ -319,15 +578,13 @@ class ChronosModel(AbstractTimeSeriesModel):
         # Note that this is independent of the model's own context length set in the model's config file.
         # For example, if the context_length is set to 2048 here but the model expects context length
         # (according to its config.json file) of 512, it will further truncate the series during inference.
-        context_length = self.context_length or min(
-            data.num_timesteps_per_item().max(),
-            self.maximum_context_length,
-        )
+        context_length = self._get_context_length(data)
 
         with warning_filter(all_warnings=True):
             import torch
 
             if self.model_pipeline is None:
+                # FIXME: optimization_strategy is ignored when model is fine-tuned
                 # load model pipeline to device memory
                 self.load_model_pipeline()
 
@@ -366,7 +623,7 @@ class ChronosModel(AbstractTimeSeriesModel):
         return TimeSeriesDataFrame(df)
 
     def _more_tags(self) -> Dict:
-        return {"allow_nan": True}
+        return {"allow_nan": True, "can_use_val_data": self._get_model_params()["fine_tune"]}
 
     def score_and_cache_oof(
         self,
src/autogluon/timeseries/models/chronos/pipeline/base.py

@@ -2,12 +2,15 @@
 
 from enum import Enum
 from pathlib import Path
-from typing import Dict, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
 
 import torch
 
 from .utils import left_pad_and_stack_1D
 
+if TYPE_CHECKING:
+    from transformers import PreTrainedModel
+
 
 class ForecastType(Enum):
     SAMPLES = "samples"
@@ -36,6 +39,16 @@ class BaseChronosPipeline(metaclass=PipelineRegistry):
         "float64": torch.float64,
     }
 
+    def __init__(self, inner_model: "PreTrainedModel"):
+        """
+        Parameters
+        ----------
+        inner_model : PreTrainedModel
+            A hugging-face transformers PreTrainedModel, e.g., T5ForConditionalGeneration
+        """
+        # for easy access to the inner HF-style model
+        self.inner_model = inner_model
+
     def _prepare_and_validate_context(self, context: Union[torch.Tensor, List[torch.Tensor]]):
         if isinstance(context, list):
             context = left_pad_and_stack_1D(context)
src/autogluon/timeseries/models/chronos/pipeline/chronos.py

@@ -65,9 +65,12 @@ class ChronosTokenizer:
     which concrete classes must implement.
     """
 
-    def
+    def context_input_transform(
+        self,
+        context: torch.Tensor,
+    ) -> Tuple:
         """
-        Turn a batch of time series into token IDs, attention
+        Turn a batch of time series into token IDs, attention mask, and tokenizer_state.
 
         Parameters
         ----------
@@ -87,9 +90,40 @@ class ChronosTokenizer:
             which input observations are not ``torch.nan`` (i.e. not
             missing nor padding).
         tokenizer_state
-            An object that
-            Contains the relevant
-
+            An object that can be passed to ``label_input_transform``
+            and ``output_transform``. Contains the relevant information
+            to decode output samples into real values,
+            such as location and scale parameters.
+        """
+        raise NotImplementedError()
+
+    def label_input_transform(self, label: torch.Tensor, tokenizer_state: Any) -> Tuple:
+        """
+        Turn a batch of label slices of time series into token IDs and attention mask
+        using the ``tokenizer_state`` provided by ``context_input_transform``.
+
+        Parameters
+        ----------
+        label
+            A tensor shaped (batch_size, time_length), containing the
+            timeseries label, i.e., the ground-truth future values.
+        tokenizer_state
+            An object returned by ``context_input_transform`` containing
+            relevant information to preprocess data, such as location and
+            scale. The nature of this depends on the specific tokenizer.
+            This is used for tokenizing the label, in order to use the same
+            scaling used to tokenize the context.
+
+        Returns
+        -------
+        token_ids
+            A tensor of integers, shaped (batch_size, time_length + 1)
+            if ``config.use_eos_token`` and (batch_size, time_length)
+            otherwise, containing token IDs for the input series.
+        attention_mask
+            A boolean tensor, same shape as ``token_ids``, indicating
+            which input observations are not ``torch.nan`` (i.e. not
+            missing nor padding).
         """
         raise NotImplementedError()
 
@@ -117,6 +151,11 @@ class ChronosTokenizer:
 
 
 class MeanScaleUniformBins(ChronosTokenizer):
+    """
+    A tokenizer that performs mean scaling and then quantizes the scaled time series into
+    uniformly-spaced bins between some bounds on the real line.
+    """
+
     def __init__(self, low_limit: float, high_limit: float, config: ChronosConfig) -> None:
         self.config = config
         self.centers = torch.linspace(
@@ -132,15 +171,15 @@ class MeanScaleUniformBins(ChronosTokenizer):
             )
         )
 
-    def
-
+    def _input_transform(
+        self, context: torch.Tensor, scale: Optional[torch.Tensor] = None
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        attention_mask = ~torch.isnan(context)
 
-        if
-
+        if scale is None:
+            scale = torch.nansum(torch.abs(context) * attention_mask, dim=-1) / torch.nansum(attention_mask, dim=-1)
+            scale[~(scale > 0)] = 1.0
 
-        attention_mask = ~torch.isnan(context)
-        scale = torch.nansum(torch.abs(context) * attention_mask, dim=-1) / torch.nansum(attention_mask, dim=-1)
-        scale[~(scale > 0)] = 1.0
         scaled_context = context / scale.unsqueeze(dim=-1)
         token_ids = (
             torch.bucketize(
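A worked example (values invented) of the mean-abs scaling performed by _input_transform above: the scale is the mean absolute value over observed entries, with NaNs masked out.

import torch

context = torch.tensor([[2.0, -4.0, float("nan"), 6.0]])
attention_mask = ~torch.isnan(context)  # [[True, True, False, True]]

scale = torch.nansum(torch.abs(context) * attention_mask, dim=-1) / torch.nansum(attention_mask, dim=-1)
# scale = (2 + 4 + 6) / 3 = 4.0
scaled_context = context / scale.unsqueeze(dim=-1)  # [[0.5, -1.0, nan, 1.5]]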
@@ -153,15 +192,42 @@ class MeanScaleUniformBins(ChronosTokenizer):
             + self.config.n_special_tokens
         )
         token_ids[~attention_mask] = self.config.pad_token_id
+        token_ids.clamp_(0, self.config.n_tokens - 1)
 
-
-
-
-
-
+        return token_ids, attention_mask, scale
+
+    def _append_eos_token(
+        self, token_ids: torch.Tensor, attention_mask: torch.Tensor
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        batch_size = token_ids.shape[0]
+        eos_tokens = torch.full((batch_size, 1), fill_value=self.config.eos_token_id)
+        token_ids = torch.concat((token_ids, eos_tokens), dim=1)
+        eos_mask = torch.full((batch_size, 1), fill_value=True)
+        attention_mask = torch.concat((attention_mask, eos_mask), dim=1)
+
+        return token_ids, attention_mask
+
+    def context_input_transform(self, context: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        length = context.shape[-1]
+
+        if length > self.config.context_length:
+            context = context[..., -self.config.context_length :]
+
+        token_ids, attention_mask, scale = self._input_transform(context=context)
+
+        if self.config.use_eos_token and self.config.model_type == "seq2seq":
+            token_ids, attention_mask = self._append_eos_token(token_ids=token_ids, attention_mask=attention_mask)
 
         return token_ids, attention_mask, scale
 
+    def label_input_transform(self, label: torch.Tensor, scale: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+        token_ids, attention_mask, _ = self._input_transform(context=label, scale=scale)
+
+        if self.config.use_eos_token:
+            token_ids, attention_mask = self._append_eos_token(token_ids=token_ids, attention_mask=attention_mask)
+
+        return token_ids, attention_mask
+
     def output_transform(self, samples: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
         scale_unsqueezed = scale.unsqueeze(-1).unsqueeze(-1)
         indices = torch.clamp(
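For reference, the _append_eos_token helper added above does nothing more than concatenate one EOS id per series and extend the mask; a self-contained illustration with made-up token ids:

import torch

eos_token_id = 1  # hypothetical; taken from config.eos_token_id in the real code
token_ids = torch.tensor([[10, 11, 12], [13, 14, 15]])
attention_mask = torch.ones_like(token_ids, dtype=torch.bool)

batch_size = token_ids.shape[0]
eos_tokens = torch.full((batch_size, 1), fill_value=eos_token_id)
token_ids = torch.concat((token_ids, eos_tokens), dim=1)  # shape (2, 4)
attention_mask = torch.concat((attention_mask, torch.full((batch_size, 1), fill_value=True)), dim=1)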
@@ -302,6 +368,7 @@ class ChronosPipeline(BaseChronosPipeline):
     forecast_type: ForecastType = ForecastType.SAMPLES
 
     def __init__(self, tokenizer, model):
+        super().__init__(inner_model=model.model)
         self.tokenizer = tokenizer
         self.model = model
 
@@ -330,7 +397,7 @@ class ChronosPipeline(BaseChronosPipeline):
         provided, and the extra 1 is for EOS.
         """
         context = self._prepare_and_validate_context(context=context)
-        token_ids, attention_mask, tokenizer_state = self.tokenizer.
+        token_ids, attention_mask, tokenizer_state = self.tokenizer.context_input_transform(context)
         embeddings = self.model.encode(
             input_ids=token_ids.to(self.model.device),
             attention_mask=attention_mask.to(self.model.device),
@@ -402,7 +469,7 @@ class ChronosPipeline(BaseChronosPipeline):
         remaining = prediction_length
 
         while remaining > 0:
-            token_ids, attention_mask, scale = self.tokenizer.
+            token_ids, attention_mask, scale = self.tokenizer.context_input_transform(context)
             samples = self.model(
                 token_ids.to(self.model.device),
                 attention_mask.to(self.model.device),
src/autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py

@@ -289,7 +289,7 @@ class ChronosBoltModelForForecasting(T5PreTrainedModel):
             # normalize target
             target, _ = self.instance_norm(target, loc_scale)
             target = target.unsqueeze(1)  # type: ignore
-            assert self.chronos_config.prediction_length
+            assert self.chronos_config.prediction_length >= target.shape[-1]
 
             target = target.to(quantile_preds.device)
             target_mask = (
@@ -297,6 +297,12 @@ class ChronosBoltModelForForecasting(T5PreTrainedModel):
             )
             target[~target_mask] = 0.0
 
+            # pad target and target_mask if they are shorter than model's prediction_length
+            if self.chronos_config.prediction_length > target.shape[-1]:
+                padding_shape = (*target.shape[:-1], self.chronos_config.prediction_length - target.shape[-1])
+                target = torch.cat([target, torch.zeros(padding_shape).to(target)], dim=-1)
+                target_mask = torch.cat([target_mask, torch.zeros(padding_shape).to(target_mask)], dim=-1)
+
             loss = (
                 2
                 * torch.abs(
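The padding block above covers fine-tuning horizons shorter than the model's fixed prediction_length: the target is zero-padded and the padded steps are masked out of the loss. A self-contained sketch with invented shapes:

import torch

model_prediction_length = 64
target = torch.randn(8, 1, 48)  # (batch, 1, horizon) after target.unsqueeze(1)
target_mask = torch.ones_like(target, dtype=torch.bool)

if model_prediction_length > target.shape[-1]:
    padding_shape = (*target.shape[:-1], model_prediction_length - target.shape[-1])
    target = torch.cat([target, torch.zeros(padding_shape).to(target)], dim=-1)
    # padded positions are False in the mask, so they do not contribute to the loss
    target_mask = torch.cat([target_mask, torch.zeros(padding_shape).to(target_mask)], dim=-1)

assert target.shape[-1] == target_mask.shape[-1] == model_prediction_length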
@@ -373,6 +379,7 @@ class ChronosBoltPipeline(BaseChronosPipeline):
     _aliases = ["PatchedT5Pipeline"]
 
     def __init__(self, model: ChronosBoltModelForForecasting):
+        super().__init__(inner_model=model)
         self.model = model
 
     @property