autogluon.timeseries 1.1.2b20241112__py3-none-any.whl → 1.1.2b20241114__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/timeseries/metrics/__init__.py +13 -3
- autogluon/timeseries/metrics/point.py +50 -0
- autogluon/timeseries/models/chronos/model.py +269 -12
- autogluon/timeseries/models/chronos/pipeline/base.py +14 -1
- autogluon/timeseries/models/chronos/pipeline/chronos.py +86 -19
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +8 -1
- autogluon/timeseries/models/chronos/pipeline/utils.py +239 -3
- autogluon/timeseries/models/gluonts/abstract_gluonts.py +33 -22
- autogluon/timeseries/models/gluonts/torch/models.py +39 -27
- autogluon/timeseries/version.py +1 -1
- {autogluon.timeseries-1.1.2b20241112.dist-info → autogluon.timeseries-1.1.2b20241114.dist-info}/METADATA +4 -4
- {autogluon.timeseries-1.1.2b20241112.dist-info → autogluon.timeseries-1.1.2b20241114.dist-info}/RECORD +19 -19
- /autogluon.timeseries-1.1.2b20241112-py3.8-nspkg.pth → /autogluon.timeseries-1.1.2b20241114-py3.8-nspkg.pth +0 -0
- {autogluon.timeseries-1.1.2b20241112.dist-info → autogluon.timeseries-1.1.2b20241114.dist-info}/LICENSE +0 -0
- {autogluon.timeseries-1.1.2b20241112.dist-info → autogluon.timeseries-1.1.2b20241114.dist-info}/NOTICE +0 -0
- {autogluon.timeseries-1.1.2b20241112.dist-info → autogluon.timeseries-1.1.2b20241114.dist-info}/WHEEL +0 -0
- {autogluon.timeseries-1.1.2b20241112.dist-info → autogluon.timeseries-1.1.2b20241114.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.1.2b20241112.dist-info → autogluon.timeseries-1.1.2b20241114.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.1.2b20241112.dist-info → autogluon.timeseries-1.1.2b20241114.dist-info}/zip-safe +0 -0
autogluon/timeseries/models/chronos/pipeline/chronos.py

@@ -65,9 +65,12 @@ class ChronosTokenizer:
     which concrete classes must implement.
     """
 
-    def
+    def context_input_transform(
+        self,
+        context: torch.Tensor,
+    ) -> Tuple:
         """
-        Turn a batch of time series into token IDs, attention
+        Turn a batch of time series into token IDs, attention mask, and tokenizer_state.
 
         Parameters
         ----------
@@ -87,9 +90,40 @@ class ChronosTokenizer:
             which input observations are not ``torch.nan`` (i.e. not
             missing nor padding).
         tokenizer_state
-            An object that
-            Contains the relevant
-
+            An object that can be passed to ``label_input_transform``
+            and ``output_transform``. Contains the relevant information
+            to decode output samples into real values,
+            such as location and scale parameters.
+        """
+        raise NotImplementedError()
+
+    def label_input_transform(self, label: torch.Tensor, tokenizer_state: Any) -> Tuple:
+        """
+        Turn a batch of label slices of time series into token IDs and attention mask
+        using the ``tokenizer_state`` provided by ``context_input_transform``.
+
+        Parameters
+        ----------
+        label
+            A tensor shaped (batch_size, time_length), containing the
+            timeseries label, i.e., the ground-truth future values.
+        tokenizer_state
+            An object returned by ``context_input_transform`` containing
+            relevant information to preprocess data, such as location and
+            scale. The nature of this depends on the specific tokenizer.
+            This is used for tokenizing the label, in order to use the same
+            scaling used to tokenize the context.
+
+        Returns
+        -------
+        token_ids
+            A tensor of integers, shaped (batch_size, time_length + 1)
+            if ``config.use_eos_token`` and (batch_size, time_length)
+            otherwise, containing token IDs for the input series.
+        attention_mask
+            A boolean tensor, same shape as ``token_ids``, indicating
+            which input observations are not ``torch.nan`` (i.e. not
+            missing nor padding).
         """
         raise NotImplementedError()
 
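The old single tokenizer entry point is split into a context/label pair so that fine-tuning can tokenize ground-truth future values with the same scaling as the history. A minimal sketch of how the two methods are meant to be used together, assuming `tokenizer` is any concrete `ChronosTokenizer` implementation; the tensors are invented for illustration:

```python
# Illustrative sketch (not part of the diff). `tokenizer` is assumed to be an
# existing ChronosTokenizer implementation; shapes follow the docstrings above.
import torch

context = torch.randn(4, 512)  # (batch_size, history_length)
label = torch.randn(4, 64)     # (batch_size, prediction_length), ground-truth future

# Tokenize the history; tokenizer_state carries e.g. the per-series scale.
input_ids, attention_mask, tokenizer_state = tokenizer.context_input_transform(context)

# Tokenize the future values with the *same* scaling that was used for the context.
labels, labels_mask = tokenizer.label_input_transform(label, tokenizer_state)
```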
@@ -117,6 +151,11 @@ class ChronosTokenizer:
 
 
 class MeanScaleUniformBins(ChronosTokenizer):
+    """
+    A tokenizer that performs mean scaling and then quantizes the scaled time series into
+    uniformly-spaced bins between some bounds on the real line.
+    """
+
     def __init__(self, low_limit: float, high_limit: float, config: ChronosConfig) -> None:
         self.config = config
         self.centers = torch.linspace(
@@ -132,15 +171,15 @@ class MeanScaleUniformBins(ChronosTokenizer):
             )
         )
 
-    def
-
+    def _input_transform(
+        self, context: torch.Tensor, scale: Optional[torch.Tensor] = None
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        attention_mask = ~torch.isnan(context)
 
-        if
-
+        if scale is None:
+            scale = torch.nansum(torch.abs(context) * attention_mask, dim=-1) / torch.nansum(attention_mask, dim=-1)
+            scale[~(scale > 0)] = 1.0
 
-        attention_mask = ~torch.isnan(context)
-        scale = torch.nansum(torch.abs(context) * attention_mask, dim=-1) / torch.nansum(attention_mask, dim=-1)
-        scale[~(scale > 0)] = 1.0
         scaled_context = context / scale.unsqueeze(dim=-1)
         token_ids = (
             torch.bucketize(
@@ -153,15 +192,42 @@ class MeanScaleUniformBins(ChronosTokenizer):
             + self.config.n_special_tokens
         )
         token_ids[~attention_mask] = self.config.pad_token_id
+        token_ids.clamp_(0, self.config.n_tokens - 1)
 
-
-
-
-
-
+        return token_ids, attention_mask, scale
+
+    def _append_eos_token(
+        self, token_ids: torch.Tensor, attention_mask: torch.Tensor
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        batch_size = token_ids.shape[0]
+        eos_tokens = torch.full((batch_size, 1), fill_value=self.config.eos_token_id)
+        token_ids = torch.concat((token_ids, eos_tokens), dim=1)
+        eos_mask = torch.full((batch_size, 1), fill_value=True)
+        attention_mask = torch.concat((attention_mask, eos_mask), dim=1)
+
+        return token_ids, attention_mask
+
+    def context_input_transform(self, context: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        length = context.shape[-1]
+
+        if length > self.config.context_length:
+            context = context[..., -self.config.context_length :]
+
+        token_ids, attention_mask, scale = self._input_transform(context=context)
+
+        if self.config.use_eos_token and self.config.model_type == "seq2seq":
+            token_ids, attention_mask = self._append_eos_token(token_ids=token_ids, attention_mask=attention_mask)
 
         return token_ids, attention_mask, scale
 
+    def label_input_transform(self, label: torch.Tensor, scale: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+        token_ids, attention_mask, _ = self._input_transform(context=label, scale=scale)
+
+        if self.config.use_eos_token:
+            token_ids, attention_mask = self._append_eos_token(token_ids=token_ids, attention_mask=attention_mask)
+
+        return token_ids, attention_mask
+
     def output_transform(self, samples: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
         scale_unsqueezed = scale.unsqueeze(-1).unsqueeze(-1)
         indices = torch.clamp(
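The refactor routes both the history and the label through `_input_transform`, with the label call reusing the scale computed from the history. A small self-contained sketch of the mean-scaling step shown above, with made-up values:

```python
# Illustrative sketch (not part of the diff): the mean scale that label_input_transform
# reuses. The numbers are invented; the formula mirrors _input_transform above.
import torch

context = torch.tensor([[1.0, 2.0, 3.0, float("nan")]])
attention_mask = ~torch.isnan(context)
scale = torch.nansum(torch.abs(context) * attention_mask, dim=-1) / torch.nansum(attention_mask, dim=-1)
print(scale)  # tensor([2.]) -> mean absolute value of the observed points

# label_input_transform(label, scale) passes this value back into _input_transform,
# so the future window is quantized with the history's scale instead of its own.
```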
@@ -302,6 +368,7 @@ class ChronosPipeline(BaseChronosPipeline):
     forecast_type: ForecastType = ForecastType.SAMPLES
 
     def __init__(self, tokenizer, model):
+        super().__init__(inner_model=model.model)
         self.tokenizer = tokenizer
         self.model = model
 
@@ -330,7 +397,7 @@ class ChronosPipeline(BaseChronosPipeline):
         provided, and the extra 1 is for EOS.
         """
         context = self._prepare_and_validate_context(context=context)
-        token_ids, attention_mask, tokenizer_state = self.tokenizer.
+        token_ids, attention_mask, tokenizer_state = self.tokenizer.context_input_transform(context)
         embeddings = self.model.encode(
             input_ids=token_ids.to(self.model.device),
             attention_mask=attention_mask.to(self.model.device),
@@ -402,7 +469,7 @@ class ChronosPipeline(BaseChronosPipeline):
         remaining = prediction_length
 
         while remaining > 0:
-            token_ids, attention_mask, scale = self.tokenizer.
+            token_ids, attention_mask, scale = self.tokenizer.context_input_transform(context)
             samples = self.model(
                 token_ids.to(self.model.device),
                 attention_mask.to(self.model.device),
autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py

@@ -289,7 +289,7 @@ class ChronosBoltModelForForecasting(T5PreTrainedModel):
         # normalize target
         target, _ = self.instance_norm(target, loc_scale)
         target = target.unsqueeze(1)  # type: ignore
-        assert self.chronos_config.prediction_length
+        assert self.chronos_config.prediction_length >= target.shape[-1]
 
         target = target.to(quantile_preds.device)
         target_mask = (
@@ -297,6 +297,12 @@ class ChronosBoltModelForForecasting(T5PreTrainedModel):
         )
         target[~target_mask] = 0.0
 
+        # pad target and target_mask if they are shorter than model's prediction_length
+        if self.chronos_config.prediction_length > target.shape[-1]:
+            padding_shape = (*target.shape[:-1], self.chronos_config.prediction_length - target.shape[-1])
+            target = torch.cat([target, torch.zeros(padding_shape).to(target)], dim=-1)
+            target_mask = torch.cat([target_mask, torch.zeros(padding_shape).to(target_mask)], dim=-1)
+
         loss = (
             2
             * torch.abs(
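With this change the quantile loss tolerates fine-tuning windows shorter than the model's configured horizon by zero-padding the target and masking the padded steps. A shape-only sketch of the padding branch, with invented sizes (the native `prediction_length` is assumed to be 64 here):

```python
# Illustrative sketch (not part of the diff): what the padding branch does to the shapes.
import torch

prediction_length = 64                        # model's configured horizon (assumed)
target = torch.randn(8, 1, 40)                # fine-tuning labels shorter than the horizon
target_mask = torch.ones_like(target).bool()  # all 40 label steps observed

padding_shape = (*target.shape[:-1], prediction_length - target.shape[-1])
target = torch.cat([target, torch.zeros(padding_shape).to(target)], dim=-1)
target_mask = torch.cat([target_mask, torch.zeros(padding_shape).to(target_mask)], dim=-1)

print(target.shape, target_mask.shape)  # both torch.Size([8, 1, 64]); padded steps are masked out
```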
@@ -373,6 +379,7 @@ class ChronosBoltPipeline(BaseChronosPipeline):
     _aliases = ["PatchedT5Pipeline"]
 
     def __init__(self, model: ChronosBoltModelForForecasting):
+        super().__init__(inner_model=model)
         self.model = model
 
     @property
autogluon/timeseries/models/chronos/pipeline/utils.py

@@ -1,15 +1,212 @@
+import logging
 import os
 import re
 import time
+from itertools import chain, cycle
 from pathlib import Path
-from typing import Callable, List, Optional
+from typing import TYPE_CHECKING, Callable, Iterable, Iterator, List, Literal, Optional
 
 import numpy as np
 import torch
+from gluonts.dataset.field_names import FieldName
+from gluonts.transform import ExpectedNumInstanceSampler, InstanceSplitter, ValidationSplitSampler
+from torch.utils.data import IterableDataset
+from transformers import TrainerCallback
 
 from autogluon.common.loaders.load_s3 import download, list_bucket_prefix_suffix_contains_s3
 from autogluon.core.utils.exceptions import TimeLimitExceeded
 from autogluon.timeseries.dataset.ts_dataframe import TimeSeriesDataFrame
+from autogluon.timeseries.models.gluonts.abstract_gluonts import SimpleGluonTSDataset
+
+if TYPE_CHECKING:
+    # TODO: fix the underlying reason for this circular import, the pipeline should handle tokenization
+    from autogluon.timeseries.models.chronos.pipeline.chronos import ChronosTokenizer
+
+
+logger = logging.getLogger("autogluon.timeseries.models.chronos")
+
+
+class PseudoShuffledIterableDataset(IterableDataset):
+    """
+    Shuffle entries from an iterable by temporarily accumulating them
+    in an intermediate buffer.
+
+    Parameters
+    ----------
+    base_dataset
+        The original iterable object, representing the dataset.
+    shuffle_buffer_size
+        Size of the buffer use to shuffle entries from the base dataset.
+    """
+
+    def __init__(self, base_dataset, shuffle_buffer_size: int = 100) -> None:
+        super().__init__()
+        assert shuffle_buffer_size > 0
+        self.base_dataset = base_dataset
+        self.shuffle_buffer_size = shuffle_buffer_size
+        self.generator = torch.Generator()
+
+    def __iter__(self):
+        shuffle_buffer = []
+
+        for element in self.base_dataset:
+            shuffle_buffer.append(element)
+            if len(shuffle_buffer) >= self.shuffle_buffer_size:
+                idx = torch.randint(len(shuffle_buffer), size=(), generator=self.generator)
+                yield shuffle_buffer.pop(idx)
+
+        while shuffle_buffer:
+            idx = torch.randint(len(shuffle_buffer), size=(), generator=self.generator)
+            yield shuffle_buffer.pop(idx)
+
+
+class ChronosFineTuningDataset(IterableDataset):
+    """
+    Dataset wrapper to convert a ``TimeSeriesDataFrame`` into an iterable dataset
+    compatible with Chronos models.
+
+    When a ``tokenizer`` is provided, data is converted into HuggingFace-compatible set of
+    ``input_ids``, ``attention_mask`` and ``labels``, used by the original Chronos models.
+
+    When the ``tokenizer`` is omitted, data is converted into the format compatible with
+    ChronosBolt models, i.e., ``context`` and ``target``.
+
+    Parameters
+    ----------
+    target_df : TimeSeriesDataFrame
+        The ``TimeSeriesDataFrame`` to be converted
+    target_column : str, default = "target"
+        The name of the column which contains the target time series, by default "target"
+    context_length : int, default = 512
+        The length of the historical context
+    prediction_length : int, default = 64
+        The prediction_length, i.e., length of label or target
+    tokenizer : ``ChronosTokenizer``, default = None
+        When a ``ChronosTokenizer`` object is provided, data will be converted into the
+        HuggingFace format accepted by the original Chronos models using this ``ChronosTokenizer``.
+        If None, data will be converted into the format accepted by ChronosBolt models.
+    mode : Literal["training", "validation"], default = "training"
+        When ``training``, random slices from the time series will be returned for training purposes.
+        If ``validation``, the last slice of each time series returned in the original order.
+    """
+
+    def __init__(
+        self,
+        target_df: TimeSeriesDataFrame,
+        target_column: str = "target",
+        context_length: int = 512,
+        prediction_length: int = 64,
+        tokenizer: Optional["ChronosTokenizer"] = None,
+        mode: Literal["training", "validation"] = "training",
+    ) -> None:
+        super().__init__()
+
+        assert mode in ("training", "validation")
+
+        # A dummy hourly freq is used because the model doesn't actually need the freq
+        self.gluonts_dataset = SimpleGluonTSDataset(target_df=target_df, freq="h", target_column=target_column)
+        self.tokenizer = tokenizer
+        self.context_length = context_length
+        self.prediction_length = prediction_length
+        self.mode = mode
+
+    def _create_instance_splitter(self, mode: str):
+        instance_sampler = {
+            "training": ExpectedNumInstanceSampler(
+                num_instances=1.0, min_future=self.prediction_length, min_instances=1
+            ),
+            "validation": ValidationSplitSampler(min_future=self.prediction_length),
+        }[mode]
+
+        return InstanceSplitter(
+            target_field=FieldName.TARGET,
+            is_pad_field=FieldName.IS_PAD,
+            start_field=FieldName.START,
+            forecast_start_field=FieldName.FORECAST_START,
+            instance_sampler=instance_sampler,
+            past_length=self.context_length,
+            future_length=self.prediction_length,
+            dummy_value=np.nan,
+        )
+
+    def _create_training_data(self, data: Iterable[dict]):
+        data = chain.from_iterable(cycle([data]))
+        split_transform = self._create_instance_splitter("training")
+        data = split_transform.apply(data, is_train=True)
+        return data
+
+    def _create_validation_data(self, data: Iterable[dict]):
+        data = self._create_instance_splitter("validation").apply(data, is_train=False)
+        return data
+
+    def to_chronos_format(self, entry: dict) -> dict:
+        """Converts an entry from GluonTS data format with past and future targets
+        to the HuggingFace format accepted by the original Chronos models using the ChronosTokenizer.
+
+        Parameters
+        ----------
+        entry : dict
+            time series data entry in GluonTS format with ``past_target`` and ``future_target`` keys
+
+        Returns
+        -------
+        dict
+            time series data entry in HuggingFace format with ``input_ids``, ``attention_mask``, and ``labels``
+        """
+        assert self.tokenizer is not None, "A ChronosTokenizer is required to convert data into the Chronos format"
+        past_target = torch.tensor(entry[f"past_{FieldName.TARGET}"]).unsqueeze(0)
+        input_ids, attention_mask, scale = self.tokenizer.context_input_transform(past_target)
+        future_target = torch.tensor(entry[f"future_{FieldName.TARGET}"]).unsqueeze(0)
+        labels, labels_mask = self.tokenizer.label_input_transform(future_target, scale)
+        labels[labels_mask == 0] = -100
+
+        return {
+            "input_ids": input_ids.squeeze(0),
+            "attention_mask": attention_mask.squeeze(0),
+            "labels": labels.squeeze(0),
+        }
+
+    def to_chronos_bolt_format(self, entry: dict) -> dict:
+        """Converts an entry from GluonTS data format with past and future targets
+        to the format accepted by the ChronosBolt models.
+
+        Parameters
+        ----------
+        entry : dict
+            time series data entry in GluonTS format with ``past_target`` and ``future_target`` keys
+
+        Returns
+        -------
+        dict
+            time series data entry in ChronosBolt format with ``context`` and ``target``
+        """
+        past_target = torch.tensor(entry[f"past_{FieldName.TARGET}"])
+        future_target = torch.tensor(entry[f"future_{FieldName.TARGET}"])
+
+        return {"context": past_target, "target": future_target}
+
+    def __iter__(self) -> Iterator:
+        if self.mode == "training":
+            iterable = self._create_training_data(self.gluonts_dataset)
+        elif self.mode == "validation":
+            iterable = self._create_validation_data(self.gluonts_dataset)
+
+        format_transform_fn = self.to_chronos_format if self.tokenizer is not None else self.to_chronos_bolt_format
+        for entry in iterable:
+            yield format_transform_fn(entry)
+
+    def shuffle(self, shuffle_buffer_size: Optional[int] = None):
+        """Returns a (pseudo) shuffled version of this iterable dataset.
+
+        Parameters
+        ----------
+        shuffle_buffer_size : int, optional, default = None
+            The shuffle buffer size used for pseudo shuffling
+        """
+        assert shuffle_buffer_size is None or shuffle_buffer_size >= 0
+        if not shuffle_buffer_size:
+            return self
+        return PseudoShuffledIterableDataset(self, shuffle_buffer_size)
 
 
 def left_pad_and_stack_1D(tensors: List[torch.Tensor]) -> torch.Tensor:
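A minimal sketch of how the new dataset wrapper might be used, based only on the constructor signature and `shuffle` method shown above; `train_df` (a `TimeSeriesDataFrame`) and `tokenizer` are assumed to exist already:

```python
# Illustrative sketch (not part of the diff). `train_df` is an assumed existing
# TimeSeriesDataFrame with a "target" column; `tokenizer` is an optional ChronosTokenizer.
from autogluon.timeseries.models.chronos.pipeline.utils import ChronosFineTuningDataset

train_dataset = ChronosFineTuningDataset(
    target_df=train_df,
    context_length=512,
    prediction_length=64,
    tokenizer=tokenizer,  # None -> ChronosBolt-style {"context", "target"} entries
    mode="training",
).shuffle(shuffle_buffer_size=100)

first_entry = next(iter(train_dataset))
print(first_entry.keys())  # input_ids/attention_mask/labels, or context/target
```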
@@ -91,12 +288,51 @@ class ChronosInferenceDataLoader(torch.utils.data.DataLoader):
         self.callback()
 
 
+class EvaluateAndSaveFinalStepCallback(TrainerCallback):
+    """Callback to evaluate and save the model at last training step."""
+
+    def on_step_end(self, args, state, control, **kwargs):
+        if state.global_step >= state.max_steps:
+            control.should_log = True
+            control.should_evaluate = True
+            control.should_save = True
+
+
+class TimeLimitCallback(TrainerCallback):
+    def __init__(self, time_limit: int):
+        """
+        Callback to stop training once a specified time has elapsed.
+
+        Parameters
+        ----------
+        time_limit: int
+            maximum time allowed for training in seconds.
+        """
+        self.time_limit = time_limit
+        self.start_time = None
+
+    def on_train_begin(self, args, state, control, **kwargs):
+        self.start_time = time.monotonic()
+
+    def on_step_end(self, args, state, control, **kwargs):
+        elapsed_time = time.monotonic() - self.start_time
+        if elapsed_time > self.time_limit:
+            raise TimeLimitExceeded
+
+
+class LoggerCallback(TrainerCallback):
+    def on_log(self, args, state, control, logs=None, **kwargs):
+        logs.pop("total_flos", None)
+        if state.is_local_process_zero:
+            logger.info(logs)
+
+
 def timeout_callback(seconds: Optional[float]) -> Callable:
     """Return a callback object that raises an exception if time limit is exceeded."""
-    start_time = time.
+    start_time = time.monotonic()
 
     def callback() -> None:
-        if seconds is not None and time.
+        if seconds is not None and time.monotonic() - start_time > seconds:
             raise TimeLimitExceeded
 
     return callback
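The new `TrainerCallback` subclasses are meant to be passed to a HuggingFace `Trainer`. A hedged sketch of the wiring, assuming `model`, `training_args`, and `train_dataset` are prepared elsewhere (the actual fine-tuning setup lives in `model.py`, which is not shown in this diff):

```python
# Illustrative sketch (not part of the diff). `model`, `training_args`, and
# `train_dataset` are assumed to be set up by the Chronos fine-tuning code.
from transformers import Trainer

from autogluon.timeseries.models.chronos.pipeline.utils import (
    EvaluateAndSaveFinalStepCallback,
    LoggerCallback,
    TimeLimitCallback,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    callbacks=[
        EvaluateAndSaveFinalStepCallback(),  # force eval/save on the final step
        TimeLimitCallback(time_limit=600),   # raise TimeLimitExceeded after 10 minutes
        LoggerCallback(),                    # route HF training logs to the autogluon logger
    ],
)
```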
autogluon/timeseries/models/gluonts/abstract_gluonts.py

@@ -286,10 +286,6 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
 
         self.negative_data = (dataset[self.target] < 0).any()
 
-    @property
-    def default_context_length(self) -> int:
-        return min(512, max(10, 2 * self.prediction_length))
-
     def preprocess(self, data: TimeSeriesDataFrame, is_train: bool = False, **kwargs) -> TimeSeriesDataFrame:
         # Copy data to avoid SettingWithCopyWarning from pandas
         data = data.copy()
@@ -357,25 +353,40 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
         known_covariates[columns] = self._real_column_transformers["known"].transform(known_covariates[columns])
         return known_covariates
 
+    def _get_default_params(self):
+        """Gets default parameters for GluonTS estimator initialization that are available after
+        AbstractTimeSeriesModel initialization (i.e., before deferred initialization). Models may
+        override this method to update default parameters.
+        """
+        return {
+            "batch_size": 64,
+            "context_length": min(512, max(10, 2 * self.prediction_length)),
+            "predict_batch_size": 500,
+            "early_stopping_patience": 20,
+            "max_epochs": 100,
+            "lr": 1e-3,
+            "freq": self._dummy_gluonts_freq,
+            "prediction_length": self.prediction_length,
+            "quantiles": self.quantile_levels,
+        }
+
     def _get_model_params(self) -> dict:
         """Gets params that are passed to the inner model."""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        init_args.setdefault("max_epochs", init_args.get("epochs", 100))
-        return init_args
+        # for backward compatibility with the old GluonTS MXNet API
+        parameter_name_aliases = {
+            "epochs": "max_epochs",
+            "learning_rate": "lr",
+        }
+
+        init_args = super()._get_model_params()
+        for alias, actual in parameter_name_aliases.items():
+            if alias in init_args:
+                if actual in init_args:
+                    raise ValueError(f"Parameter '{alias}' cannot be specified when '{actual}' is also specified.")
+                else:
+                    init_args[actual] = init_args.pop(alias)
+
+        return self._get_default_params() | init_args
 
     def _get_estimator_init_args(self) -> Dict[str, Any]:
         """Get GluonTS specific constructor arguments for estimator objects, an alias to `self._get_model_params`
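The key behavior here is the precedence of `self._get_default_params() | init_args`: user-supplied values override the defaults, after legacy aliases such as `epochs` are remapped. A tiny self-contained sketch of that merge, with made-up values:

```python
# Illustrative sketch (not part of the diff): the precedence implied by
# `self._get_default_params() | init_args`.
defaults = {"batch_size": 64, "max_epochs": 100, "lr": 1e-3}
user_params = {"epochs": 5, "batch_size": 128}  # "epochs" is a legacy alias

# alias remapping happens first ...
user_params["max_epochs"] = user_params.pop("epochs")

# ... then the dict union: the right-hand side (user params) overrides the defaults
merged = defaults | user_params
print(merged)  # {'batch_size': 128, 'max_epochs': 5, 'lr': 0.001}
```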
@@ -395,7 +406,7 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
         default_trainer_kwargs = {
             "limit_val_batches": 3,
             "max_epochs": init_args["max_epochs"],
-            "callbacks":
+            "callbacks": self.callbacks,
             "enable_progress_bar": False,
             "default_root_dir": self.path,
         }
autogluon/timeseries/models/gluonts/torch/models.py

@@ -209,15 +209,16 @@ class TemporalFusionTransformerModel(AbstractGluonTSModel):
     supports_cat_covariates = True
     supports_static_features = True
 
-    @property
-    def default_context_length(self) -> int:
-        return min(512, max(64, 2 * self.prediction_length))
-
     def _get_estimator_class(self) -> Type[GluonTSEstimator]:
         from gluonts.torch.model.tft import TemporalFusionTransformerEstimator
 
         return TemporalFusionTransformerEstimator
 
+    def _get_default_params(self):
+        return super()._get_default_params() | {
+            "context_length": min(512, max(64, 2 * self.prediction_length)),
+        }
+
     def _get_estimator_init_args(self) -> Dict[str, Any]:
         init_kwargs = super()._get_estimator_init_args()
         if self.num_feat_dynamic_real > 0:
@@ -282,9 +283,10 @@ class DLinearModel(AbstractGluonTSModel):
         If True, ``lightning_logs`` directory will NOT be removed after the model finished training.
     """
 
-
-
-
+    def _get_default_params(self):
+        return super()._get_default_params() | {
+            "context_length": 96,
+        }
 
     def _get_estimator_class(self) -> Type[GluonTSEstimator]:
         from gluonts.torch.model.d_linear import DLinearEstimator
@@ -341,18 +343,16 @@ class PatchTSTModel(AbstractGluonTSModel):
 
     supports_known_covariates = True
 
-    @property
-    def default_context_length(self) -> int:
-        return 96
-
     def _get_estimator_class(self) -> Type[GluonTSEstimator]:
         from gluonts.torch.model.patch_tst import PatchTSTEstimator
 
         return PatchTSTEstimator
 
+    def _get_default_params(self):
+        return super()._get_default_params() | {"context_length": 96, "patch_len": 16}
+
     def _get_estimator_init_args(self) -> Dict[str, Any]:
         init_kwargs = super()._get_estimator_init_args()
-        init_kwargs.setdefault("patch_len", 16)
         init_kwargs["num_feat_dynamic_real"] = self.num_feat_dynamic_real
         return init_kwargs
 
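These per-model `_get_default_params` overrides replace the old `default_context_length` properties and scattered `setdefault` calls; user hyperparameters still take precedence over them. A sketch of overriding the PatchTST defaults through the predictor API, assuming `train_data` is an existing `TimeSeriesDataFrame`:

```python
# Illustrative sketch (not part of the diff). `train_data` is an assumed existing
# TimeSeriesDataFrame; values are invented.
from autogluon.timeseries import TimeSeriesPredictor

predictor = TimeSeriesPredictor(prediction_length=48)
predictor.fit(
    train_data,
    hyperparameters={
        # overrides the defaults contributed by PatchTSTModel._get_default_params()
        "PatchTST": {"context_length": 192, "patch_len": 32},
    },
)
```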
@@ -467,27 +467,27 @@ class TiDEModel(AbstractGluonTSModel):
         If False, past covariates will be used by the model if they are present in the dataset.
     feat_proj_hidden_dim : int, default = 4
         Size of the feature projection layer.
-    encoder_hidden_dim : int, default =
+    encoder_hidden_dim : int, default = 64
         Size of the dense encoder layer.
-    decoder_hidden_dim : int, default =
+    decoder_hidden_dim : int, default = 64
         Size of the dense decoder layer.
-    temporal_hidden_dim : int, default =
+    temporal_hidden_dim : int, default = 64
         Size of the temporal decoder layer.
-    distr_hidden_dim : int, default =
+    distr_hidden_dim : int, default = 64
         Size of the distribution projection layer.
-    num_layers_encoder : int, default =
+    num_layers_encoder : int, default = 2
         Number of layers in dense encoder.
-    num_layers_decoder : int, default =
+    num_layers_decoder : int, default = 2
         Number of layers in dense decoder.
-    decoder_output_dim : int, default =
+    decoder_output_dim : int, default = 16
         Output size of the dense decoder.
-    dropout_rate : float, default = 0.
+    dropout_rate : float, default = 0.2
         Dropout regularization parameter.
     num_feat_dynamic_proj : int, default = 2
         Output size of feature projection layer.
     embedding_dimension : int, default = [16] * num_feat_static_cat
         Dimension of the embeddings for categorical features
-    layer_norm : bool, default =
+    layer_norm : bool, default = True
         Should layer normalization be enabled?
     scaling : {"mean", "std", None}, default = "mean"
         Scaling applied to each *context window* during training & prediction.
@@ -496,13 +496,13 @@ class TiDEModel(AbstractGluonTSModel):
         Note that this is different from the `target_scaler` that is applied to the *entire time series*.
     max_epochs : int, default = 100
         Number of epochs the model will be trained for
-    batch_size : int, default =
+    batch_size : int, default = 256
         Size of batches used during training
     predict_batch_size : int, default = 500
         Size of batches used during prediction.
     num_batches_per_epoch : int, default = 50
         Number of batches processed every epoch
-    lr : float, default = 1e-
+    lr : float, default = 1e-4,
         Learning rate used during training
     trainer_kwargs : dict, optional
         Optional keyword arguments passed to ``lightning.Trainer``.
@@ -515,15 +515,27 @@ class TiDEModel(AbstractGluonTSModel):
     supports_known_covariates = True
     supports_static_features = True
 
-    @property
-    def default_context_length(self) -> int:
-        return min(512, max(64, 2 * self.prediction_length))
-
     def _get_estimator_class(self) -> Type[GluonTSEstimator]:
         from gluonts.torch.model.tide import TiDEEstimator
 
         return TiDEEstimator
 
+    def _get_default_params(self):
+        return super()._get_default_params() | {
+            "context_length": min(512, max(64, 2 * self.prediction_length)),
+            "encoder_hidden_dim": 64,
+            "decoder_hidden_dim": 64,
+            "temporal_hidden_dim": 64,
+            "distr_hidden_dim": 64,
+            "num_layers_encoder": 2,
+            "num_layers_decoder": 2,
+            "decoder_output_dim": 16,
+            "dropout_rate": 0.2,
+            "layer_norm": True,
+            "lr": 1e-4,
+            "batch_size": 256,
+        }
+
     def _get_estimator_init_args(self) -> Dict[str, Any]:
         init_kwargs = super()._get_estimator_init_args()
         init_kwargs["num_feat_static_cat"] = self.num_feat_static_cat