autogluon.timeseries 1.1.2b20241113__py3-none-any.whl → 1.1.2b20241115__py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
Files changed (18)
  1. autogluon/timeseries/dataset/ts_dataframe.py +4 -0
  2. autogluon/timeseries/models/chronos/model.py +275 -12
  3. autogluon/timeseries/models/chronos/pipeline/base.py +14 -1
  4. autogluon/timeseries/models/chronos/pipeline/chronos.py +86 -19
  5. autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +8 -1
  6. autogluon/timeseries/models/chronos/pipeline/utils.py +239 -3
  7. autogluon/timeseries/models/gluonts/abstract_gluonts.py +33 -22
  8. autogluon/timeseries/models/gluonts/torch/models.py +39 -27
  9. autogluon/timeseries/version.py +1 -1
  10. {autogluon.timeseries-1.1.2b20241113.dist-info → autogluon.timeseries-1.1.2b20241115.dist-info}/METADATA +4 -4
  11. {autogluon.timeseries-1.1.2b20241113.dist-info → autogluon.timeseries-1.1.2b20241115.dist-info}/RECORD +18 -18
  12. /autogluon.timeseries-1.1.2b20241113-py3.8-nspkg.pth → /autogluon.timeseries-1.1.2b20241115-py3.8-nspkg.pth +0 -0
  13. {autogluon.timeseries-1.1.2b20241113.dist-info → autogluon.timeseries-1.1.2b20241115.dist-info}/LICENSE +0 -0
  14. {autogluon.timeseries-1.1.2b20241113.dist-info → autogluon.timeseries-1.1.2b20241115.dist-info}/NOTICE +0 -0
  15. {autogluon.timeseries-1.1.2b20241113.dist-info → autogluon.timeseries-1.1.2b20241115.dist-info}/WHEEL +0 -0
  16. {autogluon.timeseries-1.1.2b20241113.dist-info → autogluon.timeseries-1.1.2b20241115.dist-info}/namespace_packages.txt +0 -0
  17. {autogluon.timeseries-1.1.2b20241113.dist-info → autogluon.timeseries-1.1.2b20241115.dist-info}/top_level.txt +0 -0
  18. {autogluon.timeseries-1.1.2b20241113.dist-info → autogluon.timeseries-1.1.2b20241115.dist-info}/zip-safe +0 -0
@@ -1067,3 +1067,7 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
1067
1067
  # This hides method from IPython autocomplete, but not VSCode autocomplete
1068
1068
  deprecated = ["get_reindexed_view", "to_regular_index"]
1069
1069
  return [d for d in super().__dir__() if d not in deprecated]
1070
+
1071
+ def to_data_frame(self) -> pd.DataFrame:
1072
+ """Convert `TimeSeriesDataFrame` to a `pandas.DataFrame`"""
1073
+ return pd.DataFrame(self)
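The new ``to_data_frame`` helper simply re-wraps the object with the plain ``pandas.DataFrame`` constructor. A minimal usage sketch (the toy long-format data is illustrative; ``from_data_frame`` is the existing ``TimeSeriesDataFrame`` constructor):

import pandas as pd
from autogluon.timeseries import TimeSeriesDataFrame

# Toy long-format data with the item_id / timestamp / target layout used by TimeSeriesDataFrame
df = pd.DataFrame(
    {
        "item_id": ["A", "A", "A", "B", "B", "B"],
        "timestamp": list(pd.date_range("2024-01-01", periods=3, freq="D")) * 2,
        "target": [1.0, 2.0, 3.0, 10.0, 20.0, 30.0],
    }
)
ts_df = TimeSeriesDataFrame.from_data_frame(df, id_column="item_id", timestamp_column="timestamp")

# New in this version: convert back to a plain pandas.DataFrame (drops the subclass, keeps the index)
plain_df = ts_df.to_data_frame()
assert type(plain_df) is pd.DataFrame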
@@ -1,5 +1,8 @@
1
1
  import logging
2
2
  import os
3
+ import shutil
4
+ import time
5
+ from pathlib import Path
3
6
  from typing import Any, Dict, Literal, Optional, Union
4
7
 
5
8
  import numpy as np
@@ -72,9 +75,10 @@ MODEL_ALIASES = {
72
75
 
73
76
 
74
77
  class ChronosModel(AbstractTimeSeriesModel):
75
- """Chronos pretrained time series forecasting models. Models can be based on the original
78
+ """Chronos [Ansari2024]_ pretrained time series forecasting models which can be used for zero-shot forecasting or fine-tuned
79
+ in a task-specific manner. Models can be based on the original
76
80
  `ChronosModel <https://github.com/amazon-science/chronos-forecasting/blob/main/src/chronos/chronos.py>`_ implementation,
77
- as well as a newer family of Chronos-Bolt models which are capable of much faster inference.
81
+ as well as a newer family of Chronos-Bolt models capable of much faster inference.
78
82
 
79
83
  The original Chronos is a family of pretrained models, based on the T5 family, with number of parameters ranging between
80
84
  8M and 710M. The full collection of Chronos models is available on
@@ -88,6 +92,9 @@ class ChronosModel(AbstractTimeSeriesModel):
88
92
  time series is then fed into a T5 model for forecasting. The Chronos-Bolt variants are capable of much faster inference,
89
93
  and can all run on CPUs. Chronos-Bolt models are also available on Hugging Face <https://huggingface.co/autogluon/>`_.
90
94
 
95
+ Both Chronos and Chronos-Bolt variants can be fine-tuned by setting ``fine_tune=True`` and selecting appropriate
96
+ fine-tuning parameters such as the learning rate (``fine_tune_lr``) and max steps (``fine_tune_steps``).
97
+
91
98
  References
92
99
  ----------
93
100
  .. [Ansari2024] Ansari, Abdul Fatir, Stella, Lorenzo et al.
@@ -108,8 +115,8 @@ class ChronosModel(AbstractTimeSeriesModel):
108
115
  num_samples : int, default = 20
109
116
  Number of samples used during inference
110
117
  device : str, default = None
111
- Device to use for inference. If None, model will use the GPU if available. For larger model sizes
112
- `small`, `base`, and `large`; inference will fail if no GPU is available.
118
+ Device to use for inference (and fine-tuning, if enabled). If None, model will use the GPU if available.
119
+ For larger model sizes `small`, `base`, and `large`; inference will fail if no GPU is available.
113
120
  context_length : int or None, default = None
114
121
  The context length to use in the model. Shorter context lengths will decrease model accuracy, but result
115
122
  in faster inference. If None, the model will infer context length from the data set length at inference
@@ -129,12 +136,34 @@ class ChronosModel(AbstractTimeSeriesModel):
129
136
  data_loader_num_workers : int, default = 0
130
137
  Number of worker processes to be used in the data loader. See documentation on ``torch.utils.data.DataLoader``
131
138
  for more information.
139
+ fine_tune : bool, default = False
140
+ If True, the pretrained model will be fine-tuned
141
+ fine_tune_lr: float, default = 0.0001
142
+ The learning rate used for fine-tuning
143
+ fine_tune_steps : int, default = 5000
144
+ The number of gradient update steps to fine-tune for
145
+ fine_tune_batch_size : int, default = 16
146
+ The batch size to use for fine-tuning
147
+ fine_tune_shuffle_buffer_size : int, default = 10000
148
+ The size of the shuffle buffer to shuffle the data during fine-tuning. If None, shuffling will
149
+ be turned off.
150
+ eval_during_fine_tune : bool, default = False
151
+ If True, validation will be performed during fine-tuning to select the best checkpoint.
152
+ Setting this argument to True may result in slower fine-tuning.
153
+ fine_tune_eval_max_items : int, default = 256
154
+ The maximum number of randomly-sampled time series to use from the validation set for evaluation
155
+ during fine-tuning. If None, the entire validation dataset will be used.
156
+ fine_tune_trainer_kwargs : dict, optional
157
+ Extra keyword arguments passed to ``transformers.TrainingArguments``
158
+ keep_transformers_logs: bool, default = False
159
+ If True, the logs generated by transformers will NOT be removed after fine-tuning
132
160
  """
133
161
 
134
162
  # default number of samples for prediction
135
163
  default_num_samples: int = 20
136
164
  default_model_path = "autogluon/chronos-t5-small"
137
165
  maximum_context_length = 2048
166
+ fine_tuned_ckpt_name: str = "fine-tuned-ckpt"
138
167
 
139
168
  def __init__(
140
169
  self,
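For orientation, the options documented above are ordinary model hyperparameters, so fine-tuning is switched on through the usual ``TimeSeriesPredictor`` interface rather than a separate API. A hedged end-to-end sketch with toy data; the ``bolt_small`` model alias and the exact option spelling follow the docstring and ``MODEL_ALIASES`` conventions but are not taken verbatim from this diff:

import numpy as np
import pandas as pd
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

# Toy dataset: 20 daily series of length 200 (purely illustrative)
n_items, length = 20, 200
rng = np.random.default_rng(0)
df = pd.DataFrame(
    {
        "item_id": np.repeat([f"item_{i}" for i in range(n_items)], length),
        "timestamp": np.tile(pd.date_range("2024-01-01", periods=length, freq="D"), n_items),
        "target": rng.normal(size=n_items * length),
    }
)
train_data = TimeSeriesDataFrame.from_data_frame(df, id_column="item_id", timestamp_column="timestamp")

predictor = TimeSeriesPredictor(prediction_length=24).fit(
    train_data,
    hyperparameters={
        "Chronos": {
            "model_path": "bolt_small",  # assumed model alias; a local path or HF model id also works
            "fine_tune": True,           # new in this release
            "fine_tune_lr": 1e-4,
            "fine_tune_steps": 1000,
        }
    },
)
predictions = predictor.predict(train_data)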
@@ -202,6 +231,12 @@ class ChronosModel(AbstractTimeSeriesModel):
202
231
  model = load_pkl.load(path=os.path.join(path, cls.model_file_name), verbose=verbose)
203
232
  if reset_paths:
204
233
  model.set_contexts(path)
234
+
235
+ fine_tune_ckpt_path = Path(model.path) / cls.fine_tuned_ckpt_name
236
+ if fine_tune_ckpt_path.exists():
237
+ logger.debug(f"Fine-tuned checkpoint exists, setting model_path to {fine_tune_ckpt_path}")
238
+ model.model_path = fine_tune_ckpt_path
239
+
205
240
  return model
206
241
 
207
242
  def _is_gpu_available(self) -> bool:
@@ -245,7 +280,7 @@ class ChronosModel(AbstractTimeSeriesModel):
245
280
  minimum_resources["num_gpus"] = self.min_num_gpus
246
281
  return minimum_resources
247
282
 
248
- def load_model_pipeline(self):
283
+ def load_model_pipeline(self, is_training: bool = False):
249
284
  from .pipeline import BaseChronosPipeline
250
285
 
251
286
  gpu_available = self._is_gpu_available()
@@ -262,8 +297,9 @@ class ChronosModel(AbstractTimeSeriesModel):
262
297
  pipeline = BaseChronosPipeline.from_pretrained(
263
298
  self.model_path,
264
299
  device_map=device,
300
+ # optimization cannot be used during fine-tuning
301
+ optimization_strategy=None if is_training else self.optimization_strategy,
265
302
  torch_dtype=self.torch_dtype,
266
- optimization_strategy=self.optimization_strategy,
267
303
  )
268
304
 
269
305
  self.model_pipeline = pipeline
@@ -272,6 +308,59 @@ class ChronosModel(AbstractTimeSeriesModel):
272
308
  self.load_model_pipeline()
273
309
  return self
274
310
 
311
+ def _has_tf32(self):
312
+ import torch.cuda
313
+
314
+ return torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
315
+
316
+ def _get_model_params(self) -> dict:
317
+ """Gets params that are passed to the inner model."""
318
+ init_args = super()._get_model_params().copy()
319
+
320
+ init_args.setdefault("fine_tune", False)
321
+ init_args.setdefault("keep_transformers_logs", False)
322
+ init_args.setdefault("fine_tune_lr", 1e-4)
323
+ init_args.setdefault("fine_tune_steps", 5000)
324
+ init_args.setdefault("fine_tune_batch_size", self.default_batch_size)
325
+ init_args.setdefault("eval_during_fine_tune", False)
326
+ init_args.setdefault("fine_tune_eval_max_items", 256)
327
+ init_args.setdefault("fine_tune_shuffle_buffer_size", 10_000)
328
+
329
+ eval_during_fine_tune = init_args["eval_during_fine_tune"]
330
+ output_dir = Path(self.path) / "transformers_logs"
331
+ fine_tune_trainer_kwargs = dict(
332
+ output_dir=str(output_dir),
333
+ per_device_train_batch_size=init_args["fine_tune_batch_size"],
334
+ per_device_eval_batch_size=init_args["fine_tune_batch_size"],
335
+ learning_rate=init_args["fine_tune_lr"],
336
+ lr_scheduler_type="linear",
337
+ warmup_ratio=0.0,
338
+ optim="adamw_torch_fused",
339
+ logging_dir=str(output_dir),
340
+ logging_strategy="steps",
341
+ logging_steps=100,
342
+ report_to="none",
343
+ max_steps=init_args["fine_tune_steps"],
344
+ gradient_accumulation_steps=1,
345
+ dataloader_num_workers=self.data_loader_num_workers,
346
+ tf32=self._has_tf32(),
347
+ save_only_model=True,
348
+ prediction_loss_only=True,
349
+ save_total_limit=1,
350
+ save_strategy="steps" if eval_during_fine_tune else "no",
351
+ save_steps=100 if eval_during_fine_tune else None,
352
+ evaluation_strategy="steps" if eval_during_fine_tune else "no",
353
+ eval_steps=100 if eval_during_fine_tune else None,
354
+ load_best_model_at_end=True if eval_during_fine_tune else False,
355
+ metric_for_best_model="eval_loss" if eval_during_fine_tune else None,
356
+ )
357
+ user_fine_tune_trainer_kwargs = init_args.get("fine_tune_trainer_kwargs", {})
358
+ fine_tune_trainer_kwargs.update(user_fine_tune_trainer_kwargs)
359
+
360
+ init_args["fine_tune_trainer_kwargs"] = fine_tune_trainer_kwargs
361
+
362
+ return init_args
363
+
275
364
  def _fit(
276
365
  self,
277
366
  train_data: TimeSeriesDataFrame,
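Worth noting from ``_get_model_params`` above: user-supplied ``fine_tune_trainer_kwargs`` override the built defaults key-by-key via ``dict.update``, so only the keys you set change. A small standalone illustration of that precedence (values shortened and hypothetical):

# Defaults are built first (only a subset shown here), then user kwargs win on conflicting keys
default_trainer_kwargs = {
    "learning_rate": 1e-4,
    "max_steps": 5000,
    "logging_steps": 100,
    "optim": "adamw_torch_fused",
}
user_fine_tune_trainer_kwargs = {"max_steps": 500, "warmup_ratio": 0.1}  # hypothetical overrides

default_trainer_kwargs.update(user_fine_tune_trainer_kwargs)
assert default_trainer_kwargs["max_steps"] == 500              # user value wins
assert default_trainer_kwargs["optim"] == "adamw_torch_fused"  # untouched defaults remain
assert default_trainer_kwargs["warmup_ratio"] == 0.1           # extra keys are passed through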
@@ -279,8 +368,177 @@ class ChronosModel(AbstractTimeSeriesModel):
279
368
  time_limit: int = None,
280
369
  **kwargs,
281
370
  ) -> None:
371
+ from transformers.trainer import PrinterCallback, Trainer, TrainingArguments
372
+
373
+ from .pipeline import ChronosBoltPipeline, ChronosPipeline
374
+ from .pipeline.utils import (
375
+ ChronosFineTuningDataset,
376
+ EvaluateAndSaveFinalStepCallback,
377
+ LoggerCallback,
378
+ TimeLimitCallback,
379
+ )
380
+
381
+ # TODO: Add support for fine-tuning models with context_length longer than the pretrained model
382
+
383
+ # verbosity < 3: all logs and warnings from transformers will be suppressed
384
+ # verbosity >= 3: progress bar and loss logs will be logged
385
+ # verbosity 4: everything will be logged
386
+ verbosity = kwargs.get("verbosity", 2)
387
+ for logger_name in logging.root.manager.loggerDict:
388
+ if "transformers" in logger_name:
389
+ transformers_logger = logging.getLogger(logger_name)
390
+ transformers_logger.setLevel(logging.ERROR if verbosity <= 3 else logging.INFO)
391
+
282
392
  self._check_fit_params()
283
- self.time_limit = time_limit
393
+
394
+ fine_tune_args = self._get_model_params()
395
+ do_fine_tune = fine_tune_args["fine_tune"]
396
+
397
+ if do_fine_tune:
398
+ assert train_data is not None, "train_data cannot be None when fine_tune=True"
399
+
400
+ eval_during_fine_tune = val_data is not None and fine_tune_args["eval_during_fine_tune"]
401
+
402
+ start_time = time.monotonic()
403
+ if do_fine_tune:
404
+ context_length = self._get_context_length(train_data)
405
+ # load model pipeline to device memory
406
+ self.load_model_pipeline(is_training=True)
407
+
408
+ fine_tune_prediction_length = self.prediction_length
409
+ model_prediction_length = self.model_pipeline.inner_model.config.chronos_config["prediction_length"]
410
+
411
+ if isinstance(self.model_pipeline, ChronosPipeline):
412
+ pipeline_specific_trainer_kwargs = {}
413
+
414
+ # Update prediction_length of the model
415
+ # NOTE: We only do this for ChronosPipeline because the prediction length of ChronosBolt models
416
+ # is fixed due to direct multistep forecasting setup
417
+ self.model_pipeline.model.config.prediction_length = fine_tune_prediction_length
418
+ self.model_pipeline.inner_model.config.chronos_config["prediction_length"] = (
419
+ fine_tune_prediction_length
420
+ )
421
+
422
+ elif isinstance(self.model_pipeline, ChronosBoltPipeline):
423
+ # custom label_names is needed for validation to work with ChronosBolt models
424
+ pipeline_specific_trainer_kwargs = dict(label_names=["target"])
425
+
426
+ # truncate prediction_length if it goes beyond ChronosBolt's prediction_length
427
+ fine_tune_prediction_length = min(model_prediction_length, self.prediction_length)
428
+
429
+ if self.prediction_length != fine_tune_prediction_length:
430
+ logger.debug(
431
+ f"ChronosBolt models can only be fine-tuned with a maximum prediction_length of {model_prediction_length}. "
432
+ f"Fine-tuning prediction_length has been changed to {fine_tune_prediction_length}."
433
+ )
434
+
435
+ fine_tune_trainer_kwargs = fine_tune_args["fine_tune_trainer_kwargs"]
436
+ fine_tune_trainer_kwargs["disable_tqdm"] = fine_tune_trainer_kwargs.get("disable_tqdm", (verbosity < 3))
437
+ fine_tune_trainer_kwargs["use_cpu"] = str(self.model_pipeline.inner_model.device) == "cpu"
438
+
439
+ # TODO: adamw_torch_fused is not supported on CPU in torch <= 2.3. When torch 2.4 becomes the lower bound
440
+ # this if block can be removed because torch >= 2.4 supports AdamW optimizer with fused=True on CPU
441
+ if fine_tune_trainer_kwargs["use_cpu"] and fine_tune_trainer_kwargs["optim"] == "adamw_torch_fused":
442
+ fine_tune_trainer_kwargs["optim"] = "adamw_torch"
443
+
444
+ output_dir = Path(fine_tune_trainer_kwargs["output_dir"])
445
+
446
+ if not eval_during_fine_tune:
447
+ # turn off eval-related trainer args
448
+ fine_tune_trainer_kwargs["evaluation_strategy"] = "no"
449
+ fine_tune_trainer_kwargs["eval_steps"] = None
450
+ fine_tune_trainer_kwargs["load_best_model_at_end"] = False
451
+ fine_tune_trainer_kwargs["metric_for_best_model"] = None
452
+
453
+ training_args = TrainingArguments(**fine_tune_trainer_kwargs, **pipeline_specific_trainer_kwargs)
454
+ tokenizer_train_dataset = ChronosFineTuningDataset(
455
+ target_df=train_data,
456
+ target_column=self.target,
457
+ context_length=context_length,
458
+ prediction_length=fine_tune_prediction_length,
459
+ # if tokenizer exists, then the data is returned in the HF-style format accepted by
460
+ # the original Chronos models otherwise the data is returned in ChronosBolt's format
461
+ tokenizer=getattr(self.model_pipeline, "tokenizer", None),
462
+ mode="training",
463
+ ).shuffle(fine_tune_args["fine_tune_shuffle_buffer_size"])
464
+
465
+ callbacks = []
466
+ if time_limit is not None:
467
+ callbacks.append(TimeLimitCallback(time_limit=time_limit))
468
+
469
+ if val_data is not None:
470
+ callbacks.append(EvaluateAndSaveFinalStepCallback())
471
+ # evaluate on a randomly-sampled subset
472
+ fine_tune_eval_max_items = (
473
+ min(val_data.num_items, fine_tune_args["fine_tune_eval_max_items"])
474
+ if fine_tune_args["fine_tune_eval_max_items"] is not None
475
+ else val_data.num_items
476
+ )
477
+
478
+ if fine_tune_eval_max_items < val_data.num_items:
479
+ eval_items = np.random.choice(
480
+ val_data.item_ids.values, size=fine_tune_eval_max_items, replace=False
481
+ )
482
+ val_data = val_data.loc[eval_items]
483
+
484
+ tokenizer_val_dataset = ChronosFineTuningDataset(
485
+ target_df=val_data,
486
+ target_column=self.target,
487
+ context_length=context_length,
488
+ prediction_length=fine_tune_prediction_length,
489
+ tokenizer=getattr(self.model_pipeline, "tokenizer", None),
490
+ mode="validation",
491
+ )
492
+
493
+ trainer = Trainer(
494
+ model=self.model_pipeline.inner_model,
495
+ args=training_args,
496
+ train_dataset=tokenizer_train_dataset,
497
+ eval_dataset=tokenizer_val_dataset if val_data is not None else None,
498
+ callbacks=callbacks,
499
+ )
500
+
501
+ # remove PrinterCallback from callbacks which logs to the console via a print() call,
502
+ # so it cannot be handled by setting the log level
503
+ trainer.pop_callback(PrinterCallback)
504
+
505
+ if verbosity >= 3:
506
+ logger.warning(
507
+ "Transformers logging is turned on during fine-tuning. Note that losses reported by transformers "
508
+ "may not correspond to those specified via `eval_metric`."
509
+ )
510
+ trainer.add_callback(LoggerCallback())
511
+
512
+ if val_data is not None:
513
+ # evaluate once before training
514
+ zero_shot_eval_loss = trainer.evaluate()["eval_loss"]
515
+
516
+ trainer.train()
517
+
518
+ if eval_during_fine_tune:
519
+ # get the best eval_loss logged during fine-tuning
520
+ log_history_df = pd.DataFrame(trainer.state.log_history)
521
+ best_train_eval_loss = log_history_df["eval_loss"].min()
522
+ elif val_data is not None:
523
+ # evaluate at the end of fine-tuning
524
+ best_train_eval_loss = trainer.evaluate()["eval_loss"]
525
+
526
+ if val_data is None or best_train_eval_loss <= zero_shot_eval_loss:
527
+ fine_tuned_ckpt_path = Path(self.path) / self.fine_tuned_ckpt_name
528
+ logger.info(f"Saving fine-tuned model to {fine_tuned_ckpt_path}")
529
+ self.model_pipeline.inner_model.save_pretrained(Path(self.path) / self.fine_tuned_ckpt_name)
530
+ else:
531
+ # Reset the model to its pretrained state
532
+ logger.info("Validation loss worsened after fine-tuning. Reverting to the pretrained model.")
533
+ self.model_pipeline = None
534
+ self.load_model_pipeline(is_training=False)
535
+
536
+ if not fine_tune_args["keep_transformers_logs"]:
537
+ logger.debug(f"Removing transformers_logs directory {output_dir}")
538
+ shutil.rmtree(output_dir)
539
+
540
+ if time_limit is not None:
541
+ self.time_limit = time_limit - (time.monotonic() - start_time) # inference time budget
284
542
 
285
543
  def _get_inference_data_loader(
286
544
  self,
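The checkpoint-acceptance rule embedded in the Trainer plumbing above reduces to a simple comparison: keep the fine-tuned weights unless validation loss got worse than the zero-shot baseline, and always keep them when there is no validation data. A compact restatement, detached from ``transformers``:

def keep_fine_tuned_weights(has_val_data, zero_shot_eval_loss=None, best_eval_loss=None):
    """Simplified restatement of the decision made at the end of _fit above."""
    if not has_val_data:
        # nothing to compare against -> keep the fine-tuned checkpoint
        return True
    return best_eval_loss <= zero_shot_eval_loss

assert keep_fine_tuned_weights(has_val_data=False)
assert keep_fine_tuned_weights(True, zero_shot_eval_loss=0.50, best_eval_loss=0.42)      # improved -> keep
assert not keep_fine_tuned_weights(True, zero_shot_eval_loss=0.50, best_eval_loss=0.61)  # worsened -> revert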
@@ -305,6 +563,13 @@ class ChronosModel(AbstractTimeSeriesModel):
305
563
  on_batch=timeout_callback(seconds=time_limit),
306
564
  )
307
565
 
566
+ def _get_context_length(self, data: TimeSeriesDataFrame) -> int:
567
+ context_length = self.context_length or min(
568
+ data.num_timesteps_per_item().max(),
569
+ self.maximum_context_length,
570
+ )
571
+ return context_length
572
+
308
573
  def _predict(
309
574
  self,
310
575
  data: TimeSeriesDataFrame,
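The extracted ``_get_context_length`` is now shared by ``_fit`` and ``_predict``: an explicit ``context_length`` wins, otherwise the longest series in the data is used, capped at ``maximum_context_length`` (2048). A tiny restatement:

def get_context_length(user_context_length, longest_series_length, maximum_context_length=2048):
    # mirrors `self.context_length or min(data.num_timesteps_per_item().max(), self.maximum_context_length)`
    return user_context_length or min(longest_series_length, maximum_context_length)

assert get_context_length(None, 300) == 300      # inferred from the data
assert get_context_length(None, 10_000) == 2048  # capped at the maximum
assert get_context_length(512, 10_000) == 512    # user setting wins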
@@ -319,15 +584,13 @@ class ChronosModel(AbstractTimeSeriesModel):
319
584
  # Note that this is independent of the model's own context length set in the model's config file.
320
585
  # For example, if the context_length is set to 2048 here but the model expects context length
321
586
  # (according to its config.json file) of 512, it will further truncate the series during inference.
322
- context_length = self.context_length or min(
323
- data.num_timesteps_per_item().max(),
324
- self.maximum_context_length,
325
- )
587
+ context_length = self._get_context_length(data)
326
588
 
327
589
  with warning_filter(all_warnings=True):
328
590
  import torch
329
591
 
330
592
  if self.model_pipeline is None:
593
+ # FIXME: optimization_strategy is ignored when model is fine-tuned
331
594
  # load model pipeline to device memory
332
595
  self.load_model_pipeline()
333
596
 
@@ -366,7 +629,7 @@ class ChronosModel(AbstractTimeSeriesModel):
366
629
  return TimeSeriesDataFrame(df)
367
630
 
368
631
  def _more_tags(self) -> Dict:
369
- return {"allow_nan": True}
632
+ return {"allow_nan": True, "can_use_val_data": self._get_model_params()["fine_tune"]}
370
633
 
371
634
  def score_and_cache_oof(
372
635
  self,
@@ -2,12 +2,15 @@
2
2
 
3
3
  from enum import Enum
4
4
  from pathlib import Path
5
- from typing import Dict, List, Optional, Tuple, Union
5
+ from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
6
6
 
7
7
  import torch
8
8
 
9
9
  from .utils import left_pad_and_stack_1D
10
10
 
11
+ if TYPE_CHECKING:
12
+ from transformers import PreTrainedModel
13
+
11
14
 
12
15
  class ForecastType(Enum):
13
16
  SAMPLES = "samples"
@@ -36,6 +39,16 @@ class BaseChronosPipeline(metaclass=PipelineRegistry):
36
39
  "float64": torch.float64,
37
40
  }
38
41
 
42
+ def __init__(self, inner_model: "PreTrainedModel"):
43
+ """
44
+ Parameters
45
+ ----------
46
+ inner_model : PreTrainedModel
47
+ A hugging-face transformers PreTrainedModel, e.g., T5ForConditionalGeneration
48
+ """
49
+ # for easy access to the inner HF-style model
50
+ self.inner_model = inner_model
51
+
39
52
  def _prepare_and_validate_context(self, context: Union[torch.Tensor, List[torch.Tensor]]):
40
53
  if isinstance(context, list):
41
54
  context = left_pad_and_stack_1D(context)
@@ -65,9 +65,12 @@ class ChronosTokenizer:
65
65
  which concrete classes must implement.
66
66
  """
67
67
 
68
- def input_transform(self, context: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, Any]:
68
+ def context_input_transform(
69
+ self,
70
+ context: torch.Tensor,
71
+ ) -> Tuple:
69
72
  """
70
- Turn a batch of time series into token IDs, attention map, and scale.
73
+ Turn a batch of time series into token IDs, attention mask, and tokenizer_state.
71
74
 
72
75
  Parameters
73
76
  ----------
@@ -87,9 +90,40 @@ class ChronosTokenizer:
87
90
  which input observations are not ``torch.nan`` (i.e. not
88
91
  missing nor padding).
89
92
  tokenizer_state
90
- An object that will be passed to ``output_transform``.
91
- Contains the relevant context to decode output samples into
92
- real values, such as location and scale parameters.
93
+ An object that can be passed to ``label_input_transform``
94
+ and ``output_transform``. Contains the relevant information
95
+ to decode output samples into real values,
96
+ such as location and scale parameters.
97
+ """
98
+ raise NotImplementedError()
99
+
100
+ def label_input_transform(self, label: torch.Tensor, tokenizer_state: Any) -> Tuple:
101
+ """
102
+ Turn a batch of label slices of time series into token IDs and attention mask
103
+ using the ``tokenizer_state`` provided by ``context_input_transform``.
104
+
105
+ Parameters
106
+ ----------
107
+ label
108
+ A tensor shaped (batch_size, time_length), containing the
109
+ timeseries label, i.e., the ground-truth future values.
110
+ tokenizer_state
111
+ An object returned by ``context_input_transform`` containing
112
+ relevant information to preprocess data, such as location and
113
+ scale. The nature of this depends on the specific tokenizer.
114
+ This is used for tokenizing the label, in order to use the same
115
+ scaling used to tokenize the context.
116
+
117
+ Returns
118
+ -------
119
+ token_ids
120
+ A tensor of integers, shaped (batch_size, time_length + 1)
121
+ if ``config.use_eos_token`` and (batch_size, time_length)
122
+ otherwise, containing token IDs for the input series.
123
+ attention_mask
124
+ A boolean tensor, same shape as ``token_ids``, indicating
125
+ which input observations are not ``torch.nan`` (i.e. not
126
+ missing nor padding).
93
127
  """
94
128
  raise NotImplementedError()
95
129
 
@@ -117,6 +151,11 @@ class ChronosTokenizer:
117
151
 
118
152
 
119
153
  class MeanScaleUniformBins(ChronosTokenizer):
154
+ """
155
+ A tokenizer that performs mean scaling and then quantizes the scaled time series into
156
+ uniformly-spaced bins between some bounds on the real line.
157
+ """
158
+
120
159
  def __init__(self, low_limit: float, high_limit: float, config: ChronosConfig) -> None:
121
160
  self.config = config
122
161
  self.centers = torch.linspace(
@@ -132,15 +171,15 @@ class MeanScaleUniformBins(ChronosTokenizer):
132
171
  )
133
172
  )
134
173
 
135
- def input_transform(self, context: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
136
- batch_size, length = context.shape
174
+ def _input_transform(
175
+ self, context: torch.Tensor, scale: Optional[torch.Tensor] = None
176
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
177
+ attention_mask = ~torch.isnan(context)
137
178
 
138
- if length > self.config.context_length:
139
- context = context[..., -self.config.context_length :]
179
+ if scale is None:
180
+ scale = torch.nansum(torch.abs(context) * attention_mask, dim=-1) / torch.nansum(attention_mask, dim=-1)
181
+ scale[~(scale > 0)] = 1.0
140
182
 
141
- attention_mask = ~torch.isnan(context)
142
- scale = torch.nansum(torch.abs(context) * attention_mask, dim=-1) / torch.nansum(attention_mask, dim=-1)
143
- scale[~(scale > 0)] = 1.0
144
183
  scaled_context = context / scale.unsqueeze(dim=-1)
145
184
  token_ids = (
146
185
  torch.bucketize(
@@ -153,15 +192,42 @@ class MeanScaleUniformBins(ChronosTokenizer):
153
192
  + self.config.n_special_tokens
154
193
  )
155
194
  token_ids[~attention_mask] = self.config.pad_token_id
195
+ token_ids.clamp_(0, self.config.n_tokens - 1)
156
196
 
157
- if self.config.use_eos_token:
158
- eos_tokens = torch.full((batch_size, 1), fill_value=self.config.eos_token_id)
159
- token_ids = torch.concat((token_ids, eos_tokens), dim=1)
160
- eos_mask = torch.full((batch_size, 1), fill_value=True)
161
- attention_mask = torch.concat((attention_mask, eos_mask), dim=1)
197
+ return token_ids, attention_mask, scale
198
+
199
+ def _append_eos_token(
200
+ self, token_ids: torch.Tensor, attention_mask: torch.Tensor
201
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
202
+ batch_size = token_ids.shape[0]
203
+ eos_tokens = torch.full((batch_size, 1), fill_value=self.config.eos_token_id)
204
+ token_ids = torch.concat((token_ids, eos_tokens), dim=1)
205
+ eos_mask = torch.full((batch_size, 1), fill_value=True)
206
+ attention_mask = torch.concat((attention_mask, eos_mask), dim=1)
207
+
208
+ return token_ids, attention_mask
209
+
210
+ def context_input_transform(self, context: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
211
+ length = context.shape[-1]
212
+
213
+ if length > self.config.context_length:
214
+ context = context[..., -self.config.context_length :]
215
+
216
+ token_ids, attention_mask, scale = self._input_transform(context=context)
217
+
218
+ if self.config.use_eos_token and self.config.model_type == "seq2seq":
219
+ token_ids, attention_mask = self._append_eos_token(token_ids=token_ids, attention_mask=attention_mask)
162
220
 
163
221
  return token_ids, attention_mask, scale
164
222
 
223
+ def label_input_transform(self, label: torch.Tensor, scale: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
224
+ token_ids, attention_mask, _ = self._input_transform(context=label, scale=scale)
225
+
226
+ if self.config.use_eos_token:
227
+ token_ids, attention_mask = self._append_eos_token(token_ids=token_ids, attention_mask=attention_mask)
228
+
229
+ return token_ids, attention_mask
230
+
165
231
  def output_transform(self, samples: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
166
232
  scale_unsqueezed = scale.unsqueeze(-1).unsqueeze(-1)
167
233
  indices = torch.clamp(
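The refactor above splits tokenization into a context pass and a label pass that share one scale. A pure-``torch`` restatement of the mean-scaling core of ``_input_transform`` (simplified: no binning, no special tokens), showing why reusing ``scale`` keeps context and label in the same units:

import torch

def mean_scale(x: torch.Tensor, scale: torch.Tensor = None):
    """Simplified mirror of the scaling step in MeanScaleUniformBins._input_transform."""
    attention_mask = ~torch.isnan(x)
    if scale is None:
        scale = torch.nansum(torch.abs(x) * attention_mask, dim=-1) / attention_mask.sum(dim=-1)
        scale[~(scale > 0)] = 1.0  # guard against all-missing or all-zero series
    return x / scale.unsqueeze(dim=-1), scale

context = torch.tensor([[1.0, 2.0, 3.0, float("nan"), 6.0]])
label = torch.tensor([[9.0, 12.0]])

scaled_context, scale = mean_scale(context)       # scale = mean(|observed|) = 3.0
scaled_label, _ = mean_scale(label, scale=scale)  # label reuses the context scale -> [[3.0, 4.0]]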
@@ -302,6 +368,7 @@ class ChronosPipeline(BaseChronosPipeline):
302
368
  forecast_type: ForecastType = ForecastType.SAMPLES
303
369
 
304
370
  def __init__(self, tokenizer, model):
371
+ super().__init__(inner_model=model.model)
305
372
  self.tokenizer = tokenizer
306
373
  self.model = model
307
374
 
@@ -330,7 +397,7 @@ class ChronosPipeline(BaseChronosPipeline):
330
397
  provided, and the extra 1 is for EOS.
331
398
  """
332
399
  context = self._prepare_and_validate_context(context=context)
333
- token_ids, attention_mask, tokenizer_state = self.tokenizer.input_transform(context)
400
+ token_ids, attention_mask, tokenizer_state = self.tokenizer.context_input_transform(context)
334
401
  embeddings = self.model.encode(
335
402
  input_ids=token_ids.to(self.model.device),
336
403
  attention_mask=attention_mask.to(self.model.device),
@@ -402,7 +469,7 @@ class ChronosPipeline(BaseChronosPipeline):
402
469
  remaining = prediction_length
403
470
 
404
471
  while remaining > 0:
405
- token_ids, attention_mask, scale = self.tokenizer.input_transform(context)
472
+ token_ids, attention_mask, scale = self.tokenizer.context_input_transform(context)
406
473
  samples = self.model(
407
474
  token_ids.to(self.model.device),
408
475
  attention_mask.to(self.model.device),
@@ -289,7 +289,7 @@ class ChronosBoltModelForForecasting(T5PreTrainedModel):
289
289
  # normalize target
290
290
  target, _ = self.instance_norm(target, loc_scale)
291
291
  target = target.unsqueeze(1) # type: ignore
292
- assert self.chronos_config.prediction_length == target.shape[-1]
292
+ assert self.chronos_config.prediction_length >= target.shape[-1]
293
293
 
294
294
  target = target.to(quantile_preds.device)
295
295
  target_mask = (
@@ -297,6 +297,12 @@ class ChronosBoltModelForForecasting(T5PreTrainedModel):
297
297
  )
298
298
  target[~target_mask] = 0.0
299
299
 
300
+ # pad target and target_mask if they are shorter than model's prediction_length
301
+ if self.chronos_config.prediction_length > target.shape[-1]:
302
+ padding_shape = (*target.shape[:-1], self.chronos_config.prediction_length - target.shape[-1])
303
+ target = torch.cat([target, torch.zeros(padding_shape).to(target)], dim=-1)
304
+ target_mask = torch.cat([target_mask, torch.zeros(padding_shape).to(target_mask)], dim=-1)
305
+
300
306
  loss = (
301
307
  2
302
308
  * torch.abs(
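The new padding branch right-pads both ``target`` and ``target_mask`` with zeros up to the checkpoint's fixed ``prediction_length``, so a task with a shorter horizon still lines up with the quantile head's output while the padded steps stay masked out of the loss. A standalone sketch of the same operation with hypothetical shapes:

import torch

model_prediction_length = 64  # fixed by the ChronosBolt checkpoint config
batch_size, task_prediction_length = 4, 24

target = torch.randn(batch_size, 1, task_prediction_length)
target_mask = torch.ones_like(target, dtype=torch.bool)

if model_prediction_length > target.shape[-1]:
    padding_shape = (*target.shape[:-1], model_prediction_length - target.shape[-1])
    target = torch.cat([target, torch.zeros(padding_shape).to(target)], dim=-1)
    target_mask = torch.cat([target_mask, torch.zeros(padding_shape).to(target_mask)], dim=-1)

assert target.shape == target_mask.shape == (batch_size, 1, model_prediction_length)
assert not target_mask[..., task_prediction_length:].any()  # padded steps never contribute to the loss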
@@ -373,6 +379,7 @@ class ChronosBoltPipeline(BaseChronosPipeline):
373
379
  _aliases = ["PatchedT5Pipeline"]
374
380
 
375
381
  def __init__(self, model: ChronosBoltModelForForecasting):
382
+ super().__init__(inner_model=model)
376
383
  self.model = model
377
384
 
378
385
  @property
@@ -1,15 +1,212 @@
1
+ import logging
1
2
  import os
2
3
  import re
3
4
  import time
5
+ from itertools import chain, cycle
4
6
  from pathlib import Path
5
- from typing import Callable, List, Optional
7
+ from typing import TYPE_CHECKING, Callable, Iterable, Iterator, List, Literal, Optional
6
8
 
7
9
  import numpy as np
8
10
  import torch
11
+ from gluonts.dataset.field_names import FieldName
12
+ from gluonts.transform import ExpectedNumInstanceSampler, InstanceSplitter, ValidationSplitSampler
13
+ from torch.utils.data import IterableDataset
14
+ from transformers import TrainerCallback
9
15
 
10
16
  from autogluon.common.loaders.load_s3 import download, list_bucket_prefix_suffix_contains_s3
11
17
  from autogluon.core.utils.exceptions import TimeLimitExceeded
12
18
  from autogluon.timeseries.dataset.ts_dataframe import TimeSeriesDataFrame
19
+ from autogluon.timeseries.models.gluonts.abstract_gluonts import SimpleGluonTSDataset
20
+
21
+ if TYPE_CHECKING:
22
+ # TODO: fix the underlying reason for this circular import, the pipeline should handle tokenization
23
+ from autogluon.timeseries.models.chronos.pipeline.chronos import ChronosTokenizer
24
+
25
+
26
+ logger = logging.getLogger("autogluon.timeseries.models.chronos")
27
+
28
+
29
+ class PseudoShuffledIterableDataset(IterableDataset):
30
+ """
31
+ Shuffle entries from an iterable by temporarily accumulating them
32
+ in an intermediate buffer.
33
+
34
+ Parameters
35
+ ----------
36
+ base_dataset
37
+ The original iterable object, representing the dataset.
38
+ shuffle_buffer_size
39
+ Size of the buffer use to shuffle entries from the base dataset.
40
+ """
41
+
42
+ def __init__(self, base_dataset, shuffle_buffer_size: int = 100) -> None:
43
+ super().__init__()
44
+ assert shuffle_buffer_size > 0
45
+ self.base_dataset = base_dataset
46
+ self.shuffle_buffer_size = shuffle_buffer_size
47
+ self.generator = torch.Generator()
48
+
49
+ def __iter__(self):
50
+ shuffle_buffer = []
51
+
52
+ for element in self.base_dataset:
53
+ shuffle_buffer.append(element)
54
+ if len(shuffle_buffer) >= self.shuffle_buffer_size:
55
+ idx = torch.randint(len(shuffle_buffer), size=(), generator=self.generator)
56
+ yield shuffle_buffer.pop(idx)
57
+
58
+ while shuffle_buffer:
59
+ idx = torch.randint(len(shuffle_buffer), size=(), generator=self.generator)
60
+ yield shuffle_buffer.pop(idx)
61
+
62
+
63
+ class ChronosFineTuningDataset(IterableDataset):
64
+ """
65
+ Dataset wrapper to convert a ``TimeSeriesDataFrame`` into an iterable dataset
66
+ compatible with Chronos models.
67
+
68
+ When a ``tokenizer`` is provided, data is converted into HuggingFace-compatible set of
69
+ ``input_ids``, ``attention_mask`` and ``labels``, used by the original Chronos models.
70
+
71
+ When the ``tokenizer`` is omitted, data is converted into the format compatible with
72
+ ChronosBolt models, i.e., ``context`` and ``target``.
73
+
74
+ Parameters
75
+ ----------
76
+ target_df : TimeSeriesDataFrame
77
+ The ``TimeSeriesDataFrame`` to be converted
78
+ target_column : str, default = "target"
79
+ The name of the column which contains the target time series, by default "target"
80
+ context_length : int, default = 512
81
+ The length of the historical context
82
+ prediction_length : int, default = 64
83
+ The prediction_length, i.e., length of label or target
84
+ tokenizer : ``ChronosTokenizer``, default = None
85
+ When a ``ChronosTokenizer`` object is provided, data will be converted into the
86
+ HuggingFace format accepted by the original Chronos models using this ``ChronosTokenizer``.
87
+ If None, data will be converted into the format accepted by ChronosBolt models.
88
+ mode : Literal["training", "validation"], default = "training"
89
+ When ``training``, random slices from the time series will be returned for training purposes.
90
+ If ``validation``, the last slice of each time series returned in the original order.
91
+ """
92
+
93
+ def __init__(
94
+ self,
95
+ target_df: TimeSeriesDataFrame,
96
+ target_column: str = "target",
97
+ context_length: int = 512,
98
+ prediction_length: int = 64,
99
+ tokenizer: Optional["ChronosTokenizer"] = None,
100
+ mode: Literal["training", "validation"] = "training",
101
+ ) -> None:
102
+ super().__init__()
103
+
104
+ assert mode in ("training", "validation")
105
+
106
+ # A dummy hourly freq is used because the model doesn't actually need the freq
107
+ self.gluonts_dataset = SimpleGluonTSDataset(target_df=target_df, freq="h", target_column=target_column)
108
+ self.tokenizer = tokenizer
109
+ self.context_length = context_length
110
+ self.prediction_length = prediction_length
111
+ self.mode = mode
112
+
113
+ def _create_instance_splitter(self, mode: str):
114
+ instance_sampler = {
115
+ "training": ExpectedNumInstanceSampler(
116
+ num_instances=1.0, min_future=self.prediction_length, min_instances=1
117
+ ),
118
+ "validation": ValidationSplitSampler(min_future=self.prediction_length),
119
+ }[mode]
120
+
121
+ return InstanceSplitter(
122
+ target_field=FieldName.TARGET,
123
+ is_pad_field=FieldName.IS_PAD,
124
+ start_field=FieldName.START,
125
+ forecast_start_field=FieldName.FORECAST_START,
126
+ instance_sampler=instance_sampler,
127
+ past_length=self.context_length,
128
+ future_length=self.prediction_length,
129
+ dummy_value=np.nan,
130
+ )
131
+
132
+ def _create_training_data(self, data: Iterable[dict]):
133
+ data = chain.from_iterable(cycle([data]))
134
+ split_transform = self._create_instance_splitter("training")
135
+ data = split_transform.apply(data, is_train=True)
136
+ return data
137
+
138
+ def _create_validation_data(self, data: Iterable[dict]):
139
+ data = self._create_instance_splitter("validation").apply(data, is_train=False)
140
+ return data
141
+
142
+ def to_chronos_format(self, entry: dict) -> dict:
143
+ """Converts an entry from GluonTS data format with past and future targets
144
+ to the HuggingFace format accepted by the original Chronos models using the ChronosTokenizer.
145
+
146
+ Parameters
147
+ ----------
148
+ entry : dict
149
+ time series data entry in GluonTS format with ``past_target`` and ``future_target`` keys
150
+
151
+ Returns
152
+ -------
153
+ dict
154
+ time series data entry in HuggingFace format with ``input_ids``, ``attention_mask``, and ``labels``
155
+ """
156
+ assert self.tokenizer is not None, "A ChronosTokenizer is required to convert data into the Chronos format"
157
+ past_target = torch.tensor(entry[f"past_{FieldName.TARGET}"]).unsqueeze(0)
158
+ input_ids, attention_mask, scale = self.tokenizer.context_input_transform(past_target)
159
+ future_target = torch.tensor(entry[f"future_{FieldName.TARGET}"]).unsqueeze(0)
160
+ labels, labels_mask = self.tokenizer.label_input_transform(future_target, scale)
161
+ labels[labels_mask == 0] = -100
162
+
163
+ return {
164
+ "input_ids": input_ids.squeeze(0),
165
+ "attention_mask": attention_mask.squeeze(0),
166
+ "labels": labels.squeeze(0),
167
+ }
168
+
169
+ def to_chronos_bolt_format(self, entry: dict) -> dict:
170
+ """Converts an entry from GluonTS data format with past and future targets
171
+ to the format accepted by the ChronosBolt models.
172
+
173
+ Parameters
174
+ ----------
175
+ entry : dict
176
+ time series data entry in GluonTS format with ``past_target`` and ``future_target`` keys
177
+
178
+ Returns
179
+ -------
180
+ dict
181
+ time series data entry in ChronosBolt format with ``context`` and ``target``
182
+ """
183
+ past_target = torch.tensor(entry[f"past_{FieldName.TARGET}"])
184
+ future_target = torch.tensor(entry[f"future_{FieldName.TARGET}"])
185
+
186
+ return {"context": past_target, "target": future_target}
187
+
188
+ def __iter__(self) -> Iterator:
189
+ if self.mode == "training":
190
+ iterable = self._create_training_data(self.gluonts_dataset)
191
+ elif self.mode == "validation":
192
+ iterable = self._create_validation_data(self.gluonts_dataset)
193
+
194
+ format_transform_fn = self.to_chronos_format if self.tokenizer is not None else self.to_chronos_bolt_format
195
+ for entry in iterable:
196
+ yield format_transform_fn(entry)
197
+
198
+ def shuffle(self, shuffle_buffer_size: Optional[int] = None):
199
+ """Returns a (pseudo) shuffled version of this iterable dataset.
200
+
201
+ Parameters
202
+ ----------
203
+ shuffle_buffer_size : int, optional, default = None
204
+ The shuffle buffer size used for pseudo shuffling
205
+ """
206
+ assert shuffle_buffer_size is None or shuffle_buffer_size >= 0
207
+ if not shuffle_buffer_size:
208
+ return self
209
+ return PseudoShuffledIterableDataset(self, shuffle_buffer_size)
13
210
 
14
211
 
15
212
  def left_pad_and_stack_1D(tensors: List[torch.Tensor]) -> torch.Tensor:
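Putting the new pieces together, a hedged sketch of how the fine-tuning dataset is consumed: wrap a ``TimeSeriesDataFrame``, optionally pseudo-shuffle it, and hand it to a ``torch`` ``DataLoader``. Leaving out ``tokenizer`` yields ChronosBolt-style ``context``/``target`` entries; the toy data below is illustrative.

import numpy as np
import pandas as pd
from torch.utils.data import DataLoader

from autogluon.timeseries import TimeSeriesDataFrame
from autogluon.timeseries.models.chronos.pipeline.utils import ChronosFineTuningDataset

# Toy long-format data: 8 series of length 128
n_items, length = 8, 128
df = pd.DataFrame(
    {
        "item_id": np.repeat([f"item_{i}" for i in range(n_items)], length),
        "timestamp": np.tile(pd.date_range("2024-01-01", periods=length, freq="D"), n_items),
        "target": np.random.default_rng(1).normal(size=n_items * length),
    }
)
train_df = TimeSeriesDataFrame.from_data_frame(df, id_column="item_id", timestamp_column="timestamp")

# tokenizer=None -> ChronosBolt-style entries with "context" and "target" keys
dataset = ChronosFineTuningDataset(
    target_df=train_df,
    context_length=64,
    prediction_length=16,
    tokenizer=None,
    mode="training",
).shuffle(shuffle_buffer_size=100)

loader = DataLoader(dataset, batch_size=4)
batch = next(iter(loader))
print(batch["context"].shape, batch["target"].shape)  # e.g. torch.Size([4, 64]) torch.Size([4, 16])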
@@ -91,12 +288,51 @@ class ChronosInferenceDataLoader(torch.utils.data.DataLoader):
91
288
  self.callback()
92
289
 
93
290
 
291
+ class EvaluateAndSaveFinalStepCallback(TrainerCallback):
292
+ """Callback to evaluate and save the model at last training step."""
293
+
294
+ def on_step_end(self, args, state, control, **kwargs):
295
+ if state.global_step >= state.max_steps:
296
+ control.should_log = True
297
+ control.should_evaluate = True
298
+ control.should_save = True
299
+
300
+
301
+ class TimeLimitCallback(TrainerCallback):
302
+ def __init__(self, time_limit: int):
303
+ """
304
+ Callback to stop training once a specified time has elapsed.
305
+
306
+ Parameters
307
+ ----------
308
+ time_limit: int
309
+ maximum time allowed for training in seconds.
310
+ """
311
+ self.time_limit = time_limit
312
+ self.start_time = None
313
+
314
+ def on_train_begin(self, args, state, control, **kwargs):
315
+ self.start_time = time.monotonic()
316
+
317
+ def on_step_end(self, args, state, control, **kwargs):
318
+ elapsed_time = time.monotonic() - self.start_time
319
+ if elapsed_time > self.time_limit:
320
+ raise TimeLimitExceeded
321
+
322
+
323
+ class LoggerCallback(TrainerCallback):
324
+ def on_log(self, args, state, control, logs=None, **kwargs):
325
+ logs.pop("total_flos", None)
326
+ if state.is_local_process_zero:
327
+ logger.info(logs)
328
+
329
+
94
330
  def timeout_callback(seconds: Optional[float]) -> Callable:
95
331
  """Return a callback object that raises an exception if time limit is exceeded."""
96
- start_time = time.time()
332
+ start_time = time.monotonic()
97
333
 
98
334
  def callback() -> None:
99
- if seconds is not None and time.time() - start_time > seconds:
335
+ if seconds is not None and time.monotonic() - start_time > seconds:
100
336
  raise TimeLimitExceeded
101
337
 
102
338
  return callback
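Both time-limit mechanisms above now use ``time.monotonic`` (a change from ``time.time``, which can jump if the wall clock is adjusted): ``TimeLimitCallback`` stops the ``transformers`` Trainer during fine-tuning, while the closure returned by ``timeout_callback`` bounds inference. A small sketch of the latter, which raises ``TimeLimitExceeded`` once the budget is exhausted:

import time

from autogluon.core.utils.exceptions import TimeLimitExceeded
from autogluon.timeseries.models.chronos.pipeline.utils import timeout_callback

on_batch = timeout_callback(seconds=0.05)  # tiny budget, for illustration only

try:
    while True:
        time.sleep(0.01)  # stand-in for processing one inference batch
        on_batch()        # raises once roughly 0.05 s have elapsed
except TimeLimitExceeded:
    print("stopped: inference time budget exhausted")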
@@ -286,10 +286,6 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
286
286
 
287
287
  self.negative_data = (dataset[self.target] < 0).any()
288
288
 
289
- @property
290
- def default_context_length(self) -> int:
291
- return min(512, max(10, 2 * self.prediction_length))
292
-
293
289
  def preprocess(self, data: TimeSeriesDataFrame, is_train: bool = False, **kwargs) -> TimeSeriesDataFrame:
294
290
  # Copy data to avoid SettingWithCopyWarning from pandas
295
291
  data = data.copy()
@@ -357,25 +353,40 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
357
353
  known_covariates[columns] = self._real_column_transformers["known"].transform(known_covariates[columns])
358
354
  return known_covariates
359
355
 
356
+ def _get_default_params(self):
357
+ """Gets default parameters for GluonTS estimator initialization that are available after
358
+ AbstractTimeSeriesModel initialization (i.e., before deferred initialization). Models may
359
+ override this method to update default parameters.
360
+ """
361
+ return {
362
+ "batch_size": 64,
363
+ "context_length": min(512, max(10, 2 * self.prediction_length)),
364
+ "predict_batch_size": 500,
365
+ "early_stopping_patience": 20,
366
+ "max_epochs": 100,
367
+ "lr": 1e-3,
368
+ "freq": self._dummy_gluonts_freq,
369
+ "prediction_length": self.prediction_length,
370
+ "quantiles": self.quantile_levels,
371
+ }
372
+
360
373
  def _get_model_params(self) -> dict:
361
374
  """Gets params that are passed to the inner model."""
362
- init_args = super()._get_model_params().copy()
363
- init_args.setdefault("batch_size", 64)
364
- init_args.setdefault("context_length", self.default_context_length)
365
- init_args.setdefault("predict_batch_size", 500)
366
- init_args.setdefault("early_stopping_patience", 20)
367
- init_args.update(
368
- dict(
369
- freq=self._dummy_gluonts_freq,
370
- prediction_length=self.prediction_length,
371
- quantiles=self.quantile_levels,
372
- callbacks=self.callbacks,
373
- )
374
- )
375
- # Support MXNet kwarg names for backwards compatibility
376
- init_args.setdefault("lr", init_args.get("learning_rate", 1e-3))
377
- init_args.setdefault("max_epochs", init_args.get("epochs", 100))
378
- return init_args
375
+ # for backward compatibility with the old GluonTS MXNet API
376
+ parameter_name_aliases = {
377
+ "epochs": "max_epochs",
378
+ "learning_rate": "lr",
379
+ }
380
+
381
+ init_args = super()._get_model_params()
382
+ for alias, actual in parameter_name_aliases.items():
383
+ if alias in init_args:
384
+ if actual in init_args:
385
+ raise ValueError(f"Parameter '{alias}' cannot be specified when '{actual}' is also specified.")
386
+ else:
387
+ init_args[actual] = init_args.pop(alias)
388
+
389
+ return self._get_default_params() | init_args
379
390
 
380
391
  def _get_estimator_init_args(self) -> Dict[str, Any]:
381
392
  """Get GluonTS specific constructor arguments for estimator objects, an alias to `self._get_model_params`
@@ -395,7 +406,7 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
395
406
  default_trainer_kwargs = {
396
407
  "limit_val_batches": 3,
397
408
  "max_epochs": init_args["max_epochs"],
398
- "callbacks": init_args["callbacks"],
409
+ "callbacks": self.callbacks,
399
410
  "enable_progress_bar": False,
400
411
  "default_root_dir": self.path,
401
412
  }
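The net effect of the ``_get_default_params`` refactor above: legacy MXNet-era names are remapped first, then user values override the defaults through the ``dict`` union operator (``defaults | user``, Python 3.9+). A standalone sketch of that resolution order with made-up values:

defaults = {"batch_size": 64, "max_epochs": 100, "lr": 1e-3, "context_length": 96}

def resolve(user_params: dict) -> dict:
    aliases = {"epochs": "max_epochs", "learning_rate": "lr"}
    user_params = dict(user_params)
    for alias, actual in aliases.items():
        if alias in user_params:
            if actual in user_params:
                raise ValueError(f"Parameter '{alias}' cannot be specified when '{actual}' is also specified.")
            user_params[actual] = user_params.pop(alias)
    return defaults | user_params  # user-provided values win over defaults

print(resolve({"epochs": 20, "lr": 5e-4}))
# -> {'batch_size': 64, 'max_epochs': 20, 'lr': 0.0005, 'context_length': 96}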
@@ -209,15 +209,16 @@ class TemporalFusionTransformerModel(AbstractGluonTSModel):
209
209
  supports_cat_covariates = True
210
210
  supports_static_features = True
211
211
 
212
- @property
213
- def default_context_length(self) -> int:
214
- return min(512, max(64, 2 * self.prediction_length))
215
-
216
212
  def _get_estimator_class(self) -> Type[GluonTSEstimator]:
217
213
  from gluonts.torch.model.tft import TemporalFusionTransformerEstimator
218
214
 
219
215
  return TemporalFusionTransformerEstimator
220
216
 
217
+ def _get_default_params(self):
218
+ return super()._get_default_params() | {
219
+ "context_length": min(512, max(64, 2 * self.prediction_length)),
220
+ }
221
+
221
222
  def _get_estimator_init_args(self) -> Dict[str, Any]:
222
223
  init_kwargs = super()._get_estimator_init_args()
223
224
  if self.num_feat_dynamic_real > 0:
@@ -282,9 +283,10 @@ class DLinearModel(AbstractGluonTSModel):
282
283
  If True, ``lightning_logs`` directory will NOT be removed after the model finished training.
283
284
  """
284
285
 
285
- @property
286
- def default_context_length(self) -> int:
287
- return 96
286
+ def _get_default_params(self):
287
+ return super()._get_default_params() | {
288
+ "context_length": 96,
289
+ }
288
290
 
289
291
  def _get_estimator_class(self) -> Type[GluonTSEstimator]:
290
292
  from gluonts.torch.model.d_linear import DLinearEstimator
@@ -341,18 +343,16 @@ class PatchTSTModel(AbstractGluonTSModel):
341
343
 
342
344
  supports_known_covariates = True
343
345
 
344
- @property
345
- def default_context_length(self) -> int:
346
- return 96
347
-
348
346
  def _get_estimator_class(self) -> Type[GluonTSEstimator]:
349
347
  from gluonts.torch.model.patch_tst import PatchTSTEstimator
350
348
 
351
349
  return PatchTSTEstimator
352
350
 
351
+ def _get_default_params(self):
352
+ return super()._get_default_params() | {"context_length": 96, "patch_len": 16}
353
+
353
354
  def _get_estimator_init_args(self) -> Dict[str, Any]:
354
355
  init_kwargs = super()._get_estimator_init_args()
355
- init_kwargs.setdefault("patch_len", 16)
356
356
  init_kwargs["num_feat_dynamic_real"] = self.num_feat_dynamic_real
357
357
  return init_kwargs
358
358
 
@@ -467,27 +467,27 @@ class TiDEModel(AbstractGluonTSModel):
467
467
  If False, past covariates will be used by the model if they are present in the dataset.
468
468
  feat_proj_hidden_dim : int, default = 4
469
469
  Size of the feature projection layer.
470
- encoder_hidden_dim : int, default = 4
470
+ encoder_hidden_dim : int, default = 64
471
471
  Size of the dense encoder layer.
472
- decoder_hidden_dim : int, default = 4
472
+ decoder_hidden_dim : int, default = 64
473
473
  Size of the dense decoder layer.
474
- temporal_hidden_dim : int, default = 4
474
+ temporal_hidden_dim : int, default = 64
475
475
  Size of the temporal decoder layer.
476
- distr_hidden_dim : int, default = 4
476
+ distr_hidden_dim : int, default = 64
477
477
  Size of the distribution projection layer.
478
- num_layers_encoder : int, default = 1
478
+ num_layers_encoder : int, default = 2
479
479
  Number of layers in dense encoder.
480
- num_layers_decoder : int, default = 1
480
+ num_layers_decoder : int, default = 2
481
481
  Number of layers in dense decoder.
482
- decoder_output_dim : int, default = 4
482
+ decoder_output_dim : int, default = 16
483
483
  Output size of the dense decoder.
484
- dropout_rate : float, default = 0.3
484
+ dropout_rate : float, default = 0.2
485
485
  Dropout regularization parameter.
486
486
  num_feat_dynamic_proj : int, default = 2
487
487
  Output size of feature projection layer.
488
488
  embedding_dimension : int, default = [16] * num_feat_static_cat
489
489
  Dimension of the embeddings for categorical features
490
- layer_norm : bool, default = False
490
+ layer_norm : bool, default = True
491
491
  Should layer normalization be enabled?
492
492
  scaling : {"mean", "std", None}, default = "mean"
493
493
  Scaling applied to each *context window* during training & prediction.
@@ -496,13 +496,13 @@ class TiDEModel(AbstractGluonTSModel):
496
496
  Note that this is different from the `target_scaler` that is applied to the *entire time series*.
497
497
  max_epochs : int, default = 100
498
498
  Number of epochs the model will be trained for
499
- batch_size : int, default = 64
499
+ batch_size : int, default = 256
500
500
  Size of batches used during training
501
501
  predict_batch_size : int, default = 500
502
502
  Size of batches used during prediction.
503
503
  num_batches_per_epoch : int, default = 50
504
504
  Number of batches processed every epoch
505
- lr : float, default = 1e-3,
505
+ lr : float, default = 1e-4,
506
506
  Learning rate used during training
507
507
  trainer_kwargs : dict, optional
508
508
  Optional keyword arguments passed to ``lightning.Trainer``.
@@ -515,15 +515,27 @@ class TiDEModel(AbstractGluonTSModel):
515
515
  supports_known_covariates = True
516
516
  supports_static_features = True
517
517
 
518
- @property
519
- def default_context_length(self) -> int:
520
- return min(512, max(64, 2 * self.prediction_length))
521
-
522
518
  def _get_estimator_class(self) -> Type[GluonTSEstimator]:
523
519
  from gluonts.torch.model.tide import TiDEEstimator
524
520
 
525
521
  return TiDEEstimator
526
522
 
523
+ def _get_default_params(self):
524
+ return super()._get_default_params() | {
525
+ "context_length": min(512, max(64, 2 * self.prediction_length)),
526
+ "encoder_hidden_dim": 64,
527
+ "decoder_hidden_dim": 64,
528
+ "temporal_hidden_dim": 64,
529
+ "distr_hidden_dim": 64,
530
+ "num_layers_encoder": 2,
531
+ "num_layers_decoder": 2,
532
+ "decoder_output_dim": 16,
533
+ "dropout_rate": 0.2,
534
+ "layer_norm": True,
535
+ "lr": 1e-4,
536
+ "batch_size": 256,
537
+ }
538
+
527
539
  def _get_estimator_init_args(self) -> Dict[str, Any]:
528
540
  init_kwargs = super()._get_estimator_init_args()
529
541
  init_kwargs["num_feat_static_cat"] = self.num_feat_static_cat
@@ -1,3 +1,3 @@
1
1
  """This is the autogluon version file."""
2
- __version__ = '1.1.2b20241113'
2
+ __version__ = '1.1.2b20241115'
3
3
  __lite__ = False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: autogluon.timeseries
3
- Version: 1.1.2b20241113
3
+ Version: 1.1.2b20241115
4
4
  Summary: Fast and Accurate ML in 3 Lines of Code
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -53,9 +53,9 @@ Requires-Dist: fugue>=0.9.0
53
53
  Requires-Dist: tqdm<5,>=4.38
54
54
  Requires-Dist: orjson~=3.9
55
55
  Requires-Dist: tensorboard<3,>=2.9
56
- Requires-Dist: autogluon.core[raytune]==1.1.2b20241113
57
- Requires-Dist: autogluon.common==1.1.2b20241113
58
- Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.1.2b20241113
56
+ Requires-Dist: autogluon.core[raytune]==1.1.2b20241115
57
+ Requires-Dist: autogluon.common==1.1.2b20241115
58
+ Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.1.2b20241115
59
59
  Provides-Extra: all
60
60
  Requires-Dist: optimum[onnxruntime]<1.20,>=1.17; extra == "all"
61
61
  Provides-Extra: chronos-onnx
@@ -1,15 +1,15 @@
1
- autogluon.timeseries-1.1.2b20241113-py3.8-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
1
+ autogluon.timeseries-1.1.2b20241115-py3.8-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
2
2
  autogluon/timeseries/__init__.py,sha256=_CrLLc1fkjen7UzWoO0Os8WZoHOgvZbHKy46I8v_4k4,304
3
3
  autogluon/timeseries/evaluator.py,sha256=l642tYfTHsl8WVIq_vV6qhgAFVFr9UuZD7gLra3A_Kc,250
4
4
  autogluon/timeseries/learner.py,sha256=3dUxI-U6TGfNtRQUzWTvBIo1GKeXYOhxIX_q7Fed9eA,14013
5
5
  autogluon/timeseries/predictor.py,sha256=R9m-TYmlA4WoJbdYEL_AnEM26EhRIclynOfSmpO7mBk,84926
6
6
  autogluon/timeseries/regressor.py,sha256=wcYbvE7kFopdscubfhIfeLI3ovxKe_fUVtt0b1zWdV0,6823
7
7
  autogluon/timeseries/splitter.py,sha256=eghGwAAN2_cxGk5aJBILgjGWtLzjxJcytMy49gg_q18,3061
8
- autogluon/timeseries/version.py,sha256=OGArr6c4pNSqH_iw-M1qDHQMDvnxMN7HervCqm8kQw8,90
8
+ autogluon/timeseries/version.py,sha256=k5yEeIHMAa7wwNFT6Xs3BeQ8qrgzCQ6ICoNHeZ7GR9g,90
9
9
  autogluon/timeseries/configs/__init__.py,sha256=BTtHIPCYeGjqgOcvqb8qPD4VNX-ICKOg6wnkew1cPOE,98
10
10
  autogluon/timeseries/configs/presets_configs.py,sha256=94-yL9teDHKs2irWjP3kpewI7FE1ChYCgEgz9XHJ6gc,1965
11
11
  autogluon/timeseries/dataset/__init__.py,sha256=UvnhAN5tjgxXTHoZMQDy64YMDj4Xxa68yY7NP4vAw0o,81
12
- autogluon/timeseries/dataset/ts_dataframe.py,sha256=UQ-iT2dGVJF57hlGkivbSEaBwf-5NP0Amohp4DccLUA,48492
12
+ autogluon/timeseries/dataset/ts_dataframe.py,sha256=9bJQeg3HkPeVnyxzwqAJiTJGYXths7vxUV_3-OsJ6pk,48640
13
13
  autogluon/timeseries/metrics/__init__.py,sha256=LLGmYaexsx7CregV-QaHc5exjZbsJfBSVOtxHRGC0ho,2139
14
14
  autogluon/timeseries/metrics/abstract.py,sha256=9xCFQ3NaR1C0hn01M7oBd72a_CiNV-w6QFcRjwUbKYI,8183
15
15
  autogluon/timeseries/metrics/point.py,sha256=z366XJz3n4MFl4JkXOD6ZxL69F_j7Y-jbrwb7J3yDqk,15513
@@ -25,19 +25,19 @@ autogluon/timeseries/models/autogluon_tabular/mlforecast.py,sha256=C1WVcuNlTcqo_
25
25
  autogluon/timeseries/models/autogluon_tabular/transforms.py,sha256=FozTzwcp1QjevEhrMLXsJHy8fymOcq1146oX4Al60wg,2517
26
26
  autogluon/timeseries/models/autogluon_tabular/utils.py,sha256=Fn3Vu_Q0PCtEUbtNgLp1xIblg7dOdpFlF3W5kLHgruI,63
27
27
  autogluon/timeseries/models/chronos/__init__.py,sha256=wT77HzTtmQxW3sw2k0mA5Ot6PSHivX-Uvn5fjM05EU4,60
28
- autogluon/timeseries/models/chronos/model.py,sha256=ULH7XCBcl93CQ4aKtY-RzhxlXyfb1bTfOmnAJWe2zLU,16589
28
+ autogluon/timeseries/models/chronos/model.py,sha256=Z3CtfRux7W2x7mghbq8YLGFLu9kTAsozid43AN1lH_s,30151
29
29
  autogluon/timeseries/models/chronos/pipeline/__init__.py,sha256=N-YZH9BGBoi99r5cznJe1zEEjwjIg7cOYIHZkKuJq44,247
30
- autogluon/timeseries/models/chronos/pipeline/base.py,sha256=sVhADJ9fkJBL6p6UYw9zRb07Z1rGGHog95uOMrcs7dA,5044
31
- autogluon/timeseries/models/chronos/pipeline/chronos.py,sha256=cbDS7_9Z8Sn1NZdwr2DqqCqxm_zWejDxKHHRLCvLDNE,19284
32
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py,sha256=vKXyz8lfIyOhobFp4kLkCQ61CWhm3BTXeluOq1EZaqU,19563
33
- autogluon/timeseries/models/chronos/pipeline/utils.py,sha256=C8S-1gr_PMxLYes7SP7Ep0hOrA5k9jRjM4wJAcSNuCE,3519
30
+ autogluon/timeseries/models/chronos/pipeline/base.py,sha256=aAXCKy7Jmip4BI2UdPMoPe2gdDMbJHKxEolcTx_5SYQ,5463
31
+ autogluon/timeseries/models/chronos/pipeline/chronos.py,sha256=iHKyw3Juml247jl7bEbGlabtMyp3ibYEoA7rHiUC9f8,22048
32
+ autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py,sha256=2MJuik-YFgONZ3X2DciAph5So6ABys5ppQhBC81gLyk,20083
33
+ autogluon/timeseries/models/chronos/pipeline/utils.py,sha256=_P_9m9Wl4FC2QyoKLluT4l7FLmZU2xw6G4xNcUpZE4k,13043
34
34
  autogluon/timeseries/models/ensemble/__init__.py,sha256=kFr11Gmt7lQJu9Rr8HuIPphQN5l1TsoorfbJm_O3a_s,128
35
35
  autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py,sha256=tifETwmiEGt-YtQ9eNK7ojJ3fBvtFMUJvisbfkIJ7gw,3393
36
36
  autogluon/timeseries/models/ensemble/greedy_ensemble.py,sha256=5HvZuW5osgsZg3V69k82nKEOy_YgeH1JTfQa7F3cU7s,7220
37
37
  autogluon/timeseries/models/gluonts/__init__.py,sha256=asC1PTj4j9xMbilvk1IT1julnpeoKbv5ZNuAR6-DFgA,361
38
- autogluon/timeseries/models/gluonts/abstract_gluonts.py,sha256=tViaFXFOVjGQi2S6cUIW-ak0Evv7rKUm2QWsmpKDMEk,34076
38
+ autogluon/timeseries/models/gluonts/abstract_gluonts.py,sha256=4_YPUjT0oIzELHn34C0a7n21zkhjc1Yijgz9egxxPdw,34501
39
39
  autogluon/timeseries/models/gluonts/torch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
- autogluon/timeseries/models/gluonts/torch/models.py,sha256=ASTMabSAzBRJ_s5Iqjp-9fZvt4aKN892Uz35kzqGMWM,25018
40
+ autogluon/timeseries/models/gluonts/torch/models.py,sha256=Pu7f43jr1C5S3k_bVqRB8ENuBHNEWT4ssUTdZoA1J58,25556
41
41
  autogluon/timeseries/models/local/__init__.py,sha256=e2UImoJhmj70E148IIObv90C_bHxgyLNk6YsS4p7pfs,701
42
42
  autogluon/timeseries/models/local/abstract_local_model.py,sha256=OxEkqzfAd5diQDUYStw2nI-X2lo3H8GcMLDJ6-1XL_Y,12417
43
43
  autogluon/timeseries/models/local/naive.py,sha256=iwRcFMFmJKPWPbD9TWaIUS51oav69F_VAp6-jb_5SUE,7249
@@ -59,11 +59,11 @@ autogluon/timeseries/utils/datetime/base.py,sha256=3NdsH3NDq4cVAOSoy3XpaNixyNlbj
59
59
  autogluon/timeseries/utils/datetime/lags.py,sha256=GoLtvcZ8oKb3QkoBJ9E59LSPLOP7Qjxrr2UmMSZgjyw,5909
60
60
  autogluon/timeseries/utils/datetime/seasonality.py,sha256=h_4w00iEytAz_N_EpCENQ8RCXy7KQITczrYjBgVqWkQ,764
61
61
  autogluon/timeseries/utils/datetime/time_features.py,sha256=PAXbYbQ0z_5GFbkxSNi41zLY_2-U3x0Ynm1m_WhdtGc,2572
62
- autogluon.timeseries-1.1.2b20241113.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
63
- autogluon.timeseries-1.1.2b20241113.dist-info/METADATA,sha256=BtiE4lTblx5aiB8vlWmsZ3oj0Xy-lfxgWZE27eBa4fc,12388
64
- autogluon.timeseries-1.1.2b20241113.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
65
- autogluon.timeseries-1.1.2b20241113.dist-info/WHEEL,sha256=bFJAMchF8aTQGUgMZzHJyDDMPTO3ToJ7x23SLJa1SVo,92
66
- autogluon.timeseries-1.1.2b20241113.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
67
- autogluon.timeseries-1.1.2b20241113.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
68
- autogluon.timeseries-1.1.2b20241113.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
69
- autogluon.timeseries-1.1.2b20241113.dist-info/RECORD,,
62
+ autogluon.timeseries-1.1.2b20241115.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
63
+ autogluon.timeseries-1.1.2b20241115.dist-info/METADATA,sha256=GWCpPfrJry0oxm90irCuNAdgEvasKyVcnn4bnbWnshA,12388
64
+ autogluon.timeseries-1.1.2b20241115.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
65
+ autogluon.timeseries-1.1.2b20241115.dist-info/WHEEL,sha256=bFJAMchF8aTQGUgMZzHJyDDMPTO3ToJ7x23SLJa1SVo,92
66
+ autogluon.timeseries-1.1.2b20241115.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
67
+ autogluon.timeseries-1.1.2b20241115.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
68
+ autogluon.timeseries-1.1.2b20241115.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
69
+ autogluon.timeseries-1.1.2b20241115.dist-info/RECORD,,