autogluon.timeseries 1.2.1b20250224__py3-none-any.whl → 1.4.1b20251215__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108)
  1. autogluon/timeseries/configs/__init__.py +3 -2
  2. autogluon/timeseries/configs/hyperparameter_presets.py +62 -0
  3. autogluon/timeseries/configs/predictor_presets.py +106 -0
  4. autogluon/timeseries/dataset/ts_dataframe.py +256 -141
  5. autogluon/timeseries/learner.py +86 -52
  6. autogluon/timeseries/metrics/__init__.py +42 -8
  7. autogluon/timeseries/metrics/abstract.py +89 -19
  8. autogluon/timeseries/metrics/point.py +142 -53
  9. autogluon/timeseries/metrics/quantile.py +46 -21
  10. autogluon/timeseries/metrics/utils.py +4 -4
  11. autogluon/timeseries/models/__init__.py +8 -2
  12. autogluon/timeseries/models/abstract/__init__.py +2 -2
  13. autogluon/timeseries/models/abstract/abstract_timeseries_model.py +361 -592
  14. autogluon/timeseries/models/abstract/model_trial.py +2 -1
  15. autogluon/timeseries/models/abstract/tunable.py +189 -0
  16. autogluon/timeseries/models/autogluon_tabular/__init__.py +2 -0
  17. autogluon/timeseries/models/autogluon_tabular/mlforecast.py +282 -194
  18. autogluon/timeseries/models/autogluon_tabular/per_step.py +513 -0
  19. autogluon/timeseries/models/autogluon_tabular/transforms.py +25 -18
  20. autogluon/timeseries/models/chronos/__init__.py +2 -1
  21. autogluon/timeseries/models/chronos/chronos2.py +361 -0
  22. autogluon/timeseries/models/chronos/model.py +219 -138
  23. autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +81 -50
  24. autogluon/timeseries/models/ensemble/__init__.py +37 -2
  25. autogluon/timeseries/models/ensemble/abstract.py +107 -0
  26. autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
  27. autogluon/timeseries/models/ensemble/array_based/abstract.py +240 -0
  28. autogluon/timeseries/models/ensemble/array_based/models.py +185 -0
  29. autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
  30. autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
  31. autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
  32. autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
  33. autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
  34. autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
  35. autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
  36. autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
  37. autogluon/timeseries/models/ensemble/weighted/abstract.py +45 -0
  38. autogluon/timeseries/models/ensemble/weighted/basic.py +91 -0
  39. autogluon/timeseries/models/ensemble/weighted/greedy.py +62 -0
  40. autogluon/timeseries/models/gluonts/__init__.py +1 -1
  41. autogluon/timeseries/models/gluonts/{abstract_gluonts.py → abstract.py} +148 -208
  42. autogluon/timeseries/models/gluonts/dataset.py +109 -0
  43. autogluon/timeseries/models/gluonts/{torch/models.py → models.py} +38 -22
  44. autogluon/timeseries/models/local/__init__.py +0 -7
  45. autogluon/timeseries/models/local/abstract_local_model.py +71 -74
  46. autogluon/timeseries/models/local/naive.py +13 -9
  47. autogluon/timeseries/models/local/npts.py +9 -2
  48. autogluon/timeseries/models/local/statsforecast.py +52 -36
  49. autogluon/timeseries/models/multi_window/multi_window_model.py +65 -45
  50. autogluon/timeseries/models/registry.py +64 -0
  51. autogluon/timeseries/models/toto/__init__.py +3 -0
  52. autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
  53. autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
  54. autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
  55. autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
  56. autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
  57. autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
  58. autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
  59. autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
  60. autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
  61. autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
  62. autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
  63. autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
  64. autogluon/timeseries/models/toto/dataloader.py +108 -0
  65. autogluon/timeseries/models/toto/hf_pretrained_model.py +200 -0
  66. autogluon/timeseries/models/toto/model.py +249 -0
  67. autogluon/timeseries/predictor.py +685 -297
  68. autogluon/timeseries/regressor.py +94 -44
  69. autogluon/timeseries/splitter.py +8 -32
  70. autogluon/timeseries/trainer/__init__.py +3 -0
  71. autogluon/timeseries/trainer/ensemble_composer.py +444 -0
  72. autogluon/timeseries/trainer/model_set_builder.py +256 -0
  73. autogluon/timeseries/trainer/prediction_cache.py +149 -0
  74. autogluon/timeseries/{trainer.py → trainer/trainer.py} +387 -390
  75. autogluon/timeseries/trainer/utils.py +17 -0
  76. autogluon/timeseries/transforms/__init__.py +2 -13
  77. autogluon/timeseries/transforms/covariate_scaler.py +34 -40
  78. autogluon/timeseries/transforms/target_scaler.py +37 -20
  79. autogluon/timeseries/utils/constants.py +10 -0
  80. autogluon/timeseries/utils/datetime/lags.py +3 -5
  81. autogluon/timeseries/utils/datetime/seasonality.py +1 -3
  82. autogluon/timeseries/utils/datetime/time_features.py +2 -2
  83. autogluon/timeseries/utils/features.py +70 -47
  84. autogluon/timeseries/utils/forecast.py +19 -14
  85. autogluon/timeseries/utils/timer.py +173 -0
  86. autogluon/timeseries/utils/warning_filters.py +4 -2
  87. autogluon/timeseries/version.py +1 -1
  88. autogluon.timeseries-1.4.1b20251215-py3.11-nspkg.pth +1 -0
  89. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/METADATA +49 -36
  90. autogluon_timeseries-1.4.1b20251215.dist-info/RECORD +103 -0
  91. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/WHEEL +1 -1
  92. autogluon/timeseries/configs/presets_configs.py +0 -79
  93. autogluon/timeseries/evaluator.py +0 -6
  94. autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -11
  95. autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
  96. autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -585
  97. autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -518
  98. autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -78
  99. autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -170
  100. autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
  101. autogluon/timeseries/models/presets.py +0 -360
  102. autogluon.timeseries-1.2.1b20250224-py3.9-nspkg.pth +0 -1
  103. autogluon.timeseries-1.2.1b20250224.dist-info/RECORD +0 -68
  104. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/LICENSE +0 -0
  105. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/NOTICE +0 -0
  106. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/namespace_packages.txt +0 -0
  107. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/top_level.txt +0 -0
  108. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/zip-safe +0 -0
autogluon/timeseries/models/chronos/model.py
@@ -3,14 +3,15 @@ import os
  import shutil
  import warnings
  from pathlib import Path
- from typing import Any, Dict, Literal, Optional, Union
+ from typing import Any

  import numpy as np
  import pandas as pd
+ from typing_extensions import Self

  from autogluon.common.loaders import load_pkl
  from autogluon.common.space import Space
- from autogluon.timeseries.dataset.ts_dataframe import TimeSeriesDataFrame
+ from autogluon.timeseries.dataset import TimeSeriesDataFrame
  from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
  from autogluon.timeseries.utils.warning_filters import disable_duplicate_logs, warning_filter

@@ -81,77 +82,92 @@ MODEL_ALIASES = {


  class ChronosModel(AbstractTimeSeriesModel):
- """Chronos [Ansari2024]_ pretrained time series forecasting models which can be used for zero-shot forecasting or fine-tuned
- in a task-specific manner. Models can be based on the original
- `ChronosModel <https://github.com/amazon-science/chronos-forecasting/blob/main/src/chronos/chronos.py>`_ implementation,
- as well as a newer family of Chronos-Bolt models capable of much faster inference.
-
- The original Chronos is a family of pretrained models, based on the T5 family, with number of parameters ranging between
- 8M and 710M. The full collection of Chronos models is available on
- `Hugging Face <https://huggingface.co/collections/amazon/chronos-models-65f1791d630a8d57cb718444>`_. For Chronos small,
- base, and large variants a GPU is required to perform inference efficiently. Chronos takes a minimalistic approach to
- pretraining time series models, by discretizing time series data directly into bins which are treated as tokens,
- effectively performing regression by classification. This results in a simple and flexible framework
- for using any language model in the context of time series forecasting. See [Ansari2024]_ for more information.
-
- The newer Chronos-Bolt variants enable much faster inference by first "patching" the time series. The resulting
- time series is then fed into a T5 model for forecasting. The Chronos-Bolt variants are capable of much faster inference,
- and can all run on CPUs. Chronos-Bolt models are also available on Hugging Face <https://huggingface.co/autogluon/>`_.
-
- Both Chronos and Chronos-Bolt variants can be fine-tuned by setting ``fine_tune=True`` and selecting appropriate
- fine-tuning parameters such as the learning rate (``fine_tune_lr``) and max steps (``fine_tune_steps``).
+ """Chronos [Ansari2024]_ pretrained time series forecasting models which can be used for zero-shot
+ forecasting or fine-tuned in a task-specific manner.
+
+ Models can be based on the original
+ `Chronos <https://github.com/amazon-science/chronos-forecasting/blob/main/src/chronos/chronos.py>`_
+ implementation, as well as a newer family of
+ `Chronos-Bolt <https://github.com/amazon-science/chronos-forecasting/blob/main/src/chronos/chronos_bolt.py>`_
+ models capable of much faster inference.
+
+ The original Chronos is a family of pretrained models, based on the T5 family, with number of
+ parameters ranging between 8M and 710M. The full collection of Chronos models is available on
+ `Hugging Face <https://huggingface.co/collections/amazon/chronos-models-65f1791d630a8d57cb718444>`_.
+
+ For Chronos (original) ``small``, ``base``, and ``large`` variants a GPU is required to
+ perform inference efficiently. Chronos takes a minimalistic approach to pretraining time series
+ models, by discretizing time series data directly into bins which are treated as tokens,
+ effectively performing regression by classification. This results in a simple and flexible
+ framework for using any language model in the context of time series forecasting.
+ See [Ansari2024]_ for more information.
+
+ The newer Chronos-Bolt variants enable much faster inference by first "patching" the time series.
+ The resulting time series is then fed into a T5 model for forecasting. The Chronos-Bolt variants
+ are capable of much faster inference, and can all run on CPUs.
+
+ Both Chronos and Chronos-Bolt variants can be fine-tuned by setting ``fine_tune=True`` and selecting
+ appropriate fine-tuning parameters such as the learning rate (``fine_tune_lr``) and max steps
+ (``fine_tune_steps``).

  References
  ----------
  .. [Ansari2024] Ansari, Abdul Fatir, Stella, Lorenzo et al.
  "Chronos: Learning the Language of Time Series."
- http://arxiv.org/abs/2403.07815
+ Transactions on Machine Learning Research (2024).
+ https://openreview.net/forum?id=gerNCVqqtR


  Other Parameters
  ----------------
- model_path: str, default = "autogluon/chronos-bolt-small"
+ model_path : str, default = "autogluon/chronos-bolt-small"
  Model path used for the model, i.e., a HuggingFace transformers ``name_or_path``. Can be a
  compatible model name on HuggingFace Hub or a local path to a model directory. Original
  Chronos models (i.e., ``autogluon/chronos-t5-{model_size}``) can be specified with aliases
  ``tiny``, ``mini`` , ``small``, ``base``, and ``large``. Chronos-Bolt models can be specified
  with ``bolt_tiny``, ``bolt_mini``, ``bolt_small``, and ``bolt_base``.
- batch_size : int, default = 16
- Size of batches used during inference
+ batch_size : int, default = 256
+ Size of batches used during inference.
+
+ The default ``batch_size`` is selected based on the model type. Chronos (original) models use a
+ ``batch_size`` of 16, except Chronos (Large) which uses 8.
+
+ For Chronos-Bolt models the ``batch_size`` is set to 256. However, ``batch_size`` is reduced by
+ a factor of 4 when the prediction horizon is greater than the model's
+ default prediction length.
  num_samples : int, default = 20
  Number of samples used during inference, only used for the original Chronos models
  device : str, default = None
- Device to use for inference (and fine-tuning, if enabled). If None, model will use the GPU if available.
- For larger Chronos model sizes ``small``, ``base``, and ``large``; inference will fail if no GPU is available.
- For Chronos-Bolt models, inference can be done on the CPU. Although fine-tuning the smaller Chronos models
- (``tiny`` and ``mini``) and all Chronos-Bolt is allowed on the CPU, we recommend using a GPU for faster fine-tuning.
+ Device to use for inference (and fine-tuning, if enabled). If None, model will use the GPU if
+ available. For larger Chronos model sizes ``small``, ``base``, and ``large``; inference will fail
+ if no GPU is available.
+
+ For Chronos-Bolt models, inference can be performed on the CPU. Although fine-tuning the smaller
+ Chronos models (``tiny`` and ``mini``) and all Chronos-Bolt is allowed on the CPU, we recommend
+ using a GPU for faster fine-tuning.
  context_length : int or None, default = None
- The context length to use in the model. Shorter context lengths will decrease model accuracy, but result
- in faster inference. If None, the model will infer context length from the data set length at inference
- time, but set it to a maximum of 2048. Note that this is only the context length used to pass data into
- the model. Individual model implementations may have different context lengths specified in their configuration,
- and may truncate the context further. For example, original Chronos models have a context length of 512, but
- Chronos-Bolt models handle contexts up to 2048.
- optimization_strategy : {None, "onnx", "openvino"}, default = None
- [deprecated] Optimization strategy to use for inference on CPUs. If None, the model will use the default implementation.
- If `onnx`, the model will be converted to ONNX and the inference will be performed using ONNX. If ``openvino``,
- inference will be performed with the model compiled to OpenVINO. These optimizations are only available for
- the original set of Chronos models, and not in Chronos-Bolt where they are not needed. You will need to
- install the appropriate dependencies `optimum[onnxruntime]` or `optimum[openvino,nncf] optimum-intel[openvino,nncf]`
- for optimizations to work. Note that support for optimization strategies is deprecated, and will be removed
- in a future release. We recommend using Chronos-Bolt models for fast inference on the CPU.
- torch_dtype : torch.dtype or {"auto", "bfloat16", "float32", "float64"}, default = "auto"
- Torch data type for model weights, provided to ``from_pretrained`` method of Hugging Face AutoModels. If
- original Chronos models are specified and the model size is ``small``, ``base``, or ``large``, the
- ``torch_dtype`` will be set to ``bfloat16`` to enable inference on GPUs.
+ The context length to use in the model.
+
+ Shorter context lengths will decrease model accuracy, but result in faster inference. If None,
+ the model will infer context length from the data set length at inference time, but cap it at a
+ maximum of 2048.
+
+ Note that this is only the context length used to pass data into the model. Individual model
+ implementations may have different context lengths specified in their configuration, and may
+ truncate the context further. For example, original Chronos models have a context length of 512,
+ but Chronos-Bolt models handle contexts up to 2048.
+ torch_dtype : torch.dtype or {"auto", "bfloat16", "float32"}, default = "auto"
+ Torch data type for model weights, provided to ``from_pretrained`` method of Hugging Face
+ AutoModels. If original Chronos models are specified and the model size is ``small``, ``base``,
+ or ``large``, the ``torch_dtype`` will be set to ``bfloat16`` to enable inference on GPUs.
  data_loader_num_workers : int, default = 0
- Number of worker processes to be used in the data loader. See documentation on ``torch.utils.data.DataLoader``
- for more information.
+ Number of worker processes to be used in the data loader. See documentation on
+ ``torch.utils.data.DataLoader`` for more information.
  fine_tune : bool, default = False
  If True, the pretrained model will be fine-tuned
- fine_tune_lr: float, default = 1e-5
- The learning rate used for fine-tuning. This default is suitable for Chronos-Bolt models; for the original
- Chronos models, we recommend using a higher learning rate such as ``1e-4``
+ fine_tune_lr : float, default = 1e-5
+ The learning rate used for fine-tuning. This default is suitable for Chronos-Bolt models; for
+ the original Chronos models, we recommend using a higher learning rate such as ``1e-4``.
  fine_tune_steps : int, default = 1000
  The number of gradient update steps to fine-tune for
  fine_tune_batch_size : int, default = 32
@@ -160,19 +176,22 @@ class ChronosModel(AbstractTimeSeriesModel):
  The size of the shuffle buffer to shuffle the data during fine-tuning. If None, shuffling will
  be turned off.
  eval_during_fine_tune : bool, default = False
- If True, validation will be performed during fine-tuning to select the best checkpoint.
- Setting this argument to True may result in slower fine-tuning.
+ If True, validation will be performed during fine-tuning to select the best checkpoint. Setting this
+ argument to True may result in slower fine-tuning. This parameter is ignored if ``skip_model_selection=True``
+ in ``TimeSeriesPredictor.fit``.
  fine_tune_eval_max_items : int, default = 256
  The maximum number of randomly-sampled time series to use from the validation set for evaluation
  during fine-tuning. If None, the entire validation dataset will be used.
  fine_tune_trainer_kwargs : dict, optional
  Extra keyword arguments passed to ``transformers.TrainingArguments``
- keep_transformers_logs: bool, default = False
+ keep_transformers_logs : bool, default = False
  If True, the logs generated by transformers will NOT be removed after fine-tuning
+ revision : str, default = None
+ Model revision to use (branch name or commit hash). If None, the default branch (usually "main") is used.
  """

- # default number of samples for prediction
- default_num_samples: int = 20
+ ag_priority = 55
+ default_num_samples: int = 20 # default number of samples for prediction
  default_model_path = "autogluon/chronos-bolt-small"
  default_max_time_limit_ratio = 0.8
  maximum_context_length = 2048
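
For reference, the hyperparameters documented in the docstring above are supplied to the predictor under the "Chronos" model key. Below is a minimal sketch, assuming the public TimeSeriesPredictor API from autogluon.timeseries; train_data is a placeholder for a user-prepared TimeSeriesDataFrame.

    # Illustrative sketch only, not taken from this diff.
    # Assumes the public TimeSeriesPredictor API; `train_data` is a placeholder TimeSeriesDataFrame.
    from autogluon.timeseries import TimeSeriesPredictor

    predictor = TimeSeriesPredictor(prediction_length=48)
    predictor.fit(
        train_data,
        hyperparameters={
            "Chronos": {
                "model_path": "bolt_small",  # alias documented above
                "fine_tune": True,           # enable fine-tuning
                "fine_tune_lr": 1e-5,        # default; ~1e-4 recommended for original Chronos
                "fine_tune_steps": 1000,     # number of gradient update steps
            }
        },
    )
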
@@ -180,24 +199,25 @@ class ChronosModel(AbstractTimeSeriesModel):

  def __init__(
  self,
- freq: Optional[str] = None,
+ freq: str | None = None,
  prediction_length: int = 1,
- path: Optional[str] = None,
- name: Optional[str] = None,
- eval_metric: Optional[str] = None,
- hyperparameters: Optional[Dict[str, Any]] = None,
+ path: str | None = None,
+ name: str | None = None,
+ eval_metric: str | None = None,
+ hyperparameters: dict[str, Any] | None = None,
  **kwargs, # noqa
  ):
  hyperparameters = hyperparameters if hyperparameters is not None else {}

- model_path_input = hyperparameters.get("model_path", self.default_model_path)
- self.model_path = MODEL_ALIASES.get(model_path_input, model_path_input)
+ model_path_input: str = hyperparameters.get("model_path", self.default_model_path)
+ self.model_path: str = MODEL_ALIASES.get(model_path_input, model_path_input)

  name = name if name is not None else "Chronos"
  if not isinstance(model_path_input, Space):
  # we truncate the name to avoid long path errors on Windows
- model_path_safe = str(model_path_input).replace("/", "__").replace(os.path.sep, "__")[-50:]
- name += f"[{model_path_safe}]"
+ model_path_suffix = "[" + str(model_path_input).replace("/", "__").replace(os.path.sep, "__")[-50:] + "]"
+ if model_path_suffix not in name:
+ name += model_path_suffix

  super().__init__(
  path=path,
@@ -209,9 +229,9 @@ class ChronosModel(AbstractTimeSeriesModel):
  **kwargs,
  )

- self._model_pipeline: Optional[Any] = None # of type BaseChronosPipeline
+ self._model_pipeline: Any | None = None # of type BaseChronosPipeline

- def save(self, path: Optional[str] = None, verbose: bool = True) -> str:
+ def save(self, path: str | None = None, verbose: bool = True) -> str:
  pipeline = self._model_pipeline
  self._model_pipeline = None
  path = super().save(path=path, verbose=verbose)
@@ -220,7 +240,7 @@ class ChronosModel(AbstractTimeSeriesModel):
  return str(path)

  @classmethod
- def load(cls, path: str, reset_paths: bool = True, verbose: bool = True) -> "ChronosModel":
+ def load(cls, path: str, reset_paths: bool = True, load_oof: bool = False, verbose: bool = True) -> Self:
  model = load_pkl.load(path=os.path.join(path, cls.model_file_name), verbose=verbose)
  if reset_paths:
  model.set_contexts(path)
@@ -228,7 +248,7 @@ class ChronosModel(AbstractTimeSeriesModel):
  fine_tune_ckpt_path = Path(model.path) / cls.fine_tuned_ckpt_name
  if fine_tune_ckpt_path.exists():
  logger.debug(f"\tFine-tuned checkpoint exists, setting model_path to {fine_tune_ckpt_path}")
- model.model_path = fine_tune_ckpt_path
+ model.model_path = str(fine_tune_ckpt_path)

  return model

@@ -245,12 +265,14 @@ class ChronosModel(AbstractTimeSeriesModel):
  return self._model_pipeline

  @property
- def ag_default_config(self) -> Dict[str, Any]:
+ def ag_default_config(self) -> dict[str, Any]:
  """The default configuration of the model used by AutoGluon if the model is one of those
  defined in MODEL_CONFIGS. For now, these are ``autogluon/chronos-t5-*`` family of models.
  """
- model_name = str(self.model_path).split("/")[-1]
- return MODEL_CONFIGS.get(model_name, {})
+ for k in MODEL_CONFIGS:
+ if k in self.model_path:
+ return MODEL_CONFIGS[k]
+ return {}

  @property
  def min_num_gpus(self) -> int:
@@ -273,15 +295,15 @@ class ChronosModel(AbstractTimeSeriesModel):
  """
  return self.ag_default_config.get("default_torch_dtype", "auto")

- def get_minimum_resources(self, is_gpu_available: bool = False) -> Dict[str, Union[int, float]]:
- minimum_resources: Dict[str, Union[int, float]] = {"num_cpus": 1}
+ def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str, int | float]:
+ minimum_resources: dict[str, int | float] = {"num_cpus": 1}
  # if GPU is available, we train with 1 GPU per trial
  if is_gpu_available:
  minimum_resources["num_gpus"] = self.min_num_gpus
  return minimum_resources

  def load_model_pipeline(self, is_training: bool = False):
- from .pipeline import BaseChronosPipeline
+ from chronos import BaseChronosPipeline

  gpu_available = self._is_gpu_available()

@@ -292,14 +314,14 @@ class ChronosModel(AbstractTimeSeriesModel):
  "`import torch; torch.cuda.is_available()` returns `True`."
  )

- device = self.device or ("cuda" if gpu_available else "cpu")
+ device = (self.device or "cuda") if gpu_available else "cpu"

+ assert self.model_path is not None
  pipeline = BaseChronosPipeline.from_pretrained(
  self.model_path,
  device_map=device,
- # optimization cannot be used during fine-tuning
- optimization_strategy=None if is_training else self.optimization_strategy,
  torch_dtype=self.torch_dtype,
+ revision=self.get_hyperparameter("revision"),
  )

  self._model_pipeline = pipeline
@@ -314,28 +336,60 @@ class ChronosModel(AbstractTimeSeriesModel):

  return torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8

- def _get_model_params(self) -> dict:
+ def get_hyperparameters(self) -> dict:
  """Gets params that are passed to the inner model."""
- init_args = super()._get_model_params().copy()
-
- init_args.setdefault("batch_size", self.default_batch_size)
- init_args.setdefault("num_samples", self.default_num_samples)
- init_args.setdefault("device", None)
- # if the model requires a GPU, set the torch dtype to bfloat16
- init_args.setdefault("torch_dtype", self.default_torch_dtype)
- init_args.setdefault("data_loader_num_workers", 0)
- init_args.setdefault("context_length", None)
- init_args.setdefault("optimization_strategy", None)
- init_args.setdefault("fine_tune", False)
- init_args.setdefault("keep_transformers_logs", False)
- init_args.setdefault("fine_tune_lr", 1e-5)
- init_args.setdefault("fine_tune_steps", 1000)
- init_args.setdefault("fine_tune_batch_size", 32)
- init_args.setdefault("eval_during_fine_tune", False)
- init_args.setdefault("fine_tune_eval_max_items", 256)
- init_args.setdefault("fine_tune_shuffle_buffer_size", 10_000)
+ init_args = super().get_hyperparameters()

  eval_during_fine_tune = init_args["eval_during_fine_tune"]
+ fine_tune_trainer_kwargs = self._get_fine_tune_trainer_kwargs(init_args, eval_during_fine_tune)
+ user_fine_tune_trainer_kwargs = init_args.get("fine_tune_trainer_kwargs", {})
+ fine_tune_trainer_kwargs.update(user_fine_tune_trainer_kwargs)
+ init_args["fine_tune_trainer_kwargs"] = fine_tune_trainer_kwargs
+
+ return init_args.copy()
+
+ def _get_default_hyperparameters(self) -> dict:
+ return {
+ "batch_size": self.default_batch_size,
+ "num_samples": self.default_num_samples,
+ "device": None,
+ "torch_dtype": self.default_torch_dtype,
+ "data_loader_num_workers": 0,
+ "context_length": None,
+ "fine_tune": False,
+ "keep_transformers_logs": False,
+ "fine_tune_lr": 1e-5,
+ "fine_tune_steps": 1000,
+ "fine_tune_batch_size": 32,
+ "eval_during_fine_tune": False,
+ "fine_tune_eval_max_items": 256,
+ "fine_tune_shuffle_buffer_size": 10_000,
+ "revision": None,
+ }
+
+ @property
+ def allowed_hyperparameters(self) -> list[str]:
+ return super().allowed_hyperparameters + [
+ "model_path",
+ "batch_size",
+ "num_samples",
+ "device",
+ "context_length",
+ "torch_dtype",
+ "data_loader_num_workers",
+ "fine_tune",
+ "fine_tune_lr",
+ "fine_tune_steps",
+ "fine_tune_batch_size",
+ "fine_tune_shuffle_buffer_size",
+ "eval_during_fine_tune",
+ "fine_tune_eval_max_items",
+ "fine_tune_trainer_kwargs",
+ "keep_transformers_logs",
+ "revision",
+ ]
+
+ def _get_fine_tune_trainer_kwargs(self, init_args, eval_during_fine_tune: bool):
  output_dir = Path(self.path) / "transformers_logs"
  fine_tune_trainer_kwargs = dict(
  output_dir=str(output_dir),
@@ -364,12 +418,8 @@ class ChronosModel(AbstractTimeSeriesModel):
  load_best_model_at_end=True if eval_during_fine_tune else False,
  metric_for_best_model="eval_loss" if eval_during_fine_tune else None,
  )
- user_fine_tune_trainer_kwargs = init_args.get("fine_tune_trainer_kwargs", {})
- fine_tune_trainer_kwargs.update(user_fine_tune_trainer_kwargs)

- init_args["fine_tune_trainer_kwargs"] = fine_tune_trainer_kwargs
-
- return init_args
+ return fine_tune_trainer_kwargs

  def _validate_and_assign_attributes(self, model_params: dict):
  # we validate the params here because their values are concrete,
@@ -381,17 +431,6 @@ class ChronosModel(AbstractTimeSeriesModel):
  self.device = model_params["device"]
  self.torch_dtype = model_params["torch_dtype"]
  self.data_loader_num_workers = model_params["data_loader_num_workers"]
- self.optimization_strategy: Optional[Literal["onnx", "openvino"]] = model_params["optimization_strategy"]
-
- if self.optimization_strategy is not None:
- warnings.warn(
- (
- "optimization_strategy is deprecated and will be removed in a future release. "
- "We recommend using Chronos-Bolt models for fast inference on the CPU."
- ),
- category=FutureWarning,
- stacklevel=3,
- )
  self.context_length = model_params["context_length"]

  if self.context_length is not None and self.context_length > self.maximum_context_length:
@@ -404,18 +443,24 @@ class ChronosModel(AbstractTimeSeriesModel):
  def _fit(
  self,
  train_data: TimeSeriesDataFrame,
- val_data: Optional[TimeSeriesDataFrame] = None,
- time_limit: Optional[int] = None,
+ val_data: TimeSeriesDataFrame | None = None,
+ time_limit: float | None = None,
+ num_cpus: int | None = None,
+ num_gpus: int | None = None,
+ verbosity: int = 2,
  **kwargs,
  ) -> None:
+ import transformers
+ from chronos import ChronosBoltPipeline, ChronosPipeline
+ from packaging import version
  from transformers.trainer import PrinterCallback, Trainer, TrainingArguments

- from .pipeline import ChronosBoltPipeline, ChronosPipeline
- from .pipeline.utils import (
+ from .utils import (
  ChronosFineTuningDataset,
  EvaluateAndSaveFinalStepCallback,
  LoggerCallback,
  TimeLimitCallback,
+ update_output_quantiles,
  )

  # TODO: Add support for fine-tuning models with context_length longer than the pretrained model
@@ -423,15 +468,14 @@ class ChronosModel(AbstractTimeSeriesModel):
  # verbosity < 3: all logs and warnings from transformers will be suppressed
  # verbosity >= 3: progress bar and loss logs will be logged
  # verbosity 4: everything will be logged
- verbosity = kwargs.get("verbosity", 2)
  for logger_name in logging.root.manager.loggerDict:
  if "transformers" in logger_name:
  transformers_logger = logging.getLogger(logger_name)
  transformers_logger.setLevel(logging.ERROR if verbosity <= 3 else logging.INFO)

  self._check_fit_params()
-
- model_params = self._get_model_params()
+ self._log_unused_hyperparameters()
+ model_params = self.get_hyperparameters()
  self._validate_and_assign_attributes(model_params)
  do_fine_tune = model_params["fine_tune"]

@@ -468,9 +512,12 @@ class ChronosModel(AbstractTimeSeriesModel):

  if self.prediction_length != fine_tune_prediction_length:
  logger.debug(
- f"\tChronosBolt models can only be fine-tuned with a maximum prediction_length of {model_prediction_length}. "
+ f"\tChronos-Bolt models can only be fine-tuned with a maximum prediction_length of {model_prediction_length}. "
  f"Fine-tuning prediction_length has been changed to {fine_tune_prediction_length}."
  )
+ if self.quantile_levels != self.model_pipeline.quantiles:
+ update_output_quantiles(self.model_pipeline.model, self.quantile_levels)
+ logger.info(f"\tChronos-Bolt will be fine-tuned with quantile_levels={self.quantile_levels}")
  else:
  raise ValueError(f"Unsupported model pipeline: {type(self.model_pipeline)}")

@@ -496,7 +543,11 @@ class ChronosModel(AbstractTimeSeriesModel):
  fine_tune_trainer_kwargs["load_best_model_at_end"] = False
  fine_tune_trainer_kwargs["metric_for_best_model"] = None

- training_args = TrainingArguments(**fine_tune_trainer_kwargs, **pipeline_specific_trainer_kwargs)
+ if version.parse(transformers.__version__) >= version.parse("4.46"):
+ # transformers changed the argument name from `evaluation_strategy` to `eval_strategy`
+ fine_tune_trainer_kwargs["eval_strategy"] = fine_tune_trainer_kwargs.pop("evaluation_strategy")
+
+ training_args = TrainingArguments(**fine_tune_trainer_kwargs, **pipeline_specific_trainer_kwargs) # type: ignore
  tokenizer_train_dataset = ChronosFineTuningDataset(
  target_df=train_data,
  target_column=self.target,
@@ -512,6 +563,7 @@ class ChronosModel(AbstractTimeSeriesModel):
  if time_limit is not None:
  callbacks.append(TimeLimitCallback(time_limit=time_limit))

+ tokenizer_val_dataset: ChronosFineTuningDataset | None = None
  if val_data is not None:
  callbacks.append(EvaluateAndSaveFinalStepCallback())
  # evaluate on a randomly-sampled subset
@@ -527,6 +579,7 @@ class ChronosModel(AbstractTimeSeriesModel):
  )
  val_data = val_data.loc[eval_items]

+ assert isinstance(val_data, TimeSeriesDataFrame)
  tokenizer_val_dataset = ChronosFineTuningDataset(
  target_df=val_data,
  target_column=self.target,
@@ -540,7 +593,7 @@ class ChronosModel(AbstractTimeSeriesModel):
  model=self.model_pipeline.inner_model,
  args=training_args,
  train_dataset=tokenizer_train_dataset,
- eval_dataset=tokenizer_val_dataset if val_data is not None else None,
+ eval_dataset=tokenizer_val_dataset,
  callbacks=callbacks,
  )

@@ -551,7 +604,7 @@ class ChronosModel(AbstractTimeSeriesModel):
  if verbosity >= 3:
  logger.warning(
  "Transformers logging is turned on during fine-tuning. Note that losses reported by transformers "
- "may not correspond to those specified via `eval_metric`."
+ "do not correspond to those specified via `eval_metric`."
  )
  trainer.add_callback(LoggerCallback())

@@ -569,10 +622,11 @@ class ChronosModel(AbstractTimeSeriesModel):
  self,
  data: TimeSeriesDataFrame,
  context_length: int,
+ batch_size: int,
  num_workers: int = 0,
- time_limit: Optional[float] = None,
+ time_limit: float | None = None,
  ):
- from .pipeline.utils import ChronosInferenceDataLoader, ChronosInferenceDataset, timeout_callback
+ from .utils import ChronosInferenceDataLoader, ChronosInferenceDataset, timeout_callback

  chronos_dataset = ChronosInferenceDataset(
  target_df=data,
@@ -582,7 +636,7 @@ class ChronosModel(AbstractTimeSeriesModel):

  return ChronosInferenceDataLoader(
  chronos_dataset,
- batch_size=self.batch_size,
+ batch_size=batch_size,
  shuffle=False,
  num_workers=num_workers,
  on_batch=timeout_callback(seconds=time_limit),
@@ -598,9 +652,11 @@ class ChronosModel(AbstractTimeSeriesModel):
  def _predict(
  self,
  data: TimeSeriesDataFrame,
- known_covariates: Optional[TimeSeriesDataFrame] = None,
+ known_covariates: TimeSeriesDataFrame | None = None,
  **kwargs,
  ) -> TimeSeriesDataFrame:
+ from chronos import ChronosBoltPipeline, ChronosPipeline
+
  # We defer initialization of the model pipeline. i.e., the model is only loaded to device memory
  # during inference. We also infer the maximum length of the time series in the inference data set
  # and use that to determine the context length of the model. If the context length is specified
@@ -611,26 +667,51 @@ class ChronosModel(AbstractTimeSeriesModel):
  # (according to its config.json file) of 512, it will further truncate the series during inference.
  context_length = self._get_context_length(data)

+ extra_predict_kwargs = (
+ {"num_samples": self.num_samples} if isinstance(self.model_pipeline, ChronosPipeline) else {}
+ )
+
+ # adapt batch size for Chronos bolt if requested prediction length is longer than model prediction length
+ batch_size = self.batch_size
+ model_prediction_length = None
+ if isinstance(self.model_pipeline, ChronosBoltPipeline):
+ model_prediction_length = self.model_pipeline.model.config.chronos_config.get("prediction_length")
+ if model_prediction_length and self.prediction_length > model_prediction_length:
+ batch_size = max(1, batch_size // 4)
+ logger.debug(
+ f"\tThe prediction_length {self.prediction_length} exceeds model's prediction_length {model_prediction_length}. "
+ f"The inference batch_size has been reduced from {self.batch_size} to {batch_size} to avoid OOM errors."
+ )
+
  with warning_filter(all_warnings=True):
  import torch

+ self.model_pipeline.model.eval()
+
  inference_data_loader = self._get_inference_data_loader(
  data=data,
+ batch_size=batch_size,
  num_workers=self.data_loader_num_workers,
  context_length=context_length,
  time_limit=kwargs.get("time_limit"),
  )

- self.model_pipeline.model.eval()
  with torch.inference_mode(), disable_duplicate_logs(logger):
  batch_quantiles, batch_means = [], []
  for batch in inference_data_loader:
- qs, mn = self.model_pipeline.predict_quantiles(
- batch,
- prediction_length=self.prediction_length,
- quantile_levels=self.quantile_levels,
- num_samples=self.num_samples,
- )
+ try:
+ qs, mn = self.model_pipeline.predict_quantiles(
+ batch,
+ prediction_length=self.prediction_length,
+ quantile_levels=self.quantile_levels,
+ **extra_predict_kwargs,
+ )
+ except torch.OutOfMemoryError as ex:
+ logger.error(
+ "The call to predict() resulted in an out of memory error. Try reducing the batch_size by setting:"
+ f" predictor.fit(..., hyperparameters={{'Chronos': {{'batch_size': {batch_size // 2}, ...}}}})"
+ )
+ raise ex
  batch_quantiles.append(qs.numpy())
  batch_means.append(mn.numpy())

@@ -648,8 +729,8 @@ class ChronosModel(AbstractTimeSeriesModel):

  return TimeSeriesDataFrame(df)

- def _more_tags(self) -> Dict:
- do_fine_tune = self._get_model_params()["fine_tune"]
+ def _more_tags(self) -> dict:
+ do_fine_tune = self.get_hyperparameter("fine_tune")
  return {
  "allow_nan": True,
  "can_use_train_data": do_fine_tune,