autogluon.timeseries 1.4.1b20250906__py3-none-any.whl → 1.4.1b20251210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of autogluon.timeseries might be problematic. Click here for more details.

Files changed (93) hide show
  1. autogluon/timeseries/configs/hyperparameter_presets.py +2 -2
  2. autogluon/timeseries/dataset/ts_dataframe.py +97 -86
  3. autogluon/timeseries/learner.py +68 -35
  4. autogluon/timeseries/metrics/__init__.py +4 -4
  5. autogluon/timeseries/metrics/abstract.py +8 -8
  6. autogluon/timeseries/metrics/point.py +9 -9
  7. autogluon/timeseries/metrics/quantile.py +5 -5
  8. autogluon/timeseries/metrics/utils.py +4 -4
  9. autogluon/timeseries/models/__init__.py +4 -1
  10. autogluon/timeseries/models/abstract/abstract_timeseries_model.py +52 -39
  11. autogluon/timeseries/models/abstract/model_trial.py +2 -1
  12. autogluon/timeseries/models/abstract/tunable.py +8 -8
  13. autogluon/timeseries/models/autogluon_tabular/mlforecast.py +58 -62
  14. autogluon/timeseries/models/autogluon_tabular/per_step.py +26 -15
  15. autogluon/timeseries/models/autogluon_tabular/transforms.py +11 -9
  16. autogluon/timeseries/models/chronos/__init__.py +2 -1
  17. autogluon/timeseries/models/chronos/chronos2.py +361 -0
  18. autogluon/timeseries/models/chronos/model.py +125 -87
  19. autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +68 -36
  20. autogluon/timeseries/models/ensemble/__init__.py +34 -2
  21. autogluon/timeseries/models/ensemble/abstract.py +5 -42
  22. autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
  23. autogluon/timeseries/models/ensemble/array_based/abstract.py +236 -0
  24. autogluon/timeseries/models/ensemble/array_based/models.py +73 -0
  25. autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
  26. autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
  27. autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +167 -0
  28. autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
  29. autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
  30. autogluon/timeseries/models/ensemble/{greedy.py → ensemble_selection.py} +41 -61
  31. autogluon/timeseries/models/ensemble/per_item_greedy.py +162 -0
  32. autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
  33. autogluon/timeseries/models/ensemble/weighted/abstract.py +40 -0
  34. autogluon/timeseries/models/ensemble/{basic.py → weighted/basic.py} +6 -16
  35. autogluon/timeseries/models/ensemble/weighted/greedy.py +57 -0
  36. autogluon/timeseries/models/gluonts/abstract.py +25 -25
  37. autogluon/timeseries/models/gluonts/dataset.py +11 -11
  38. autogluon/timeseries/models/local/__init__.py +0 -7
  39. autogluon/timeseries/models/local/abstract_local_model.py +15 -18
  40. autogluon/timeseries/models/local/naive.py +2 -2
  41. autogluon/timeseries/models/local/npts.py +1 -1
  42. autogluon/timeseries/models/local/statsforecast.py +12 -12
  43. autogluon/timeseries/models/multi_window/multi_window_model.py +39 -24
  44. autogluon/timeseries/models/registry.py +3 -4
  45. autogluon/timeseries/models/toto/__init__.py +3 -0
  46. autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
  47. autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
  48. autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
  49. autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
  50. autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
  51. autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
  52. autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
  53. autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
  54. autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
  55. autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
  56. autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
  57. autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
  58. autogluon/timeseries/models/toto/dataloader.py +108 -0
  59. autogluon/timeseries/models/toto/hf_pretrained_model.py +118 -0
  60. autogluon/timeseries/models/toto/model.py +236 -0
  61. autogluon/timeseries/predictor.py +301 -103
  62. autogluon/timeseries/regressor.py +27 -30
  63. autogluon/timeseries/splitter.py +3 -27
  64. autogluon/timeseries/trainer/ensemble_composer.py +439 -0
  65. autogluon/timeseries/trainer/model_set_builder.py +9 -9
  66. autogluon/timeseries/trainer/prediction_cache.py +16 -16
  67. autogluon/timeseries/trainer/trainer.py +300 -275
  68. autogluon/timeseries/trainer/utils.py +17 -0
  69. autogluon/timeseries/transforms/covariate_scaler.py +8 -8
  70. autogluon/timeseries/transforms/target_scaler.py +15 -15
  71. autogluon/timeseries/utils/constants.py +10 -0
  72. autogluon/timeseries/utils/datetime/lags.py +1 -3
  73. autogluon/timeseries/utils/datetime/seasonality.py +1 -3
  74. autogluon/timeseries/utils/features.py +18 -14
  75. autogluon/timeseries/utils/forecast.py +6 -7
  76. autogluon/timeseries/utils/timer.py +173 -0
  77. autogluon/timeseries/version.py +1 -1
  78. autogluon.timeseries-1.4.1b20251210-py3.11-nspkg.pth +1 -0
  79. {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/METADATA +39 -22
  80. autogluon_timeseries-1.4.1b20251210.dist-info/RECORD +103 -0
  81. {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/WHEEL +1 -1
  82. autogluon/timeseries/evaluator.py +0 -6
  83. autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -10
  84. autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
  85. autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -544
  86. autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -580
  87. autogluon.timeseries-1.4.1b20250906-py3.9-nspkg.pth +0 -1
  88. autogluon.timeseries-1.4.1b20250906.dist-info/RECORD +0 -75
  89. {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/LICENSE +0 -0
  90. {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/NOTICE +0 -0
  91. {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/namespace_packages.txt +0 -0
  92. {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/top_level.txt +0 -0
  93. {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/zip-safe +0 -0
@@ -0,0 +1,118 @@
1
+ import logging
2
+
3
+ from transformers import PretrainedConfig, PreTrainedModel
4
+
5
+ from ._internal.backbone import TotoBackbone
6
+
7
+
8
class TotoConfig(PretrainedConfig):
    """Hugging Face configuration object for the Toto model.

    Every named constructor argument is stored as an attribute of the same name
    before the remaining keyword arguments are handed to
    ``PretrainedConfig.__init__``.

    Parameters
    ----------
    output_distribution_classes : list[str] or None
        Falls back to ``["MixtureOfStudentTsOutput"]`` when ``None`` or empty.
    output_distribution_kwargs : dict or None
        Falls back to ``{"k_components": 24}`` when ``None`` or empty.
    **kwargs
        Forwarded unchanged to ``PretrainedConfig.__init__``.

    All other parameters are stored verbatim.
    """

    model_type = "toto"

    def __init__(
        self,
        dropout: float = 0.0,
        embed_dim: int = 768,
        num_heads: int = 12,
        num_layers: int = 12,
        output_distribution_classes: list[str] | None = None,
        output_distribution_kwargs: dict | None = None,
        patch_size: int = 64,
        scale_factor_exponent: float = 10.0,
        spacewise_every_n_layers: int = 12,
        spacewise_first: bool = False,
        stabilize_with_global: bool = True,
        stride: int = 64,
        transformers_version: str = "4.49.0",
        use_memory_efficient_attention: bool = False,
        **kwargs,
    ):
        # Fresh default containers are created per call, so instances never share them.
        if not output_distribution_classes:
            output_distribution_classes = ["MixtureOfStudentTsOutput"]
        if not output_distribution_kwargs:
            output_distribution_kwargs = {"k_components": 24}

        # Attributes are assigned in this exact order, ahead of the base-class initializer.
        settings = {
            "dropout": dropout,
            "embed_dim": embed_dim,
            "num_heads": num_heads,
            "num_layers": num_layers,
            "output_distribution_classes": output_distribution_classes,
            "output_distribution_kwargs": output_distribution_kwargs,
            "patch_size": patch_size,
            "scale_factor_exponent": scale_factor_exponent,
            "spacewise_every_n_layers": spacewise_every_n_layers,
            "spacewise_first": spacewise_first,
            "stabilize_with_global": stabilize_with_global,
            "stride": stride,
            "transformers_version": transformers_version,
            "use_memory_efficient_attention": use_memory_efficient_attention,
        }
        for attr_name, attr_value in settings.items():
            setattr(self, attr_name, attr_value)

        super().__init__(**kwargs)
45
+
46
+
47
class TotoPretrainedModel(PreTrainedModel):
    """``PreTrainedModel`` wrapper that builds a ``TotoBackbone`` from a ``TotoConfig``.

    The backbone is stored as ``self.model``; ``forward`` simply delegates to it.
    A load-state-dict pre-hook renames a fixed set of MLP parameter keys found in
    checkpoints before the weights are loaded.
    """

    config_class = TotoConfig
    base_model_prefix = "model"  # optional, used for weight naming conventions

    def __init__(self, config: TotoConfig):
        super().__init__(config)

        # Values taken from the config; ``mlp_hidden_dim`` and ``scaler_cls`` are optional
        # on the config and fall back to the defaults given here.
        backbone_kwargs = {
            "patch_size": config.patch_size,
            "stride": config.stride,
            "embed_dim": config.embed_dim,
            "num_layers": config.num_layers,
            "num_heads": config.num_heads,
            "mlp_hidden_dim": getattr(config, "mlp_hidden_dim", 3072),
            "dropout": config.dropout,
            "spacewise_every_n_layers": config.spacewise_every_n_layers,
            "scaler_cls": getattr(config, "scaler_cls", "model.scaler.CausalPatchStdMeanScaler"),
            "output_distribution_classes": config.output_distribution_classes,
            "spacewise_first": config.spacewise_first,
            "output_distribution_kwargs": config.output_distribution_kwargs,
            # Always disabled here, regardless of the value stored on the config.
            "use_memory_efficient_attention": False,
            "stabilize_with_global": config.stabilize_with_global,
            "scale_factor_exponent": config.scale_factor_exponent,
        }
        extra_kwargs = getattr(config, "extra_kwargs", {})
        # Double unpacking keeps the "duplicate keyword" TypeError if extra_kwargs
        # repeats one of the names above.
        self.model = TotoBackbone(**backbone_kwargs, **extra_kwargs)

        self._register_load_state_dict_pre_hook(self._remap_state_dict_keys_hook)
        self.post_init()

    def _remap_state_dict_keys_hook(
        self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
    ):
        """Rename checkpoint MLP keys in ``state_dict`` in place.

        For each key, the first matching substring from the rename table (in table
        order) is replaced; keys with no match are left alone. The remaining hook
        arguments are unused.
        """
        renames = {
            "mlp.0.w12.weight": "mlp.0.weight",
            "mlp.0.w12.bias": "mlp.0.bias",
            "mlp.0.w3.weight": "mlp.2.weight",
            "mlp.0.w3.bias": "mlp.2.bias",
        }

        # Collect all renames first so the dict is not mutated while being scanned.
        pending = []
        for name in list(state_dict.keys()):
            hit = next((pair for pair in renames.items() if pair[0] in name), None)
            if hit is not None:
                stale, fresh = hit
                pending.append((name, name.replace(stale, fresh)))

        for source_name, target_name in pending:
            state_dict[target_name] = state_dict.pop(source_name)

    @classmethod
    def from_pretrained(cls, model_name_or_path, config=None, torch_dtype=None, device_map=None, **kwargs):
        """Load the model without a device map, then move it to ``device_map`` if given.

        The ``transformers.modeling_utils`` logger is raised to ``ERROR`` for the
        duration of the load and restored afterwards, even if loading fails.
        """
        hf_logger = logging.getLogger("transformers.modeling_utils")
        saved_level = hf_logger.level

        try:
            # Silence the "some weights were not initialized" message from transformers: the
            # key-remapping hook only runs after the initial model loading.
            hf_logger.setLevel(logging.ERROR)

            # With an explicit device map, transformers takes a load path that skips
            # load_state_dict hooks. So load without one and move the model afterwards.
            loaded = super().from_pretrained(model_name_or_path, config=config, torch_dtype=torch_dtype, **kwargs)
            model = loaded if device_map is None else loaded.to(device_map)
        finally:
            hf_logger.setLevel(saved_level)

        return model

    def forward(self, *args, **kwargs):
        """Delegate to the wrapped backbone."""
        return self.model(*args, **kwargs)
@@ -0,0 +1,236 @@
1
+ import logging
2
+ import os
3
+ from typing import TYPE_CHECKING, Any, Sequence
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ from typing_extensions import Self
8
+
9
+ from autogluon.common.loaders import load_pkl
10
+ from autogluon.timeseries import TimeSeriesDataFrame
11
+ from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
12
+ from autogluon.timeseries.utils.features import CovariateMetadata
13
+
14
+ if TYPE_CHECKING:
15
+ from ._internal import TotoForecaster
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class TotoModel(AbstractTimeSeriesModel):
    """Toto (Time-Series-Optimized Transformer for Observability) [CohenKhwajaetal2025]_ pretrained time series forecasting model.

    Toto is a 151M parameter model trained on over 1T data points from DataDog's internal observability systems, as well as
    the GIFT-eval pretrain, Chronos pretraining, and synthetically generated time series corpora. It is a decoder-only
    architecture that autoregressively outputs parametric distribution forecasts. More details can be found on
    `Hugging Face <https://huggingface.co/Datadog/Toto-Open-Base-1.0>`_ and `GitHub <https://github.com/DataDog/toto>`_.

    The AutoGluon implementation of Toto is a port of the original implementation. AutoGluon supports Toto for
    **inference only**, i.e., the model will not be trained or fine-tuned on the provided training data. Toto is optimized
    for easy maintenance with the rest of the AutoGluon model zoo, and does not feature some important optimizations such
    as xformers and flash-attention available in the original model repository. The AutoGluon implementation of Toto
    requires a CUDA-compatible GPU.

    References
    ----------
    .. [CohenKhwajaetal2025] Cohen, Ben, Khwaja, Emaad et al.
        "This Time is Different: An Observability Perspective on Time Series Foundation Models."
        https://arxiv.org/abs/2505.14766


    Other Parameters
    ----------------
    model_path : str, default = "Datadog/Toto-Open-Base-1.0"
        Model path used for the model, i.e., a HuggingFace transformers ``name_or_path``. Can be a
        compatible model name on HuggingFace Hub or a local path to a model directory.
    batch_size : int, default = 24
        Size of batches used during inference.
    num_samples : int, default = 256
        Number of samples used during inference.
    device : str, default = "cuda"
        Device to use for inference. Toto requires a CUDA-compatible GPU to run.
    context_length : int or None, default = 4096
        The context length to use in the model. Shorter context lengths will decrease model accuracy, but result
        in faster inference.
    compile_model : bool, default = True
        Whether to compile the model using torch.compile() for faster inference. May increase initial loading time
        but can provide speedups during inference.
    """

    default_model_path: str = "Datadog/Toto-Open-Base-1.0"

    def __init__(
        self,
        path: str | None = None,
        name: str | None = None,
        hyperparameters: dict[str, Any] | None = None,
        freq: str | None = None,
        prediction_length: int = 1,
        covariate_metadata: CovariateMetadata | None = None,
        target: str = "target",
        quantile_levels: Sequence[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
        eval_metric: Any = None,
    ):
        hyperparameters = hyperparameters if hyperparameters is not None else {}

        # Resolved before the base initializer runs; falls back to the class-level default.
        self.model_path = hyperparameters.get("model_path", self.default_model_path)

        super().__init__(
            path=path,
            name=name,
            hyperparameters=hyperparameters,
            freq=freq,
            prediction_length=prediction_length,
            covariate_metadata=covariate_metadata,
            target=target,
            quantile_levels=quantile_levels,
            eval_metric=eval_metric,
        )

        # Populated lazily by ``load_forecaster``.
        self._forecaster: TotoForecaster | None = None

    def save(self, path: str | None = None, verbose: bool = True) -> str:
        """Save the model with ``_forecaster`` temporarily detached.

        The forecaster is set to ``None`` for the duration of the base-class save
        (presumably so the loaded network is not serialized with the model object)
        and is always put back afterwards, including when saving raises.

        Returns
        -------
        str
            The path returned by the base-class ``save``, converted to ``str``.
        """
        forecaster = self._forecaster
        self._forecaster = None
        try:
            saved_path = super().save(path=path, verbose=verbose)
        finally:
            # Restore even on failure so a failed save does not drop the loaded forecaster.
            self._forecaster = forecaster

        return str(saved_path)

    @classmethod
    def load(cls, path: str, reset_paths: bool = True, load_oof: bool = False, verbose: bool = True) -> Self:
        """Load the pickled model stored under ``path``.

        When ``reset_paths`` is true, ``set_contexts(path)`` is called on the loaded
        model. ``load_oof`` is accepted but not used by this implementation.
        """
        model = load_pkl.load(path=os.path.join(path, cls.model_file_name), verbose=verbose)
        if reset_paths:
            model.set_contexts(path)

        return model

    def _is_gpu_available(self) -> bool:
        """Return ``torch.cuda.is_available()``."""
        import torch.cuda

        return torch.cuda.is_available()

    def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str, int | float]:
        """Return a requirement of one CPU and one GPU; ``is_gpu_available`` is ignored."""
        return {"num_cpus": 1, "num_gpus": 1}

    def load_forecaster(self):
        """Load the pretrained weights and build ``self._forecaster``.

        Raises
        ------
        RuntimeError
            If no CUDA device is available.
        """
        from ._internal import TotoForecaster
        from .hf_pretrained_model import TotoConfig, TotoPretrainedModel

        if not self._is_gpu_available():
            raise RuntimeError(
                f"{self.name} requires a GPU to run, but no GPU was detected. "
                "Please make sure that you are using a computer with a CUDA-compatible GPU and "
                "`import torch; torch.cuda.is_available()` returns `True`."
            )

        hyperparameters = self.get_hyperparameters()
        pretrained_model = TotoPretrainedModel.from_pretrained(
            self.model_path,
            config=TotoConfig.from_pretrained(self.model_path),
            device_map=hyperparameters["device"],
        )

        if hyperparameters["compile_model"]:
            pretrained_model.model.compile()

        # Only the backbone is handed to the forecaster, not the HF wrapper.
        self._forecaster = TotoForecaster(model=pretrained_model.model)

    def persist(self) -> Self:
        """Ensure the forecaster is loaded and return ``self``."""
        if self._forecaster is None:
            self.load_forecaster()
        return self

    def _get_default_hyperparameters(self) -> dict:
        """Return the default inference hyperparameters."""
        return {
            "batch_size": 24,
            "num_samples": 256,
            "device": "cuda",
            "context_length": 4096,
            "compile_model": True,
        }

    @property
    def allowed_hyperparameters(self) -> list[str]:
        """Base-class allowed hyperparameters plus the Toto-specific ones."""
        return super().allowed_hyperparameters + [
            "model_path",
            "batch_size",
            "num_samples",
            "device",
            "context_length",
            "compile_model",
        ]

    def _more_tags(self) -> dict:
        """Return model tags: NaNs are allowed; train and validation data are not used."""
        return {
            "allow_nan": True,
            "can_use_train_data": False,
            "can_use_val_data": False,
        }

    def _fit(
        self,
        train_data: TimeSeriesDataFrame,
        val_data: TimeSeriesDataFrame | None = None,
        time_limit: float | None = None,
        num_cpus: int | None = None,
        num_gpus: int | None = None,
        verbosity: int = 2,
        **kwargs,
    ) -> None:
        """Validate fit parameters and load the pretrained forecaster; the data arguments are not used."""
        self._check_fit_params()
        self.load_forecaster()

    def _predict(
        self, data: TimeSeriesDataFrame, known_covariates: TimeSeriesDataFrame | None = None, **kwargs
    ) -> TimeSeriesDataFrame:
        """Forecast ``prediction_length`` steps for ``data``.

        Missing values are filled with ``fill_missing_values("auto")`` before batching.
        ``known_covariates`` is not used. A ``time_limit`` entry in ``kwargs``, if
        present, is forwarded to the data loader.

        Returns
        -------
        TimeSeriesDataFrame
            Columns ``"mean"`` plus one column per quantile level (named ``str(q)``),
            indexed by ``get_forecast_horizon_index(data)``.
        """
        import torch

        from .dataloader import TotoDataLoader, TotoInferenceDataset

        hyperparameters = self.get_hyperparameters()

        if self._forecaster is None:
            self.load_forecaster()
        assert self._forecaster, "Toto model failed to load"
        device = self._forecaster.model.device

        dataset = TotoInferenceDataset(
            target_df=data.fill_missing_values("auto"),
            max_context_length=hyperparameters["context_length"],
        )
        loader = TotoDataLoader(
            dataset,
            freq=self.freq,
            batch_size=hyperparameters["batch_size"],
            time_limit=kwargs.get("time_limit"),
            device=device,
        )

        batch_means, batch_quantiles = [], []
        with torch.inference_mode():
            for masked_timeseries in loader:
                forecast = self._forecaster.forecast(
                    masked_timeseries,
                    prediction_length=self.prediction_length,
                    num_samples=hyperparameters["num_samples"],
                    samples_per_batch=32,
                )

                batch_means.append(forecast.mean.cpu().numpy())
                # Stack one array per quantile level along a new leading axis.
                qs = np.array([forecast.quantile(q).cpu().numpy() for q in self.quantile_levels])
                # Drop singleton axis 2 (presumably the variate axis; confirm against
                # TotoForecaster), then move the quantile axis to the end.
                batch_quantiles.append(qs.squeeze(2).transpose(1, 2, 0))

        # Flatten to one row per forecast step: the mean column followed by the quantile columns.
        df = pd.DataFrame(
            np.concatenate(
                [
                    np.concatenate(batch_means, axis=0).reshape(-1, 1),
                    np.concatenate(batch_quantiles, axis=0).reshape(-1, len(self.quantile_levels)),
                ],
                axis=1,
            ),
            columns=["mean"] + [str(q) for q in self.quantile_levels],
            index=self.get_forecast_horizon_index(data),
        )

        return TimeSeriesDataFrame(df)