autogluon.timeseries 1.4.1b20251016__py3-none-any.whl → 1.4.1b20251218__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of autogluon.timeseries might be problematic.

Files changed (90)
  1. autogluon/timeseries/configs/hyperparameter_presets.py +7 -21
  2. autogluon/timeseries/configs/predictor_presets.py +23 -39
  3. autogluon/timeseries/dataset/ts_dataframe.py +97 -86
  4. autogluon/timeseries/learner.py +70 -35
  5. autogluon/timeseries/metrics/__init__.py +4 -4
  6. autogluon/timeseries/metrics/abstract.py +8 -8
  7. autogluon/timeseries/metrics/point.py +9 -9
  8. autogluon/timeseries/metrics/quantile.py +5 -5
  9. autogluon/timeseries/metrics/utils.py +4 -4
  10. autogluon/timeseries/models/__init__.py +2 -1
  11. autogluon/timeseries/models/abstract/abstract_timeseries_model.py +52 -39
  12. autogluon/timeseries/models/abstract/model_trial.py +2 -1
  13. autogluon/timeseries/models/abstract/tunable.py +8 -8
  14. autogluon/timeseries/models/autogluon_tabular/mlforecast.py +58 -62
  15. autogluon/timeseries/models/autogluon_tabular/per_step.py +26 -15
  16. autogluon/timeseries/models/autogluon_tabular/transforms.py +11 -9
  17. autogluon/timeseries/models/chronos/__init__.py +2 -1
  18. autogluon/timeseries/models/chronos/chronos2.py +395 -0
  19. autogluon/timeseries/models/chronos/model.py +126 -88
  20. autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +69 -37
  21. autogluon/timeseries/models/ensemble/__init__.py +36 -2
  22. autogluon/timeseries/models/ensemble/abstract.py +14 -46
  23. autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
  24. autogluon/timeseries/models/ensemble/array_based/abstract.py +240 -0
  25. autogluon/timeseries/models/ensemble/array_based/models.py +185 -0
  26. autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
  27. autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
  28. autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
  29. autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
  30. autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
  31. autogluon/timeseries/models/ensemble/{greedy.py → ensemble_selection.py} +41 -61
  32. autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
  33. autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
  34. autogluon/timeseries/models/ensemble/weighted/abstract.py +45 -0
  35. autogluon/timeseries/models/ensemble/{basic.py → weighted/basic.py} +25 -22
  36. autogluon/timeseries/models/ensemble/weighted/greedy.py +62 -0
  37. autogluon/timeseries/models/gluonts/abstract.py +32 -31
  38. autogluon/timeseries/models/gluonts/dataset.py +11 -11
  39. autogluon/timeseries/models/gluonts/models.py +0 -7
  40. autogluon/timeseries/models/local/__init__.py +0 -7
  41. autogluon/timeseries/models/local/abstract_local_model.py +15 -18
  42. autogluon/timeseries/models/local/naive.py +2 -2
  43. autogluon/timeseries/models/local/npts.py +7 -1
  44. autogluon/timeseries/models/local/statsforecast.py +12 -12
  45. autogluon/timeseries/models/multi_window/multi_window_model.py +39 -24
  46. autogluon/timeseries/models/registry.py +3 -4
  47. autogluon/timeseries/models/toto/_internal/backbone/attention.py +3 -4
  48. autogluon/timeseries/models/toto/_internal/backbone/backbone.py +6 -6
  49. autogluon/timeseries/models/toto/_internal/backbone/rope.py +4 -9
  50. autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
  51. autogluon/timeseries/models/toto/_internal/backbone/scaler.py +2 -3
  52. autogluon/timeseries/models/toto/_internal/backbone/transformer.py +10 -10
  53. autogluon/timeseries/models/toto/_internal/dataset.py +2 -2
  54. autogluon/timeseries/models/toto/_internal/forecaster.py +8 -8
  55. autogluon/timeseries/models/toto/dataloader.py +4 -4
  56. autogluon/timeseries/models/toto/hf_pretrained_model.py +97 -16
  57. autogluon/timeseries/models/toto/model.py +35 -20
  58. autogluon/timeseries/predictor.py +527 -155
  59. autogluon/timeseries/regressor.py +27 -30
  60. autogluon/timeseries/splitter.py +3 -27
  61. autogluon/timeseries/trainer/ensemble_composer.py +444 -0
  62. autogluon/timeseries/trainer/model_set_builder.py +9 -9
  63. autogluon/timeseries/trainer/prediction_cache.py +16 -16
  64. autogluon/timeseries/trainer/trainer.py +300 -278
  65. autogluon/timeseries/trainer/utils.py +17 -0
  66. autogluon/timeseries/transforms/covariate_scaler.py +8 -8
  67. autogluon/timeseries/transforms/target_scaler.py +15 -15
  68. autogluon/timeseries/utils/constants.py +10 -0
  69. autogluon/timeseries/utils/datetime/lags.py +1 -3
  70. autogluon/timeseries/utils/datetime/seasonality.py +1 -3
  71. autogluon/timeseries/utils/features.py +31 -14
  72. autogluon/timeseries/utils/forecast.py +6 -7
  73. autogluon/timeseries/utils/timer.py +173 -0
  74. autogluon/timeseries/version.py +1 -1
  75. autogluon.timeseries-1.4.1b20251218-py3.11-nspkg.pth +1 -0
  76. {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/METADATA +39 -27
  77. autogluon_timeseries-1.4.1b20251218.dist-info/RECORD +103 -0
  78. {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/WHEEL +1 -1
  79. autogluon/timeseries/evaluator.py +0 -6
  80. autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -10
  81. autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
  82. autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -544
  83. autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -580
  84. autogluon.timeseries-1.4.1b20251016-py3.9-nspkg.pth +0 -1
  85. autogluon.timeseries-1.4.1b20251016.dist-info/RECORD +0 -90
  86. {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info/licenses}/LICENSE +0 -0
  87. {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info/licenses}/NOTICE +0 -0
  88. {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/namespace_packages.txt +0 -0
  89. {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/top_level.txt +0 -0
  90. {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/zip-safe +0 -0

autogluon/timeseries/models/local/abstract_local_model.py
@@ -1,7 +1,7 @@
 import logging
 import time
 from multiprocessing import TimeoutError
-from typing import Any, Callable, Optional, Union
+from typing import Any, Callable
 
 import numpy as np
 import pandas as pd
@@ -9,19 +9,16 @@ from joblib import Parallel, cpu_count, delayed
 from scipy.stats import norm
 
 from autogluon.core.utils.exceptions import TimeLimitExceeded
-from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TimeSeriesDataFrame
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.metrics import TimeSeriesScorer
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
+from autogluon.timeseries.utils.constants import AG_DEFAULT_N_JOBS
 from autogluon.timeseries.utils.datetime import get_seasonality
 from autogluon.timeseries.utils.warning_filters import warning_filter
 
 logger = logging.getLogger(__name__)
 
 
-# We use the same default n_jobs across AG-TS to ensure that Joblib reuses the process pool
-AG_DEFAULT_N_JOBS = max(cpu_count(only_physical_cores=True), 1)
-
-
 class AbstractLocalModel(AbstractTimeSeriesModel):
     """Abstract class for local forecasting models that are trained separately for each time series.
 
@@ -40,18 +37,18 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
     """
 
     allowed_local_model_args: list[str] = []
-    default_max_ts_length: Optional[int] = 2500
+    default_max_ts_length: int | None = 2500
     default_max_time_limit_ratio = 1.0
     init_time_in_seconds: int = 0
 
     def __init__(
         self,
-        freq: Optional[str] = None,
+        freq: str | None = None,
         prediction_length: int = 1,
-        path: Optional[str] = None,
-        name: Optional[str] = None,
-        eval_metric: Union[str, TimeSeriesScorer, None] = None,
-        hyperparameters: Optional[dict[str, Any]] = None,
+        path: str | None = None,
+        name: str | None = None,
+        eval_metric: str | TimeSeriesScorer | None = None,
+        hyperparameters: dict[str, Any] | None = None,
        **kwargs,  # noqa
     ):
         super().__init__(
@@ -79,10 +76,10 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
     def preprocess(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates: Optional[TimeSeriesDataFrame] = None,
+        known_covariates: TimeSeriesDataFrame | None = None,
         is_train: bool = False,
         **kwargs,
-    ) -> tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
+    ) -> tuple[TimeSeriesDataFrame, TimeSeriesDataFrame | None]:
         if not self._get_tags()["allow_nan"]:
             data = data.fill_missing_values()
         return data, known_covariates
@@ -95,7 +92,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
         }
 
     @staticmethod
-    def _compute_n_jobs(n_jobs: Union[int, float]) -> int:
+    def _compute_n_jobs(n_jobs: int | float) -> int:
         if isinstance(n_jobs, float) and 0 < n_jobs <= 1:
             return max(int(cpu_count() * n_jobs), 1)
         elif isinstance(n_jobs, int):
@@ -103,7 +100,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
         else:
             raise ValueError(f"n_jobs must be a float between 0 and 1 or an integer (received n_jobs = {n_jobs})")
 
-    def _fit(self, train_data: TimeSeriesDataFrame, time_limit: Optional[int] = None, **kwargs):
+    def _fit(self, train_data: TimeSeriesDataFrame, time_limit: int | None = None, **kwargs):
         self._check_fit_params()
 
         if time_limit is not None and time_limit < self.init_time_in_seconds:
@@ -145,7 +142,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
             data = data.slice_by_timestep(-max_ts_length, None)
 
         indptr = data.get_indptr()
-        target_series = data[self.target].droplevel(level=ITEMID)
+        target_series = data[self.target].droplevel(level=TimeSeriesDataFrame.ITEMID)
         all_series = (target_series[indptr[i] : indptr[i + 1]] for i in range(len(indptr) - 1))
 
         # timeout ensures that no individual job takes longer than time_limit
@@ -184,7 +181,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
         self,
         time_series: pd.Series,
         use_fallback_model: bool,
-        end_time: Optional[float] = None,
+        end_time: float | None = None,
     ) -> tuple[pd.DataFrame, bool]:
         if end_time is not None and time.time() >= end_time:
             raise TimeLimitExceeded
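
The `_compute_n_jobs` hunk above resolves a fractional `n_jobs` against the machine's CPU count, and the shared `AG_DEFAULT_N_JOBS` default now lives in `autogluon.timeseries.utils.constants` so that joblib can reuse one process pool across AG-TS. A minimal standalone sketch of that resolution logic follows; the integer branch is truncated in this diff, so its handling below is an assumption based on the `n_jobs` docstrings of the local models further down:

    from joblib import cpu_count

    # Shared default so joblib can reuse one process pool across AG-TS (moved to utils/constants.py)
    AG_DEFAULT_N_JOBS = max(cpu_count(only_physical_cores=True), 1)

    def compute_n_jobs(n_jobs: int | float) -> int:
        # Floats in (0, 1] are a fraction of all cores, exactly as in the hunk above.
        if isinstance(n_jobs, float) and 0 < n_jobs <= 1:
            return max(int(cpu_count() * n_jobs), 1)
        elif isinstance(n_jobs, int):
            # Assumption: -1 means "all cores", positive integers are used as-is (per the docstrings below).
            return cpu_count() if n_jobs == -1 else n_jobs
        else:
            raise ValueError(f"n_jobs must be a float between 0 and 1 or an integer (received n_jobs = {n_jobs})")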

autogluon/timeseries/models/local/naive.py
@@ -96,7 +96,7 @@ class AverageModel(AbstractLocalModel):
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
         When set to -1, all CPU cores are used.
-    max_ts_length : Optional[int], default = None
+    max_ts_length : int | None, default = None
         If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
         This significantly speeds up fitting and usually leads to no change in accuracy.
     """
@@ -136,7 +136,7 @@ class SeasonalAverageModel(AbstractLocalModel):
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
         When set to -1, all CPU cores are used.
-    max_ts_length : Optional[int], default = None
+    max_ts_length : int | None, default = None
         If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
         This significantly speeds up fitting and usually leads to no change in accuracy.
     """

autogluon/timeseries/models/local/npts.py
@@ -31,7 +31,7 @@ class NPTSModel(AbstractLocalModel):
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
         When set to -1, all CPU cores are used.
-    max_ts_length : Optional[int], default = 2500
+    max_ts_length : int | None, default = 2500
         If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
         This significantly speeds up fitting and usually leads to no change in accuracy.
     """
@@ -59,6 +59,11 @@
     ) -> pd.DataFrame:
         from gluonts.model.npts import NPTSPredictor
 
+        # NPTS model is non-deterministic due to sampling. Set seed for reproducibility in parallel processes
+        # and restore original state to avoid side effects when running with n_jobs=1
+        original_random_state = np.random.get_state()
+        np.random.seed(123)
+
         local_model_args.pop("seasonal_period")
         num_samples = local_model_args.pop("num_samples")
         num_default_time_features = local_model_args.pop("num_default_time_features")
@@ -88,6 +93,7 @@
         forecast_dict = {"mean": forecast.mean}
         for q in self.quantile_levels:
             forecast_dict[str(q)] = forecast.quantile(q)
+        np.random.set_state(original_random_state)
         return pd.DataFrame(forecast_dict)
 
     def _more_tags(self) -> dict:
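
The NPTS hunks above seed NumPy's global RNG before sampling and restore the previous state afterwards, so the fixed seed does not leak into the caller when running with `n_jobs=1`. The same save/restore pattern as a reusable helper (the helper name is hypothetical; `try/finally` is used here so the state is restored even if the wrapped call fails):

    import numpy as np

    def with_fixed_numpy_seed(fn, *args, seed: int = 123, **kwargs):
        """Call fn(*args, **kwargs) under a fixed global NumPy seed, then restore the RNG state."""
        original_state = np.random.get_state()
        np.random.seed(seed)
        try:
            return fn(*args, **kwargs)
        finally:
            np.random.set_state(original_state)

    # Two calls produce identical samples, while the caller's global RNG state is left untouched.
    draws_a = with_fixed_numpy_seed(np.random.normal, size=5)
    draws_b = with_fixed_numpy_seed(np.random.normal, size=5)
    assert np.allclose(draws_a, draws_b)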

autogluon/timeseries/models/local/statsforecast.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Any, Optional, Type
+from typing import Any, Type
 
 import numpy as np
 import pandas as pd
@@ -19,7 +19,7 @@ class AbstractStatsForecastModel(AbstractLocalModel):
         local_model_args["season_length"] = seasonal_period
         return local_model_args
 
-    def _get_model_type(self, variant: Optional[str] = None) -> Type:
+    def _get_model_type(self, variant: str | None = None) -> Type:
         raise NotImplementedError
 
     def _get_local_model(self, local_model_args: dict):
@@ -162,7 +162,7 @@ class AutoARIMAModel(AbstractProbabilisticStatsForecastModel):
         local_model_args.setdefault("allowmean", True)
         return local_model_args
 
-    def _get_model_type(self, variant: Optional[str] = None):
+    def _get_model_type(self, variant: str | None = None):
         from statsforecast.models import AutoARIMA
 
         return AutoARIMA
@@ -232,7 +232,7 @@ class ARIMAModel(AbstractProbabilisticStatsForecastModel):
         local_model_args.setdefault("order", (1, 1, 1))
         return local_model_args
 
-    def _get_model_type(self, variant: Optional[str] = None):
+    def _get_model_type(self, variant: str | None = None):
         from statsforecast.models import ARIMA
 
         return ARIMA
@@ -277,7 +277,7 @@ class AutoETSModel(AbstractProbabilisticStatsForecastModel):
         "seasonal_period",
     ]
 
-    def _get_model_type(self, variant: Optional[str] = None):
+    def _get_model_type(self, variant: str | None = None):
         from statsforecast.models import AutoETS
 
         return AutoETS
@@ -380,7 +380,7 @@ class DynamicOptimizedThetaModel(AbstractProbabilisticStatsForecastModel):
         "seasonal_period",
     ]
 
-    def _get_model_type(self, variant: Optional[str] = None):
+    def _get_model_type(self, variant: str | None = None):
         from statsforecast.models import DynamicOptimizedTheta
 
         return DynamicOptimizedTheta
@@ -425,7 +425,7 @@ class ThetaModel(AbstractProbabilisticStatsForecastModel):
         "seasonal_period",
     ]
 
-    def _get_model_type(self, variant: Optional[str] = None):
+    def _get_model_type(self, variant: str | None = None):
         from statsforecast.models import Theta
 
         return Theta
@@ -546,7 +546,7 @@ class AutoCESModel(AbstractProbabilisticStatsForecastModel):
         "seasonal_period",
     ]
 
-    def _get_model_type(self, variant: Optional[str] = None):
+    def _get_model_type(self, variant: str | None = None):
         from statsforecast.models import AutoCES
 
         return AutoCES
@@ -610,7 +610,7 @@ class ADIDAModel(AbstractStatsForecastIntermittentDemandModel):
 
     ag_priority = 10
 
-    def _get_model_type(self, variant: Optional[str] = None):
+    def _get_model_type(self, variant: str | None = None):
         from statsforecast.models import ADIDA
 
         return ADIDA
@@ -652,7 +652,7 @@ class CrostonModel(AbstractStatsForecastIntermittentDemandModel):
         "variant",
     ]
 
-    def _get_model_type(self, variant: Optional[str] = None):
+    def _get_model_type(self, variant: str | None = None):
         from statsforecast.models import CrostonClassic, CrostonOptimized, CrostonSBA
 
         model_variants = {
@@ -702,7 +702,7 @@ class IMAPAModel(AbstractStatsForecastIntermittentDemandModel):
 
     ag_priority = 10
 
-    def _get_model_type(self, variant: Optional[str] = None):
+    def _get_model_type(self, variant: str | None = None):
         from statsforecast.models import IMAPA
 
         return IMAPA
@@ -726,7 +726,7 @@ class ZeroModel(AbstractStatsForecastIntermittentDemandModel):
 
     ag_priority = 100
 
-    def _get_model_type(self, variant: Optional[str] = None):
+    def _get_model_type(self, variant: str | None = None):
         # ZeroModel does not depend on a StatsForecast implementation
         raise NotImplementedError
 
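
Every StatsForecast wrapper above defers its `statsforecast.models` import into `_get_model_type`, and `CrostonModel` additionally dispatches on a `variant` hyperparameter; the body of that mapping is truncated in this diff, so the variant keys in the sketch below are illustrative assumptions only:

    def get_croston_model_type(variant: str | None = None) -> type:
        # Import inside the function so statsforecast is only required when the model is actually used.
        from statsforecast.models import CrostonClassic, CrostonOptimized, CrostonSBA

        # Hypothetical keys; the real mapping is not visible in this diff.
        model_variants = {"classic": CrostonClassic, "optimized": CrostonOptimized, "sba": CrostonSBA}
        if variant is None:
            return CrostonClassic
        model_type = model_variants.get(variant.lower())
        if model_type is None:
            raise ValueError(f"Unknown Croston variant {variant!r}; expected one of {list(model_variants)}")
        return model_type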
 

autogluon/timeseries/models/multi_window/multi_window_model.py
@@ -4,13 +4,13 @@
 import math
 import os
 import time
-from typing import Any, Optional, Type, Union
+from typing import Any, Type
 
 import numpy as np
 from typing_extensions import Self
 
 import autogluon.core as ag
-from autogluon.timeseries.dataset.ts_dataframe import TimeSeriesDataFrame
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
 from autogluon.timeseries.models.local.abstract_local_model import AbstractLocalModel
 from autogluon.timeseries.splitter import AbstractWindowSplitter, ExpandingWindowSplitter
@@ -38,8 +38,8 @@ class MultiWindowBacktestingModel(AbstractTimeSeriesModel):
 
     def __init__(
         self,
-        model_base: Union[AbstractTimeSeriesModel, Type[AbstractTimeSeriesModel]],
-        model_base_kwargs: Optional[dict[str, Any]] = None,
+        model_base: AbstractTimeSeriesModel | Type[AbstractTimeSeriesModel],
+        model_base_kwargs: dict[str, Any] | None = None,
         **kwargs,
     ):
         if inspect.isclass(model_base) and issubclass(model_base, AbstractTimeSeriesModel):
@@ -58,8 +58,8 @@ class MultiWindowBacktestingModel(AbstractTimeSeriesModel):
         self.model_base_type = type(self.model_base)
         self.info_per_val_window = []
 
-        self.most_recent_model: Optional[AbstractTimeSeriesModel] = None
-        self.most_recent_model_folder: Optional[str] = None
+        self.most_recent_model: AbstractTimeSeriesModel | None = None
+        self.most_recent_model_folder: str | None = None
         super().__init__(**kwargs)
 
     @property
@@ -83,19 +83,19 @@ class MultiWindowBacktestingModel(AbstractTimeSeriesModel):
     def _is_gpu_available(self) -> bool:
         return self._get_model_base()._is_gpu_available()
 
-    def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str, Union[int, float]]:
+    def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str, int | float]:
         return self._get_model_base().get_minimum_resources(is_gpu_available)
 
     def _fit(
         self,
         train_data: TimeSeriesDataFrame,
-        val_data: Optional[TimeSeriesDataFrame] = None,
-        time_limit: Optional[float] = None,
-        num_cpus: Optional[int] = None,
-        num_gpus: Optional[int] = None,
+        val_data: TimeSeriesDataFrame | None = None,
+        time_limit: float | None = None,
+        num_cpus: int | None = None,
+        num_gpus: int | None = None,
         verbosity: int = 2,
-        val_splitter: Optional[AbstractWindowSplitter] = None,
-        refit_every_n_windows: Optional[int] = 1,
+        val_splitter: AbstractWindowSplitter | None = None,
+        refit_every_n_windows: int | None = 1,
         **kwargs,
     ):
         # TODO: use incremental training for GluonTS models?
@@ -109,9 +109,9 @@ class MultiWindowBacktestingModel(AbstractTimeSeriesModel):
         if refit_every_n_windows is None:
             refit_every_n_windows = val_splitter.num_val_windows + 1  # only fit model for the first window
 
-        oof_predictions_per_window = []
+        oof_predictions_per_window: list[TimeSeriesDataFrame] = []
         global_fit_start_time = time.time()
-        model: Optional[AbstractTimeSeriesModel] = None
+        model: AbstractTimeSeriesModel | None = None
 
         for window_index, (train_fold, val_fold) in enumerate(val_splitter.split(train_data)):
             logger.debug(f"\tWindow {window_index}")
@@ -142,6 +142,7 @@ class MultiWindowBacktestingModel(AbstractTimeSeriesModel):
                 train_data=train_fold,
                 val_data=val_fold,
                 time_limit=time_left_for_window,
+                verbosity=verbosity,
                 **kwargs,
            )
            model.fit_time = time.time() - model_fit_start_time
@@ -182,8 +183,9 @@ class MultiWindowBacktestingModel(AbstractTimeSeriesModel):
         self.most_recent_model_folder = most_recent_refit_window  # type: ignore
         self.predict_time = self.most_recent_model.predict_time
         self.fit_time = time.time() - global_fit_start_time - self.predict_time  # type: ignore
-        self._oof_predictions = oof_predictions_per_window
-        self.val_score = np.mean([info["val_score"] for info in self.info_per_val_window])  # type: ignore
+        self.cache_oof_predictions(oof_predictions_per_window)
+
+        self.val_score = float(np.mean([info["val_score"] for info in self.info_per_val_window]))
 
     def get_info(self) -> dict:
         info = super().get_info()
@@ -198,7 +200,7 @@ class MultiWindowBacktestingModel(AbstractTimeSeriesModel):
     def _predict(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates: Optional[TimeSeriesDataFrame] = None,
+        known_covariates: TimeSeriesDataFrame | None = None,
         **kwargs,
     ) -> TimeSeriesDataFrame:
         if self.most_recent_model is None:
@@ -212,12 +214,25 @@ class MultiWindowBacktestingModel(AbstractTimeSeriesModel):
         store_predict_time: bool = False,
         **predict_kwargs,
     ) -> None:
-        # self.val_score, self.predict_time, self._oof_predictions already saved during _fit()
-        assert self._oof_predictions is not None
-        if store_val_score:
-            assert self.val_score is not None
+        if self._oof_predictions is None or self.most_recent_model is None:
+            raise ValueError(f"{self.name} must be fit before calling score_and_cache_oof")
+
+        # Score on val_data using the most recent model
+        past_data, known_covariates = val_data.get_model_inputs_for_scoring(
+            prediction_length=self.prediction_length, known_covariates_names=self.covariate_metadata.known_covariates
+        )
+        predict_start_time = time.time()
+        val_predictions = self.most_recent_model.predict(
+            past_data, known_covariates=known_covariates, **predict_kwargs
+        )
+
+        self._oof_predictions.append(val_predictions)
+
         if store_predict_time:
-            assert self.predict_time is not None
+            self.predict_time = time.time() - predict_start_time
+
+        if store_val_score:
+            self.val_score = self._score_with_predictions(val_data, val_predictions)
 
     def _get_search_space(self):
         return self.model_base._get_search_space()
@@ -234,7 +249,7 @@ class MultiWindowBacktestingModel(AbstractTimeSeriesModel):
         train_fn_kwargs["init_params"]["model_base_kwargs"] = self.get_params()
         return train_fn_kwargs
 
-    def save(self, path: Optional[str] = None, verbose: bool = True) -> str:
+    def save(self, path: str | None = None, verbose: bool = True) -> str:
         most_recent_model = self.most_recent_model
         self.most_recent_model = None
         save_path = super().save(path, verbose)
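
`MultiWindowBacktestingModel._fit` above walks the validation windows produced by an `ExpandingWindowSplitter`, caches the out-of-fold predictions per window, and averages the per-window scores into `val_score`. A self-contained sketch of that expanding-window idea on a plain array (this is not the AutoGluon API; a naive mean forecast stands in for the fitted base model):

    import numpy as np

    def expanding_window_backtest(y: np.ndarray, prediction_length: int, num_val_windows: int) -> float:
        """Score a toy forecaster on several expanding windows and return the mean validation score."""
        scores = []
        for window in range(num_val_windows, 0, -1):
            cutoff = len(y) - window * prediction_length
            train, val = y[:cutoff], y[cutoff : cutoff + prediction_length]
            forecast = np.full(prediction_length, train.mean())  # stand-in for model.predict()
            scores.append(-np.abs(val - forecast).mean())         # negated MAE, so higher is better
            # refit_every_n_windows would control whether the model is re-trained between windows
        return float(np.mean(scores))

    print(expanding_window_backtest(np.arange(100, dtype=float), prediction_length=10, num_val_windows=3))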

autogluon/timeseries/models/registry.py
@@ -1,7 +1,6 @@
 from abc import ABCMeta
 from dataclasses import dataclass
 from inspect import isabstract
-from typing import Union
 
 
 @dataclass
@@ -44,7 +43,7 @@ class ModelRegistry(ABCMeta):
         cls.REGISTRY[alias] = record
 
     @classmethod
-    def _get_model_record(cls, alias: Union[str, type]) -> ModelRecord:
+    def _get_model_record(cls, alias: str | type) -> ModelRecord:
         if isinstance(alias, type):
             alias = alias.__name__
         alias = alias.removesuffix("Model")
@@ -53,11 +52,11 @@ class ModelRegistry(ABCMeta):
         return cls.REGISTRY[alias]
 
     @classmethod
-    def get_model_class(cls, alias: Union[str, type]) -> type:
+    def get_model_class(cls, alias: str | type) -> type:
         return cls._get_model_record(alias).model_class
 
     @classmethod
-    def get_model_priority(cls, alias: Union[str, type]) -> int:
+    def get_model_priority(cls, alias: str | type) -> int:
         return cls._get_model_record(alias).ag_priority
 
     @classmethod
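
`ModelRegistry._get_model_record` above accepts either a model class or a string and strips a trailing "Model" before the registry lookup, so a class, its full name, and its short alias all resolve to the same record. The normalization step in isolation, with a stand-in class:

    def normalize_alias(alias: str | type) -> str:
        """Reduce a model class or name to its registry key, mirroring the lookup logic above."""
        if isinstance(alias, type):
            alias = alias.__name__
        return alias.removesuffix("Model")

    class AutoETSModel:  # stand-in class for illustration only
        pass

    assert normalize_alias(AutoETSModel) == normalize_alias("AutoETSModel") == normalize_alias("AutoETS") == "AutoETS"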

autogluon/timeseries/models/toto/_internal/backbone/attention.py
@@ -5,7 +5,6 @@
 
 import logging
 from enum import Enum
-from typing import Optional, Union
 
 import torch
 from einops import rearrange
@@ -27,7 +26,7 @@ class BaseMultiheadAttention(torch.nn.Module):
         embed_dim: int,
         num_heads: int,
         dropout: float,
-        rotary_emb: Optional[TimeAwareRotaryEmbedding],
+        rotary_emb: TimeAwareRotaryEmbedding | None,
         use_memory_efficient_attention: bool,
     ):
         super().__init__()
@@ -151,7 +150,7 @@ class BaseMultiheadAttention(torch.nn.Module):
         self,
         layer_idx: int,
         inputs: torch.Tensor,
-        attention_mask: Optional[torch.Tensor] = None,
+        attention_mask: torch.Tensor | None = None,
         kv_cache=None,
     ) -> torch.Tensor:
         batch_size, variate, seq_len, _ = inputs.shape
@@ -194,4 +193,4 @@ class SpaceWiseMultiheadAttention(BaseMultiheadAttention):
     attention_axis = AttentionAxis.SPACE
 
 
-MultiHeadAttention = Union[TimeWiseMultiheadAttention, SpaceWiseMultiheadAttention]
+MultiHeadAttention = TimeWiseMultiheadAttention | SpaceWiseMultiheadAttention
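
The last hunk replaces a `typing.Union` type alias with the PEP 604 `X | Y` syntax. Because this expression is evaluated at runtime between classes, it requires Python 3.10+ and produces a `types.UnionType`, which still supports `isinstance` checks; a small demonstration with stand-in classes:

    class TimeWise: ...
    class SpaceWise: ...

    MultiHead = TimeWise | SpaceWise          # types.UnionType, evaluated at runtime (Python 3.10+)
    assert isinstance(TimeWise(), MultiHead)  # PEP 604 unions work with isinstance()
    assert not isinstance(object(), MultiHead)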

autogluon/timeseries/models/toto/_internal/backbone/backbone.py
@@ -4,7 +4,7 @@
 # Copyright 2025 Datadog, Inc.
 
 import math
-from typing import NamedTuple, Optional
+from typing import NamedTuple
 
 import torch
 
@@ -131,7 +131,7 @@ class TotoBackbone(torch.nn.Module):
         scaler_cls: str,
         output_distribution_classes: list[str],
         spacewise_first: bool = True,
-        output_distribution_kwargs: Optional[dict] = None,
+        output_distribution_kwargs: dict | None = None,
         use_memory_efficient_attention: bool = True,
         stabilize_with_global: bool = True,
         scale_factor_exponent: float = 10.0,
@@ -192,8 +192,8 @@ class TotoBackbone(torch.nn.Module):
         inputs: torch.Tensor,
         input_padding_mask: torch.Tensor,
         id_mask: torch.Tensor,
-        kv_cache: Optional[KVCache] = None,
-        scaling_prefix_length: Optional[int] = None,
+        kv_cache: KVCache | None = None,
+        scaling_prefix_length: int | None = None,
     ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         scaled_inputs: torch.Tensor
         loc: torch.Tensor
@@ -244,8 +244,8 @@ class TotoBackbone(torch.nn.Module):
         inputs: torch.Tensor,
         input_padding_mask: torch.Tensor,
         id_mask: torch.Tensor,
-        kv_cache: Optional[KVCache] = None,
-        scaling_prefix_length: Optional[int] = None,
+        kv_cache: KVCache | None = None,
+        scaling_prefix_length: int | None = None,
     ) -> TotoOutput:
         flattened, loc, scale = self.backbone(
             inputs,

autogluon/timeseries/models/toto/_internal/backbone/rope.py
@@ -3,16 +3,11 @@
 # This product includes software developed at Datadog (https://www.datadoghq.com/)
 # Copyright 2025 Datadog, Inc.
 
-from typing import Optional
 
 import torch
 from einops import rearrange
-from rotary_embedding_torch import RotaryEmbedding, apply_rotary_emb
-from rotary_embedding_torch.rotary_embedding_torch import default
 
-
-def exists(val):
-    return val is not None
+from .rotary_embedding_torch import RotaryEmbedding, apply_rotary_emb, default
 
 
 class TimeAwareRotaryEmbedding(RotaryEmbedding):
@@ -41,8 +36,8 @@ class TimeAwareRotaryEmbedding(RotaryEmbedding):
         self,
         q: torch.Tensor,
         k: torch.Tensor,
-        seq_dim: Optional[int] = None,
-        seq_pos: Optional[torch.Tensor] = None,
+        seq_dim: int | None = None,
+        seq_pos: torch.Tensor | None = None,
         seq_pos_offset: int = 0,
     ):
         """
@@ -78,7 +73,7 @@ class TimeAwareRotaryEmbedding(RotaryEmbedding):
 
         return rotated_q, rotated_k
 
-    def get_scale(self, t: torch.Tensor, seq_len: Optional[int] = None, offset=0):
+    def get_scale(self, t: torch.Tensor, seq_len: int | None = None, offset=0):
         """
         This method is adapted closely from the base class, but it knows how to handle
         when `t` has more than 1 dim (as is the case when we're using time-aware RoPE, and have a different
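
For context on the `TimeAwareRotaryEmbedding` changes above: rotary position embeddings rotate pairs of query/key dimensions by angles proportional to token positions, and the `seq_pos` argument in these hunks suggests the time-aware variant supplies explicit per-token positions rather than a plain 0..seq_len-1 range. A generic, self-contained sketch of standard RoPE (not the vendored Toto implementation):

    import torch

    def apply_rope(x: torch.Tensor, positions: torch.Tensor, base: float = 10000.0) -> torch.Tensor:
        """Rotate pairs of feature dimensions of x by angles proportional to each token's position."""
        # x: (..., seq_len, dim) with even dim; positions: (seq_len,), e.g. arbitrary timestamps
        dim = x.shape[-1]
        inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
        angles = positions.to(torch.float32)[:, None] * inv_freq[None, :]  # (seq_len, dim / 2)
        cos, sin = angles.cos(), angles.sin()
        x1, x2 = x[..., 0::2], x[..., 1::2]
        return torch.stack((x1 * cos - x2 * sin, x1 * sin + x2 * cos), dim=-1).flatten(-2)

    q = torch.randn(2, 8, 16)              # (batch, seq_len, dim)
    q_rot = apply_rope(q, torch.arange(8))  # positions could equally be irregular timestamps
    assert q_rot.shape == q.shape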