autogluon.timeseries 1.0.1b20240304__py3-none-any.whl → 1.4.1b20251210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of autogluon.timeseries might be problematic. Click here for more details.

Files changed (108)
  1. autogluon/timeseries/configs/__init__.py +3 -2
  2. autogluon/timeseries/configs/hyperparameter_presets.py +62 -0
  3. autogluon/timeseries/configs/predictor_presets.py +84 -0
  4. autogluon/timeseries/dataset/ts_dataframe.py +339 -186
  5. autogluon/timeseries/learner.py +192 -60
  6. autogluon/timeseries/metrics/__init__.py +55 -11
  7. autogluon/timeseries/metrics/abstract.py +96 -25
  8. autogluon/timeseries/metrics/point.py +186 -39
  9. autogluon/timeseries/metrics/quantile.py +47 -20
  10. autogluon/timeseries/metrics/utils.py +6 -6
  11. autogluon/timeseries/models/__init__.py +13 -7
  12. autogluon/timeseries/models/abstract/__init__.py +2 -2
  13. autogluon/timeseries/models/abstract/abstract_timeseries_model.py +533 -273
  14. autogluon/timeseries/models/abstract/model_trial.py +10 -10
  15. autogluon/timeseries/models/abstract/tunable.py +189 -0
  16. autogluon/timeseries/models/autogluon_tabular/__init__.py +2 -0
  17. autogluon/timeseries/models/autogluon_tabular/mlforecast.py +369 -215
  18. autogluon/timeseries/models/autogluon_tabular/per_step.py +513 -0
  19. autogluon/timeseries/models/autogluon_tabular/transforms.py +67 -0
  20. autogluon/timeseries/models/autogluon_tabular/utils.py +3 -51
  21. autogluon/timeseries/models/chronos/__init__.py +4 -0
  22. autogluon/timeseries/models/chronos/chronos2.py +361 -0
  23. autogluon/timeseries/models/chronos/model.py +738 -0
  24. autogluon/timeseries/models/chronos/utils.py +369 -0
  25. autogluon/timeseries/models/ensemble/__init__.py +35 -2
  26. autogluon/timeseries/models/ensemble/{abstract_timeseries_ensemble.py → abstract.py} +50 -26
  27. autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
  28. autogluon/timeseries/models/ensemble/array_based/abstract.py +236 -0
  29. autogluon/timeseries/models/ensemble/array_based/models.py +73 -0
  30. autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
  31. autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
  32. autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +167 -0
  33. autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
  34. autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
  35. autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
  36. autogluon/timeseries/models/ensemble/per_item_greedy.py +162 -0
  37. autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
  38. autogluon/timeseries/models/ensemble/weighted/abstract.py +40 -0
  39. autogluon/timeseries/models/ensemble/weighted/basic.py +78 -0
  40. autogluon/timeseries/models/ensemble/weighted/greedy.py +57 -0
  41. autogluon/timeseries/models/gluonts/__init__.py +3 -1
  42. autogluon/timeseries/models/gluonts/abstract.py +583 -0
  43. autogluon/timeseries/models/gluonts/dataset.py +109 -0
  44. autogluon/timeseries/models/gluonts/{torch/models.py → models.py} +185 -44
  45. autogluon/timeseries/models/local/__init__.py +1 -10
  46. autogluon/timeseries/models/local/abstract_local_model.py +150 -97
  47. autogluon/timeseries/models/local/naive.py +31 -23
  48. autogluon/timeseries/models/local/npts.py +6 -2
  49. autogluon/timeseries/models/local/statsforecast.py +99 -112
  50. autogluon/timeseries/models/multi_window/multi_window_model.py +99 -40
  51. autogluon/timeseries/models/registry.py +64 -0
  52. autogluon/timeseries/models/toto/__init__.py +3 -0
  53. autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
  54. autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
  55. autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
  56. autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
  57. autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
  58. autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
  59. autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
  60. autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
  61. autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
  62. autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
  63. autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
  64. autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
  65. autogluon/timeseries/models/toto/dataloader.py +108 -0
  66. autogluon/timeseries/models/toto/hf_pretrained_model.py +118 -0
  67. autogluon/timeseries/models/toto/model.py +236 -0
  68. autogluon/timeseries/predictor.py +826 -305
  69. autogluon/timeseries/regressor.py +253 -0
  70. autogluon/timeseries/splitter.py +10 -31
  71. autogluon/timeseries/trainer/__init__.py +2 -3
  72. autogluon/timeseries/trainer/ensemble_composer.py +439 -0
  73. autogluon/timeseries/trainer/model_set_builder.py +256 -0
  74. autogluon/timeseries/trainer/prediction_cache.py +149 -0
  75. autogluon/timeseries/trainer/trainer.py +1298 -0
  76. autogluon/timeseries/trainer/utils.py +17 -0
  77. autogluon/timeseries/transforms/__init__.py +2 -0
  78. autogluon/timeseries/transforms/covariate_scaler.py +164 -0
  79. autogluon/timeseries/transforms/target_scaler.py +149 -0
  80. autogluon/timeseries/utils/constants.py +10 -0
  81. autogluon/timeseries/utils/datetime/base.py +38 -20
  82. autogluon/timeseries/utils/datetime/lags.py +18 -16
  83. autogluon/timeseries/utils/datetime/seasonality.py +14 -14
  84. autogluon/timeseries/utils/datetime/time_features.py +17 -14
  85. autogluon/timeseries/utils/features.py +317 -53
  86. autogluon/timeseries/utils/forecast.py +31 -17
  87. autogluon/timeseries/utils/timer.py +173 -0
  88. autogluon/timeseries/utils/warning_filters.py +44 -6
  89. autogluon/timeseries/version.py +2 -1
  90. autogluon.timeseries-1.4.1b20251210-py3.11-nspkg.pth +1 -0
  91. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/METADATA +71 -47
  92. autogluon_timeseries-1.4.1b20251210.dist-info/RECORD +103 -0
  93. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/WHEEL +1 -1
  94. autogluon/timeseries/configs/presets_configs.py +0 -11
  95. autogluon/timeseries/evaluator.py +0 -6
  96. autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -170
  97. autogluon/timeseries/models/gluonts/abstract_gluonts.py +0 -550
  98. autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
  99. autogluon/timeseries/models/presets.py +0 -325
  100. autogluon/timeseries/trainer/abstract_trainer.py +0 -1144
  101. autogluon/timeseries/trainer/auto_trainer.py +0 -74
  102. autogluon.timeseries-1.0.1b20240304-py3.8-nspkg.pth +0 -1
  103. autogluon.timeseries-1.0.1b20240304.dist-info/RECORD +0 -58
  104. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/LICENSE +0 -0
  105. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/NOTICE +0 -0
  106. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/namespace_packages.txt +0 -0
  107. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/top_level.txt +0 -0
  108. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/zip-safe +0 -0
@@ -0,0 +1,107 @@
1
+ import logging
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ from typing_extensions import Self
6
+
7
+ from autogluon.tabular.registry import ag_model_registry as tabular_ag_model_registry
8
+
9
+ from .abstract import EnsembleRegressor
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ class TabularEnsembleRegressor(EnsembleRegressor):
15
+ """Ensemble regressor based on a single model from AutoGluon-Tabular that predicts all quantiles simultaneously."""
16
+
17
+ def __init__(
18
+ self,
19
+ quantile_levels: list[float],
20
+ model_name: str,
21
+ model_hyperparameters: dict | None = None,
22
+ ):
23
+ super().__init__()
24
+ self.quantile_levels = quantile_levels
25
+ model_type = tabular_ag_model_registry.key_to_cls(model_name)
26
+ model_hyperparameters = model_hyperparameters or {}
27
+ self.model = model_type(
28
+ problem_type="quantile",
29
+ hyperparameters=model_hyperparameters | {"ag.quantile_levels": quantile_levels},
30
+ path="",
31
+ name=model_name,
32
+ )
33
+
34
+ def fit(
35
+ self,
36
+ base_model_mean_predictions: np.ndarray,
37
+ base_model_quantile_predictions: np.ndarray,
38
+ labels: np.ndarray,
39
+ time_limit: float | None = None,
40
+ ) -> Self:
41
+ X = self._get_feature_df(base_model_mean_predictions, base_model_quantile_predictions)
42
+ num_windows, num_items, prediction_length = base_model_mean_predictions.shape[:3]
43
+ y = pd.Series(labels.reshape(num_windows * num_items * prediction_length))
44
+ self.model.fit(X=X, y=y, time_limit=time_limit)
45
+ return self
46
+
47
+ def predict(
48
+ self,
49
+ base_model_mean_predictions: np.ndarray,
50
+ base_model_quantile_predictions: np.ndarray,
51
+ ) -> tuple[np.ndarray, np.ndarray]:
52
+ assert self.model.is_fit()
53
+ num_windows, num_items, prediction_length = base_model_mean_predictions.shape[:3]
54
+ assert num_windows == 1, "Prediction expects a single window to be provided"
55
+
56
+ X = self._get_feature_df(base_model_mean_predictions, base_model_quantile_predictions)
57
+
58
+ pred = self.model.predict(X)
59
+
60
+ # Reshape back to (num_windows, num_items, prediction_length, num_quantiles)
61
+ pred = pred.reshape(num_windows, num_items, prediction_length, len(self.quantile_levels))
62
+
63
+ # Use median quantile as mean prediction
64
+ median_idx = self._get_median_quantile_index()
65
+ mean_pred = pred[:, :, :, median_idx : median_idx + 1]
66
+ quantile_pred = pred
67
+
68
+ return mean_pred, quantile_pred
69
+
70
+ def _get_feature_df(
71
+ self,
72
+ base_model_mean_predictions: np.ndarray,
73
+ base_model_quantile_predictions: np.ndarray,
74
+ ) -> pd.DataFrame:
75
+ num_windows, num_items, prediction_length, _, num_models = base_model_mean_predictions.shape
76
+ num_tabular_items = num_windows * num_items * prediction_length
77
+ features_array = np.hstack(
78
+ [
79
+ base_model_mean_predictions.reshape(num_tabular_items, -1),
80
+ base_model_quantile_predictions.reshape(num_tabular_items, -1),
81
+ ]
82
+ )
83
+ return pd.DataFrame(features_array, columns=self._get_feature_names(num_models))
84
+
85
+ def _get_feature_names(self, num_models: int) -> list[str]:
86
+ feature_names = []
87
+ for mi in range(num_models):
88
+ feature_names.append(f"model_{mi}_mean")
89
+ for quantile in self.quantile_levels:
90
+ for mi in range(num_models):
91
+ feature_names.append(f"model_{mi}_q{quantile}")
92
+
93
+ return feature_names
94
+
95
+ def _get_median_quantile_index(self):
96
+ """Get quantile index closest to 0.5"""
97
+ quantile_array = np.array(self.quantile_levels)
98
+ median_idx = int(np.argmin(np.abs(quantile_array - 0.5)))
99
+ selected_quantile = quantile_array[median_idx]
100
+
101
+ if selected_quantile != 0.5:
102
+ logger.warning(
103
+ f"Selected quantile {selected_quantile} is not exactly 0.5. "
104
+ f"Using closest available quantile for median prediction."
105
+ )
106
+
107
+ return median_idx
@@ -0,0 +1,167 @@
1
+ import copy
2
+
3
+ import numpy as np
4
+
5
+ import autogluon.core as ag
6
+ from autogluon.core.models.greedy_ensemble.ensemble_selection import EnsembleSelection
7
+ from autogluon.timeseries import TimeSeriesDataFrame
8
+ from autogluon.timeseries.metrics import TimeSeriesScorer
9
+ from autogluon.timeseries.utils.datetime import get_seasonality
10
+
11
+
12
+ class TimeSeriesEnsembleSelection(EnsembleSelection):
13
+ def __init__(
14
+ self,
15
+ ensemble_size: int,
16
+ metric: TimeSeriesScorer,
17
+ problem_type: str = ag.constants.QUANTILE,
18
+ sorted_initialization: bool = False,
19
+ bagging: bool = False,
20
+ tie_breaker: str = "random",
21
+ random_state: np.random.RandomState | None = None,
22
+ prediction_length: int = 1,
23
+ target: str = "target",
24
+ **kwargs,
25
+ ):
26
+ super().__init__(
27
+ ensemble_size=ensemble_size,
28
+ metric=metric, # type: ignore
29
+ problem_type=problem_type,
30
+ sorted_initialization=sorted_initialization,
31
+ bagging=bagging,
32
+ tie_breaker=tie_breaker,
33
+ random_state=random_state,
34
+ **kwargs,
35
+ )
36
+ self.prediction_length = prediction_length
37
+ self.target = target
38
+ self.metric: TimeSeriesScorer
39
+
40
+ self.dummy_pred_per_window = []
41
+ self.scorer_per_window = []
42
+
43
+ self.dummy_pred_per_window: list[TimeSeriesDataFrame] | None
44
+ self.scorer_per_window: list[TimeSeriesScorer] | None
45
+ self.data_future_per_window: list[TimeSeriesDataFrame] | None
46
+
47
+ def fit( # type: ignore
48
+ self,
49
+ predictions: list[list[TimeSeriesDataFrame]],
50
+ labels: list[TimeSeriesDataFrame],
51
+ time_limit: float | None = None,
52
+ ):
53
+ return super().fit(
54
+ predictions=predictions, # type: ignore
55
+ labels=labels, # type: ignore
56
+ time_limit=time_limit,
57
+ )
58
+
59
+ def _fit( # type: ignore
60
+ self,
61
+ predictions: list[list[TimeSeriesDataFrame]],
62
+ labels: list[TimeSeriesDataFrame],
63
+ time_limit: float | None = None,
64
+ sample_weight: list[float] | None = None,
65
+ ):
66
+ # Stack predictions for each model into a 3d tensor of shape [num_val_windows, num_rows, num_cols]
67
+ stacked_predictions = [np.stack(preds) for preds in predictions]
68
+
69
+ self.dummy_pred_per_window = []
70
+ self.scorer_per_window = []
71
+ self.data_future_per_window = []
72
+
73
+ seasonal_period = self.metric.seasonal_period
74
+ if seasonal_period is None:
75
+ seasonal_period = get_seasonality(labels[0].freq)
76
+
77
+ for window_idx, data in enumerate(labels):
78
+ dummy_pred = copy.deepcopy(predictions[0][window_idx])
79
+ # This should never happen; sanity check to make sure that all predictions have the same index
80
+ assert all(dummy_pred.index.equals(pred[window_idx].index) for pred in predictions)
81
+ assert all(dummy_pred.columns.equals(pred[window_idx].columns) for pred in predictions)
82
+
83
+ self.dummy_pred_per_window.append(dummy_pred)
84
+
85
+ scorer = copy.deepcopy(self.metric)
86
+ # Split the observed time series once to avoid repeated computations inside the evaluator
87
+ data_past = data.slice_by_timestep(None, -self.prediction_length)
88
+ data_future = data.slice_by_timestep(-self.prediction_length, None)
89
+ scorer.save_past_metrics(data_past, target=self.target, seasonal_period=seasonal_period)
90
+ self.scorer_per_window.append(scorer)
91
+ self.data_future_per_window.append(data_future)
92
+
93
+ super()._fit(
94
+ predictions=stacked_predictions,
95
+ labels=data_future, # type: ignore
96
+ time_limit=time_limit,
97
+ )
98
+ self.dummy_pred_per_window = None
99
+ self.evaluator_per_window = None
100
+ self.data_future_per_window = None
101
+
102
+ def _calculate_regret( # type: ignore
103
+ self,
104
+ y_true,
105
+ y_pred_proba,
106
+ metric: TimeSeriesScorer,
107
+ sample_weight=None,
108
+ ):
109
+ # Compute average score across all validation windows
110
+ total_score = 0.0
111
+
112
+ assert self.data_future_per_window is not None
113
+ assert self.dummy_pred_per_window is not None
114
+ assert self.scorer_per_window is not None
115
+
116
+ for window_idx, data_future in enumerate(self.data_future_per_window):
117
+ dummy_pred = self.dummy_pred_per_window[window_idx]
118
+ dummy_pred[list(dummy_pred.columns)] = y_pred_proba[window_idx]
119
+ # We use scorer.compute_metric instead of scorer.score to avoid repeated calls to scorer.save_past_metrics
120
+ metric_value = self.scorer_per_window[window_idx].compute_metric(
121
+ data_future,
122
+ dummy_pred,
123
+ target=self.target,
124
+ )
125
+ total_score += metric.sign * metric_value
126
+ avg_score = total_score / len(self.data_future_per_window)
127
+ # score: higher is better, regret: lower is better, so we flip the sign
128
+ return -avg_score
129
+
130
+
131
+ def fit_time_series_ensemble_selection(
132
+ data_per_window: list[TimeSeriesDataFrame],
133
+ predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
134
+ ensemble_size: int,
135
+ eval_metric: TimeSeriesScorer,
136
+ prediction_length: int = 1,
137
+ target: str = "target",
138
+ time_limit: float | None = None,
139
+ ) -> dict[str, float]:
140
+ """Fit ensemble selection for time series forecasting and return ensemble weights.
141
+
142
+ Parameters
143
+ ----------
144
+ data_per_window:
145
+ List of ground truth time series data for each validation window.
146
+ predictions_per_window:
147
+ Dictionary mapping model names to their predictions for each validation window.
148
+ ensemble_size:
149
+ Number of iterations of the ensemble selection algorithm.
150
+
151
+ Returns
152
+ -------
153
+ weights:
154
+ Dictionary mapping the model name to its weight in the ensemble.
155
+ """
156
+ ensemble_selection = TimeSeriesEnsembleSelection(
157
+ ensemble_size=ensemble_size,
158
+ metric=eval_metric,
159
+ prediction_length=prediction_length,
160
+ target=target,
161
+ )
162
+ ensemble_selection.fit(
163
+ predictions=list(predictions_per_window.values()),
164
+ labels=data_per_window,
165
+ time_limit=time_limit,
166
+ )
167
+ return {model: float(weight) for model, weight in zip(predictions_per_window.keys(), ensemble_selection.weights_)}
@@ -0,0 +1,162 @@
1
+ import logging
2
+ import pprint
3
+ import time
4
+ from typing import Any
5
+
6
+ import pandas as pd
7
+ from joblib import Parallel, delayed
8
+
9
+ from autogluon.timeseries import TimeSeriesDataFrame
10
+ from autogluon.timeseries.utils.constants import AG_DEFAULT_N_JOBS
11
+
12
+ from .abstract import AbstractTimeSeriesEnsembleModel
13
+ from .ensemble_selection import fit_time_series_ensemble_selection
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ class PerItemGreedyEnsemble(AbstractTimeSeriesEnsembleModel):
19
+ """Fits a separate greedy weighted ensemble for each individual time series in the dataset.
20
+ Constructs a weighted ensemble using the greedy Ensemble Selection algorithm by Caruana et al. [Car2004]
21
+
22
+ Other Parameters
23
+ ----------------
24
+ ensemble_size: int, default = 100
25
+ Number of models (with replacement) to include in the ensemble.
26
+ n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
27
+ Number of CPU cores used to fit the ensembles in parallel.
28
+
29
+ References
30
+ ----------
31
+ .. [Car2004] Caruana, Rich, et al. "Ensemble selection from libraries of models."
32
+ Proceedings of the twenty-first international conference on Machine learning. 2004.
33
+ """
34
+
35
+ def __init__(self, name: str | None = None, **kwargs):
36
+ if name is None:
37
+ name = "PerItemWeightedEnsemble"
38
+ super().__init__(name=name, **kwargs)
39
+ self.weights_df: pd.DataFrame
40
+ self.average_weight: pd.Series
41
+
42
+ @property
43
+ def model_names(self) -> list[str]:
44
+ return list(self.weights_df.columns)
45
+
46
+ def _get_default_hyperparameters(self) -> dict[str, Any]:
47
+ return {"ensemble_size": 100, "n_jobs": AG_DEFAULT_N_JOBS}
48
+
49
+ def _fit(
50
+ self,
51
+ predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
52
+ data_per_window: list[TimeSeriesDataFrame],
53
+ model_scores: dict[str, float] | None = None,
54
+ time_limit: float | None = None,
55
+ ) -> None:
56
+ model_names = list(predictions_per_window.keys())
57
+ item_ids = data_per_window[0].item_ids
58
+ n_jobs = min(self.get_hyperparameter("n_jobs"), len(item_ids))
59
+
60
+ predictions_per_item = self._split_predictions_per_item(predictions_per_window)
61
+ data_per_item = self._split_data_per_item(data_per_window)
62
+
63
+ ensemble_selection_kwargs = dict(
64
+ ensemble_size=self.get_hyperparameter("ensemble_size"),
65
+ eval_metric=self.eval_metric,
66
+ prediction_length=self.prediction_length,
67
+ target=self.target,
68
+ )
69
+
70
+ time_limit_per_item = None if time_limit is None else time_limit * n_jobs / len(item_ids)
71
+ end_time = None if time_limit is None else time.time() + time_limit
72
+
73
+ # Fit ensemble for each item in parallel
74
+ executor = Parallel(n_jobs=n_jobs)
75
+ weights_per_item = executor(
76
+ delayed(self._fit_item_ensemble)(
77
+ data_per_item[item_id],
78
+ predictions_per_item[item_id],
79
+ time_limit_per_item=time_limit_per_item,
80
+ end_time=end_time,
81
+ **ensemble_selection_kwargs,
82
+ )
83
+ for item_id in item_ids
84
+ )
85
+ self.weights_df = pd.DataFrame(weights_per_item, index=item_ids, columns=model_names)
86
+ self.average_weight = self.weights_df.mean(axis=0)
87
+
88
+ # Drop models with zero average weight
89
+ if (self.average_weight == 0).any():
90
+ models_to_keep = self.average_weight[self.average_weight > 0].index
91
+ self.weights_df = self.weights_df[models_to_keep]
92
+ self.average_weight = self.average_weight[models_to_keep]
93
+
94
+ weights_for_printing = {model: round(float(weight), 2) for model, weight in self.average_weight.items()}
95
+ logger.info(f"\tAverage ensemble weights: {pprint.pformat(weights_for_printing, width=200)}")
96
+
97
+ def _split_predictions_per_item(
98
+ self, predictions_per_window: dict[str, list[TimeSeriesDataFrame]]
99
+ ) -> dict[str, dict[str, list[TimeSeriesDataFrame]]]:
100
+ """Build a dictionary mapping item_id -> dict[model_name, list[TimeSeriesDataFrame]]."""
101
+ item_ids = list(predictions_per_window.values())[0][0].item_ids
102
+
103
+ predictions_per_item = {}
104
+ for i, item_id in enumerate(item_ids):
105
+ item_predictions = {}
106
+ for model_name, preds_per_window in predictions_per_window.items():
107
+ item_preds_per_window = [
108
+ pred.iloc[i * self.prediction_length : (i + 1) * self.prediction_length]
109
+ for pred in preds_per_window
110
+ ]
111
+ item_predictions[model_name] = item_preds_per_window
112
+ predictions_per_item[item_id] = item_predictions
113
+ return predictions_per_item
114
+
115
+ def _split_data_per_item(self, data_per_window: list[TimeSeriesDataFrame]) -> dict[str, list[TimeSeriesDataFrame]]:
116
+ """Build a dictionary mapping item_id -> ground truth values across all windows."""
117
+ item_ids = data_per_window[0].item_ids
118
+ data_per_item = {item_id: [] for item_id in item_ids}
119
+
120
+ for data in data_per_window:
121
+ indptr = data.get_indptr()
122
+ for item_idx, item_id in enumerate(item_ids):
123
+ new_slice = data.iloc[indptr[item_idx] : indptr[item_idx + 1]]
124
+ data_per_item[item_id].append(new_slice)
125
+ return data_per_item
126
+
127
+ @staticmethod
128
+ def _fit_item_ensemble(
129
+ data_per_window: list[TimeSeriesDataFrame],
130
+ predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
131
+ time_limit_per_item: float | None = None,
132
+ end_time: float | None = None,
133
+ **ensemble_selection_kwargs,
134
+ ) -> dict[str, float]:
135
+ """Fit ensemble for a single item."""
136
+ if end_time is not None:
137
+ assert time_limit_per_item is not None
138
+ time_left = end_time - time.time()
139
+ time_limit_per_item = min(time_limit_per_item, time_left)
140
+ return fit_time_series_ensemble_selection(
141
+ data_per_window, predictions_per_window, time_limit=time_limit_per_item, **ensemble_selection_kwargs
142
+ )
143
+
144
+ def _predict(self, data: dict[str, TimeSeriesDataFrame], **kwargs) -> TimeSeriesDataFrame:
145
+ assert all(model in data for model in self.weights_df.columns)
146
+ item_ids = list(data.values())[0].item_ids
147
+ unseen_item_ids = set(item_ids) - set(self.weights_df.index)
148
+ if unseen_item_ids:
149
+ logger.debug(f"Using average weights for {len(unseen_item_ids)} unseen items")
150
+ weights = self.weights_df.reindex(item_ids).fillna(self.average_weight)
151
+
152
+ result = None
153
+ for model_name in self.weights_df.columns:
154
+ model_pred = data[model_name]
155
+ model_weights = weights[model_name].to_numpy().repeat(self.prediction_length)
156
+ weighted_pred = model_pred.to_data_frame().multiply(model_weights, axis=0)
157
+ result = weighted_pred if result is None else result + weighted_pred
158
+
159
+ return TimeSeriesDataFrame(result)
160
+
161
+ def remap_base_models(self, model_refit_map: dict[str, str]) -> None:
162
+ self.weights_df.rename(columns=model_refit_map, inplace=True)
@@ -0,0 +1,8 @@
1
+ from .basic import PerformanceWeightedEnsemble, SimpleAverageEnsemble
2
+ from .greedy import GreedyEnsemble
3
+
4
+ __all__ = [
5
+ "SimpleAverageEnsemble",
6
+ "PerformanceWeightedEnsemble",
7
+ "GreedyEnsemble",
8
+ ]
@@ -0,0 +1,40 @@
1
+ import functools
2
+ from abc import ABC
3
+
4
+ import numpy as np
5
+
6
+ from autogluon.timeseries.dataset import TimeSeriesDataFrame
7
+
8
+ from ..abstract import AbstractTimeSeriesEnsembleModel
9
+
10
+
11
+ class AbstractWeightedTimeSeriesEnsembleModel(AbstractTimeSeriesEnsembleModel, ABC):
12
+ """Abstract class for weighted ensembles which assign one (global) weight per model."""
13
+
14
+ def __init__(self, name: str | None = None, **kwargs):
15
+ super().__init__(name=name, **kwargs)
16
+ self.model_to_weight: dict[str, float] = {}
17
+
18
+ @property
19
+ def model_names(self) -> list[str]:
20
+ return list(self.model_to_weight.keys())
21
+
22
+ @property
23
+ def model_weights(self) -> np.ndarray:
24
+ return np.array(list(self.model_to_weight.values()), dtype=np.float64)
25
+
26
+ def _predict(self, data: dict[str, TimeSeriesDataFrame], **kwargs) -> TimeSeriesDataFrame:
27
+ weighted_predictions = [data[model_name] * weight for model_name, weight in self.model_to_weight.items()]
28
+ return functools.reduce(lambda x, y: x + y, weighted_predictions)
29
+
30
+ def get_info(self) -> dict:
31
+ info = super().get_info()
32
+ info["model_weights"] = self.model_to_weight.copy()
33
+ return info
34
+
35
+ def remap_base_models(self, model_refit_map: dict[str, str]) -> None:
36
+ updated_weights = {}
37
+ for model, weight in self.model_to_weight.items():
38
+ model_full_name = model_refit_map.get(model, model)
39
+ updated_weights[model_full_name] = weight
40
+ self.model_to_weight = updated_weights
@@ -0,0 +1,78 @@
1
+ from typing import Any
2
+
3
+ import numpy as np
4
+
5
+ from autogluon.timeseries.dataset import TimeSeriesDataFrame
6
+
7
+ from .abstract import AbstractWeightedTimeSeriesEnsembleModel
8
+
9
+
10
+ class SimpleAverageEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
11
+ """Constructs a weighted ensemble using a simple average of the constituent models' predictions."""
12
+
13
+ def _fit(
14
+ self,
15
+ predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
16
+ data_per_window: list[TimeSeriesDataFrame],
17
+ model_scores: dict[str, float] | None = None,
18
+ time_limit: float | None = None,
19
+ ):
20
+ self.model_to_weight = {}
21
+ num_models = len(predictions_per_window)
22
+ for model_name in predictions_per_window.keys():
23
+ self.model_to_weight[model_name] = 1.0 / num_models
24
+
25
+
26
+ class PerformanceWeightedEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
27
+ """Constructs a weighted ensemble, where the weights are assigned in proportion to the
28
+ (inverse) validation scores.
29
+
30
+ Other Parameters
31
+ ----------------
32
+ weight_scheme: Literal["sqrt", "inv", "sq"], default = "sqrt"
33
+ Method used to compute the weights as a function of the validation scores.
34
+ - "sqrt" computes weights in proportion to `sqrt(1 / S)`. This is the default.
35
+ - "inv" computes weights in proportion to `(1 / S)`.
36
+ - "sq" computes the weights in proportion to `(1 / S)^2` as outlined in [PC2020]_.
37
+
38
+ References
39
+ ----------
40
+ .. [PC2020] Pawlikowski, Maciej, and Agata Chorowska.
41
+ "Weighted ensemble of statistical models." International Journal of Forecasting
42
+ 36.1 (2020): 93-97.
43
+ """
44
+
45
+ def _get_default_hyperparameters(self) -> dict[str, Any]:
46
+ return {"weight_scheme": "sqrt"}
47
+
48
+ def _fit(
49
+ self,
50
+ predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
51
+ data_per_window: list[TimeSeriesDataFrame],
52
+ model_scores: dict[str, float] | None = None,
53
+ time_limit: float | None = None,
54
+ ):
55
+ assert model_scores is not None
56
+
57
+ weight_scheme = self.get_hyperparameter("weight_scheme")
58
+
59
+ # drop NaNs
60
+ model_scores = {k: v for k, v in model_scores.items() if np.isfinite(v)}
61
+ assert len(model_scores) > 0, (
62
+ "All models have NaN scores. At least one model must score successfully to fit an ensemble"
63
+ )
64
+ assert all(s <= 0 for s in model_scores.values()), (
65
+ "All model scores must be negative, in higher-is-better format."
66
+ )
67
+
68
+ score_transform = {
69
+ "sq": lambda x: np.square(np.reciprocal(x)),
70
+ "inv": lambda x: np.reciprocal(x),
71
+ "sqrt": lambda x: np.sqrt(np.reciprocal(x)),
72
+ }[weight_scheme]
73
+
74
+ self.model_to_weight = {
75
+ model_name: score_transform(-model_scores[model_name] + 1e-5) for model_name in model_scores.keys()
76
+ }
77
+ total_weight = sum(self.model_to_weight.values())
78
+ self.model_to_weight = {k: v / total_weight for k, v in self.model_to_weight.items()}
@@ -0,0 +1,57 @@
1
+ import logging
2
+ import pprint
3
+ from typing import Any
4
+
5
+ from autogluon.timeseries import TimeSeriesDataFrame
6
+
7
+ from ..ensemble_selection import fit_time_series_ensemble_selection
8
+ from .abstract import AbstractWeightedTimeSeriesEnsembleModel
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ class GreedyEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
14
+ """Constructs a weighted ensemble using the greedy Ensemble Selection algorithm by
15
+ Caruana et al. [Car2004]
16
+
17
+ Other Parameters
18
+ ----------------
19
+ ensemble_size: int, default = 100
20
+ Number of models (with replacement) to include in the ensemble.
21
+
22
+ References
23
+ ----------
24
+ .. [Car2004] Caruana, Rich, et al. "Ensemble selection from libraries of models."
25
+ Proceedings of the twenty-first international conference on Machine learning. 2004.
26
+ """
27
+
28
+ def __init__(self, name: str | None = None, **kwargs):
29
+ if name is None:
30
+ # FIXME: the name here is kept for backward compatibility. it will be called
31
+ # GreedyEnsemble in v1.4 once ensemble choices are exposed
32
+ name = "WeightedEnsemble"
33
+ super().__init__(name=name, **kwargs)
34
+
35
+ def _get_default_hyperparameters(self) -> dict[str, Any]:
36
+ return {"ensemble_size": 100}
37
+
38
+ def _fit(
39
+ self,
40
+ predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
41
+ data_per_window: list[TimeSeriesDataFrame],
42
+ model_scores: dict[str, float] | None = None,
43
+ time_limit: float | None = None,
44
+ ):
45
+ model_to_weight = fit_time_series_ensemble_selection(
46
+ data_per_window=data_per_window,
47
+ predictions_per_window=predictions_per_window,
48
+ ensemble_size=self.get_hyperparameter("ensemble_size"),
49
+ eval_metric=self.eval_metric,
50
+ prediction_length=self.prediction_length,
51
+ target=self.target,
52
+ time_limit=time_limit,
53
+ )
54
+ self.model_to_weight = {model: weight for model, weight in model_to_weight.items() if weight > 0}
55
+
56
+ weights_for_printing = {model: round(float(weight), 2) for model, weight in self.model_to_weight.items()}
57
+ logger.info(f"\tEnsemble weights: {pprint.pformat(weights_for_printing, width=200)}")
@@ -1,9 +1,10 @@
1
- from .torch.models import (
1
+ from .models import (
2
2
  DeepARModel,
3
3
  DLinearModel,
4
4
  PatchTSTModel,
5
5
  SimpleFeedForwardModel,
6
6
  TemporalFusionTransformerModel,
7
+ TiDEModel,
7
8
  WaveNetModel,
8
9
  )
9
10
 
@@ -13,5 +14,6 @@ __all__ = [
13
14
  "PatchTSTModel",
14
15
  "SimpleFeedForwardModel",
15
16
  "TemporalFusionTransformerModel",
17
+ "TiDEModel",
16
18
  "WaveNetModel",
17
19
  ]