autogluon.timeseries 1.2.1b20250224__py3-none-any.whl → 1.4.1b20251215__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of autogluon.timeseries might be problematic. Click here for more details.

Files changed (108) hide show
  1. autogluon/timeseries/configs/__init__.py +3 -2
  2. autogluon/timeseries/configs/hyperparameter_presets.py +62 -0
  3. autogluon/timeseries/configs/predictor_presets.py +106 -0
  4. autogluon/timeseries/dataset/ts_dataframe.py +256 -141
  5. autogluon/timeseries/learner.py +86 -52
  6. autogluon/timeseries/metrics/__init__.py +42 -8
  7. autogluon/timeseries/metrics/abstract.py +89 -19
  8. autogluon/timeseries/metrics/point.py +142 -53
  9. autogluon/timeseries/metrics/quantile.py +46 -21
  10. autogluon/timeseries/metrics/utils.py +4 -4
  11. autogluon/timeseries/models/__init__.py +8 -2
  12. autogluon/timeseries/models/abstract/__init__.py +2 -2
  13. autogluon/timeseries/models/abstract/abstract_timeseries_model.py +361 -592
  14. autogluon/timeseries/models/abstract/model_trial.py +2 -1
  15. autogluon/timeseries/models/abstract/tunable.py +189 -0
  16. autogluon/timeseries/models/autogluon_tabular/__init__.py +2 -0
  17. autogluon/timeseries/models/autogluon_tabular/mlforecast.py +282 -194
  18. autogluon/timeseries/models/autogluon_tabular/per_step.py +513 -0
  19. autogluon/timeseries/models/autogluon_tabular/transforms.py +25 -18
  20. autogluon/timeseries/models/chronos/__init__.py +2 -1
  21. autogluon/timeseries/models/chronos/chronos2.py +361 -0
  22. autogluon/timeseries/models/chronos/model.py +219 -138
  23. autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +81 -50
  24. autogluon/timeseries/models/ensemble/__init__.py +37 -2
  25. autogluon/timeseries/models/ensemble/abstract.py +107 -0
  26. autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
  27. autogluon/timeseries/models/ensemble/array_based/abstract.py +240 -0
  28. autogluon/timeseries/models/ensemble/array_based/models.py +185 -0
  29. autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
  30. autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
  31. autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
  32. autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
  33. autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
  34. autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
  35. autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
  36. autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
  37. autogluon/timeseries/models/ensemble/weighted/abstract.py +45 -0
  38. autogluon/timeseries/models/ensemble/weighted/basic.py +91 -0
  39. autogluon/timeseries/models/ensemble/weighted/greedy.py +62 -0
  40. autogluon/timeseries/models/gluonts/__init__.py +1 -1
  41. autogluon/timeseries/models/gluonts/{abstract_gluonts.py → abstract.py} +148 -208
  42. autogluon/timeseries/models/gluonts/dataset.py +109 -0
  43. autogluon/timeseries/models/gluonts/{torch/models.py → models.py} +38 -22
  44. autogluon/timeseries/models/local/__init__.py +0 -7
  45. autogluon/timeseries/models/local/abstract_local_model.py +71 -74
  46. autogluon/timeseries/models/local/naive.py +13 -9
  47. autogluon/timeseries/models/local/npts.py +9 -2
  48. autogluon/timeseries/models/local/statsforecast.py +52 -36
  49. autogluon/timeseries/models/multi_window/multi_window_model.py +65 -45
  50. autogluon/timeseries/models/registry.py +64 -0
  51. autogluon/timeseries/models/toto/__init__.py +3 -0
  52. autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
  53. autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
  54. autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
  55. autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
  56. autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
  57. autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
  58. autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
  59. autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
  60. autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
  61. autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
  62. autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
  63. autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
  64. autogluon/timeseries/models/toto/dataloader.py +108 -0
  65. autogluon/timeseries/models/toto/hf_pretrained_model.py +200 -0
  66. autogluon/timeseries/models/toto/model.py +249 -0
  67. autogluon/timeseries/predictor.py +685 -297
  68. autogluon/timeseries/regressor.py +94 -44
  69. autogluon/timeseries/splitter.py +8 -32
  70. autogluon/timeseries/trainer/__init__.py +3 -0
  71. autogluon/timeseries/trainer/ensemble_composer.py +444 -0
  72. autogluon/timeseries/trainer/model_set_builder.py +256 -0
  73. autogluon/timeseries/trainer/prediction_cache.py +149 -0
  74. autogluon/timeseries/{trainer.py → trainer/trainer.py} +387 -390
  75. autogluon/timeseries/trainer/utils.py +17 -0
  76. autogluon/timeseries/transforms/__init__.py +2 -13
  77. autogluon/timeseries/transforms/covariate_scaler.py +34 -40
  78. autogluon/timeseries/transforms/target_scaler.py +37 -20
  79. autogluon/timeseries/utils/constants.py +10 -0
  80. autogluon/timeseries/utils/datetime/lags.py +3 -5
  81. autogluon/timeseries/utils/datetime/seasonality.py +1 -3
  82. autogluon/timeseries/utils/datetime/time_features.py +2 -2
  83. autogluon/timeseries/utils/features.py +70 -47
  84. autogluon/timeseries/utils/forecast.py +19 -14
  85. autogluon/timeseries/utils/timer.py +173 -0
  86. autogluon/timeseries/utils/warning_filters.py +4 -2
  87. autogluon/timeseries/version.py +1 -1
  88. autogluon.timeseries-1.4.1b20251215-py3.11-nspkg.pth +1 -0
  89. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/METADATA +49 -36
  90. autogluon_timeseries-1.4.1b20251215.dist-info/RECORD +103 -0
  91. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/WHEEL +1 -1
  92. autogluon/timeseries/configs/presets_configs.py +0 -79
  93. autogluon/timeseries/evaluator.py +0 -6
  94. autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -11
  95. autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
  96. autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -585
  97. autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -518
  98. autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -78
  99. autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -170
  100. autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
  101. autogluon/timeseries/models/presets.py +0 -360
  102. autogluon.timeseries-1.2.1b20250224-py3.9-nspkg.pth +0 -1
  103. autogluon.timeseries-1.2.1b20250224.dist-info/RECORD +0 -68
  104. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/LICENSE +0 -0
  105. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/NOTICE +0 -0
  106. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/namespace_packages.txt +0 -0
  107. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/top_level.txt +0 -0
  108. {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/zip-safe +0 -0
@@ -0,0 +1,167 @@
1
+ import copy
2
+
3
+ import numpy as np
4
+
5
+ import autogluon.core as ag
6
+ from autogluon.core.models.greedy_ensemble.ensemble_selection import EnsembleSelection
7
+ from autogluon.timeseries import TimeSeriesDataFrame
8
+ from autogluon.timeseries.metrics import TimeSeriesScorer
9
+ from autogluon.timeseries.utils.datetime import get_seasonality
10
+
11
+
12
class TimeSeriesEnsembleSelection(EnsembleSelection):
    """Greedy ensemble selection (Caruana et al., 2004) adapted to time series forecasting.

    Unlike the tabular version, candidate model predictions are provided per validation
    window, and the ensemble score is averaged across all windows inside
    ``_calculate_regret``.

    Parameters
    ----------
    ensemble_size
        Number of models (selected with replacement) in the greedy selection loop.
    metric
        Forecast accuracy metric used to score candidate ensembles.
    prediction_length
        Number of time steps in the forecast horizon of each validation window.
    target
        Name of the target column in the ground truth data.
    """

    def __init__(
        self,
        ensemble_size: int,
        metric: TimeSeriesScorer,
        problem_type: str = ag.constants.QUANTILE,
        sorted_initialization: bool = False,
        bagging: bool = False,
        tie_breaker: str = "random",
        random_state: np.random.RandomState | None = None,
        prediction_length: int = 1,
        target: str = "target",
        **kwargs,
    ):
        super().__init__(
            ensemble_size=ensemble_size,
            metric=metric,  # type: ignore
            problem_type=problem_type,
            sorted_initialization=sorted_initialization,
            bagging=bagging,
            tie_breaker=tie_breaker,
            random_state=random_state,
            **kwargs,
        )
        self.prediction_length = prediction_length
        self.target = target
        self.metric: TimeSeriesScorer

        # Per-window state populated inside `_fit` and released after fitting to keep
        # the object lightweight once the weights are learned.
        self.dummy_pred_per_window = []
        self.scorer_per_window = []

        self.dummy_pred_per_window: list[TimeSeriesDataFrame] | None
        self.scorer_per_window: list[TimeSeriesScorer] | None
        self.data_future_per_window: list[TimeSeriesDataFrame] | None

    def fit(  # type: ignore
        self,
        predictions: list[list[TimeSeriesDataFrame]],
        labels: list[TimeSeriesDataFrame],
        time_limit: float | None = None,
    ):
        """Fit ensemble weights. ``predictions[i][j]`` holds predictions of model i on window j."""
        return super().fit(
            predictions=predictions,  # type: ignore
            labels=labels,  # type: ignore
            time_limit=time_limit,
        )

    def _fit(  # type: ignore
        self,
        predictions: list[list[TimeSeriesDataFrame]],
        labels: list[TimeSeriesDataFrame],
        time_limit: float | None = None,
        sample_weight: list[float] | None = None,
    ):
        # Stack predictions for each model into a 3d tensor of shape [num_val_windows, num_rows, num_cols]
        stacked_predictions = [np.stack(preds) for preds in predictions]

        self.dummy_pred_per_window = []
        self.scorer_per_window = []
        self.data_future_per_window = []

        seasonal_period = self.metric.seasonal_period
        if seasonal_period is None:
            seasonal_period = get_seasonality(labels[0].freq)

        for window_idx, data in enumerate(labels):
            dummy_pred = copy.deepcopy(predictions[0][window_idx])
            # This should never happen; sanity check to make sure that all predictions have the same index
            assert all(dummy_pred.index.equals(pred[window_idx].index) for pred in predictions)
            assert all(dummy_pred.columns.equals(pred[window_idx].columns) for pred in predictions)

            self.dummy_pred_per_window.append(dummy_pred)

            scorer = copy.deepcopy(self.metric)
            # Split the observed time series once to avoid repeated computations inside the evaluator
            data_past = data.slice_by_timestep(None, -self.prediction_length)
            data_future = data.slice_by_timestep(-self.prediction_length, None)
            scorer.save_past_metrics(data_past, target=self.target, seasonal_period=seasonal_period)
            self.scorer_per_window.append(scorer)
            self.data_future_per_window.append(data_future)

        # NOTE: `labels` passed to the parent is unused by our `_calculate_regret` override,
        # which relies on the per-window state collected above instead.
        super()._fit(
            predictions=stacked_predictions,
            labels=data_future,  # type: ignore
            time_limit=time_limit,
        )
        # Release per-window state now that the weights are fitted.
        # FIX: this previously assigned to `self.evaluator_per_window` (a nonexistent
        # attribute), leaving `self.scorer_per_window` alive after fitting.
        self.dummy_pred_per_window = None
        self.scorer_per_window = None
        self.data_future_per_window = None

    def _calculate_regret(  # type: ignore
        self,
        y_true,
        y_pred_proba,
        metric: TimeSeriesScorer,
        sample_weight=None,
    ):
        """Return the negated average score of a candidate ensemble across all validation windows.

        ``y_true`` and ``sample_weight`` are ignored; ground truth and pre-fitted scorers
        are taken from the per-window state stored during ``_fit``.
        """
        # Compute average score across all validation windows
        total_score = 0.0

        assert self.data_future_per_window is not None
        assert self.dummy_pred_per_window is not None
        assert self.scorer_per_window is not None

        for window_idx, data_future in enumerate(self.data_future_per_window):
            dummy_pred = self.dummy_pred_per_window[window_idx]
            dummy_pred[list(dummy_pred.columns)] = y_pred_proba[window_idx]
            # We use scorer.compute_metric instead of scorer.score to avoid repeated calls to scorer.save_past_metrics
            metric_value = self.scorer_per_window[window_idx].compute_metric(
                data_future,
                dummy_pred,
                target=self.target,
            )
            total_score += metric.sign * metric_value
        avg_score = total_score / len(self.data_future_per_window)
        # score: higher is better, regret: lower is better, so we flip the sign
        return -avg_score
129
+
130
+
131
def fit_time_series_ensemble_selection(
    data_per_window: list[TimeSeriesDataFrame],
    predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
    ensemble_size: int,
    eval_metric: TimeSeriesScorer,
    prediction_length: int = 1,
    target: str = "target",
    time_limit: float | None = None,
) -> dict[str, float]:
    """Run greedy ensemble selection for time series forecasting and return model weights.

    Parameters
    ----------
    data_per_window:
        List of ground truth time series data for each validation window.
    predictions_per_window:
        Dictionary mapping model names to their predictions for each validation window.
    ensemble_size:
        Number of iterations of the ensemble selection algorithm.

    Returns
    -------
    weights:
        Dictionary mapping the model name to its weight in the ensemble.
    """
    selector = TimeSeriesEnsembleSelection(
        ensemble_size=ensemble_size,
        metric=eval_metric,
        prediction_length=prediction_length,
        target=target,
    )
    selector.fit(
        predictions=[model_preds for model_preds in predictions_per_window.values()],
        labels=data_per_window,
        time_limit=time_limit,
    )
    # Pair each model name with its fitted weight, converting numpy scalars to floats
    return dict(zip(predictions_per_window.keys(), (float(w) for w in selector.weights_)))
@@ -0,0 +1,172 @@
1
+ import logging
2
+ import pprint
3
+ import time
4
+ from typing import Any
5
+
6
+ import pandas as pd
7
+ from joblib import Parallel, delayed
8
+
9
+ from autogluon.timeseries import TimeSeriesDataFrame
10
+ from autogluon.timeseries.utils.constants import AG_DEFAULT_N_JOBS
11
+
12
+ from .abstract import AbstractTimeSeriesEnsembleModel
13
+ from .ensemble_selection import fit_time_series_ensemble_selection
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class PerItemGreedyEnsemble(AbstractTimeSeriesEnsembleModel):
    """Per-item greedy ensemble that fits separate weighted ensembles for each individual time series.

    This ensemble applies the greedy Ensemble Selection algorithm by Caruana et al. [Car2004]_ independently
    to each time series in the dataset, allowing for customized model combinations that adapt to the
    specific characteristics of individual series. Each time series gets its own optimal ensemble weights
    based on predictions for that particular series. If items not seen during training are provided at prediction
    time, average model weight across the training items will be used for their predictions.

    The per-item approach is particularly effective for datasets with heterogeneous time series that
    exhibit different patterns, seasonalities, or noise characteristics.

    The algorithm uses parallel processing to efficiently fit ensembles across all time series.

    Other Parameters
    ----------------
    ensemble_size : int, default = 100
        Number of models (with replacement) to include in the ensemble.
    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
        Number of CPU cores used to fit the ensembles in parallel.

    References
    ----------
    .. [Car2004] Caruana, Rich, et al. "Ensemble selection from libraries of models."
        Proceedings of the twenty-first international conference on Machine learning. 2004.
    """

    def __init__(self, name: str | None = None, **kwargs):
        if name is None:
            name = "PerItemWeightedEnsemble"
        super().__init__(name=name, **kwargs)
        # weights_df: one row per training item_id, one column per base model (set in `_fit`)
        self.weights_df: pd.DataFrame
        # average_weight: per-model mean weight over all items; used for unseen items at prediction time
        self.average_weight: pd.Series

    @property
    def model_names(self) -> list[str]:
        return list(self.weights_df.columns)

    def _get_default_hyperparameters(self) -> dict[str, Any]:
        return {"ensemble_size": 100, "n_jobs": AG_DEFAULT_N_JOBS}

    def _fit(
        self,
        predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
        data_per_window: list[TimeSeriesDataFrame],
        model_scores: dict[str, float] | None = None,
        time_limit: float | None = None,
    ) -> None:
        """Fit one greedy weighted ensemble per item, in parallel across items."""
        model_names = list(predictions_per_window.keys())
        item_ids = data_per_window[0].item_ids
        # Don't spawn more workers than there are items to fit
        n_jobs = min(self.get_hyperparameter("n_jobs"), len(item_ids))

        # Re-slice predictions and ground truth from per-window layout into per-item layout
        predictions_per_item = self._split_predictions_per_item(predictions_per_window)
        data_per_item = self._split_data_per_item(data_per_window)

        ensemble_selection_kwargs = dict(
            ensemble_size=self.get_hyperparameter("ensemble_size"),
            eval_metric=self.eval_metric,
            prediction_length=self.prediction_length,
            target=self.target,
        )

        # Budget per item assumes n_jobs items are fitted concurrently; `end_time` caps the total
        time_limit_per_item = None if time_limit is None else time_limit * n_jobs / len(item_ids)
        end_time = None if time_limit is None else time.time() + time_limit

        # Fit ensemble for each item in parallel
        executor = Parallel(n_jobs=n_jobs)
        weights_per_item = executor(
            delayed(self._fit_item_ensemble)(
                data_per_item[item_id],
                predictions_per_item[item_id],
                time_limit_per_item=time_limit_per_item,
                end_time=end_time,
                **ensemble_selection_kwargs,
            )
            for item_id in item_ids
        )
        self.weights_df = pd.DataFrame(weights_per_item, index=item_ids, columns=model_names)  # type: ignore
        self.average_weight = self.weights_df.mean(axis=0)

        # Drop models with zero average weight
        if (self.average_weight == 0).any():
            models_to_keep = self.average_weight[self.average_weight > 0].index
            self.weights_df = self.weights_df[models_to_keep]
            self.average_weight = self.average_weight[models_to_keep]

        weights_for_printing = {model: round(float(weight), 2) for model, weight in self.average_weight.items()}
        logger.info(f"\tAverage ensemble weights: {pprint.pformat(weights_for_printing, width=200)}")

    def _split_predictions_per_item(
        self, predictions_per_window: dict[str, list[TimeSeriesDataFrame]]
    ) -> dict[str, dict[str, list[TimeSeriesDataFrame]]]:
        """Build a dictionary mapping item_id -> dict[model_name, list[TimeSeriesDataFrame]]."""
        item_ids = list(predictions_per_window.values())[0][0].item_ids

        predictions_per_item = {}
        for i, item_id in enumerate(item_ids):
            item_predictions = {}
            for model_name, preds_per_window in predictions_per_window.items():
                # Each item's forecast occupies exactly `prediction_length` consecutive rows,
                # in `item_ids` order, so positional slicing extracts item i's block.
                item_preds_per_window = [
                    pred.iloc[i * self.prediction_length : (i + 1) * self.prediction_length]
                    for pred in preds_per_window
                ]
                item_predictions[model_name] = item_preds_per_window
            predictions_per_item[item_id] = item_predictions
        return predictions_per_item

    def _split_data_per_item(self, data_per_window: list[TimeSeriesDataFrame]) -> dict[str, list[TimeSeriesDataFrame]]:
        """Build a dictionary mapping item_id -> ground truth values across all windows."""
        item_ids = data_per_window[0].item_ids
        data_per_item = {item_id: [] for item_id in item_ids}

        for data in data_per_window:
            # indptr gives the row boundaries of each item's series within the window
            indptr = data.get_indptr()
            for item_idx, item_id in enumerate(item_ids):
                new_slice = data.iloc[indptr[item_idx] : indptr[item_idx + 1]]
                data_per_item[item_id].append(new_slice)
        return data_per_item

    @staticmethod
    def _fit_item_ensemble(
        data_per_window: list[TimeSeriesDataFrame],
        predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
        time_limit_per_item: float | None = None,
        end_time: float | None = None,
        **ensemble_selection_kwargs,
    ) -> dict[str, float]:
        """Fit ensemble for a single item."""
        if end_time is not None:
            assert time_limit_per_item is not None
            # Shrink the per-item budget if the overall deadline is closer
            time_left = end_time - time.time()
            time_limit_per_item = min(time_limit_per_item, time_left)
        return fit_time_series_ensemble_selection(
            data_per_window, predictions_per_window, time_limit=time_limit_per_item, **ensemble_selection_kwargs
        )

    def _predict(self, data: dict[str, TimeSeriesDataFrame], **kwargs) -> TimeSeriesDataFrame:
        """Combine base model predictions using the per-item weights (average weights for unseen items)."""
        assert all(model in data for model in self.weights_df.columns)
        item_ids = list(data.values())[0].item_ids
        unseen_item_ids = set(item_ids) - set(self.weights_df.index)
        if unseen_item_ids:
            logger.debug(f"Using average weights for {len(unseen_item_ids)} unseen items")
        # reindex introduces NaN rows for unseen items; fillna substitutes the per-model average weight
        weights = self.weights_df.reindex(item_ids).fillna(self.average_weight)

        result = None
        for model_name in self.weights_df.columns:
            model_pred = data[model_name]
            # Expand the per-item weight to one value per forecasted row of that item
            model_weights = weights[model_name].to_numpy().repeat(self.prediction_length)
            weighted_pred = model_pred.to_data_frame().multiply(model_weights, axis=0)
            result = weighted_pred if result is None else result + weighted_pred

        return TimeSeriesDataFrame(result)  # type: ignore

    def remap_base_models(self, model_refit_map: dict[str, str]) -> None:
        self.weights_df.rename(columns=model_refit_map, inplace=True)
@@ -0,0 +1,8 @@
1
+ from .basic import PerformanceWeightedEnsemble, SimpleAverageEnsemble
2
+ from .greedy import GreedyEnsemble
3
+
4
+ __all__ = [
5
+ "SimpleAverageEnsemble",
6
+ "PerformanceWeightedEnsemble",
7
+ "GreedyEnsemble",
8
+ ]
@@ -0,0 +1,45 @@
1
+ import functools
2
+ from abc import ABC
3
+
4
+ import numpy as np
5
+
6
+ from autogluon.timeseries.dataset import TimeSeriesDataFrame
7
+
8
+ from ..abstract import AbstractTimeSeriesEnsembleModel
9
+
10
+
11
class AbstractWeightedTimeSeriesEnsembleModel(AbstractTimeSeriesEnsembleModel, ABC):
    """Base class for ensembles that combine base model forecasts with a single global weight per model.

    The ensemble forecast is a weighted linear combination of the base model forecasts,
    where each model's weight is applied uniformly across all time series and forecast
    horizons. Subclasses populate ``model_to_weight`` during fitting.
    """

    def __init__(self, name: str | None = None, **kwargs):
        super().__init__(name=name, **kwargs)
        # Maps base model name -> its global weight in the ensemble
        self.model_to_weight: dict[str, float] = {}

    @property
    def model_names(self) -> list[str]:
        return list(self.model_to_weight)

    @property
    def model_weights(self) -> np.ndarray:
        weight_values = list(self.model_to_weight.values())
        return np.array(weight_values, dtype=np.float64)

    def _predict(self, data: dict[str, TimeSeriesDataFrame], **kwargs) -> TimeSeriesDataFrame:
        # Sum the weighted base model forecasts
        weighted_terms = (data[name] * weight for name, weight in self.model_to_weight.items())
        return functools.reduce(lambda acc, term: acc + term, weighted_terms)

    def get_info(self) -> dict:
        info = super().get_info()
        info["model_weights"] = dict(self.model_to_weight)
        return info

    def remap_base_models(self, model_refit_map: dict[str, str]) -> None:
        self.model_to_weight = {
            model_refit_map.get(model, model): weight for model, weight in self.model_to_weight.items()
        }
@@ -0,0 +1,91 @@
1
+ from typing import Any
2
+
3
+ import numpy as np
4
+
5
+ from autogluon.timeseries.dataset import TimeSeriesDataFrame
6
+
7
+ from .abstract import AbstractWeightedTimeSeriesEnsembleModel
8
+
9
+
10
class SimpleAverageEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
    """Ensemble that averages all base model forecasts with equal weights.

    Every base model receives the same weight ``1 / num_models``, so the ensemble
    forecast is the arithmetic mean of the base model forecasts. Uniform averaging is a
    robust choice when the base models have comparable accuracy, or when validation data
    is too scarce to reliably estimate their performance differences.
    """

    def _fit(
        self,
        predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
        data_per_window: list[TimeSeriesDataFrame],
        model_scores: dict[str, float] | None = None,
        time_limit: float | None = None,
    ):
        # Every model gets the same weight; division happens per entry so an empty
        # input simply yields an empty weight mapping.
        self.model_to_weight = {
            model_name: 1.0 / len(predictions_per_window) for model_name in predictions_per_window
        }
30
+
31
+
32
class PerformanceWeightedEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
    """Ensemble whose weights are proportional to each model's validation performance.

    Model weights are derived from validation scores (higher is better), so stronger
    models receive larger weights. The mapping from score to weight is controlled by a
    configurable transformation function.

    .. warning::
        This ensemble method is deprecated and may be removed in a future version.

    Other Parameters
    ----------------
    weight_scheme : Literal["sq", "inv", "sqrt"], default = "sqrt"
        Method used to compute the weights as a function of the validation scores.

        - "sqrt" computes weights in proportion to ``sqrt(1 / S)``. This is the default.
        - "inv" computes weights in proportion to ``(1 / S)``.
        - "sq" computes the weights in proportion to ``(1 / S)^2`` as outlined in [PC2020]_.

    References
    ----------
    .. [PC2020] Pawlikowski, Maciej, and Agata Chorowska.
        "Weighted ensemble of statistical models." International Journal of Forecasting
        36.1 (2020): 93-97.
    """

    def _get_default_hyperparameters(self) -> dict[str, Any]:
        return {"weight_scheme": "sqrt"}

    def _fit(
        self,
        predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
        data_per_window: list[TimeSeriesDataFrame],
        model_scores: dict[str, float] | None = None,
        time_limit: float | None = None,
    ):
        assert model_scores is not None

        weight_scheme = self.get_hyperparameter("weight_scheme")

        # Discard models whose validation score is NaN or infinite
        finite_scores = {name: score for name, score in model_scores.items() if np.isfinite(score)}
        assert len(finite_scores) > 0, (
            "All models have NaN scores. At least one model must score successfully to fit an ensemble"
        )
        assert all(score <= 0 for score in finite_scores.values()), (
            "All model scores must be negative, in higher-is-better format."
        )

        # Dispatch on the configured scheme; unknown schemes raise KeyError
        transform = {
            "sq": lambda v: np.square(np.reciprocal(v)),
            "inv": lambda v: np.reciprocal(v),
            "sqrt": lambda v: np.sqrt(np.reciprocal(v)),
        }[weight_scheme]

        # Negate scores into a positive "loss"; the epsilon guards against division by zero
        raw_weights = {name: transform(-finite_scores[name] + 1e-5) for name in finite_scores.keys()}
        self.model_to_weight = raw_weights
        normalizer = sum(self.model_to_weight.values())
        self.model_to_weight = {name: weight / normalizer for name, weight in self.model_to_weight.items()}
@@ -0,0 +1,62 @@
1
+ import logging
2
+ import pprint
3
+ from typing import Any
4
+
5
+ from autogluon.timeseries import TimeSeriesDataFrame
6
+
7
+ from ..ensemble_selection import fit_time_series_ensemble_selection
8
+ from .abstract import AbstractWeightedTimeSeriesEnsembleModel
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class GreedyEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
    """Weighted ensemble fitted with the greedy Ensemble Selection algorithm.

    Implements the forward-selection procedure of Caruana et al. [Car2004]_: starting
    from an empty ensemble, the model that most improves validation performance is added
    repeatedly (models may be selected multiple times), so stronger models accumulate
    larger effective weights.

    Other Parameters
    ----------------
    ensemble_size : int, default = 100
        Number of models (with replacement) to include in the ensemble.

    References
    ----------
    .. [Car2004] Caruana, Rich, et al. "Ensemble selection from libraries of models."
        Proceedings of the twenty-first international conference on Machine learning. 2004.
    """

    def __init__(self, name: str | None = None, **kwargs):
        if name is None:
            # FIXME: the name here is kept for backward compatibility. it will be called
            # GreedyEnsemble in v1.4 once ensemble choices are exposed
            name = "WeightedEnsemble"
        super().__init__(name=name, **kwargs)

    def _get_default_hyperparameters(self) -> dict[str, Any]:
        return {"ensemble_size": 100}

    def _fit(
        self,
        predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
        data_per_window: list[TimeSeriesDataFrame],
        model_scores: dict[str, float] | None = None,
        time_limit: float | None = None,
    ):
        fitted_weights = fit_time_series_ensemble_selection(
            data_per_window=data_per_window,
            predictions_per_window=predictions_per_window,
            ensemble_size=self.get_hyperparameter("ensemble_size"),
            eval_metric=self.eval_metric,
            prediction_length=self.prediction_length,
            target=self.target,
            time_limit=time_limit,
        )
        # Keep only the models the greedy algorithm actually selected
        self.model_to_weight = {name: weight for name, weight in fitted_weights.items() if weight > 0}

        rounded_weights = {name: round(float(weight), 2) for name, weight in self.model_to_weight.items()}
        logger.info(f"\tEnsemble weights: {pprint.pformat(rounded_weights, width=200)}")
@@ -1,4 +1,4 @@
1
- from .torch.models import (
1
+ from .models import (
2
2
  DeepARModel,
3
3
  DLinearModel,
4
4
  PatchTSTModel,