autogluon.timeseries 1.0.1b20240304__py3-none-any.whl → 1.4.1b20251210__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of autogluon.timeseries might be problematic. Click here for more details.
- autogluon/timeseries/configs/__init__.py +3 -2
- autogluon/timeseries/configs/hyperparameter_presets.py +62 -0
- autogluon/timeseries/configs/predictor_presets.py +84 -0
- autogluon/timeseries/dataset/ts_dataframe.py +339 -186
- autogluon/timeseries/learner.py +192 -60
- autogluon/timeseries/metrics/__init__.py +55 -11
- autogluon/timeseries/metrics/abstract.py +96 -25
- autogluon/timeseries/metrics/point.py +186 -39
- autogluon/timeseries/metrics/quantile.py +47 -20
- autogluon/timeseries/metrics/utils.py +6 -6
- autogluon/timeseries/models/__init__.py +13 -7
- autogluon/timeseries/models/abstract/__init__.py +2 -2
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +533 -273
- autogluon/timeseries/models/abstract/model_trial.py +10 -10
- autogluon/timeseries/models/abstract/tunable.py +189 -0
- autogluon/timeseries/models/autogluon_tabular/__init__.py +2 -0
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +369 -215
- autogluon/timeseries/models/autogluon_tabular/per_step.py +513 -0
- autogluon/timeseries/models/autogluon_tabular/transforms.py +67 -0
- autogluon/timeseries/models/autogluon_tabular/utils.py +3 -51
- autogluon/timeseries/models/chronos/__init__.py +4 -0
- autogluon/timeseries/models/chronos/chronos2.py +361 -0
- autogluon/timeseries/models/chronos/model.py +738 -0
- autogluon/timeseries/models/chronos/utils.py +369 -0
- autogluon/timeseries/models/ensemble/__init__.py +35 -2
- autogluon/timeseries/models/ensemble/{abstract_timeseries_ensemble.py → abstract.py} +50 -26
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +236 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +73 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +167 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
- autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
- autogluon/timeseries/models/ensemble/per_item_greedy.py +162 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +40 -0
- autogluon/timeseries/models/ensemble/weighted/basic.py +78 -0
- autogluon/timeseries/models/ensemble/weighted/greedy.py +57 -0
- autogluon/timeseries/models/gluonts/__init__.py +3 -1
- autogluon/timeseries/models/gluonts/abstract.py +583 -0
- autogluon/timeseries/models/gluonts/dataset.py +109 -0
- autogluon/timeseries/models/gluonts/{torch/models.py → models.py} +185 -44
- autogluon/timeseries/models/local/__init__.py +1 -10
- autogluon/timeseries/models/local/abstract_local_model.py +150 -97
- autogluon/timeseries/models/local/naive.py +31 -23
- autogluon/timeseries/models/local/npts.py +6 -2
- autogluon/timeseries/models/local/statsforecast.py +99 -112
- autogluon/timeseries/models/multi_window/multi_window_model.py +99 -40
- autogluon/timeseries/models/registry.py +64 -0
- autogluon/timeseries/models/toto/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
- autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
- autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
- autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
- autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
- autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
- autogluon/timeseries/models/toto/dataloader.py +108 -0
- autogluon/timeseries/models/toto/hf_pretrained_model.py +118 -0
- autogluon/timeseries/models/toto/model.py +236 -0
- autogluon/timeseries/predictor.py +826 -305
- autogluon/timeseries/regressor.py +253 -0
- autogluon/timeseries/splitter.py +10 -31
- autogluon/timeseries/trainer/__init__.py +2 -3
- autogluon/timeseries/trainer/ensemble_composer.py +439 -0
- autogluon/timeseries/trainer/model_set_builder.py +256 -0
- autogluon/timeseries/trainer/prediction_cache.py +149 -0
- autogluon/timeseries/trainer/trainer.py +1298 -0
- autogluon/timeseries/trainer/utils.py +17 -0
- autogluon/timeseries/transforms/__init__.py +2 -0
- autogluon/timeseries/transforms/covariate_scaler.py +164 -0
- autogluon/timeseries/transforms/target_scaler.py +149 -0
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/base.py +38 -20
- autogluon/timeseries/utils/datetime/lags.py +18 -16
- autogluon/timeseries/utils/datetime/seasonality.py +14 -14
- autogluon/timeseries/utils/datetime/time_features.py +17 -14
- autogluon/timeseries/utils/features.py +317 -53
- autogluon/timeseries/utils/forecast.py +31 -17
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/utils/warning_filters.py +44 -6
- autogluon/timeseries/version.py +2 -1
- autogluon.timeseries-1.4.1b20251210-py3.11-nspkg.pth +1 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/METADATA +71 -47
- autogluon_timeseries-1.4.1b20251210.dist-info/RECORD +103 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/WHEEL +1 -1
- autogluon/timeseries/configs/presets_configs.py +0 -11
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -170
- autogluon/timeseries/models/gluonts/abstract_gluonts.py +0 -550
- autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
- autogluon/timeseries/models/presets.py +0 -325
- autogluon/timeseries/trainer/abstract_trainer.py +0 -1144
- autogluon/timeseries/trainer/auto_trainer.py +0 -74
- autogluon.timeseries-1.0.1b20240304-py3.8-nspkg.pth +0 -1
- autogluon.timeseries-1.0.1b20240304.dist-info/RECORD +0 -58
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/zip-safe +0 -0
autogluon/timeseries/learner.py
CHANGED
|
@@ -1,18 +1,17 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import reprlib
|
|
3
3
|
import time
|
|
4
|
-
from typing import Any,
|
|
4
|
+
from typing import Any, Literal, Type
|
|
5
5
|
|
|
6
6
|
import pandas as pd
|
|
7
7
|
|
|
8
8
|
from autogluon.core.learner import AbstractLearner
|
|
9
|
-
from autogluon.timeseries.dataset
|
|
9
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
10
10
|
from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
|
|
11
11
|
from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
|
|
12
|
-
from autogluon.timeseries.
|
|
13
|
-
from autogluon.timeseries.trainer import AbstractTimeSeriesTrainer, AutoTimeSeriesTrainer
|
|
12
|
+
from autogluon.timeseries.trainer import TimeSeriesTrainer
|
|
14
13
|
from autogluon.timeseries.utils.features import TimeSeriesFeatureGenerator
|
|
15
|
-
from autogluon.timeseries.utils.forecast import
|
|
14
|
+
from autogluon.timeseries.utils.forecast import make_future_data_frame
|
|
16
15
|
|
|
17
16
|
logger = logging.getLogger(__name__)
|
|
18
17
|
|
|
@@ -26,85 +25,77 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
26
25
|
self,
|
|
27
26
|
path_context: str,
|
|
28
27
|
target: str = "target",
|
|
29
|
-
known_covariates_names:
|
|
30
|
-
trainer_type: Type[
|
|
31
|
-
eval_metric:
|
|
32
|
-
eval_metric_seasonal_period: Optional[int] = None,
|
|
28
|
+
known_covariates_names: list[str] | None = None,
|
|
29
|
+
trainer_type: Type[TimeSeriesTrainer] = TimeSeriesTrainer,
|
|
30
|
+
eval_metric: str | TimeSeriesScorer | None = None,
|
|
33
31
|
prediction_length: int = 1,
|
|
34
32
|
cache_predictions: bool = True,
|
|
33
|
+
ensemble_model_type: Type | None = None,
|
|
35
34
|
**kwargs,
|
|
36
35
|
):
|
|
37
36
|
super().__init__(path_context=path_context)
|
|
38
|
-
self.eval_metric
|
|
39
|
-
self.eval_metric_seasonal_period = eval_metric_seasonal_period
|
|
37
|
+
self.eval_metric = check_get_evaluation_metric(eval_metric, prediction_length=prediction_length)
|
|
40
38
|
self.trainer_type = trainer_type
|
|
41
39
|
self.target = target
|
|
42
40
|
self.known_covariates_names = [] if known_covariates_names is None else known_covariates_names
|
|
43
41
|
self.prediction_length = prediction_length
|
|
44
42
|
self.quantile_levels = kwargs.get("quantile_levels", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
|
|
45
43
|
self.cache_predictions = cache_predictions
|
|
44
|
+
self.freq: str | None = None
|
|
45
|
+
self.ensemble_model_type = ensemble_model_type
|
|
46
46
|
|
|
47
47
|
self.feature_generator = TimeSeriesFeatureGenerator(
|
|
48
48
|
target=self.target, known_covariates_names=self.known_covariates_names
|
|
49
49
|
)
|
|
50
50
|
|
|
51
|
-
def load_trainer(self) ->
|
|
51
|
+
def load_trainer(self) -> TimeSeriesTrainer: # type: ignore
|
|
52
52
|
"""Return the trainer object corresponding to the learner."""
|
|
53
|
-
return super().load_trainer() #
|
|
53
|
+
return super().load_trainer() # type: ignore
|
|
54
54
|
|
|
55
55
|
def fit(
|
|
56
56
|
self,
|
|
57
57
|
train_data: TimeSeriesDataFrame,
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
hyperparameter_tune_kwargs:
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
hyperparameters=hyperparameters,
|
|
67
|
-
hyperparameter_tune_kwargs=hyperparameter_tune_kwargs,
|
|
68
|
-
**kwargs,
|
|
69
|
-
)
|
|
70
|
-
|
|
71
|
-
def _fit(
|
|
72
|
-
self,
|
|
73
|
-
train_data: TimeSeriesDataFrame,
|
|
74
|
-
val_data: Optional[TimeSeriesDataFrame] = None,
|
|
75
|
-
hyperparameters: Union[str, Dict] = None,
|
|
76
|
-
hyperparameter_tune_kwargs: Optional[Union[str, dict]] = None,
|
|
77
|
-
time_limit: Optional[int] = None,
|
|
78
|
-
val_splitter: Optional[AbstractWindowSplitter] = None,
|
|
79
|
-
refit_every_n_windows: Optional[int] = 1,
|
|
80
|
-
random_seed: Optional[int] = None,
|
|
58
|
+
hyperparameters: str | dict,
|
|
59
|
+
val_data: TimeSeriesDataFrame | None = None,
|
|
60
|
+
hyperparameter_tune_kwargs: str | dict | None = None,
|
|
61
|
+
time_limit: float | None = None,
|
|
62
|
+
num_val_windows: tuple[int, ...] = (1,),
|
|
63
|
+
val_step_size: int | None = None,
|
|
64
|
+
refit_every_n_windows: int | None = 1,
|
|
65
|
+
random_seed: int | None = None,
|
|
81
66
|
**kwargs,
|
|
82
67
|
) -> None:
|
|
83
68
|
self._time_limit = time_limit
|
|
84
69
|
time_start = time.time()
|
|
85
70
|
|
|
86
|
-
train_data = self.feature_generator.fit_transform(train_data
|
|
71
|
+
train_data = self.feature_generator.fit_transform(train_data)
|
|
87
72
|
if val_data is not None:
|
|
88
73
|
val_data = self.feature_generator.transform(val_data, data_frame_name="tuning_data")
|
|
89
74
|
|
|
75
|
+
self.freq = train_data.freq
|
|
76
|
+
|
|
90
77
|
trainer_init_kwargs = kwargs.copy()
|
|
91
78
|
trainer_init_kwargs.update(
|
|
92
79
|
dict(
|
|
93
80
|
path=self.model_context,
|
|
94
81
|
prediction_length=self.prediction_length,
|
|
95
82
|
eval_metric=self.eval_metric,
|
|
96
|
-
eval_metric_seasonal_period=self.eval_metric_seasonal_period,
|
|
97
83
|
target=self.target,
|
|
98
84
|
quantile_levels=self.quantile_levels,
|
|
99
85
|
verbosity=kwargs.get("verbosity", 2),
|
|
86
|
+
skip_model_selection=kwargs.get("skip_model_selection", False),
|
|
100
87
|
enable_ensemble=kwargs.get("enable_ensemble", True),
|
|
101
|
-
|
|
102
|
-
|
|
88
|
+
covariate_metadata=self.feature_generator.covariate_metadata,
|
|
89
|
+
num_val_windows=num_val_windows,
|
|
90
|
+
val_step_size=val_step_size,
|
|
103
91
|
refit_every_n_windows=refit_every_n_windows,
|
|
104
92
|
cache_predictions=self.cache_predictions,
|
|
93
|
+
ensemble_model_type=self.ensemble_model_type,
|
|
105
94
|
)
|
|
106
95
|
)
|
|
107
|
-
|
|
96
|
+
|
|
97
|
+
assert issubclass(self.trainer_type, TimeSeriesTrainer)
|
|
98
|
+
self.trainer: TimeSeriesTrainer | None = self.trainer_type(**trainer_init_kwargs)
|
|
108
99
|
self.trainer_path = self.trainer.path
|
|
109
100
|
self.save()
|
|
110
101
|
|
|
@@ -131,9 +122,9 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
131
122
|
|
|
132
123
|
def _align_covariates_with_forecast_index(
|
|
133
124
|
self,
|
|
134
|
-
known_covariates:
|
|
125
|
+
known_covariates: TimeSeriesDataFrame | None,
|
|
135
126
|
data: TimeSeriesDataFrame,
|
|
136
|
-
) ->
|
|
127
|
+
) -> TimeSeriesDataFrame | None:
|
|
137
128
|
"""Select the relevant item_ids and timestamps from the known_covariates dataframe.
|
|
138
129
|
|
|
139
130
|
If some of the item_ids or timestamps are missing, an exception is raised.
|
|
@@ -144,6 +135,7 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
144
135
|
raise ValueError(
|
|
145
136
|
f"known_covariates {self.known_covariates_names} for the forecast horizon should be provided at prediction time."
|
|
146
137
|
)
|
|
138
|
+
assert known_covariates is not None
|
|
147
139
|
|
|
148
140
|
if self.target in known_covariates.columns:
|
|
149
141
|
known_covariates = known_covariates.drop(self.target, axis=1)
|
|
@@ -154,23 +146,27 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
154
146
|
f"known_covariates are missing information for the following item_ids: {reprlib.repr(missing_item_ids.to_list())}."
|
|
155
147
|
)
|
|
156
148
|
|
|
157
|
-
forecast_index =
|
|
149
|
+
forecast_index = pd.MultiIndex.from_frame(
|
|
150
|
+
make_future_data_frame(data, prediction_length=self.prediction_length, freq=self.freq)
|
|
151
|
+
)
|
|
158
152
|
try:
|
|
159
|
-
known_covariates = known_covariates.loc[forecast_index]
|
|
153
|
+
known_covariates = known_covariates.loc[forecast_index] # type: ignore
|
|
160
154
|
except KeyError:
|
|
161
155
|
raise ValueError(
|
|
162
|
-
|
|
163
|
-
"
|
|
156
|
+
"`known_covariates` should include the `item_id` and `timestamp` values covering the forecast horizon "
|
|
157
|
+
"(i.e., the next `prediction_length` time steps following the end of each time series in the input "
|
|
158
|
+
"data). Use `TimeSeriesPredictor.make_future_data_frame` to generate the required `item_id` and "
|
|
159
|
+
"`timestamp` combinations for the `known_covariates`."
|
|
164
160
|
)
|
|
165
161
|
return known_covariates
|
|
166
162
|
|
|
167
163
|
def predict(
|
|
168
164
|
self,
|
|
169
165
|
data: TimeSeriesDataFrame,
|
|
170
|
-
known_covariates:
|
|
171
|
-
model:
|
|
166
|
+
known_covariates: TimeSeriesDataFrame | None = None,
|
|
167
|
+
model: str | AbstractTimeSeriesModel | None = None,
|
|
172
168
|
use_cache: bool = True,
|
|
173
|
-
random_seed:
|
|
169
|
+
random_seed: int | None = None,
|
|
174
170
|
**kwargs,
|
|
175
171
|
) -> TimeSeriesDataFrame:
|
|
176
172
|
data = self.feature_generator.transform(data)
|
|
@@ -188,8 +184,8 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
188
184
|
def score(
|
|
189
185
|
self,
|
|
190
186
|
data: TimeSeriesDataFrame,
|
|
191
|
-
model: AbstractTimeSeriesModel = None,
|
|
192
|
-
metric:
|
|
187
|
+
model: str | AbstractTimeSeriesModel | None = None,
|
|
188
|
+
metric: str | TimeSeriesScorer | None = None,
|
|
193
189
|
use_cache: bool = True,
|
|
194
190
|
) -> float:
|
|
195
191
|
data = self.feature_generator.transform(data)
|
|
@@ -197,20 +193,97 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
197
193
|
|
|
198
194
|
def evaluate(
|
|
199
195
|
self,
|
|
200
|
-
data:
|
|
201
|
-
model:
|
|
202
|
-
metrics:
|
|
196
|
+
data: TimeSeriesDataFrame,
|
|
197
|
+
model: str | None = None,
|
|
198
|
+
metrics: str | TimeSeriesScorer | list[str | TimeSeriesScorer] | None = None,
|
|
203
199
|
use_cache: bool = True,
|
|
204
|
-
) ->
|
|
200
|
+
) -> dict[str, float]:
|
|
205
201
|
data = self.feature_generator.transform(data)
|
|
206
202
|
return self.load_trainer().evaluate(data=data, model=model, metrics=metrics, use_cache=use_cache)
|
|
207
203
|
|
|
208
|
-
def
|
|
204
|
+
def get_feature_importance(
|
|
205
|
+
self,
|
|
206
|
+
data: TimeSeriesDataFrame | None = None,
|
|
207
|
+
model: str | None = None,
|
|
208
|
+
metric: str | TimeSeriesScorer | None = None,
|
|
209
|
+
features: list[str] | None = None,
|
|
210
|
+
time_limit: float | None = None,
|
|
211
|
+
method: Literal["naive", "permutation"] = "permutation",
|
|
212
|
+
subsample_size: int = 50,
|
|
213
|
+
num_iterations: int | None = None,
|
|
214
|
+
random_seed: int | None = None,
|
|
215
|
+
relative_scores: bool = False,
|
|
216
|
+
include_confidence_band: bool = True,
|
|
217
|
+
confidence_level: float = 0.99,
|
|
218
|
+
) -> pd.DataFrame:
|
|
219
|
+
trainer = self.load_trainer()
|
|
220
|
+
if data is None:
|
|
221
|
+
data = trainer.load_val_data() or trainer.load_train_data()
|
|
222
|
+
|
|
223
|
+
# if features are provided in the dataframe, check that they are valid features in the covariate metadata
|
|
224
|
+
provided_static_columns = [] if data.static_features is None else data.static_features.columns
|
|
225
|
+
unused_features = [
|
|
226
|
+
f
|
|
227
|
+
for f in set(provided_static_columns).union(set(data.columns) - {self.target})
|
|
228
|
+
if f not in self.feature_generator.covariate_metadata.all_features
|
|
229
|
+
]
|
|
230
|
+
|
|
231
|
+
if features is None:
|
|
232
|
+
features = self.feature_generator.covariate_metadata.all_features
|
|
233
|
+
else:
|
|
234
|
+
if len(features) == 0:
|
|
235
|
+
raise ValueError(
|
|
236
|
+
"No features provided to compute feature importance. At least some valid features should be provided."
|
|
237
|
+
)
|
|
238
|
+
for fn in features:
|
|
239
|
+
if fn not in self.feature_generator.covariate_metadata.all_features and fn not in unused_features:
|
|
240
|
+
raise ValueError(f"Feature {fn} not found in covariate metadata or the dataset.")
|
|
241
|
+
|
|
242
|
+
if len(set(features)) < len(features):
|
|
243
|
+
raise ValueError(
|
|
244
|
+
"Duplicate feature names provided to compute feature importance. "
|
|
245
|
+
"Please provide unique feature names across both static features and covariates."
|
|
246
|
+
)
|
|
247
|
+
|
|
248
|
+
data = self.feature_generator.transform(data)
|
|
249
|
+
|
|
250
|
+
importance_df = trainer.get_feature_importance(
|
|
251
|
+
data=data,
|
|
252
|
+
features=features,
|
|
253
|
+
model=model,
|
|
254
|
+
metric=metric,
|
|
255
|
+
time_limit=time_limit,
|
|
256
|
+
method=method,
|
|
257
|
+
subsample_size=subsample_size,
|
|
258
|
+
num_iterations=num_iterations,
|
|
259
|
+
random_seed=random_seed,
|
|
260
|
+
relative_scores=relative_scores,
|
|
261
|
+
include_confidence_band=include_confidence_band,
|
|
262
|
+
confidence_level=confidence_level,
|
|
263
|
+
)
|
|
264
|
+
|
|
265
|
+
for feature in set(features).union(unused_features):
|
|
266
|
+
if feature not in importance_df.index:
|
|
267
|
+
importance_df.loc[feature] = (
|
|
268
|
+
[0, 0, 0] if not include_confidence_band else [0, 0, 0, float("nan"), float("nan")]
|
|
269
|
+
)
|
|
270
|
+
|
|
271
|
+
return importance_df
|
|
272
|
+
|
|
273
|
+
def leaderboard(
|
|
274
|
+
self,
|
|
275
|
+
data: TimeSeriesDataFrame | None = None,
|
|
276
|
+
extra_info: bool = False,
|
|
277
|
+
extra_metrics: list[str | TimeSeriesScorer] | None = None,
|
|
278
|
+
use_cache: bool = True,
|
|
279
|
+
) -> pd.DataFrame:
|
|
209
280
|
if data is not None:
|
|
210
281
|
data = self.feature_generator.transform(data)
|
|
211
|
-
return self.load_trainer().leaderboard(
|
|
282
|
+
return self.load_trainer().leaderboard(
|
|
283
|
+
data, extra_info=extra_info, extra_metrics=extra_metrics, use_cache=use_cache
|
|
284
|
+
)
|
|
212
285
|
|
|
213
|
-
def get_info(self, include_model_info: bool = False, **kwargs) ->
|
|
286
|
+
def get_info(self, include_model_info: bool = False, **kwargs) -> dict[str, Any]:
|
|
214
287
|
learner_info = super().get_info(include_model_info=include_model_info)
|
|
215
288
|
trainer = self.load_trainer()
|
|
216
289
|
trainer_info = trainer.get_info(include_model_info=include_model_info)
|
|
@@ -227,5 +300,64 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
227
300
|
learner_info.pop("random_state", None)
|
|
228
301
|
return learner_info
|
|
229
302
|
|
|
230
|
-
def
|
|
303
|
+
def persist_trainer(
|
|
304
|
+
self, models: Literal["all", "best"] | list[str] = "all", with_ancestors: bool = False
|
|
305
|
+
) -> list[str]:
|
|
306
|
+
"""Loads models and trainer in memory so that they don't have to be
|
|
307
|
+
loaded during predictions
|
|
308
|
+
|
|
309
|
+
Returns
|
|
310
|
+
-------
|
|
311
|
+
list_of_models
|
|
312
|
+
List of models persisted in memory
|
|
313
|
+
"""
|
|
314
|
+
self.trainer = self.load_trainer()
|
|
315
|
+
return self.trainer.persist(models, with_ancestors=with_ancestors)
|
|
316
|
+
|
|
317
|
+
def unpersist_trainer(self) -> list[str]:
|
|
318
|
+
"""Unloads models and trainer from memory. Models will have to be reloaded from disk
|
|
319
|
+
when predicting.
|
|
320
|
+
|
|
321
|
+
Returns
|
|
322
|
+
-------
|
|
323
|
+
list_of_models
|
|
324
|
+
List of models removed from memory
|
|
325
|
+
"""
|
|
326
|
+
unpersisted_models = self.load_trainer().unpersist()
|
|
327
|
+
self.trainer = None # type: ignore
|
|
328
|
+
return unpersisted_models
|
|
329
|
+
|
|
330
|
+
def refit_full(self, model: str = "all") -> dict[str, str]:
|
|
231
331
|
return self.load_trainer().refit_full(model=model)
|
|
332
|
+
|
|
333
|
+
def backtest_predictions(
|
|
334
|
+
self,
|
|
335
|
+
data: TimeSeriesDataFrame | None,
|
|
336
|
+
model_names: list[str],
|
|
337
|
+
num_val_windows: int | None = None,
|
|
338
|
+
val_step_size: int | None = None,
|
|
339
|
+
use_cache: bool = True,
|
|
340
|
+
) -> dict[str, list[TimeSeriesDataFrame]]:
|
|
341
|
+
if data is not None:
|
|
342
|
+
data = self.feature_generator.transform(data)
|
|
343
|
+
return self.load_trainer().backtest_predictions(
|
|
344
|
+
model_names=model_names,
|
|
345
|
+
data=data,
|
|
346
|
+
num_val_windows=num_val_windows,
|
|
347
|
+
val_step_size=val_step_size,
|
|
348
|
+
use_cache=use_cache,
|
|
349
|
+
)
|
|
350
|
+
|
|
351
|
+
def backtest_targets(
|
|
352
|
+
self,
|
|
353
|
+
data: TimeSeriesDataFrame | None,
|
|
354
|
+
num_val_windows: int | None = None,
|
|
355
|
+
val_step_size: int | None = None,
|
|
356
|
+
) -> list[TimeSeriesDataFrame]:
|
|
357
|
+
if data is not None:
|
|
358
|
+
data = self.feature_generator.transform(data)
|
|
359
|
+
return self.load_trainer().backtest_targets(
|
|
360
|
+
data=data,
|
|
361
|
+
num_val_windows=num_val_windows,
|
|
362
|
+
val_step_size=val_step_size,
|
|
363
|
+
)
|
|
@@ -1,11 +1,17 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from pprint import pformat
|
|
2
|
-
from typing import
|
|
4
|
+
from typing import Any, Sequence, Type
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
3
7
|
|
|
4
8
|
from .abstract import TimeSeriesScorer
|
|
5
|
-
from .point import MAE, MAPE, MASE, MSE, RMSE, RMSLE, RMSSE, SMAPE, WAPE
|
|
9
|
+
from .point import MAE, MAPE, MASE, MSE, RMSE, RMSLE, RMSSE, SMAPE, WAPE, WCD
|
|
6
10
|
from .quantile import SQL, WQL
|
|
7
11
|
|
|
8
12
|
__all__ = [
|
|
13
|
+
"TimeSeriesScorer",
|
|
14
|
+
"check_get_evaluation_metric",
|
|
9
15
|
"MAE",
|
|
10
16
|
"MAPE",
|
|
11
17
|
"MASE",
|
|
@@ -16,12 +22,13 @@ __all__ = [
|
|
|
16
22
|
"RMSSE",
|
|
17
23
|
"SQL",
|
|
18
24
|
"WAPE",
|
|
25
|
+
"WCD",
|
|
19
26
|
"WQL",
|
|
20
27
|
]
|
|
21
28
|
|
|
22
29
|
DEFAULT_METRIC_NAME = "WQL"
|
|
23
30
|
|
|
24
|
-
AVAILABLE_METRICS = {
|
|
31
|
+
AVAILABLE_METRICS: dict[str, Type[TimeSeriesScorer]] = {
|
|
25
32
|
"MASE": MASE,
|
|
26
33
|
"MAPE": MAPE,
|
|
27
34
|
"SMAPE": SMAPE,
|
|
@@ -40,28 +47,65 @@ DEPRECATED_METRICS = {
|
|
|
40
47
|
"mean_wQuantileLoss": "WQL",
|
|
41
48
|
}
|
|
42
49
|
|
|
50
|
+
# Experimental metrics that are not yet user facing
|
|
51
|
+
EXPERIMENTAL_METRICS: dict[str, Type[TimeSeriesScorer]] = {
|
|
52
|
+
"WCD": WCD,
|
|
53
|
+
}
|
|
54
|
+
|
|
43
55
|
|
|
44
56
|
def check_get_evaluation_metric(
|
|
45
|
-
eval_metric:
|
|
57
|
+
eval_metric: str | TimeSeriesScorer | Type[TimeSeriesScorer] | None,
|
|
58
|
+
prediction_length: int,
|
|
59
|
+
seasonal_period: int | None = None,
|
|
60
|
+
horizon_weight: Sequence[float] | np.ndarray | None = None,
|
|
46
61
|
) -> TimeSeriesScorer:
|
|
62
|
+
"""Factory method for TimeSeriesScorer objects.
|
|
63
|
+
|
|
64
|
+
Returns
|
|
65
|
+
-------
|
|
66
|
+
scorer
|
|
67
|
+
A `TimeSeriesScorer` object based on the provided `eval_metric`.
|
|
68
|
+
|
|
69
|
+
`scorer.prediction_length` is always set to the `prediction_length` provided to this method.
|
|
70
|
+
|
|
71
|
+
If `seasonal_period` is not `None`, then `scorer.seasonal_period` is set to this value. Otherwise the original
|
|
72
|
+
value of `seasonal_period` is kept.
|
|
73
|
+
|
|
74
|
+
If `horizon_weight` is not `None`, then `scorer.horizon_weight` is set to this value. Otherwise the original
|
|
75
|
+
value of `horizon_weight` is kept.
|
|
76
|
+
"""
|
|
77
|
+
scorer: TimeSeriesScorer
|
|
78
|
+
metric_kwargs: dict[str, Any] = dict(
|
|
79
|
+
prediction_length=prediction_length, seasonal_period=seasonal_period, horizon_weight=horizon_weight
|
|
80
|
+
)
|
|
47
81
|
if isinstance(eval_metric, TimeSeriesScorer):
|
|
48
|
-
|
|
82
|
+
scorer = eval_metric
|
|
83
|
+
scorer.prediction_length = prediction_length
|
|
84
|
+
if seasonal_period is not None:
|
|
85
|
+
scorer.seasonal_period = seasonal_period
|
|
86
|
+
if horizon_weight is not None:
|
|
87
|
+
scorer.horizon_weight = scorer.check_get_horizon_weight(
|
|
88
|
+
horizon_weight, prediction_length=prediction_length
|
|
89
|
+
)
|
|
49
90
|
elif isinstance(eval_metric, type) and issubclass(eval_metric, TimeSeriesScorer):
|
|
50
91
|
# e.g., user passed `eval_metric=CustomMetric` instead of `eval_metric=CustomMetric()`
|
|
51
|
-
|
|
92
|
+
scorer = eval_metric(**metric_kwargs)
|
|
52
93
|
elif isinstance(eval_metric, str):
|
|
53
|
-
|
|
54
|
-
if
|
|
94
|
+
metric_name = DEPRECATED_METRICS.get(eval_metric, eval_metric).upper()
|
|
95
|
+
if metric_name in AVAILABLE_METRICS:
|
|
96
|
+
scorer = AVAILABLE_METRICS[metric_name](**metric_kwargs)
|
|
97
|
+
elif metric_name in EXPERIMENTAL_METRICS:
|
|
98
|
+
scorer = EXPERIMENTAL_METRICS[metric_name](**metric_kwargs)
|
|
99
|
+
else:
|
|
55
100
|
raise ValueError(
|
|
56
101
|
f"Time series metric {eval_metric} not supported. Available metrics are:\n"
|
|
57
102
|
f"{pformat(sorted(AVAILABLE_METRICS.keys()))}"
|
|
58
103
|
)
|
|
59
|
-
eval_metric = AVAILABLE_METRICS[eval_metric.upper()]()
|
|
60
104
|
elif eval_metric is None:
|
|
61
|
-
|
|
105
|
+
scorer = AVAILABLE_METRICS[DEFAULT_METRIC_NAME](**metric_kwargs)
|
|
62
106
|
else:
|
|
63
107
|
raise ValueError(
|
|
64
108
|
f"eval_metric must be of type str, TimeSeriesScorer or None "
|
|
65
109
|
f"(received eval_metric = {eval_metric} of type {type(eval_metric)})"
|
|
66
110
|
)
|
|
67
|
-
return
|
|
111
|
+
return scorer
|