autogluon.timeseries 1.4.1b20250821__py3-none-any.whl → 1.4.1b20250823__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of autogluon.timeseries might be problematic. Click here for more details.
- autogluon/timeseries/dataset/ts_dataframe.py +9 -9
- autogluon/timeseries/learner.py +14 -14
- autogluon/timeseries/metrics/__init__.py +5 -5
- autogluon/timeseries/metrics/abstract.py +9 -9
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +39 -41
- autogluon/timeseries/models/abstract/tunable.py +6 -6
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +30 -30
- autogluon/timeseries/models/autogluon_tabular/per_step.py +12 -12
- autogluon/timeseries/models/chronos/model.py +10 -10
- autogluon/timeseries/models/chronos/pipeline/base.py +8 -8
- autogluon/timeseries/models/chronos/pipeline/chronos.py +12 -12
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +12 -12
- autogluon/timeseries/models/chronos/pipeline/utils.py +12 -12
- autogluon/timeseries/models/ensemble/abstract.py +19 -19
- autogluon/timeseries/models/ensemble/basic.py +8 -8
- autogluon/timeseries/models/ensemble/greedy.py +13 -13
- autogluon/timeseries/models/gluonts/abstract.py +24 -24
- autogluon/timeseries/models/gluonts/dataset.py +2 -2
- autogluon/timeseries/models/gluonts/models.py +7 -7
- autogluon/timeseries/models/local/abstract_local_model.py +12 -12
- autogluon/timeseries/models/local/statsforecast.py +11 -11
- autogluon/timeseries/models/multi_window/multi_window_model.py +4 -4
- autogluon/timeseries/models/presets.py +14 -14
- autogluon/timeseries/models/registry.py +3 -3
- autogluon/timeseries/predictor.py +35 -35
- autogluon/timeseries/regressor.py +13 -13
- autogluon/timeseries/splitter.py +6 -6
- autogluon/timeseries/trainer.py +50 -49
- autogluon/timeseries/transforms/covariate_scaler.py +3 -3
- autogluon/timeseries/transforms/target_scaler.py +7 -7
- autogluon/timeseries/utils/datetime/lags.py +2 -2
- autogluon/timeseries/utils/datetime/time_features.py +2 -2
- autogluon/timeseries/utils/features.py +31 -31
- autogluon/timeseries/version.py +1 -1
- {autogluon.timeseries-1.4.1b20250821.dist-info → autogluon.timeseries-1.4.1b20250823.dist-info}/METADATA +5 -5
- autogluon.timeseries-1.4.1b20250823.dist-info/RECORD +72 -0
- autogluon.timeseries-1.4.1b20250821.dist-info/RECORD +0 -72
- /autogluon.timeseries-1.4.1b20250821-py3.9-nspkg.pth → /autogluon.timeseries-1.4.1b20250823-py3.9-nspkg.pth +0 -0
- {autogluon.timeseries-1.4.1b20250821.dist-info → autogluon.timeseries-1.4.1b20250823.dist-info}/LICENSE +0 -0
- {autogluon.timeseries-1.4.1b20250821.dist-info → autogluon.timeseries-1.4.1b20250823.dist-info}/NOTICE +0 -0
- {autogluon.timeseries-1.4.1b20250821.dist-info → autogluon.timeseries-1.4.1b20250823.dist-info}/WHEEL +0 -0
- {autogluon.timeseries-1.4.1b20250821.dist-info → autogluon.timeseries-1.4.1b20250823.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.4.1b20250821.dist-info → autogluon.timeseries-1.4.1b20250823.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.4.1b20250821.dist-info → autogluon.timeseries-1.4.1b20250823.dist-info}/zip-safe +0 -0
autogluon/timeseries/trainer.py
CHANGED
@@ -5,7 +5,7 @@ import time
|
|
5
5
|
import traceback
|
6
6
|
from collections import defaultdict
|
7
7
|
from pathlib import Path
|
8
|
-
from typing import Any,
|
8
|
+
from typing import Any, Literal, Optional, Type, Union
|
9
9
|
|
10
10
|
import networkx as nx
|
11
11
|
import numpy as np
|
@@ -81,7 +81,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
81
81
|
|
82
82
|
self.verbosity = verbosity
|
83
83
|
|
84
|
-
#:
|
84
|
+
#: dict of normal model -> FULL model. FULL models are produced by
|
85
85
|
#: self.refit_single_full() and self.refit_full().
|
86
86
|
self.model_refit_map = {}
|
87
87
|
|
@@ -121,7 +121,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
121
121
|
else:
|
122
122
|
return None
|
123
123
|
|
124
|
-
def load_data(self) ->
|
124
|
+
def load_data(self) -> tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
|
125
125
|
train_data = self.load_train_data()
|
126
126
|
val_data = self.load_val_data()
|
127
127
|
return train_data, val_data
|
@@ -136,7 +136,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
136
136
|
|
137
137
|
self.models = models
|
138
138
|
|
139
|
-
def _get_model_oof_predictions(self, model_name: str) ->
|
139
|
+
def _get_model_oof_predictions(self, model_name: str) -> list[TimeSeriesDataFrame]:
|
140
140
|
model_path = os.path.join(self.path, self.get_model_attribute(model=model_name, attribute="path"))
|
141
141
|
model_type = self.get_model_attribute(model=model_name, attribute="type")
|
142
142
|
return model_type.load_oof_predictions(path=model_path)
|
@@ -144,16 +144,16 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
144
144
|
def _add_model(
|
145
145
|
self,
|
146
146
|
model: TimeSeriesModelBase,
|
147
|
-
base_models: Optional[
|
147
|
+
base_models: Optional[list[str]] = None,
|
148
148
|
):
|
149
149
|
"""Add a model to the model graph of the trainer. If the model is an ensemble, also add
|
150
150
|
information about dependencies to the model graph (list of models specified via ``base_models``).
|
151
151
|
|
152
152
|
Parameters
|
153
153
|
----------
|
154
|
-
model
|
154
|
+
model
|
155
155
|
The model to be added to the model graph.
|
156
|
-
base_models
|
156
|
+
base_models
|
157
157
|
If the model is an ensemble, the list of base model names that are included in the ensemble.
|
158
158
|
Expected only when ``model`` is a ``AbstractTimeSeriesEnsembleModel``.
|
159
159
|
|
@@ -176,7 +176,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
176
176
|
for base_model in base_models:
|
177
177
|
self.model_graph.add_edge(base_model, model.name)
|
178
178
|
|
179
|
-
def _get_model_levels(self) ->
|
179
|
+
def _get_model_levels(self) -> dict[str, int]:
|
180
180
|
"""Get a dictionary mapping each model to their level in the model graph"""
|
181
181
|
|
182
182
|
# get nodes without a parent
|
@@ -197,7 +197,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
197
197
|
|
198
198
|
return levels
|
199
199
|
|
200
|
-
def get_models_attribute_dict(self, attribute: str, models: Optional[
|
200
|
+
def get_models_attribute_dict(self, attribute: str, models: Optional[list[str]] = None) -> dict[str, Any]:
|
201
201
|
"""Get an attribute from the `model_graph` for each of the model names
|
202
202
|
specified. If `models` is none, the attribute will be returned for all models"""
|
203
203
|
results = {}
|
@@ -230,13 +230,13 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
230
230
|
key=lambda mns: (mns[1], -mns[2]), # (score, -level)
|
231
231
|
)[0]
|
232
232
|
|
233
|
-
def get_model_names(self, level: Optional[int] = None) ->
|
233
|
+
def get_model_names(self, level: Optional[int] = None) -> list[str]:
|
234
234
|
"""Get model names that are registered in the model graph"""
|
235
235
|
if level is not None:
|
236
236
|
return list(node for node, l in self._get_model_levels().items() if l == level) # noqa: E741
|
237
237
|
return list(self.model_graph.nodes)
|
238
238
|
|
239
|
-
def get_info(self, include_model_info: bool = False) ->
|
239
|
+
def get_info(self, include_model_info: bool = False) -> dict[str, Any]:
|
240
240
|
num_models_trained = len(self.get_model_names())
|
241
241
|
if self.model_best is not None:
|
242
242
|
best_model = self.model_best
|
@@ -339,12 +339,13 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
339
339
|
model: AbstractTimeSeriesModel,
|
340
340
|
val_data: Optional[TimeSeriesDataFrame] = None,
|
341
341
|
time_limit: Optional[float] = None,
|
342
|
-
) ->
|
342
|
+
) -> list[str]:
|
343
343
|
"""Fit and save the given model on given training and validation data and save the trained model.
|
344
344
|
|
345
345
|
Returns
|
346
346
|
-------
|
347
|
-
model_names_trained
|
347
|
+
model_names_trained
|
348
|
+
the list of model names that were successfully trained
|
348
349
|
"""
|
349
350
|
fit_start_time = time.time()
|
350
351
|
model_names_trained = []
|
@@ -397,13 +398,13 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
397
398
|
def _train_multi(
|
398
399
|
self,
|
399
400
|
train_data: TimeSeriesDataFrame,
|
400
|
-
hyperparameters: Union[str,
|
401
|
+
hyperparameters: Union[str, dict],
|
401
402
|
val_data: Optional[TimeSeriesDataFrame] = None,
|
402
403
|
hyperparameter_tune_kwargs: Optional[Union[str, dict]] = None,
|
403
|
-
excluded_model_types: Optional[
|
404
|
+
excluded_model_types: Optional[list[str]] = None,
|
404
405
|
time_limit: Optional[float] = None,
|
405
406
|
random_seed: Optional[int] = None,
|
406
|
-
) ->
|
407
|
+
) -> list[str]:
|
407
408
|
logger.info(f"\nStarting training. Start time is {time.strftime('%Y-%m-%d %H:%M:%S')}")
|
408
409
|
|
409
410
|
time_start = time.time()
|
@@ -541,7 +542,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
541
542
|
|
542
543
|
def _get_ensemble_oof_data(
|
543
544
|
self, train_data: TimeSeriesDataFrame, val_data: Optional[TimeSeriesDataFrame]
|
544
|
-
) ->
|
545
|
+
) -> list[TimeSeriesDataFrame]:
|
545
546
|
if val_data is None:
|
546
547
|
return [val_fold for _, val_fold in self.val_splitter.split(train_data)]
|
547
548
|
else:
|
@@ -558,13 +559,13 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
558
559
|
|
559
560
|
def fit_ensemble(
|
560
561
|
self,
|
561
|
-
data_per_window:
|
562
|
-
model_names:
|
562
|
+
data_per_window: list[TimeSeriesDataFrame],
|
563
|
+
model_names: list[str],
|
563
564
|
time_limit: Optional[float] = None,
|
564
565
|
) -> str:
|
565
566
|
logger.info("Fitting simple weighted ensemble.")
|
566
567
|
|
567
|
-
predictions_per_window:
|
568
|
+
predictions_per_window: dict[str, list[TimeSeriesDataFrame]] = {}
|
568
569
|
base_model_scores = self.get_models_attribute_dict(attribute="val_score", models=self.get_model_names(0))
|
569
570
|
|
570
571
|
for model_name in model_names:
|
@@ -614,7 +615,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
614
615
|
self,
|
615
616
|
data: Optional[TimeSeriesDataFrame] = None,
|
616
617
|
extra_info: bool = False,
|
617
|
-
extra_metrics: Optional[
|
618
|
+
extra_metrics: Optional[list[Union[str, TimeSeriesScorer]]] = None,
|
618
619
|
use_cache: bool = True,
|
619
620
|
) -> pd.DataFrame:
|
620
621
|
logger.debug("Generating leaderboard for all models trained")
|
@@ -704,8 +705,8 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
704
705
|
return df[explicit_column_order]
|
705
706
|
|
706
707
|
def persist(
|
707
|
-
self, model_names: Union[Literal["all", "best"],
|
708
|
-
) ->
|
708
|
+
self, model_names: Union[Literal["all", "best"], list[str]] = "all", with_ancestors: bool = False
|
709
|
+
) -> list[str]:
|
709
710
|
if model_names == "all":
|
710
711
|
model_names = self.get_model_names()
|
711
712
|
elif model_names == "best":
|
@@ -729,7 +730,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
729
730
|
|
730
731
|
return model_names
|
731
732
|
|
732
|
-
def unpersist(self, model_names: Union[Literal["all"],
|
733
|
+
def unpersist(self, model_names: Union[Literal["all"], list[str]] = "all") -> list[str]:
|
733
734
|
if model_names == "all":
|
734
735
|
model_names = list(self.models.keys())
|
735
736
|
if not isinstance(model_names, list):
|
@@ -826,9 +827,9 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
826
827
|
self,
|
827
828
|
data: TimeSeriesDataFrame,
|
828
829
|
model: Optional[Union[str, TimeSeriesModelBase]] = None,
|
829
|
-
metrics: Optional[Union[str, TimeSeriesScorer,
|
830
|
+
metrics: Optional[Union[str, TimeSeriesScorer, list[Union[str, TimeSeriesScorer]]]] = None,
|
830
831
|
use_cache: bool = True,
|
831
|
-
) ->
|
832
|
+
) -> dict[str, float]:
|
832
833
|
past_data, known_covariates = data.get_model_inputs_for_scoring(
|
833
834
|
prediction_length=self.prediction_length, known_covariates_names=self.covariate_metadata.known_covariates
|
834
835
|
)
|
@@ -846,7 +847,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
846
847
|
def get_feature_importance(
|
847
848
|
self,
|
848
849
|
data: TimeSeriesDataFrame,
|
849
|
-
features:
|
850
|
+
features: list[str],
|
850
851
|
model: Optional[Union[str, TimeSeriesModelBase]] = None,
|
851
852
|
metric: Optional[Union[str, TimeSeriesScorer]] = None,
|
852
853
|
time_limit: Optional[float] = None,
|
@@ -996,7 +997,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
996
997
|
self,
|
997
998
|
model: Union[str, TimeSeriesModelBase],
|
998
999
|
data: TimeSeriesDataFrame,
|
999
|
-
model_pred_dict:
|
1000
|
+
model_pred_dict: dict[str, Optional[TimeSeriesDataFrame]],
|
1000
1001
|
known_covariates: Optional[TimeSeriesDataFrame] = None,
|
1001
1002
|
) -> TimeSeriesDataFrame:
|
1002
1003
|
"""Generate predictions using the given model.
|
@@ -1012,8 +1013,8 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
1012
1013
|
self,
|
1013
1014
|
model: Union[str, TimeSeriesModelBase],
|
1014
1015
|
data: TimeSeriesDataFrame,
|
1015
|
-
model_pred_dict:
|
1016
|
-
) -> Union[TimeSeriesDataFrame,
|
1016
|
+
model_pred_dict: dict[str, Optional[TimeSeriesDataFrame]],
|
1017
|
+
) -> Union[TimeSeriesDataFrame, dict[str, Optional[TimeSeriesDataFrame]]]:
|
1017
1018
|
"""Get the first argument that should be passed to model.predict.
|
1018
1019
|
|
1019
1020
|
This method assumes that model_pred_dict contains the predictions of all base models, if model is an ensemble.
|
@@ -1029,13 +1030,13 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
1029
1030
|
|
1030
1031
|
def get_model_pred_dict(
|
1031
1032
|
self,
|
1032
|
-
model_names:
|
1033
|
+
model_names: list[str],
|
1033
1034
|
data: TimeSeriesDataFrame,
|
1034
1035
|
known_covariates: Optional[TimeSeriesDataFrame] = None,
|
1035
1036
|
raise_exception_if_failed: bool = True,
|
1036
1037
|
use_cache: bool = True,
|
1037
1038
|
random_seed: Optional[int] = None,
|
1038
|
-
) ->
|
1039
|
+
) -> tuple[dict[str, Optional[TimeSeriesDataFrame]], dict[str, float]]:
|
1039
1040
|
"""Return a dictionary with predictions of all models for the given dataset.
|
1040
1041
|
|
1041
1042
|
Parameters
|
@@ -1060,7 +1061,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
1060
1061
|
model_pred_dict, pred_time_dict_marginal = self._get_cached_pred_dicts(dataset_hash)
|
1061
1062
|
else:
|
1062
1063
|
model_pred_dict = {}
|
1063
|
-
pred_time_dict_marginal:
|
1064
|
+
pred_time_dict_marginal: dict[str, Any] = {}
|
1064
1065
|
|
1065
1066
|
model_set = set()
|
1066
1067
|
for model_name in model_names:
|
@@ -1106,7 +1107,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
1106
1107
|
|
1107
1108
|
return final_model_pred_dict, final_pred_time_dict_total
|
1108
1109
|
|
1109
|
-
def _get_total_pred_time_from_marginal(self, pred_time_dict_marginal:
|
1110
|
+
def _get_total_pred_time_from_marginal(self, pred_time_dict_marginal: dict[str, float]) -> dict[str, float]:
|
1110
1111
|
pred_time_dict_total = defaultdict(float)
|
1111
1112
|
for model_name in pred_time_dict_marginal.keys():
|
1112
1113
|
for base_model in self.get_minimum_model_set(model_name):
|
@@ -1139,7 +1140,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
1139
1140
|
|
1140
1141
|
def _get_cached_pred_dicts(
|
1141
1142
|
self, dataset_hash: str
|
1142
|
-
) ->
|
1143
|
+
) -> tuple[dict[str, Optional[TimeSeriesDataFrame]], dict[str, float]]:
|
1143
1144
|
"""Load cached predictions for given dataset_hash from disk, if possible.
|
1144
1145
|
|
1145
1146
|
If loading fails for any reason, empty dicts are returned.
|
@@ -1158,8 +1159,8 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
1158
1159
|
def _save_cached_pred_dicts(
|
1159
1160
|
self,
|
1160
1161
|
dataset_hash: str,
|
1161
|
-
model_pred_dict:
|
1162
|
-
pred_time_dict:
|
1162
|
+
model_pred_dict: dict[str, Optional[TimeSeriesDataFrame]],
|
1163
|
+
pred_time_dict: dict[str, float],
|
1163
1164
|
) -> None:
|
1164
1165
|
cached_predictions = self._load_cached_predictions()
|
1165
1166
|
# Do not save results for models that failed
|
@@ -1183,8 +1184,8 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
1183
1184
|
self,
|
1184
1185
|
train_data: Optional[TimeSeriesDataFrame] = None,
|
1185
1186
|
val_data: Optional[TimeSeriesDataFrame] = None,
|
1186
|
-
models: Optional[
|
1187
|
-
) ->
|
1187
|
+
models: Optional[list[str]] = None,
|
1188
|
+
) -> list[str]:
|
1188
1189
|
train_data = train_data or self.load_train_data()
|
1189
1190
|
val_data = val_data or self.load_val_data()
|
1190
1191
|
refit_full_data = self._merge_refit_full_data(train_data, val_data)
|
@@ -1228,7 +1229,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
1228
1229
|
self.save()
|
1229
1230
|
return models_trained_full
|
1230
1231
|
|
1231
|
-
def refit_full(self, model: str = "all") ->
|
1232
|
+
def refit_full(self, model: str = "all") -> dict[str, str]:
|
1232
1233
|
time_start = time.time()
|
1233
1234
|
existing_models = self.get_model_names()
|
1234
1235
|
if model == "all":
|
@@ -1262,13 +1263,13 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
1262
1263
|
|
1263
1264
|
def construct_model_templates(
|
1264
1265
|
self,
|
1265
|
-
hyperparameters: Union[str,
|
1266
|
+
hyperparameters: Union[str, dict[str, Any]],
|
1266
1267
|
*,
|
1267
1268
|
multi_window: bool = False,
|
1268
1269
|
freq: Optional[str] = None,
|
1269
|
-
excluded_model_types: Optional[
|
1270
|
+
excluded_model_types: Optional[list[str]] = None,
|
1270
1271
|
hyperparameter_tune: bool = False,
|
1271
|
-
) ->
|
1272
|
+
) -> list[TimeSeriesModelBase]:
|
1272
1273
|
return get_preset_models(
|
1273
1274
|
path=self.path,
|
1274
1275
|
eval_metric=self.eval_metric,
|
@@ -1288,10 +1289,10 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
1288
1289
|
def fit(
|
1289
1290
|
self,
|
1290
1291
|
train_data: TimeSeriesDataFrame,
|
1291
|
-
hyperparameters: Union[str,
|
1292
|
+
hyperparameters: Union[str, dict[Any, dict]],
|
1292
1293
|
val_data: Optional[TimeSeriesDataFrame] = None,
|
1293
|
-
hyperparameter_tune_kwargs: Optional[Union[str,
|
1294
|
-
excluded_model_types: Optional[
|
1294
|
+
hyperparameter_tune_kwargs: Optional[Union[str, dict]] = None,
|
1295
|
+
excluded_model_types: Optional[list[str]] = None,
|
1295
1296
|
time_limit: Optional[float] = None,
|
1296
1297
|
random_seed: Optional[int] = None,
|
1297
1298
|
):
|
@@ -1301,13 +1302,13 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
1301
1302
|
|
1302
1303
|
Parameters
|
1303
1304
|
----------
|
1304
|
-
train_data
|
1305
|
+
train_data
|
1305
1306
|
Training data for fitting time series timeseries models.
|
1306
|
-
hyperparameters
|
1307
|
+
hyperparameters
|
1307
1308
|
A dictionary mapping selected model names, model classes or model factory to hyperparameter
|
1308
1309
|
settings. Model names should be present in `trainer.presets.DEFAULT_MODEL_NAMES`. Optionally,
|
1309
1310
|
the user may provide one of "default", "light" and "very_light" to specify presets.
|
1310
|
-
val_data
|
1311
|
+
val_data
|
1311
1312
|
Optional validation data set to report validation scores on.
|
1312
1313
|
hyperparameter_tune_kwargs
|
1313
1314
|
Args for hyperparameter tuning
|
@@ -1,5 +1,5 @@
|
|
1
1
|
import logging
|
2
|
-
from typing import
|
2
|
+
from typing import Literal, Optional, Protocol, overload, runtime_checkable
|
3
3
|
|
4
4
|
import numpy as np
|
5
5
|
import pandas as pd
|
@@ -53,7 +53,7 @@ class GlobalCovariateScaler(CovariateScaler):
|
|
53
53
|
self.use_past_covariates = use_past_covariates
|
54
54
|
self.use_static_features = use_static_features
|
55
55
|
self.skew_threshold = skew_threshold
|
56
|
-
self._column_transformers: Optional[
|
56
|
+
self._column_transformers: Optional[dict[Literal["known", "past", "static"], ColumnTransformer]] = None
|
57
57
|
|
58
58
|
def is_fit(self) -> bool:
|
59
59
|
return self._column_transformers is not None
|
@@ -117,7 +117,7 @@ class GlobalCovariateScaler(CovariateScaler):
|
|
117
117
|
known_covariates[columns] = self._column_transformers["known"].transform(known_covariates[columns])
|
118
118
|
return known_covariates
|
119
119
|
|
120
|
-
def _get_transformer_for_columns(self, df: pd.DataFrame, columns:
|
120
|
+
def _get_transformer_for_columns(self, df: pd.DataFrame, columns: list[str]) -> ColumnTransformer:
|
121
121
|
"""Passthrough bool features, use QuantileTransform for skewed features, and use StandardScaler for the rest.
|
122
122
|
|
123
123
|
The preprocessing logic is similar to the TORCH_NN model from Tabular.
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import Literal, Optional, Protocol,
|
1
|
+
from typing import Literal, Optional, Protocol, Union, overload
|
2
2
|
|
3
3
|
import numpy as np
|
4
4
|
import pandas as pd
|
@@ -30,7 +30,7 @@ class LocalTargetScaler(TargetScaler):
|
|
30
30
|
self.loc: Optional[pd.Series] = None
|
31
31
|
self.scale: Optional[pd.Series] = None
|
32
32
|
|
33
|
-
def _compute_loc_scale(self, target_series: pd.Series) ->
|
33
|
+
def _compute_loc_scale(self, target_series: pd.Series) -> tuple[Optional[pd.Series], Optional[pd.Series]]:
|
34
34
|
raise NotImplementedError
|
35
35
|
|
36
36
|
def fit_transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
|
@@ -45,7 +45,7 @@ class LocalTargetScaler(TargetScaler):
|
|
45
45
|
self.scale = self.scale.clip(lower=self.min_scale).replace([np.inf, -np.inf], np.nan).fillna(1.0)
|
46
46
|
return self
|
47
47
|
|
48
|
-
def _reindex_loc_scale(self, item_index: pd.Index) ->
|
48
|
+
def _reindex_loc_scale(self, item_index: pd.Index) -> tuple[Union[np.ndarray, float], Union[np.ndarray, float]]:
|
49
49
|
"""Reindex loc and scale parameters for the given item_ids and convert them to an array-like."""
|
50
50
|
if self.loc is not None:
|
51
51
|
loc = self.loc.reindex(item_index).to_numpy()
|
@@ -74,7 +74,7 @@ class LocalStandardScaler(LocalTargetScaler):
|
|
74
74
|
The resulting affine transformation is (x - loc) / scale, where scale = std(x), loc = mean(x).
|
75
75
|
"""
|
76
76
|
|
77
|
-
def _compute_loc_scale(self, target_series: pd.Series) ->
|
77
|
+
def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series, pd.Series]:
|
78
78
|
stats = target_series.groupby(level=ITEMID, sort=False).agg(["mean", "std"])
|
79
79
|
return stats["mean"], stats["std"]
|
80
80
|
|
@@ -82,7 +82,7 @@ class LocalStandardScaler(LocalTargetScaler):
|
|
82
82
|
class LocalMeanAbsScaler(LocalTargetScaler):
|
83
83
|
"""Applies mean absolute scaling to each time series in the dataset."""
|
84
84
|
|
85
|
-
def _compute_loc_scale(self, target_series: pd.Series) ->
|
85
|
+
def _compute_loc_scale(self, target_series: pd.Series) -> tuple[Optional[pd.Series], pd.Series]:
|
86
86
|
scale = target_series.abs().groupby(level=ITEMID, sort=False).agg("mean")
|
87
87
|
return None, scale
|
88
88
|
|
@@ -93,7 +93,7 @@ class LocalMinMaxScaler(LocalTargetScaler):
|
|
93
93
|
The resulting affine transformation is (x - loc) / scale, where scale = max(x) - min(x), loc = min(x) / scale.
|
94
94
|
"""
|
95
95
|
|
96
|
-
def _compute_loc_scale(self, target_series: pd.Series) ->
|
96
|
+
def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series, pd.Series]:
|
97
97
|
stats = target_series.abs().groupby(level=ITEMID, sort=False).agg(["min", "max"])
|
98
98
|
scale = (stats["max"] - stats["min"]).clip(lower=self.min_scale)
|
99
99
|
loc = stats["min"]
|
@@ -117,7 +117,7 @@ class LocalRobustScaler(LocalTargetScaler):
|
|
117
117
|
self.q_max = 0.75
|
118
118
|
assert 0 < self.q_min < self.q_max < 1
|
119
119
|
|
120
|
-
def _compute_loc_scale(self, target_series: pd.Series) ->
|
120
|
+
def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series, pd.Series]:
|
121
121
|
grouped = target_series.groupby(level=ITEMID, sort=False)
|
122
122
|
loc = grouped.median()
|
123
123
|
lower = grouped.quantile(self.q_min)
|
@@ -2,7 +2,7 @@
|
|
2
2
|
Generate lag indices based on frequency string. Adapted from gluonts.time_feature.lag.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from typing import
|
5
|
+
from typing import Optional
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
import pandas as pd
|
@@ -72,7 +72,7 @@ def get_lags_for_frequency(
|
|
72
72
|
lag_ub: int = 1200,
|
73
73
|
num_lags: Optional[int] = None,
|
74
74
|
num_default_lags: int = 7,
|
75
|
-
) ->
|
75
|
+
) -> list[int]:
|
76
76
|
"""
|
77
77
|
Generates a list of lags that that are appropriate for the given frequency
|
78
78
|
string.
|
@@ -2,7 +2,7 @@
|
|
2
2
|
Generate time features based on frequency string. Adapted from gluonts.time_feature.time_feature.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from typing import Callable
|
5
|
+
from typing import Callable
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
import pandas as pd
|
@@ -56,7 +56,7 @@ def second_of_minute(index: pd.DatetimeIndex) -> np.ndarray:
|
|
56
56
|
return _normalize(index.second, num=60)
|
57
57
|
|
58
58
|
|
59
|
-
def get_time_features_for_frequency(freq) ->
|
59
|
+
def get_time_features_for_frequency(freq) -> list[Callable]:
|
60
60
|
features_by_offset_name = {
|
61
61
|
"YE": [],
|
62
62
|
"QE": [quarter_of_year],
|
@@ -2,7 +2,7 @@ import logging
|
|
2
2
|
import reprlib
|
3
3
|
import time
|
4
4
|
from dataclasses import asdict, dataclass, field
|
5
|
-
from typing import Any,
|
5
|
+
from typing import Any, Literal, Optional
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
import pandas as pd
|
@@ -24,50 +24,50 @@ logger = logging.getLogger(__name__)
|
|
24
24
|
class CovariateMetadata:
|
25
25
|
"""Provides mapping from different covariate types to columns in the dataset."""
|
26
26
|
|
27
|
-
static_features_cat:
|
28
|
-
static_features_real:
|
29
|
-
known_covariates_real:
|
30
|
-
known_covariates_cat:
|
31
|
-
past_covariates_real:
|
32
|
-
past_covariates_cat:
|
27
|
+
static_features_cat: list[str] = field(default_factory=list)
|
28
|
+
static_features_real: list[str] = field(default_factory=list)
|
29
|
+
known_covariates_real: list[str] = field(default_factory=list)
|
30
|
+
known_covariates_cat: list[str] = field(default_factory=list)
|
31
|
+
past_covariates_real: list[str] = field(default_factory=list)
|
32
|
+
past_covariates_cat: list[str] = field(default_factory=list)
|
33
33
|
|
34
34
|
@property
|
35
|
-
def static_features(self) ->
|
35
|
+
def static_features(self) -> list[str]:
|
36
36
|
return self.static_features_cat + self.static_features_real
|
37
37
|
|
38
38
|
@property
|
39
|
-
def known_covariates(self) ->
|
39
|
+
def known_covariates(self) -> list[str]:
|
40
40
|
return self.known_covariates_cat + self.known_covariates_real
|
41
41
|
|
42
42
|
@property
|
43
|
-
def past_covariates(self) ->
|
43
|
+
def past_covariates(self) -> list[str]:
|
44
44
|
return self.past_covariates_cat + self.past_covariates_real
|
45
45
|
|
46
46
|
@property
|
47
|
-
def covariates(self) ->
|
47
|
+
def covariates(self) -> list[str]:
|
48
48
|
return self.known_covariates + self.past_covariates
|
49
49
|
|
50
50
|
@property
|
51
|
-
def covariates_real(self) ->
|
51
|
+
def covariates_real(self) -> list[str]:
|
52
52
|
return self.known_covariates_real + self.past_covariates_real
|
53
53
|
|
54
54
|
@property
|
55
|
-
def covariates_cat(self) ->
|
55
|
+
def covariates_cat(self) -> list[str]:
|
56
56
|
return self.known_covariates_cat + self.past_covariates_cat
|
57
57
|
|
58
58
|
@property
|
59
|
-
def real_features(self) ->
|
59
|
+
def real_features(self) -> list[str]:
|
60
60
|
return self.static_features_real + self.covariates_real
|
61
61
|
|
62
62
|
@property
|
63
|
-
def cat_features(self) ->
|
63
|
+
def cat_features(self) -> list[str]:
|
64
64
|
return self.static_features_cat + self.covariates_cat
|
65
65
|
|
66
66
|
@property
|
67
|
-
def all_features(self) ->
|
67
|
+
def all_features(self) -> list[str]:
|
68
68
|
return self.static_features + self.covariates
|
69
69
|
|
70
|
-
def to_dict(self) ->
|
70
|
+
def to_dict(self) -> dict[str, Any]:
|
71
71
|
return asdict(self)
|
72
72
|
|
73
73
|
|
@@ -120,13 +120,13 @@ class TimeSeriesFeatureGenerator:
|
|
120
120
|
|
121
121
|
Parameters
|
122
122
|
----------
|
123
|
-
target
|
123
|
+
target
|
124
124
|
Name of the target column.
|
125
|
-
known_covariates_names
|
125
|
+
known_covariates_names
|
126
126
|
Columns that contain covariates that are known into the future.
|
127
|
-
float_dtype
|
127
|
+
float_dtype
|
128
128
|
Numpy float dtype to which all numeric columns (float, int, bool) will be converted both in static & dynamic dfs.
|
129
|
-
num_samples
|
129
|
+
num_samples
|
130
130
|
Number of rows sampled from the training dataset to speed up computation of the median (used later for imputation).
|
131
131
|
If set to `None`, median will be computed using all rows.
|
132
132
|
"""
|
@@ -134,7 +134,7 @@ class TimeSeriesFeatureGenerator:
|
|
134
134
|
def __init__(
|
135
135
|
self,
|
136
136
|
target: str,
|
137
|
-
known_covariates_names:
|
137
|
+
known_covariates_names: list[str],
|
138
138
|
float_dtype: str = "float32",
|
139
139
|
num_samples: Optional[int] = 20_000,
|
140
140
|
):
|
@@ -143,8 +143,8 @@ class TimeSeriesFeatureGenerator:
|
|
143
143
|
self.num_samples = num_samples
|
144
144
|
|
145
145
|
self._is_fit = False
|
146
|
-
self.known_covariates_names:
|
147
|
-
self.past_covariates_names:
|
146
|
+
self.known_covariates_names: list[str] = list(known_covariates_names)
|
147
|
+
self.past_covariates_names: list[str] = []
|
148
148
|
self.known_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator()
|
149
149
|
self.past_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator()
|
150
150
|
# Cat features with cat_count=1 are fine in static_features since they are repeated for all time steps in a TS
|
@@ -154,7 +154,7 @@ class TimeSeriesFeatureGenerator:
|
|
154
154
|
self._train_static_real_median: Optional[pd.Series] = None
|
155
155
|
|
156
156
|
@property
|
157
|
-
def required_column_names(self) ->
|
157
|
+
def required_column_names(self) -> list[str]:
|
158
158
|
return [self.target] + list(self.known_covariates_names) + list(self.past_covariates_names)
|
159
159
|
|
160
160
|
@property
|
@@ -262,13 +262,13 @@ class TimeSeriesFeatureGenerator:
|
|
262
262
|
return self._impute_covariates(ts_df, column_names=self.covariate_metadata.covariates_real)
|
263
263
|
|
264
264
|
@staticmethod
|
265
|
-
def _concat_dfs(dfs_to_concat:
|
265
|
+
def _concat_dfs(dfs_to_concat: list[pd.DataFrame]) -> pd.DataFrame:
|
266
266
|
if len(dfs_to_concat) == 1:
|
267
267
|
return dfs_to_concat[0]
|
268
268
|
else:
|
269
269
|
return pd.concat(dfs_to_concat, axis=1, copy=False)
|
270
270
|
|
271
|
-
def _impute_covariates(self, ts_df: TimeSeriesDataFrame, column_names:
|
271
|
+
def _impute_covariates(self, ts_df: TimeSeriesDataFrame, column_names: list[str]) -> TimeSeriesDataFrame:
|
272
272
|
"""Impute missing values in selected columns with ffill, bfill, and median imputation."""
|
273
273
|
if len(column_names) > 0:
|
274
274
|
# ffill + bfill covariates that have at least some observed values
|
@@ -346,10 +346,10 @@ class TimeSeriesFeatureGenerator:
|
|
346
346
|
return None
|
347
347
|
|
348
348
|
@staticmethod
|
349
|
-
def _detect_and_log_column_types(transformed_df: pd.DataFrame) ->
|
349
|
+
def _detect_and_log_column_types(transformed_df: pd.DataFrame) -> tuple[list[str], list[str]]:
|
350
350
|
"""Log & return names of categorical and real-valued columns in the DataFrame."""
|
351
|
-
cat_column_names:
|
352
|
-
real_column_names:
|
351
|
+
cat_column_names: list[str] = []
|
352
|
+
real_column_names: list[str] = []
|
353
353
|
for column_name, column_dtype in transformed_df.dtypes.items():
|
354
354
|
if isinstance(column_dtype, pd.CategoricalDtype):
|
355
355
|
cat_column_names.append(str(column_name))
|
@@ -362,7 +362,7 @@ class TimeSeriesFeatureGenerator:
|
|
362
362
|
|
363
363
|
@staticmethod
|
364
364
|
def _check_required_columns_are_present(
|
365
|
-
data: TimeSeriesDataFrame, required_column_names:
|
365
|
+
data: TimeSeriesDataFrame, required_column_names: list[str], data_frame_name: str
|
366
366
|
) -> None:
|
367
367
|
missing_columns = pd.Index(required_column_names).difference(data.columns)
|
368
368
|
if len(missing_columns) > 0:
|
autogluon/timeseries/version.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: autogluon.timeseries
|
3
|
-
Version: 1.4.
|
3
|
+
Version: 1.4.1b20250823
|
4
4
|
Summary: Fast and Accurate ML in 3 Lines of Code
|
5
5
|
Home-page: https://github.com/autogluon/autogluon
|
6
6
|
Author: AutoGluon Community
|
@@ -55,10 +55,10 @@ Requires-Dist: fugue>=0.9.0
|
|
55
55
|
Requires-Dist: tqdm<5,>=4.38
|
56
56
|
Requires-Dist: orjson~=3.9
|
57
57
|
Requires-Dist: tensorboard<3,>=2.9
|
58
|
-
Requires-Dist: autogluon.core[raytune]==1.4.
|
59
|
-
Requires-Dist: autogluon.common==1.4.
|
60
|
-
Requires-Dist: autogluon.features==1.4.
|
61
|
-
Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.4.
|
58
|
+
Requires-Dist: autogluon.core[raytune]==1.4.1b20250823
|
59
|
+
Requires-Dist: autogluon.common==1.4.1b20250823
|
60
|
+
Requires-Dist: autogluon.features==1.4.1b20250823
|
61
|
+
Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.4.1b20250823
|
62
62
|
Provides-Extra: all
|
63
63
|
Provides-Extra: tests
|
64
64
|
Requires-Dist: pytest; extra == "tests"
|