autogluon.timeseries 1.4.1b20251115__py3-none-any.whl → 1.5.0b20251221__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of autogluon.timeseries might be problematic.
- autogluon/timeseries/configs/hyperparameter_presets.py +13 -28
- autogluon/timeseries/configs/predictor_presets.py +23 -39
- autogluon/timeseries/dataset/ts_dataframe.py +32 -34
- autogluon/timeseries/learner.py +67 -33
- autogluon/timeseries/metrics/__init__.py +4 -4
- autogluon/timeseries/metrics/abstract.py +8 -8
- autogluon/timeseries/metrics/point.py +9 -9
- autogluon/timeseries/metrics/quantile.py +4 -4
- autogluon/timeseries/models/__init__.py +2 -1
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +52 -50
- autogluon/timeseries/models/abstract/model_trial.py +2 -1
- autogluon/timeseries/models/abstract/tunable.py +8 -8
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +30 -26
- autogluon/timeseries/models/autogluon_tabular/per_step.py +13 -11
- autogluon/timeseries/models/autogluon_tabular/transforms.py +2 -2
- autogluon/timeseries/models/chronos/__init__.py +2 -1
- autogluon/timeseries/models/chronos/chronos2.py +395 -0
- autogluon/timeseries/models/chronos/model.py +30 -25
- autogluon/timeseries/models/chronos/utils.py +5 -5
- autogluon/timeseries/models/ensemble/__init__.py +17 -10
- autogluon/timeseries/models/ensemble/abstract.py +13 -9
- autogluon/timeseries/models/ensemble/array_based/__init__.py +2 -2
- autogluon/timeseries/models/ensemble/array_based/abstract.py +24 -31
- autogluon/timeseries/models/ensemble/array_based/models.py +146 -11
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +2 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +6 -5
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +44 -83
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +21 -55
- autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
- autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +7 -3
- autogluon/timeseries/models/ensemble/weighted/basic.py +26 -13
- autogluon/timeseries/models/ensemble/weighted/greedy.py +21 -144
- autogluon/timeseries/models/gluonts/abstract.py +30 -29
- autogluon/timeseries/models/gluonts/dataset.py +9 -9
- autogluon/timeseries/models/gluonts/models.py +0 -7
- autogluon/timeseries/models/local/__init__.py +0 -7
- autogluon/timeseries/models/local/abstract_local_model.py +13 -16
- autogluon/timeseries/models/local/naive.py +2 -2
- autogluon/timeseries/models/local/npts.py +7 -1
- autogluon/timeseries/models/local/statsforecast.py +13 -13
- autogluon/timeseries/models/multi_window/multi_window_model.py +38 -23
- autogluon/timeseries/models/registry.py +3 -4
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +3 -4
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +6 -6
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +4 -9
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +2 -3
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +10 -10
- autogluon/timeseries/models/toto/_internal/dataset.py +2 -2
- autogluon/timeseries/models/toto/_internal/forecaster.py +8 -8
- autogluon/timeseries/models/toto/dataloader.py +4 -4
- autogluon/timeseries/models/toto/hf_pretrained_model.py +97 -16
- autogluon/timeseries/models/toto/model.py +30 -17
- autogluon/timeseries/predictor.py +531 -136
- autogluon/timeseries/regressor.py +18 -23
- autogluon/timeseries/splitter.py +2 -2
- autogluon/timeseries/trainer/ensemble_composer.py +323 -129
- autogluon/timeseries/trainer/model_set_builder.py +9 -9
- autogluon/timeseries/trainer/prediction_cache.py +16 -16
- autogluon/timeseries/trainer/trainer.py +235 -145
- autogluon/timeseries/trainer/utils.py +3 -4
- autogluon/timeseries/transforms/covariate_scaler.py +7 -7
- autogluon/timeseries/transforms/target_scaler.py +8 -8
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/lags.py +1 -3
- autogluon/timeseries/utils/datetime/seasonality.py +1 -3
- autogluon/timeseries/utils/features.py +22 -9
- autogluon/timeseries/utils/forecast.py +1 -2
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/version.py +1 -1
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/METADATA +23 -21
- autogluon_timeseries-1.5.0b20251221.dist-info/RECORD +103 -0
- autogluon_timeseries-1.4.1b20251115.dist-info/RECORD +0 -96
- /autogluon.timeseries-1.4.1b20251115-py3.9-nspkg.pth → /autogluon.timeseries-1.5.0b20251221-py3.11-nspkg.pth +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/WHEEL +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/licenses/LICENSE +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/licenses/NOTICE +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/namespace_packages.txt +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/top_level.txt +0 -0
- {autogluon_timeseries-1.4.1b20251115.dist-info → autogluon_timeseries-1.5.0b20251221.dist-info}/zip-safe +0 -0
autogluon/timeseries/metrics/quantile.py:

```diff
@@ -1,4 +1,4 @@
-from typing import
+from typing import Sequence

 import numpy as np
 import pandas as pd
@@ -92,13 +92,13 @@ class SQL(TimeSeriesScorer):
     def __init__(
         self,
         prediction_length: int = 1,
-        seasonal_period:
-        horizon_weight:
+        seasonal_period: int | None = None,
+        horizon_weight: Sequence[float] | None = None,
     ):
         super().__init__(
             prediction_length=prediction_length, seasonal_period=seasonal_period, horizon_weight=horizon_weight
         )
-        self._past_abs_seasonal_error:
+        self._past_abs_seasonal_error: pd.Series | None = None

     def save_past_metrics(
         self, data_past: TimeSeriesDataFrame, target: str = "target", seasonal_period: int = 1, **kwargs
```
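Both parameters are part of the scorer's public constructor, so the metric can be configured directly. A minimal sketch, assuming `SQL` is exported from `autogluon.timeseries.metrics` and that `horizon_weight` expects one weight per forecast step:

```python
from autogluon.timeseries.metrics import SQL

# Up-weight later forecast steps; one weight per step of the horizon.
metric = SQL(prediction_length=3, seasonal_period=7, horizon_weight=[0.5, 1.0, 1.5])
```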
autogluon/timeseries/models/__init__.py:

```diff
@@ -1,5 +1,5 @@
 from .autogluon_tabular import DirectTabularModel, PerStepTabularModel, RecursiveTabularModel
-from .chronos import ChronosModel
+from .chronos import Chronos2Model, ChronosModel
 from .gluonts import (
     DeepARModel,
     DLinearModel,
@@ -45,6 +45,7 @@ __all__ = [
     "ETSModel",
     "IMAPAModel",
     "ChronosModel",
+    "Chronos2Model",
     "ModelRegistry",
     "NPTSModel",
     "NaiveModel",
```
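Together with the new `chronos/chronos2.py` module (+395 lines in the file list), this export makes Chronos2 available as a regular model. A hedged usage sketch: the hyperparameter key `"Chronos2"` is an assumption, derived from the `re.sub(r"Model$", ...)` naming rule visible later in this diff (the same rule that maps `ChronosModel` to `"Chronos"`):

```python
import pandas as pd
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

# Toy single-series dataset; real data would have many items and more history.
df = pd.DataFrame({
    "item_id": "A",
    "timestamp": pd.date_range("2025-01-01", periods=60, freq="D"),
    "target": range(60),
})
train_data = TimeSeriesDataFrame.from_data_frame(df)

predictor = TimeSeriesPredictor(prediction_length=7).fit(
    train_data,
    hyperparameters={"Chronos2": {}},  # assumed registry key, see note above
)
predictions = predictor.predict(train_data)
```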
autogluon/timeseries/models/abstract/abstract_timeseries_model.py:

```diff
@@ -4,7 +4,7 @@ import os
 import re
 import time
 from abc import ABC, abstractmethod
-from typing import Any,
+from typing import Any, Sequence

 import pandas as pd
 from typing_extensions import Self
@@ -75,15 +75,15 @@ class TimeSeriesModelBase(ModelBase, ABC):

     def __init__(
         self,
-        path:
-        name:
-        hyperparameters:
-        freq:
+        path: str | None = None,
+        name: str | None = None,
+        hyperparameters: dict[str, Any] | None = None,
+        freq: str | None = None,
         prediction_length: int = 1,
-        covariate_metadata:
+        covariate_metadata: CovariateMetadata | None = None,
         target: str = "target",
         quantile_levels: Sequence[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
-        eval_metric:
+        eval_metric: str | TimeSeriesScorer | None = None,
     ):
         self.name = name or re.sub(r"Model$", "", self.__class__.__name__)

```
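The recurring pattern here and throughout the diff is a move from `typing.Optional`/`typing.Union` annotations to PEP 604 union syntax (the removed halves of these lines are truncated in the source view, but the added lines make the pattern clear). Evaluated at runtime, the `X | None` form requires Python >= 3.10, which is consistent with the `py3.9-nspkg.pth → py3.11-nspkg.pth` rename in the file list. A minimal illustration:

```python
from typing import Optional, Union

x_old: Optional[int] = None   # pre-1.5 style
x_new: int | None = None      # style used throughout this release
y_old: Union[str, float] = "a"
y_new: str | float = "a"

# On Python >= 3.10 the two spellings compare equal.
assert (int | None) == Optional[int]
assert (str | float) == Union[str, float]
```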
```diff
@@ -102,7 +102,7 @@ class TimeSeriesModelBase(ModelBase, ABC):
         self.target: str = target
         self.covariate_metadata = covariate_metadata or CovariateMetadata()

-        self.freq:
+        self.freq: str | None = freq
         self.prediction_length: int = prediction_length
         self.quantile_levels: list[float] = list(quantile_levels)

@@ -117,17 +117,21 @@ class TimeSeriesModelBase(ModelBase, ABC):
         else:
             self.must_drop_median = False

-        self._oof_predictions:
+        self._oof_predictions: list[TimeSeriesDataFrame] | None = None

         # user provided hyperparameters and extra arguments that are used during model training
         self._hyperparameters, self._extra_ag_args = self._check_and_split_hyperparameters(hyperparameters)

-
-        self.
-
-
-        )
-        self.
+        # Time taken to fit in seconds (Training data)
+        self.fit_time: float | None = None
+        # Time taken to predict in seconds, for a single prediction horizon on validation data
+        self.predict_time: float | None = None
+        # Time taken to predict 1 row of data in seconds (with batch size `predict_1_batch_size`)
+        self.predict_1_time: float | None = None
+        # Useful for ensembles, additional prediction time excluding base models. None for base models.
+        self.predict_time_marginal: float | None = None
+        # Score with eval_metric on validation data
+        self.val_score: float | None = None

     def __repr__(self) -> str:
         return self.name
```
```diff
@@ -143,9 +147,14 @@ class TimeSeriesModelBase(ModelBase, ABC):
         self.path = path_context
         self.path_root = self.path.rsplit(self.name, 1)[0]

+    def cache_oof_predictions(self, predictions: TimeSeriesDataFrame | list[TimeSeriesDataFrame]) -> None:
+        if isinstance(predictions, TimeSeriesDataFrame):
+            predictions = [predictions]
+        self._oof_predictions = predictions
+
     @classmethod
     def _check_and_split_hyperparameters(
-        cls, hyperparameters:
+        cls, hyperparameters: dict[str, Any] | None = None
     ) -> tuple[dict[str, Any], dict[str, Any]]:
         """Given the user-specified hyperparameters, split into `hyperparameters` and `extra_ag_args`, intended
         to be used during model initialization.
```
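The new `cache_oof_predictions` helper normalizes its input before storing it, so single-window validation and multi-window backtesting share one storage format. The same normalization as a standalone sketch, with plain pandas standing in for `TimeSeriesDataFrame` (which subclasses `pandas.DataFrame`):

```python
import pandas as pd

def normalize_oof(predictions: pd.DataFrame | list[pd.DataFrame]) -> list[pd.DataFrame]:
    # Mirrors cache_oof_predictions: wrap a single frame, pass lists through.
    if isinstance(predictions, pd.DataFrame):
        predictions = [predictions]
    return predictions

single_window = pd.DataFrame({"mean": [1.0, 2.0]})
assert len(normalize_oof(single_window)) == 1
assert len(normalize_oof([single_window, single_window])) == 2
```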
```diff
@@ -180,7 +189,7 @@ class TimeSeriesModelBase(ModelBase, ABC):
         )
         return hyperparameters, extra_ag_args

-    def save(self, path:
+    def save(self, path: str | None = None, verbose: bool = True) -> str:
         if path is None:
             path = self.path

@@ -242,9 +251,13 @@ class TimeSeriesModelBase(ModelBase, ABC):
         return {}

     def get_hyperparameters(self) -> dict:
-        """Get hyperparameters that will be passed to the "inner model" that AutoGluon wraps."""
+        """Get dictionary of hyperparameters that will be passed to the "inner model" that AutoGluon wraps."""
         return {**self._get_default_hyperparameters(), **self._hyperparameters}

+    def get_hyperparameter(self, key: str) -> Any:
+        """Get a single hyperparameter value for the "inner model"."""
+        return self.get_hyperparameters()[key]
+
     def get_info(self) -> dict:
         """
         Returns a dictionary of numerous fields describing the model.
```
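`get_hyperparameters` merges model defaults with user-supplied values, letting the user's entries win, and the new `get_hyperparameter` simply indexes into that merged dict (so an unknown key raises `KeyError`). The merge semantics in isolation, with illustrative names:

```python
defaults = {"target_scaler": "standard", "max_epochs": 100}  # illustrative values
user = {"max_epochs": 5}

merged = {**defaults, **user}  # same pattern as get_hyperparameters()
assert merged == {"target_scaler": "standard", "max_epochs": 5}
# merged["unknown_key"]  # would raise KeyError, exactly as get_hyperparameter does
```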
```diff
@@ -384,15 +397,15 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, metaclass=

     def __init__(
         self,
-        path:
-        name:
-        hyperparameters:
-        freq:
+        path: str | None = None,
+        name: str | None = None,
+        hyperparameters: dict[str, Any] | None = None,
+        freq: str | None = None,
         prediction_length: int = 1,
-        covariate_metadata:
+        covariate_metadata: CovariateMetadata | None = None,
         target: str = "target",
         quantile_levels: Sequence[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
-        eval_metric:
+        eval_metric: str | TimeSeriesScorer | None = None,
     ):
         # TODO: make freq a required argument in AbstractTimeSeriesModel
         super().__init__(
@@ -406,9 +419,9 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, metaclass=
             quantile_levels=quantile_levels,
             eval_metric=eval_metric,
         )
-        self.target_scaler:
-        self.covariate_scaler:
-        self.covariate_regressor:
+        self.target_scaler: TargetScaler | None
+        self.covariate_scaler: CovariateScaler | None
+        self.covariate_regressor: CovariateRegressor | None

     def _initialize_transforms_and_regressor(self) -> None:
         self.target_scaler = get_target_scaler(self.get_hyperparameters().get("target_scaler"), target=self.target)
@@ -433,8 +446,8 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, metaclass=
     def fit(
         self,
         train_data: TimeSeriesDataFrame,
-        val_data:
-        time_limit:
+        val_data: TimeSeriesDataFrame | None = None,
+        time_limit: float | None = None,
         verbosity: int = 2,
         **kwargs,
     ) -> Self:
@@ -527,10 +540,10 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, metaclass=
     def _fit(
         self,
         train_data: TimeSeriesDataFrame,
-        val_data:
-        time_limit:
-        num_cpus:
-        num_gpus:
+        val_data: TimeSeriesDataFrame | None = None,
+        time_limit: float | None = None,
+        num_cpus: int | None = None,
+        num_gpus: int | None = None,
         verbosity: int = 2,
         **kwargs,
     ) -> None:
@@ -551,7 +564,7 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, metaclass=
             "as hyperparameters when initializing or use `hyperparameter_tune` instead."
         )

-    def _log_unused_hyperparameters(self, extra_allowed_hyperparameters:
+    def _log_unused_hyperparameters(self, extra_allowed_hyperparameters: list[str] | None = None) -> None:
         """Log a warning if unused hyperparameters were provided to the model."""
         allowed_hyperparameters = self.allowed_hyperparameters
         if extra_allowed_hyperparameters is not None:
@@ -567,7 +580,7 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, metaclass=
     def predict(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
+        known_covariates: TimeSeriesDataFrame | None = None,
         **kwargs,
     ) -> TimeSeriesDataFrame:
         """Given a dataset, predict the next `self.prediction_length` time steps.
@@ -648,14 +661,13 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, metaclass=
     def _predict(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
+        known_covariates: TimeSeriesDataFrame | None = None,
         **kwargs,
     ) -> TimeSeriesDataFrame:
         """Private method for `predict`. See `predict` for documentation of arguments."""
         pass

     def _preprocess_time_limit(self, time_limit: float) -> float:
-        original_time_limit = time_limit
         max_time_limit_ratio = self._extra_ag_args.get("max_time_limit_ratio", self.default_max_time_limit_ratio)
         max_time_limit = self._extra_ag_args.get("max_time_limit")

@@ -664,16 +676,6 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, metaclass=
         if max_time_limit is not None:
             time_limit = min(time_limit, max_time_limit)

-        if original_time_limit != time_limit:
-            time_limit_og_str = f"{original_time_limit:.2f}s" if original_time_limit is not None else "None"
-            time_limit_str = f"{time_limit:.2f}s" if time_limit is not None else "None"
-            logger.debug(
-                f"\tTime limit adjusted due to model hyperparameters: "
-                f"{time_limit_og_str} -> {time_limit_str} "
-                f"(ag.max_time_limit={max_time_limit}, "
-                f"ag.max_time_limit_ratio={max_time_limit_ratio}"
-            )
-
         return time_limit

     def _get_search_space(self):
```
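Only the debug message is removed here; the clamping itself remains. A standalone sketch of the surviving logic, under the assumption (those lines are elided between the hunks) that `max_time_limit_ratio` scales the incoming budget before `max_time_limit` caps it:

```python
def preprocess_time_limit(
    time_limit: float,
    max_time_limit_ratio: float = 0.9,
    max_time_limit: float | None = None,
) -> float:
    time_limit = time_limit * max_time_limit_ratio  # assumed scaling step (elided in the diff)
    if max_time_limit is not None:
        time_limit = min(time_limit, max_time_limit)  # the cap shown above
    return time_limit

print(preprocess_time_limit(600, max_time_limit_ratio=0.5, max_time_limit=120))  # 120.0
```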
```diff
@@ -731,7 +733,7 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, metaclass=
         )
         predict_start_time = time.time()
         oof_predictions = self.predict(past_data, known_covariates=known_covariates, **predict_kwargs)
-        self.
+        self.cache_oof_predictions(oof_predictions)
         if store_predict_time:
             self.predict_time = time.time() - predict_start_time
         if store_val_score:
@@ -740,9 +742,9 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, metaclass=
     def preprocess(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
+        known_covariates: TimeSeriesDataFrame | None = None,
         is_train: bool = False,
         **kwargs,
-    ) -> tuple[TimeSeriesDataFrame,
+    ) -> tuple[TimeSeriesDataFrame, TimeSeriesDataFrame | None]:
         """Method that implements model-specific preprocessing logic."""
         return data, known_covariates
```
autogluon/timeseries/models/abstract/model_trial.py:

```diff
@@ -76,7 +76,8 @@ def fit_and_save_model(model, fit_kwargs, train_data, val_data, eval_metric, tim
     time_fit_start = time.time()
     model.fit(train_data=train_data, val_data=val_data, time_limit=time_left, **fit_kwargs)
     model.fit_time = time.time() - time_fit_start
-
+    if val_data is not None:
+        model.score_and_cache_oof(val_data, store_val_score=True, store_predict_time=True)

     logger.debug(f"\tHyperparameter tune run: {model.name}")
     logger.debug(f"\t\t{model.val_score:<7.4f}".ljust(15) + f"= Validation score ({eval_metric.name_with_sign})")
```
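The removed line is truncated in the source view, but the hunk counts (+2/−1) imply it was an unconditional scoring call; out-of-fold scoring now runs only when a validation set exists. A runnable stub of the guarded flow (`StubModel` and `fit_and_score` are stand-ins, not AutoGluon APIs):

```python
import time

class StubModel:
    """Stand-in mimicking only the attributes used by the trial function."""
    fit_time = val_score = None

    def fit(self, train_data, val_data=None, **kwargs):
        pass

    def score_and_cache_oof(self, val_data, **kwargs):
        self.val_score = 0.0

def fit_and_score(model, train_data, val_data=None):
    start = time.time()
    model.fit(train_data=train_data, val_data=val_data)
    model.fit_time = time.time() - start
    if val_data is not None:  # the new guard
        model.score_and_cache_oof(val_data, store_val_score=True, store_predict_time=True)

model = StubModel()
fit_and_score(model, train_data=[1, 2, 3])  # no val_data: scoring is skipped
assert model.val_score is None
```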
autogluon/timeseries/models/abstract/tunable.py:

```diff
@@ -5,7 +5,7 @@ import os
 import time
 from abc import ABC, abstractmethod
 from contextlib import nullcontext
-from typing import Any
+from typing import Any

 from typing_extensions import Self

@@ -37,12 +37,12 @@ class TimeSeriesTunable(Tunable, ABC):
     def hyperparameter_tune(
         self,
         train_data: TimeSeriesDataFrame,
-        val_data:
+        val_data: TimeSeriesDataFrame | None,
         val_splitter: Any = None,
-        default_num_trials:
-        refit_every_n_windows:
-        hyperparameter_tune_kwargs:
-        time_limit:
+        default_num_trials: int | None = 1,
+        refit_every_n_windows: int | None = 1,
+        hyperparameter_tune_kwargs: str | dict = "auto",
+        time_limit: float | None = None,
     ) -> tuple[dict[str, Any], Any]:
         hpo_executor = self._get_default_hpo_executor()
         hpo_executor.initialize(
@@ -144,13 +144,13 @@ class TimeSeriesTunable(Tunable, ABC):
         """
         return None

-    def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str,
+    def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str, int | float]:
         return {
             "num_cpus": 1,
         }

     def _save_with_data(
-        self, train_data: TimeSeriesDataFrame, val_data:
+        self, train_data: TimeSeriesDataFrame, val_data: TimeSeriesDataFrame | None
     ) -> tuple[str, str]:
         self.path = os.path.abspath(self.path)
         self.path_root = self.path.rsplit(self.name, 1)[0]
```
autogluon/timeseries/models/autogluon_tabular/mlforecast.py:

```diff
@@ -3,7 +3,7 @@ import logging
 import math
 import time
 import warnings
-from typing import Any, Callable, Collection,
+from typing import Any, Callable, Collection, Type

 import numpy as np
 import pandas as pd
@@ -33,7 +33,7 @@ logger = logging.getLogger(__name__)
 class TabularModel(BaseEstimator):
     """A scikit-learn compatible wrapper for arbitrary autogluon.tabular models"""

-    def __init__(self, model_class: Type[AbstractTabularModel], model_kwargs:
+    def __init__(self, model_class: Type[AbstractTabularModel], model_kwargs: dict | None = None):
         self.model_class = model_class
         self.model_kwargs = {} if model_kwargs is None else model_kwargs
         self.feature_pipeline = AutoMLPipelineFeatureGenerator(verbosity=0)
@@ -63,12 +63,12 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):

     def __init__(
         self,
-        freq:
+        freq: str | None = None,
         prediction_length: int = 1,
-        path:
-        name:
-        eval_metric:
-        hyperparameters:
+        path: str | None = None,
+        name: str | None = None,
+        eval_metric: str | TimeSeriesScorer | None = None,
+        hyperparameters: dict[str, Any] | None = None,
         **kwargs,
     ):
         super().__init__(
@@ -84,13 +84,13 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         from mlforecast.target_transforms import BaseTargetTransform

         self._sum_of_differences: int = 0  # number of time steps removed from each series by differencing
-        self._max_ts_length:
+        self._max_ts_length: int | None = None
         self._target_lags: np.ndarray
         self._date_features: list[Callable]
         self._mlf: MLForecast
-        self._scaler:
+        self._scaler: BaseTargetTransform | None = None
         self._residuals_std_per_item: pd.Series
-        self._train_target_median:
+        self._train_target_median: float | None = None
         self._non_boolean_real_covariates: list[str] = []

     def _initialize_transforms_and_regressor(self):
```
```diff
@@ -114,10 +114,10 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
     def preprocess(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
+        known_covariates: TimeSeriesDataFrame | None = None,
         is_train: bool = False,
         **kwargs,
-    ) -> tuple[TimeSeriesDataFrame,
+    ) -> tuple[TimeSeriesDataFrame, TimeSeriesDataFrame | None]:
         if is_train:
             # All-NaN series are removed; partially-NaN series in train_data are handled inside _generate_train_val_dfs
             all_nan_items = data.item_ids[
```
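The context lines show the training-side filter: series whose target is entirely NaN are dropped before fitting. The same idea as a standalone pandas sketch (the grouping code is an illustration, not the library's exact implementation):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({
    "item_id": ["A", "A", "B", "B"],
    "target": [1.0, 2.0, np.nan, np.nan],
})

is_all_nan = df.groupby("item_id")["target"].apply(lambda s: s.isna().all())
all_nan_items = is_all_nan[is_all_nan].index        # contains only "B"
filtered = df[~df["item_id"].isin(all_nan_items)]   # only item A remains
```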
```diff
@@ -159,7 +159,11 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         date_features = model_params.get("date_features")
         if date_features is None:
             date_features = get_time_features_for_frequency(self.freq)
-
+        known_covariates = self.covariate_metadata.known_covariates
+        conflicting = [f.__name__ for f in date_features if f.__name__ in known_covariates]
+        if conflicting:
+            logger.info(f"\tRemoved automatic date_features {conflicting} since they clash with known_covariates")
+        self._date_features = [f for f in date_features if f.__name__ not in known_covariates]

         target_transforms = []
         differences = model_params.get("differences")
```
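New in this hunk: automatically generated date features are matched by function name against the user's known covariates, and collisions are dropped with an info message, so a user-provided `month` column no longer coexists with an auto-generated one. The filter in isolation, with hypothetical feature functions and covariate names:

```python
import pandas as pd

def month(timestamps: pd.DatetimeIndex) -> pd.Index:
    return timestamps.month

def day_of_week(timestamps: pd.DatetimeIndex) -> pd.Index:
    return timestamps.dayofweek

date_features = [month, day_of_week]
known_covariates = ["month", "promo_flag"]  # hypothetical covariate column names

conflicting = [f.__name__ for f in date_features if f.__name__ in known_covariates]
kept = [f for f in date_features if f.__name__ not in known_covariates]
assert conflicting == ["month"] and kept == [day_of_week]
```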
```diff
@@ -212,7 +216,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
         return mlforecast_df.groupby(MLF_ITEMID, as_index=False, sort=False).tail(max_length)

     def _generate_train_val_dfs(
-        self, data: TimeSeriesDataFrame, max_num_items:
+        self, data: TimeSeriesDataFrame, max_num_items: int | None = None, max_num_samples: int | None = None
     ) -> tuple[pd.DataFrame, pd.DataFrame]:
         # Exclude items that are too short for chosen differences - otherwise exception will be raised
         if self._sum_of_differences > 0:
@@ -266,7 +270,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
     def _to_mlforecast_df(
         self,
         data: TimeSeriesDataFrame,
-        static_features:
+        static_features: pd.DataFrame | None,
         include_target: bool = True,
     ) -> pd.DataFrame:
         """Convert TimeSeriesDataFrame to a format expected by MLForecast methods `predict` and `preprocess`.
@@ -308,10 +312,10 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
     def _fit(
         self,
         train_data: TimeSeriesDataFrame,
-        val_data:
-        time_limit:
-        num_cpus:
-        num_gpus:
+        val_data: TimeSeriesDataFrame | None = None,
+        time_limit: float | None = None,
+        num_cpus: int | None = None,
+        num_gpus: int | None = None,
         verbosity: int = 2,
         **kwargs,
     ) -> None:
@@ -385,8 +389,8 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
     def _remove_short_ts_and_generate_fallback_forecast(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
-    ) -> tuple[TimeSeriesDataFrame,
+        known_covariates: TimeSeriesDataFrame | None = None,
+    ) -> tuple[TimeSeriesDataFrame, TimeSeriesDataFrame, TimeSeriesDataFrame | None]:
         """Remove series that are too short for chosen differencing from data and generate naive forecast for them.

         Returns
```
```diff
@@ -482,7 +486,7 @@ class DirectTabularModel(AbstractMLForecastModel):
     lags : list[int], default = None
         Lags of the target that will be used as features for predictions. If None, will be determined automatically
         based on the frequency of the data.
-    date_features : list[
+    date_features : list[str | Callable], default = None
         Features computed from the dates. Can be pandas date attributes or functions that will take the dates as input.
         If None, will be determined automatically based on the frequency of the data.
     differences : list[int], default = []
@@ -544,7 +548,7 @@ class DirectTabularModel(AbstractMLForecastModel):
     def _predict(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
+        known_covariates: TimeSeriesDataFrame | None = None,
         **kwargs,
     ) -> TimeSeriesDataFrame:
         from .transforms import apply_inverse_transform
@@ -610,7 +614,7 @@ class DirectTabularModel(AbstractMLForecastModel):
         return predictions_tsdf

     def _postprocess_predictions(
-        self, predictions:
+        self, predictions: np.ndarray | pd.Series, repeated_item_ids: pd.Series
     ) -> pd.DataFrame:
         if self.is_quantile_model:
             predictions_df = pd.DataFrame(predictions, columns=[str(q) for q in self.quantile_levels])
@@ -664,7 +668,7 @@ class RecursiveTabularModel(AbstractMLForecastModel):
     lags : list[int], default = None
         Lags of the target that will be used as features for predictions. If None, will be determined automatically
         based on the frequency of the data.
-    date_features : list[
+    date_features : list[str | Callable], default = None
         Features computed from the dates. Can be pandas date attributes or functions that will take the dates as input.
         If None, will be determined automatically based on the frequency of the data.
     differences : list[int], default = None
@@ -702,7 +706,7 @@ class RecursiveTabularModel(AbstractMLForecastModel):
     def _predict(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
+        known_covariates: TimeSeriesDataFrame | None = None,
         **kwargs,
     ) -> TimeSeriesDataFrame:
         original_item_id_order = data.item_ids
```
autogluon/timeseries/models/autogluon_tabular/per_step.py:

```diff
@@ -2,7 +2,7 @@ import logging
 import math
 import os
 import time
-from typing import Any, Callable, Literal,
+from typing import Any, Callable, Literal, Type

 import numpy as np
 import pandas as pd
@@ -56,7 +56,7 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
         Seasonal lags of the target used as features. Unlike trailing lags, seasonal lags are not shifted
         but filtered by availability: model for step ``h`` uses ``[lag for lag in seasonal_lags if lag > h]``.
         If None, determined automatically based on data frequency.
-    date_features : list[
+    date_features : list[str | Callable], default = None
         Features computed from the dates. Can be pandas date attributes or functions that will take the dates as input.
         If None, will be determined automatically based on the frequency of the data.
     target_scaler : {"standard", "mean_abs", "min_max", "robust", None}, default = "mean_abs"
```
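The seasonal-lag rule quoted in the docstring is easy to check by hand: the model for horizon step `h` keeps only lags that reach further back than the step it must predict.

```python
seasonal_lags = [7, 14, 28]

# Per the docstring: the model for step h uses [lag for lag in seasonal_lags if lag > h].
for h in (1, 7, 14):
    print(h, [lag for lag in seasonal_lags if lag > h])
# 1  [7, 14, 28]
# 7  [14, 28]
# 14 [28]
```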
```diff
@@ -80,7 +80,7 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
         If None, automatically determined based on available memory to prevent OOM errors.
     """

-    ag_priority =
+    ag_priority = 80
     _dummy_freq = "D"

     def __init__(self, *args, **kwargs):
@@ -94,7 +94,7 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
         self._model_cls: Type[AbstractTabularModel]
         self._n_jobs: int
         self._non_boolean_real_covariates: list[str] = []
-        self._max_ts_length:
+        self._max_ts_length: int | None = None

     @property
     def allowed_hyperparameters(self) -> list[str]:
@@ -140,11 +140,11 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
         model_hyperparameters: dict,
         problem_type: Literal["quantile", "regression"],
         eval_metric: str,
-        validation_fraction:
+        validation_fraction: float | None,
         quantile_levels: list[float],
         lags: list[int],
         date_features: list[Callable],
-        time_limit:
+        time_limit: float | None,
         num_cpus: int,
         verbosity: int,
     ) -> str:
@@ -237,7 +237,7 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
     def preprocess(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
+        known_covariates: TimeSeriesDataFrame | None = None,
         is_train: bool = False,
         **kwargs,
     ):
@@ -263,7 +263,7 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
         return data, known_covariates

     def _get_train_df(
-        self, train_data: TimeSeriesDataFrame, max_num_items:
+        self, train_data: TimeSeriesDataFrame, max_num_items: int | None, max_num_samples: int | None
     ) -> pd.DataFrame:
         if max_num_items is not None and train_data.num_items > max_num_items:
             items_to_keep = train_data.item_ids.to_series().sample(n=int(max_num_items))  # noqa: F841
```
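The last context line shows how the training set is capped: sample `max_num_items` item ids, then keep only those series. The same operation on a plain index:

```python
import pandas as pd

item_ids = pd.Index(["store_1", "store_2", "store_3", "store_4", "store_5"])
max_num_items = 3

# Same call as in _get_train_df: sample ids, then filter the dataset to them.
items_to_keep = item_ids.to_series().sample(n=int(max_num_items), random_state=0)
print(list(items_to_keep))  # three of the five ids, chosen at random
```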
```diff
@@ -305,8 +305,10 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
     def _fit(
         self,
         train_data: TimeSeriesDataFrame,
-        val_data:
-        time_limit:
+        val_data: TimeSeriesDataFrame | None = None,
+        time_limit: float | None = None,
+        num_cpus: int | None = None,
+        num_gpus: int | None = None,
         verbosity: int = 2,
         **kwargs,
     ) -> None:
@@ -455,7 +457,7 @@ class PerStepTabularModel(AbstractTimeSeriesModel):
     def _predict(
         self,
         data: TimeSeriesDataFrame,
-        known_covariates:
+        known_covariates: TimeSeriesDataFrame | None = None,
         **kwargs,
     ) -> TimeSeriesDataFrame:
         if known_covariates is not None:
```
autogluon/timeseries/models/autogluon_tabular/transforms.py:

```diff
@@ -1,4 +1,4 @@
-from typing import Literal
+from typing import Literal

 import numpy as np
 import pandas as pd
@@ -47,7 +47,7 @@ class MLForecastScaler(BaseTargetTransform):

 def apply_inverse_transform(
     df: pd.DataFrame,
-    transform:
+    transform: _BaseGroupedArrayTargetTransform | BaseTargetTransform,
 ) -> pd.DataFrame:
     """Apply inverse transformation to a dataframe, converting to GroupedArray if necessary"""
     if isinstance(transform, BaseTargetTransform):
```