autogluon.timeseries 1.4.1b20250906__py3-none-any.whl → 1.4.1b20251210__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of autogluon.timeseries might be problematic. Click here for more details.
- autogluon/timeseries/configs/hyperparameter_presets.py +2 -2
- autogluon/timeseries/dataset/ts_dataframe.py +97 -86
- autogluon/timeseries/learner.py +68 -35
- autogluon/timeseries/metrics/__init__.py +4 -4
- autogluon/timeseries/metrics/abstract.py +8 -8
- autogluon/timeseries/metrics/point.py +9 -9
- autogluon/timeseries/metrics/quantile.py +5 -5
- autogluon/timeseries/metrics/utils.py +4 -4
- autogluon/timeseries/models/__init__.py +4 -1
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +52 -39
- autogluon/timeseries/models/abstract/model_trial.py +2 -1
- autogluon/timeseries/models/abstract/tunable.py +8 -8
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +58 -62
- autogluon/timeseries/models/autogluon_tabular/per_step.py +26 -15
- autogluon/timeseries/models/autogluon_tabular/transforms.py +11 -9
- autogluon/timeseries/models/chronos/__init__.py +2 -1
- autogluon/timeseries/models/chronos/chronos2.py +361 -0
- autogluon/timeseries/models/chronos/model.py +125 -87
- autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +68 -36
- autogluon/timeseries/models/ensemble/__init__.py +34 -2
- autogluon/timeseries/models/ensemble/abstract.py +5 -42
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +236 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +73 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +167 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
- autogluon/timeseries/models/ensemble/{greedy.py → ensemble_selection.py} +41 -61
- autogluon/timeseries/models/ensemble/per_item_greedy.py +162 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +40 -0
- autogluon/timeseries/models/ensemble/{basic.py → weighted/basic.py} +6 -16
- autogluon/timeseries/models/ensemble/weighted/greedy.py +57 -0
- autogluon/timeseries/models/gluonts/abstract.py +25 -25
- autogluon/timeseries/models/gluonts/dataset.py +11 -11
- autogluon/timeseries/models/local/__init__.py +0 -7
- autogluon/timeseries/models/local/abstract_local_model.py +15 -18
- autogluon/timeseries/models/local/naive.py +2 -2
- autogluon/timeseries/models/local/npts.py +1 -1
- autogluon/timeseries/models/local/statsforecast.py +12 -12
- autogluon/timeseries/models/multi_window/multi_window_model.py +39 -24
- autogluon/timeseries/models/registry.py +3 -4
- autogluon/timeseries/models/toto/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
- autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
- autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
- autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
- autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
- autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
- autogluon/timeseries/models/toto/dataloader.py +108 -0
- autogluon/timeseries/models/toto/hf_pretrained_model.py +118 -0
- autogluon/timeseries/models/toto/model.py +236 -0
- autogluon/timeseries/predictor.py +301 -103
- autogluon/timeseries/regressor.py +27 -30
- autogluon/timeseries/splitter.py +3 -27
- autogluon/timeseries/trainer/ensemble_composer.py +439 -0
- autogluon/timeseries/trainer/model_set_builder.py +9 -9
- autogluon/timeseries/trainer/prediction_cache.py +16 -16
- autogluon/timeseries/trainer/trainer.py +300 -275
- autogluon/timeseries/trainer/utils.py +17 -0
- autogluon/timeseries/transforms/covariate_scaler.py +8 -8
- autogluon/timeseries/transforms/target_scaler.py +15 -15
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/lags.py +1 -3
- autogluon/timeseries/utils/datetime/seasonality.py +1 -3
- autogluon/timeseries/utils/features.py +18 -14
- autogluon/timeseries/utils/forecast.py +6 -7
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/version.py +1 -1
- autogluon.timeseries-1.4.1b20251210-py3.11-nspkg.pth +1 -0
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/METADATA +39 -22
- autogluon_timeseries-1.4.1b20251210.dist-info/RECORD +103 -0
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/WHEEL +1 -1
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -10
- autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
- autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -544
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -580
- autogluon.timeseries-1.4.1b20250906-py3.9-nspkg.pth +0 -1
- autogluon.timeseries-1.4.1b20250906.dist-info/RECORD +0 -75
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
from abc import ABC
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
7
|
+
|
|
8
|
+
from ..abstract import AbstractTimeSeriesEnsembleModel
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class AbstractWeightedTimeSeriesEnsembleModel(AbstractTimeSeriesEnsembleModel, ABC):
|
|
12
|
+
"""Abstract class for weighted ensembles which assign one (global) weight per model."""
|
|
13
|
+
|
|
14
|
+
def __init__(self, name: str | None = None, **kwargs):
|
|
15
|
+
super().__init__(name=name, **kwargs)
|
|
16
|
+
self.model_to_weight: dict[str, float] = {}
|
|
17
|
+
|
|
18
|
+
@property
|
|
19
|
+
def model_names(self) -> list[str]:
|
|
20
|
+
return list(self.model_to_weight.keys())
|
|
21
|
+
|
|
22
|
+
@property
|
|
23
|
+
def model_weights(self) -> np.ndarray:
|
|
24
|
+
return np.array(list(self.model_to_weight.values()), dtype=np.float64)
|
|
25
|
+
|
|
26
|
+
def _predict(self, data: dict[str, TimeSeriesDataFrame], **kwargs) -> TimeSeriesDataFrame:
|
|
27
|
+
weighted_predictions = [data[model_name] * weight for model_name, weight in self.model_to_weight.items()]
|
|
28
|
+
return functools.reduce(lambda x, y: x + y, weighted_predictions)
|
|
29
|
+
|
|
30
|
+
def get_info(self) -> dict:
|
|
31
|
+
info = super().get_info()
|
|
32
|
+
info["model_weights"] = self.model_to_weight.copy()
|
|
33
|
+
return info
|
|
34
|
+
|
|
35
|
+
def remap_base_models(self, model_refit_map: dict[str, str]) -> None:
|
|
36
|
+
updated_weights = {}
|
|
37
|
+
for model, weight in self.model_to_weight.items():
|
|
38
|
+
model_full_name = model_refit_map.get(model, model)
|
|
39
|
+
updated_weights[model_full_name] = weight
|
|
40
|
+
self.model_to_weight = updated_weights
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
|
|
@@ -10,17 +10,12 @@ from .abstract import AbstractWeightedTimeSeriesEnsembleModel
|
|
|
10
10
|
class SimpleAverageEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
|
|
11
11
|
"""Constructs a weighted ensemble using a simple average of the constituent models' predictions."""
|
|
12
12
|
|
|
13
|
-
def __init__(self, name: Optional[str] = None, **kwargs):
|
|
14
|
-
if name is None:
|
|
15
|
-
name = "SimpleAverageEnsemble"
|
|
16
|
-
super().__init__(name=name, **kwargs)
|
|
17
|
-
|
|
18
13
|
def _fit(
|
|
19
14
|
self,
|
|
20
15
|
predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
|
|
21
16
|
data_per_window: list[TimeSeriesDataFrame],
|
|
22
|
-
model_scores:
|
|
23
|
-
time_limit:
|
|
17
|
+
model_scores: dict[str, float] | None = None,
|
|
18
|
+
time_limit: float | None = None,
|
|
24
19
|
):
|
|
25
20
|
self.model_to_weight = {}
|
|
26
21
|
num_models = len(predictions_per_window)
|
|
@@ -47,11 +42,6 @@ class PerformanceWeightedEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
|
|
|
47
42
|
36.1 (2020): 93-97.
|
|
48
43
|
"""
|
|
49
44
|
|
|
50
|
-
def __init__(self, name: Optional[str] = None, **kwargs):
|
|
51
|
-
if name is None:
|
|
52
|
-
name = "PerformanceWeightedEnsemble"
|
|
53
|
-
super().__init__(name=name, **kwargs)
|
|
54
|
-
|
|
55
45
|
def _get_default_hyperparameters(self) -> dict[str, Any]:
|
|
56
46
|
return {"weight_scheme": "sqrt"}
|
|
57
47
|
|
|
@@ -59,12 +49,12 @@ class PerformanceWeightedEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
|
|
|
59
49
|
self,
|
|
60
50
|
predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
|
|
61
51
|
data_per_window: list[TimeSeriesDataFrame],
|
|
62
|
-
model_scores:
|
|
63
|
-
time_limit:
|
|
52
|
+
model_scores: dict[str, float] | None = None,
|
|
53
|
+
time_limit: float | None = None,
|
|
64
54
|
):
|
|
65
55
|
assert model_scores is not None
|
|
66
56
|
|
|
67
|
-
weight_scheme = self.
|
|
57
|
+
weight_scheme = self.get_hyperparameter("weight_scheme")
|
|
68
58
|
|
|
69
59
|
# drop NaNs
|
|
70
60
|
model_scores = {k: v for k, v in model_scores.items() if np.isfinite(v)}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import pprint
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from autogluon.timeseries import TimeSeriesDataFrame
|
|
6
|
+
|
|
7
|
+
from ..ensemble_selection import fit_time_series_ensemble_selection
|
|
8
|
+
from .abstract import AbstractWeightedTimeSeriesEnsembleModel
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class GreedyEnsemble(AbstractWeightedTimeSeriesEnsembleModel):
|
|
14
|
+
"""Constructs a weighted ensemble using the greedy Ensemble Selection algorithm by
|
|
15
|
+
Caruana et al. [Car2004]
|
|
16
|
+
|
|
17
|
+
Other Parameters
|
|
18
|
+
----------------
|
|
19
|
+
ensemble_size: int, default = 100
|
|
20
|
+
Number of models (with replacement) to include in the ensemble.
|
|
21
|
+
|
|
22
|
+
References
|
|
23
|
+
----------
|
|
24
|
+
.. [Car2004] Caruana, Rich, et al. "Ensemble selection from libraries of models."
|
|
25
|
+
Proceedings of the twenty-first international conference on Machine learning. 2004.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
def __init__(self, name: str | None = None, **kwargs):
|
|
29
|
+
if name is None:
|
|
30
|
+
# FIXME: the name here is kept for backward compatibility. It will be called
|
|
31
|
+
# GreedyEnsemble in v1.4 once ensemble choices are exposed
|
|
32
|
+
name = "WeightedEnsemble"
|
|
33
|
+
super().__init__(name=name, **kwargs)
|
|
34
|
+
|
|
35
|
+
def _get_default_hyperparameters(self) -> dict[str, Any]:
|
|
36
|
+
return {"ensemble_size": 100}
|
|
37
|
+
|
|
38
|
+
def _fit(
|
|
39
|
+
self,
|
|
40
|
+
predictions_per_window: dict[str, list[TimeSeriesDataFrame]],
|
|
41
|
+
data_per_window: list[TimeSeriesDataFrame],
|
|
42
|
+
model_scores: dict[str, float] | None = None,
|
|
43
|
+
time_limit: float | None = None,
|
|
44
|
+
):
|
|
45
|
+
model_to_weight = fit_time_series_ensemble_selection(
|
|
46
|
+
data_per_window=data_per_window,
|
|
47
|
+
predictions_per_window=predictions_per_window,
|
|
48
|
+
ensemble_size=self.get_hyperparameter("ensemble_size"),
|
|
49
|
+
eval_metric=self.eval_metric,
|
|
50
|
+
prediction_length=self.prediction_length,
|
|
51
|
+
target=self.target,
|
|
52
|
+
time_limit=time_limit,
|
|
53
|
+
)
|
|
54
|
+
self.model_to_weight = {model: weight for model, weight in model_to_weight.items() if weight > 0}
|
|
55
|
+
|
|
56
|
+
weights_for_printing = {model: round(float(weight), 2) for model, weight in self.model_to_weight.items()}
|
|
57
|
+
logger.info(f"\tEnsemble weights: {pprint.pformat(weights_for_printing, width=200)}")
|
|
@@ -3,7 +3,7 @@ import os
|
|
|
3
3
|
import shutil
|
|
4
4
|
from datetime import timedelta
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import TYPE_CHECKING, Any, Callable,
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Callable, Type, cast, overload
|
|
7
7
|
|
|
8
8
|
import gluonts
|
|
9
9
|
import gluonts.core.settings
|
|
@@ -21,7 +21,7 @@ from autogluon.core.hpo.constants import RAY_BACKEND
|
|
|
21
21
|
from autogluon.tabular.models.tabular_nn.utils.categorical_encoders import (
|
|
22
22
|
OneHotMergeRaresHandleUnknownEncoder as OneHotEncoder,
|
|
23
23
|
)
|
|
24
|
-
from autogluon.timeseries.dataset
|
|
24
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
25
25
|
from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
|
|
26
26
|
from autogluon.timeseries.utils.warning_filters import disable_root_logger, warning_filter
|
|
27
27
|
|
|
@@ -72,12 +72,12 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
|
|
|
72
72
|
|
|
73
73
|
def __init__(
|
|
74
74
|
self,
|
|
75
|
-
freq:
|
|
75
|
+
freq: str | None = None,
|
|
76
76
|
prediction_length: int = 1,
|
|
77
|
-
path:
|
|
78
|
-
name:
|
|
79
|
-
eval_metric:
|
|
80
|
-
hyperparameters:
|
|
77
|
+
path: str | None = None,
|
|
78
|
+
name: str | None = None,
|
|
79
|
+
eval_metric: str | None = None,
|
|
80
|
+
hyperparameters: dict[str, Any] | None = None,
|
|
81
81
|
**kwargs, # noqa
|
|
82
82
|
):
|
|
83
83
|
super().__init__(
|
|
@@ -89,9 +89,9 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
|
|
|
89
89
|
hyperparameters=hyperparameters,
|
|
90
90
|
**kwargs,
|
|
91
91
|
)
|
|
92
|
-
self.gts_predictor:
|
|
93
|
-
self._ohe_generator_known:
|
|
94
|
-
self._ohe_generator_past:
|
|
92
|
+
self.gts_predictor: GluonTSPredictor | None = None
|
|
93
|
+
self._ohe_generator_known: OneHotEncoder | None = None
|
|
94
|
+
self._ohe_generator_past: OneHotEncoder | None = None
|
|
95
95
|
self.callbacks = []
|
|
96
96
|
# Following attributes may be overridden during fit() based on train_data & model parameters
|
|
97
97
|
self.num_feat_static_cat = 0
|
|
@@ -105,7 +105,7 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
|
|
|
105
105
|
self.past_feat_dynamic_cat_cardinality: list[int] = []
|
|
106
106
|
self.negative_data = True
|
|
107
107
|
|
|
108
|
-
def save(self, path:
|
|
108
|
+
def save(self, path: str | None = None, verbose: bool = True) -> str:
|
|
109
109
|
# We flush the callbacks instance variable if it has been set. It can keep weak references, which breaks training
|
|
110
110
|
self.callbacks = []
|
|
111
111
|
# The GluonTS predictor is serialized using custom logic
|
|
@@ -277,8 +277,8 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
|
|
|
277
277
|
|
|
278
278
|
return torch.cuda.is_available()
|
|
279
279
|
|
|
280
|
-
def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str,
|
|
281
|
-
minimum_resources: dict[str,
|
|
280
|
+
def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str, int | float]:
|
|
281
|
+
minimum_resources: dict[str, int | float] = {"num_cpus": 1}
|
|
282
282
|
# if GPU is available, we train with 1 GPU per trial
|
|
283
283
|
if is_gpu_available:
|
|
284
284
|
minimum_resources["num_gpus"] = 1
|
|
@@ -289,8 +289,8 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
|
|
|
289
289
|
@overload
|
|
290
290
|
def _to_gluonts_dataset(self, time_series_df: TimeSeriesDataFrame, known_covariates=None) -> GluonTSDataset: ...
|
|
291
291
|
def _to_gluonts_dataset(
|
|
292
|
-
self, time_series_df:
|
|
293
|
-
) ->
|
|
292
|
+
self, time_series_df: TimeSeriesDataFrame | None, known_covariates: TimeSeriesDataFrame | None = None
|
|
293
|
+
) -> GluonTSDataset | None:
|
|
294
294
|
if time_series_df is not None:
|
|
295
295
|
# TODO: Preprocess real-valued features with StdScaler?
|
|
296
296
|
if self.num_feat_static_cat > 0:
|
|
@@ -388,10 +388,10 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
|
|
|
388
388
|
def _fit(
|
|
389
389
|
self,
|
|
390
390
|
train_data: TimeSeriesDataFrame,
|
|
391
|
-
val_data:
|
|
392
|
-
time_limit:
|
|
393
|
-
num_cpus:
|
|
394
|
-
num_gpus:
|
|
391
|
+
val_data: TimeSeriesDataFrame | None = None,
|
|
392
|
+
time_limit: float | None = None,
|
|
393
|
+
num_cpus: int | None = None,
|
|
394
|
+
num_gpus: int | None = None,
|
|
395
395
|
verbosity: int = 2,
|
|
396
396
|
**kwargs,
|
|
397
397
|
) -> None:
|
|
@@ -438,8 +438,8 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
|
|
|
438
438
|
|
|
439
439
|
def _get_callbacks(
|
|
440
440
|
self,
|
|
441
|
-
time_limit:
|
|
442
|
-
early_stopping_patience:
|
|
441
|
+
time_limit: float | None,
|
|
442
|
+
early_stopping_patience: int | None = None,
|
|
443
443
|
) -> list[Callable]:
|
|
444
444
|
"""Retrieve a list of callback objects for the GluonTS trainer"""
|
|
445
445
|
from lightning.pytorch.callbacks import EarlyStopping, Timer
|
|
@@ -454,7 +454,7 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
|
|
|
454
454
|
def _predict(
|
|
455
455
|
self,
|
|
456
456
|
data: TimeSeriesDataFrame,
|
|
457
|
-
known_covariates:
|
|
457
|
+
known_covariates: TimeSeriesDataFrame | None = None,
|
|
458
458
|
**kwargs,
|
|
459
459
|
) -> TimeSeriesDataFrame:
|
|
460
460
|
if self.gts_predictor is None:
|
|
@@ -471,8 +471,8 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
|
|
|
471
471
|
def _predict_gluonts_forecasts(
|
|
472
472
|
self,
|
|
473
473
|
data: TimeSeriesDataFrame,
|
|
474
|
-
known_covariates:
|
|
475
|
-
num_samples:
|
|
474
|
+
known_covariates: TimeSeriesDataFrame | None = None,
|
|
475
|
+
num_samples: int | None = None,
|
|
476
476
|
) -> list[Forecast]:
|
|
477
477
|
assert self.gts_predictor is not None, "GluonTS models must be fit before predicting."
|
|
478
478
|
gts_data = self._to_gluonts_dataset(data, known_covariates=known_covariates)
|
|
@@ -566,7 +566,7 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
|
|
|
566
566
|
) -> TimeSeriesDataFrame:
|
|
567
567
|
from gluonts.torch.model.forecast import DistributionForecast
|
|
568
568
|
|
|
569
|
-
item_ids = forecast_index.unique(level=ITEMID)
|
|
569
|
+
item_ids = forecast_index.unique(level=TimeSeriesDataFrame.ITEMID)
|
|
570
570
|
if isinstance(forecasts[0], SampleForecast):
|
|
571
571
|
forecast_df = self._stack_sample_forecasts(cast(list[SampleForecast], forecasts), item_ids)
|
|
572
572
|
elif isinstance(forecasts[0], QuantileForecast):
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
from typing import Any, Iterator,
|
|
1
|
+
from typing import Any, Iterator, Type
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
import pandas as pd
|
|
5
5
|
from gluonts.dataset.common import Dataset as GluonTSDataset
|
|
6
6
|
from gluonts.dataset.field_names import FieldName
|
|
7
7
|
|
|
8
|
-
from autogluon.timeseries.dataset
|
|
8
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
9
9
|
from autogluon.timeseries.utils.datetime import norm_freq_str
|
|
10
10
|
|
|
11
11
|
|
|
@@ -17,14 +17,14 @@ class SimpleGluonTSDataset(GluonTSDataset):
|
|
|
17
17
|
target_df: TimeSeriesDataFrame,
|
|
18
18
|
freq: str,
|
|
19
19
|
target_column: str = "target",
|
|
20
|
-
feat_static_cat:
|
|
21
|
-
feat_static_real:
|
|
22
|
-
feat_dynamic_cat:
|
|
23
|
-
feat_dynamic_real:
|
|
24
|
-
past_feat_dynamic_cat:
|
|
25
|
-
past_feat_dynamic_real:
|
|
20
|
+
feat_static_cat: np.ndarray | None = None,
|
|
21
|
+
feat_static_real: np.ndarray | None = None,
|
|
22
|
+
feat_dynamic_cat: np.ndarray | None = None,
|
|
23
|
+
feat_dynamic_real: np.ndarray | None = None,
|
|
24
|
+
past_feat_dynamic_cat: np.ndarray | None = None,
|
|
25
|
+
past_feat_dynamic_real: np.ndarray | None = None,
|
|
26
26
|
includes_future: bool = False,
|
|
27
|
-
prediction_length:
|
|
27
|
+
prediction_length: int | None = None,
|
|
28
28
|
):
|
|
29
29
|
assert target_df is not None
|
|
30
30
|
# Convert TimeSeriesDataFrame to pd.Series for faster processing
|
|
@@ -44,11 +44,11 @@ class SimpleGluonTSDataset(GluonTSDataset):
|
|
|
44
44
|
# Replace inefficient groupby ITEMID with indptr that stores start:end of each time series
|
|
45
45
|
self.item_ids = target_df.item_ids
|
|
46
46
|
self.indptr = target_df.get_indptr()
|
|
47
|
-
self.start_timestamps = target_df.index[self.indptr[:-1]].to_frame(index=False)[TIMESTAMP]
|
|
47
|
+
self.start_timestamps = target_df.index[self.indptr[:-1]].to_frame(index=False)[TimeSeriesDataFrame.TIMESTAMP]
|
|
48
48
|
assert len(self.item_ids) == len(self.start_timestamps)
|
|
49
49
|
|
|
50
50
|
@staticmethod
|
|
51
|
-
def _astype(array:
|
|
51
|
+
def _astype(array: np.ndarray | None, dtype: Type[np.generic]) -> np.ndarray | None:
|
|
52
52
|
if array is None:
|
|
53
53
|
return None
|
|
54
54
|
else:
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import joblib.externals.loky
|
|
2
|
-
|
|
3
1
|
from .naive import AverageModel, NaiveModel, SeasonalAverageModel, SeasonalNaiveModel
|
|
4
2
|
from .npts import NPTSModel
|
|
5
3
|
from .statsforecast import (
|
|
@@ -15,8 +13,3 @@ from .statsforecast import (
|
|
|
15
13
|
ThetaModel,
|
|
16
14
|
ZeroModel,
|
|
17
15
|
)
|
|
18
|
-
|
|
19
|
-
# By default, joblib w/ loky backend kills processes that take >300MB of RAM assuming that this is caused by a memory
|
|
20
|
-
# leak. This leads to problems for some memory-hungry models like AutoARIMA/Theta.
|
|
21
|
-
# This monkey patch removes this undesired behavior
|
|
22
|
-
joblib.externals.loky.process_executor._MAX_MEMORY_LEAK_SIZE = int(3e10)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import time
|
|
3
3
|
from multiprocessing import TimeoutError
|
|
4
|
-
from typing import Any, Callable
|
|
4
|
+
from typing import Any, Callable
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import pandas as pd
|
|
@@ -9,19 +9,16 @@ from joblib import Parallel, cpu_count, delayed
|
|
|
9
9
|
from scipy.stats import norm
|
|
10
10
|
|
|
11
11
|
from autogluon.core.utils.exceptions import TimeLimitExceeded
|
|
12
|
-
from autogluon.timeseries.dataset
|
|
12
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
13
13
|
from autogluon.timeseries.metrics import TimeSeriesScorer
|
|
14
14
|
from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
|
|
15
|
+
from autogluon.timeseries.utils.constants import AG_DEFAULT_N_JOBS
|
|
15
16
|
from autogluon.timeseries.utils.datetime import get_seasonality
|
|
16
17
|
from autogluon.timeseries.utils.warning_filters import warning_filter
|
|
17
18
|
|
|
18
19
|
logger = logging.getLogger(__name__)
|
|
19
20
|
|
|
20
21
|
|
|
21
|
-
# We use the same default n_jobs across AG-TS to ensure that Joblib reuses the process pool
|
|
22
|
-
AG_DEFAULT_N_JOBS = max(cpu_count(only_physical_cores=True), 1)
|
|
23
|
-
|
|
24
|
-
|
|
25
22
|
class AbstractLocalModel(AbstractTimeSeriesModel):
|
|
26
23
|
"""Abstract class for local forecasting models that are trained separately for each time series.
|
|
27
24
|
|
|
@@ -40,18 +37,18 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
|
|
|
40
37
|
"""
|
|
41
38
|
|
|
42
39
|
allowed_local_model_args: list[str] = []
|
|
43
|
-
default_max_ts_length:
|
|
40
|
+
default_max_ts_length: int | None = 2500
|
|
44
41
|
default_max_time_limit_ratio = 1.0
|
|
45
42
|
init_time_in_seconds: int = 0
|
|
46
43
|
|
|
47
44
|
def __init__(
|
|
48
45
|
self,
|
|
49
|
-
freq:
|
|
46
|
+
freq: str | None = None,
|
|
50
47
|
prediction_length: int = 1,
|
|
51
|
-
path:
|
|
52
|
-
name:
|
|
53
|
-
eval_metric:
|
|
54
|
-
hyperparameters:
|
|
48
|
+
path: str | None = None,
|
|
49
|
+
name: str | None = None,
|
|
50
|
+
eval_metric: str | TimeSeriesScorer | None = None,
|
|
51
|
+
hyperparameters: dict[str, Any] | None = None,
|
|
55
52
|
**kwargs, # noqa
|
|
56
53
|
):
|
|
57
54
|
super().__init__(
|
|
@@ -79,10 +76,10 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
|
|
|
79
76
|
def preprocess(
|
|
80
77
|
self,
|
|
81
78
|
data: TimeSeriesDataFrame,
|
|
82
|
-
known_covariates:
|
|
79
|
+
known_covariates: TimeSeriesDataFrame | None = None,
|
|
83
80
|
is_train: bool = False,
|
|
84
81
|
**kwargs,
|
|
85
|
-
) -> tuple[TimeSeriesDataFrame,
|
|
82
|
+
) -> tuple[TimeSeriesDataFrame, TimeSeriesDataFrame | None]:
|
|
86
83
|
if not self._get_tags()["allow_nan"]:
|
|
87
84
|
data = data.fill_missing_values()
|
|
88
85
|
return data, known_covariates
|
|
@@ -95,7 +92,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
|
|
|
95
92
|
}
|
|
96
93
|
|
|
97
94
|
@staticmethod
|
|
98
|
-
def _compute_n_jobs(n_jobs:
|
|
95
|
+
def _compute_n_jobs(n_jobs: int | float) -> int:
|
|
99
96
|
if isinstance(n_jobs, float) and 0 < n_jobs <= 1:
|
|
100
97
|
return max(int(cpu_count() * n_jobs), 1)
|
|
101
98
|
elif isinstance(n_jobs, int):
|
|
@@ -103,7 +100,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
|
|
|
103
100
|
else:
|
|
104
101
|
raise ValueError(f"n_jobs must be a float between 0 and 1 or an integer (received n_jobs = {n_jobs})")
|
|
105
102
|
|
|
106
|
-
def _fit(self, train_data: TimeSeriesDataFrame, time_limit:
|
|
103
|
+
def _fit(self, train_data: TimeSeriesDataFrame, time_limit: int | None = None, **kwargs):
|
|
107
104
|
self._check_fit_params()
|
|
108
105
|
|
|
109
106
|
if time_limit is not None and time_limit < self.init_time_in_seconds:
|
|
@@ -145,7 +142,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
|
|
|
145
142
|
data = data.slice_by_timestep(-max_ts_length, None)
|
|
146
143
|
|
|
147
144
|
indptr = data.get_indptr()
|
|
148
|
-
target_series = data[self.target].droplevel(level=ITEMID)
|
|
145
|
+
target_series = data[self.target].droplevel(level=TimeSeriesDataFrame.ITEMID)
|
|
149
146
|
all_series = (target_series[indptr[i] : indptr[i + 1]] for i in range(len(indptr) - 1))
|
|
150
147
|
|
|
151
148
|
# timeout ensures that no individual job takes longer than time_limit
|
|
@@ -184,7 +181,7 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
|
|
|
184
181
|
self,
|
|
185
182
|
time_series: pd.Series,
|
|
186
183
|
use_fallback_model: bool,
|
|
187
|
-
end_time:
|
|
184
|
+
end_time: float | None = None,
|
|
188
185
|
) -> tuple[pd.DataFrame, bool]:
|
|
189
186
|
if end_time is not None and time.time() >= end_time:
|
|
190
187
|
raise TimeLimitExceeded
|
|
@@ -96,7 +96,7 @@ class AverageModel(AbstractLocalModel):
|
|
|
96
96
|
When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
|
|
97
97
|
When set to a positive integer, that many cores are used.
|
|
98
98
|
When set to -1, all CPU cores are used.
|
|
99
|
-
max_ts_length :
|
|
99
|
+
max_ts_length : int | None, default = None
|
|
100
100
|
If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
|
|
101
101
|
This significantly speeds up fitting and usually leads to no change in accuracy.
|
|
102
102
|
"""
|
|
@@ -136,7 +136,7 @@ class SeasonalAverageModel(AbstractLocalModel):
|
|
|
136
136
|
When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
|
|
137
137
|
When set to a positive integer, that many cores are used.
|
|
138
138
|
When set to -1, all CPU cores are used.
|
|
139
|
-
max_ts_length :
|
|
139
|
+
max_ts_length : int | None, default = None
|
|
140
140
|
If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
|
|
141
141
|
This significantly speeds up fitting and usually leads to no change in accuracy.
|
|
142
142
|
"""
|
|
@@ -31,7 +31,7 @@ class NPTSModel(AbstractLocalModel):
|
|
|
31
31
|
When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
|
|
32
32
|
When set to a positive integer, that many cores are used.
|
|
33
33
|
When set to -1, all CPU cores are used.
|
|
34
|
-
max_ts_length :
|
|
34
|
+
max_ts_length : int | None, default = 2500
|
|
35
35
|
If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
|
|
36
36
|
This significantly speeds up fitting and usually leads to no change in accuracy.
|
|
37
37
|
"""
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Any,
|
|
2
|
+
from typing import Any, Type
|
|
3
3
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
import pandas as pd
|
|
@@ -19,7 +19,7 @@ class AbstractStatsForecastModel(AbstractLocalModel):
|
|
|
19
19
|
local_model_args["season_length"] = seasonal_period
|
|
20
20
|
return local_model_args
|
|
21
21
|
|
|
22
|
-
def _get_model_type(self, variant:
|
|
22
|
+
def _get_model_type(self, variant: str | None = None) -> Type:
|
|
23
23
|
raise NotImplementedError
|
|
24
24
|
|
|
25
25
|
def _get_local_model(self, local_model_args: dict):
|
|
@@ -162,7 +162,7 @@ class AutoARIMAModel(AbstractProbabilisticStatsForecastModel):
|
|
|
162
162
|
local_model_args.setdefault("allowmean", True)
|
|
163
163
|
return local_model_args
|
|
164
164
|
|
|
165
|
-
def _get_model_type(self, variant:
|
|
165
|
+
def _get_model_type(self, variant: str | None = None):
|
|
166
166
|
from statsforecast.models import AutoARIMA
|
|
167
167
|
|
|
168
168
|
return AutoARIMA
|
|
@@ -232,7 +232,7 @@ class ARIMAModel(AbstractProbabilisticStatsForecastModel):
|
|
|
232
232
|
local_model_args.setdefault("order", (1, 1, 1))
|
|
233
233
|
return local_model_args
|
|
234
234
|
|
|
235
|
-
def _get_model_type(self, variant:
|
|
235
|
+
def _get_model_type(self, variant: str | None = None):
|
|
236
236
|
from statsforecast.models import ARIMA
|
|
237
237
|
|
|
238
238
|
return ARIMA
|
|
@@ -277,7 +277,7 @@ class AutoETSModel(AbstractProbabilisticStatsForecastModel):
|
|
|
277
277
|
"seasonal_period",
|
|
278
278
|
]
|
|
279
279
|
|
|
280
|
-
def _get_model_type(self, variant:
|
|
280
|
+
def _get_model_type(self, variant: str | None = None):
|
|
281
281
|
from statsforecast.models import AutoETS
|
|
282
282
|
|
|
283
283
|
return AutoETS
|
|
@@ -380,7 +380,7 @@ class DynamicOptimizedThetaModel(AbstractProbabilisticStatsForecastModel):
|
|
|
380
380
|
"seasonal_period",
|
|
381
381
|
]
|
|
382
382
|
|
|
383
|
-
def _get_model_type(self, variant:
|
|
383
|
+
def _get_model_type(self, variant: str | None = None):
|
|
384
384
|
from statsforecast.models import DynamicOptimizedTheta
|
|
385
385
|
|
|
386
386
|
return DynamicOptimizedTheta
|
|
@@ -425,7 +425,7 @@ class ThetaModel(AbstractProbabilisticStatsForecastModel):
|
|
|
425
425
|
"seasonal_period",
|
|
426
426
|
]
|
|
427
427
|
|
|
428
|
-
def _get_model_type(self, variant:
|
|
428
|
+
def _get_model_type(self, variant: str | None = None):
|
|
429
429
|
from statsforecast.models import Theta
|
|
430
430
|
|
|
431
431
|
return Theta
|
|
@@ -546,7 +546,7 @@ class AutoCESModel(AbstractProbabilisticStatsForecastModel):
|
|
|
546
546
|
"seasonal_period",
|
|
547
547
|
]
|
|
548
548
|
|
|
549
|
-
def _get_model_type(self, variant:
|
|
549
|
+
def _get_model_type(self, variant: str | None = None):
|
|
550
550
|
from statsforecast.models import AutoCES
|
|
551
551
|
|
|
552
552
|
return AutoCES
|
|
@@ -610,7 +610,7 @@ class ADIDAModel(AbstractStatsForecastIntermittentDemandModel):
|
|
|
610
610
|
|
|
611
611
|
ag_priority = 10
|
|
612
612
|
|
|
613
|
-
def _get_model_type(self, variant:
|
|
613
|
+
def _get_model_type(self, variant: str | None = None):
|
|
614
614
|
from statsforecast.models import ADIDA
|
|
615
615
|
|
|
616
616
|
return ADIDA
|
|
@@ -652,7 +652,7 @@ class CrostonModel(AbstractStatsForecastIntermittentDemandModel):
|
|
|
652
652
|
"variant",
|
|
653
653
|
]
|
|
654
654
|
|
|
655
|
-
def _get_model_type(self, variant:
|
|
655
|
+
def _get_model_type(self, variant: str | None = None):
|
|
656
656
|
from statsforecast.models import CrostonClassic, CrostonOptimized, CrostonSBA
|
|
657
657
|
|
|
658
658
|
model_variants = {
|
|
@@ -702,7 +702,7 @@ class IMAPAModel(AbstractStatsForecastIntermittentDemandModel):
|
|
|
702
702
|
|
|
703
703
|
ag_priority = 10
|
|
704
704
|
|
|
705
|
-
def _get_model_type(self, variant:
|
|
705
|
+
def _get_model_type(self, variant: str | None = None):
|
|
706
706
|
from statsforecast.models import IMAPA
|
|
707
707
|
|
|
708
708
|
return IMAPA
|
|
@@ -726,7 +726,7 @@ class ZeroModel(AbstractStatsForecastIntermittentDemandModel):
|
|
|
726
726
|
|
|
727
727
|
ag_priority = 100
|
|
728
728
|
|
|
729
|
-
def _get_model_type(self, variant:
|
|
729
|
+
def _get_model_type(self, variant: str | None = None):
|
|
730
730
|
# ZeroModel does not depend on a StatsForecast implementation
|
|
731
731
|
raise NotImplementedError
|
|
732
732
|
|