autogluon.timeseries 1.0.1b20240304__py3-none-any.whl → 1.4.1b20251210__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of autogluon.timeseries might be problematic. Click here for more details.
- autogluon/timeseries/configs/__init__.py +3 -2
- autogluon/timeseries/configs/hyperparameter_presets.py +62 -0
- autogluon/timeseries/configs/predictor_presets.py +84 -0
- autogluon/timeseries/dataset/ts_dataframe.py +339 -186
- autogluon/timeseries/learner.py +192 -60
- autogluon/timeseries/metrics/__init__.py +55 -11
- autogluon/timeseries/metrics/abstract.py +96 -25
- autogluon/timeseries/metrics/point.py +186 -39
- autogluon/timeseries/metrics/quantile.py +47 -20
- autogluon/timeseries/metrics/utils.py +6 -6
- autogluon/timeseries/models/__init__.py +13 -7
- autogluon/timeseries/models/abstract/__init__.py +2 -2
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +533 -273
- autogluon/timeseries/models/abstract/model_trial.py +10 -10
- autogluon/timeseries/models/abstract/tunable.py +189 -0
- autogluon/timeseries/models/autogluon_tabular/__init__.py +2 -0
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +369 -215
- autogluon/timeseries/models/autogluon_tabular/per_step.py +513 -0
- autogluon/timeseries/models/autogluon_tabular/transforms.py +67 -0
- autogluon/timeseries/models/autogluon_tabular/utils.py +3 -51
- autogluon/timeseries/models/chronos/__init__.py +4 -0
- autogluon/timeseries/models/chronos/chronos2.py +361 -0
- autogluon/timeseries/models/chronos/model.py +738 -0
- autogluon/timeseries/models/chronos/utils.py +369 -0
- autogluon/timeseries/models/ensemble/__init__.py +35 -2
- autogluon/timeseries/models/ensemble/{abstract_timeseries_ensemble.py → abstract.py} +50 -26
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +236 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +73 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +167 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
- autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
- autogluon/timeseries/models/ensemble/per_item_greedy.py +162 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +40 -0
- autogluon/timeseries/models/ensemble/weighted/basic.py +78 -0
- autogluon/timeseries/models/ensemble/weighted/greedy.py +57 -0
- autogluon/timeseries/models/gluonts/__init__.py +3 -1
- autogluon/timeseries/models/gluonts/abstract.py +583 -0
- autogluon/timeseries/models/gluonts/dataset.py +109 -0
- autogluon/timeseries/models/gluonts/{torch/models.py → models.py} +185 -44
- autogluon/timeseries/models/local/__init__.py +1 -10
- autogluon/timeseries/models/local/abstract_local_model.py +150 -97
- autogluon/timeseries/models/local/naive.py +31 -23
- autogluon/timeseries/models/local/npts.py +6 -2
- autogluon/timeseries/models/local/statsforecast.py +99 -112
- autogluon/timeseries/models/multi_window/multi_window_model.py +99 -40
- autogluon/timeseries/models/registry.py +64 -0
- autogluon/timeseries/models/toto/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
- autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
- autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
- autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
- autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
- autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
- autogluon/timeseries/models/toto/dataloader.py +108 -0
- autogluon/timeseries/models/toto/hf_pretrained_model.py +118 -0
- autogluon/timeseries/models/toto/model.py +236 -0
- autogluon/timeseries/predictor.py +826 -305
- autogluon/timeseries/regressor.py +253 -0
- autogluon/timeseries/splitter.py +10 -31
- autogluon/timeseries/trainer/__init__.py +2 -3
- autogluon/timeseries/trainer/ensemble_composer.py +439 -0
- autogluon/timeseries/trainer/model_set_builder.py +256 -0
- autogluon/timeseries/trainer/prediction_cache.py +149 -0
- autogluon/timeseries/trainer/trainer.py +1298 -0
- autogluon/timeseries/trainer/utils.py +17 -0
- autogluon/timeseries/transforms/__init__.py +2 -0
- autogluon/timeseries/transforms/covariate_scaler.py +164 -0
- autogluon/timeseries/transforms/target_scaler.py +149 -0
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/base.py +38 -20
- autogluon/timeseries/utils/datetime/lags.py +18 -16
- autogluon/timeseries/utils/datetime/seasonality.py +14 -14
- autogluon/timeseries/utils/datetime/time_features.py +17 -14
- autogluon/timeseries/utils/features.py +317 -53
- autogluon/timeseries/utils/forecast.py +31 -17
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/utils/warning_filters.py +44 -6
- autogluon/timeseries/version.py +2 -1
- autogluon.timeseries-1.4.1b20251210-py3.11-nspkg.pth +1 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/METADATA +71 -47
- autogluon_timeseries-1.4.1b20251210.dist-info/RECORD +103 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/WHEEL +1 -1
- autogluon/timeseries/configs/presets_configs.py +0 -11
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -170
- autogluon/timeseries/models/gluonts/abstract_gluonts.py +0 -550
- autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
- autogluon/timeseries/models/presets.py +0 -325
- autogluon/timeseries/trainer/abstract_trainer.py +0 -1144
- autogluon/timeseries/trainer/auto_trainer.py +0 -74
- autogluon.timeseries-1.0.1b20240304-py3.8-nspkg.pth +0 -1
- autogluon.timeseries-1.0.1b20240304.dist-info/RECORD +0 -58
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/zip-safe +0 -0
|
@@ -1,27 +1,24 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import time
|
|
3
|
-
from multiprocessing import TimeoutError
|
|
4
|
-
from typing import Any,
|
|
3
|
+
from multiprocessing import TimeoutError
|
|
4
|
+
from typing import Any, Callable
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import pandas as pd
|
|
8
|
-
from joblib import Parallel, delayed
|
|
8
|
+
from joblib import Parallel, cpu_count, delayed
|
|
9
9
|
from scipy.stats import norm
|
|
10
10
|
|
|
11
11
|
from autogluon.core.utils.exceptions import TimeLimitExceeded
|
|
12
|
-
from autogluon.timeseries.dataset
|
|
12
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
13
|
+
from autogluon.timeseries.metrics import TimeSeriesScorer
|
|
13
14
|
from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
|
|
15
|
+
from autogluon.timeseries.utils.constants import AG_DEFAULT_N_JOBS
|
|
14
16
|
from autogluon.timeseries.utils.datetime import get_seasonality
|
|
15
|
-
from autogluon.timeseries.utils.forecast import get_forecast_horizon_index_ts_dataframe
|
|
16
17
|
from autogluon.timeseries.utils.warning_filters import warning_filter
|
|
17
18
|
|
|
18
19
|
logger = logging.getLogger(__name__)
|
|
19
20
|
|
|
20
21
|
|
|
21
|
-
# We use the same default n_jobs across AG-TS to ensure that Joblib reuses the process pool
|
|
22
|
-
AG_DEFAULT_N_JOBS = max(int(cpu_count() * 0.5), 1)
|
|
23
|
-
|
|
24
|
-
|
|
25
22
|
class AbstractLocalModel(AbstractTimeSeriesModel):
|
|
26
23
|
"""Abstract class for local forecasting models that are trained separately for each time series.
|
|
27
24
|
|
|
@@ -29,49 +26,31 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
|
|
|
29
26
|
|
|
30
27
|
Attributes
|
|
31
28
|
----------
|
|
32
|
-
allowed_local_model_args
|
|
29
|
+
allowed_local_model_args
|
|
33
30
|
Argument that can be passed to the underlying local model.
|
|
34
|
-
|
|
35
|
-
Default number of CPU cores used to train models. If float, this fraction of CPU cores will be used.
|
|
36
|
-
default_max_ts_length : Optional[int]
|
|
31
|
+
default_max_ts_length
|
|
37
32
|
If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
|
|
38
33
|
This significantly speeds up fitting and usually leads to no change in accuracy.
|
|
39
|
-
init_time_in_seconds
|
|
34
|
+
init_time_in_seconds
|
|
40
35
|
Time that it takes to initialize the model in seconds (e.g., because of JIT compilation by Numba).
|
|
41
36
|
If time_limit is below this number, model won't be trained.
|
|
42
37
|
"""
|
|
43
38
|
|
|
44
|
-
allowed_local_model_args:
|
|
45
|
-
|
|
46
|
-
|
|
39
|
+
allowed_local_model_args: list[str] = []
|
|
40
|
+
default_max_ts_length: int | None = 2500
|
|
41
|
+
default_max_time_limit_ratio = 1.0
|
|
47
42
|
init_time_in_seconds: int = 0
|
|
48
43
|
|
|
49
44
|
def __init__(
|
|
50
45
|
self,
|
|
51
|
-
freq:
|
|
46
|
+
freq: str | None = None,
|
|
52
47
|
prediction_length: int = 1,
|
|
53
|
-
path:
|
|
54
|
-
name:
|
|
55
|
-
eval_metric: str = None,
|
|
56
|
-
hyperparameters:
|
|
48
|
+
path: str | None = None,
|
|
49
|
+
name: str | None = None,
|
|
50
|
+
eval_metric: str | TimeSeriesScorer | None = None,
|
|
51
|
+
hyperparameters: dict[str, Any] | None = None,
|
|
57
52
|
**kwargs, # noqa
|
|
58
53
|
):
|
|
59
|
-
if hyperparameters is None:
|
|
60
|
-
hyperparameters = {}
|
|
61
|
-
else:
|
|
62
|
-
hyperparameters = hyperparameters.copy()
|
|
63
|
-
# TODO: Replace with 'num_cpus' argument passed to fit (after predictor API is changed)
|
|
64
|
-
n_jobs = hyperparameters.pop("n_jobs", self.default_n_jobs)
|
|
65
|
-
if isinstance(n_jobs, float) and 0 < n_jobs <= 1:
|
|
66
|
-
self.n_jobs = max(int(cpu_count() * n_jobs), 1)
|
|
67
|
-
elif isinstance(n_jobs, int):
|
|
68
|
-
self.n_jobs = n_jobs
|
|
69
|
-
else:
|
|
70
|
-
raise ValueError(f"n_jobs must be a float between 0 and 1 or an integer (received n_jobs = {n_jobs})")
|
|
71
|
-
# Default values, potentially overridden inside _fit()
|
|
72
|
-
self.use_fallback_model = hyperparameters.pop("use_fallback_model", True)
|
|
73
|
-
self.max_ts_length = hyperparameters.pop("max_ts_length", self.default_max_ts_length)
|
|
74
|
-
|
|
75
54
|
super().__init__(
|
|
76
55
|
path=path,
|
|
77
56
|
freq=freq,
|
|
@@ -82,63 +61,107 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
|
|
|
82
61
|
**kwargs,
|
|
83
62
|
)
|
|
84
63
|
|
|
85
|
-
self._local_model_args:
|
|
86
|
-
self._seasonal_period:
|
|
87
|
-
self.
|
|
64
|
+
self._local_model_args: dict[str, Any]
|
|
65
|
+
self._seasonal_period: int
|
|
66
|
+
self._dummy_forecast: pd.DataFrame
|
|
67
|
+
|
|
68
|
+
@property
|
|
69
|
+
def allowed_hyperparameters(self) -> list[str]:
|
|
70
|
+
return (
|
|
71
|
+
super().allowed_hyperparameters
|
|
72
|
+
+ ["use_fallback_model", "max_ts_length", "n_jobs"]
|
|
73
|
+
+ self.allowed_local_model_args
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
def preprocess(
|
|
77
|
+
self,
|
|
78
|
+
data: TimeSeriesDataFrame,
|
|
79
|
+
known_covariates: TimeSeriesDataFrame | None = None,
|
|
80
|
+
is_train: bool = False,
|
|
81
|
+
**kwargs,
|
|
82
|
+
) -> tuple[TimeSeriesDataFrame, TimeSeriesDataFrame | None]:
|
|
83
|
+
if not self._get_tags()["allow_nan"]:
|
|
84
|
+
data = data.fill_missing_values()
|
|
85
|
+
return data, known_covariates
|
|
86
|
+
|
|
87
|
+
def _get_default_hyperparameters(self) -> dict:
|
|
88
|
+
return {
|
|
89
|
+
"n_jobs": AG_DEFAULT_N_JOBS,
|
|
90
|
+
"use_fallback_model": True,
|
|
91
|
+
"max_ts_length": self.default_max_ts_length,
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
@staticmethod
|
|
95
|
+
def _compute_n_jobs(n_jobs: int | float) -> int:
|
|
96
|
+
if isinstance(n_jobs, float) and 0 < n_jobs <= 1:
|
|
97
|
+
return max(int(cpu_count() * n_jobs), 1)
|
|
98
|
+
elif isinstance(n_jobs, int):
|
|
99
|
+
return n_jobs
|
|
100
|
+
else:
|
|
101
|
+
raise ValueError(f"n_jobs must be a float between 0 and 1 or an integer (received n_jobs = {n_jobs})")
|
|
88
102
|
|
|
89
|
-
def _fit(self, train_data: TimeSeriesDataFrame, time_limit:
|
|
103
|
+
def _fit(self, train_data: TimeSeriesDataFrame, time_limit: int | None = None, **kwargs):
|
|
90
104
|
self._check_fit_params()
|
|
91
105
|
|
|
92
106
|
if time_limit is not None and time_limit < self.init_time_in_seconds:
|
|
93
107
|
raise TimeLimitExceeded
|
|
94
108
|
|
|
95
|
-
# Initialize parameters passed to each local model
|
|
96
|
-
raw_local_model_args = self._get_model_params().copy()
|
|
97
|
-
|
|
98
|
-
unused_local_model_args = []
|
|
99
109
|
local_model_args = {}
|
|
100
|
-
for key, value in
|
|
110
|
+
for key, value in self.get_hyperparameters().items():
|
|
101
111
|
if key in self.allowed_local_model_args:
|
|
102
112
|
local_model_args[key] = value
|
|
103
|
-
else:
|
|
104
|
-
unused_local_model_args.append(key)
|
|
105
113
|
|
|
106
|
-
|
|
107
|
-
logger.warning(
|
|
108
|
-
f"{self.name} ignores following hyperparameters: {unused_local_model_args}. "
|
|
109
|
-
f"See the docstring of {self.name} for the list of supported hyperparameters."
|
|
110
|
-
)
|
|
114
|
+
self._log_unused_hyperparameters(extra_allowed_hyperparameters=self.allowed_local_model_args)
|
|
111
115
|
|
|
112
116
|
if "seasonal_period" not in local_model_args or local_model_args["seasonal_period"] is None:
|
|
113
|
-
local_model_args["seasonal_period"] = get_seasonality(
|
|
117
|
+
local_model_args["seasonal_period"] = get_seasonality(self.freq)
|
|
114
118
|
self._seasonal_period = local_model_args["seasonal_period"]
|
|
115
119
|
|
|
116
120
|
self._local_model_args = self._update_local_model_args(local_model_args=local_model_args)
|
|
117
|
-
|
|
121
|
+
|
|
122
|
+
self._dummy_forecast = self._get_dummy_forecast(train_data)
|
|
118
123
|
return self
|
|
119
124
|
|
|
120
|
-
def
|
|
125
|
+
def _get_dummy_forecast(self, train_data: TimeSeriesDataFrame, max_num_rows: int = 20_000) -> pd.DataFrame:
|
|
126
|
+
agg_functions = ["mean"] + [get_quantile_function(q) for q in self.quantile_levels]
|
|
127
|
+
target_series = train_data[self.target]
|
|
128
|
+
if len(target_series) > max_num_rows:
|
|
129
|
+
target_series = target_series.sample(max_num_rows, replace=True)
|
|
130
|
+
stats_marginal = target_series.agg(agg_functions)
|
|
131
|
+
stats_repeated = np.tile(stats_marginal.values, [self.prediction_length, 1])
|
|
132
|
+
return pd.DataFrame(stats_repeated, columns=stats_marginal.index)
|
|
133
|
+
|
|
134
|
+
def _update_local_model_args(self, local_model_args: dict[str, Any]) -> dict[str, Any]:
|
|
121
135
|
return local_model_args
|
|
122
136
|
|
|
123
137
|
def _predict(self, data: TimeSeriesDataFrame, **kwargs) -> TimeSeriesDataFrame:
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
138
|
+
model_params = self.get_hyperparameters()
|
|
139
|
+
max_ts_length = model_params["max_ts_length"]
|
|
140
|
+
if max_ts_length is not None:
|
|
141
|
+
logger.debug(f"Shortening all time series to at most {max_ts_length}")
|
|
142
|
+
data = data.slice_by_timestep(-max_ts_length, None)
|
|
127
143
|
|
|
128
|
-
|
|
129
|
-
|
|
144
|
+
indptr = data.get_indptr()
|
|
145
|
+
target_series = data[self.target].droplevel(level=TimeSeriesDataFrame.ITEMID)
|
|
146
|
+
all_series = (target_series[indptr[i] : indptr[i + 1]] for i in range(len(indptr) - 1))
|
|
130
147
|
|
|
131
148
|
# timeout ensures that no individual job takes longer than time_limit
|
|
132
149
|
# TODO: a job started late may still exceed time_limit - how to prevent that?
|
|
133
|
-
|
|
150
|
+
time_limit = kwargs.get("time_limit")
|
|
151
|
+
# TODO: Take into account num_cpus once the TimeSeriesPredictor API is updated
|
|
152
|
+
n_jobs = self._compute_n_jobs(model_params["n_jobs"])
|
|
153
|
+
timeout = None if n_jobs == 1 else time_limit
|
|
134
154
|
# end_time ensures that no new jobs are started after time_limit is exceeded
|
|
135
|
-
end_time = None if
|
|
136
|
-
executor = Parallel(
|
|
155
|
+
end_time = None if time_limit is None else time.time() + time_limit
|
|
156
|
+
executor = Parallel(n_jobs=n_jobs, timeout=timeout)
|
|
137
157
|
|
|
138
158
|
try:
|
|
139
159
|
with warning_filter():
|
|
140
160
|
predictions_with_flags = executor(
|
|
141
|
-
delayed(self._predict_wrapper)(
|
|
161
|
+
delayed(self._predict_wrapper)(
|
|
162
|
+
ts, use_fallback_model=model_params["use_fallback_model"], end_time=end_time
|
|
163
|
+
)
|
|
164
|
+
for ts in all_series
|
|
142
165
|
)
|
|
143
166
|
except TimeoutError:
|
|
144
167
|
raise TimeLimitExceeded
|
|
@@ -151,38 +174,40 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
|
|
|
151
174
|
f"({fraction_failed_models:.1%}). Fallback model SeasonalNaive was used for these time series."
|
|
152
175
|
)
|
|
153
176
|
predictions_df = pd.concat([pred for pred, _ in predictions_with_flags])
|
|
154
|
-
predictions_df.index =
|
|
177
|
+
predictions_df.index = self.get_forecast_horizon_index(data)
|
|
155
178
|
return TimeSeriesDataFrame(predictions_df)
|
|
156
179
|
|
|
157
|
-
def
|
|
158
|
-
self,
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
def _predict_wrapper(self, time_series: pd.Series, end_time: Optional[float] = None) -> Tuple[pd.DataFrame, bool]:
|
|
180
|
+
def _predict_wrapper(
|
|
181
|
+
self,
|
|
182
|
+
time_series: pd.Series,
|
|
183
|
+
use_fallback_model: bool,
|
|
184
|
+
end_time: float | None = None,
|
|
185
|
+
) -> tuple[pd.DataFrame, bool]:
|
|
165
186
|
if end_time is not None and time.time() >= end_time:
|
|
166
187
|
raise TimeLimitExceeded
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
if self.use_fallback_model:
|
|
177
|
-
result = seasonal_naive_forecast(
|
|
178
|
-
target=time_series.values.ravel(),
|
|
179
|
-
prediction_length=self.prediction_length,
|
|
180
|
-
quantile_levels=self.quantile_levels,
|
|
181
|
-
seasonal_period=self._seasonal_period,
|
|
188
|
+
|
|
189
|
+
model_failed = False
|
|
190
|
+
if time_series.isna().all():
|
|
191
|
+
result = self._dummy_forecast.copy()
|
|
192
|
+
else:
|
|
193
|
+
try:
|
|
194
|
+
result = self._predict_with_local_model(
|
|
195
|
+
time_series=time_series,
|
|
196
|
+
local_model_args=self._local_model_args.copy(),
|
|
182
197
|
)
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
198
|
+
if not np.isfinite(result.values).all():
|
|
199
|
+
raise RuntimeError("Forecast contains NaN or Inf values.")
|
|
200
|
+
except Exception:
|
|
201
|
+
if use_fallback_model:
|
|
202
|
+
result = seasonal_naive_forecast(
|
|
203
|
+
target=time_series.values.ravel(),
|
|
204
|
+
prediction_length=self.prediction_length,
|
|
205
|
+
quantile_levels=self.quantile_levels,
|
|
206
|
+
seasonal_period=self._seasonal_period,
|
|
207
|
+
)
|
|
208
|
+
model_failed = True
|
|
209
|
+
else:
|
|
210
|
+
raise
|
|
186
211
|
return result, model_failed
|
|
187
212
|
|
|
188
213
|
def _predict_with_local_model(
|
|
@@ -194,28 +219,56 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
|
|
|
194
219
|
|
|
195
220
|
|
|
196
221
|
def seasonal_naive_forecast(
|
|
197
|
-
target: np.ndarray, prediction_length: int, quantile_levels:
|
|
222
|
+
target: np.ndarray, prediction_length: int, quantile_levels: list[float], seasonal_period: int
|
|
198
223
|
) -> pd.DataFrame:
|
|
199
224
|
"""Generate seasonal naive forecast, predicting the last observed value from the same period."""
|
|
225
|
+
|
|
226
|
+
def numpy_fillna(arr: np.ndarray) -> np.ndarray:
|
|
227
|
+
"""Fast implementation of forward fill + avg fill in numpy."""
|
|
228
|
+
# First apply forward fill
|
|
229
|
+
idx = np.arange(len(arr))
|
|
230
|
+
mask = np.isnan(arr)
|
|
231
|
+
idx[mask] = 0
|
|
232
|
+
arr_filled = arr[np.maximum.accumulate(idx)]
|
|
233
|
+
# Leading NaNs are filled with the mean
|
|
234
|
+
arr_filled[np.isnan(arr_filled)] = np.nanmean(arr_filled)
|
|
235
|
+
return arr_filled
|
|
236
|
+
|
|
200
237
|
forecast = {}
|
|
201
238
|
# At least seasonal_period + 2 values are required to compute sigma for seasonal naive
|
|
202
239
|
if len(target) > seasonal_period + 1 and seasonal_period > 1:
|
|
240
|
+
if np.isnan(target[-(seasonal_period + 2) :]).any():
|
|
241
|
+
target = numpy_fillna(target)
|
|
242
|
+
|
|
203
243
|
indices = [len(target) - seasonal_period + k % seasonal_period for k in range(prediction_length)]
|
|
204
244
|
forecast["mean"] = target[indices]
|
|
205
245
|
residuals = target[seasonal_period:] - target[:-seasonal_period]
|
|
206
246
|
|
|
207
|
-
sigma = np.sqrt(np.
|
|
247
|
+
sigma = np.sqrt(np.nanmean(np.square(residuals)))
|
|
208
248
|
num_full_seasons = np.arange(1, prediction_length + 1) // seasonal_period
|
|
209
249
|
sigma_per_timestep = sigma * np.sqrt(num_full_seasons + 1)
|
|
210
250
|
else:
|
|
211
251
|
# Fall back to naive forecast
|
|
212
|
-
|
|
252
|
+
last_observed_value = target[np.isfinite(target)][-1]
|
|
253
|
+
forecast["mean"] = np.full(shape=[prediction_length], fill_value=last_observed_value)
|
|
213
254
|
residuals = target[1:] - target[:-1]
|
|
214
255
|
|
|
215
|
-
sigma = np.sqrt(np.
|
|
256
|
+
sigma = np.sqrt(np.nanmean(np.square(residuals)))
|
|
257
|
+
if np.isnan(sigma): # happens if there are no two consecutive non-nan observations
|
|
258
|
+
sigma = 0.0
|
|
216
259
|
sigma_per_timestep = sigma * np.sqrt(np.arange(1, prediction_length + 1))
|
|
217
260
|
|
|
218
261
|
for q in quantile_levels:
|
|
219
262
|
forecast[str(q)] = forecast["mean"] + norm.ppf(q) * sigma_per_timestep
|
|
220
263
|
|
|
221
264
|
return pd.DataFrame(forecast)
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def get_quantile_function(q: float) -> Callable:
|
|
268
|
+
"""Returns a function with name "q" that computes the q'th quantile of a pandas.Series."""
|
|
269
|
+
|
|
270
|
+
def quantile_fn(x: pd.Series) -> pd.Series:
|
|
271
|
+
return x.quantile(q)
|
|
272
|
+
|
|
273
|
+
quantile_fn.__name__ = str(q)
|
|
274
|
+
return quantile_fn
|
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
from typing import Callable
|
|
2
|
-
|
|
3
1
|
import numpy as np
|
|
4
2
|
import pandas as pd
|
|
5
3
|
|
|
6
|
-
from autogluon.timeseries.models.local.abstract_local_model import
|
|
4
|
+
from autogluon.timeseries.models.local.abstract_local_model import (
|
|
5
|
+
AbstractLocalModel,
|
|
6
|
+
get_quantile_function,
|
|
7
|
+
seasonal_naive_forecast,
|
|
8
|
+
)
|
|
7
9
|
|
|
8
10
|
|
|
9
11
|
class NaiveModel(AbstractLocalModel):
|
|
@@ -15,13 +17,14 @@ class NaiveModel(AbstractLocalModel):
|
|
|
15
17
|
|
|
16
18
|
Other Parameters
|
|
17
19
|
----------------
|
|
18
|
-
n_jobs : int or float, default =
|
|
20
|
+
n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
|
|
19
21
|
Number of CPU cores used to fit the models in parallel.
|
|
20
22
|
When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
|
|
21
23
|
When set to a positive integer, that many cores are used.
|
|
22
24
|
When set to -1, all CPU cores are used.
|
|
23
25
|
"""
|
|
24
26
|
|
|
27
|
+
ag_priority = 100
|
|
25
28
|
allowed_local_model_args = ["seasonal_period"]
|
|
26
29
|
|
|
27
30
|
def _predict_with_local_model(
|
|
@@ -36,6 +39,9 @@ class NaiveModel(AbstractLocalModel):
|
|
|
36
39
|
seasonal_period=1,
|
|
37
40
|
)
|
|
38
41
|
|
|
42
|
+
def _more_tags(self) -> dict:
|
|
43
|
+
return {"allow_nan": True}
|
|
44
|
+
|
|
39
45
|
|
|
40
46
|
class SeasonalNaiveModel(AbstractLocalModel):
|
|
41
47
|
"""Baseline model that sets the forecast equal to the last observed value from the same season.
|
|
@@ -54,18 +60,19 @@ class SeasonalNaiveModel(AbstractLocalModel):
|
|
|
54
60
|
specified manually by providing an integer > 1.
|
|
55
61
|
If seasonal_period (inferred or provided) is equal to 1, will fall back to Naive forecast.
|
|
56
62
|
Seasonality will also be disabled, if the length of the time series is < seasonal_period.
|
|
57
|
-
n_jobs : int or float, default =
|
|
63
|
+
n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
|
|
58
64
|
Number of CPU cores used to fit the models in parallel.
|
|
59
65
|
When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
|
|
60
66
|
When set to a positive integer, that many cores are used.
|
|
61
67
|
When set to -1, all CPU cores are used.
|
|
62
68
|
"""
|
|
63
69
|
|
|
70
|
+
ag_priority = 100
|
|
64
71
|
allowed_local_model_args = ["seasonal_period"]
|
|
65
72
|
|
|
66
73
|
def _predict_with_local_model(
|
|
67
74
|
self,
|
|
68
|
-
time_series:
|
|
75
|
+
time_series: pd.Series,
|
|
69
76
|
local_model_args: dict,
|
|
70
77
|
) -> pd.DataFrame:
|
|
71
78
|
return seasonal_naive_forecast(
|
|
@@ -75,32 +82,26 @@ class SeasonalNaiveModel(AbstractLocalModel):
|
|
|
75
82
|
seasonal_period=local_model_args["seasonal_period"],
|
|
76
83
|
)
|
|
77
84
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
"""Returns a function with name "q" that computes the q'th quantile of a pandas.Series."""
|
|
81
|
-
|
|
82
|
-
def quantile_fn(x: pd.Series) -> pd.Series:
|
|
83
|
-
return x.quantile(q)
|
|
84
|
-
|
|
85
|
-
quantile_fn.__name__ = str(q)
|
|
86
|
-
return quantile_fn
|
|
85
|
+
def _more_tags(self) -> dict:
|
|
86
|
+
return {"allow_nan": True}
|
|
87
87
|
|
|
88
88
|
|
|
89
89
|
class AverageModel(AbstractLocalModel):
|
|
90
|
-
"""Baseline model that sets the forecast equal to the
|
|
90
|
+
"""Baseline model that sets the forecast equal to the historical average or quantile.
|
|
91
91
|
|
|
92
92
|
Other Parameters
|
|
93
93
|
----------------
|
|
94
|
-
n_jobs : int or float, default =
|
|
94
|
+
n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
|
|
95
95
|
Number of CPU cores used to fit the models in parallel.
|
|
96
96
|
When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
|
|
97
97
|
When set to a positive integer, that many cores are used.
|
|
98
98
|
When set to -1, all CPU cores are used.
|
|
99
|
-
max_ts_length :
|
|
99
|
+
max_ts_length : int | None, default = None
|
|
100
100
|
If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
|
|
101
101
|
This significantly speeds up fitting and usually leads to no change in accuracy.
|
|
102
102
|
"""
|
|
103
103
|
|
|
104
|
+
ag_priority = 100
|
|
104
105
|
allowed_local_model_args = ["seasonal_period"]
|
|
105
106
|
default_max_ts_length = None
|
|
106
107
|
|
|
@@ -109,14 +110,17 @@ class AverageModel(AbstractLocalModel):
|
|
|
109
110
|
time_series: pd.Series,
|
|
110
111
|
local_model_args: dict,
|
|
111
112
|
) -> pd.DataFrame:
|
|
112
|
-
agg_functions = ["mean"] + [
|
|
113
|
+
agg_functions = ["mean"] + [get_quantile_function(q) for q in self.quantile_levels]
|
|
113
114
|
stats_marginal = time_series.agg(agg_functions)
|
|
114
115
|
stats_repeated = np.tile(stats_marginal.values, [self.prediction_length, 1])
|
|
115
116
|
return pd.DataFrame(stats_repeated, columns=stats_marginal.index)
|
|
116
117
|
|
|
118
|
+
def _more_tags(self) -> dict:
|
|
119
|
+
return {"allow_nan": True}
|
|
120
|
+
|
|
117
121
|
|
|
118
122
|
class SeasonalAverageModel(AbstractLocalModel):
|
|
119
|
-
"""Baseline model that sets the forecast equal to the
|
|
123
|
+
"""Baseline model that sets the forecast equal to the historical average or quantile in the same season.
|
|
120
124
|
|
|
121
125
|
Other Parameters
|
|
122
126
|
----------------
|
|
@@ -127,16 +131,17 @@ class SeasonalAverageModel(AbstractLocalModel):
|
|
|
127
131
|
specified manually by providing an integer > 1.
|
|
128
132
|
If seasonal_period (inferred or provided) is equal to 1, will fall back to Naive forecast.
|
|
129
133
|
Seasonality will also be disabled, if the length of the time series is < seasonal_period.
|
|
130
|
-
n_jobs : int or float, default =
|
|
134
|
+
n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
|
|
131
135
|
Number of CPU cores used to fit the models in parallel.
|
|
132
136
|
When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
|
|
133
137
|
When set to a positive integer, that many cores are used.
|
|
134
138
|
When set to -1, all CPU cores are used.
|
|
135
|
-
max_ts_length :
|
|
139
|
+
max_ts_length : int | None, default = None
|
|
136
140
|
If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
|
|
137
141
|
This significantly speeds up fitting and usually leads to no change in accuracy.
|
|
138
142
|
"""
|
|
139
143
|
|
|
144
|
+
ag_priority = 100
|
|
140
145
|
allowed_local_model_args = ["seasonal_period"]
|
|
141
146
|
default_max_ts_length = None
|
|
142
147
|
|
|
@@ -146,7 +151,7 @@ class SeasonalAverageModel(AbstractLocalModel):
|
|
|
146
151
|
local_model_args: dict,
|
|
147
152
|
) -> pd.DataFrame:
|
|
148
153
|
seasonal_period = local_model_args["seasonal_period"]
|
|
149
|
-
agg_functions = ["mean"] + [
|
|
154
|
+
agg_functions = ["mean"] + [get_quantile_function(q) for q in self.quantile_levels]
|
|
150
155
|
|
|
151
156
|
# Compute mean & quantiles for each season
|
|
152
157
|
ts_df = time_series.reset_index(drop=True).to_frame()
|
|
@@ -162,3 +167,6 @@ class SeasonalAverageModel(AbstractLocalModel):
|
|
|
162
167
|
stats_marginal = time_series.agg(agg_functions)
|
|
163
168
|
result = result.fillna(stats_marginal)
|
|
164
169
|
return result
|
|
170
|
+
|
|
171
|
+
def _more_tags(self) -> dict:
|
|
172
|
+
return {"allow_nan": True}
|
|
@@ -26,16 +26,17 @@ class NPTSModel(AbstractLocalModel):
|
|
|
26
26
|
Number of samples generated by the forecast.
|
|
27
27
|
num_default_time_features : int, default = 1
|
|
28
28
|
Number of time features used by seasonal model.
|
|
29
|
-
n_jobs : int or float, default =
|
|
29
|
+
n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
|
|
30
30
|
Number of CPU cores used to fit the models in parallel.
|
|
31
31
|
When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
|
|
32
32
|
When set to a positive integer, that many cores are used.
|
|
33
33
|
When set to -1, all CPU cores are used.
|
|
34
|
-
max_ts_length :
|
|
34
|
+
max_ts_length : int | None, default = 2500
|
|
35
35
|
If not None, only the last ``max_ts_length`` time steps of each time series will be used to train the model.
|
|
36
36
|
This significantly speeds up fitting and usually leads to no change in accuracy.
|
|
37
37
|
"""
|
|
38
38
|
|
|
39
|
+
ag_priority = 80
|
|
39
40
|
allowed_local_model_args = [
|
|
40
41
|
"kernel_type",
|
|
41
42
|
"exp_kernel_weights",
|
|
@@ -88,3 +89,6 @@ class NPTSModel(AbstractLocalModel):
|
|
|
88
89
|
for q in self.quantile_levels:
|
|
89
90
|
forecast_dict[str(q)] = forecast.quantile(q)
|
|
90
91
|
return pd.DataFrame(forecast_dict)
|
|
92
|
+
|
|
93
|
+
def _more_tags(self) -> dict:
|
|
94
|
+
return {"allow_nan": True}
|