autogluon.timeseries 1.0.1b20240304__py3-none-any.whl → 1.4.1b20251210__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of autogluon.timeseries has been flagged as a potentially problematic release.
- autogluon/timeseries/configs/__init__.py +3 -2
- autogluon/timeseries/configs/hyperparameter_presets.py +62 -0
- autogluon/timeseries/configs/predictor_presets.py +84 -0
- autogluon/timeseries/dataset/ts_dataframe.py +339 -186
- autogluon/timeseries/learner.py +192 -60
- autogluon/timeseries/metrics/__init__.py +55 -11
- autogluon/timeseries/metrics/abstract.py +96 -25
- autogluon/timeseries/metrics/point.py +186 -39
- autogluon/timeseries/metrics/quantile.py +47 -20
- autogluon/timeseries/metrics/utils.py +6 -6
- autogluon/timeseries/models/__init__.py +13 -7
- autogluon/timeseries/models/abstract/__init__.py +2 -2
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +533 -273
- autogluon/timeseries/models/abstract/model_trial.py +10 -10
- autogluon/timeseries/models/abstract/tunable.py +189 -0
- autogluon/timeseries/models/autogluon_tabular/__init__.py +2 -0
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +369 -215
- autogluon/timeseries/models/autogluon_tabular/per_step.py +513 -0
- autogluon/timeseries/models/autogluon_tabular/transforms.py +67 -0
- autogluon/timeseries/models/autogluon_tabular/utils.py +3 -51
- autogluon/timeseries/models/chronos/__init__.py +4 -0
- autogluon/timeseries/models/chronos/chronos2.py +361 -0
- autogluon/timeseries/models/chronos/model.py +738 -0
- autogluon/timeseries/models/chronos/utils.py +369 -0
- autogluon/timeseries/models/ensemble/__init__.py +35 -2
- autogluon/timeseries/models/ensemble/{abstract_timeseries_ensemble.py → abstract.py} +50 -26
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +236 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +73 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +167 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
- autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
- autogluon/timeseries/models/ensemble/per_item_greedy.py +162 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +40 -0
- autogluon/timeseries/models/ensemble/weighted/basic.py +78 -0
- autogluon/timeseries/models/ensemble/weighted/greedy.py +57 -0
- autogluon/timeseries/models/gluonts/__init__.py +3 -1
- autogluon/timeseries/models/gluonts/abstract.py +583 -0
- autogluon/timeseries/models/gluonts/dataset.py +109 -0
- autogluon/timeseries/models/gluonts/{torch/models.py → models.py} +185 -44
- autogluon/timeseries/models/local/__init__.py +1 -10
- autogluon/timeseries/models/local/abstract_local_model.py +150 -97
- autogluon/timeseries/models/local/naive.py +31 -23
- autogluon/timeseries/models/local/npts.py +6 -2
- autogluon/timeseries/models/local/statsforecast.py +99 -112
- autogluon/timeseries/models/multi_window/multi_window_model.py +99 -40
- autogluon/timeseries/models/registry.py +64 -0
- autogluon/timeseries/models/toto/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
- autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
- autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
- autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
- autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
- autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
- autogluon/timeseries/models/toto/dataloader.py +108 -0
- autogluon/timeseries/models/toto/hf_pretrained_model.py +118 -0
- autogluon/timeseries/models/toto/model.py +236 -0
- autogluon/timeseries/predictor.py +826 -305
- autogluon/timeseries/regressor.py +253 -0
- autogluon/timeseries/splitter.py +10 -31
- autogluon/timeseries/trainer/__init__.py +2 -3
- autogluon/timeseries/trainer/ensemble_composer.py +439 -0
- autogluon/timeseries/trainer/model_set_builder.py +256 -0
- autogluon/timeseries/trainer/prediction_cache.py +149 -0
- autogluon/timeseries/trainer/trainer.py +1298 -0
- autogluon/timeseries/trainer/utils.py +17 -0
- autogluon/timeseries/transforms/__init__.py +2 -0
- autogluon/timeseries/transforms/covariate_scaler.py +164 -0
- autogluon/timeseries/transforms/target_scaler.py +149 -0
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/base.py +38 -20
- autogluon/timeseries/utils/datetime/lags.py +18 -16
- autogluon/timeseries/utils/datetime/seasonality.py +14 -14
- autogluon/timeseries/utils/datetime/time_features.py +17 -14
- autogluon/timeseries/utils/features.py +317 -53
- autogluon/timeseries/utils/forecast.py +31 -17
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/utils/warning_filters.py +44 -6
- autogluon/timeseries/version.py +2 -1
- autogluon.timeseries-1.4.1b20251210-py3.11-nspkg.pth +1 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/METADATA +71 -47
- autogluon_timeseries-1.4.1b20251210.dist-info/RECORD +103 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/WHEEL +1 -1
- autogluon/timeseries/configs/presets_configs.py +0 -11
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -170
- autogluon/timeseries/models/gluonts/abstract_gluonts.py +0 -550
- autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
- autogluon/timeseries/models/presets.py +0 -325
- autogluon/timeseries/trainer/abstract_trainer.py +0 -1144
- autogluon/timeseries/trainer/auto_trainer.py +0 -74
- autogluon.timeseries-1.0.1b20240304-py3.8-nspkg.pth +0 -1
- autogluon.timeseries-1.0.1b20240304.dist-info/RECORD +0 -58
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/zip-safe +0 -0
autogluon/timeseries/trainer/utils.py (new file)
@@ -0,0 +1,17 @@
+import logging
+
+logger = logging.getLogger("autogluon.timeseries.trainer")
+
+
+def log_scores_and_times(
+    val_score: float | None,
+    fit_time: float | None,
+    predict_time: float | None,
+    eval_metric_name: str,
+):
+    if val_score is not None:
+        logger.info(f"\t{val_score:<7.4f}".ljust(15) + f"= Validation score ({eval_metric_name})")
+    if fit_time is not None:
+        logger.info(f"\t{fit_time:<7.2f} s".ljust(15) + "= Training runtime")
+    if predict_time is not None:
+        logger.info(f"\t{predict_time:<7.2f} s".ljust(15) + "= Validation (prediction) runtime")
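This new helper centralizes the per-model log lines the trainer emits after fitting and validating a model. A minimal usage sketch (module path taken from the file listing above; the values, and the convention of reporting negated error metrics as higher-is-better scores, are illustrative assumptions):

import logging

from autogluon.timeseries.trainer.utils import log_scores_and_times

logging.basicConfig(level=logging.INFO, format="%(message)s")

log_scores_and_times(
    val_score=-0.4112,  # assumption: error metric shown negated, higher is better
    fit_time=12.34,
    predict_time=0.56,
    eval_metric_name="WQL",
)
# Prints roughly:
#     -0.4112    = Validation score (WQL)
#     12.34   s  = Training runtime
#     0.56    s  = Validation (prediction) runtime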
autogluon/timeseries/transforms/covariate_scaler.py (new file)
@@ -0,0 +1,164 @@
+import logging
+from typing import Literal, Protocol, overload, runtime_checkable
+
+import numpy as np
+import pandas as pd
+from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import QuantileTransformer, StandardScaler
+
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
+from autogluon.timeseries.utils.features import CovariateMetadata
+from autogluon.timeseries.utils.warning_filters import warning_filter
+
+logger = logging.getLogger(__name__)
+
+
+@runtime_checkable
+class CovariateScaler(Protocol):
+    """Apply scaling to covariates and static features.
+
+    This can be helpful for deep learning models that assume that the inputs are normalized.
+    """
+
+    def fit_transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame: ...
+
+    def transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame: ...
+
+    def transform_known_covariates(
+        self, known_covariates: TimeSeriesDataFrame | None = None
+    ) -> TimeSeriesDataFrame | None: ...
+
+
+class GlobalCovariateScaler(CovariateScaler):
+    """Applies preprocessing logic similar to tabular's NN_TORCH model to the covariates.
+
+    Performs following preprocessing for real-valued columns:
+    - sklearn.preprocessing.QuantileTransform for skewed features
+    - passthrough (ignore) boolean features
+    - sklearn.preprocessing.StandardScaler for the rest of the features
+
+    Preprocessing is done globally across all items.
+    """
+
+    def __init__(
+        self,
+        covariate_metadata: CovariateMetadata,
+        use_known_covariates: bool = True,
+        use_past_covariates: bool = True,
+        use_static_features: bool = True,
+        skew_threshold: float = 0.99,
+    ):
+        self.covariate_metadata = covariate_metadata
+        self.use_known_covariates = use_known_covariates
+        self.use_past_covariates = use_past_covariates
+        self.use_static_features = use_static_features
+        self.skew_threshold = skew_threshold
+        self._column_transformers: dict[Literal["known", "past", "static"], ColumnTransformer] | None = None
+
+    def is_fit(self) -> bool:
+        return self._column_transformers is not None
+
+    def fit(self, data: TimeSeriesDataFrame) -> "GlobalCovariateScaler":
+        self._column_transformers = {}
+
+        if self.use_known_covariates and len(self.covariate_metadata.known_covariates_real) > 0:
+            self._column_transformers["known"] = self._get_transformer_for_columns(
+                data, columns=self.covariate_metadata.known_covariates_real
+            )
+        if self.use_past_covariates and len(self.covariate_metadata.past_covariates_real) > 0:
+            self._column_transformers["past"] = self._get_transformer_for_columns(
+                data, columns=self.covariate_metadata.past_covariates_real
+            )
+        if self.use_static_features and len(self.covariate_metadata.static_features_real) > 0:
+            assert data.static_features is not None
+            self._column_transformers["static"] = self._get_transformer_for_columns(
+                data.static_features, columns=self.covariate_metadata.static_features_real
+            )
+
+        return self
+
+    def fit_transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
+        if not self.is_fit():
+            self.fit(data=data)
+        return self.transform(data=data)
+
+    def transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
+        # Copy data to avoid inplace modification
+        data = data.copy()
+        assert self._column_transformers is not None, "CovariateScaler must be fit before transform can be called"
+
+        if "known" in self._column_transformers:
+            columns = self.covariate_metadata.known_covariates_real
+            data[columns] = self._column_transformers["known"].transform(data[columns])
+
+        if "past" in self._column_transformers:
+            columns = self.covariate_metadata.past_covariates_real
+            data[columns] = self._column_transformers["past"].transform(data[columns])
+
+        if "static" in self._column_transformers:
+            columns = self.covariate_metadata.static_features_real
+            assert data.static_features is not None
+
+            data.static_features[columns] = self._column_transformers["static"].transform(
+                data.static_features[columns]
+            )
+        return data
+
+    def transform_known_covariates(
+        self, known_covariates: TimeSeriesDataFrame | None = None
+    ) -> TimeSeriesDataFrame | None:
+        assert self._column_transformers is not None, "CovariateScaler must be fit before transform can be called"
+
+        if "known" in self._column_transformers:
+            columns = self.covariate_metadata.known_covariates_real
+            assert known_covariates is not None
+
+            known_covariates = known_covariates.copy()
+            known_covariates[columns] = self._column_transformers["known"].transform(known_covariates[columns])
+        return known_covariates
+
+    def _get_transformer_for_columns(self, df: pd.DataFrame, columns: list[str]) -> ColumnTransformer:
+        """Passthrough bool features, use QuantileTransform for skewed features, and use StandardScaler for the rest.
+
+        The preprocessing logic is similar to the TORCH_NN model from Tabular.
+        """
+        bool_features = []
+        skewed_features = []
+        continuous_features = []
+        for col in columns:
+            if set(df[col].unique()) == set([0, 1]):
+                bool_features.append(col)
+            elif np.abs(df[col].skew()) > self.skew_threshold:  # type: ignore
+                skewed_features.append(col)
+            else:
+                continuous_features.append(col)
+        transformers = []
+        logger.debug(
+            f"\tbool_features: {bool_features}, continuous_features: {continuous_features}, skewed_features: {skewed_features}"
+        )
+        if continuous_features:
+            transformers.append(("scaler", StandardScaler(), continuous_features))
+        if skewed_features:
+            transformers.append(("skew", QuantileTransformer(output_distribution="normal"), skewed_features))
+        with warning_filter():
+            column_transformer = ColumnTransformer(transformers=transformers, remainder="passthrough").fit(df[columns])
+        return column_transformer
+
+
+AVAILABLE_COVARIATE_SCALERS = {
+    "global": GlobalCovariateScaler,
+}
+
+
+@overload
+def get_covariate_scaler(name: None, **scaler_kwargs) -> None: ...
+@overload
+def get_covariate_scaler(name: Literal["global"], **scaler_kwargs) -> GlobalCovariateScaler: ...
+def get_covariate_scaler(name: Literal["global"] | None = None, **scaler_kwargs) -> CovariateScaler | None:
+    if name is None:
+        return None
+    if name not in AVAILABLE_COVARIATE_SCALERS:
+        raise KeyError(
+            f"Covariate scaler type {name} not supported. Available scalers: {list(AVAILABLE_COVARIATE_SCALERS)}"
+        )
+    return AVAILABLE_COVARIATE_SCALERS[name](**scaler_kwargs)
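A rough usage sketch for the factory defined above (untested; only get_covariate_scaler, GlobalCovariateScaler, and the fit_transform protocol come from this hunk, while the CovariateMetadata constructor keyword and the toy columns are assumptions):

import pandas as pd

from autogluon.timeseries import TimeSeriesDataFrame
from autogluon.timeseries.transforms.covariate_scaler import get_covariate_scaler
from autogluon.timeseries.utils.features import CovariateMetadata

data = TimeSeriesDataFrame.from_data_frame(
    pd.DataFrame({
        "item_id": ["A"] * 4,
        "timestamp": pd.date_range("2024-01-01", periods=4, freq="D"),
        "target": [1.0, 2.0, 3.0, 4.0],
        "price": [9.9, 10.1, 9.8, 10.0],  # real-valued known covariate
    })
)
metadata = CovariateMetadata(known_covariates_real=["price"])  # assumed constructor kwarg
scaler = get_covariate_scaler("global", covariate_metadata=metadata)
data_scaled = scaler.fit_transform(data)  # "price" is not skewed here, so StandardScaler applies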
autogluon/timeseries/transforms/target_scaler.py (new file)
@@ -0,0 +1,149 @@
+from typing import Literal, Protocol, overload
+
+import numpy as np
+import pandas as pd
+from typing_extensions import Self
+
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
+
+
+class TargetScaler(Protocol):
+    def fit_transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame: ...
+
+    def fit(self, data: TimeSeriesDataFrame) -> Self: ...
+
+    def transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame: ...
+
+    def inverse_transform(self, predictions: TimeSeriesDataFrame) -> TimeSeriesDataFrame: ...
+
+
+class LocalTargetScaler(TargetScaler):
+    """Applies an affine transformation (x - loc) / scale independently to each time series in the dataset."""
+
+    def __init__(
+        self,
+        target: str = "target",
+        min_scale: float = 1e-2,
+    ):
+        self.target = target
+        self.min_scale = min_scale
+        self.loc: pd.Series | None = None
+        self.scale: pd.Series | None = None
+
+    def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series | None, pd.Series | None]:
+        raise NotImplementedError
+
+    def fit_transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
+        return self.fit(data=data).transform(data=data)
+
+    def fit(self, data: TimeSeriesDataFrame) -> "LocalTargetScaler":
+        target_series = data[self.target].replace([np.inf, -np.inf], np.nan)
+        self.loc, self.scale = self._compute_loc_scale(target_series)
+        if self.loc is not None:
+            self.loc = self.loc.replace([np.inf, -np.inf], np.nan).fillna(0.0)
+        if self.scale is not None:
+            self.scale = self.scale.clip(lower=self.min_scale).replace([np.inf, -np.inf], np.nan).fillna(1.0)
+        return self
+
+    def _reindex_loc_scale(self, item_index: pd.Index) -> tuple[np.ndarray | float, np.ndarray | float]:
+        """Reindex loc and scale parameters for the given item_ids and convert them to an array-like."""
+        if self.loc is not None:
+            loc = self.loc.reindex(item_index).to_numpy()
+        else:
+            loc = 0.0
+        if self.scale is not None:
+            scale = self.scale.reindex(item_index).to_numpy()
+        else:
+            scale = 1.0
+        return loc, scale
+
+    def transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
+        """Apply scaling to the target column in the dataframe."""
+        loc, scale = self._reindex_loc_scale(item_index=data.index.get_level_values(TimeSeriesDataFrame.ITEMID))
+        return data.assign(**{self.target: (data[self.target] - loc) / scale})
+
+    def inverse_transform(self, predictions: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
+        """Apply inverse scaling to all columns in the predictions dataframe."""
+        loc, scale = self._reindex_loc_scale(item_index=predictions.index.get_level_values(TimeSeriesDataFrame.ITEMID))
+        return predictions.assign(**{col: predictions[col] * scale + loc for col in predictions.columns})
+
+
+class LocalStandardScaler(LocalTargetScaler):
+    """Applies standard scaling to each time series in the dataset.
+
+    The resulting affine transformation is (x - loc) / scale, where scale = std(x), loc = mean(x).
+    """
+
+    def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series, pd.Series]:
+        stats = target_series.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).agg(["mean", "std"])
+        return stats["mean"], stats["std"]
+
+
+class LocalMeanAbsScaler(LocalTargetScaler):
+    """Applies mean absolute scaling to each time series in the dataset."""
+
+    def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series | None, pd.Series]:
+        scale = target_series.abs().groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).agg("mean")
+        return None, scale
+
+
+class LocalMinMaxScaler(LocalTargetScaler):
+    """Applies min/max scaling to each time series in the dataset.
+
+    The resulting affine transformation is (x - loc) / scale, where scale = max(x) - min(x), loc = min(x) / scale.
+    """
+
+    def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series, pd.Series]:
+        stats = target_series.abs().groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).agg(["min", "max"])
+        scale = (stats["max"] - stats["min"]).clip(lower=self.min_scale)
+        loc = stats["min"]
+        return loc, scale
+
+
+class LocalRobustScaler(LocalTargetScaler):
+    """Applies a robust scaler based on the interquartile range. Less sensitive to outliers compared to other scaler.
+
+    The resulting affine transformation is (x - loc) / scale, where scale = quantile(x, 0.75) - quantile(x, 0.25), loc = median(x).
+    """
+
+    def __init__(
+        self,
+        target: str = "target",
+        min_scale: float = 1e-2,
+        **kwargs,
+    ):
+        super().__init__(target=target, min_scale=min_scale)
+        self.q_min = 0.25
+        self.q_max = 0.75
+        assert 0 < self.q_min < self.q_max < 1
+
+    def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series, pd.Series]:
+        grouped = target_series.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False)
+        loc = grouped.median()
+        lower = grouped.quantile(self.q_min)
+        upper = grouped.quantile(self.q_max)
+        scale = upper - lower
+        return loc, scale
+
+
+AVAILABLE_TARGET_SCALERS = {
+    "standard": LocalStandardScaler,
+    "mean_abs": LocalMeanAbsScaler,
+    "min_max": LocalMinMaxScaler,
+    "robust": LocalRobustScaler,
+}
+
+
+@overload
+def get_target_scaler(name: None, **scaler_kwargs) -> None: ...
+@overload
+def get_target_scaler(name: Literal["standard", "mean_abs", "min_max", "robust"], **scaler_kwargs) -> TargetScaler: ...
+def get_target_scaler(
+    name: Literal["standard", "mean_abs", "min_max", "robust"] | None, **scaler_kwargs
+) -> TargetScaler | None:
+    """Get LocalTargetScaler object from a string."""
+    if name is None:
+        return None
+    if name not in AVAILABLE_TARGET_SCALERS:
+        raise KeyError(f"Scaler type {name} not supported. Available scalers: {list(AVAILABLE_TARGET_SCALERS)}")
+    return AVAILABLE_TARGET_SCALERS[name](**scaler_kwargs)
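A small round-trip sketch for get_target_scaler (module path inferred from the file listing; toy values):

import pandas as pd

from autogluon.timeseries import TimeSeriesDataFrame
from autogluon.timeseries.transforms.target_scaler import get_target_scaler

data = TimeSeriesDataFrame.from_data_frame(
    pd.DataFrame({
        "item_id": ["A", "A", "A", "B", "B", "B"],
        "timestamp": list(pd.date_range("2024-01-01", periods=3, freq="D")) * 2,
        "target": [1.0, 2.0, 3.0, 100.0, 200.0, 300.0],
    })
)
scaler = get_target_scaler("standard")
scaled = scaler.fit_transform(data)          # per-item (x - mean(x)) / std(x)
restored = scaler.inverse_transform(scaled)  # recovers the original targets up to float precision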
autogluon/timeseries/utils/constants.py (new file)
@@ -0,0 +1,10 @@
+import joblib.externals.loky
+from joblib import cpu_count
+
+# By default, joblib w/ loky backend kills processes that take >300MB of RAM assuming that this is caused by a memory
+# leak. This leads to problems for some memory-hungry models like AutoARIMA/Theta.
+# This monkey patch removes this undesired behavior
+joblib.externals.loky.process_executor._MAX_MEMORY_LEAK_SIZE = int(3e10)
+
+# We use the same default n_jobs across AG-TS to ensure that Joblib reuses the process pool
+AG_DEFAULT_N_JOBS = max(cpu_count(only_physical_cores=True), 1)
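The shared constant matters because joblib's loky backend reuses its worker pool only when consecutive Parallel calls request the same number of workers. A sketch of the intended pattern (the pow calls are placeholders):

from joblib import Parallel, delayed

from autogluon.timeseries.utils.constants import AG_DEFAULT_N_JOBS

# Both calls request the same n_jobs, so the second can reuse the warm process pool
squares = Parallel(n_jobs=AG_DEFAULT_N_JOBS)(delayed(pow)(i, 2) for i in range(8))
cubes = Parallel(n_jobs=AG_DEFAULT_N_JOBS)(delayed(pow)(i, 3) for i in range(8))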
autogluon/timeseries/utils/datetime/base.py
@@ -1,31 +1,46 @@
 import pandas as pd
 
 TO_MAJOR_FREQ = {
-    # business hour
-    "cbh": "BH",
-    "CBH": "BH",
+    # sub-daily
+    "H": "h",
+    "BH": "bh",
+    "cbh": "bh",
+    "CBH": "bh",
+    "T": "min",
+    "S": "s",
+    "L": "ms",
+    "U": "us",
+    "N": "ns",
     # business day
     "C": "B",
     # month
-    "BM": "M",
-    "CBM": "M",
-    "MS": "M",
-    "BMS": "M",
-    "CBMS": "M",
+    "M": "ME",
+    "BM": "ME",
+    "BME": "ME",
+    "CBM": "ME",
+    "CBME": "ME",
+    "MS": "ME",
+    "BMS": "ME",
+    "CBMS": "ME",
     # semi-month
-    "SMS": "SM",
+    "SM": "SME",
+    "SMS": "SME",
     # quarter
-    "BQ": "Q",
-    "QS": "Q",
-    "BQS": "Q",
+    "Q": "QE",
+    "BQ": "QE",
+    "BQE": "QE",
+    "QS": "QE",
+    "BQS": "QE",
     # annual
-    "A": "Y",
-    "BA": "Y",
-    "BY": "Y",
-    "AS": "Y",
-    "YS": "Y",
-    "BAS": "Y",
-    "BYS": "Y",
+    "A": "YE",
+    "Y": "YE",
+    "BA": "YE",
+    "BY": "YE",
+    "BYE": "YE",
+    "AS": "YE",
+    "YS": "YE",
+    "BAS": "YE",
+    "BYS": "YE",
 }
 
 
@@ -33,7 +48,10 @@ def norm_freq_str(offset: pd.DateOffset) -> str:
    """Obtain frequency string from a pandas.DateOffset object.
 
     "Non-standard" frequencies are converted to their "standard" counterparts. For example, MS (month start) is mapped
-    to M since both correspond to the same seasonality, lags and time features.
+    to ME (month end) since both correspond to the same seasonality, lags and time features.
+
+    The frequencies are always mapped to the new non-deprecated aliases (pandas>=2.2), e.g., "H" is mapped to "h". The
+    downstream functions like `get_seasonality` handle the new aliases even if older version of pandas is used.
     """
     base_freq = offset.name.split("-")[0]
     return TO_MAJOR_FREQ.get(base_freq, base_freq)
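norm_freq_str first drops the anchor suffix with split("-") and then applies the table, so anchored and deprecated aliases collapse to one canonical spelling. For example:

import pandas as pd

from autogluon.timeseries.utils.datetime.base import norm_freq_str

for freq in ["MS", "QS", "W-SUN"]:
    offset = pd.tseries.frequencies.to_offset(freq)
    print(freq, "->", norm_freq_str(offset))
# MS -> ME      (month start normalized to month end)
# QS -> QE      (the "-JAN" anchor of the QS offset is stripped before lookup)
# W-SUN -> W    ("W" is not in the table, so it passes through unchanged)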
autogluon/timeseries/utils/datetime/lags.py
@@ -1,7 +1,6 @@
 """
 Generate lag indices based on frequency string. Adapted from gluonts.time_feature.lag.
 """
-from typing import List, Optional
 
 import numpy as np
 import pandas as pd
@@ -69,9 +68,9 @@ def _make_lags_for_semi_month(multiple, num_cycles=3):
 def get_lags_for_frequency(
     freq: str,
     lag_ub: int = 1200,
-    num_lags: Optional[int] = None,
+    num_lags: int | None = None,
     num_default_lags: int = 7,
-) -> List[int]:
+) -> list[int]:
     """
     Generates a list of lags that that are appropriate for the given frequency
     string.
@@ -94,15 +93,18 @@ def get_lags_for_frequency(
     """
 
     offset = pd.tseries.frequencies.to_offset(freq)
+
+    if offset is None:
+        raise ValueError(f"Invalid frequency: {freq}")
     offset_name = norm_freq_str(offset)
 
-    if offset_name == "Y":
+    if offset_name == "YE":
         lags = []
-    elif offset_name == "Q":
+    elif offset_name == "QE":
         lags = _make_lags_for_quarter(offset.n)
-    elif offset_name == "M":
+    elif offset_name == "ME":
         lags = _make_lags_for_month(offset.n)
-    elif offset_name == "SM":
+    elif offset_name == "SME":
         lags = _make_lags_for_semi_month(offset.n)
     elif offset_name == "W":
         lags = _make_lags_for_week(offset.n)
@@ -110,21 +112,21 @@
         lags = _make_lags_for_day(offset.n) + _make_lags_for_week(offset.n / 7.0)
     elif offset_name == "B":
         lags = _make_lags_for_day(offset.n, days_in_week=5, days_in_month=22) + _make_lags_for_week(offset.n / 5.0)
-    elif offset_name == "H":
+    elif offset_name == "h":
         lags = (
             _make_lags_for_hour(offset.n)
             + _make_lags_for_day(offset.n / 24)
             + _make_lags_for_week(offset.n / (24 * 7))
         )
     # business hour
-    elif offset_name == "BH":
+    elif offset_name == "bh":
         lags = (
             _make_lags_for_business_hour(offset.n)
             + _make_lags_for_day(offset.n / 9)
             + _make_lags_for_week(offset.n / (9 * 7))
         )
     # minutes
-    elif offset_name == "T":
+    elif offset_name == "min":
         lags = (
             _make_lags_for_minute(offset.n)
             + _make_lags_for_hour(offset.n / 60)
@@ -132,35 +134,35 @@
             + _make_lags_for_week(offset.n / (60 * 24 * 7))
         )
     # second
-    elif offset_name == "S":
+    elif offset_name == "s":
         lags = (
             _make_lags_for_second(offset.n)
             + _make_lags_for_minute(offset.n / 60)
             + _make_lags_for_hour(offset.n / (60 * 60))
         )
-    elif offset_name == "L":
+    elif offset_name == "ms":
         lags = (
             _make_lags_for_second(offset.n / 1e3)
             + _make_lags_for_minute(offset.n / (60 * 1e3))
             + _make_lags_for_hour(offset.n / (60 * 60 * 1e3))
         )
-    elif offset_name == "U":
+    elif offset_name == "us":
         lags = (
             _make_lags_for_second(offset.n / 1e6)
             + _make_lags_for_minute(offset.n / (60 * 1e6))
             + _make_lags_for_hour(offset.n / (60 * 60 * 1e6))
         )
-    elif offset_name == "N":
+    elif offset_name == "ns":
         lags = (
             _make_lags_for_second(offset.n / 1e9)
             + _make_lags_for_minute(offset.n / (60 * 1e9))
             + _make_lags_for_hour(offset.n / (60 * 60 * 1e9))
         )
     else:
-        raise Exception(f"
+        raise Exception(f"Cannot get lags for unsupported frequency {freq}")
 
     # flatten lags list and filter
-    lags = [int(lag) for sub_list in lags for lag in sub_list if
+    lags = [int(lag) for sub_list in lags for lag in sub_list if num_default_lags < lag <= lag_ub]
     lags = list(range(1, num_default_lags + 1)) + sorted(list(set(lags)))
 
     return sorted(set(lags))[:num_lags]
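Per the code above, the result always starts with lags 1..num_default_lags, followed by the deduplicated frequency-specific lags capped at lag_ub, optionally truncated to num_lags. A quick sketch:

from autogluon.timeseries.utils.datetime.lags import get_lags_for_frequency

daily_lags = get_lags_for_frequency("D")
print(daily_lags[:7])   # [1, 2, 3, 4, 5, 6, 7], the default lags come first
print(daily_lags[7:])   # longer day/week-based seasonal lags, sorted and deduplicated

hourly_lags = get_lags_for_frequency("h", num_lags=10)  # keep only the first 10 lags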
autogluon/timeseries/utils/datetime/seasonality.py
@@ -1,33 +1,33 @@
-from typing import Union
-
 import pandas as pd
 
 from .base import norm_freq_str
 
 DEFAULT_SEASONALITIES = {
-    "Y": 1,
-    "Q": 4,
-    "M": 12,
-    "SM": 24,
+    "YE": 1,
+    "QE": 4,
+    "ME": 12,
+    "SME": 24,
     "W": 1,
     "D": 7,
     "B": 5,
-    "BH": 9,
-    "H": 24,
-    "T": 60 * 24,
-    "S": 1,
-    "L": 1,
-    "U": 1,
-    "N": 1,
+    "bh": 9,
+    "h": 24,
+    "min": 60 * 24,
+    "s": 1,
+    "ms": 1,
+    "us": 1,
+    "ns": 1,
 }
 
 
-def get_seasonality(freq: Union[str, None]) -> int:
+def get_seasonality(freq: str | None) -> int:
     """Return the seasonality of a given frequency. Adapted from ``gluonts.time_feature.seasonality``."""
     if freq is None:
         return 1
 
     offset = pd.tseries.frequencies.to_offset(freq)
+
+    assert offset is not None  # offset is only None if freq is None
     offset_name = norm_freq_str(offset)
     base_seasonality = DEFAULT_SEASONALITIES.get(offset_name, 1)
 
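A few example values (module path taken from the file listing; outputs follow the table above for offsets with multiple 1):

from autogluon.timeseries.utils.datetime.seasonality import get_seasonality

print(get_seasonality("D"))   # 7, weekly pattern in daily data
print(get_seasonality("h"))   # 24, daily pattern in hourly data
print(get_seasonality("MS"))  # 12, "MS" normalizes to "ME" via norm_freq_str
print(get_seasonality(None))  # 1, no frequency means no seasonality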
autogluon/timeseries/utils/datetime/time_features.py
@@ -1,7 +1,8 @@
 """
 Generate time features based on frequency string. Adapted from gluonts.time_feature.time_feature.
 """
-from typing import Callable, List
+
+from typing import Callable
 
 import numpy as np
 import pandas as pd
@@ -26,7 +27,7 @@ def week_of_year(index: pd.DatetimeIndex) -> np.ndarray:
     try:
         week = index.isocalendar().week
     except AttributeError:
-        week = index.week
+        week = index.week  # type: ignore[attr-defined]
 
     return _normalize(week - 1, num=53)
 
@@ -55,23 +56,25 @@ def second_of_minute(index: pd.DatetimeIndex) -> np.ndarray:
     return _normalize(index.second, num=60)
 
 
-def get_time_features_for_frequency(freq) -> List[Callable]:
+def get_time_features_for_frequency(freq) -> list[Callable]:
     features_by_offset_name = {
-        "Y": [],
-        "Q": [quarter_of_year],
-        "M": [month_of_year],
-        "SM": [day_of_month, month_of_year],
+        "YE": [],
+        "QE": [quarter_of_year],
+        "ME": [month_of_year],
+        "SME": [day_of_month, month_of_year],
         "W": [day_of_month, week_of_year],
         "D": [day_of_week, day_of_month, day_of_year],
         "B": [day_of_week, day_of_month, day_of_year],
-        "BH": [hour_of_day, day_of_week, day_of_month, day_of_year],
-        "H": [hour_of_day, day_of_week, day_of_month, day_of_year],
-        "T": [minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
-        "S": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
-        "L": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
-        "U": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
-        "N": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
+        "bh": [hour_of_day, day_of_week, day_of_month, day_of_year],
+        "h": [hour_of_day, day_of_week, day_of_month, day_of_year],
+        "min": [minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
+        "s": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
+        "ms": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
+        "us": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
+        "ns": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
     }
     offset = pd.tseries.frequencies.to_offset(freq)
+
+    assert offset is not None  # offset is only None if freq is None
     offset_name = norm_freq_str(offset)
     return features_by_offset_name[offset_name]
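Each returned feature maps a DatetimeIndex to an array of floats rescaled by the module's _normalize helper (defined earlier in this file, outside these hunks). A sketch of building a feature matrix for hourly data:

import pandas as pd

from autogluon.timeseries.utils.datetime.time_features import get_time_features_for_frequency

index = pd.date_range("2024-01-01", periods=48, freq="h")
features = get_time_features_for_frequency("h")
print([f.__name__ for f in features])
# ['hour_of_day', 'day_of_week', 'day_of_month', 'day_of_year']
feature_matrix = pd.DataFrame({f.__name__: f(index) for f in features}, index=index)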