autogluon.timeseries 1.4.1b20250907__py3-none-any.whl → 1.5.1b20260122__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of autogluon.timeseries might be problematic.
- autogluon/timeseries/configs/hyperparameter_presets.py +13 -28
- autogluon/timeseries/configs/predictor_presets.py +23 -39
- autogluon/timeseries/dataset/ts_dataframe.py +97 -86
- autogluon/timeseries/learner.py +70 -35
- autogluon/timeseries/metrics/__init__.py +4 -4
- autogluon/timeseries/metrics/abstract.py +8 -8
- autogluon/timeseries/metrics/point.py +9 -9
- autogluon/timeseries/metrics/quantile.py +5 -5
- autogluon/timeseries/metrics/utils.py +4 -4
- autogluon/timeseries/models/__init__.py +4 -1
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +52 -50
- autogluon/timeseries/models/abstract/model_trial.py +2 -1
- autogluon/timeseries/models/abstract/tunable.py +8 -8
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +58 -62
- autogluon/timeseries/models/autogluon_tabular/per_step.py +27 -16
- autogluon/timeseries/models/autogluon_tabular/transforms.py +11 -9
- autogluon/timeseries/models/chronos/__init__.py +2 -1
- autogluon/timeseries/models/chronos/chronos2.py +395 -0
- autogluon/timeseries/models/chronos/model.py +127 -89
- autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +69 -37
- autogluon/timeseries/models/ensemble/__init__.py +36 -2
- autogluon/timeseries/models/ensemble/abstract.py +14 -46
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +240 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +185 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
- autogluon/timeseries/models/ensemble/{greedy.py → ensemble_selection.py} +41 -61
- autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +45 -0
- autogluon/timeseries/models/ensemble/{basic.py → weighted/basic.py} +25 -22
- autogluon/timeseries/models/ensemble/weighted/greedy.py +64 -0
- autogluon/timeseries/models/gluonts/abstract.py +32 -31
- autogluon/timeseries/models/gluonts/dataset.py +11 -11
- autogluon/timeseries/models/gluonts/models.py +0 -7
- autogluon/timeseries/models/local/__init__.py +0 -7
- autogluon/timeseries/models/local/abstract_local_model.py +15 -18
- autogluon/timeseries/models/local/naive.py +2 -2
- autogluon/timeseries/models/local/npts.py +7 -1
- autogluon/timeseries/models/local/statsforecast.py +13 -13
- autogluon/timeseries/models/multi_window/multi_window_model.py +39 -24
- autogluon/timeseries/models/registry.py +3 -4
- autogluon/timeseries/models/toto/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
- autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
- autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
- autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
- autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
- autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
- autogluon/timeseries/models/toto/dataloader.py +108 -0
- autogluon/timeseries/models/toto/hf_pretrained_model.py +200 -0
- autogluon/timeseries/models/toto/model.py +249 -0
- autogluon/timeseries/predictor.py +541 -162
- autogluon/timeseries/regressor.py +27 -30
- autogluon/timeseries/splitter.py +3 -27
- autogluon/timeseries/trainer/ensemble_composer.py +444 -0
- autogluon/timeseries/trainer/model_set_builder.py +9 -9
- autogluon/timeseries/trainer/prediction_cache.py +16 -16
- autogluon/timeseries/trainer/trainer.py +300 -279
- autogluon/timeseries/trainer/utils.py +17 -0
- autogluon/timeseries/transforms/covariate_scaler.py +8 -8
- autogluon/timeseries/transforms/target_scaler.py +15 -15
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/lags.py +1 -3
- autogluon/timeseries/utils/datetime/seasonality.py +1 -3
- autogluon/timeseries/utils/features.py +31 -14
- autogluon/timeseries/utils/forecast.py +6 -7
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/version.py +1 -1
- autogluon.timeseries-1.5.1b20260122-py3.11-nspkg.pth +1 -0
- {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.5.1b20260122.dist-info}/METADATA +39 -22
- autogluon_timeseries-1.5.1b20260122.dist-info/RECORD +103 -0
- {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.5.1b20260122.dist-info}/WHEEL +1 -1
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -10
- autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
- autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -544
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -580
- autogluon.timeseries-1.4.1b20250907-py3.9-nspkg.pth +0 -1
- autogluon.timeseries-1.4.1b20250907.dist-info/RECORD +0 -75
- {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.5.1b20260122.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.5.1b20260122.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.5.1b20260122.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.5.1b20260122.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.4.1b20250907.dist-info → autogluon_timeseries-1.5.1b20260122.dist-info}/zip-safe +0 -0
autogluon/timeseries/trainer/utils.py
ADDED
@@ -0,0 +1,17 @@
+import logging
+
+logger = logging.getLogger("autogluon.timeseries.trainer")
+
+
+def log_scores_and_times(
+    val_score: float | None,
+    fit_time: float | None,
+    predict_time: float | None,
+    eval_metric_name: str,
+):
+    if val_score is not None:
+        logger.info(f"\t{val_score:<7.4f}".ljust(15) + f"= Validation score ({eval_metric_name})")
+    if fit_time is not None:
+        logger.info(f"\t{fit_time:<7.2f} s".ljust(15) + "= Training runtime")
+    if predict_time is not None:
+        logger.info(f"\t{predict_time:<7.2f} s".ljust(15) + "= Validation (prediction) runtime")
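For context, a quick sketch of how this new helper would be called; the argument values below are made up:

import logging

from autogluon.timeseries.trainer.utils import log_scores_and_times

logging.basicConfig(level=logging.INFO, format="%(message)s")

# Hypothetical values; prints the aligned per-model summary, roughly:
#   -0.3021        = Validation score (WQL)
#   12.53 s        = Training runtime
#   0.87 s         = Validation (prediction) runtime
log_scores_and_times(val_score=-0.3021, fit_time=12.53, predict_time=0.87, eval_metric_name="WQL")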
autogluon/timeseries/transforms/covariate_scaler.py
CHANGED
@@ -1,12 +1,12 @@
 import logging
-from typing import Literal, …
+from typing import Literal, Protocol, overload, runtime_checkable
 
 import numpy as np
 import pandas as pd
 from sklearn.compose import ColumnTransformer
 from sklearn.preprocessing import QuantileTransformer, StandardScaler
 
-from autogluon.timeseries.dataset…
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.utils.features import CovariateMetadata
 from autogluon.timeseries.utils.warning_filters import warning_filter
 
@@ -25,8 +25,8 @@ class CovariateScaler(Protocol):
     def transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame: ...
 
     def transform_known_covariates(
-        self, known_covariates: Optional[TimeSeriesDataFrame] = None
-    ) -> Optional[TimeSeriesDataFrame]: ...
+        self, known_covariates: TimeSeriesDataFrame | None = None
+    ) -> TimeSeriesDataFrame | None: ...
 
 
 class GlobalCovariateScaler(CovariateScaler):
@@ -53,7 +53,7 @@ class GlobalCovariateScaler(CovariateScaler):
         self.use_past_covariates = use_past_covariates
         self.use_static_features = use_static_features
         self.skew_threshold = skew_threshold
-        self._column_transformers: …
+        self._column_transformers: dict[Literal["known", "past", "static"], ColumnTransformer] | None = None
 
     def is_fit(self) -> bool:
         return self._column_transformers is not None
@@ -105,8 +105,8 @@ class GlobalCovariateScaler(CovariateScaler):
         return data
 
     def transform_known_covariates(
-        self, known_covariates: Optional[TimeSeriesDataFrame] = None
-    ) -> Optional[TimeSeriesDataFrame]:
+        self, known_covariates: TimeSeriesDataFrame | None = None
+    ) -> TimeSeriesDataFrame | None:
         assert self._column_transformers is not None, "CovariateScaler must be fit before transform can be called"
 
         if "known" in self._column_transformers:
@@ -154,7 +154,7 @@ AVAILABLE_COVARIATE_SCALERS = {
 def get_covariate_scaler(name: None, **scaler_kwargs) -> None: ...
 @overload
 def get_covariate_scaler(name: Literal["global"], **scaler_kwargs) -> GlobalCovariateScaler: ...
-def get_covariate_scaler(name: Optional[Literal["global"]] = None, **scaler_kwargs) -> Optional[CovariateScaler]:
+def get_covariate_scaler(name: Literal["global"] | None = None, **scaler_kwargs) -> CovariateScaler | None:
     if name is None:
         return None
     if name not in AVAILABLE_COVARIATE_SCALERS:
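The QuantileTransformer/StandardScaler imports and the skew_threshold attribute suggest a skew-dependent choice of column transformer. A minimal standalone sketch of that idea; the helper name and the exact threshold semantics are assumptions, not this module's API:

import pandas as pd
from sklearn.preprocessing import QuantileTransformer, StandardScaler

def pick_transformer(column: pd.Series, skew_threshold: float = 0.99):
    # Heavily skewed real-valued columns get a rank-based transform;
    # the rest are standardized (hypothetical logic, for illustration only).
    if abs(column.skew()) > skew_threshold:
        return QuantileTransformer(output_distribution="normal", n_quantiles=min(len(column), 1000))
    return StandardScaler()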
autogluon/timeseries/transforms/target_scaler.py
CHANGED
@@ -1,10 +1,10 @@
-from typing import Literal, Optional, Protocol, overload
+from typing import Literal, Protocol, overload
 
 import numpy as np
 import pandas as pd
 from typing_extensions import Self
 
-from autogluon.timeseries.dataset…
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 
 
 class TargetScaler(Protocol):
@@ -27,10 +27,10 @@ class LocalTargetScaler(TargetScaler):
     ):
         self.target = target
         self.min_scale = min_scale
-        self.loc: Optional[pd.Series] = None
-        self.scale: Optional[pd.Series] = None
+        self.loc: pd.Series | None = None
+        self.scale: pd.Series | None = None
 
-    def _compute_loc_scale(self, target_series: pd.Series) -> tuple[Optional[pd.Series], Optional[pd.Series]]:
+    def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series | None, pd.Series | None]:
         raise NotImplementedError
 
     def fit_transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
@@ -45,7 +45,7 @@ class LocalTargetScaler(TargetScaler):
         self.scale = self.scale.clip(lower=self.min_scale).replace([np.inf, -np.inf], np.nan).fillna(1.0)
         return self
 
-    def _reindex_loc_scale(self, item_index: pd.Index) -> tuple[…]:
+    def _reindex_loc_scale(self, item_index: pd.Index) -> tuple[np.ndarray | float, np.ndarray | float]:
         """Reindex loc and scale parameters for the given item_ids and convert them to an array-like."""
         if self.loc is not None:
             loc = self.loc.reindex(item_index).to_numpy()
@@ -59,12 +59,12 @@ class LocalTargetScaler(TargetScaler):
 
     def transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
         """Apply scaling to the target column in the dataframe."""
-        loc, scale = self._reindex_loc_scale(item_index=data.index.get_level_values(ITEMID))
+        loc, scale = self._reindex_loc_scale(item_index=data.index.get_level_values(TimeSeriesDataFrame.ITEMID))
         return data.assign(**{self.target: (data[self.target] - loc) / scale})
 
     def inverse_transform(self, predictions: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
         """Apply inverse scaling to all columns in the predictions dataframe."""
-        loc, scale = self._reindex_loc_scale(item_index=predictions.index.get_level_values(ITEMID))
+        loc, scale = self._reindex_loc_scale(item_index=predictions.index.get_level_values(TimeSeriesDataFrame.ITEMID))
         return predictions.assign(**{col: predictions[col] * scale + loc for col in predictions.columns})
 
 
@@ -75,15 +75,15 @@ class LocalStandardScaler(LocalTargetScaler):
     """
 
     def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series, pd.Series]:
-        stats = target_series.groupby(level=ITEMID, sort=False).agg(["mean", "std"])
+        stats = target_series.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).agg(["mean", "std"])
         return stats["mean"], stats["std"]
 
 
 class LocalMeanAbsScaler(LocalTargetScaler):
     """Applies mean absolute scaling to each time series in the dataset."""
 
-    def _compute_loc_scale(self, target_series: pd.Series) -> tuple[Optional[pd.Series], pd.Series]:
-        scale = target_series.abs().groupby(level=ITEMID, sort=False).agg("mean")
+    def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series | None, pd.Series]:
+        scale = target_series.abs().groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).agg("mean")
         return None, scale
 
 
@@ -94,7 +94,7 @@ class LocalMinMaxScaler(LocalTargetScaler):
     """
 
     def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series, pd.Series]:
-        stats = target_series.abs().groupby(level=ITEMID, sort=False).agg(["min", "max"])
+        stats = target_series.abs().groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).agg(["min", "max"])
         scale = (stats["max"] - stats["min"]).clip(lower=self.min_scale)
         loc = stats["min"]
         return loc, scale
@@ -118,7 +118,7 @@ class LocalRobustScaler(LocalTargetScaler):
         assert 0 < self.q_min < self.q_max < 1
 
     def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series, pd.Series]:
-        grouped = target_series.groupby(level=ITEMID, sort=False)
+        grouped = target_series.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False)
         loc = grouped.median()
         lower = grouped.quantile(self.q_min)
         upper = grouped.quantile(self.q_max)
@@ -139,8 +139,8 @@ def get_target_scaler(name: None, **scaler_kwargs) -> None: ...
 @overload
 def get_target_scaler(name: Literal["standard", "mean_abs", "min_max", "robust"], **scaler_kwargs) -> TargetScaler: ...
 def get_target_scaler(
-    name: Optional[Literal["standard", "mean_abs", "min_max", "robust"]], **scaler_kwargs
-) -> Optional[TargetScaler]:
+    name: Literal["standard", "mean_abs", "min_max", "robust"] | None, **scaler_kwargs
+) -> TargetScaler | None:
     """Get LocalTargetScaler object from a string."""
     if name is None:
         return None
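A minimal standalone sketch of the per-item loc/scale round trip these classes implement, using plain pandas; the item_id/timestamp index names are assumptions:

import numpy as np
import pandas as pd

# Toy two-item panel indexed by (item_id, timestamp), mirroring the layout above.
idx = pd.MultiIndex.from_product(
    [["A", "B"], pd.date_range("2024-01-01", periods=3, freq="D")],
    names=["item_id", "timestamp"],
)
df = pd.DataFrame({"target": [1.0, 2.0, 3.0, 10.0, 20.0, 30.0]}, index=idx)

stats = df["target"].groupby(level="item_id", sort=False).agg(["mean", "std"])
loc = stats["mean"].reindex(idx.get_level_values("item_id")).to_numpy()
scale = stats["std"].reindex(idx.get_level_values("item_id")).to_numpy()

scaled = (df["target"] - loc) / scale  # forward transform
restored = scaled * scale + loc        # inverse transform
assert np.allclose(restored, df["target"])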
autogluon/timeseries/utils/constants.py
ADDED
@@ -0,0 +1,10 @@
+import joblib.externals.loky
+from joblib import cpu_count
+
+# By default, joblib w/ loky backend kills processes that take >300MB of RAM assuming that this is caused by a memory
+# leak. This leads to problems for some memory-hungry models like AutoARIMA/Theta.
+# This monkey patch removes this undesired behavior
+joblib.externals.loky.process_executor._MAX_MEMORY_LEAK_SIZE = int(3e10)
+
+# We use the same default n_jobs across AG-TS to ensure that Joblib reuses the process pool
+AG_DEFAULT_N_JOBS = max(cpu_count(only_physical_cores=True), 1)
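A small sketch of the pool-reuse point made in the comment above: two Parallel calls with the same n_jobs let joblib keep its loky workers alive instead of re-spawning them (fit_one is a hypothetical stand-in for model fitting):

from joblib import Parallel, delayed

from autogluon.timeseries.utils.constants import AG_DEFAULT_N_JOBS

def fit_one(i: int) -> int:
    return i * i

# Both calls use the shared default, so the second call reuses the worker pool.
first = Parallel(n_jobs=AG_DEFAULT_N_JOBS)(delayed(fit_one)(i) for i in range(8))
second = Parallel(n_jobs=AG_DEFAULT_N_JOBS)(delayed(fit_one)(i) for i in range(8))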
autogluon/timeseries/utils/datetime/lags.py
CHANGED
@@ -2,8 +2,6 @@
 Generate lag indices based on frequency string. Adapted from gluonts.time_feature.lag.
 """
 
-from typing import Optional
-
 import numpy as np
 import pandas as pd
 
@@ -70,7 +68,7 @@ def _make_lags_for_semi_month(multiple, num_cycles=3):
 def get_lags_for_frequency(
     freq: str,
     lag_ub: int = 1200,
-    num_lags: Optional[int] = None,
+    num_lags: int | None = None,
     num_default_lags: int = 7,
 ) -> list[int]:
     """
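For reference, a usage sketch of the signature above; the returned values depend on the gluonts-derived lag tables, so they are not shown:

from autogluon.timeseries.utils.datetime.lags import get_lags_for_frequency

daily_lags = get_lags_for_frequency("D")
# num_lags (now int | None) limits how many lags are returned (assumed semantics):
short_lags = get_lags_for_frequency("D", num_lags=5)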
autogluon/timeseries/utils/datetime/seasonality.py
CHANGED
@@ -1,5 +1,3 @@
-from typing import Union
-
 import pandas as pd
 
 from .base import norm_freq_str
@@ -22,7 +20,7 @@ DEFAULT_SEASONALITIES = {
 }
 
 
-def get_seasonality(freq: Union[str, None]) -> int:
+def get_seasonality(freq: str | None) -> int:
     """Return the seasonality of a given frequency. Adapted from ``gluonts.time_feature.seasonality``."""
     if freq is None:
         return 1
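Usage sketch for the updated signature; the hourly value follows the gluonts convention this function is adapted from:

from autogluon.timeseries.utils.datetime.seasonality import get_seasonality

assert get_seasonality(None) == 1  # no frequency -> trivial seasonality, per the code above
print(get_seasonality("h"))        # e.g. 24 for hourly data in DEFAULT_SEASONALITIES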
autogluon/timeseries/utils/features.py
CHANGED
@@ -2,7 +2,7 @@ import logging
 import reprlib
 import time
 from dataclasses import asdict, dataclass, field
-from typing import Any, Literal, Optional
+from typing import Any, Literal
 
 import numpy as np
 import pandas as pd
@@ -14,7 +14,7 @@ from autogluon.features.generators import (
     IdentityFeatureGenerator,
     PipelineFeatureGenerator,
 )
-from autogluon.timeseries.dataset…
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.utils.warning_filters import warning_filter
 
 logger = logging.getLogger(__name__)
@@ -30,6 +30,14 @@ class CovariateMetadata:
     known_covariates_cat: list[str] = field(default_factory=list)
     past_covariates_real: list[str] = field(default_factory=list)
     past_covariates_cat: list[str] = field(default_factory=list)
+    static_cat_cardinality: dict[str, int] = field(default_factory=dict)
+    known_cat_cardinality: dict[str, int] = field(default_factory=dict)
+    past_cat_cardinality: dict[str, int] = field(default_factory=dict)
+
+    def __post_init__(self):
+        assert list(self.static_cat_cardinality.keys()) == self.static_features_cat
+        assert list(self.known_cat_cardinality.keys()) == self.known_covariates_cat
+        assert list(self.past_cat_cardinality.keys()) == self.past_covariates_cat
 
     @property
     def static_features(self) -> list[str]:
@@ -136,7 +144,7 @@ class TimeSeriesFeatureGenerator:
         target: str,
         known_covariates_names: list[str],
         float_dtype: str = "float32",
-        num_samples: Optional[int] = 20_000,
+        num_samples: int | None = 20_000,
     ):
         self.target = target
         self.float_dtype = float_dtype
@@ -149,9 +157,9 @@ class TimeSeriesFeatureGenerator:
         self.past_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator()
         # Cat features with cat_count=1 are fine in static_features since they are repeated for all time steps in a TS
         self.static_feature_pipeline = ContinuousAndCategoricalFeatureGenerator(minimum_cat_count=1)
-        self._covariate_metadata: Optional[CovariateMetadata] = None
-        self._train_covariates_real_median: Optional[pd.Series] = None
-        self._train_static_real_median: Optional[pd.Series] = None
+        self._covariate_metadata: CovariateMetadata | None = None  # type ignore
+        self._train_covariates_real_median: pd.Series | None = None
+        self._train_static_real_median: pd.Series | None = None
 
     @property
     def required_column_names(self) -> list[str]:
@@ -221,11 +229,13 @@ class TimeSeriesFeatureGenerator:
             static_features_cat, static_features_real = self._detect_and_log_column_types(static_features_df)
             ignored_static_features = data.static_features.columns.difference(self.static_feature_pipeline.features_in)
             self._train_static_real_median = data.static_features[static_features_real].median()
+            static_cat_cardinality = static_features_df[static_features_cat].nunique().to_dict()
         else:
             static_features_cat = []
             static_features_real = []
             ignored_static_features = []
             static_features_df = None
+            static_cat_cardinality = {}
 
         if len(ignored_covariates) > 0 or len(ignored_static_features) > 0:
             logger.info("\nAutoGluon will ignore following non-numeric/non-informative columns:")
@@ -246,6 +256,9 @@ class TimeSeriesFeatureGenerator:
             past_covariates_real=past_covariates_real,
             static_features_cat=static_features_cat,
             static_features_real=static_features_real,
+            static_cat_cardinality=static_cat_cardinality,
+            known_cat_cardinality=df[known_covariates_cat].nunique().to_dict(),
+            past_cat_cardinality=df[past_covariates_cat].nunique().to_dict(),
         )
 
         # Median of real-valued covariates will be used for missing value imputation
@@ -279,7 +292,7 @@ class TimeSeriesFeatureGenerator:
         ts_df[column_names] = covariates_real
         return ts_df
 
-    def _impute_static_features(self, static_df: Optional[pd.DataFrame]) -> Optional[pd.DataFrame]:
+    def _impute_static_features(self, static_df: pd.DataFrame | None) -> pd.DataFrame | None:
         """Impute missing values in static features using the median."""
         static_real_names = self.covariate_metadata.static_features_real
         if static_df is not None and static_real_names:
@@ -328,8 +341,8 @@ class TimeSeriesFeatureGenerator:
         return self._impute_covariates(ts_df, column_names=self.covariate_metadata.covariates_real)
 
     def transform_future_known_covariates(
-        self, known_covariates: Optional[TimeSeriesDataFrame]
-    ) -> Optional[TimeSeriesDataFrame]:
+        self, known_covariates: TimeSeriesDataFrame | None
+    ) -> TimeSeriesDataFrame | None:
         assert self._is_fit, f"{self.__class__.__name__} has not been fit yet"
         if len(self.known_covariates_names) > 0:
             assert known_covariates is not None, "known_covariates must be provided at prediction time"
@@ -415,7 +428,9 @@ class AbstractFeatureImportanceTransform:
         if feature_name in self.covariate_metadata.past_covariates:
             # we'll have to work on the history of the data alone
             data[feature_name] = data[feature_name].copy()
-            feature_data = data[feature_name].groupby(level=ITEMID, sort=False).head(-self.prediction_length)
+            feature_data = (
+                data[feature_name].groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).head(-self.prediction_length)
+            )
             # Silence spurious FutureWarning raised by DataFrame.update https://github.com/pandas-dev/pandas/issues/57124
             with warning_filter():
                 data[feature_name].update(self._transform_series(feature_data, is_categorical=is_categorical))
@@ -439,7 +454,7 @@ class PermutationFeatureImportanceTransform(AbstractFeatureImportanceTransform):
         self,
         covariate_metadata: CovariateMetadata,
         prediction_length: int,
-        random_seed: Optional[int] = None,
+        random_seed: int | None = None,
         shuffle_type: Literal["itemwise", "naive"] = "itemwise",
         **kwargs,
     ):
@@ -455,7 +470,7 @@ class PermutationFeatureImportanceTransform(AbstractFeatureImportanceTransform):
         rng = np.random.RandomState(self.random_seed)
 
         if self.shuffle_type == "itemwise":
-            return feature_data.groupby(level=ITEMID, sort=False).transform(
+            return feature_data.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).transform(
                 lambda x: x.sample(frac=1, random_state=rng).values
             )
         elif self.shuffle_type == "naive":
@@ -483,6 +498,8 @@ class ConstantReplacementFeatureImportanceTransform(AbstractFeatureImportanceTransform):
 
     def _transform_series(self, feature_data: pd.Series, is_categorical: bool) -> pd.Series:
         if is_categorical:
-            return feature_data.groupby(level=ITEMID, sort=False).transform(lambda x: x.mode()[0])
+            return feature_data.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).transform(lambda x: x.mode()[0])
         else:
-            return feature_data.groupby(level=ITEMID, sort=False).transform(
+            return feature_data.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).transform(
+                self.real_value_aggregation
+            )  # type: ignore
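A minimal sketch of the new cardinality fields and the __post_init__ consistency check above; column names are made up, and unspecified fields keep their defaults:

from autogluon.timeseries.utils.features import CovariateMetadata

metadata = CovariateMetadata(
    known_covariates_cat=["promo"],
    past_covariates_cat=["weather"],
    known_cat_cardinality={"promo": 2},   # keys must match known_covariates_cat, in order
    past_cat_cardinality={"weather": 4},  # keys must match past_covariates_cat, in order
)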
autogluon/timeseries/utils/forecast.py
CHANGED
@@ -1,11 +1,10 @@
 import warnings
-from typing import Optional
 
 import numpy as np
 import pandas as pd
 
 from autogluon.common.utils.deprecated_utils import Deprecated
-from autogluon.timeseries.dataset…
+from autogluon.timeseries.dataset import TimeSeriesDataFrame
 
 
 def get_forecast_horizon_index_single_time_series(
@@ -16,7 +15,7 @@ def get_forecast_horizon_index_single_time_series(
     if offset is None:
         raise ValueError(f"Invalid frequency: {freq}")
     start_ts = past_timestamps.max() + 1 * offset
-    return pd.date_range(start=start_ts, periods=prediction_length, freq=freq, name=TIMESTAMP)
+    return pd.date_range(start=start_ts, periods=prediction_length, freq=freq, name=TimeSeriesDataFrame.TIMESTAMP)
 
 
 @Deprecated(
@@ -29,7 +28,7 @@ def get_forecast_horizon_index_ts_dataframe(*args, **kwargs) -> pd.MultiIndex:
 def make_future_data_frame(
     ts_dataframe: TimeSeriesDataFrame,
     prediction_length: int,
-    freq: Optional[str] = None,
+    freq: str | None = None,
 ) -> pd.DataFrame:
     """For each item in the dataframe, get timestamps for the next `prediction_length` time steps into the future.
 
@@ -37,14 +36,14 @@ def make_future_data_frame(
     """
     indptr = ts_dataframe.get_indptr()
     last = ts_dataframe.index[indptr[1:] - 1].to_frame(index=False)
-    item_ids = np.repeat(last[ITEMID].to_numpy(), prediction_length)
+    item_ids = np.repeat(last[TimeSeriesDataFrame.ITEMID].to_numpy(), prediction_length)
 
     if freq is None:
         freq = ts_dataframe.freq
     offset = pd.tseries.frequencies.to_offset(freq)
-    last_ts = pd.DatetimeIndex(last[TIMESTAMP])
+    last_ts = pd.DatetimeIndex(last[TimeSeriesDataFrame.TIMESTAMP])
     # Non-vectorized offsets like BusinessDay may produce a PerformanceWarning - we filter them
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", category=pd.errors.PerformanceWarning)
         timestamps = np.dstack([last_ts + step * offset for step in range(1, prediction_length + 1)]).ravel()  # type: ignore[operator]
-    return pd.DataFrame({ITEMID: item_ids, TIMESTAMP: timestamps})
+    return pd.DataFrame({TimeSeriesDataFrame.ITEMID: item_ids, TimeSeriesDataFrame.TIMESTAMP: timestamps})
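A standalone sketch of the np.dstack(...).ravel() trick used above: it produces all horizon timestamps grouped item-by-item (toy dates, daily frequency assumed):

import numpy as np
import pandas as pd

last_ts = pd.DatetimeIndex(["2024-01-03", "2024-02-10"])  # last observed timestamp per item
offset = pd.tseries.frequencies.to_offset("D")
prediction_length = 2

timestamps = np.dstack([last_ts + step * offset for step in range(1, prediction_length + 1)]).ravel()
# -> [2024-01-04, 2024-01-05, 2024-02-11, 2024-02-12]: both steps for item 1, then item 2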
autogluon/timeseries/utils/timer.py
ADDED
@@ -0,0 +1,173 @@
+import time
+
+from typing_extensions import Self
+
+
+class Timer:
+    """A timer class that tracks a start time, and computes the time elapsed and
+    time remaining, used for handling ``time_limit`` parameters in AutoGluon.
+
+    Parameters
+    ----------
+    time_limit
+        The time limit to set. If None, then ``time_remaining`` will return None, and
+        ``timed_out`` will return False.
+
+    Examples
+    --------
+    Basic usage with time limit:
+
+    >>> timer = Timer(time_limit=10.0).start()
+    >>> # Do some work...
+    >>> if timer.timed_out():
+    ...     print("Time limit exceeded!")
+    >>> print(f"Time remaining: {timer.time_remaining():.2f}s")
+
+    Using as a stopwatch (no time limit):
+
+    >>> timer = Timer(time_limit=None).start()
+    >>> # Do some work...
+    >>> print(f"Elapsed time: {timer.time_elapsed():.2f}s")
+
+    Checking time in a loop:
+
+    >>> timer = Timer(time_limit=5.0).start()
+    >>> for i in range(100):
+    ...     if timer.timed_out():
+    ...         break
+    ...     # Do work for iteration i
+    """
+
+    def __init__(
+        self,
+        time_limit: float | None,
+    ):
+        self.time_limit = time_limit
+
+        self.start_time = None
+
+    def start(self) -> Self:
+        """Start or reset the timer."""
+        self.start_time = time.monotonic()
+        return self
+
+    def time_elapsed(self) -> float:
+        """Total time elapsed since the timer was started. This method can also be used
+        when ``time_limit`` is set to None to count time forward (i.e., as opposed to
+        a countdown timer which other methods imply).
+        """
+        if self.start_time is None:
+            raise RuntimeError("Timer has not been started")
+        return time.monotonic() - self.start_time
+
+    def time_remaining(self) -> float | None:
+        """Total time remaining on the timer. If ``time_limit`` is None,
+        this method also returns None.
+        """
+        if self.start_time is None:
+            raise RuntimeError("Timer has not been started")
+        if self.time_limit is None:
+            return None
+        return self.time_limit - (time.monotonic() - self.start_time)
+
+    def timed_out(self) -> bool:
+        """Whether the timer has timed out. If ``time_limit`` is None, this method
+        always returns False.
+        """
+        if self.start_time is None:
+            raise RuntimeError("Timer has not been started")
+        if self.time_limit is None:
+            return False
+        return self.time_elapsed() >= self.time_limit
+
+
+class SplitTimer(Timer):
+    """A timer that splits remaining time across multiple rounds.
+
+    Extends Timer to divide the total time limit across a specified number of rounds,
+    useful for allocating time budgets to sequential operations. At each call of
+    ``next_round``, the timer re-distributes the remaining time evenly among
+    the remaining rounds.
+
+    Parameters
+    ----------
+    time_limit
+        Total time limit to split across all rounds. If None, ``round_time_remaining``
+        returns None.
+    rounds
+        Number of rounds to split the time across. Default is 1.
+
+    Examples
+    --------
+    Split time across 3 rounds:
+
+    >>> timer = SplitTimer(time_limit=10.0, rounds=3).start()
+    >>> time_round_1 = timer.round_time_remaining()  # Returns ~3.33
+    >>> # Do work for round 1
+    >>> timer.next_round()
+    >>> time_round_2 = timer.round_time_remaining()  # Returns remaining time divided by 2
+    >>> # Do work for round 2
+    >>> timer.next_round()
+    >>> time_round_3 = timer.round_time_remaining()  # Returns all remaining time
+    """
+
+    def __init__(
+        self,
+        time_limit: float | None,
+        rounds: int = 1,
+    ):
+        super().__init__(time_limit)
+        self.rounds = rounds
+
+        self.round_index: int
+        self.round_start_time: float
+
+    def start(self) -> Self:
+        """Reset and start the timer."""
+        super().start()
+        self.round_index = 0
+        self.round_start_time = time.monotonic()
+        return self
+
+    def round_time_remaining(self) -> float | None:
+        """Get the time budget for the current round.
+
+        Calculates the time allocation by dividing the remaining time equally among
+        the remaining rounds. This means if a previous round used less time than
+        allocated, subsequent rounds get more time, and vice versa.
+
+        Returns time budget for the current round in seconds. Returns None if
+        ``time_limit`` is None. Returns 0.0 if all rounds have been exhausted.
+        """
+        if self.time_limit is None:
+            return None
+        if self.start_time is None:
+            raise RuntimeError("Timer has not been started")
+
+        remaining_rounds = self.rounds - self.round_index
+        if remaining_rounds <= 0:
+            return 0.0
+
+        elapsed_time_at_round_start = self.round_start_time - self.start_time
+        remaining_time_at_round_start = self.time_limit - elapsed_time_at_round_start
+        round_time_budget = remaining_time_at_round_start / remaining_rounds
+
+        return round_time_budget - self.round_time_elapsed()
+
+    def round_time_elapsed(self) -> float:
+        """Total time elapsed since the start of this round."""
+        if self.start_time is None:
+            raise RuntimeError("Timer has not been started")
+        return time.monotonic() - self.round_start_time
+
+    def next_round(self) -> Self:
+        """Advance timer to the next round.
+
+        Increments the round counter, which affects the time allocation returned
+        by subsequent ``round_time_remaining`` calls.
+        """
+        if self.start_time is None:
+            raise RuntimeError("Timer has not been started")
+        self.round_index += 1
+        self.round_start_time = time.monotonic()
+        return self
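A worked example of the redistribution arithmetic in round_time_remaining (the timings are illustrative):

from autogluon.timeseries.utils.timer import SplitTimer

timer = SplitTimer(time_limit=10.0, rounds=3).start()
budget_1 = timer.round_time_remaining()  # ~10 / 3 = 3.33s for round 1
# ... suppose round 1 only uses 1s of its budget ...
timer.next_round()
budget_2 = timer.round_time_remaining()  # ~(10 - 1) / 2 = 4.5s: unused time is redistributed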
autogluon/timeseries/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "1.4.1b20250907"
+__version__ = "1.5.1b20260122"

autogluon.timeseries-1.5.1b20260122-py3.11-nspkg.pth
ADDED
@@ -0,0 +1 @@
+import sys, types, os;p = os.path.join(sys._getframe(1).f_locals['sitedir'], *('autogluon',));importlib = __import__('importlib.util');__import__('importlib.machinery');m = sys.modules.setdefault('autogluon', importlib.util.module_from_spec(importlib.machinery.PathFinder.find_spec('autogluon', [os.path.dirname(p)])));m = m or sys.modules.setdefault('autogluon', types.ModuleType('autogluon'));mp = (m or []) and m.__dict__.setdefault('__path__',[]);(p not in mp) and mp.append(p)
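For readability, a hypothetical multi-line equivalent of the one-line .pth bootstrap above: it registers `autogluon` as a namespace package whose __path__ includes this installation's autogluon/ directory. The function name and the explicit fallback for a missing spec are paraphrases, not the shipped code:

import importlib.machinery
import importlib.util
import os
import sys
import types

def _bootstrap_autogluon(sitedir: str) -> None:
    # In the real .pth file, sitedir comes from the site module's frame locals.
    p = os.path.join(sitedir, "autogluon")
    spec = importlib.machinery.PathFinder.find_spec("autogluon", [os.path.dirname(p)])
    module = sys.modules.setdefault(
        "autogluon",
        importlib.util.module_from_spec(spec) if spec else types.ModuleType("autogluon"),
    )
    path = module.__dict__.setdefault("__path__", [])
    if p not in path:
        path.append(p)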