autogluon.timeseries 1.4.1b20250906__py3-none-any.whl → 1.4.1b20251210__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of autogluon.timeseries might be problematic. Click here for more details.
- autogluon/timeseries/configs/hyperparameter_presets.py +2 -2
- autogluon/timeseries/dataset/ts_dataframe.py +97 -86
- autogluon/timeseries/learner.py +68 -35
- autogluon/timeseries/metrics/__init__.py +4 -4
- autogluon/timeseries/metrics/abstract.py +8 -8
- autogluon/timeseries/metrics/point.py +9 -9
- autogluon/timeseries/metrics/quantile.py +5 -5
- autogluon/timeseries/metrics/utils.py +4 -4
- autogluon/timeseries/models/__init__.py +4 -1
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +52 -39
- autogluon/timeseries/models/abstract/model_trial.py +2 -1
- autogluon/timeseries/models/abstract/tunable.py +8 -8
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +58 -62
- autogluon/timeseries/models/autogluon_tabular/per_step.py +26 -15
- autogluon/timeseries/models/autogluon_tabular/transforms.py +11 -9
- autogluon/timeseries/models/chronos/__init__.py +2 -1
- autogluon/timeseries/models/chronos/chronos2.py +361 -0
- autogluon/timeseries/models/chronos/model.py +125 -87
- autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +68 -36
- autogluon/timeseries/models/ensemble/__init__.py +34 -2
- autogluon/timeseries/models/ensemble/abstract.py +5 -42
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +236 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +73 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +167 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
- autogluon/timeseries/models/ensemble/{greedy.py → ensemble_selection.py} +41 -61
- autogluon/timeseries/models/ensemble/per_item_greedy.py +162 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +40 -0
- autogluon/timeseries/models/ensemble/{basic.py → weighted/basic.py} +6 -16
- autogluon/timeseries/models/ensemble/weighted/greedy.py +57 -0
- autogluon/timeseries/models/gluonts/abstract.py +25 -25
- autogluon/timeseries/models/gluonts/dataset.py +11 -11
- autogluon/timeseries/models/local/__init__.py +0 -7
- autogluon/timeseries/models/local/abstract_local_model.py +15 -18
- autogluon/timeseries/models/local/naive.py +2 -2
- autogluon/timeseries/models/local/npts.py +1 -1
- autogluon/timeseries/models/local/statsforecast.py +12 -12
- autogluon/timeseries/models/multi_window/multi_window_model.py +39 -24
- autogluon/timeseries/models/registry.py +3 -4
- autogluon/timeseries/models/toto/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
- autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
- autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
- autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
- autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
- autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
- autogluon/timeseries/models/toto/dataloader.py +108 -0
- autogluon/timeseries/models/toto/hf_pretrained_model.py +118 -0
- autogluon/timeseries/models/toto/model.py +236 -0
- autogluon/timeseries/predictor.py +301 -103
- autogluon/timeseries/regressor.py +27 -30
- autogluon/timeseries/splitter.py +3 -27
- autogluon/timeseries/trainer/ensemble_composer.py +439 -0
- autogluon/timeseries/trainer/model_set_builder.py +9 -9
- autogluon/timeseries/trainer/prediction_cache.py +16 -16
- autogluon/timeseries/trainer/trainer.py +300 -275
- autogluon/timeseries/trainer/utils.py +17 -0
- autogluon/timeseries/transforms/covariate_scaler.py +8 -8
- autogluon/timeseries/transforms/target_scaler.py +15 -15
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/lags.py +1 -3
- autogluon/timeseries/utils/datetime/seasonality.py +1 -3
- autogluon/timeseries/utils/features.py +18 -14
- autogluon/timeseries/utils/forecast.py +6 -7
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/version.py +1 -1
- autogluon.timeseries-1.4.1b20251210-py3.11-nspkg.pth +1 -0
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/METADATA +39 -22
- autogluon_timeseries-1.4.1b20251210.dist-info/RECORD +103 -0
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/WHEEL +1 -1
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -10
- autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
- autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -544
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -580
- autogluon.timeseries-1.4.1b20250906-py3.9-nspkg.pth +0 -1
- autogluon.timeseries-1.4.1b20250906.dist-info/RECORD +0 -75
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.4.1b20250906.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
logger = logging.getLogger("autogluon.timeseries.trainer")
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def log_scores_and_times(
|
|
7
|
+
val_score: float | None,
|
|
8
|
+
fit_time: float | None,
|
|
9
|
+
predict_time: float | None,
|
|
10
|
+
eval_metric_name: str,
|
|
11
|
+
):
|
|
12
|
+
if val_score is not None:
|
|
13
|
+
logger.info(f"\t{val_score:<7.4f}".ljust(15) + f"= Validation score ({eval_metric_name})")
|
|
14
|
+
if fit_time is not None:
|
|
15
|
+
logger.info(f"\t{fit_time:<7.2f} s".ljust(15) + "= Training runtime")
|
|
16
|
+
if predict_time is not None:
|
|
17
|
+
logger.info(f"\t{predict_time:<7.2f} s".ljust(15) + "= Validation (prediction) runtime")
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Literal,
|
|
2
|
+
from typing import Literal, Protocol, overload, runtime_checkable
|
|
3
3
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
import pandas as pd
|
|
6
6
|
from sklearn.compose import ColumnTransformer
|
|
7
7
|
from sklearn.preprocessing import QuantileTransformer, StandardScaler
|
|
8
8
|
|
|
9
|
-
from autogluon.timeseries.dataset
|
|
9
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
10
10
|
from autogluon.timeseries.utils.features import CovariateMetadata
|
|
11
11
|
from autogluon.timeseries.utils.warning_filters import warning_filter
|
|
12
12
|
|
|
@@ -25,8 +25,8 @@ class CovariateScaler(Protocol):
|
|
|
25
25
|
def transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame: ...
|
|
26
26
|
|
|
27
27
|
def transform_known_covariates(
|
|
28
|
-
self, known_covariates:
|
|
29
|
-
) ->
|
|
28
|
+
self, known_covariates: TimeSeriesDataFrame | None = None
|
|
29
|
+
) -> TimeSeriesDataFrame | None: ...
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
class GlobalCovariateScaler(CovariateScaler):
|
|
@@ -53,7 +53,7 @@ class GlobalCovariateScaler(CovariateScaler):
|
|
|
53
53
|
self.use_past_covariates = use_past_covariates
|
|
54
54
|
self.use_static_features = use_static_features
|
|
55
55
|
self.skew_threshold = skew_threshold
|
|
56
|
-
self._column_transformers:
|
|
56
|
+
self._column_transformers: dict[Literal["known", "past", "static"], ColumnTransformer] | None = None
|
|
57
57
|
|
|
58
58
|
def is_fit(self) -> bool:
|
|
59
59
|
return self._column_transformers is not None
|
|
@@ -105,8 +105,8 @@ class GlobalCovariateScaler(CovariateScaler):
|
|
|
105
105
|
return data
|
|
106
106
|
|
|
107
107
|
def transform_known_covariates(
|
|
108
|
-
self, known_covariates:
|
|
109
|
-
) ->
|
|
108
|
+
self, known_covariates: TimeSeriesDataFrame | None = None
|
|
109
|
+
) -> TimeSeriesDataFrame | None:
|
|
110
110
|
assert self._column_transformers is not None, "CovariateScaler must be fit before transform can be called"
|
|
111
111
|
|
|
112
112
|
if "known" in self._column_transformers:
|
|
@@ -154,7 +154,7 @@ AVAILABLE_COVARIATE_SCALERS = {
|
|
|
154
154
|
def get_covariate_scaler(name: None, **scaler_kwargs) -> None: ...
|
|
155
155
|
@overload
|
|
156
156
|
def get_covariate_scaler(name: Literal["global"], **scaler_kwargs) -> GlobalCovariateScaler: ...
|
|
157
|
-
def get_covariate_scaler(name:
|
|
157
|
+
def get_covariate_scaler(name: Literal["global"] | None = None, **scaler_kwargs) -> CovariateScaler | None:
|
|
158
158
|
if name is None:
|
|
159
159
|
return None
|
|
160
160
|
if name not in AVAILABLE_COVARIATE_SCALERS:
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
from typing import Literal,
|
|
1
|
+
from typing import Literal, Protocol, overload
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
import pandas as pd
|
|
5
5
|
from typing_extensions import Self
|
|
6
6
|
|
|
7
|
-
from autogluon.timeseries.dataset
|
|
7
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class TargetScaler(Protocol):
|
|
@@ -27,10 +27,10 @@ class LocalTargetScaler(TargetScaler):
|
|
|
27
27
|
):
|
|
28
28
|
self.target = target
|
|
29
29
|
self.min_scale = min_scale
|
|
30
|
-
self.loc:
|
|
31
|
-
self.scale:
|
|
30
|
+
self.loc: pd.Series | None = None
|
|
31
|
+
self.scale: pd.Series | None = None
|
|
32
32
|
|
|
33
|
-
def _compute_loc_scale(self, target_series: pd.Series) -> tuple[
|
|
33
|
+
def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series | None, pd.Series | None]:
|
|
34
34
|
raise NotImplementedError
|
|
35
35
|
|
|
36
36
|
def fit_transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
|
|
@@ -45,7 +45,7 @@ class LocalTargetScaler(TargetScaler):
|
|
|
45
45
|
self.scale = self.scale.clip(lower=self.min_scale).replace([np.inf, -np.inf], np.nan).fillna(1.0)
|
|
46
46
|
return self
|
|
47
47
|
|
|
48
|
-
def _reindex_loc_scale(self, item_index: pd.Index) -> tuple[
|
|
48
|
+
def _reindex_loc_scale(self, item_index: pd.Index) -> tuple[np.ndarray | float, np.ndarray | float]:
|
|
49
49
|
"""Reindex loc and scale parameters for the given item_ids and convert them to an array-like."""
|
|
50
50
|
if self.loc is not None:
|
|
51
51
|
loc = self.loc.reindex(item_index).to_numpy()
|
|
@@ -59,12 +59,12 @@ class LocalTargetScaler(TargetScaler):
|
|
|
59
59
|
|
|
60
60
|
def transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
|
|
61
61
|
"""Apply scaling to the target column in the dataframe."""
|
|
62
|
-
loc, scale = self._reindex_loc_scale(item_index=data.index.get_level_values(ITEMID))
|
|
62
|
+
loc, scale = self._reindex_loc_scale(item_index=data.index.get_level_values(TimeSeriesDataFrame.ITEMID))
|
|
63
63
|
return data.assign(**{self.target: (data[self.target] - loc) / scale})
|
|
64
64
|
|
|
65
65
|
def inverse_transform(self, predictions: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
|
|
66
66
|
"""Apply inverse scaling to all columns in the predictions dataframe."""
|
|
67
|
-
loc, scale = self._reindex_loc_scale(item_index=predictions.index.get_level_values(ITEMID))
|
|
67
|
+
loc, scale = self._reindex_loc_scale(item_index=predictions.index.get_level_values(TimeSeriesDataFrame.ITEMID))
|
|
68
68
|
return predictions.assign(**{col: predictions[col] * scale + loc for col in predictions.columns})
|
|
69
69
|
|
|
70
70
|
|
|
@@ -75,15 +75,15 @@ class LocalStandardScaler(LocalTargetScaler):
|
|
|
75
75
|
"""
|
|
76
76
|
|
|
77
77
|
def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series, pd.Series]:
|
|
78
|
-
stats = target_series.groupby(level=ITEMID, sort=False).agg(["mean", "std"])
|
|
78
|
+
stats = target_series.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).agg(["mean", "std"])
|
|
79
79
|
return stats["mean"], stats["std"]
|
|
80
80
|
|
|
81
81
|
|
|
82
82
|
class LocalMeanAbsScaler(LocalTargetScaler):
|
|
83
83
|
"""Applies mean absolute scaling to each time series in the dataset."""
|
|
84
84
|
|
|
85
|
-
def _compute_loc_scale(self, target_series: pd.Series) -> tuple[
|
|
86
|
-
scale = target_series.abs().groupby(level=ITEMID, sort=False).agg("mean")
|
|
85
|
+
def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series | None, pd.Series]:
|
|
86
|
+
scale = target_series.abs().groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).agg("mean")
|
|
87
87
|
return None, scale
|
|
88
88
|
|
|
89
89
|
|
|
@@ -94,7 +94,7 @@ class LocalMinMaxScaler(LocalTargetScaler):
|
|
|
94
94
|
"""
|
|
95
95
|
|
|
96
96
|
def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series, pd.Series]:
|
|
97
|
-
stats = target_series.abs().groupby(level=ITEMID, sort=False).agg(["min", "max"])
|
|
97
|
+
stats = target_series.abs().groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).agg(["min", "max"])
|
|
98
98
|
scale = (stats["max"] - stats["min"]).clip(lower=self.min_scale)
|
|
99
99
|
loc = stats["min"]
|
|
100
100
|
return loc, scale
|
|
@@ -118,7 +118,7 @@ class LocalRobustScaler(LocalTargetScaler):
|
|
|
118
118
|
assert 0 < self.q_min < self.q_max < 1
|
|
119
119
|
|
|
120
120
|
def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series, pd.Series]:
|
|
121
|
-
grouped = target_series.groupby(level=ITEMID, sort=False)
|
|
121
|
+
grouped = target_series.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False)
|
|
122
122
|
loc = grouped.median()
|
|
123
123
|
lower = grouped.quantile(self.q_min)
|
|
124
124
|
upper = grouped.quantile(self.q_max)
|
|
@@ -139,8 +139,8 @@ def get_target_scaler(name: None, **scaler_kwargs) -> None: ...
|
|
|
139
139
|
@overload
|
|
140
140
|
def get_target_scaler(name: Literal["standard", "mean_abs", "min_max", "robust"], **scaler_kwargs) -> TargetScaler: ...
|
|
141
141
|
def get_target_scaler(
|
|
142
|
-
name:
|
|
143
|
-
) ->
|
|
142
|
+
name: Literal["standard", "mean_abs", "min_max", "robust"] | None, **scaler_kwargs
|
|
143
|
+
) -> TargetScaler | None:
|
|
144
144
|
"""Get LocalTargetScaler object from a string."""
|
|
145
145
|
if name is None:
|
|
146
146
|
return None
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import joblib.externals.loky
|
|
2
|
+
from joblib import cpu_count
|
|
3
|
+
|
|
4
|
+
# By default, joblib w/ loky backend kills processes that take >300MB of RAM assuming that this is caused by a memory
|
|
5
|
+
# leak. This leads to problems for some memory-hungry models like AutoARIMA/Theta.
|
|
6
|
+
# This monkey patch raises the threshold to 3e10 bytes (~30 GB), which effectively disables this undesired behavior.
|
|
7
|
+
joblib.externals.loky.process_executor._MAX_MEMORY_LEAK_SIZE = int(3e10)
|
|
8
|
+
|
|
9
|
+
# We use the same default n_jobs across AG-TS to ensure that Joblib reuses the process pool
|
|
10
|
+
AG_DEFAULT_N_JOBS = max(cpu_count(only_physical_cores=True), 1)
|
|
@@ -2,8 +2,6 @@
|
|
|
2
2
|
Generate lag indices based on frequency string. Adapted from gluonts.time_feature.lag.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
from typing import Optional
|
|
6
|
-
|
|
7
5
|
import numpy as np
|
|
8
6
|
import pandas as pd
|
|
9
7
|
|
|
@@ -70,7 +68,7 @@ def _make_lags_for_semi_month(multiple, num_cycles=3):
|
|
|
70
68
|
def get_lags_for_frequency(
|
|
71
69
|
freq: str,
|
|
72
70
|
lag_ub: int = 1200,
|
|
73
|
-
num_lags:
|
|
71
|
+
num_lags: int | None = None,
|
|
74
72
|
num_default_lags: int = 7,
|
|
75
73
|
) -> list[int]:
|
|
76
74
|
"""
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Union
|
|
2
|
-
|
|
3
1
|
import pandas as pd
|
|
4
2
|
|
|
5
3
|
from .base import norm_freq_str
|
|
@@ -22,7 +20,7 @@ DEFAULT_SEASONALITIES = {
|
|
|
22
20
|
}
|
|
23
21
|
|
|
24
22
|
|
|
25
|
-
def get_seasonality(freq:
|
|
23
|
+
def get_seasonality(freq: str | None) -> int:
|
|
26
24
|
"""Return the seasonality of a given frequency. Adapted from ``gluonts.time_feature.seasonality``."""
|
|
27
25
|
if freq is None:
|
|
28
26
|
return 1
|
|
@@ -2,7 +2,7 @@ import logging
|
|
|
2
2
|
import reprlib
|
|
3
3
|
import time
|
|
4
4
|
from dataclasses import asdict, dataclass, field
|
|
5
|
-
from typing import Any, Literal
|
|
5
|
+
from typing import Any, Literal
|
|
6
6
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
import pandas as pd
|
|
@@ -14,7 +14,7 @@ from autogluon.features.generators import (
|
|
|
14
14
|
IdentityFeatureGenerator,
|
|
15
15
|
PipelineFeatureGenerator,
|
|
16
16
|
)
|
|
17
|
-
from autogluon.timeseries.dataset
|
|
17
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
18
18
|
from autogluon.timeseries.utils.warning_filters import warning_filter
|
|
19
19
|
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
@@ -136,7 +136,7 @@ class TimeSeriesFeatureGenerator:
|
|
|
136
136
|
target: str,
|
|
137
137
|
known_covariates_names: list[str],
|
|
138
138
|
float_dtype: str = "float32",
|
|
139
|
-
num_samples:
|
|
139
|
+
num_samples: int | None = 20_000,
|
|
140
140
|
):
|
|
141
141
|
self.target = target
|
|
142
142
|
self.float_dtype = float_dtype
|
|
@@ -149,9 +149,9 @@ class TimeSeriesFeatureGenerator:
|
|
|
149
149
|
self.past_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator()
|
|
150
150
|
# Cat features with cat_count=1 are fine in static_features since they are repeated for all time steps in a TS
|
|
151
151
|
self.static_feature_pipeline = ContinuousAndCategoricalFeatureGenerator(minimum_cat_count=1)
|
|
152
|
-
self._covariate_metadata:
|
|
153
|
-
self._train_covariates_real_median:
|
|
154
|
-
self._train_static_real_median:
|
|
152
|
+
self._covariate_metadata: CovariateMetadata | None = None # type ignore
|
|
153
|
+
self._train_covariates_real_median: pd.Series | None = None
|
|
154
|
+
self._train_static_real_median: pd.Series | None = None
|
|
155
155
|
|
|
156
156
|
@property
|
|
157
157
|
def required_column_names(self) -> list[str]:
|
|
@@ -279,7 +279,7 @@ class TimeSeriesFeatureGenerator:
|
|
|
279
279
|
ts_df[column_names] = covariates_real
|
|
280
280
|
return ts_df
|
|
281
281
|
|
|
282
|
-
def _impute_static_features(self, static_df:
|
|
282
|
+
def _impute_static_features(self, static_df: pd.DataFrame | None) -> pd.DataFrame | None:
|
|
283
283
|
"""Impute missing values in static features using the median."""
|
|
284
284
|
static_real_names = self.covariate_metadata.static_features_real
|
|
285
285
|
if static_df is not None and static_real_names:
|
|
@@ -328,8 +328,8 @@ class TimeSeriesFeatureGenerator:
|
|
|
328
328
|
return self._impute_covariates(ts_df, column_names=self.covariate_metadata.covariates_real)
|
|
329
329
|
|
|
330
330
|
def transform_future_known_covariates(
|
|
331
|
-
self, known_covariates:
|
|
332
|
-
) ->
|
|
331
|
+
self, known_covariates: TimeSeriesDataFrame | None
|
|
332
|
+
) -> TimeSeriesDataFrame | None:
|
|
333
333
|
assert self._is_fit, f"{self.__class__.__name__} has not been fit yet"
|
|
334
334
|
if len(self.known_covariates_names) > 0:
|
|
335
335
|
assert known_covariates is not None, "known_covariates must be provided at prediction time"
|
|
@@ -415,7 +415,9 @@ class AbstractFeatureImportanceTransform:
|
|
|
415
415
|
if feature_name in self.covariate_metadata.past_covariates:
|
|
416
416
|
# we'll have to work on the history of the data alone
|
|
417
417
|
data[feature_name] = data[feature_name].copy()
|
|
418
|
-
feature_data =
|
|
418
|
+
feature_data = (
|
|
419
|
+
data[feature_name].groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).head(-self.prediction_length)
|
|
420
|
+
)
|
|
419
421
|
# Silence spurious FutureWarning raised by DataFrame.update https://github.com/pandas-dev/pandas/issues/57124
|
|
420
422
|
with warning_filter():
|
|
421
423
|
data[feature_name].update(self._transform_series(feature_data, is_categorical=is_categorical))
|
|
@@ -439,7 +441,7 @@ class PermutationFeatureImportanceTransform(AbstractFeatureImportanceTransform):
|
|
|
439
441
|
self,
|
|
440
442
|
covariate_metadata: CovariateMetadata,
|
|
441
443
|
prediction_length: int,
|
|
442
|
-
random_seed:
|
|
444
|
+
random_seed: int | None = None,
|
|
443
445
|
shuffle_type: Literal["itemwise", "naive"] = "itemwise",
|
|
444
446
|
**kwargs,
|
|
445
447
|
):
|
|
@@ -455,7 +457,7 @@ class PermutationFeatureImportanceTransform(AbstractFeatureImportanceTransform):
|
|
|
455
457
|
rng = np.random.RandomState(self.random_seed)
|
|
456
458
|
|
|
457
459
|
if self.shuffle_type == "itemwise":
|
|
458
|
-
return feature_data.groupby(level=ITEMID, sort=False).transform(
|
|
460
|
+
return feature_data.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).transform(
|
|
459
461
|
lambda x: x.sample(frac=1, random_state=rng).values
|
|
460
462
|
)
|
|
461
463
|
elif self.shuffle_type == "naive":
|
|
@@ -483,6 +485,8 @@ class ConstantReplacementFeatureImportanceTransform(AbstractFeatureImportanceTra
|
|
|
483
485
|
|
|
484
486
|
def _transform_series(self, feature_data: pd.Series, is_categorical: bool) -> pd.Series:
|
|
485
487
|
if is_categorical:
|
|
486
|
-
return feature_data.groupby(level=ITEMID, sort=False).transform(lambda x: x.mode()[0])
|
|
488
|
+
return feature_data.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).transform(lambda x: x.mode()[0])
|
|
487
489
|
else:
|
|
488
|
-
return feature_data.groupby(level=ITEMID, sort=False).transform(
|
|
490
|
+
return feature_data.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).transform(
|
|
491
|
+
self.real_value_aggregation
|
|
492
|
+
) # type: ignore
|
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
import warnings
|
|
2
|
-
from typing import Optional
|
|
3
2
|
|
|
4
3
|
import numpy as np
|
|
5
4
|
import pandas as pd
|
|
6
5
|
|
|
7
6
|
from autogluon.common.utils.deprecated_utils import Deprecated
|
|
8
|
-
from autogluon.timeseries.dataset
|
|
7
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
9
8
|
|
|
10
9
|
|
|
11
10
|
def get_forecast_horizon_index_single_time_series(
|
|
@@ -16,7 +15,7 @@ def get_forecast_horizon_index_single_time_series(
|
|
|
16
15
|
if offset is None:
|
|
17
16
|
raise ValueError(f"Invalid frequency: {freq}")
|
|
18
17
|
start_ts = past_timestamps.max() + 1 * offset
|
|
19
|
-
return pd.date_range(start=start_ts, periods=prediction_length, freq=freq, name=TIMESTAMP)
|
|
18
|
+
return pd.date_range(start=start_ts, periods=prediction_length, freq=freq, name=TimeSeriesDataFrame.TIMESTAMP)
|
|
20
19
|
|
|
21
20
|
|
|
22
21
|
@Deprecated(
|
|
@@ -29,7 +28,7 @@ def get_forecast_horizon_index_ts_dataframe(*args, **kwargs) -> pd.MultiIndex:
|
|
|
29
28
|
def make_future_data_frame(
|
|
30
29
|
ts_dataframe: TimeSeriesDataFrame,
|
|
31
30
|
prediction_length: int,
|
|
32
|
-
freq:
|
|
31
|
+
freq: str | None = None,
|
|
33
32
|
) -> pd.DataFrame:
|
|
34
33
|
"""For each item in the dataframe, get timestamps for the next `prediction_length` time steps into the future.
|
|
35
34
|
|
|
@@ -37,14 +36,14 @@ def make_future_data_frame(
|
|
|
37
36
|
"""
|
|
38
37
|
indptr = ts_dataframe.get_indptr()
|
|
39
38
|
last = ts_dataframe.index[indptr[1:] - 1].to_frame(index=False)
|
|
40
|
-
item_ids = np.repeat(last[ITEMID].to_numpy(), prediction_length)
|
|
39
|
+
item_ids = np.repeat(last[TimeSeriesDataFrame.ITEMID].to_numpy(), prediction_length)
|
|
41
40
|
|
|
42
41
|
if freq is None:
|
|
43
42
|
freq = ts_dataframe.freq
|
|
44
43
|
offset = pd.tseries.frequencies.to_offset(freq)
|
|
45
|
-
last_ts = pd.DatetimeIndex(last[TIMESTAMP])
|
|
44
|
+
last_ts = pd.DatetimeIndex(last[TimeSeriesDataFrame.TIMESTAMP])
|
|
46
45
|
# Non-vectorized offsets like BusinessDay may produce a PerformanceWarning - we filter them
|
|
47
46
|
with warnings.catch_warnings():
|
|
48
47
|
warnings.simplefilter("ignore", category=pd.errors.PerformanceWarning)
|
|
49
48
|
timestamps = np.dstack([last_ts + step * offset for step in range(1, prediction_length + 1)]).ravel() # type: ignore[operator]
|
|
50
|
-
return pd.DataFrame({ITEMID: item_ids, TIMESTAMP: timestamps})
|
|
49
|
+
return pd.DataFrame({TimeSeriesDataFrame.ITEMID: item_ids, TimeSeriesDataFrame.TIMESTAMP: timestamps})
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
import time
|
|
2
|
+
|
|
3
|
+
from typing_extensions import Self
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Timer:
|
|
7
|
+
"""A timer class that tracks a start time, and computes the time elapsed and
|
|
8
|
+
time remaining, used for handling ``time_limit`` parameters in AutoGluon.
|
|
9
|
+
|
|
10
|
+
Parameters
|
|
11
|
+
----------
|
|
12
|
+
time_limit
|
|
13
|
+
The time limit to set. If None, then ``time_remaining`` will return None, and
|
|
14
|
+
``timed_out`` will return False.
|
|
15
|
+
|
|
16
|
+
Examples
|
|
17
|
+
--------
|
|
18
|
+
Basic usage with time limit:
|
|
19
|
+
|
|
20
|
+
>>> timer = Timer(time_limit=10.0).start()
|
|
21
|
+
>>> # Do some work...
|
|
22
|
+
>>> if timer.timed_out():
|
|
23
|
+
... print("Time limit exceeded!")
|
|
24
|
+
>>> print(f"Time remaining: {timer.time_remaining():.2f}s")
|
|
25
|
+
|
|
26
|
+
Using as a stopwatch (no time limit):
|
|
27
|
+
|
|
28
|
+
>>> timer = Timer(time_limit=None).start()
|
|
29
|
+
>>> # Do some work...
|
|
30
|
+
>>> print(f"Elapsed time: {timer.time_elapsed():.2f}s")
|
|
31
|
+
|
|
32
|
+
Checking time in a loop:
|
|
33
|
+
|
|
34
|
+
>>> timer = Timer(time_limit=5.0).start()
|
|
35
|
+
>>> for i in range(100):
|
|
36
|
+
... if timer.timed_out():
|
|
37
|
+
... break
|
|
38
|
+
... # Do work for iteration i
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
def __init__(
|
|
42
|
+
self,
|
|
43
|
+
time_limit: float | None,
|
|
44
|
+
):
|
|
45
|
+
self.time_limit = time_limit
|
|
46
|
+
|
|
47
|
+
self.start_time = None
|
|
48
|
+
|
|
49
|
+
def start(self) -> Self:
|
|
50
|
+
"""Start or reset the timer."""
|
|
51
|
+
self.start_time = time.monotonic()
|
|
52
|
+
return self
|
|
53
|
+
|
|
54
|
+
def time_elapsed(self) -> float:
|
|
55
|
+
"""Total time elapsed since the timer was started. This method can also be used
|
|
56
|
+
when ``time_limit`` is set to None to count time forward (i.e., as opposed to
|
|
57
|
+
a countdown timer which other methods imply).
|
|
58
|
+
"""
|
|
59
|
+
if self.start_time is None:
|
|
60
|
+
raise RuntimeError("Timer has not been started")
|
|
61
|
+
return time.monotonic() - self.start_time
|
|
62
|
+
|
|
63
|
+
def time_remaining(self) -> float | None:
|
|
64
|
+
"""Total time remaining on the timer. If ``time_limit`` is None,
|
|
65
|
+
this method also returns None.
|
|
66
|
+
"""
|
|
67
|
+
if self.start_time is None:
|
|
68
|
+
raise RuntimeError("Timer has not been started")
|
|
69
|
+
if self.time_limit is None:
|
|
70
|
+
return None
|
|
71
|
+
return self.time_limit - (time.monotonic() - self.start_time)
|
|
72
|
+
|
|
73
|
+
def timed_out(self) -> bool:
|
|
74
|
+
"""Whether the timer has timed out. If ``time_limit`` is None, this method
|
|
75
|
+
always returns False.
|
|
76
|
+
"""
|
|
77
|
+
if self.start_time is None:
|
|
78
|
+
raise RuntimeError("Timer has not been started")
|
|
79
|
+
if self.time_limit is None:
|
|
80
|
+
return False
|
|
81
|
+
return self.time_elapsed() >= self.time_limit
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class SplitTimer(Timer):
    """A timer that hands out its time limit in per-round budgets.

    Builds on Timer by dividing the overall time limit over a fixed number of rounds,
    which is handy when several sequential operations must share one budget. Every call
    to ``next_round`` re-distributes whatever time is left evenly over the rounds that
    are still to come.

    Parameters
    ----------
    time_limit
        Total time limit to split across all rounds. If None, ``round_time_remaining``
        returns None.
    rounds
        Number of rounds to split the time across. Default is 1.

    Examples
    --------
    Split time across 3 rounds:

    >>> timer = SplitTimer(time_limit=10.0, rounds=3).start()
    >>> time_round_1 = timer.round_time_remaining()  # Returns ~3.33
    >>> # Do work for round 1
    >>> timer.next_round()
    >>> time_round_2 = timer.round_time_remaining()  # Returns remaining time divided by 2
    >>> # Do work for round 2
    >>> timer.next_round()
    >>> time_round_3 = timer.round_time_remaining()  # Returns all remaining time
    """

    def __init__(
        self,
        time_limit: float | None,
        rounds: int = 1,
    ):
        super().__init__(time_limit)
        self.rounds = rounds

        # Declared here for type checkers only; both are first assigned in ``start``.
        self.round_index: int
        self.round_start_time: float

    def start(self) -> Self:
        """Reset and start the timer, beginning with round 0."""
        super().start()
        self.round_index = 0
        # Taken after the parent timer's own start timestamp has been recorded.
        self.round_start_time = time.monotonic()
        return self

    def round_time_remaining(self) -> float | None:
        """Get the time budget that is left for the current round.

        The time that remained when the current round began is divided equally among
        the rounds not yet finished (including the current one), and the time already
        spent in the current round is subtracted. A round that finishes early therefore
        leaves more time to the later rounds, and a slow round leaves less.

        Returns None if ``time_limit`` is None, and 0.0 if all rounds have been
        exhausted. Raises ``RuntimeError`` if a time limit is set but the timer has
        not been started.
        """
        if self.time_limit is None:
            return None
        if self.start_time is None:
            raise RuntimeError("Timer has not been started")

        rounds_left = self.rounds - self.round_index
        if rounds_left > 0:
            spent_before_round = self.round_start_time - self.start_time
            left_at_round_start = self.time_limit - spent_before_round
            per_round_budget = left_at_round_start / rounds_left
            return per_round_budget - self.round_time_elapsed()

        return 0.0

    def round_time_elapsed(self) -> float:
        """Total time elapsed since the start of this round."""
        if self.start_time is None:
            raise RuntimeError("Timer has not been started")
        now = time.monotonic()
        return now - self.round_start_time

    def next_round(self) -> Self:
        """Advance timer to the next round.

        Bumps the round counter and restarts the per-round clock, which changes the
        allocation reported by later ``round_time_remaining`` calls.
        """
        if self.start_time is None:
            raise RuntimeError("Timer has not been started")
        self.round_index = self.round_index + 1
        self.round_start_time = time.monotonic()
        return self
|
autogluon/timeseries/version.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import sys, types, os;p = os.path.join(sys._getframe(1).f_locals['sitedir'], *('autogluon',));importlib = __import__('importlib.util');__import__('importlib.machinery');m = sys.modules.setdefault('autogluon', importlib.util.module_from_spec(importlib.machinery.PathFinder.find_spec('autogluon', [os.path.dirname(p)])));m = m or sys.modules.setdefault('autogluon', types.ModuleType('autogluon'));mp = (m or []) and m.__dict__.setdefault('__path__',[]);(p not in mp) and mp.append(p)
|