autogluon.timeseries 1.0.1b20240304__py3-none-any.whl → 1.4.1b20251210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of autogluon.timeseries might be problematic.

Files changed (108)
  1. autogluon/timeseries/configs/__init__.py +3 -2
  2. autogluon/timeseries/configs/hyperparameter_presets.py +62 -0
  3. autogluon/timeseries/configs/predictor_presets.py +84 -0
  4. autogluon/timeseries/dataset/ts_dataframe.py +339 -186
  5. autogluon/timeseries/learner.py +192 -60
  6. autogluon/timeseries/metrics/__init__.py +55 -11
  7. autogluon/timeseries/metrics/abstract.py +96 -25
  8. autogluon/timeseries/metrics/point.py +186 -39
  9. autogluon/timeseries/metrics/quantile.py +47 -20
  10. autogluon/timeseries/metrics/utils.py +6 -6
  11. autogluon/timeseries/models/__init__.py +13 -7
  12. autogluon/timeseries/models/abstract/__init__.py +2 -2
  13. autogluon/timeseries/models/abstract/abstract_timeseries_model.py +533 -273
  14. autogluon/timeseries/models/abstract/model_trial.py +10 -10
  15. autogluon/timeseries/models/abstract/tunable.py +189 -0
  16. autogluon/timeseries/models/autogluon_tabular/__init__.py +2 -0
  17. autogluon/timeseries/models/autogluon_tabular/mlforecast.py +369 -215
  18. autogluon/timeseries/models/autogluon_tabular/per_step.py +513 -0
  19. autogluon/timeseries/models/autogluon_tabular/transforms.py +67 -0
  20. autogluon/timeseries/models/autogluon_tabular/utils.py +3 -51
  21. autogluon/timeseries/models/chronos/__init__.py +4 -0
  22. autogluon/timeseries/models/chronos/chronos2.py +361 -0
  23. autogluon/timeseries/models/chronos/model.py +738 -0
  24. autogluon/timeseries/models/chronos/utils.py +369 -0
  25. autogluon/timeseries/models/ensemble/__init__.py +35 -2
  26. autogluon/timeseries/models/ensemble/{abstract_timeseries_ensemble.py → abstract.py} +50 -26
  27. autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
  28. autogluon/timeseries/models/ensemble/array_based/abstract.py +236 -0
  29. autogluon/timeseries/models/ensemble/array_based/models.py +73 -0
  30. autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
  31. autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
  32. autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +167 -0
  33. autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
  34. autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
  35. autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
  36. autogluon/timeseries/models/ensemble/per_item_greedy.py +162 -0
  37. autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
  38. autogluon/timeseries/models/ensemble/weighted/abstract.py +40 -0
  39. autogluon/timeseries/models/ensemble/weighted/basic.py +78 -0
  40. autogluon/timeseries/models/ensemble/weighted/greedy.py +57 -0
  41. autogluon/timeseries/models/gluonts/__init__.py +3 -1
  42. autogluon/timeseries/models/gluonts/abstract.py +583 -0
  43. autogluon/timeseries/models/gluonts/dataset.py +109 -0
  44. autogluon/timeseries/models/gluonts/{torch/models.py → models.py} +185 -44
  45. autogluon/timeseries/models/local/__init__.py +1 -10
  46. autogluon/timeseries/models/local/abstract_local_model.py +150 -97
  47. autogluon/timeseries/models/local/naive.py +31 -23
  48. autogluon/timeseries/models/local/npts.py +6 -2
  49. autogluon/timeseries/models/local/statsforecast.py +99 -112
  50. autogluon/timeseries/models/multi_window/multi_window_model.py +99 -40
  51. autogluon/timeseries/models/registry.py +64 -0
  52. autogluon/timeseries/models/toto/__init__.py +3 -0
  53. autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
  54. autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
  55. autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
  56. autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
  57. autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
  58. autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
  59. autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
  60. autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
  61. autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
  62. autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
  63. autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
  64. autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
  65. autogluon/timeseries/models/toto/dataloader.py +108 -0
  66. autogluon/timeseries/models/toto/hf_pretrained_model.py +118 -0
  67. autogluon/timeseries/models/toto/model.py +236 -0
  68. autogluon/timeseries/predictor.py +826 -305
  69. autogluon/timeseries/regressor.py +253 -0
  70. autogluon/timeseries/splitter.py +10 -31
  71. autogluon/timeseries/trainer/__init__.py +2 -3
  72. autogluon/timeseries/trainer/ensemble_composer.py +439 -0
  73. autogluon/timeseries/trainer/model_set_builder.py +256 -0
  74. autogluon/timeseries/trainer/prediction_cache.py +149 -0
  75. autogluon/timeseries/trainer/trainer.py +1298 -0
  76. autogluon/timeseries/trainer/utils.py +17 -0
  77. autogluon/timeseries/transforms/__init__.py +2 -0
  78. autogluon/timeseries/transforms/covariate_scaler.py +164 -0
  79. autogluon/timeseries/transforms/target_scaler.py +149 -0
  80. autogluon/timeseries/utils/constants.py +10 -0
  81. autogluon/timeseries/utils/datetime/base.py +38 -20
  82. autogluon/timeseries/utils/datetime/lags.py +18 -16
  83. autogluon/timeseries/utils/datetime/seasonality.py +14 -14
  84. autogluon/timeseries/utils/datetime/time_features.py +17 -14
  85. autogluon/timeseries/utils/features.py +317 -53
  86. autogluon/timeseries/utils/forecast.py +31 -17
  87. autogluon/timeseries/utils/timer.py +173 -0
  88. autogluon/timeseries/utils/warning_filters.py +44 -6
  89. autogluon/timeseries/version.py +2 -1
  90. autogluon.timeseries-1.4.1b20251210-py3.11-nspkg.pth +1 -0
  91. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/METADATA +71 -47
  92. autogluon_timeseries-1.4.1b20251210.dist-info/RECORD +103 -0
  93. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/WHEEL +1 -1
  94. autogluon/timeseries/configs/presets_configs.py +0 -11
  95. autogluon/timeseries/evaluator.py +0 -6
  96. autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -170
  97. autogluon/timeseries/models/gluonts/abstract_gluonts.py +0 -550
  98. autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
  99. autogluon/timeseries/models/presets.py +0 -325
  100. autogluon/timeseries/trainer/abstract_trainer.py +0 -1144
  101. autogluon/timeseries/trainer/auto_trainer.py +0 -74
  102. autogluon.timeseries-1.0.1b20240304-py3.8-nspkg.pth +0 -1
  103. autogluon.timeseries-1.0.1b20240304.dist-info/RECORD +0 -58
  104. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/LICENSE +0 -0
  105. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info/licenses}/NOTICE +0 -0
  106. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/namespace_packages.txt +0 -0
  107. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/top_level.txt +0 -0
  108. {autogluon.timeseries-1.0.1b20240304.dist-info → autogluon_timeseries-1.4.1b20251210.dist-info}/zip-safe +0 -0

autogluon/timeseries/trainer/utils.py
@@ -0,0 +1,17 @@
+ import logging
+
+ logger = logging.getLogger("autogluon.timeseries.trainer")
+
+
+ def log_scores_and_times(
+     val_score: float | None,
+     fit_time: float | None,
+     predict_time: float | None,
+     eval_metric_name: str,
+ ):
+     if val_score is not None:
+         logger.info(f"\t{val_score:<7.4f}".ljust(15) + f"= Validation score ({eval_metric_name})")
+     if fit_time is not None:
+         logger.info(f"\t{fit_time:<7.2f} s".ljust(15) + "= Training runtime")
+     if predict_time is not None:
+         logger.info(f"\t{predict_time:<7.2f} s".ljust(15) + "= Validation (prediction) runtime")
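
For orientation, a minimal sketch of how this new logging helper could be called; the import path mirrors the file location listed above, and all numeric values are placeholders:

```python
import logging

from autogluon.timeseries.trainer.utils import log_scores_and_times

logging.basicConfig(level=logging.INFO)

# Emits three aligned log lines: validation score, training runtime, prediction runtime.
log_scores_and_times(
    val_score=-0.1234,       # placeholder validation score
    fit_time=12.3,           # placeholder training time in seconds
    predict_time=0.8,        # placeholder prediction time in seconds
    eval_metric_name="WQL",
)
```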

autogluon/timeseries/transforms/__init__.py
@@ -0,0 +1,2 @@
+ from .covariate_scaler import CovariateScaler, get_covariate_scaler
+ from .target_scaler import TargetScaler, get_target_scaler

autogluon/timeseries/transforms/covariate_scaler.py
@@ -0,0 +1,164 @@
+ import logging
+ from typing import Literal, Protocol, overload, runtime_checkable
+
+ import numpy as np
+ import pandas as pd
+ from sklearn.compose import ColumnTransformer
+ from sklearn.preprocessing import QuantileTransformer, StandardScaler
+
+ from autogluon.timeseries.dataset import TimeSeriesDataFrame
+ from autogluon.timeseries.utils.features import CovariateMetadata
+ from autogluon.timeseries.utils.warning_filters import warning_filter
+
+ logger = logging.getLogger(__name__)
+
+
+ @runtime_checkable
+ class CovariateScaler(Protocol):
+     """Apply scaling to covariates and static features.
+
+     This can be helpful for deep learning models that assume that the inputs are normalized.
+     """
+
+     def fit_transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame: ...
+
+     def transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame: ...
+
+     def transform_known_covariates(
+         self, known_covariates: TimeSeriesDataFrame | None = None
+     ) -> TimeSeriesDataFrame | None: ...
+
+
+ class GlobalCovariateScaler(CovariateScaler):
+     """Applies preprocessing logic similar to tabular's NN_TORCH model to the covariates.
+
+     Performs following preprocessing for real-valued columns:
+     - sklearn.preprocessing.QuantileTransform for skewed features
+     - passthrough (ignore) boolean features
+     - sklearn.preprocessing.StandardScaler for the rest of the features
+
+     Preprocessing is done globally across all items.
+     """
+
+     def __init__(
+         self,
+         covariate_metadata: CovariateMetadata,
+         use_known_covariates: bool = True,
+         use_past_covariates: bool = True,
+         use_static_features: bool = True,
+         skew_threshold: float = 0.99,
+     ):
+         self.covariate_metadata = covariate_metadata
+         self.use_known_covariates = use_known_covariates
+         self.use_past_covariates = use_past_covariates
+         self.use_static_features = use_static_features
+         self.skew_threshold = skew_threshold
+         self._column_transformers: dict[Literal["known", "past", "static"], ColumnTransformer] | None = None
+
+     def is_fit(self) -> bool:
+         return self._column_transformers is not None
+
+     def fit(self, data: TimeSeriesDataFrame) -> "GlobalCovariateScaler":
+         self._column_transformers = {}
+
+         if self.use_known_covariates and len(self.covariate_metadata.known_covariates_real) > 0:
+             self._column_transformers["known"] = self._get_transformer_for_columns(
+                 data, columns=self.covariate_metadata.known_covariates_real
+             )
+         if self.use_past_covariates and len(self.covariate_metadata.past_covariates_real) > 0:
+             self._column_transformers["past"] = self._get_transformer_for_columns(
+                 data, columns=self.covariate_metadata.past_covariates_real
+             )
+         if self.use_static_features and len(self.covariate_metadata.static_features_real) > 0:
+             assert data.static_features is not None
+             self._column_transformers["static"] = self._get_transformer_for_columns(
+                 data.static_features, columns=self.covariate_metadata.static_features_real
+             )
+
+         return self
+
+     def fit_transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
+         if not self.is_fit():
+             self.fit(data=data)
+         return self.transform(data=data)
+
+     def transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
+         # Copy data to avoid inplace modification
+         data = data.copy()
+         assert self._column_transformers is not None, "CovariateScaler must be fit before transform can be called"
+
+         if "known" in self._column_transformers:
+             columns = self.covariate_metadata.known_covariates_real
+             data[columns] = self._column_transformers["known"].transform(data[columns])
+
+         if "past" in self._column_transformers:
+             columns = self.covariate_metadata.past_covariates_real
+             data[columns] = self._column_transformers["past"].transform(data[columns])
+
+         if "static" in self._column_transformers:
+             columns = self.covariate_metadata.static_features_real
+             assert data.static_features is not None
+
+             data.static_features[columns] = self._column_transformers["static"].transform(
+                 data.static_features[columns]
+             )
+         return data
+
+     def transform_known_covariates(
+         self, known_covariates: TimeSeriesDataFrame | None = None
+     ) -> TimeSeriesDataFrame | None:
+         assert self._column_transformers is not None, "CovariateScaler must be fit before transform can be called"
+
+         if "known" in self._column_transformers:
+             columns = self.covariate_metadata.known_covariates_real
+             assert known_covariates is not None
+
+             known_covariates = known_covariates.copy()
+             known_covariates[columns] = self._column_transformers["known"].transform(known_covariates[columns])
+         return known_covariates
+
+     def _get_transformer_for_columns(self, df: pd.DataFrame, columns: list[str]) -> ColumnTransformer:
+         """Passthrough bool features, use QuantileTransform for skewed features, and use StandardScaler for the rest.
+
+         The preprocessing logic is similar to the TORCH_NN model from Tabular.
+         """
+         bool_features = []
+         skewed_features = []
+         continuous_features = []
+         for col in columns:
+             if set(df[col].unique()) == set([0, 1]):
+                 bool_features.append(col)
+             elif np.abs(df[col].skew()) > self.skew_threshold:  # type: ignore
+                 skewed_features.append(col)
+             else:
+                 continuous_features.append(col)
+         transformers = []
+         logger.debug(
+             f"\tbool_features: {bool_features}, continuous_features: {continuous_features}, skewed_features: {skewed_features}"
+         )
+         if continuous_features:
+             transformers.append(("scaler", StandardScaler(), continuous_features))
+         if skewed_features:
+             transformers.append(("skew", QuantileTransformer(output_distribution="normal"), skewed_features))
+         with warning_filter():
+             column_transformer = ColumnTransformer(transformers=transformers, remainder="passthrough").fit(df[columns])
+         return column_transformer
+
+
+ AVAILABLE_COVARIATE_SCALERS = {
+     "global": GlobalCovariateScaler,
+ }
+
+
+ @overload
+ def get_covariate_scaler(name: None, **scaler_kwargs) -> None: ...
+ @overload
+ def get_covariate_scaler(name: Literal["global"], **scaler_kwargs) -> GlobalCovariateScaler: ...
+ def get_covariate_scaler(name: Literal["global"] | None = None, **scaler_kwargs) -> CovariateScaler | None:
+     if name is None:
+         return None
+     if name not in AVAILABLE_COVARIATE_SCALERS:
+         raise KeyError(
+             f"Covariate scaler type {name} not supported. Available scalers: {list(AVAILABLE_COVARIATE_SCALERS)}"
+         )
+     return AVAILABLE_COVARIATE_SCALERS[name](**scaler_kwargs)
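
A rough usage sketch for the new covariate scaler. `TimeSeriesDataFrame.from_data_frame` is the public AutoGluon constructor; passing `known_covariates_real` directly to `CovariateMetadata` is an assumption based on the attribute accessed in the code above:

```python
import numpy as np
import pandas as pd

from autogluon.timeseries import TimeSeriesDataFrame
from autogluon.timeseries.transforms import get_covariate_scaler
from autogluon.timeseries.utils.features import CovariateMetadata

# Two short daily series with one real-valued known covariate ("price").
df = pd.DataFrame({
    "item_id": ["A"] * 10 + ["B"] * 10,
    "timestamp": list(pd.date_range("2024-01-01", periods=10, freq="D")) * 2,
    "target": np.random.rand(20),
    "price": np.random.rand(20) * 100,
})
data = TimeSeriesDataFrame.from_data_frame(df, id_column="item_id", timestamp_column="timestamp")

# Assumption: CovariateMetadata accepts `known_covariates_real` as a constructor field.
metadata = CovariateMetadata(known_covariates_real=["price"])

scaler = get_covariate_scaler("global", covariate_metadata=metadata)
scaled = scaler.fit_transform(data)  # "price" is scaled globally across both items
```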

autogluon/timeseries/transforms/target_scaler.py
@@ -0,0 +1,149 @@
+ from typing import Literal, Protocol, overload
+
+ import numpy as np
+ import pandas as pd
+ from typing_extensions import Self
+
+ from autogluon.timeseries.dataset import TimeSeriesDataFrame
+
+
+ class TargetScaler(Protocol):
+     def fit_transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame: ...
+
+     def fit(self, data: TimeSeriesDataFrame) -> Self: ...
+
+     def transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame: ...
+
+     def inverse_transform(self, predictions: TimeSeriesDataFrame) -> TimeSeriesDataFrame: ...
+
+
+ class LocalTargetScaler(TargetScaler):
+     """Applies an affine transformation (x - loc) / scale independently to each time series in the dataset."""
+
+     def __init__(
+         self,
+         target: str = "target",
+         min_scale: float = 1e-2,
+     ):
+         self.target = target
+         self.min_scale = min_scale
+         self.loc: pd.Series | None = None
+         self.scale: pd.Series | None = None
+
+     def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series | None, pd.Series | None]:
+         raise NotImplementedError
+
+     def fit_transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
+         return self.fit(data=data).transform(data=data)
+
+     def fit(self, data: TimeSeriesDataFrame) -> "LocalTargetScaler":
+         target_series = data[self.target].replace([np.inf, -np.inf], np.nan)
+         self.loc, self.scale = self._compute_loc_scale(target_series)
+         if self.loc is not None:
+             self.loc = self.loc.replace([np.inf, -np.inf], np.nan).fillna(0.0)
+         if self.scale is not None:
+             self.scale = self.scale.clip(lower=self.min_scale).replace([np.inf, -np.inf], np.nan).fillna(1.0)
+         return self
+
+     def _reindex_loc_scale(self, item_index: pd.Index) -> tuple[np.ndarray | float, np.ndarray | float]:
+         """Reindex loc and scale parameters for the given item_ids and convert them to an array-like."""
+         if self.loc is not None:
+             loc = self.loc.reindex(item_index).to_numpy()
+         else:
+             loc = 0.0
+         if self.scale is not None:
+             scale = self.scale.reindex(item_index).to_numpy()
+         else:
+             scale = 1.0
+         return loc, scale
+
+     def transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
+         """Apply scaling to the target column in the dataframe."""
+         loc, scale = self._reindex_loc_scale(item_index=data.index.get_level_values(TimeSeriesDataFrame.ITEMID))
+         return data.assign(**{self.target: (data[self.target] - loc) / scale})
+
+     def inverse_transform(self, predictions: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
+         """Apply inverse scaling to all columns in the predictions dataframe."""
+         loc, scale = self._reindex_loc_scale(item_index=predictions.index.get_level_values(TimeSeriesDataFrame.ITEMID))
+         return predictions.assign(**{col: predictions[col] * scale + loc for col in predictions.columns})
+
+
+ class LocalStandardScaler(LocalTargetScaler):
+     """Applies standard scaling to each time series in the dataset.
+
+     The resulting affine transformation is (x - loc) / scale, where scale = std(x), loc = mean(x).
+     """
+
+     def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series, pd.Series]:
+         stats = target_series.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).agg(["mean", "std"])
+         return stats["mean"], stats["std"]
+
+
+ class LocalMeanAbsScaler(LocalTargetScaler):
+     """Applies mean absolute scaling to each time series in the dataset."""
+
+     def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series | None, pd.Series]:
+         scale = target_series.abs().groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).agg("mean")
+         return None, scale
+
+
+ class LocalMinMaxScaler(LocalTargetScaler):
+     """Applies min/max scaling to each time series in the dataset.
+
+     The resulting affine transformation is (x - loc) / scale, where scale = max(x) - min(x), loc = min(x) / scale.
+     """
+
+     def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series, pd.Series]:
+         stats = target_series.abs().groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).agg(["min", "max"])
+         scale = (stats["max"] - stats["min"]).clip(lower=self.min_scale)
+         loc = stats["min"]
+         return loc, scale
+
+
+ class LocalRobustScaler(LocalTargetScaler):
+     """Applies a robust scaler based on the interquartile range. Less sensitive to outliers compared to other scaler.
+
+     The resulting affine transformation is (x - loc) / scale, where scale = quantile(x, 0.75) - quantile(x, 0.25), loc = median(x).
+     """
+
+     def __init__(
+         self,
+         target: str = "target",
+         min_scale: float = 1e-2,
+         **kwargs,
+     ):
+         super().__init__(target=target, min_scale=min_scale)
+         self.q_min = 0.25
+         self.q_max = 0.75
+         assert 0 < self.q_min < self.q_max < 1
+
+     def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series, pd.Series]:
+         grouped = target_series.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False)
+         loc = grouped.median()
+         lower = grouped.quantile(self.q_min)
+         upper = grouped.quantile(self.q_max)
+         scale = upper - lower
+         return loc, scale
+
+
+ AVAILABLE_TARGET_SCALERS = {
+     "standard": LocalStandardScaler,
+     "mean_abs": LocalMeanAbsScaler,
+     "min_max": LocalMinMaxScaler,
+     "robust": LocalRobustScaler,
+ }
+
+
+ @overload
+ def get_target_scaler(name: None, **scaler_kwargs) -> None: ...
+ @overload
+ def get_target_scaler(name: Literal["standard", "mean_abs", "min_max", "robust"], **scaler_kwargs) -> TargetScaler: ...
+ def get_target_scaler(
+     name: Literal["standard", "mean_abs", "min_max", "robust"] | None, **scaler_kwargs
+ ) -> TargetScaler | None:
+     """Get LocalTargetScaler object from a string."""
+     if name is None:
+         return None
+     if name not in AVAILABLE_TARGET_SCALERS:
+         raise KeyError(f"Scaler type {name} not supported. Available scalers: {list(AVAILABLE_TARGET_SCALERS)}")
+     return AVAILABLE_TARGET_SCALERS[name](**scaler_kwargs)
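
A small usage sketch for the per-item target scalers; the toy values are made up, and the round trip works because `inverse_transform` applies `x * scale + loc` to every column, as defined above:

```python
import pandas as pd

from autogluon.timeseries import TimeSeriesDataFrame
from autogluon.timeseries.transforms import get_target_scaler

df = pd.DataFrame({
    "item_id": ["A"] * 5 + ["B"] * 5,
    "timestamp": list(pd.date_range("2024-01-01", periods=5, freq="D")) * 2,
    "target": [10, 12, 11, 13, 12, 1000, 1100, 1050, 1200, 1150],
})
data = TimeSeriesDataFrame.from_data_frame(df, id_column="item_id", timestamp_column="timestamp")

scaler = get_target_scaler("standard")       # (x - mean) / std, computed separately per item
scaled = scaler.fit_transform(data)          # each item's target now has roughly zero mean, unit std
restored = scaler.inverse_transform(scaled)  # recovers the original target values
```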

autogluon/timeseries/utils/constants.py
@@ -0,0 +1,10 @@
+ import joblib.externals.loky
+ from joblib import cpu_count
+
+ # By default, joblib w/ loky backend kills processes that take >300MB of RAM assuming that this is caused by a memory
+ # leak. This leads to problems for some memory-hungry models like AutoARIMA/Theta.
+ # This monkey patch removes this undesired behavior
+ joblib.externals.loky.process_executor._MAX_MEMORY_LEAK_SIZE = int(3e10)
+
+ # We use the same default n_jobs across AG-TS to ensure that Joblib reuses the process pool
+ AG_DEFAULT_N_JOBS = max(cpu_count(only_physical_cores=True), 1)

autogluon/timeseries/utils/datetime/base.py
@@ -1,31 +1,46 @@
  import pandas as pd

  TO_MAJOR_FREQ = {
-     "min": "T",
-     "ms": "L",
-     "us": "U",
+     # sub-daily
+     "H": "h",
+     "BH": "bh",
+     "cbh": "bh",
+     "CBH": "bh",
+     "T": "min",
+     "S": "s",
+     "L": "ms",
+     "U": "us",
+     "N": "ns",
      # business day
      "C": "B",
      # month
-     "BM": "M",
-     "CBM": "M",
-     "MS": "M",
-     "BMS": "M",
-     "CBMS": "M",
+     "M": "ME",
+     "BM": "ME",
+     "BME": "ME",
+     "CBM": "ME",
+     "CBME": "ME",
+     "MS": "ME",
+     "BMS": "ME",
+     "CBMS": "ME",
      # semi-month
-     "SMS": "SM",
+     "SM": "SME",
+     "SMS": "SME",
      # quarter
-     "BQ": "Q",
-     "QS": "Q",
-     "BQS": "Q",
+     "Q": "QE",
+     "BQ": "QE",
+     "BQE": "QE",
+     "QS": "QE",
+     "BQS": "QE",
      # annual
-     "Y": "A",
-     "BA": "A",
-     "BY": "A",
-     "AS": "A",
-     "YS": "A",
-     "BAS": "A",
-     "BYS": "A",
+     "A": "YE",
+     "Y": "YE",
+     "BA": "YE",
+     "BY": "YE",
+     "BYE": "YE",
+     "AS": "YE",
+     "YS": "YE",
+     "BAS": "YE",
+     "BYS": "YE",
  }


@@ -33,7 +48,10 @@ def norm_freq_str(offset: pd.DateOffset) -> str:
      """Obtain frequency string from a pandas.DateOffset object.

      "Non-standard" frequencies are converted to their "standard" counterparts. For example, MS (month start) is mapped
-     to M (month) since both correspond to the same seasonality, lags and time features.
+     to ME (month end) since both correspond to the same seasonality, lags and time features.
+
+     The frequencies are always mapped to the new non-deprecated aliases (pandas>=2.2), e.g., "H" is mapped to "h". The
+     downstream functions like `get_seasonality` handle the new aliases even if older version of pandas is used.
      """
      base_freq = offset.name.split("-")[0]
      return TO_MAJOR_FREQ.get(base_freq, base_freq)
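
The effect of the updated alias table, sketched on two offsets (the results follow directly from `TO_MAJOR_FREQ` above):

```python
import pandas as pd

from autogluon.timeseries.utils.datetime.base import norm_freq_str

# Month-start offsets are normalized to the month-end alias used for seasonality/lag lookups.
print(norm_freq_str(pd.tseries.frequencies.to_offset("MS")))     # "ME"

# Frequencies without an entry in TO_MAJOR_FREQ pass through unchanged.
print(norm_freq_str(pd.tseries.frequencies.to_offset("W-SUN")))  # "W"
```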

autogluon/timeseries/utils/datetime/lags.py
@@ -1,7 +1,6 @@
  """
  Generate lag indices based on frequency string. Adapted from gluonts.time_feature.lag.
  """
- from typing import List, Optional

  import numpy as np
  import pandas as pd
@@ -69,9 +68,9 @@ def _make_lags_for_semi_month(multiple, num_cycles=3):
  def get_lags_for_frequency(
      freq: str,
      lag_ub: int = 1200,
-     num_lags: Optional[int] = None,
+     num_lags: int | None = None,
      num_default_lags: int = 7,
- ) -> List[int]:
+ ) -> list[int]:
      """
      Generates a list of lags that that are appropriate for the given frequency
      string.
@@ -94,15 +93,18 @@ def get_lags_for_frequency(
      """

      offset = pd.tseries.frequencies.to_offset(freq)
+
+     if offset is None:
+         raise ValueError(f"Invalid frequency: {freq}")
      offset_name = norm_freq_str(offset)

-     if offset_name == "A":
+     if offset_name == "YE":
          lags = []
-     elif offset_name == "Q":
+     elif offset_name == "QE":
          lags = _make_lags_for_quarter(offset.n)
-     elif offset_name == "M":
+     elif offset_name == "ME":
          lags = _make_lags_for_month(offset.n)
-     elif offset_name == "SM":
+     elif offset_name == "SME":
          lags = _make_lags_for_semi_month(offset.n)
      elif offset_name == "W":
          lags = _make_lags_for_week(offset.n)
@@ -110,21 +112,21 @@ def get_lags_for_frequency(
          lags = _make_lags_for_day(offset.n) + _make_lags_for_week(offset.n / 7.0)
      elif offset_name == "B":
          lags = _make_lags_for_day(offset.n, days_in_week=5, days_in_month=22) + _make_lags_for_week(offset.n / 5.0)
-     elif offset_name == "H":
+     elif offset_name == "h":
          lags = (
              _make_lags_for_hour(offset.n)
              + _make_lags_for_day(offset.n / 24)
              + _make_lags_for_week(offset.n / (24 * 7))
          )
      # business hour
-     elif offset_name == "BH":
+     elif offset_name == "bh":
          lags = (
              _make_lags_for_business_hour(offset.n)
              + _make_lags_for_day(offset.n / 9)
              + _make_lags_for_week(offset.n / (9 * 7))
          )
      # minutes
-     elif offset_name == "T":
+     elif offset_name == "min":
          lags = (
              _make_lags_for_minute(offset.n)
              + _make_lags_for_hour(offset.n / 60)
@@ -132,35 +134,35 @@ def get_lags_for_frequency(
              + _make_lags_for_week(offset.n / (60 * 24 * 7))
          )
      # second
-     elif offset_name == "S":
+     elif offset_name == "s":
          lags = (
              _make_lags_for_second(offset.n)
              + _make_lags_for_minute(offset.n / 60)
              + _make_lags_for_hour(offset.n / (60 * 60))
          )
-     elif offset_name == "L":
+     elif offset_name == "ms":
          lags = (
              _make_lags_for_second(offset.n / 1e3)
              + _make_lags_for_minute(offset.n / (60 * 1e3))
              + _make_lags_for_hour(offset.n / (60 * 60 * 1e3))
          )
-     elif offset_name == "U":
+     elif offset_name == "us":
          lags = (
              _make_lags_for_second(offset.n / 1e6)
              + _make_lags_for_minute(offset.n / (60 * 1e6))
              + _make_lags_for_hour(offset.n / (60 * 60 * 1e6))
          )
-     elif offset_name == "N":
+     elif offset_name == "ns":
          lags = (
              _make_lags_for_second(offset.n / 1e9)
              + _make_lags_for_minute(offset.n / (60 * 1e9))
              + _make_lags_for_hour(offset.n / (60 * 60 * 1e9))
          )
      else:
-         raise Exception(f"invalid frequency {freq}")
+         raise Exception(f"Cannot get lags for unsupported frequency {freq}")

      # flatten lags list and filter
-     lags = [int(lag) for sub_list in lags for lag in sub_list if 7 < lag <= lag_ub]
+     lags = [int(lag) for sub_list in lags for lag in sub_list if num_default_lags < lag <= lag_ub]
      lags = list(range(1, num_default_lags + 1)) + sorted(list(set(lags)))

      return sorted(set(lags))[:num_lags]
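
For illustration, a sketch of calling the lag generator (exact lag values depend on the `_make_lags_for_*` helpers not shown in this hunk):

```python
from autogluon.timeseries.utils.datetime.lags import get_lags_for_frequency

# Daily data: lags 1..num_default_lags plus longer day/week-based lags, capped at lag_ub.
daily_lags = get_lags_for_frequency("D")

# Hourly data, truncated to the first 20 lags via num_lags.
hourly_lags = get_lags_for_frequency("h", num_lags=20)

# Passing freq=None would now fail fast with ValueError("Invalid frequency: None")
# instead of raising an AttributeError further down in norm_freq_str.
```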

autogluon/timeseries/utils/datetime/seasonality.py
@@ -1,33 +1,33 @@
- from typing import Union
-
  import pandas as pd

  from .base import norm_freq_str

  DEFAULT_SEASONALITIES = {
-     "A": 1,
-     "Q": 4,
-     "M": 12,
-     "SM": 24,
+     "YE": 1,
+     "QE": 4,
+     "ME": 12,
+     "SME": 24,
      "W": 1,
      "D": 7,
      "B": 5,
-     "BH": 9,
-     "H": 24,
-     "T": 60 * 24,
-     "S": 1,
-     "L": 1,
-     "U": 1,
-     "N": 1,
+     "bh": 9,
+     "h": 24,
+     "min": 60 * 24,
+     "s": 1,
+     "ms": 1,
+     "us": 1,
+     "ns": 1,
  }


- def get_seasonality(freq: Union[str, None]) -> int:
+ def get_seasonality(freq: str | None) -> int:
      """Return the seasonality of a given frequency. Adapted from ``gluonts.time_feature.seasonality``."""
      if freq is None:
          return 1

      offset = pd.tseries.frequencies.to_offset(freq)
+
+     assert offset is not None  # offset is only None if freq is None
      offset_name = norm_freq_str(offset)
      base_seasonality = DEFAULT_SEASONALITIES.get(offset_name, 1)

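
Expected base seasonalities for a few common frequencies, read directly from the `DEFAULT_SEASONALITIES` table above (a sketch; offsets with a multiple greater than one may be adjusted by code outside this hunk):

```python
from autogluon.timeseries.utils.datetime.seasonality import get_seasonality

print(get_seasonality("D"))   # 7  -> weekly pattern for daily data
print(get_seasonality("h"))   # 24 -> daily pattern for hourly data
print(get_seasonality("B"))   # 5  -> business-week pattern
print(get_seasonality(None))  # 1  -> no frequency information available
```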

autogluon/timeseries/utils/datetime/time_features.py
@@ -1,7 +1,8 @@
  """
  Generate time features based on frequency string. Adapted from gluonts.time_feature.time_feature.
  """
- from typing import Callable, List
+
+ from typing import Callable

  import numpy as np
  import pandas as pd
@@ -26,7 +27,7 @@ def week_of_year(index: pd.DatetimeIndex) -> np.ndarray:
      try:
          week = index.isocalendar().week
      except AttributeError:
-         week = index.week
+         week = index.week  # type: ignore[attr-defined]

      return _normalize(week - 1, num=53)

@@ -55,23 +56,25 @@ def second_of_minute(index: pd.DatetimeIndex) -> np.ndarray:
      return _normalize(index.second, num=60)


- def get_time_features_for_frequency(freq) -> List[Callable]:
+ def get_time_features_for_frequency(freq) -> list[Callable]:
      features_by_offset_name = {
-         "A": [],
-         "Q": [quarter_of_year],
-         "M": [month_of_year],
-         "SM": [day_of_month, month_of_year],
+         "YE": [],
+         "QE": [quarter_of_year],
+         "ME": [month_of_year],
+         "SME": [day_of_month, month_of_year],
          "W": [day_of_month, week_of_year],
          "D": [day_of_week, day_of_month, day_of_year],
          "B": [day_of_week, day_of_month, day_of_year],
-         "BH": [hour_of_day, day_of_week, day_of_month, day_of_year],
-         "H": [hour_of_day, day_of_week, day_of_month, day_of_year],
-         "T": [minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
-         "S": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
-         "L": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
-         "U": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
-         "N": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
+         "bh": [hour_of_day, day_of_week, day_of_month, day_of_year],
+         "h": [hour_of_day, day_of_week, day_of_month, day_of_year],
+         "min": [minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
+         "s": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
+         "ms": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
+         "us": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
+         "ns": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
      }
      offset = pd.tseries.frequencies.to_offset(freq)
+
+     assert offset is not None  # offset is only None if freq is None
      offset_name = norm_freq_str(offset)
      return features_by_offset_name[offset_name]
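
Finally, a short sketch of evaluating the frequency-specific time features on a `DatetimeIndex`; the feature functions are the ones defined earlier in this module:

```python
import pandas as pd

from autogluon.timeseries.utils.datetime.time_features import get_time_features_for_frequency

features = get_time_features_for_frequency("D")  # [day_of_week, day_of_month, day_of_year]
index = pd.date_range("2024-01-01", periods=3, freq="D")

# Each feature maps the DatetimeIndex to a normalized numpy array.
feature_values = {feat.__name__: feat(index) for feat in features}
```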