autogluon.timeseries 1.2.1b20250224__py3-none-any.whl → 1.4.1b20251215__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of autogluon.timeseries might be problematic. Click here for more details.
- autogluon/timeseries/configs/__init__.py +3 -2
- autogluon/timeseries/configs/hyperparameter_presets.py +62 -0
- autogluon/timeseries/configs/predictor_presets.py +106 -0
- autogluon/timeseries/dataset/ts_dataframe.py +256 -141
- autogluon/timeseries/learner.py +86 -52
- autogluon/timeseries/metrics/__init__.py +42 -8
- autogluon/timeseries/metrics/abstract.py +89 -19
- autogluon/timeseries/metrics/point.py +142 -53
- autogluon/timeseries/metrics/quantile.py +46 -21
- autogluon/timeseries/metrics/utils.py +4 -4
- autogluon/timeseries/models/__init__.py +8 -2
- autogluon/timeseries/models/abstract/__init__.py +2 -2
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +361 -592
- autogluon/timeseries/models/abstract/model_trial.py +2 -1
- autogluon/timeseries/models/abstract/tunable.py +189 -0
- autogluon/timeseries/models/autogluon_tabular/__init__.py +2 -0
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +282 -194
- autogluon/timeseries/models/autogluon_tabular/per_step.py +513 -0
- autogluon/timeseries/models/autogluon_tabular/transforms.py +25 -18
- autogluon/timeseries/models/chronos/__init__.py +2 -1
- autogluon/timeseries/models/chronos/chronos2.py +361 -0
- autogluon/timeseries/models/chronos/model.py +219 -138
- autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +81 -50
- autogluon/timeseries/models/ensemble/__init__.py +37 -2
- autogluon/timeseries/models/ensemble/abstract.py +107 -0
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +240 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +185 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
- autogluon/timeseries/models/ensemble/ensemble_selection.py +167 -0
- autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +45 -0
- autogluon/timeseries/models/ensemble/weighted/basic.py +91 -0
- autogluon/timeseries/models/ensemble/weighted/greedy.py +62 -0
- autogluon/timeseries/models/gluonts/__init__.py +1 -1
- autogluon/timeseries/models/gluonts/{abstract_gluonts.py → abstract.py} +148 -208
- autogluon/timeseries/models/gluonts/dataset.py +109 -0
- autogluon/timeseries/models/gluonts/{torch/models.py → models.py} +38 -22
- autogluon/timeseries/models/local/__init__.py +0 -7
- autogluon/timeseries/models/local/abstract_local_model.py +71 -74
- autogluon/timeseries/models/local/naive.py +13 -9
- autogluon/timeseries/models/local/npts.py +9 -2
- autogluon/timeseries/models/local/statsforecast.py +52 -36
- autogluon/timeseries/models/multi_window/multi_window_model.py +65 -45
- autogluon/timeseries/models/registry.py +64 -0
- autogluon/timeseries/models/toto/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/__init__.py +9 -0
- autogluon/timeseries/models/toto/_internal/backbone/__init__.py +3 -0
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +196 -0
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +262 -0
- autogluon/timeseries/models/toto/_internal/backbone/distribution.py +70 -0
- autogluon/timeseries/models/toto/_internal/backbone/kvcache.py +136 -0
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +89 -0
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +305 -0
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +333 -0
- autogluon/timeseries/models/toto/_internal/dataset.py +165 -0
- autogluon/timeseries/models/toto/_internal/forecaster.py +423 -0
- autogluon/timeseries/models/toto/dataloader.py +108 -0
- autogluon/timeseries/models/toto/hf_pretrained_model.py +200 -0
- autogluon/timeseries/models/toto/model.py +249 -0
- autogluon/timeseries/predictor.py +685 -297
- autogluon/timeseries/regressor.py +94 -44
- autogluon/timeseries/splitter.py +8 -32
- autogluon/timeseries/trainer/__init__.py +3 -0
- autogluon/timeseries/trainer/ensemble_composer.py +444 -0
- autogluon/timeseries/trainer/model_set_builder.py +256 -0
- autogluon/timeseries/trainer/prediction_cache.py +149 -0
- autogluon/timeseries/{trainer.py → trainer/trainer.py} +387 -390
- autogluon/timeseries/trainer/utils.py +17 -0
- autogluon/timeseries/transforms/__init__.py +2 -13
- autogluon/timeseries/transforms/covariate_scaler.py +34 -40
- autogluon/timeseries/transforms/target_scaler.py +37 -20
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/lags.py +3 -5
- autogluon/timeseries/utils/datetime/seasonality.py +1 -3
- autogluon/timeseries/utils/datetime/time_features.py +2 -2
- autogluon/timeseries/utils/features.py +70 -47
- autogluon/timeseries/utils/forecast.py +19 -14
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/utils/warning_filters.py +4 -2
- autogluon/timeseries/version.py +1 -1
- autogluon.timeseries-1.4.1b20251215-py3.11-nspkg.pth +1 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/METADATA +49 -36
- autogluon_timeseries-1.4.1b20251215.dist-info/RECORD +103 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/WHEEL +1 -1
- autogluon/timeseries/configs/presets_configs.py +0 -79
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -11
- autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
- autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -585
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -518
- autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -78
- autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -170
- autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
- autogluon/timeseries/models/presets.py +0 -360
- autogluon.timeseries-1.2.1b20250224-py3.9-nspkg.pth +0 -1
- autogluon.timeseries-1.2.1b20250224.dist-info/RECORD +0 -68
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.2.1b20250224.dist-info → autogluon_timeseries-1.4.1b20251215.dist-info}/zip-safe +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import reprlib
|
|
3
3
|
import time
|
|
4
|
-
from dataclasses import dataclass, field
|
|
5
|
-
from typing import Any,
|
|
4
|
+
from dataclasses import asdict, dataclass, field
|
|
5
|
+
from typing import Any, Literal
|
|
6
6
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
import pandas as pd
|
|
@@ -14,7 +14,7 @@ from autogluon.features.generators import (
|
|
|
14
14
|
IdentityFeatureGenerator,
|
|
15
15
|
PipelineFeatureGenerator,
|
|
16
16
|
)
|
|
17
|
-
from autogluon.timeseries.dataset
|
|
17
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
18
18
|
from autogluon.timeseries.utils.warning_filters import warning_filter
|
|
19
19
|
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
@@ -24,49 +24,60 @@ logger = logging.getLogger(__name__)
|
|
|
24
24
|
class CovariateMetadata:
|
|
25
25
|
"""Provides mapping from different covariate types to columns in the dataset."""
|
|
26
26
|
|
|
27
|
-
static_features_cat:
|
|
28
|
-
static_features_real:
|
|
29
|
-
known_covariates_real:
|
|
30
|
-
known_covariates_cat:
|
|
31
|
-
past_covariates_real:
|
|
32
|
-
past_covariates_cat:
|
|
27
|
+
static_features_cat: list[str] = field(default_factory=list)
|
|
28
|
+
static_features_real: list[str] = field(default_factory=list)
|
|
29
|
+
known_covariates_real: list[str] = field(default_factory=list)
|
|
30
|
+
known_covariates_cat: list[str] = field(default_factory=list)
|
|
31
|
+
past_covariates_real: list[str] = field(default_factory=list)
|
|
32
|
+
past_covariates_cat: list[str] = field(default_factory=list)
|
|
33
|
+
static_cat_cardinality: dict[str, int] = field(default_factory=dict)
|
|
34
|
+
known_cat_cardinality: dict[str, int] = field(default_factory=dict)
|
|
35
|
+
past_cat_cardinality: dict[str, int] = field(default_factory=dict)
|
|
36
|
+
|
|
37
|
+
def __post_init__(self):
|
|
38
|
+
assert list(self.static_cat_cardinality.keys()) == self.static_features_cat
|
|
39
|
+
assert list(self.known_cat_cardinality.keys()) == self.known_covariates_cat
|
|
40
|
+
assert list(self.past_cat_cardinality.keys()) == self.past_covariates_cat
|
|
33
41
|
|
|
34
42
|
@property
|
|
35
|
-
def static_features(self) ->
|
|
43
|
+
def static_features(self) -> list[str]:
|
|
36
44
|
return self.static_features_cat + self.static_features_real
|
|
37
45
|
|
|
38
46
|
@property
|
|
39
|
-
def known_covariates(self) ->
|
|
47
|
+
def known_covariates(self) -> list[str]:
|
|
40
48
|
return self.known_covariates_cat + self.known_covariates_real
|
|
41
49
|
|
|
42
50
|
@property
|
|
43
|
-
def past_covariates(self) ->
|
|
51
|
+
def past_covariates(self) -> list[str]:
|
|
44
52
|
return self.past_covariates_cat + self.past_covariates_real
|
|
45
53
|
|
|
46
54
|
@property
|
|
47
|
-
def covariates(self) ->
|
|
55
|
+
def covariates(self) -> list[str]:
|
|
48
56
|
return self.known_covariates + self.past_covariates
|
|
49
57
|
|
|
50
58
|
@property
|
|
51
|
-
def covariates_real(self) ->
|
|
59
|
+
def covariates_real(self) -> list[str]:
|
|
52
60
|
return self.known_covariates_real + self.past_covariates_real
|
|
53
61
|
|
|
54
62
|
@property
|
|
55
|
-
def covariates_cat(self) ->
|
|
63
|
+
def covariates_cat(self) -> list[str]:
|
|
56
64
|
return self.known_covariates_cat + self.past_covariates_cat
|
|
57
65
|
|
|
58
66
|
@property
|
|
59
|
-
def real_features(self) ->
|
|
67
|
+
def real_features(self) -> list[str]:
|
|
60
68
|
return self.static_features_real + self.covariates_real
|
|
61
69
|
|
|
62
70
|
@property
|
|
63
|
-
def cat_features(self) ->
|
|
71
|
+
def cat_features(self) -> list[str]:
|
|
64
72
|
return self.static_features_cat + self.covariates_cat
|
|
65
73
|
|
|
66
74
|
@property
|
|
67
|
-
def all_features(self) ->
|
|
75
|
+
def all_features(self) -> list[str]:
|
|
68
76
|
return self.static_features + self.covariates
|
|
69
77
|
|
|
78
|
+
def to_dict(self) -> dict[str, Any]:
|
|
79
|
+
return asdict(self)
|
|
80
|
+
|
|
70
81
|
|
|
71
82
|
class ContinuousAndCategoricalFeatureGenerator(PipelineFeatureGenerator):
|
|
72
83
|
"""Generates categorical and continuous features for time series models.
|
|
@@ -117,13 +128,13 @@ class TimeSeriesFeatureGenerator:
|
|
|
117
128
|
|
|
118
129
|
Parameters
|
|
119
130
|
----------
|
|
120
|
-
target
|
|
131
|
+
target
|
|
121
132
|
Name of the target column.
|
|
122
|
-
known_covariates_names
|
|
133
|
+
known_covariates_names
|
|
123
134
|
Columns that contain covariates that are known into the future.
|
|
124
|
-
float_dtype
|
|
135
|
+
float_dtype
|
|
125
136
|
Numpy float dtype to which all numeric columns (float, int, bool) will be converted both in static & dynamic dfs.
|
|
126
|
-
num_samples
|
|
137
|
+
num_samples
|
|
127
138
|
Number of rows sampled from the training dataset to speed up computation of the median (used later for imputation).
|
|
128
139
|
If set to `None`, median will be computed using all rows.
|
|
129
140
|
"""
|
|
@@ -131,27 +142,27 @@ class TimeSeriesFeatureGenerator:
|
|
|
131
142
|
def __init__(
|
|
132
143
|
self,
|
|
133
144
|
target: str,
|
|
134
|
-
known_covariates_names:
|
|
145
|
+
known_covariates_names: list[str],
|
|
135
146
|
float_dtype: str = "float32",
|
|
136
|
-
num_samples:
|
|
147
|
+
num_samples: int | None = 20_000,
|
|
137
148
|
):
|
|
138
149
|
self.target = target
|
|
139
150
|
self.float_dtype = float_dtype
|
|
140
151
|
self.num_samples = num_samples
|
|
141
152
|
|
|
142
153
|
self._is_fit = False
|
|
143
|
-
self.known_covariates_names:
|
|
144
|
-
self.past_covariates_names:
|
|
154
|
+
self.known_covariates_names: list[str] = list(known_covariates_names)
|
|
155
|
+
self.past_covariates_names: list[str] = []
|
|
145
156
|
self.known_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator()
|
|
146
157
|
self.past_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator()
|
|
147
158
|
# Cat features with cat_count=1 are fine in static_features since they are repeated for all time steps in a TS
|
|
148
159
|
self.static_feature_pipeline = ContinuousAndCategoricalFeatureGenerator(minimum_cat_count=1)
|
|
149
|
-
self._covariate_metadata:
|
|
150
|
-
self._train_covariates_real_median:
|
|
151
|
-
self._train_static_real_median:
|
|
160
|
+
self._covariate_metadata: CovariateMetadata | None = None # type ignore
|
|
161
|
+
self._train_covariates_real_median: pd.Series | None = None
|
|
162
|
+
self._train_static_real_median: pd.Series | None = None
|
|
152
163
|
|
|
153
164
|
@property
|
|
154
|
-
def required_column_names(self) ->
|
|
165
|
+
def required_column_names(self) -> list[str]:
|
|
155
166
|
return [self.target] + list(self.known_covariates_names) + list(self.past_covariates_names)
|
|
156
167
|
|
|
157
168
|
@property
|
|
@@ -218,11 +229,13 @@ class TimeSeriesFeatureGenerator:
|
|
|
218
229
|
static_features_cat, static_features_real = self._detect_and_log_column_types(static_features_df)
|
|
219
230
|
ignored_static_features = data.static_features.columns.difference(self.static_feature_pipeline.features_in)
|
|
220
231
|
self._train_static_real_median = data.static_features[static_features_real].median()
|
|
232
|
+
static_cat_cardinality = static_features_df[static_features_cat].nunique().to_dict()
|
|
221
233
|
else:
|
|
222
234
|
static_features_cat = []
|
|
223
235
|
static_features_real = []
|
|
224
236
|
ignored_static_features = []
|
|
225
237
|
static_features_df = None
|
|
238
|
+
static_cat_cardinality = {}
|
|
226
239
|
|
|
227
240
|
if len(ignored_covariates) > 0 or len(ignored_static_features) > 0:
|
|
228
241
|
logger.info("\nAutoGluon will ignore following non-numeric/non-informative columns:")
|
|
@@ -243,6 +256,9 @@ class TimeSeriesFeatureGenerator:
|
|
|
243
256
|
past_covariates_real=past_covariates_real,
|
|
244
257
|
static_features_cat=static_features_cat,
|
|
245
258
|
static_features_real=static_features_real,
|
|
259
|
+
static_cat_cardinality=static_cat_cardinality,
|
|
260
|
+
known_cat_cardinality=df[known_covariates_cat].nunique().to_dict(),
|
|
261
|
+
past_cat_cardinality=df[past_covariates_cat].nunique().to_dict(),
|
|
246
262
|
)
|
|
247
263
|
|
|
248
264
|
# Median of real-valued covariates will be used for missing value imputation
|
|
@@ -259,13 +275,13 @@ class TimeSeriesFeatureGenerator:
|
|
|
259
275
|
return self._impute_covariates(ts_df, column_names=self.covariate_metadata.covariates_real)
|
|
260
276
|
|
|
261
277
|
@staticmethod
|
|
262
|
-
def _concat_dfs(dfs_to_concat:
|
|
278
|
+
def _concat_dfs(dfs_to_concat: list[pd.DataFrame]) -> pd.DataFrame:
|
|
263
279
|
if len(dfs_to_concat) == 1:
|
|
264
280
|
return dfs_to_concat[0]
|
|
265
281
|
else:
|
|
266
282
|
return pd.concat(dfs_to_concat, axis=1, copy=False)
|
|
267
283
|
|
|
268
|
-
def _impute_covariates(self, ts_df: TimeSeriesDataFrame, column_names:
|
|
284
|
+
def _impute_covariates(self, ts_df: TimeSeriesDataFrame, column_names: list[str]) -> TimeSeriesDataFrame:
|
|
269
285
|
"""Impute missing values in selected columns with ffill, bfill, and median imputation."""
|
|
270
286
|
if len(column_names) > 0:
|
|
271
287
|
# ffill + bfill covariates that have at least some observed values
|
|
@@ -276,7 +292,7 @@ class TimeSeriesFeatureGenerator:
|
|
|
276
292
|
ts_df[column_names] = covariates_real
|
|
277
293
|
return ts_df
|
|
278
294
|
|
|
279
|
-
def _impute_static_features(self, static_df:
|
|
295
|
+
def _impute_static_features(self, static_df: pd.DataFrame | None) -> pd.DataFrame | None:
|
|
280
296
|
"""Impute missing values in static features using the median."""
|
|
281
297
|
static_real_names = self.covariate_metadata.static_features_real
|
|
282
298
|
if static_df is not None and static_real_names:
|
|
@@ -325,8 +341,8 @@ class TimeSeriesFeatureGenerator:
|
|
|
325
341
|
return self._impute_covariates(ts_df, column_names=self.covariate_metadata.covariates_real)
|
|
326
342
|
|
|
327
343
|
def transform_future_known_covariates(
|
|
328
|
-
self, known_covariates:
|
|
329
|
-
) ->
|
|
344
|
+
self, known_covariates: TimeSeriesDataFrame | None
|
|
345
|
+
) -> TimeSeriesDataFrame | None:
|
|
330
346
|
assert self._is_fit, f"{self.__class__.__name__} has not been fit yet"
|
|
331
347
|
if len(self.known_covariates_names) > 0:
|
|
332
348
|
assert known_covariates is not None, "known_covariates must be provided at prediction time"
|
|
@@ -343,10 +359,10 @@ class TimeSeriesFeatureGenerator:
|
|
|
343
359
|
return None
|
|
344
360
|
|
|
345
361
|
@staticmethod
|
|
346
|
-
def _detect_and_log_column_types(transformed_df: pd.DataFrame) ->
|
|
362
|
+
def _detect_and_log_column_types(transformed_df: pd.DataFrame) -> tuple[list[str], list[str]]:
|
|
347
363
|
"""Log & return names of categorical and real-valued columns in the DataFrame."""
|
|
348
|
-
cat_column_names:
|
|
349
|
-
real_column_names:
|
|
364
|
+
cat_column_names: list[str] = []
|
|
365
|
+
real_column_names: list[str] = []
|
|
350
366
|
for column_name, column_dtype in transformed_df.dtypes.items():
|
|
351
367
|
if isinstance(column_dtype, pd.CategoricalDtype):
|
|
352
368
|
cat_column_names.append(str(column_name))
|
|
@@ -359,9 +375,9 @@ class TimeSeriesFeatureGenerator:
|
|
|
359
375
|
|
|
360
376
|
@staticmethod
|
|
361
377
|
def _check_required_columns_are_present(
|
|
362
|
-
data: TimeSeriesDataFrame, required_column_names:
|
|
378
|
+
data: TimeSeriesDataFrame, required_column_names: list[str], data_frame_name: str
|
|
363
379
|
) -> None:
|
|
364
|
-
missing_columns = pd.Index(required_column_names).difference(data.columns)
|
|
380
|
+
missing_columns = pd.Index(required_column_names).difference(data.columns) # type: ignore
|
|
365
381
|
if len(missing_columns) > 0:
|
|
366
382
|
raise ValueError(
|
|
367
383
|
f"{len(missing_columns)} columns are missing from {data_frame_name}: {reprlib.repr(missing_columns.to_list())}"
|
|
@@ -395,7 +411,7 @@ class AbstractFeatureImportanceTransform:
|
|
|
395
411
|
"""Transforms a series with the same index as the pandas DataFrame"""
|
|
396
412
|
raise NotImplementedError
|
|
397
413
|
|
|
398
|
-
def _transform_series(self, feature_data: pd.Series, is_categorical: bool) ->
|
|
414
|
+
def _transform_series(self, feature_data: pd.Series, is_categorical: bool) -> pd.Series:
|
|
399
415
|
"""Transforms a series with the same index as the pandas DataFrame"""
|
|
400
416
|
raise NotImplementedError
|
|
401
417
|
|
|
@@ -403,7 +419,7 @@ class AbstractFeatureImportanceTransform:
|
|
|
403
419
|
if feature_name not in self.covariate_metadata.all_features:
|
|
404
420
|
raise ValueError(f"Target feature {feature_name} not found in covariate metadata")
|
|
405
421
|
|
|
406
|
-
# feature transform works on a shallow copy of the main time series
|
|
422
|
+
# feature transform works on a shallow copy of the main time series dataframe
|
|
407
423
|
# but a deep copy of the static features.
|
|
408
424
|
data = data.copy(deep=False)
|
|
409
425
|
|
|
@@ -412,11 +428,14 @@ class AbstractFeatureImportanceTransform:
|
|
|
412
428
|
if feature_name in self.covariate_metadata.past_covariates:
|
|
413
429
|
# we'll have to work on the history of the data alone
|
|
414
430
|
data[feature_name] = data[feature_name].copy()
|
|
415
|
-
feature_data =
|
|
431
|
+
feature_data = (
|
|
432
|
+
data[feature_name].groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).head(-self.prediction_length)
|
|
433
|
+
)
|
|
416
434
|
# Silence spurious FutureWarning raised by DataFrame.update https://github.com/pandas-dev/pandas/issues/57124
|
|
417
435
|
with warning_filter():
|
|
418
436
|
data[feature_name].update(self._transform_series(feature_data, is_categorical=is_categorical))
|
|
419
437
|
elif feature_name in self.covariate_metadata.static_features:
|
|
438
|
+
assert data.static_features is not None
|
|
420
439
|
feature_data = data.static_features[feature_name].copy()
|
|
421
440
|
feature_data.reset_index(drop=True, inplace=True)
|
|
422
441
|
data.static_features[feature_name] = self._transform_static_series(
|
|
@@ -435,7 +454,7 @@ class PermutationFeatureImportanceTransform(AbstractFeatureImportanceTransform):
|
|
|
435
454
|
self,
|
|
436
455
|
covariate_metadata: CovariateMetadata,
|
|
437
456
|
prediction_length: int,
|
|
438
|
-
random_seed:
|
|
457
|
+
random_seed: int | None = None,
|
|
439
458
|
shuffle_type: Literal["itemwise", "naive"] = "itemwise",
|
|
440
459
|
**kwargs,
|
|
441
460
|
):
|
|
@@ -451,11 +470,13 @@ class PermutationFeatureImportanceTransform(AbstractFeatureImportanceTransform):
|
|
|
451
470
|
rng = np.random.RandomState(self.random_seed)
|
|
452
471
|
|
|
453
472
|
if self.shuffle_type == "itemwise":
|
|
454
|
-
return feature_data.groupby(level=ITEMID, sort=False).transform(
|
|
473
|
+
return feature_data.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).transform(
|
|
455
474
|
lambda x: x.sample(frac=1, random_state=rng).values
|
|
456
475
|
)
|
|
457
476
|
elif self.shuffle_type == "naive":
|
|
458
477
|
return pd.Series(feature_data.sample(frac=1, random_state=rng).values, index=feature_data.index)
|
|
478
|
+
else:
|
|
479
|
+
raise ValueError(f"Unknown shuffle_type: {self.shuffle_type}")
|
|
459
480
|
|
|
460
481
|
|
|
461
482
|
class ConstantReplacementFeatureImportanceTransform(AbstractFeatureImportanceTransform):
|
|
@@ -477,6 +498,8 @@ class ConstantReplacementFeatureImportanceTransform(AbstractFeatureImportanceTra
|
|
|
477
498
|
|
|
478
499
|
def _transform_series(self, feature_data: pd.Series, is_categorical: bool) -> pd.Series:
|
|
479
500
|
if is_categorical:
|
|
480
|
-
return feature_data.groupby(level=ITEMID, sort=False).transform(lambda x: x.mode()[0])
|
|
501
|
+
return feature_data.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).transform(lambda x: x.mode()[0])
|
|
481
502
|
else:
|
|
482
|
-
return feature_data.groupby(level=ITEMID, sort=False).transform(
|
|
503
|
+
return feature_data.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).transform(
|
|
504
|
+
self.real_value_aggregation
|
|
505
|
+
) # type: ignore
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import warnings
|
|
2
|
-
from typing import Optional
|
|
3
2
|
|
|
4
3
|
import numpy as np
|
|
5
4
|
import pandas as pd
|
|
6
5
|
|
|
7
|
-
from autogluon.
|
|
6
|
+
from autogluon.common.utils.deprecated_utils import Deprecated
|
|
7
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
def get_forecast_horizon_index_single_time_series(
|
|
@@ -15,30 +15,35 @@ def get_forecast_horizon_index_single_time_series(
|
|
|
15
15
|
if offset is None:
|
|
16
16
|
raise ValueError(f"Invalid frequency: {freq}")
|
|
17
17
|
start_ts = past_timestamps.max() + 1 * offset
|
|
18
|
-
return pd.date_range(start=start_ts, periods=prediction_length, freq=freq, name=TIMESTAMP)
|
|
18
|
+
return pd.date_range(start=start_ts, periods=prediction_length, freq=freq, name=TimeSeriesDataFrame.TIMESTAMP)
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
|
|
22
|
-
|
|
21
|
+
@Deprecated(
|
|
22
|
+
min_version_to_warn="1.3", min_version_to_error="2.0", new="TimeSeriesPredictor.forecast_horizon_data_frame"
|
|
23
|
+
)
|
|
24
|
+
def get_forecast_horizon_index_ts_dataframe(*args, **kwargs) -> pd.MultiIndex:
|
|
25
|
+
return pd.MultiIndex.from_frame(make_future_data_frame(*args, **kwargs))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def make_future_data_frame(
|
|
23
29
|
ts_dataframe: TimeSeriesDataFrame,
|
|
24
30
|
prediction_length: int,
|
|
25
|
-
freq:
|
|
26
|
-
) -> pd.
|
|
31
|
+
freq: str | None = None,
|
|
32
|
+
) -> pd.DataFrame:
|
|
27
33
|
"""For each item in the dataframe, get timestamps for the next `prediction_length` time steps into the future.
|
|
28
34
|
|
|
29
|
-
Returns a pandas.
|
|
30
|
-
- level 0 ("item_id") contains the same item_ids as the input ts_dataframe.
|
|
31
|
-
- level 1 ("timestamp") contains the next prediction_length time steps starting from the end of each time series.
|
|
35
|
+
Returns a pandas.DataFrame, with columns "item_id" and "timestamp" corresponding to the forecast horizon.
|
|
32
36
|
"""
|
|
33
|
-
|
|
34
|
-
|
|
37
|
+
indptr = ts_dataframe.get_indptr()
|
|
38
|
+
last = ts_dataframe.index[indptr[1:] - 1].to_frame(index=False)
|
|
39
|
+
item_ids = np.repeat(last[TimeSeriesDataFrame.ITEMID].to_numpy(), prediction_length)
|
|
35
40
|
|
|
36
41
|
if freq is None:
|
|
37
42
|
freq = ts_dataframe.freq
|
|
38
43
|
offset = pd.tseries.frequencies.to_offset(freq)
|
|
39
|
-
last_ts = pd.DatetimeIndex(last[TIMESTAMP])
|
|
44
|
+
last_ts = pd.DatetimeIndex(last[TimeSeriesDataFrame.TIMESTAMP])
|
|
40
45
|
# Non-vectorized offsets like BusinessDay may produce a PerformanceWarning - we filter them
|
|
41
46
|
with warnings.catch_warnings():
|
|
42
47
|
warnings.simplefilter("ignore", category=pd.errors.PerformanceWarning)
|
|
43
48
|
timestamps = np.dstack([last_ts + step * offset for step in range(1, prediction_length + 1)]).ravel() # type: ignore[operator]
|
|
44
|
-
return pd.
|
|
49
|
+
return pd.DataFrame({TimeSeriesDataFrame.ITEMID: item_ids, TimeSeriesDataFrame.TIMESTAMP: timestamps})
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
import time
|
|
2
|
+
|
|
3
|
+
from typing_extensions import Self
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Timer:
|
|
7
|
+
"""A timer class that tracks a start time, and computes the time elapsed and
|
|
8
|
+
time remaining, used for handling ``time_limit`` parameters in AutoGluon.
|
|
9
|
+
|
|
10
|
+
Parameters
|
|
11
|
+
----------
|
|
12
|
+
time_limit
|
|
13
|
+
The time limit to set. If None, then ``time_remaining`` will return None, and
|
|
14
|
+
``timed_out`` will return False.
|
|
15
|
+
|
|
16
|
+
Examples
|
|
17
|
+
--------
|
|
18
|
+
Basic usage with time limit:
|
|
19
|
+
|
|
20
|
+
>>> timer = Timer(time_limit=10.0).start()
|
|
21
|
+
>>> # Do some work...
|
|
22
|
+
>>> if timer.timed_out():
|
|
23
|
+
... print("Time limit exceeded!")
|
|
24
|
+
>>> print(f"Time remaining: {timer.time_remaining():.2f}s")
|
|
25
|
+
|
|
26
|
+
Using as a stopwatch (no time limit):
|
|
27
|
+
|
|
28
|
+
>>> timer = Timer(time_limit=None).start()
|
|
29
|
+
>>> # Do some work...
|
|
30
|
+
>>> print(f"Elapsed time: {timer.time_elapsed():.2f}s")
|
|
31
|
+
|
|
32
|
+
Checking time in a loop:
|
|
33
|
+
|
|
34
|
+
>>> timer = Timer(time_limit=5.0).start()
|
|
35
|
+
>>> for i in range(100):
|
|
36
|
+
... if timer.timed_out():
|
|
37
|
+
... break
|
|
38
|
+
... # Do work for iteration i
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
def __init__(
|
|
42
|
+
self,
|
|
43
|
+
time_limit: float | None,
|
|
44
|
+
):
|
|
45
|
+
self.time_limit = time_limit
|
|
46
|
+
|
|
47
|
+
self.start_time = None
|
|
48
|
+
|
|
49
|
+
def start(self) -> Self:
|
|
50
|
+
"""Start or reset the timer."""
|
|
51
|
+
self.start_time = time.monotonic()
|
|
52
|
+
return self
|
|
53
|
+
|
|
54
|
+
def time_elapsed(self) -> float:
|
|
55
|
+
"""Total time elapsed since the timer was started. This method can also be used
|
|
56
|
+
when ``time_limit`` is set to None to count time forward (i.e., as opposed to
|
|
57
|
+
a countdown timer which other methods imply).
|
|
58
|
+
"""
|
|
59
|
+
if self.start_time is None:
|
|
60
|
+
raise RuntimeError("Timer has not been started")
|
|
61
|
+
return time.monotonic() - self.start_time
|
|
62
|
+
|
|
63
|
+
def time_remaining(self) -> float | None:
|
|
64
|
+
"""Total time remaining on the timer. If ``time_limit`` is None,
|
|
65
|
+
this method also returns None.
|
|
66
|
+
"""
|
|
67
|
+
if self.start_time is None:
|
|
68
|
+
raise RuntimeError("Timer has not been started")
|
|
69
|
+
if self.time_limit is None:
|
|
70
|
+
return None
|
|
71
|
+
return self.time_limit - (time.monotonic() - self.start_time)
|
|
72
|
+
|
|
73
|
+
def timed_out(self) -> bool:
|
|
74
|
+
"""Whether the timer has timed out. If ``time_limit`` is None, this method
|
|
75
|
+
always returns False.
|
|
76
|
+
"""
|
|
77
|
+
if self.start_time is None:
|
|
78
|
+
raise RuntimeError("Timer has not been started")
|
|
79
|
+
if self.time_limit is None:
|
|
80
|
+
return False
|
|
81
|
+
return self.time_elapsed() >= self.time_limit
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class SplitTimer(Timer):
    """Timer whose time limit is shared out over a fixed number of rounds.

    Builds on ``Timer`` for budgeting sequential operations: whenever
    ``next_round`` is called, whatever time is left at that moment is spread
    evenly over the rounds that have not run yet.

    Parameters
    ----------
    time_limit
        Total time limit to split across all rounds. If None, ``round_time_remaining``
        returns None.
    rounds
        Number of rounds to split the time across. Default is 1.

    Examples
    --------
    Split time across 3 rounds:

    >>> timer = SplitTimer(time_limit=10.0, rounds=3).start()
    >>> time_round_1 = timer.round_time_remaining()  # Returns ~3.33
    >>> # Do work for round 1
    >>> timer.next_round()
    >>> time_round_2 = timer.round_time_remaining()  # Returns remaining time divided by 2
    >>> # Do work for round 2
    >>> timer.next_round()
    >>> time_round_3 = timer.round_time_remaining()  # Returns all remaining time
    """

    def __init__(
        self,
        time_limit: float | None,
        rounds: int = 1,
    ):
        super().__init__(time_limit)
        self.rounds = rounds

        # Annotation-only declarations: both attributes get their values in ``start``.
        self.round_index: int
        self.round_start_time: float

    def start(self) -> Self:
        """Reset and start the timer, beginning at round 0."""
        super().start()
        self.round_index = 0
        self.round_start_time = time.monotonic()
        return self

    def round_time_remaining(self) -> float | None:
        """Time left in the current round's budget.

        The budget of a round is the time that was left on the overall limit when
        the round began, divided by the number of rounds not yet completed
        (the current one included). A round that finishes early therefore leaves
        more time for later rounds, and an overrunning round leaves less.

        Returns None if ``time_limit`` is None, and 0.0 once all rounds have been
        used up; otherwise the round budget minus the time already spent in this
        round, in seconds. Raises ``RuntimeError`` if a time limit is set but the
        timer has not been started.
        """
        if self.time_limit is None:
            return None
        if self.start_time is None:
            raise RuntimeError("Timer has not been started")

        rounds_left = self.rounds - self.round_index
        if rounds_left <= 0:
            return 0.0

        spent_before_round = self.round_start_time - self.start_time
        left_at_round_start = self.time_limit - spent_before_round
        budget_for_round = left_at_round_start / rounds_left

        return budget_for_round - self.round_time_elapsed()

    def round_time_elapsed(self) -> float:
        """Time elapsed since the current round began."""
        if self.start_time is None:
            raise RuntimeError("Timer has not been started")
        return time.monotonic() - self.round_start_time

    def next_round(self) -> Self:
        """Move on to the next round.

        Bumps the round counter and restarts the round clock, which changes the
        allocation reported by later ``round_time_remaining`` calls.
        """
        if self.start_time is None:
            raise RuntimeError("Timer has not been started")
        self.round_index += 1
        self.round_start_time = time.monotonic()
        return self
|
|
@@ -8,12 +8,14 @@ import sys
|
|
|
8
8
|
import warnings
|
|
9
9
|
from collections import Counter
|
|
10
10
|
|
|
11
|
+
import pandas as pd
|
|
12
|
+
|
|
11
13
|
__all__ = ["warning_filter", "disable_root_logger", "disable_tqdm"]
|
|
12
14
|
|
|
13
15
|
|
|
14
16
|
@contextlib.contextmanager
|
|
15
17
|
def warning_filter(all_warnings: bool = False):
|
|
16
|
-
categories = [RuntimeWarning, UserWarning, FutureWarning]
|
|
18
|
+
categories = [RuntimeWarning, UserWarning, FutureWarning, pd.errors.PerformanceWarning]
|
|
17
19
|
if all_warnings:
|
|
18
20
|
categories.append(Warning)
|
|
19
21
|
with warnings.catch_warnings():
|
|
@@ -57,7 +59,7 @@ def disable_tqdm():
|
|
|
57
59
|
from tqdm import tqdm
|
|
58
60
|
|
|
59
61
|
_init = tqdm.__init__
|
|
60
|
-
tqdm.__init__ = functools.partialmethod(tqdm.__init__, disable=True)
|
|
62
|
+
tqdm.__init__ = functools.partialmethod(tqdm.__init__, disable=True) # type: ignore
|
|
61
63
|
yield
|
|
62
64
|
except ImportError:
|
|
63
65
|
yield
|
autogluon/timeseries/version.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import sys, types, os;p = os.path.join(sys._getframe(1).f_locals['sitedir'], *('autogluon',));importlib = __import__('importlib.util');__import__('importlib.machinery');m = sys.modules.setdefault('autogluon', importlib.util.module_from_spec(importlib.machinery.PathFinder.find_spec('autogluon', [os.path.dirname(p)])));m = m or sys.modules.setdefault('autogluon', types.ModuleType('autogluon'));mp = (m or []) and m.__dict__.setdefault('__path__',[]);(p not in mp) and mp.append(p)
|