autogluon.timeseries 1.1.0b20240411__tar.gz → 1.1.0b20240413__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of autogluon.timeseries might be problematic. Click here for more details.
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/PKG-INFO +1 -1
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/setup.py +1 -1
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/dataset/ts_dataframe.py +3 -3
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/learner.py +2 -2
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/abstract/abstract_timeseries_model.py +1 -1
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/autogluon_tabular/mlforecast.py +1 -1
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/gluonts/abstract_gluonts.py +19 -18
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/gluonts/torch/models.py +0 -2
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/predictor.py +2 -2
- autogluon.timeseries-1.1.0b20240413/src/autogluon/timeseries/utils/datetime/base.py +57 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/utils/datetime/lags.py +13 -12
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/utils/datetime/seasonality.py +11 -11
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/utils/datetime/time_features.py +12 -11
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/utils/features.py +4 -1
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/version.py +1 -1
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon.timeseries.egg-info/PKG-INFO +1 -1
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon.timeseries.egg-info/requires.txt +5 -5
- autogluon.timeseries-1.1.0b20240411/src/autogluon/timeseries/utils/datetime/base.py +0 -39
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/setup.cfg +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/__init__.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/configs/__init__.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/configs/presets_configs.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/dataset/__init__.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/evaluator.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/metrics/__init__.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/metrics/abstract.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/metrics/point.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/metrics/quantile.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/metrics/utils.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/__init__.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/abstract/__init__.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/abstract/model_trial.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/autogluon_tabular/__init__.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/autogluon_tabular/utils.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/chronos/__init__.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/chronos/model.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/chronos/pipeline.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/chronos/utils.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/ensemble/__init__.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/gluonts/__init__.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/local/__init__.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/local/abstract_local_model.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/local/naive.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/local/npts.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/local/statsforecast.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/multi_window/__init__.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/multi_window/multi_window_model.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/models/presets.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/splitter.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/trainer/__init__.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/trainer/abstract_trainer.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/trainer/auto_trainer.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/utils/__init__.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/utils/datetime/__init__.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/utils/forecast.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon/timeseries/utils/warning_filters.py +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon.timeseries.egg-info/SOURCES.txt +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon.timeseries.egg-info/dependency_links.txt +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon.timeseries.egg-info/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon.timeseries.egg-info/top_level.txt +0 -0
- {autogluon.timeseries-1.1.0b20240411 → autogluon.timeseries-1.1.0b20240413}/src/autogluon.timeseries.egg-info/zip-safe +0 -0
|
@@ -31,7 +31,7 @@ install_requires = [
|
|
|
31
31
|
"transformers[sentencepiece]", # version range defined in `core/_setup_utils.py`
|
|
32
32
|
"accelerate", # version range defined in `core/_setup_utils.py`
|
|
33
33
|
"statsmodels>=0.13.0,<0.15",
|
|
34
|
-
"gluonts>=0.14.0,<0.15",
|
|
34
|
+
"gluonts>=0.14.0,<0.14.4", # 0.14.4 caps pandas<2.2
|
|
35
35
|
"networkx", # version range defined in `core/_setup_utils.py`
|
|
36
36
|
# TODO: update statsforecast to v1.5.0 - resolve antlr4-python3-runtime dependency clash with multimodal
|
|
37
37
|
"statsforecast>=1.4.0,<1.5",
|
|
@@ -134,7 +134,7 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
|
|
|
134
134
|
----------
|
|
135
135
|
freq : str
|
|
136
136
|
A pandas-compatible string describing the frequency of the time series. For example ``"D"`` for daily data,
|
|
137
|
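-
``"H"`` for hourly data, etc. This attribute is determined automatically based on the timestamps. For the full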
|
|
137
|
+
``"h"`` for hourly data, etc. This attribute is determined automatically based on the timestamps. For the full
|
|
138
138
|
list of possible values, see `pandas documentation <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_.
|
|
139
139
|
num_items : int
|
|
140
140
|
Number of items (time series) in the data set.
|
|
@@ -961,12 +961,12 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
|
|
|
961
961
|
2021-06-30 6.0
|
|
962
962
|
2021-09-30 7.0
|
|
963
963
|
2021-12-31 8.0
|
|
964
|
-
>>> ts_df.convert_frequency("
|
|
964
|
+
>>> ts_df.convert_frequency("YE")
|
|
965
965
|
target
|
|
966
966
|
item_id timestamp
|
|
967
967
|
0 2020-12-31 2.5
|
|
968
968
|
2021-12-31 6.5
|
|
969
|
-
>>> ts_df.convert_frequency("
|
|
969
|
+
>>> ts_df.convert_frequency("YE", agg_numeric="sum")
|
|
970
970
|
target
|
|
971
971
|
item_id timestamp
|
|
972
972
|
0 2020-12-31 10.0
|
|
@@ -250,8 +250,8 @@ class TimeSeriesLearner(AbstractLearner):
|
|
|
250
250
|
raise ValueError(f"Feature {fn} not found in covariate metadata or the dataset.")
|
|
251
251
|
|
|
252
252
|
if len(set(features)) < len(features):
|
|
253
|
-
|
|
254
|
-
"Duplicate feature names provided to compute feature importance.
|
|
253
|
+
raise ValueError(
|
|
254
|
+
"Duplicate feature names provided to compute feature importance. "
|
|
255
255
|
"Please provide unique feature names across both static features and covariates."
|
|
256
256
|
)
|
|
257
257
|
|
|
@@ -31,7 +31,7 @@ class AbstractTimeSeriesModel(AbstractModel):
|
|
|
31
31
|
If None, a new unique time-stamped directory is chosen.
|
|
32
32
|
freq: str
|
|
33
33
|
Frequency string (cf. gluonts frequency strings) describing the frequency
|
|
34
|
-
of the time series data. For example, "H" for hourly or "D" for daily data.
|
|
34
|
+
of the time series data. For example, "h" for hourly or "D" for daily data.
|
|
35
35
|
prediction_length: int
|
|
36
36
|
Length of the prediction horizon, i.e., the number of time steps the model
|
|
37
37
|
is fit to forecast.
|
|
@@ -337,7 +337,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
|
|
|
337
337
|
Seasonal naive forecast for short series, if there are any in the dataset.
|
|
338
338
|
"""
|
|
339
339
|
ts_lengths = data.num_timesteps_per_item()
|
|
340
|
-
short_series = ts_lengths.index[ts_lengths <= self._sum_of_differences]
|
|
340
|
+
short_series = ts_lengths.index[ts_lengths <= self._sum_of_differences + 1]
|
|
341
341
|
if len(short_series) > 0:
|
|
342
342
|
logger.warning(
|
|
343
343
|
f"Warning: {len(short_series)} time series ({len(short_series) / len(ts_lengths):.1%}) are shorter "
|
|
@@ -15,7 +15,6 @@ from gluonts.dataset.field_names import FieldName
|
|
|
15
15
|
from gluonts.model.estimator import Estimator as GluonTSEstimator
|
|
16
16
|
from gluonts.model.forecast import Forecast, QuantileForecast, SampleForecast
|
|
17
17
|
from gluonts.model.predictor import Predictor as GluonTSPredictor
|
|
18
|
-
from pandas.tseries.frequencies import to_offset
|
|
19
18
|
from sklearn.compose import ColumnTransformer
|
|
20
19
|
from sklearn.preprocessing import QuantileTransformer, StandardScaler
|
|
21
20
|
|
|
@@ -37,9 +36,6 @@ logger = logging.getLogger(__name__)
|
|
|
37
36
|
gts_logger = logging.getLogger(gluonts.__name__)
|
|
38
37
|
|
|
39
38
|
|
|
40
|
-
GLUONTS_SUPPORTED_OFFSETS = ["Y", "Q", "M", "W", "D", "B", "H", "T", "min", "S"]
|
|
41
|
-
|
|
42
|
-
|
|
43
39
|
class SimpleGluonTSDataset(GluonTSDataset):
|
|
44
40
|
"""Wrapper for TimeSeriesDataFrame that is compatible with the GluonTS Dataset API."""
|
|
45
41
|
|
|
@@ -66,7 +62,7 @@ class SimpleGluonTSDataset(GluonTSDataset):
|
|
|
66
62
|
self.feat_dynamic_real = self._astype(feat_dynamic_real, dtype=np.float32)
|
|
67
63
|
self.past_feat_dynamic_cat = self._astype(past_feat_dynamic_cat, dtype=np.int64)
|
|
68
64
|
self.past_feat_dynamic_real = self._astype(past_feat_dynamic_real, dtype=np.float32)
|
|
69
|
-
self.freq = self.
|
|
65
|
+
self.freq = self._get_freq_for_period(freq)
|
|
70
66
|
|
|
71
67
|
# Necessary to compute indptr for known_covariates at prediction time
|
|
72
68
|
self.includes_future = includes_future
|
|
@@ -89,19 +85,22 @@ class SimpleGluonTSDataset(GluonTSDataset):
|
|
|
89
85
|
return array.astype(dtype)
|
|
90
86
|
|
|
91
87
|
@staticmethod
|
|
92
|
-
def
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
88
|
+
def _get_freq_for_period(freq: str) -> str:
|
|
89
|
+
"""Convert freq to format compatible with pd.Period.
|
|
90
|
+
|
|
91
|
+
For example, ME freq must be converted to M when creating a pd.Period.
|
|
92
|
+
"""
|
|
93
|
+
offset = pd.tseries.frequencies.to_offset(freq)
|
|
94
|
+
freq_name = norm_freq_str(offset)
|
|
95
|
+
if freq_name == "SME":
|
|
96
|
+
# Replace unsupported frequency "SME" with "2W"
|
|
97
|
+
return "2W"
|
|
98
|
+
elif freq_name == "bh":
|
|
99
|
+
# Replace unsupported frequency "bh" with dummy value "Y"
|
|
100
|
+
return "Y"
|
|
103
101
|
else:
|
|
104
|
-
|
|
102
|
+
freq_name_for_period = {"YE": "Y", "QE": "Q", "ME": "M"}.get(freq_name, freq_name)
|
|
103
|
+
return f"{offset.n}{freq_name_for_period}"
|
|
105
104
|
|
|
106
105
|
def __len__(self):
|
|
107
106
|
return len(self.indptr) - 1 # noqa
|
|
@@ -161,6 +160,8 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
|
|
|
161
160
|
"""
|
|
162
161
|
|
|
163
162
|
gluonts_model_path = "gluon_ts"
|
|
163
|
+
# we pass dummy freq compatible with pandas 2.1 & 2.2 to GluonTS models
|
|
164
|
+
_dummy_gluonts_freq = "D"
|
|
164
165
|
# default number of samples for prediction
|
|
165
166
|
default_num_samples: int = 250
|
|
166
167
|
supports_cat_covariates: bool = False
|
|
@@ -364,7 +365,7 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
|
|
|
364
365
|
init_args.setdefault("early_stopping_patience", 20)
|
|
365
366
|
init_args.update(
|
|
366
367
|
dict(
|
|
367
|
-
freq=self.freq,
|
|
368
|
+
freq=self._dummy_gluonts_freq,
|
|
368
369
|
prediction_length=self.prediction_length,
|
|
369
370
|
quantiles=self.quantile_levels,
|
|
370
371
|
callbacks=self.callbacks,
|
|
@@ -423,6 +423,4 @@ class WaveNetModel(AbstractGluonTSModel):
|
|
|
423
423
|
init_kwargs.setdefault("seasonality", get_seasonality(self.freq))
|
|
424
424
|
init_kwargs.setdefault("time_features", get_time_features_for_frequency(self.freq))
|
|
425
425
|
init_kwargs.setdefault("num_parallel_samples", self.default_num_samples)
|
|
426
|
-
# WaveNet model fails if an unsupported frequency such as "SM" is provided. We provide a dummy freq instead
|
|
427
|
-
init_kwargs["freq"] = "H"
|
|
428
426
|
return init_kwargs
|
|
@@ -69,7 +69,7 @@ class TimeSeriesPredictor(TimeSeriesPredictorDeprecatedMixin):
|
|
|
69
69
|
models that predict up to 3 days into the future from the most recent observation.
|
|
70
70
|
freq : str, optional
|
|
71
71
|
Frequency of the time series data (see `pandas documentation <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
|
|
72
|
-
for available frequencies). For example, ``"D"`` for daily data or ``"H"`` for hourly data.
|
|
72
|
+
for available frequencies). For example, ``"D"`` for daily data or ``"h"`` for hourly data.
|
|
73
73
|
|
|
74
74
|
By default, the predictor will attempt to automatically infer the frequency from the data. This argument should
|
|
75
75
|
only be set in two cases:
|
|
@@ -195,7 +195,7 @@ class TimeSeriesPredictor(TimeSeriesPredictorDeprecatedMixin):
|
|
|
195
195
|
self._min_train_length = max(self.prediction_length + 1, 5)
|
|
196
196
|
self.freq = freq
|
|
197
197
|
if self.freq is not None:
|
|
198
|
-
# Standardize frequency string (e.g., "
|
|
198
|
+
# Standardize frequency string (e.g., "T" -> "min", "Y" -> "YE")
|
|
199
199
|
std_freq = pd.tseries.frequencies.to_offset(self.freq).freqstr
|
|
200
200
|
if std_freq != str(self.freq):
|
|
201
201
|
logger.info(f"Frequency '{self.freq}' stored as '{std_freq}'")
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
TO_MAJOR_FREQ = {
|
|
4
|
+
# sub-daily
|
|
5
|
+
"H": "h",
|
|
6
|
+
"BH": "bh",
|
|
7
|
+
"cbh": "bh",
|
|
8
|
+
"CBH": "bh",
|
|
9
|
+
"T": "min",
|
|
10
|
+
"S": "s",
|
|
11
|
+
"L": "ms",
|
|
12
|
+
"U": "us",
|
|
13
|
+
"N": "ns",
|
|
14
|
+
# business day
|
|
15
|
+
"C": "B",
|
|
16
|
+
# month
|
|
17
|
+
"M": "ME",
|
|
18
|
+
"BM": "ME",
|
|
19
|
+
"BME": "ME",
|
|
20
|
+
"CBM": "ME",
|
|
21
|
+
"CBME": "ME",
|
|
22
|
+
"MS": "ME",
|
|
23
|
+
"BMS": "ME",
|
|
24
|
+
"CBMS": "ME",
|
|
25
|
+
# semi-month
|
|
26
|
+
"SM": "SME",
|
|
27
|
+
"SMS": "SME",
|
|
28
|
+
# quarter
|
|
29
|
+
"Q": "QE",
|
|
30
|
+
"BQ": "QE",
|
|
31
|
+
"BQE": "QE",
|
|
32
|
+
"QS": "QE",
|
|
33
|
+
"BQS": "QE",
|
|
34
|
+
# annual
|
|
35
|
+
"A": "YE",
|
|
36
|
+
"Y": "YE",
|
|
37
|
+
"BA": "YE",
|
|
38
|
+
"BY": "YE",
|
|
39
|
+
"BYE": "YE",
|
|
40
|
+
"AS": "YE",
|
|
41
|
+
"YS": "YE",
|
|
42
|
+
"BAS": "YE",
|
|
43
|
+
"BYS": "YE",
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def norm_freq_str(offset: pd.DateOffset) -> str:
|
|
48
|
+
"""Obtain frequency string from a pandas.DateOffset object.
|
|
49
|
+
|
|
50
|
+
"Non-standard" frequencies are converted to their "standard" counterparts. For example, MS (month start) is mapped
|
|
51
|
+
to ME (month end) since both correspond to the same seasonality, lags and time features.
|
|
52
|
+
|
|
53
|
+
The frequencies are always mapped to the new non-deprecated aliases (pandas>=2.2), e.g., "H" is mapped to "h". The
|
|
54
|
+
downstream functions like `get_seasonality` handle the new aliases even if older version of pandas is used.
|
|
55
|
+
"""
|
|
56
|
+
base_freq = offset.name.split("-")[0]
|
|
57
|
+
return TO_MAJOR_FREQ.get(base_freq, base_freq)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Generate lag indices based on frequency string. Adapted from gluonts.time_feature.lag.
|
|
3
3
|
"""
|
|
4
|
+
|
|
4
5
|
from typing import List, Optional
|
|
5
6
|
|
|
6
7
|
import numpy as np
|
|
@@ -96,13 +97,13 @@ def get_lags_for_frequency(
|
|
|
96
97
|
offset = pd.tseries.frequencies.to_offset(freq)
|
|
97
98
|
offset_name = norm_freq_str(offset)
|
|
98
99
|
|
|
99
|
-
if offset_name == "A":
|
|
100
|
+
if offset_name == "YE":
|
|
100
101
|
lags = []
|
|
101
|
-
elif offset_name == "Q":
|
|
102
|
+
elif offset_name == "QE":
|
|
102
103
|
lags = _make_lags_for_quarter(offset.n)
|
|
103
|
-
elif offset_name == "M":
|
|
104
|
+
elif offset_name == "ME":
|
|
104
105
|
lags = _make_lags_for_month(offset.n)
|
|
105
|
-
elif offset_name == "SM":
|
|
106
|
+
elif offset_name == "SME":
|
|
106
107
|
lags = _make_lags_for_semi_month(offset.n)
|
|
107
108
|
elif offset_name == "W":
|
|
108
109
|
lags = _make_lags_for_week(offset.n)
|
|
@@ -110,21 +111,21 @@ def get_lags_for_frequency(
|
|
|
110
111
|
lags = _make_lags_for_day(offset.n) + _make_lags_for_week(offset.n / 7.0)
|
|
111
112
|
elif offset_name == "B":
|
|
112
113
|
lags = _make_lags_for_day(offset.n, days_in_week=5, days_in_month=22) + _make_lags_for_week(offset.n / 5.0)
|
|
113
|
-
elif offset_name == "H":
|
|
114
|
+
elif offset_name == "h":
|
|
114
115
|
lags = (
|
|
115
116
|
_make_lags_for_hour(offset.n)
|
|
116
117
|
+ _make_lags_for_day(offset.n / 24)
|
|
117
118
|
+ _make_lags_for_week(offset.n / (24 * 7))
|
|
118
119
|
)
|
|
119
120
|
# business hour
|
|
120
|
-
elif offset_name == "BH":
|
|
121
|
+
elif offset_name == "bh":
|
|
121
122
|
lags = (
|
|
122
123
|
_make_lags_for_business_hour(offset.n)
|
|
123
124
|
+ _make_lags_for_day(offset.n / 9)
|
|
124
125
|
+ _make_lags_for_week(offset.n / (9 * 7))
|
|
125
126
|
)
|
|
126
127
|
# minutes
|
|
127
|
-
elif offset_name == "T":
|
|
128
|
+
elif offset_name == "min":
|
|
128
129
|
lags = (
|
|
129
130
|
_make_lags_for_minute(offset.n)
|
|
130
131
|
+ _make_lags_for_hour(offset.n / 60)
|
|
@@ -132,32 +133,32 @@ def get_lags_for_frequency(
|
|
|
132
133
|
+ _make_lags_for_week(offset.n / (60 * 24 * 7))
|
|
133
134
|
)
|
|
134
135
|
# second
|
|
135
|
-
elif offset_name == "S":
|
|
136
|
+
elif offset_name == "s":
|
|
136
137
|
lags = (
|
|
137
138
|
_make_lags_for_second(offset.n)
|
|
138
139
|
+ _make_lags_for_minute(offset.n / 60)
|
|
139
140
|
+ _make_lags_for_hour(offset.n / (60 * 60))
|
|
140
141
|
)
|
|
141
|
-
elif offset_name == "L":
|
|
142
|
+
elif offset_name == "ms":
|
|
142
143
|
lags = (
|
|
143
144
|
_make_lags_for_second(offset.n / 1e3)
|
|
144
145
|
+ _make_lags_for_minute(offset.n / (60 * 1e3))
|
|
145
146
|
+ _make_lags_for_hour(offset.n / (60 * 60 * 1e3))
|
|
146
147
|
)
|
|
147
|
-
elif offset_name == "U":
|
|
148
|
+
elif offset_name == "us":
|
|
148
149
|
lags = (
|
|
149
150
|
_make_lags_for_second(offset.n / 1e6)
|
|
150
151
|
+ _make_lags_for_minute(offset.n / (60 * 1e6))
|
|
151
152
|
+ _make_lags_for_hour(offset.n / (60 * 60 * 1e6))
|
|
152
153
|
)
|
|
153
|
-
elif offset_name == "N":
|
|
154
|
+
elif offset_name == "ns":
|
|
154
155
|
lags = (
|
|
155
156
|
_make_lags_for_second(offset.n / 1e9)
|
|
156
157
|
+ _make_lags_for_minute(offset.n / (60 * 1e9))
|
|
157
158
|
+ _make_lags_for_hour(offset.n / (60 * 60 * 1e9))
|
|
158
159
|
)
|
|
159
160
|
else:
|
|
160
|
-
raise Exception(f"
|
|
161
|
+
raise Exception(f"Cannot get lags for unsupported frequency {freq}")
|
|
161
162
|
|
|
162
163
|
# flatten lags list and filter
|
|
163
164
|
lags = [int(lag) for sub_list in lags for lag in sub_list if 7 < lag <= lag_ub]
|
|
@@ -5,20 +5,20 @@ import pandas as pd
|
|
|
5
5
|
from .base import norm_freq_str
|
|
6
6
|
|
|
7
7
|
DEFAULT_SEASONALITIES = {
|
|
8
|
-
"A": 1,
|
|
9
|
-
"Q": 4,
|
|
10
|
-
"M": 12,
|
|
11
|
-
"SM": 24,
|
|
8
|
+
"YE": 1,
|
|
9
|
+
"QE": 4,
|
|
10
|
+
"ME": 12,
|
|
11
|
+
"SME": 24,
|
|
12
12
|
"W": 1,
|
|
13
13
|
"D": 7,
|
|
14
14
|
"B": 5,
|
|
15
|
-
"BH": 9,
|
|
16
|
-
"H": 24,
|
|
17
|
-
"T": 60 * 24,
|
|
18
|
-
"S": 1,
|
|
19
|
-
"L": 1,
|
|
20
|
-
"U": 1,
|
|
21
|
-
"N": 1,
|
|
15
|
+
"bh": 9,
|
|
16
|
+
"h": 24,
|
|
17
|
+
"min": 60 * 24,
|
|
18
|
+
"s": 1,
|
|
19
|
+
"ms": 1,
|
|
20
|
+
"us": 1,
|
|
21
|
+
"ns": 1,
|
|
22
22
|
}
|
|
23
23
|
|
|
24
24
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Generate time features based on frequency string. Adapted from gluonts.time_feature.time_feature.
|
|
3
3
|
"""
|
|
4
|
+
|
|
4
5
|
from typing import Callable, List
|
|
5
6
|
|
|
6
7
|
import numpy as np
|
|
@@ -57,20 +58,20 @@ def second_of_minute(index: pd.DatetimeIndex) -> np.ndarray:
|
|
|
57
58
|
|
|
58
59
|
def get_time_features_for_frequency(freq) -> List[Callable]:
|
|
59
60
|
features_by_offset_name = {
|
|
60
|
-
"A": [],
|
|
61
|
-
"Q": [quarter_of_year],
|
|
62
|
-
"M": [month_of_year],
|
|
63
|
-
"SM": [day_of_month, month_of_year],
|
|
61
|
+
"YE": [],
|
|
62
|
+
"QE": [quarter_of_year],
|
|
63
|
+
"ME": [month_of_year],
|
|
64
|
+
"SME": [day_of_month, month_of_year],
|
|
64
65
|
"W": [day_of_month, week_of_year],
|
|
65
66
|
"D": [day_of_week, day_of_month, day_of_year],
|
|
66
67
|
"B": [day_of_week, day_of_month, day_of_year],
|
|
67
|
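-
"BH": [hour_of_day, day_of_week, day_of_month, day_of_year],
|
|
68
|
-
"H": [hour_of_day, day_of_week, day_of_month, day_of_year],
|
|
69
|
-
"T": [minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
|
|
70
|
-
"S": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
|
|
71
|
-
"L": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
|
|
72
|
-
"U": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
|
|
73
|
-
"N": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],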
|
|
68
|
+
"bh": [hour_of_day, day_of_week, day_of_month, day_of_year],
|
|
69
|
+
"h": [hour_of_day, day_of_week, day_of_month, day_of_year],
|
|
70
|
+
"min": [minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
|
|
71
|
+
"s": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
|
|
72
|
+
"ms": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
|
|
73
|
+
"us": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
|
|
74
|
+
"ns": [second_of_minute, minute_of_hour, hour_of_day, day_of_week, day_of_month, day_of_year],
|
|
74
75
|
}
|
|
75
76
|
offset = pd.tseries.frequencies.to_offset(freq)
|
|
76
77
|
offset_name = norm_freq_str(offset)
|
|
@@ -14,6 +14,7 @@ from autogluon.features.generators import (
|
|
|
14
14
|
PipelineFeatureGenerator,
|
|
15
15
|
)
|
|
16
16
|
from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TimeSeriesDataFrame
|
|
17
|
+
from autogluon.timeseries.utils.warning_filters import warning_filter
|
|
17
18
|
|
|
18
19
|
logger = logging.getLogger(__name__)
|
|
19
20
|
|
|
@@ -335,7 +336,9 @@ class AbstractFeatureImportanceTransform:
|
|
|
335
336
|
# we'll have to work on the history of the data alone
|
|
336
337
|
data[feature_name] = data[feature_name].copy()
|
|
337
338
|
feature_data = data[feature_name].groupby(level=ITEMID, sort=False).head(-self.prediction_length)
|
|
338
|
-
|
|
339
|
+
# Silence spurious FutureWarning raised by DataFrame.update https://github.com/pandas-dev/pandas/issues/57124
|
|
340
|
+
with warning_filter():
|
|
341
|
+
data[feature_name].update(self._transform_series(feature_data, is_categorical=is_categorical))
|
|
339
342
|
elif feature_name in self.covariate_metadata.static_features:
|
|
340
343
|
feature_data = data.static_features[feature_name].copy()
|
|
341
344
|
feature_data.reset_index(drop=True, inplace=True)
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
joblib<2,>=1.1
|
|
2
2
|
numpy<1.29,>=1.21
|
|
3
3
|
scipy<1.13,>=1.5.4
|
|
4
|
-
pandas<2.2.0,>=2.0.0
|
|
4
|
+
pandas<2.3.0,>=2.0.0
|
|
5
5
|
torch<2.2,>=2.1
|
|
6
6
|
lightning<2.2,>=2.1
|
|
7
7
|
pytorch_lightning<2.2,>=2.1
|
|
8
8
|
transformers[sentencepiece]<4.39.0,>=4.38.0
|
|
9
9
|
accelerate<0.22.0,>=0.21.0
|
|
10
10
|
statsmodels<0.15,>=0.13.0
|
|
11
|
-
gluonts<0.15,>=0.14.0
|
|
11
|
+
gluonts<0.14.4,>=0.14.0
|
|
12
12
|
networkx<4,>=3.0
|
|
13
13
|
statsforecast<1.5,>=1.4.0
|
|
14
14
|
mlforecast<0.10.1,>=0.10.0
|
|
@@ -16,9 +16,9 @@ utilsforecast<0.0.11,>=0.0.10
|
|
|
16
16
|
tqdm<5,>=4.38
|
|
17
17
|
orjson~=3.9
|
|
18
18
|
tensorboard<3,>=2.9
|
|
19
|
-
autogluon.core[raytune]==1.1.0b20240411
|
|
20
|
-
autogluon.common==1.1.0b20240411
|
|
21
|
-
autogluon.tabular[catboost,lightgbm,xgboost]==1.1.0b20240411
|
|
19
|
+
autogluon.core[raytune]==1.1.0b20240413
|
|
20
|
+
autogluon.common==1.1.0b20240413
|
|
21
|
+
autogluon.tabular[catboost,lightgbm,xgboost]==1.1.0b20240413
|
|
22
22
|
|
|
23
23
|
[all]
|
|
24
24
|
optimum[onnxruntime]<1.19,>=1.17
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
import pandas as pd
|
|
2
|
-
|
|
3
|
-
TO_MAJOR_FREQ = {
|
|
4
|
-
"min": "T",
|
|
5
|
-
"ms": "L",
|
|
6
|
-
"us": "U",
|
|
7
|
-
# business day
|
|
8
|
-
"C": "B",
|
|
9
|
-
# month
|
|
10
|
-
"BM": "M",
|
|
11
|
-
"CBM": "M",
|
|
12
|
-
"MS": "M",
|
|
13
|
-
"BMS": "M",
|
|
14
|
-
"CBMS": "M",
|
|
15
|
-
# semi-month
|
|
16
|
-
"SMS": "SM",
|
|
17
|
-
# quarter
|
|
18
|
-
"BQ": "Q",
|
|
19
|
-
"QS": "Q",
|
|
20
|
-
"BQS": "Q",
|
|
21
|
-
# annual
|
|
22
|
-
"Y": "A",
|
|
23
|
-
"BA": "A",
|
|
24
|
-
"BY": "A",
|
|
25
|
-
"AS": "A",
|
|
26
|
-
"YS": "A",
|
|
27
|
-
"BAS": "A",
|
|
28
|
-
"BYS": "A",
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def norm_freq_str(offset: pd.DateOffset) -> str:
|
|
33
|
-
"""Obtain frequency string from a pandas.DateOffset object.
|
|
34
|
-
|
|
35
|
-
"Non-standard" frequencies are converted to their "standard" counterparts. For example, MS (month start) is mapped
|
|
36
|
-
to M (month) since both correspond to the same seasonality, lags and time features.
|
|
37
|
-
"""
|
|
38
|
-
base_freq = offset.name.split("-")[0]
|
|
39
|
-
return TO_MAJOR_FREQ.get(base_freq, base_freq)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|