autogluon.timeseries 1.0.1b20240329__tar.gz → 1.0.1b20240403__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of autogluon.timeseries might be problematic. Click here for more details.
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/PKG-INFO +2 -2
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/dataset/ts_dataframe.py +11 -3
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/abstract/abstract_timeseries_model.py +26 -3
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/autogluon_tabular/mlforecast.py +25 -3
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/chronos/model.py +3 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/gluonts/abstract_gluonts.py +3 -2
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/local/abstract_local_model.py +67 -22
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/local/naive.py +18 -14
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/local/npts.py +3 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/local/statsforecast.py +2 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/multi_window/multi_window_model.py +3 -1
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/predictor.py +35 -39
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/utils/features.py +62 -4
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/version.py +1 -1
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon.timeseries.egg-info/PKG-INFO +2 -2
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon.timeseries.egg-info/requires.txt +3 -3
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/setup.cfg +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/setup.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/configs/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/configs/presets_configs.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/dataset/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/evaluator.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/learner.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/metrics/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/metrics/abstract.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/metrics/point.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/metrics/quantile.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/metrics/utils.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/abstract/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/abstract/model_trial.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/autogluon_tabular/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/autogluon_tabular/utils.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/chronos/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/chronos/pipeline.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/ensemble/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/gluonts/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/gluonts/torch/models.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/local/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/multi_window/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/models/presets.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/splitter.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/trainer/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/trainer/abstract_trainer.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/trainer/auto_trainer.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/utils/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/utils/datetime/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/utils/datetime/base.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/utils/datetime/lags.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/utils/datetime/seasonality.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/utils/datetime/time_features.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/utils/forecast.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon/timeseries/utils/warning_filters.py +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon.timeseries.egg-info/SOURCES.txt +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon.timeseries.egg-info/dependency_links.txt +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon.timeseries.egg-info/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon.timeseries.egg-info/top_level.txt +0 -0
- {autogluon.timeseries-1.0.1b20240329 → autogluon.timeseries-1.0.1b20240403}/src/autogluon.timeseries.egg-info/zip-safe +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: autogluon.timeseries
|
|
3
|
-
Version: 1.0.1b20240329
|
|
3
|
+
Version: 1.0.1b20240403
|
|
4
4
|
Summary: AutoML for Image, Text, and Tabular Data
|
|
5
5
|
Home-page: https://github.com/autogluon/autogluon
|
|
6
6
|
Author: AutoGluon Community
|
|
@@ -26,7 +26,7 @@ Description:
|
|
|
26
26
|
[](https://github.com/autogluon/autogluon/actions/workflows/continuous_integration.yml)
|
|
27
27
|
[](https://github.com/autogluon/autogluon/actions/workflows/platform_tests-command.yml)
|
|
28
28
|
|
|
29
|
-
[
|
|
29
|
+
[Installation](https://auto.gluon.ai/stable/install.html) | [Documentation](https://auto.gluon.ai/stable/index.html) | [Release Notes](https://auto.gluon.ai/stable/whats_new/index.html)
|
|
30
30
|
|
|
31
31
|
AutoGluon automates machine learning tasks enabling you to easily achieve strong predictive performance in your applications. With just a few lines of code, you can train and deploy high-accuracy machine learning and deep learning models on image, text, time series, and tabular data.
|
|
32
32
|
</div>
|
|
@@ -765,11 +765,19 @@ class TimeSeriesDataFrame(pd.DataFrame, TimeSeriesDataFrameDeprecatedMixin):
|
|
|
765
765
|
"(for example, using the `convert_frequency` method)."
|
|
766
766
|
)
|
|
767
767
|
|
|
768
|
-
|
|
768
|
+
# Convert to pd.DataFrame for faster processing
|
|
769
|
+
df = pd.DataFrame(self)
|
|
770
|
+
|
|
771
|
+
# Skip filling if there are no NaNs
|
|
772
|
+
if not df.isna().any(axis=None):
|
|
773
|
+
return self
|
|
774
|
+
|
|
775
|
+
grouped_df = df.groupby(level=ITEMID, sort=False, group_keys=False)
|
|
769
776
|
if method == "auto":
|
|
770
777
|
filled_df = grouped_df.ffill()
|
|
771
|
-
#
|
|
772
|
-
|
|
778
|
+
# If necessary, fill missing values at the start of each time series with bfill
|
|
779
|
+
if filled_df.isna().any(axis=None):
|
|
780
|
+
filled_df = filled_df.groupby(level=ITEMID, sort=False, group_keys=False).bfill()
|
|
773
781
|
elif method in ["ffill", "pad"]:
|
|
774
782
|
filled_df = grouped_df.ffill()
|
|
775
783
|
elif method in ["bfill", "backfill"]:
|
|
@@ -201,7 +201,9 @@ class AbstractTimeSeriesModel(AbstractModel):
|
|
|
201
201
|
}
|
|
202
202
|
return info
|
|
203
203
|
|
|
204
|
-
def fit(
|
|
204
|
+
def fit(
|
|
205
|
+
self, train_data: TimeSeriesDataFrame, val_data: Optional[TimeSeriesDataFrame] = None, **kwargs
|
|
206
|
+
) -> "AbstractTimeSeriesModel":
|
|
205
207
|
"""Fit timeseries model.
|
|
206
208
|
|
|
207
209
|
Models should not override the `fit` method, but instead override the `_fit` method which
|
|
@@ -235,7 +237,10 @@ class AbstractTimeSeriesModel(AbstractModel):
|
|
|
235
237
|
model: AbstractTimeSeriesModel
|
|
236
238
|
The fitted model object
|
|
237
239
|
"""
|
|
238
|
-
|
|
240
|
+
train_data = self.preprocess(train_data, is_train=True)
|
|
241
|
+
if self._get_tags()["can_use_val_data"] and val_data is not None:
|
|
242
|
+
val_data = self.preprocess(val_data, is_train=False)
|
|
243
|
+
return super().fit(train_data=train_data, val_data=val_data, **kwargs)
|
|
239
244
|
|
|
240
245
|
def _fit(
|
|
241
246
|
self,
|
|
@@ -290,6 +295,7 @@ class AbstractTimeSeriesModel(AbstractModel):
|
|
|
290
295
|
data is given as a separate forecast item in the dictionary, keyed by the `item_id`s
|
|
291
296
|
of input items.
|
|
292
297
|
"""
|
|
298
|
+
data = self.preprocess(data, is_train=False)
|
|
293
299
|
predictions = self._predict(data=data, known_covariates=known_covariates, **kwargs)
|
|
294
300
|
logger.debug(f"Predicting with model {self.name}")
|
|
295
301
|
# "0.5" might be missing from the quantiles if self is a wrapper (MultiWindowBacktestingModel or ensemble)
|
|
@@ -488,7 +494,7 @@ class AbstractTimeSeriesModel(AbstractModel):
|
|
|
488
494
|
|
|
489
495
|
return hpo_models, analysis
|
|
490
496
|
|
|
491
|
-
def preprocess(self, data:
|
|
497
|
+
def preprocess(self, data: TimeSeriesDataFrame, is_train: bool = False, **kwargs) -> Any:
|
|
492
498
|
return data
|
|
493
499
|
|
|
494
500
|
def get_memory_size(self, **kwargs) -> Optional[int]:
|
|
@@ -506,3 +512,20 @@ class AbstractTimeSeriesModel(AbstractModel):
|
|
|
506
512
|
return {}
|
|
507
513
|
else:
|
|
508
514
|
return self._user_params.copy()
|
|
515
|
+
|
|
516
|
+
def _more_tags(self) -> dict:
|
|
517
|
+
"""Encode model properties using tags, similar to sklearn & autogluon.tabular.
|
|
518
|
+
|
|
519
|
+
For more details, see `autogluon.core.models.abstract.AbstractModel._get_tags()` and https://scikit-learn.org/stable/_sources/developers/develop.rst.txt.
|
|
520
|
+
|
|
521
|
+
List of currently supported tags:
|
|
522
|
+
- allow_nan: Can the model handle data with missing values represented by np.nan?
|
|
523
|
+
- can_refit_full: Does it make sense to retrain the model without validation data?
|
|
524
|
+
See `autogluon.core.models.abstract._tags._DEFAULT_TAGS` for more details.
|
|
525
|
+
- can_use_val_data: Can model use val_data if it's provided to model.fit()?
|
|
526
|
+
"""
|
|
527
|
+
return {
|
|
528
|
+
"allow_nan": False,
|
|
529
|
+
"can_refit_full": False,
|
|
530
|
+
"can_use_val_data": False,
|
|
531
|
+
}
|
|
@@ -85,6 +85,21 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
|
|
|
85
85
|
self._scaler: Optional[BaseTargetTransform] = None
|
|
86
86
|
self._residuals_std_per_item: Optional[pd.Series] = None
|
|
87
87
|
self._avg_residuals_std: Optional[float] = None
|
|
88
|
+
self._train_target_median: Optional[float] = None
|
|
89
|
+
|
|
90
|
+
def preprocess(self, data: TimeSeriesDataFrame, is_train: bool = False, **kwargs) -> Any:
|
|
91
|
+
if is_train:
|
|
92
|
+
# All-NaN series are removed; partially-NaN series in train_data are handled inside _generate_train_val_dfs
|
|
93
|
+
all_nan_items = data.item_ids[data[self.target].isna().groupby(ITEMID, sort=False).all()]
|
|
94
|
+
if len(all_nan_items):
|
|
95
|
+
data = data.query("item_id not in @all_nan_items")
|
|
96
|
+
return data
|
|
97
|
+
else:
|
|
98
|
+
data = data.fill_missing_values()
|
|
99
|
+
# Fill time series consisting of all NaNs with the median of target in train_data
|
|
100
|
+
if data.isna().any(axis=None):
|
|
101
|
+
data[self.target] = data[self.target].fillna(value=self._train_target_median)
|
|
102
|
+
return data
|
|
88
103
|
|
|
89
104
|
def _get_extra_tabular_init_kwargs(self) -> dict:
|
|
90
105
|
raise NotImplementedError
|
|
@@ -98,8 +113,6 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
|
|
|
98
113
|
return model_params
|
|
99
114
|
|
|
100
115
|
def _get_mlforecast_init_args(self, train_data: TimeSeriesDataFrame, model_params: dict) -> dict:
|
|
101
|
-
# TODO: Support lag generation for all pandas frequencies
|
|
102
|
-
# TODO: Support date_feature generation for all pandas frequencies
|
|
103
116
|
from mlforecast.target_transforms import Differences
|
|
104
117
|
|
|
105
118
|
from .utils import MeanAbsScaler, StandardScaler
|
|
@@ -181,6 +194,10 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
|
|
|
181
194
|
items_to_keep = data.item_ids.to_series().sample(n=int(max_num_items)) # noqa: F841
|
|
182
195
|
data = data.query("item_id in @items_to_keep")
|
|
183
196
|
|
|
197
|
+
# MLForecast.preprocess does not support missing values, but we will exclude them later from the training set
|
|
198
|
+
missing_entries = data.index[data[self.target].isna()]
|
|
199
|
+
data = data.fill_missing_values()
|
|
200
|
+
|
|
184
201
|
num_items = data.num_items
|
|
185
202
|
mlforecast_df = self._to_mlforecast_df(data, data.static_features)
|
|
186
203
|
|
|
@@ -197,6 +214,10 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
|
|
|
197
214
|
|
|
198
215
|
df = self._mask_df(df)
|
|
199
216
|
|
|
217
|
+
# We remove originally missing values filled via imputation from the training set
|
|
218
|
+
if len(missing_entries):
|
|
219
|
+
df = df.set_index(["unique_id", "ds"]).drop(missing_entries, errors="ignore").reset_index()
|
|
220
|
+
|
|
200
221
|
if max_num_samples is not None and len(df) > max_num_samples:
|
|
201
222
|
df = df.sample(n=max_num_samples)
|
|
202
223
|
|
|
@@ -246,6 +267,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
|
|
|
246
267
|
|
|
247
268
|
self._check_fit_params()
|
|
248
269
|
fit_start_time = time.time()
|
|
270
|
+
self._train_target_median = train_data[self.target].median()
|
|
249
271
|
# TabularEstimator is passed to MLForecast later to include tuning_data
|
|
250
272
|
model_params = self._get_model_params()
|
|
251
273
|
|
|
@@ -355,7 +377,7 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
|
|
|
355
377
|
return predictions
|
|
356
378
|
|
|
357
379
|
def _more_tags(self) -> dict:
|
|
358
|
-
return {"can_refit_full": True}
|
|
380
|
+
return {"allow_nan": True, "can_refit_full": True}
|
|
359
381
|
|
|
360
382
|
|
|
361
383
|
class DirectTabularModel(AbstractMLForecastModel):
|
|
@@ -328,8 +328,6 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
|
|
|
328
328
|
|
|
329
329
|
if self.num_feat_static_real > 0:
|
|
330
330
|
feat_static_real = time_series_df.static_features[self.metadata.static_features_real]
|
|
331
|
-
if feat_static_real.isna().values.any():
|
|
332
|
-
feat_static_real = feat_static_real.fillna(feat_static_real.mean())
|
|
333
331
|
else:
|
|
334
332
|
feat_static_real = None
|
|
335
333
|
|
|
@@ -548,3 +546,6 @@ class AbstractGluonTSModel(AbstractTimeSeriesModel):
|
|
|
548
546
|
|
|
549
547
|
forecast_df.index = forecast_index
|
|
550
548
|
return TimeSeriesDataFrame(forecast_df)
|
|
549
|
+
|
|
550
|
+
def _more_tags(self) -> dict:
|
|
551
|
+
return {"allow_nan": True, "can_use_val_data": True}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import time
|
|
3
3
|
from multiprocessing import TimeoutError, cpu_count
|
|
4
|
-
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
4
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import pandas as pd
|
|
@@ -85,6 +85,12 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
|
|
|
85
85
|
self._local_model_args: Dict[str, Any] = None
|
|
86
86
|
self._seasonal_period: Optional[int] = None
|
|
87
87
|
self.time_limit: Optional[float] = None
|
|
88
|
+
self._dummy_forecast: Optional[pd.DataFrame] = None
|
|
89
|
+
|
|
90
|
+
def preprocess(self, data: TimeSeriesDataFrame, is_train: bool = False, **kwargs) -> Any:
|
|
91
|
+
if not self._get_tags()["allow_nan"]:
|
|
92
|
+
data = data.fill_missing_values()
|
|
93
|
+
return data
|
|
88
94
|
|
|
89
95
|
def _fit(self, train_data: TimeSeriesDataFrame, time_limit: Optional[int] = None, **kwargs):
|
|
90
96
|
self._check_fit_params()
|
|
@@ -115,8 +121,16 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
|
|
|
115
121
|
|
|
116
122
|
self._local_model_args = self._update_local_model_args(local_model_args=local_model_args)
|
|
117
123
|
self.time_limit = time_limit
|
|
124
|
+
|
|
125
|
+
self._dummy_forecast = self._get_dummy_forecast(train_data)
|
|
118
126
|
return self
|
|
119
127
|
|
|
128
|
+
def _get_dummy_forecast(self, train_data: TimeSeriesDataFrame) -> pd.DataFrame:
|
|
129
|
+
agg_functions = ["mean"] + [get_quantile_function(q) for q in self.quantile_levels]
|
|
130
|
+
stats_marginal = train_data[self.target].agg(agg_functions)
|
|
131
|
+
stats_repeated = np.tile(stats_marginal.values, [self.prediction_length, 1])
|
|
132
|
+
return pd.DataFrame(stats_repeated, columns=stats_marginal.index)
|
|
133
|
+
|
|
120
134
|
def _update_local_model_args(self, local_model_args: Dict[str, Any]) -> Dict[str, Any]:
|
|
121
135
|
return local_model_args
|
|
122
136
|
|
|
@@ -164,25 +178,30 @@ class AbstractLocalModel(AbstractTimeSeriesModel):
|
|
|
164
178
|
def _predict_wrapper(self, time_series: pd.Series, end_time: Optional[float] = None) -> Tuple[pd.DataFrame, bool]:
|
|
165
179
|
if end_time is not None and time.time() >= end_time:
|
|
166
180
|
raise TimeLimitExceeded
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
if self.use_fallback_model:
|
|
177
|
-
result = seasonal_naive_forecast(
|
|
178
|
-
target=time_series.values.ravel(),
|
|
179
|
-
prediction_length=self.prediction_length,
|
|
180
|
-
quantile_levels=self.quantile_levels,
|
|
181
|
-
seasonal_period=self._seasonal_period,
|
|
181
|
+
|
|
182
|
+
if time_series.isna().all():
|
|
183
|
+
result = self._dummy_forecast.copy()
|
|
184
|
+
model_failed = True
|
|
185
|
+
else:
|
|
186
|
+
try:
|
|
187
|
+
result = self._predict_with_local_model(
|
|
188
|
+
time_series=time_series,
|
|
189
|
+
local_model_args=self._local_model_args.copy(),
|
|
182
190
|
)
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
191
|
+
if not np.isfinite(result.values).all():
|
|
192
|
+
raise RuntimeError("Forecast contains NaN or Inf values.")
|
|
193
|
+
model_failed = False
|
|
194
|
+
except Exception:
|
|
195
|
+
if self.use_fallback_model:
|
|
196
|
+
result = seasonal_naive_forecast(
|
|
197
|
+
target=time_series.values.ravel(),
|
|
198
|
+
prediction_length=self.prediction_length,
|
|
199
|
+
quantile_levels=self.quantile_levels,
|
|
200
|
+
seasonal_period=self._seasonal_period,
|
|
201
|
+
)
|
|
202
|
+
model_failed = True
|
|
203
|
+
else:
|
|
204
|
+
raise
|
|
186
205
|
return result, model_failed
|
|
187
206
|
|
|
188
207
|
def _predict_with_local_model(
|
|
@@ -197,25 +216,51 @@ def seasonal_naive_forecast(
|
|
|
197
216
|
target: np.ndarray, prediction_length: int, quantile_levels: List[float], seasonal_period: int
|
|
198
217
|
) -> pd.DataFrame:
|
|
199
218
|
"""Generate seasonal naive forecast, predicting the last observed value from the same period."""
|
|
219
|
+
|
|
220
|
+
def numpy_ffill(arr: np.ndarray) -> np.ndarray:
|
|
221
|
+
"""Fast implementation of forward fill in numpy."""
|
|
222
|
+
idx = np.arange(len(arr))
|
|
223
|
+
mask = np.isnan(arr)
|
|
224
|
+
idx[mask] = 0
|
|
225
|
+
return arr[np.maximum.accumulate(idx)]
|
|
226
|
+
|
|
200
227
|
forecast = {}
|
|
228
|
+
# Convert to float64 since std computation can be unstable in float32
|
|
229
|
+
target = target.astype(np.float64)
|
|
201
230
|
# At least seasonal_period + 2 values are required to compute sigma for seasonal naive
|
|
202
231
|
if len(target) > seasonal_period + 1 and seasonal_period > 1:
|
|
232
|
+
if np.isnan(target[-(seasonal_period + 2) :]).any():
|
|
233
|
+
target = numpy_ffill(target)
|
|
234
|
+
|
|
203
235
|
indices = [len(target) - seasonal_period + k % seasonal_period for k in range(prediction_length)]
|
|
204
236
|
forecast["mean"] = target[indices]
|
|
205
237
|
residuals = target[seasonal_period:] - target[:-seasonal_period]
|
|
206
238
|
|
|
207
|
-
sigma = np.sqrt(np.mean(np.square(residuals)))
|
|
239
|
+
sigma = np.sqrt(np.nanmean(np.square(residuals)))
|
|
208
240
|
num_full_seasons = np.arange(1, prediction_length + 1) // seasonal_period
|
|
209
241
|
sigma_per_timestep = sigma * np.sqrt(num_full_seasons + 1)
|
|
210
242
|
else:
|
|
211
243
|
# Fall back to naive forecast
|
|
212
|
-
|
|
244
|
+
last_observed_value = target[np.isfinite(target)][-1]
|
|
245
|
+
forecast["mean"] = np.full(shape=[prediction_length], fill_value=last_observed_value)
|
|
213
246
|
residuals = target[1:] - target[:-1]
|
|
214
247
|
|
|
215
|
-
sigma = np.sqrt(np.mean(np.square(residuals)))
|
|
248
|
+
sigma = np.sqrt(np.nanmean(np.square(residuals)))
|
|
249
|
+
if np.isnan(sigma): # happens if there are no two consecutive non-nan observations
|
|
250
|
+
sigma = 0.0
|
|
216
251
|
sigma_per_timestep = sigma * np.sqrt(np.arange(1, prediction_length + 1))
|
|
217
252
|
|
|
218
253
|
for q in quantile_levels:
|
|
219
254
|
forecast[str(q)] = forecast["mean"] + norm.ppf(q) * sigma_per_timestep
|
|
220
255
|
|
|
221
256
|
return pd.DataFrame(forecast)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def get_quantile_function(q: float) -> Callable:
|
|
260
|
+
"""Returns a function with name "q" that computes the q'th quantile of a pandas.Series."""
|
|
261
|
+
|
|
262
|
+
def quantile_fn(x: pd.Series) -> pd.Series:
|
|
263
|
+
return x.quantile(q)
|
|
264
|
+
|
|
265
|
+
quantile_fn.__name__ = str(q)
|
|
266
|
+
return quantile_fn
|
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
from typing import Callable
|
|
2
|
-
|
|
3
1
|
import numpy as np
|
|
4
2
|
import pandas as pd
|
|
5
3
|
|
|
6
|
-
from autogluon.timeseries.models.local.abstract_local_model import
|
|
4
|
+
from autogluon.timeseries.models.local.abstract_local_model import (
|
|
5
|
+
AbstractLocalModel,
|
|
6
|
+
get_quantile_function,
|
|
7
|
+
seasonal_naive_forecast,
|
|
8
|
+
)
|
|
7
9
|
|
|
8
10
|
|
|
9
11
|
class NaiveModel(AbstractLocalModel):
|
|
@@ -36,6 +38,9 @@ class NaiveModel(AbstractLocalModel):
|
|
|
36
38
|
seasonal_period=1,
|
|
37
39
|
)
|
|
38
40
|
|
|
41
|
+
def _more_tags(self) -> dict:
|
|
42
|
+
return {"allow_nan": True}
|
|
43
|
+
|
|
39
44
|
|
|
40
45
|
class SeasonalNaiveModel(AbstractLocalModel):
|
|
41
46
|
"""Baseline model that sets the forecast equal to the last observed value from the same season.
|
|
@@ -75,15 +80,8 @@ class SeasonalNaiveModel(AbstractLocalModel):
|
|
|
75
80
|
seasonal_period=local_model_args["seasonal_period"],
|
|
76
81
|
)
|
|
77
82
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
"""Returns a function with name "q" that computes the q'th quantile of a pandas.Series."""
|
|
81
|
-
|
|
82
|
-
def quantile_fn(x: pd.Series) -> pd.Series:
|
|
83
|
-
return x.quantile(q)
|
|
84
|
-
|
|
85
|
-
quantile_fn.__name__ = str(q)
|
|
86
|
-
return quantile_fn
|
|
83
|
+
def _more_tags(self) -> dict:
|
|
84
|
+
return {"allow_nan": True}
|
|
87
85
|
|
|
88
86
|
|
|
89
87
|
class AverageModel(AbstractLocalModel):
|
|
@@ -109,11 +107,14 @@ class AverageModel(AbstractLocalModel):
|
|
|
109
107
|
time_series: pd.Series,
|
|
110
108
|
local_model_args: dict,
|
|
111
109
|
) -> pd.DataFrame:
|
|
112
|
-
agg_functions = ["mean"] + [
|
|
110
|
+
agg_functions = ["mean"] + [get_quantile_function(q) for q in self.quantile_levels]
|
|
113
111
|
stats_marginal = time_series.agg(agg_functions)
|
|
114
112
|
stats_repeated = np.tile(stats_marginal.values, [self.prediction_length, 1])
|
|
115
113
|
return pd.DataFrame(stats_repeated, columns=stats_marginal.index)
|
|
116
114
|
|
|
115
|
+
def _more_tags(self) -> dict:
|
|
116
|
+
return {"allow_nan": True}
|
|
117
|
+
|
|
117
118
|
|
|
118
119
|
class SeasonalAverageModel(AbstractLocalModel):
|
|
119
120
|
"""Baseline model that sets the forecast equal to the historic average or quantile in the same season.
|
|
@@ -146,7 +147,7 @@ class SeasonalAverageModel(AbstractLocalModel):
|
|
|
146
147
|
local_model_args: dict,
|
|
147
148
|
) -> pd.DataFrame:
|
|
148
149
|
seasonal_period = local_model_args["seasonal_period"]
|
|
149
|
-
agg_functions = ["mean"] + [
|
|
150
|
+
agg_functions = ["mean"] + [get_quantile_function(q) for q in self.quantile_levels]
|
|
150
151
|
|
|
151
152
|
# Compute mean & quantiles for each season
|
|
152
153
|
ts_df = time_series.reset_index(drop=True).to_frame()
|
|
@@ -162,3 +163,6 @@ class SeasonalAverageModel(AbstractLocalModel):
|
|
|
162
163
|
stats_marginal = time_series.agg(agg_functions)
|
|
163
164
|
result = result.fillna(stats_marginal)
|
|
164
165
|
return result
|
|
166
|
+
|
|
167
|
+
def _more_tags(self) -> dict:
|
|
168
|
+
return {"allow_nan": True}
|
|
@@ -204,6 +204,8 @@ class ARIMAModel(AbstractProbabilisticStatsForecastModel):
|
|
|
204
204
|
This significantly speeds up fitting and usually leads to no change in accuracy.
|
|
205
205
|
"""
|
|
206
206
|
|
|
207
|
+
# TODO: This model requires statsforecast >= 1.5.0, so it will only be available after we upgrade the dependency
|
|
208
|
+
|
|
207
209
|
allowed_local_model_args = [
|
|
208
210
|
"order",
|
|
209
211
|
"seasonal_order",
|
|
@@ -276,7 +276,7 @@ class TimeSeriesPredictor(TimeSeriesPredictorDeprecatedMixin):
|
|
|
276
276
|
data: Union[TimeSeriesDataFrame, pd.DataFrame, Path, str],
|
|
277
277
|
name: str = "data",
|
|
278
278
|
) -> TimeSeriesDataFrame:
|
|
279
|
-
"""Ensure that TimeSeriesDataFrame has a sorted index
|
|
279
|
+
"""Ensure that TimeSeriesDataFrame has a sorted index and a valid frequency.
|
|
280
280
|
|
|
281
281
|
If self.freq is None, then self.freq of the predictor will be set to the frequency of the data.
|
|
282
282
|
|
|
@@ -314,18 +314,6 @@ class TimeSeriesPredictor(TimeSeriesPredictorDeprecatedMixin):
|
|
|
314
314
|
if df.freq != self.freq:
|
|
315
315
|
logger.warning(f"{name} with frequency '{df.freq}' has been resampled to frequency '{self.freq}'.")
|
|
316
316
|
df = df.convert_frequency(freq=self.freq)
|
|
317
|
-
|
|
318
|
-
# Fill missing values
|
|
319
|
-
if df.isna().values.any():
|
|
320
|
-
# FIXME: Do not automatically fill NaNs here, handle missing values at the level of individual models.
|
|
321
|
-
# FIXME: Current solution leads to incorrect metric computation if missing values are present
|
|
322
|
-
logger.warning(
|
|
323
|
-
f"{name} contains missing values represented by NaN. "
|
|
324
|
-
f"They have been filled by carrying forward the last valid observation."
|
|
325
|
-
)
|
|
326
|
-
df = df.fill_missing_values()
|
|
327
|
-
if df.isna().values.any():
|
|
328
|
-
raise ValueError(f"Some time series in {name} consist completely of NaN values. Please remove them.")
|
|
329
317
|
return df
|
|
330
318
|
|
|
331
319
|
def _check_data_for_evaluation(self, data: TimeSeriesDataFrame, name: str = "data"):
|
|
@@ -337,15 +325,19 @@ class TimeSeriesPredictor(TimeSeriesPredictorDeprecatedMixin):
|
|
|
337
325
|
f"all time series have length > prediction_length (at least {self.prediction_length + 1})"
|
|
338
326
|
)
|
|
339
327
|
|
|
340
|
-
|
|
341
|
-
def _get_dataset_stats(data: TimeSeriesDataFrame) -> str:
|
|
328
|
+
def _get_dataset_stats(self, data: TimeSeriesDataFrame) -> str:
|
|
342
329
|
ts_lengths = data.num_timesteps_per_item()
|
|
343
|
-
median_length =
|
|
330
|
+
median_length = ts_lengths.median()
|
|
344
331
|
min_length = ts_lengths.min()
|
|
345
332
|
max_length = ts_lengths.max()
|
|
333
|
+
missing_value_fraction = data[self.target].isna().mean()
|
|
334
|
+
if missing_value_fraction > 0:
|
|
335
|
+
missing_value_fraction_str = f" (NaN fraction={missing_value_fraction:.1%})"
|
|
336
|
+
else:
|
|
337
|
+
missing_value_fraction_str = ""
|
|
346
338
|
return (
|
|
347
|
-
f"{len(data)} rows, {data.num_items} time series. "
|
|
348
|
-
f"Median time series length is {median_length} (min={min_length}, max={max_length}). "
|
|
339
|
+
f"{len(data)} rows{missing_value_fraction_str}, {data.num_items} time series. "
|
|
340
|
+
f"Median time series length is {median_length:.0f} (min={min_length}, max={max_length}). "
|
|
349
341
|
)
|
|
350
342
|
|
|
351
343
|
def _reduce_num_val_windows_if_necessary(
|
|
@@ -374,41 +366,45 @@ class TimeSeriesPredictor(TimeSeriesPredictorDeprecatedMixin):
|
|
|
374
366
|
)
|
|
375
367
|
return new_num_val_windows
|
|
376
368
|
|
|
377
|
-
def
|
|
369
|
+
def _filter_useless_train_data(
|
|
378
370
|
self,
|
|
379
371
|
train_data: TimeSeriesDataFrame,
|
|
380
372
|
num_val_windows: int,
|
|
381
373
|
val_step_size: int,
|
|
382
374
|
) -> Tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
|
|
383
|
-
"""Remove time series from train_data that are too short for chosen
|
|
375
|
+
"""Remove time series from train_data that either contain all NaNs or are too short for chosen settings.
|
|
384
376
|
|
|
385
|
-
This method ensures that
|
|
377
|
+
This method ensures that 1) no time series consist of all NaN values and 2) for each validation fold, all train
|
|
378
|
+
series have length >= max(prediction_length + 1, 5).
|
|
386
379
|
|
|
387
|
-
In other words, this method removes from train_data all time series with length less than
|
|
380
|
+
In other words, this method removes from train_data all time series with only NaN values or length less than
|
|
388
381
|
min_train_length + prediction_length + (num_val_windows - 1) * val_step_size
|
|
389
382
|
"""
|
|
390
383
|
min_length = self._min_train_length + self.prediction_length + (num_val_windows - 1) * val_step_size
|
|
391
|
-
|
|
392
384
|
train_lengths = train_data.num_timesteps_per_item()
|
|
393
|
-
|
|
394
|
-
|
|
385
|
+
too_short_items = train_lengths.index[train_lengths < min_length]
|
|
386
|
+
|
|
387
|
+
if len(too_short_items) > 0:
|
|
395
388
|
logger.info(
|
|
396
|
-
f"\tRemoving {len(
|
|
389
|
+
f"\tRemoving {len(too_short_items)} short time series from train_data. Only series with length "
|
|
397
390
|
f">= {min_length} will be used for training."
|
|
398
391
|
)
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
logger.info(
|
|
406
|
-
f"\tAfter removing short series, train_data has {self._get_dataset_stats(filtered_train_data)}"
|
|
407
|
-
)
|
|
408
|
-
else:
|
|
409
|
-
filtered_train_data = train_data
|
|
392
|
+
train_data = train_data.query("item_id not in @too_short_items")
|
|
393
|
+
|
|
394
|
+
all_nan_items = train_data.item_ids[train_data[self.target].isna().groupby(ITEMID, sort=False).all()]
|
|
395
|
+
if len(all_nan_items) > 0:
|
|
396
|
+
logger.info(f"\tRemoving {len(all_nan_items)} time series consisting of only NaN values from train_data.")
|
|
397
|
+
train_data = train_data.query("item_id not in @all_nan_items")
|
|
410
398
|
|
|
411
|
-
|
|
399
|
+
if len(too_short_items) or len(all_nan_items):
|
|
400
|
+
logger.info(f"\tAfter filtering, train_data has {self._get_dataset_stats(train_data)}")
|
|
401
|
+
|
|
402
|
+
if len(train_data) == 0:
|
|
403
|
+
raise ValueError(
|
|
404
|
+
f"At least some time series in train_data must have >= {min_length} observations. Please provide "
|
|
405
|
+
f"longer time series as train_data or reduce prediction_length, num_val_windows, or val_step_size."
|
|
406
|
+
)
|
|
407
|
+
return train_data
|
|
412
408
|
|
|
413
409
|
@apply_presets(TIMESERIES_PRESETS_CONFIGS)
|
|
414
410
|
def fit(
|
|
@@ -722,7 +718,7 @@ class TimeSeriesPredictor(TimeSeriesPredictorDeprecatedMixin):
|
|
|
722
718
|
raise ValueError("Please set num_val_windows >= 1 or provide custom tuning_data")
|
|
723
719
|
|
|
724
720
|
if not skip_model_selection:
|
|
725
|
-
train_data = self.
|
|
721
|
+
train_data = self._filter_useless_train_data(
|
|
726
722
|
train_data, num_val_windows=num_val_windows, val_step_size=val_step_size
|
|
727
723
|
)
|
|
728
724
|
|
|
@@ -28,9 +28,32 @@ class CovariateMetadata:
|
|
|
28
28
|
past_covariates_real: List[str] = field(default_factory=list)
|
|
29
29
|
past_covariates_cat: List[str] = field(default_factory=list)
|
|
30
30
|
|
|
31
|
+
@property
|
|
32
|
+
def known_covariates(self) -> List[str]:
|
|
33
|
+
return self.known_covariates_cat + self.known_covariates_real
|
|
34
|
+
|
|
35
|
+
@property
|
|
36
|
+
def past_covariates(self) -> List[str]:
|
|
37
|
+
return self.past_covariates_cat + self.past_covariates_real
|
|
38
|
+
|
|
39
|
+
@property
|
|
40
|
+
def covariates(self) -> List[str]:
|
|
41
|
+
return self.known_covariates + self.past_covariates
|
|
42
|
+
|
|
43
|
+
@property
|
|
44
|
+
def covariates_real(self) -> List[str]:
|
|
45
|
+
return self.known_covariates_real + self.past_covariates_real
|
|
46
|
+
|
|
47
|
+
@property
|
|
48
|
+
def covariates_cat(self) -> List[str]:
|
|
49
|
+
return self.known_covariates_cat + self.past_covariates_cat
|
|
50
|
+
|
|
31
51
|
|
|
32
52
|
class ContinuousAndCategoricalFeatureGenerator(PipelineFeatureGenerator):
|
|
33
|
-
"""Generates categorical and continuous features for time series models.
|
|
53
|
+
"""Generates categorical and continuous features for time series models.
|
|
54
|
+
|
|
55
|
+
Imputes missing categorical features with the most frequent value in the training set.
|
|
56
|
+
"""
|
|
34
57
|
|
|
35
58
|
def __init__(self, verbosity: int = 0, minimum_cat_count=2, float_dtype: str = "float32", **kwargs):
|
|
36
59
|
generators = [
|
|
@@ -62,13 +85,22 @@ class ContinuousAndCategoricalFeatureGenerator(PipelineFeatureGenerator):
|
|
|
62
85
|
# PipelineFeatureGenerator does not use transform() inside fit_transform(), so we need to override both methods
|
|
63
86
|
if isinstance(X, TimeSeriesDataFrame):
|
|
64
87
|
X = pd.DataFrame(X)
|
|
65
|
-
|
|
88
|
+
transformed = self._convert_numerical_columns_to_float(super().fit_transform(X, *args, **kwargs))
|
|
89
|
+
# Ignore the '__dummy__' feature generated by PipelineFeatureGenerator if none of the features are informative
|
|
90
|
+
return transformed.drop(columns=["__dummy__"], errors="ignore")
|
|
66
91
|
|
|
67
92
|
|
|
68
93
|
class TimeSeriesFeatureGenerator:
|
|
69
94
|
"""Takes care of preprocessing for static_features and past/known covariates.
|
|
70
95
|
|
|
71
96
|
All covariates & static features are converted into either float32 or categorical dtype.
|
|
97
|
+
|
|
98
|
+
Missing values in the target column are left as-is but missing values in static features & covariates are imputed.
|
|
99
|
+
Imputation logic is as follows:
|
|
100
|
+
1. For all categorical columns (static, past, known), we fill missing values with the mode of the training set.
|
|
101
|
+
2. For real static features, we impute missing values with the median of the training set.
|
|
102
|
+
3. For real covariates (past, known), we ffill + bfill within each time series. If for some time series all
|
|
103
|
+
covariate values are missing, we fill them with the median of the training set.
|
|
72
104
|
"""
|
|
73
105
|
|
|
74
106
|
def __init__(self, target: str, known_covariates_names: List[str], float_dtype: str = "float32"):
|
|
@@ -82,6 +114,8 @@ class TimeSeriesFeatureGenerator:
|
|
|
82
114
|
# Cat features with cat_count=1 are fine in static_features since they are repeated for all time steps in a TS
|
|
83
115
|
self.static_feature_pipeline = ContinuousAndCategoricalFeatureGenerator(minimum_cat_count=1)
|
|
84
116
|
self.covariate_metadata: CovariateMetadata = None
|
|
117
|
+
self._train_covariates_real_median: Optional[pd.Series] = None
|
|
118
|
+
self._train_static_real_median: Optional[pd.Series] = None
|
|
85
119
|
|
|
86
120
|
@property
|
|
87
121
|
def required_column_names(self) -> List[str]:
|
|
@@ -129,6 +163,7 @@ class TimeSeriesFeatureGenerator:
|
|
|
129
163
|
logger.info("\tstatic_features:")
|
|
130
164
|
static_features_cat, static_features_real = self._detect_and_log_column_types(static_features_df)
|
|
131
165
|
ignored_static_features = data.static_features.columns.difference(self.static_feature_pipeline.features_in)
|
|
166
|
+
self._train_static_real_median = data.static_features[static_features_real].median()
|
|
132
167
|
else:
|
|
133
168
|
static_features_cat = []
|
|
134
169
|
static_features_real = []
|
|
@@ -154,6 +189,7 @@ class TimeSeriesFeatureGenerator:
|
|
|
154
189
|
static_features_cat=static_features_cat,
|
|
155
190
|
static_features_real=static_features_real,
|
|
156
191
|
)
|
|
192
|
+
self._train_covariates_real_median = data[self.covariate_metadata.covariates_real].median()
|
|
157
193
|
self._is_fit = True
|
|
158
194
|
|
|
159
195
|
def transform(self, data: TimeSeriesDataFrame, data_frame_name: str = "data") -> TimeSeriesDataFrame:
|
|
@@ -180,10 +216,26 @@ class TimeSeriesFeatureGenerator:
|
|
|
180
216
|
if data.static_features is None:
|
|
181
217
|
raise ValueError(f"Provided {data_frame_name} must contain static_features")
|
|
182
218
|
static_features = self.static_feature_pipeline.transform(data.static_features)
|
|
219
|
+
static_real_names = self.covariate_metadata.static_features_real
|
|
220
|
+
# Fill missing static_features_real with the median of the training set
|
|
221
|
+
if static_real_names and static_features[static_real_names].isna().any(axis=None):
|
|
222
|
+
static_features[static_real_names] = static_features[static_real_names].fillna(
|
|
223
|
+
self._train_static_real_median
|
|
224
|
+
)
|
|
183
225
|
else:
|
|
184
226
|
static_features = None
|
|
185
227
|
|
|
186
|
-
|
|
228
|
+
ts_df = TimeSeriesDataFrame(pd.concat(dfs, axis=1), static_features=static_features)
|
|
229
|
+
|
|
230
|
+
covariates_names = self.covariate_metadata.covariates
|
|
231
|
+
if len(covariates_names) > 0:
|
|
232
|
+
# ffill + bfill covariates that have at least some observed values
|
|
233
|
+
ts_df[covariates_names] = ts_df[covariates_names].fill_missing_values()
|
|
234
|
+
# If for some items covariates consist completely of NaNs, fill them with median of training data
|
|
235
|
+
if ts_df[covariates_names].isna().any(axis=None):
|
|
236
|
+
ts_df[covariates_names] = ts_df[covariates_names].fillna(self._train_covariates_real_median)
|
|
237
|
+
|
|
238
|
+
return ts_df
|
|
187
239
|
|
|
188
240
|
def transform_future_known_covariates(
|
|
189
241
|
self, known_covariates: Optional[TimeSeriesDataFrame]
|
|
@@ -194,7 +246,13 @@ class TimeSeriesFeatureGenerator:
|
|
|
194
246
|
self._check_required_columns_are_present(
|
|
195
247
|
known_covariates, required_column_names=self.known_covariates_names, data_frame_name="known_covariates"
|
|
196
248
|
)
|
|
197
|
-
|
|
249
|
+
known_covariates = TimeSeriesDataFrame(self.known_covariates_pipeline.transform(known_covariates))
|
|
250
|
+
# ffill + bfill covariates that have at least some observed values
|
|
251
|
+
known_covariates = known_covariates.fill_missing_values()
|
|
252
|
+
# If for some items covariates consist completely of NaNs, fill them with median of training data
|
|
253
|
+
if known_covariates.isna().any(axis=None):
|
|
254
|
+
known_covariates = known_covariates.fillna(self._train_covariates_real_median)
|
|
255
|
+
return known_covariates
|
|
198
256
|
else:
|
|
199
257
|
return None
|
|
200
258
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: autogluon.timeseries
|
|
3
|
-
Version: 1.0.
|
|
3
|
+
Version: 1.0.1b20240403
|
|
4
4
|
Summary: AutoML for Image, Text, and Tabular Data
|
|
5
5
|
Home-page: https://github.com/autogluon/autogluon
|
|
6
6
|
Author: AutoGluon Community
|
|
@@ -26,7 +26,7 @@ Description:
|
|
|
26
26
|
[](https://github.com/autogluon/autogluon/actions/workflows/continuous_integration.yml)
|
|
27
27
|
[](https://github.com/autogluon/autogluon/actions/workflows/platform_tests-command.yml)
|
|
28
28
|
|
|
29
|
-
[
|
|
29
|
+
[Installation](https://auto.gluon.ai/stable/install.html) | [Documentation](https://auto.gluon.ai/stable/index.html) | [Release Notes](https://auto.gluon.ai/stable/whats_new/index.html)
|
|
30
30
|
|
|
31
31
|
AutoGluon automates machine learning tasks enabling you to easily achieve strong predictive performance in your applications. With just a few lines of code, you can train and deploy high-accuracy machine learning and deep learning models on image, text, time series, and tabular data.
|
|
32
32
|
</div>
|
|
@@ -16,9 +16,9 @@ utilsforecast<0.0.11,>=0.0.10
|
|
|
16
16
|
tqdm<5,>=4.38
|
|
17
17
|
orjson~=3.9
|
|
18
18
|
tensorboard<3,>=2.9
|
|
19
|
-
autogluon.core[raytune]==1.0.
|
|
20
|
-
autogluon.common==1.0.
|
|
21
|
-
autogluon.tabular[catboost,lightgbm,xgboost]==1.0.
|
|
19
|
+
autogluon.core[raytune]==1.0.1b20240403
|
|
20
|
+
autogluon.common==1.0.1b20240403
|
|
21
|
+
autogluon.tabular[catboost,lightgbm,xgboost]==1.0.1b20240403
|
|
22
22
|
|
|
23
23
|
[all]
|
|
24
24
|
optimum[onnxruntime]<1.18,>=1.17
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|