autogluon.timeseries 1.0.1b20240301__tar.gz → 1.0.1b20240303__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of autogluon.timeseries might be problematic. Click here for more details.
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/PKG-INFO +1 -1
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/autogluon_tabular/mlforecast.py +0 -3
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/predictor.py +1 -1
- autogluon.timeseries-1.0.1b20240303/src/autogluon/timeseries/utils/features.py +228 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/version.py +1 -1
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon.timeseries.egg-info/PKG-INFO +1 -1
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon.timeseries.egg-info/requires.txt +3 -3
- autogluon.timeseries-1.0.1b20240301/src/autogluon/timeseries/utils/features.py +0 -194
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/setup.cfg +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/setup.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/configs/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/configs/presets_configs.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/dataset/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/dataset/ts_dataframe.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/evaluator.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/learner.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/metrics/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/metrics/abstract.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/metrics/point.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/metrics/quantile.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/metrics/utils.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/abstract/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/abstract/abstract_timeseries_model.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/abstract/model_trial.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/autogluon_tabular/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/autogluon_tabular/utils.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/ensemble/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/gluonts/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/gluonts/abstract_gluonts.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/gluonts/torch/models.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/local/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/local/abstract_local_model.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/local/naive.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/local/npts.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/local/statsforecast.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/multi_window/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/multi_window/multi_window_model.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/presets.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/splitter.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/trainer/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/trainer/abstract_trainer.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/trainer/auto_trainer.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/utils/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/utils/datetime/__init__.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/utils/datetime/base.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/utils/datetime/lags.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/utils/datetime/seasonality.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/utils/datetime/time_features.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/utils/forecast.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/utils/warning_filters.py +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon.timeseries.egg-info/SOURCES.txt +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon.timeseries.egg-info/dependency_links.txt +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon.timeseries.egg-info/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon.timeseries.egg-info/top_level.txt +0 -0
- {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon.timeseries.egg-info/zip-safe +0 -0
|
@@ -231,9 +231,6 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
|
|
|
231
231
|
if static_features is not None:
|
|
232
232
|
df = pd.merge(df, static_features, how="left", on=ITEMID, suffixes=(None, "_static_feat"))
|
|
233
233
|
|
|
234
|
-
# Convert float64 to float32 to reduce memory usage
|
|
235
|
-
float64_cols = list(df.select_dtypes(include="float64"))
|
|
236
|
-
df[float64_cols] = df[float64_cols].astype("float32")
|
|
237
234
|
# We assume that df is sorted by 'unique_id' inside `TimeSeriesPredictor._check_and_prepare_data_frame`
|
|
238
235
|
return df.rename(columns=column_name_mapping)
|
|
239
236
|
|
|
@@ -293,7 +293,7 @@ class TimeSeriesPredictor(TimeSeriesPredictorDeprecatedMixin):
|
|
|
293
293
|
Preprocessed data in TimeSeriesDataFrame format.
|
|
294
294
|
"""
|
|
295
295
|
df = self._to_data_frame(data, name=name)
|
|
296
|
-
df = df.astype({self.target: "float64"})
|
|
296
|
+
df = df.astype({self.target: "float32"})
|
|
297
297
|
# MultiIndex.is_monotonic_increasing checks if index is sorted by ["item_id", "timestamp"]
|
|
298
298
|
if not df.index.is_monotonic_increasing:
|
|
299
299
|
df = df.sort_index()
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import reprlib
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import List, Optional, Tuple
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
from autogluon.common.features.types import R_FLOAT, R_INT
|
|
9
|
+
from autogluon.features.generators import (
|
|
10
|
+
AsTypeFeatureGenerator,
|
|
11
|
+
CategoryFeatureGenerator,
|
|
12
|
+
IdentityFeatureGenerator,
|
|
13
|
+
PipelineFeatureGenerator,
|
|
14
|
+
)
|
|
15
|
+
from autogluon.timeseries import TimeSeriesDataFrame
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
|
|
21
|
+
class CovariateMetadata:
|
|
22
|
+
"""Provides mapping from different covariate types to columns in the dataset."""
|
|
23
|
+
|
|
24
|
+
static_features_cat: List[str] = field(default_factory=list)
|
|
25
|
+
static_features_real: List[str] = field(default_factory=list)
|
|
26
|
+
known_covariates_real: List[str] = field(default_factory=list)
|
|
27
|
+
known_covariates_cat: List[str] = field(default_factory=list)
|
|
28
|
+
past_covariates_real: List[str] = field(default_factory=list)
|
|
29
|
+
past_covariates_cat: List[str] = field(default_factory=list)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ContinuousAndCategoricalFeatureGenerator(PipelineFeatureGenerator):
|
|
33
|
+
"""Generates categorical and continuous features for time series models."""
|
|
34
|
+
|
|
35
|
+
def __init__(self, verbosity: int = 0, minimum_cat_count=2, float_dtype: str = "float32", **kwargs):
|
|
36
|
+
generators = [
|
|
37
|
+
CategoryFeatureGenerator(minimum_cat_count=minimum_cat_count, fillna="mode"),
|
|
38
|
+
IdentityFeatureGenerator(infer_features_in_args={"valid_raw_types": [R_INT, R_FLOAT]}),
|
|
39
|
+
]
|
|
40
|
+
super().__init__(
|
|
41
|
+
generators=[generators],
|
|
42
|
+
post_generators=[],
|
|
43
|
+
pre_generators=[AsTypeFeatureGenerator(convert_bool=False)],
|
|
44
|
+
pre_enforce_types=False,
|
|
45
|
+
pre_drop_useless=False,
|
|
46
|
+
verbosity=verbosity,
|
|
47
|
+
**kwargs,
|
|
48
|
+
)
|
|
49
|
+
self.float_dtype = float_dtype
|
|
50
|
+
|
|
51
|
+
def _convert_numerical_columns_to_float(self, df: pd.DataFrame) -> pd.DataFrame:
|
|
52
|
+
"""Convert the dtype of all numerical (float or int) columns to the given float dtype."""
|
|
53
|
+
numeric_columns = [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]
|
|
54
|
+
return df.astype({col: self.float_dtype for col in numeric_columns})
|
|
55
|
+
|
|
56
|
+
def transform(self, X: pd.DataFrame, *args, **kwargs) -> pd.DataFrame:
|
|
57
|
+
if isinstance(X, TimeSeriesDataFrame):
|
|
58
|
+
X = pd.DataFrame(X)
|
|
59
|
+
return self._convert_numerical_columns_to_float(super().transform(X, *args, **kwargs))
|
|
60
|
+
|
|
61
|
+
def fit_transform(self, X: pd.DataFrame, *args, **kwargs) -> pd.DataFrame:
|
|
62
|
+
# PipelineFeatureGenerator does not use transform() inside fit_transform(), so we need to override both methods
|
|
63
|
+
if isinstance(X, TimeSeriesDataFrame):
|
|
64
|
+
X = pd.DataFrame(X)
|
|
65
|
+
return self._convert_numerical_columns_to_float(super().fit_transform(X, *args, **kwargs))
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class TimeSeriesFeatureGenerator:
|
|
69
|
+
"""Takes care of preprocessing for static_features and past/known covariates.
|
|
70
|
+
|
|
71
|
+
All covariates & static features are converted into either float32 or categorical dtype.
|
|
72
|
+
"""
|
|
73
|
+
|
|
74
|
+
def __init__(self, target: str, known_covariates_names: List[str], float_dtype: str = "float32"):
|
|
75
|
+
self.target = target
|
|
76
|
+
self.float_dtype = float_dtype
|
|
77
|
+
self._is_fit = False
|
|
78
|
+
self.known_covariates_names = list(known_covariates_names)
|
|
79
|
+
self.past_covariates_names = []
|
|
80
|
+
self.known_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator()
|
|
81
|
+
self.past_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator()
|
|
82
|
+
# Cat features with cat_count=1 are fine in static_features since they are repeated for all time steps in a TS
|
|
83
|
+
self.static_feature_pipeline = ContinuousAndCategoricalFeatureGenerator(minimum_cat_count=1)
|
|
84
|
+
self.covariate_metadata: CovariateMetadata = None
|
|
85
|
+
|
|
86
|
+
@property
|
|
87
|
+
def required_column_names(self) -> List[str]:
|
|
88
|
+
return [self.target] + list(self.known_covariates_names) + list(self.past_covariates_names)
|
|
89
|
+
|
|
90
|
+
def fit(self, data: TimeSeriesDataFrame) -> None:
|
|
91
|
+
assert not self._is_fit, f"{self.__class__.__name__} has already been fit"
|
|
92
|
+
|
|
93
|
+
self.past_covariates_names = []
|
|
94
|
+
for column in data.columns:
|
|
95
|
+
if column != self.target and column not in self.known_covariates_names:
|
|
96
|
+
self.past_covariates_names.append(column)
|
|
97
|
+
|
|
98
|
+
self._check_required_columns_are_present(
|
|
99
|
+
data, required_column_names=self.required_column_names, data_frame_name="train_data"
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
logger.info("\nProvided data contains following columns:")
|
|
103
|
+
logger.info(f"\ttarget: '{self.target}'")
|
|
104
|
+
|
|
105
|
+
if len(self.known_covariates_names) > 0:
|
|
106
|
+
known_covariates_df = self.known_covariates_pipeline.fit_transform(data[self.known_covariates_names])
|
|
107
|
+
logger.info("\tknown_covariates:")
|
|
108
|
+
known_covariates_cat, known_covariates_real = self._detect_and_log_column_types(known_covariates_df)
|
|
109
|
+
self.known_covariates_names = self.known_covariates_pipeline.features_in
|
|
110
|
+
else:
|
|
111
|
+
known_covariates_cat = []
|
|
112
|
+
known_covariates_real = []
|
|
113
|
+
|
|
114
|
+
if len(self.past_covariates_names) > 0:
|
|
115
|
+
past_covariates_df = self.past_covariates_pipeline.fit_transform(data[self.past_covariates_names])
|
|
116
|
+
logger.info("\tpast_covariates:")
|
|
117
|
+
past_covariates_cat, past_covariates_real = self._detect_and_log_column_types(past_covariates_df)
|
|
118
|
+
self.past_covariates_names = self.past_covariates_pipeline.features_in
|
|
119
|
+
else:
|
|
120
|
+
past_covariates_cat = []
|
|
121
|
+
past_covariates_real = []
|
|
122
|
+
|
|
123
|
+
ignored_covariates = data.columns.difference(
|
|
124
|
+
[self.target] + self.known_covariates_names + self.past_covariates_names
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
if data.static_features is not None:
|
|
128
|
+
static_features_df = self.static_feature_pipeline.fit_transform(data.static_features)
|
|
129
|
+
logger.info("\tstatic_features:")
|
|
130
|
+
static_features_cat, static_features_real = self._detect_and_log_column_types(static_features_df)
|
|
131
|
+
ignored_static_features = data.static_features.columns.difference(self.static_feature_pipeline.features_in)
|
|
132
|
+
else:
|
|
133
|
+
static_features_cat = []
|
|
134
|
+
static_features_real = []
|
|
135
|
+
ignored_static_features = []
|
|
136
|
+
|
|
137
|
+
if len(ignored_covariates) > 0 or len(ignored_static_features) > 0:
|
|
138
|
+
logger.info("\nAutoGluon will ignore following non-numeric/non-informative columns:")
|
|
139
|
+
if len(ignored_covariates) > 0:
|
|
140
|
+
logger.info(f"\tignored covariates: {list(ignored_covariates)}")
|
|
141
|
+
if len(ignored_static_features) > 0:
|
|
142
|
+
logger.info(f"\tignored static_features: {list(ignored_static_features)}")
|
|
143
|
+
|
|
144
|
+
if len(data.columns) > 1 or data.static_features is not None:
|
|
145
|
+
logger.info(
|
|
146
|
+
"\nTo learn how to fix incorrectly inferred types, please see documentation for TimeSeriesPredictor.fit"
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
self.covariate_metadata = CovariateMetadata(
|
|
150
|
+
known_covariates_cat=known_covariates_cat,
|
|
151
|
+
known_covariates_real=known_covariates_real,
|
|
152
|
+
past_covariates_cat=past_covariates_cat,
|
|
153
|
+
past_covariates_real=past_covariates_real,
|
|
154
|
+
static_features_cat=static_features_cat,
|
|
155
|
+
static_features_real=static_features_real,
|
|
156
|
+
)
|
|
157
|
+
self._is_fit = True
|
|
158
|
+
|
|
159
|
+
def transform(self, data: TimeSeriesDataFrame, data_frame_name: str = "data") -> TimeSeriesDataFrame:
|
|
160
|
+
"""Transform static features and past/known covariates.
|
|
161
|
+
|
|
162
|
+
Transformed data is guaranteed to match the specification (same columns / dtypes) of the data seen during fit.
|
|
163
|
+
Extra columns not seen during fitting will be removed.
|
|
164
|
+
|
|
165
|
+
If some columns are missing or are incompatible, an exception will be raised.
|
|
166
|
+
"""
|
|
167
|
+
assert self._is_fit, f"{self.__class__.__name__} has not been fit yet"
|
|
168
|
+
self._check_required_columns_are_present(
|
|
169
|
+
data, required_column_names=self.required_column_names, data_frame_name=data_frame_name
|
|
170
|
+
)
|
|
171
|
+
dfs = [data[[self.target]]]
|
|
172
|
+
|
|
173
|
+
if len(self.known_covariates_names) > 0:
|
|
174
|
+
dfs.append(self.known_covariates_pipeline.transform(data[self.known_covariates_names]))
|
|
175
|
+
|
|
176
|
+
if len(self.past_covariates_names) > 0:
|
|
177
|
+
dfs.append(self.past_covariates_pipeline.transform(data[self.past_covariates_names]))
|
|
178
|
+
|
|
179
|
+
if self.static_feature_pipeline.is_fit():
|
|
180
|
+
if data.static_features is None:
|
|
181
|
+
raise ValueError(f"Provided {data_frame_name} must contain static_features")
|
|
182
|
+
static_features = self.static_feature_pipeline.transform(data.static_features)
|
|
183
|
+
else:
|
|
184
|
+
static_features = None
|
|
185
|
+
|
|
186
|
+
return TimeSeriesDataFrame(pd.concat(dfs, axis=1), static_features=static_features)
|
|
187
|
+
|
|
188
|
+
def transform_future_known_covariates(
|
|
189
|
+
self, known_covariates: Optional[TimeSeriesDataFrame]
|
|
190
|
+
) -> Optional[TimeSeriesDataFrame]:
|
|
191
|
+
assert self._is_fit, f"{self.__class__.__name__} has not been fit yet"
|
|
192
|
+
if len(self.known_covariates_names) > 0:
|
|
193
|
+
assert known_covariates is not None, "known_covariates must be provided at prediction time"
|
|
194
|
+
self._check_required_columns_are_present(
|
|
195
|
+
known_covariates, required_column_names=self.known_covariates_names, data_frame_name="known_covariates"
|
|
196
|
+
)
|
|
197
|
+
return TimeSeriesDataFrame(self.known_covariates_pipeline.transform(known_covariates))
|
|
198
|
+
else:
|
|
199
|
+
return None
|
|
200
|
+
|
|
201
|
+
def fit_transform(self, data: TimeSeriesDataFrame, data_frame_name: str = "data") -> TimeSeriesDataFrame:
|
|
202
|
+
self.fit(data)
|
|
203
|
+
return self.transform(data, data_frame_name=data_frame_name)
|
|
204
|
+
|
|
205
|
+
@staticmethod
|
|
206
|
+
def _detect_and_log_column_types(transformed_df: pd.DataFrame) -> Tuple[List[str], List[str]]:
|
|
207
|
+
"""Log & return names of categorical and real-valued columns in the DataFrame."""
|
|
208
|
+
cat_column_names = []
|
|
209
|
+
real_column_names = []
|
|
210
|
+
for column_name, column_dtype in transformed_df.dtypes.items():
|
|
211
|
+
if isinstance(column_dtype, pd.CategoricalDtype):
|
|
212
|
+
cat_column_names.append(column_name)
|
|
213
|
+
elif pd.api.types.is_numeric_dtype(column_dtype):
|
|
214
|
+
real_column_names.append(column_name)
|
|
215
|
+
|
|
216
|
+
logger.info(f"\t\tcategorical: {reprlib.repr(cat_column_names)}")
|
|
217
|
+
logger.info(f"\t\tcontinuous (float): {reprlib.repr(real_column_names)}")
|
|
218
|
+
return cat_column_names, real_column_names
|
|
219
|
+
|
|
220
|
+
@staticmethod
|
|
221
|
+
def _check_required_columns_are_present(
|
|
222
|
+
data: TimeSeriesDataFrame, required_column_names: List[str], data_frame_name: str
|
|
223
|
+
) -> None:
|
|
224
|
+
missing_columns = pd.Index(required_column_names).difference(data.columns)
|
|
225
|
+
if len(missing_columns) > 0:
|
|
226
|
+
raise ValueError(
|
|
227
|
+
f"{len(missing_columns)} columns are missing from {data_frame_name}: {reprlib.repr(missing_columns.to_list())}"
|
|
228
|
+
)
|
|
@@ -14,9 +14,9 @@ utilsforecast<0.0.11,>=0.0.10
|
|
|
14
14
|
tqdm<5,>=4.38
|
|
15
15
|
orjson~=3.9
|
|
16
16
|
tensorboard<3,>=2.9
|
|
17
|
-
autogluon.core[raytune]==1.0.1b20240301
|
|
18
|
-
autogluon.common==1.0.1b20240301
|
|
19
|
-
autogluon.tabular[catboost,lightgbm,xgboost]==1.0.1b20240301
|
|
17
|
+
autogluon.core[raytune]==1.0.1b20240303
|
|
18
|
+
autogluon.common==1.0.1b20240303
|
|
19
|
+
autogluon.tabular[catboost,lightgbm,xgboost]==1.0.1b20240303
|
|
20
20
|
|
|
21
21
|
[all]
|
|
22
22
|
|
|
@@ -1,194 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import reprlib
|
|
3
|
-
from dataclasses import dataclass, field
|
|
4
|
-
from typing import List, Optional
|
|
5
|
-
|
|
6
|
-
import numpy as np
|
|
7
|
-
import pandas as pd
|
|
8
|
-
|
|
9
|
-
from autogluon.common.features.types import R_FLOAT, R_INT
|
|
10
|
-
from autogluon.features.generators import (
|
|
11
|
-
AsTypeFeatureGenerator,
|
|
12
|
-
CategoryFeatureGenerator,
|
|
13
|
-
IdentityFeatureGenerator,
|
|
14
|
-
PipelineFeatureGenerator,
|
|
15
|
-
)
|
|
16
|
-
from autogluon.timeseries import TimeSeriesDataFrame
|
|
17
|
-
|
|
18
|
-
logger = logging.getLogger(__name__)
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
@dataclass
|
|
22
|
-
class CovariateMetadata:
|
|
23
|
-
"""Provides mapping from different covariate types to columns in the dataset."""
|
|
24
|
-
|
|
25
|
-
static_features_cat: List[str] = field(default_factory=list)
|
|
26
|
-
static_features_real: List[str] = field(default_factory=list)
|
|
27
|
-
known_covariates_real: List[str] = field(default_factory=list)
|
|
28
|
-
known_covariates_cat: List[str] = field(default_factory=list)
|
|
29
|
-
past_covariates_real: List[str] = field(default_factory=list)
|
|
30
|
-
past_covariates_cat: List[str] = field(default_factory=list)
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class ContinuousAndCategoricalFeatureGenerator(PipelineFeatureGenerator):
|
|
34
|
-
"""Generates categorical and continuous features for time series models."""
|
|
35
|
-
|
|
36
|
-
def __init__(self, verbosity: int = 0, **kwargs):
|
|
37
|
-
generators = [
|
|
38
|
-
CategoryFeatureGenerator(minimum_cat_count=1, fillna="mode"),
|
|
39
|
-
IdentityFeatureGenerator(infer_features_in_args={"valid_raw_types": [R_INT, R_FLOAT]}),
|
|
40
|
-
]
|
|
41
|
-
super().__init__(
|
|
42
|
-
generators=[generators],
|
|
43
|
-
post_generators=[],
|
|
44
|
-
pre_generators=[AsTypeFeatureGenerator(convert_bool=False)],
|
|
45
|
-
pre_enforce_types=False,
|
|
46
|
-
pre_drop_useless=False,
|
|
47
|
-
verbosity=verbosity,
|
|
48
|
-
**kwargs,
|
|
49
|
-
)
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
class TimeSeriesFeatureGenerator:
|
|
53
|
-
"""Takes care of preprocessing for static_features and past/known covariates.
|
|
54
|
-
|
|
55
|
-
Covariates are all converted to float dtype. Static features, if present, are all converted to categorical & float
|
|
56
|
-
dtypes.
|
|
57
|
-
"""
|
|
58
|
-
|
|
59
|
-
def __init__(self, target: str, known_covariates_names: List[str]):
|
|
60
|
-
self.target = target
|
|
61
|
-
self._is_fit = False
|
|
62
|
-
self.known_covariates_names = list(known_covariates_names)
|
|
63
|
-
self.past_covariates_names = []
|
|
64
|
-
self.static_feature_pipeline = ContinuousAndCategoricalFeatureGenerator()
|
|
65
|
-
self.covariate_metadata: CovariateMetadata = None
|
|
66
|
-
|
|
67
|
-
@property
|
|
68
|
-
def required_column_names(self) -> List[str]:
|
|
69
|
-
return [self.target] + list(self.known_covariates_names) + list(self.past_covariates_names)
|
|
70
|
-
|
|
71
|
-
@staticmethod
|
|
72
|
-
def _convert_numerical_features_to_float(df: pd.DataFrame, float_dtype=np.float64) -> pd.DataFrame:
|
|
73
|
-
"""In-place convert the dtype of all numerical (float or int) columns to the given float dtype."""
|
|
74
|
-
numeric_columns = [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]
|
|
75
|
-
df[numeric_columns] = df[numeric_columns].astype(float_dtype)
|
|
76
|
-
return df
|
|
77
|
-
|
|
78
|
-
def fit(self, data: TimeSeriesDataFrame) -> None:
|
|
79
|
-
assert not self._is_fit, f"{self.__class__.__name__} has already been fit"
|
|
80
|
-
|
|
81
|
-
self.past_covariates_names = []
|
|
82
|
-
for column in data.columns:
|
|
83
|
-
if column != self.target and column not in self.known_covariates_names:
|
|
84
|
-
self.past_covariates_names.append(column)
|
|
85
|
-
|
|
86
|
-
logger.info("\nProvided dataset contains following columns:")
|
|
87
|
-
logger.info(f"\ttarget: '{self.target}'")
|
|
88
|
-
if len(self.known_covariates_names) > 0:
|
|
89
|
-
logger.info(f"\tknown covariates: {self.known_covariates_names}")
|
|
90
|
-
if len(self.past_covariates_names) > 0:
|
|
91
|
-
logger.info(f"\tpast covariates: {self.past_covariates_names}")
|
|
92
|
-
|
|
93
|
-
static_features_cat = []
|
|
94
|
-
static_features_real = []
|
|
95
|
-
if data.static_features is not None:
|
|
96
|
-
static = self.static_feature_pipeline.fit_transform(data.static_features)
|
|
97
|
-
static = self._convert_numerical_features_to_float(static)
|
|
98
|
-
|
|
99
|
-
unused = []
|
|
100
|
-
for col_name in data.static_features.columns:
|
|
101
|
-
if col_name in static.columns and static[col_name].dtype == "category":
|
|
102
|
-
static_features_cat.append(col_name)
|
|
103
|
-
elif col_name in static.columns and static[col_name].dtype == np.float64:
|
|
104
|
-
static_features_real.append(col_name)
|
|
105
|
-
else:
|
|
106
|
-
unused.append(col_name)
|
|
107
|
-
|
|
108
|
-
logger.info("Following types of static features have been inferred:")
|
|
109
|
-
logger.info(f"\tcategorical: {static_features_cat}")
|
|
110
|
-
logger.info(f"\tcontinuous (float): {static_features_real}")
|
|
111
|
-
if len(unused) > 0:
|
|
112
|
-
logger.info(f"\tremoved (uninformative columns): {unused}")
|
|
113
|
-
logger.info(
|
|
114
|
-
"To learn how to fix incorrectly inferred types, please see documentation for TimeSeriesPredictor.fit "
|
|
115
|
-
)
|
|
116
|
-
|
|
117
|
-
self.covariate_metadata = CovariateMetadata(
|
|
118
|
-
static_features_cat=static_features_cat,
|
|
119
|
-
static_features_real=static_features_real,
|
|
120
|
-
known_covariates_real=self.known_covariates_names,
|
|
121
|
-
past_covariates_real=self.past_covariates_names,
|
|
122
|
-
# TODO: Categorical time-varying covariates are not yet supported
|
|
123
|
-
known_covariates_cat=[],
|
|
124
|
-
past_covariates_cat=[],
|
|
125
|
-
)
|
|
126
|
-
self._is_fit = True
|
|
127
|
-
|
|
128
|
-
@staticmethod
|
|
129
|
-
def _check_and_prepare_covariates(
|
|
130
|
-
data: TimeSeriesDataFrame,
|
|
131
|
-
required_column_names: List[str],
|
|
132
|
-
data_frame_name: str,
|
|
133
|
-
) -> TimeSeriesDataFrame:
|
|
134
|
-
"""Select the required dataframe columns and convert them to float64 dtype."""
|
|
135
|
-
missing_columns = pd.Index(required_column_names).difference(data.columns)
|
|
136
|
-
if len(missing_columns) > 0:
|
|
137
|
-
raise ValueError(
|
|
138
|
-
f"{len(missing_columns)} columns are missing from {data_frame_name}: {reprlib.repr(missing_columns.to_list())}"
|
|
139
|
-
)
|
|
140
|
-
data = data[required_column_names]
|
|
141
|
-
try:
|
|
142
|
-
data = data.astype(np.float64)
|
|
143
|
-
except ValueError:
|
|
144
|
-
raise ValueError(
|
|
145
|
-
f"Columns in {data_frame_name} must all have numeric (float or int) dtypes, "
|
|
146
|
-
f"but in provided data they have dtypes {data.dtypes}"
|
|
147
|
-
)
|
|
148
|
-
return data
|
|
149
|
-
|
|
150
|
-
def transform(self, data: TimeSeriesDataFrame, data_frame_name: str = "data") -> TimeSeriesDataFrame:
|
|
151
|
-
"""Transform static features and past/known covariates.
|
|
152
|
-
|
|
153
|
-
Transformed data is guaranteed to match the specification (same columns / dtypes) of the data seen during fit.
|
|
154
|
-
Extra columns not seen during fitting will be removed.
|
|
155
|
-
|
|
156
|
-
If some columns are missing or are incompatible, an exception will be raised.
|
|
157
|
-
"""
|
|
158
|
-
assert self._is_fit, f"{self.__class__.__name__} has not been fit yet"
|
|
159
|
-
# Avoid modifying inplace
|
|
160
|
-
data = data.copy(deep=False)
|
|
161
|
-
|
|
162
|
-
data = self._check_and_prepare_covariates(
|
|
163
|
-
data=data,
|
|
164
|
-
required_column_names=self.required_column_names,
|
|
165
|
-
data_frame_name=data_frame_name,
|
|
166
|
-
)
|
|
167
|
-
|
|
168
|
-
if self.static_feature_pipeline.is_fit():
|
|
169
|
-
if data.static_features is None:
|
|
170
|
-
raise ValueError(f"Provided {data_frame_name} must contain static_features")
|
|
171
|
-
static_features = self.static_feature_pipeline.transform(data.static_features)
|
|
172
|
-
data.static_features = self._convert_numerical_features_to_float(static_features)
|
|
173
|
-
else:
|
|
174
|
-
data.static_features = None
|
|
175
|
-
|
|
176
|
-
return data
|
|
177
|
-
|
|
178
|
-
def transform_future_known_covariates(
|
|
179
|
-
self, known_covariates: Optional[TimeSeriesDataFrame]
|
|
180
|
-
) -> Optional[TimeSeriesDataFrame]:
|
|
181
|
-
assert self._is_fit, f"{self.__class__.__name__} has not been fit yet"
|
|
182
|
-
if len(self.known_covariates_names) > 0:
|
|
183
|
-
assert known_covariates is not None, "known_covariates must be provided at prediction time"
|
|
184
|
-
return self._check_and_prepare_covariates(
|
|
185
|
-
known_covariates,
|
|
186
|
-
required_column_names=self.known_covariates_names,
|
|
187
|
-
data_frame_name="known_covariates",
|
|
188
|
-
)
|
|
189
|
-
else:
|
|
190
|
-
return None
|
|
191
|
-
|
|
192
|
-
def fit_transform(self, data: TimeSeriesDataFrame, data_frame_name: str = "data") -> TimeSeriesDataFrame:
|
|
193
|
-
self.fit(data)
|
|
194
|
-
return self.transform(data, data_frame_name=data_frame_name)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|