autogluon.timeseries 1.0.1b20240301__tar.gz → 1.0.1b20240303__tar.gz

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of autogluon.timeseries might be problematic. Click here for more details.

Files changed (60)
  1. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/PKG-INFO +1 -1
  2. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/autogluon_tabular/mlforecast.py +0 -3
  3. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/predictor.py +1 -1
  4. autogluon.timeseries-1.0.1b20240303/src/autogluon/timeseries/utils/features.py +228 -0
  5. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/version.py +1 -1
  6. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon.timeseries.egg-info/PKG-INFO +1 -1
  7. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon.timeseries.egg-info/requires.txt +3 -3
  8. autogluon.timeseries-1.0.1b20240301/src/autogluon/timeseries/utils/features.py +0 -194
  9. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/setup.cfg +0 -0
  10. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/setup.py +0 -0
  11. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/__init__.py +0 -0
  12. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/configs/__init__.py +0 -0
  13. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/configs/presets_configs.py +0 -0
  14. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/dataset/__init__.py +0 -0
  15. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/dataset/ts_dataframe.py +0 -0
  16. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/evaluator.py +0 -0
  17. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/learner.py +0 -0
  18. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/metrics/__init__.py +0 -0
  19. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/metrics/abstract.py +0 -0
  20. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/metrics/point.py +0 -0
  21. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/metrics/quantile.py +0 -0
  22. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/metrics/utils.py +0 -0
  23. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/__init__.py +0 -0
  24. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/abstract/__init__.py +0 -0
  25. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/abstract/abstract_timeseries_model.py +0 -0
  26. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/abstract/model_trial.py +0 -0
  27. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/autogluon_tabular/__init__.py +0 -0
  28. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/autogluon_tabular/utils.py +0 -0
  29. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/ensemble/__init__.py +0 -0
  30. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -0
  31. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -0
  32. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/gluonts/__init__.py +0 -0
  33. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/gluonts/abstract_gluonts.py +0 -0
  34. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
  35. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/gluonts/torch/models.py +0 -0
  36. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/local/__init__.py +0 -0
  37. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/local/abstract_local_model.py +0 -0
  38. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/local/naive.py +0 -0
  39. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/local/npts.py +0 -0
  40. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/local/statsforecast.py +0 -0
  41. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/multi_window/__init__.py +0 -0
  42. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/multi_window/multi_window_model.py +0 -0
  43. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/models/presets.py +0 -0
  44. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/splitter.py +0 -0
  45. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/trainer/__init__.py +0 -0
  46. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/trainer/abstract_trainer.py +0 -0
  47. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/trainer/auto_trainer.py +0 -0
  48. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/utils/__init__.py +0 -0
  49. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/utils/datetime/__init__.py +0 -0
  50. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/utils/datetime/base.py +0 -0
  51. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/utils/datetime/lags.py +0 -0
  52. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/utils/datetime/seasonality.py +0 -0
  53. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/utils/datetime/time_features.py +0 -0
  54. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/utils/forecast.py +0 -0
  55. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon/timeseries/utils/warning_filters.py +0 -0
  56. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon.timeseries.egg-info/SOURCES.txt +0 -0
  57. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon.timeseries.egg-info/dependency_links.txt +0 -0
  58. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon.timeseries.egg-info/namespace_packages.txt +0 -0
  59. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon.timeseries.egg-info/top_level.txt +0 -0
  60. {autogluon.timeseries-1.0.1b20240301 → autogluon.timeseries-1.0.1b20240303}/src/autogluon.timeseries.egg-info/zip-safe +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: autogluon.timeseries
3
- Version: 1.0.1b20240301
3
+ Version: 1.0.1b20240303
4
4
  Summary: AutoML for Image, Text, and Tabular Data
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -231,9 +231,6 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
231
231
  if static_features is not None:
232
232
  df = pd.merge(df, static_features, how="left", on=ITEMID, suffixes=(None, "_static_feat"))
233
233
 
234
- # Convert float64 to float32 to reduce memory usage
235
- float64_cols = list(df.select_dtypes(include="float64"))
236
- df[float64_cols] = df[float64_cols].astype("float32")
237
234
  # We assume that df is sorted by 'unique_id' inside `TimeSeriesPredictor._check_and_prepare_data_frame`
238
235
  return df.rename(columns=column_name_mapping)
239
236
 
@@ -293,7 +293,7 @@ class TimeSeriesPredictor(TimeSeriesPredictorDeprecatedMixin):
293
293
  Preprocessed data in TimeSeriesDataFrame format.
294
294
  """
295
295
  df = self._to_data_frame(data, name=name)
296
- df = df.astype({self.target: float})
296
+ df = df.astype({self.target: "float32"})
297
297
  # MultiIndex.is_monotonic_increasing checks if index is sorted by ["item_id", "timestamp"]
298
298
  if not df.index.is_monotonic_increasing:
299
299
  df = df.sort_index()
@@ -0,0 +1,228 @@
1
+ import logging
2
+ import reprlib
3
+ from dataclasses import dataclass, field
4
+ from typing import List, Optional, Tuple
5
+
6
+ import pandas as pd
7
+
8
+ from autogluon.common.features.types import R_FLOAT, R_INT
9
+ from autogluon.features.generators import (
10
+ AsTypeFeatureGenerator,
11
+ CategoryFeatureGenerator,
12
+ IdentityFeatureGenerator,
13
+ PipelineFeatureGenerator,
14
+ )
15
+ from autogluon.timeseries import TimeSeriesDataFrame
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ @dataclass
21
+ class CovariateMetadata:
22
+ """Provides mapping from different covariate types to columns in the dataset."""
23
+
24
+ static_features_cat: List[str] = field(default_factory=list)
25
+ static_features_real: List[str] = field(default_factory=list)
26
+ known_covariates_real: List[str] = field(default_factory=list)
27
+ known_covariates_cat: List[str] = field(default_factory=list)
28
+ past_covariates_real: List[str] = field(default_factory=list)
29
+ past_covariates_cat: List[str] = field(default_factory=list)
30
+
31
+
32
+ class ContinuousAndCategoricalFeatureGenerator(PipelineFeatureGenerator):
33
+ """Generates categorical and continuous features for time series models."""
34
+
35
+ def __init__(self, verbosity: int = 0, minimum_cat_count=2, float_dtype: str = "float32", **kwargs):
36
+ generators = [
37
+ CategoryFeatureGenerator(minimum_cat_count=minimum_cat_count, fillna="mode"),
38
+ IdentityFeatureGenerator(infer_features_in_args={"valid_raw_types": [R_INT, R_FLOAT]}),
39
+ ]
40
+ super().__init__(
41
+ generators=[generators],
42
+ post_generators=[],
43
+ pre_generators=[AsTypeFeatureGenerator(convert_bool=False)],
44
+ pre_enforce_types=False,
45
+ pre_drop_useless=False,
46
+ verbosity=verbosity,
47
+ **kwargs,
48
+ )
49
+ self.float_dtype = float_dtype
50
+
51
+ def _convert_numerical_columns_to_float(self, df: pd.DataFrame) -> pd.DataFrame:
52
+ """Convert the dtype of all numerical (float or int) columns to the given float dtype."""
53
+ numeric_columns = [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]
54
+ return df.astype({col: self.float_dtype for col in numeric_columns})
55
+
56
+ def transform(self, X: pd.DataFrame, *args, **kwargs) -> pd.DataFrame:
57
+ if isinstance(X, TimeSeriesDataFrame):
58
+ X = pd.DataFrame(X)
59
+ return self._convert_numerical_columns_to_float(super().transform(X, *args, **kwargs))
60
+
61
+ def fit_transform(self, X: pd.DataFrame, *args, **kwargs) -> pd.DataFrame:
62
+ # PipelineFeatureGenerator does not use transform() inside fit_transform(), so we need to override both methods
63
+ if isinstance(X, TimeSeriesDataFrame):
64
+ X = pd.DataFrame(X)
65
+ return self._convert_numerical_columns_to_float(super().fit_transform(X, *args, **kwargs))
66
+
67
+
68
+ class TimeSeriesFeatureGenerator:
69
+ """Takes care of preprocessing for static_features and past/known covariates.
70
+
71
+ All covariates & static features are converted into either float32 or categorical dtype.
72
+ """
73
+
74
+ def __init__(self, target: str, known_covariates_names: List[str], float_dtype: str = "float32"):
75
+ self.target = target
76
+ self.float_dtype = float_dtype
77
+ self._is_fit = False
78
+ self.known_covariates_names = list(known_covariates_names)
79
+ self.past_covariates_names = []
80
+ self.known_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator()
81
+ self.past_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator()
82
+ # Cat features with cat_count=1 are fine in static_features since they are repeated for all time steps in a TS
83
+ self.static_feature_pipeline = ContinuousAndCategoricalFeatureGenerator(minimum_cat_count=1)
84
+ self.covariate_metadata: CovariateMetadata = None
85
+
86
+ @property
87
+ def required_column_names(self) -> List[str]:
88
+ return [self.target] + list(self.known_covariates_names) + list(self.past_covariates_names)
89
+
90
+ def fit(self, data: TimeSeriesDataFrame) -> None:
91
+ assert not self._is_fit, f"{self.__class__.__name__} has already been fit"
92
+
93
+ self.past_covariates_names = []
94
+ for column in data.columns:
95
+ if column != self.target and column not in self.known_covariates_names:
96
+ self.past_covariates_names.append(column)
97
+
98
+ self._check_required_columns_are_present(
99
+ data, required_column_names=self.required_column_names, data_frame_name="train_data"
100
+ )
101
+
102
+ logger.info("\nProvided data contains following columns:")
103
+ logger.info(f"\ttarget: '{self.target}'")
104
+
105
+ if len(self.known_covariates_names) > 0:
106
+ known_covariates_df = self.known_covariates_pipeline.fit_transform(data[self.known_covariates_names])
107
+ logger.info("\tknown_covariates:")
108
+ known_covariates_cat, known_covariates_real = self._detect_and_log_column_types(known_covariates_df)
109
+ self.known_covariates_names = self.known_covariates_pipeline.features_in
110
+ else:
111
+ known_covariates_cat = []
112
+ known_covariates_real = []
113
+
114
+ if len(self.past_covariates_names) > 0:
115
+ past_covariates_df = self.past_covariates_pipeline.fit_transform(data[self.past_covariates_names])
116
+ logger.info("\tpast_covariates:")
117
+ past_covariates_cat, past_covariates_real = self._detect_and_log_column_types(past_covariates_df)
118
+ self.past_covariates_names = self.past_covariates_pipeline.features_in
119
+ else:
120
+ past_covariates_cat = []
121
+ past_covariates_real = []
122
+
123
+ ignored_covariates = data.columns.difference(
124
+ [self.target] + self.known_covariates_names + self.past_covariates_names
125
+ )
126
+
127
+ if data.static_features is not None:
128
+ static_features_df = self.static_feature_pipeline.fit_transform(data.static_features)
129
+ logger.info("\tstatic_features:")
130
+ static_features_cat, static_features_real = self._detect_and_log_column_types(static_features_df)
131
+ ignored_static_features = data.static_features.columns.difference(self.static_feature_pipeline.features_in)
132
+ else:
133
+ static_features_cat = []
134
+ static_features_real = []
135
+ ignored_static_features = []
136
+
137
+ if len(ignored_covariates) > 0 or len(ignored_static_features) > 0:
138
+ logger.info("\nAutoGluon will ignore following non-numeric/non-informative columns:")
139
+ if len(ignored_covariates) > 0:
140
+ logger.info(f"\tignored covariates: {list(ignored_covariates)}")
141
+ if len(ignored_static_features) > 0:
142
+ logger.info(f"\tignored static_features: {list(ignored_static_features)}")
143
+
144
+ if len(data.columns) > 1 or data.static_features is not None:
145
+ logger.info(
146
+ "\nTo learn how to fix incorrectly inferred types, please see documentation for TimeSeriesPredictor.fit"
147
+ )
148
+
149
+ self.covariate_metadata = CovariateMetadata(
150
+ known_covariates_cat=known_covariates_cat,
151
+ known_covariates_real=known_covariates_real,
152
+ past_covariates_cat=past_covariates_cat,
153
+ past_covariates_real=past_covariates_real,
154
+ static_features_cat=static_features_cat,
155
+ static_features_real=static_features_real,
156
+ )
157
+ self._is_fit = True
158
+
159
+ def transform(self, data: TimeSeriesDataFrame, data_frame_name: str = "data") -> TimeSeriesDataFrame:
160
+ """Transform static features and past/known covariates.
161
+
162
+ Transformed data is guaranteed to match the specification (same columns / dtypes) of the data seen during fit.
163
+ Extra columns not seen during fitting will be removed.
164
+
165
+ If some columns are missing or are incompatible, an exception will be raised.
166
+ """
167
+ assert self._is_fit, f"{self.__class__.__name__} has not been fit yet"
168
+ self._check_required_columns_are_present(
169
+ data, required_column_names=self.required_column_names, data_frame_name=data_frame_name
170
+ )
171
+ dfs = [data[[self.target]]]
172
+
173
+ if len(self.known_covariates_names) > 0:
174
+ dfs.append(self.known_covariates_pipeline.transform(data[self.known_covariates_names]))
175
+
176
+ if len(self.past_covariates_names) > 0:
177
+ dfs.append(self.past_covariates_pipeline.transform(data[self.past_covariates_names]))
178
+
179
+ if self.static_feature_pipeline.is_fit():
180
+ if data.static_features is None:
181
+ raise ValueError(f"Provided {data_frame_name} must contain static_features")
182
+ static_features = self.static_feature_pipeline.transform(data.static_features)
183
+ else:
184
+ static_features = None
185
+
186
+ return TimeSeriesDataFrame(pd.concat(dfs, axis=1), static_features=static_features)
187
+
188
+ def transform_future_known_covariates(
189
+ self, known_covariates: Optional[TimeSeriesDataFrame]
190
+ ) -> Optional[TimeSeriesDataFrame]:
191
+ assert self._is_fit, f"{self.__class__.__name__} has not been fit yet"
192
+ if len(self.known_covariates_names) > 0:
193
+ assert known_covariates is not None, "known_covariates must be provided at prediction time"
194
+ self._check_required_columns_are_present(
195
+ known_covariates, required_column_names=self.known_covariates_names, data_frame_name="known_covariates"
196
+ )
197
+ return TimeSeriesDataFrame(self.known_covariates_pipeline.transform(known_covariates))
198
+ else:
199
+ return None
200
+
201
+ def fit_transform(self, data: TimeSeriesDataFrame, data_frame_name: str = "data") -> TimeSeriesDataFrame:
202
+ self.fit(data)
203
+ return self.transform(data, data_frame_name=data_frame_name)
204
+
205
+ @staticmethod
206
+ def _detect_and_log_column_types(transformed_df: pd.DataFrame) -> Tuple[List[str], List[str]]:
207
+ """Log & return names of categorical and real-valued columns in the DataFrame."""
208
+ cat_column_names = []
209
+ real_column_names = []
210
+ for column_name, column_dtype in transformed_df.dtypes.items():
211
+ if isinstance(column_dtype, pd.CategoricalDtype):
212
+ cat_column_names.append(column_name)
213
+ elif pd.api.types.is_numeric_dtype(column_dtype):
214
+ real_column_names.append(column_name)
215
+
216
+ logger.info(f"\t\tcategorical: {reprlib.repr(cat_column_names)}")
217
+ logger.info(f"\t\tcontinuous (float): {reprlib.repr(real_column_names)}")
218
+ return cat_column_names, real_column_names
219
+
220
+ @staticmethod
221
+ def _check_required_columns_are_present(
222
+ data: TimeSeriesDataFrame, required_column_names: List[str], data_frame_name: str
223
+ ) -> None:
224
+ missing_columns = pd.Index(required_column_names).difference(data.columns)
225
+ if len(missing_columns) > 0:
226
+ raise ValueError(
227
+ f"{len(missing_columns)} columns are missing from {data_frame_name}: {reprlib.repr(missing_columns.to_list())}"
228
+ )
@@ -1,3 +1,3 @@
1
1
  """This is the autogluon version file."""
2
- __version__ = '1.0.1b20240301'
2
+ __version__ = '1.0.1b20240303'
3
3
  __lite__ = False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: autogluon.timeseries
3
- Version: 1.0.1b20240301
3
+ Version: 1.0.1b20240303
4
4
  Summary: AutoML for Image, Text, and Tabular Data
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -14,9 +14,9 @@ utilsforecast<0.0.11,>=0.0.10
14
14
  tqdm<5,>=4.38
15
15
  orjson~=3.9
16
16
  tensorboard<3,>=2.9
17
- autogluon.core[raytune]==1.0.1b20240301
18
- autogluon.common==1.0.1b20240301
19
- autogluon.tabular[catboost,lightgbm,xgboost]==1.0.1b20240301
17
+ autogluon.core[raytune]==1.0.1b20240303
18
+ autogluon.common==1.0.1b20240303
19
+ autogluon.tabular[catboost,lightgbm,xgboost]==1.0.1b20240303
20
20
 
21
21
  [all]
22
22
 
@@ -1,194 +0,0 @@
1
- import logging
2
- import reprlib
3
- from dataclasses import dataclass, field
4
- from typing import List, Optional
5
-
6
- import numpy as np
7
- import pandas as pd
8
-
9
- from autogluon.common.features.types import R_FLOAT, R_INT
10
- from autogluon.features.generators import (
11
- AsTypeFeatureGenerator,
12
- CategoryFeatureGenerator,
13
- IdentityFeatureGenerator,
14
- PipelineFeatureGenerator,
15
- )
16
- from autogluon.timeseries import TimeSeriesDataFrame
17
-
18
- logger = logging.getLogger(__name__)
19
-
20
-
21
- @dataclass
22
- class CovariateMetadata:
23
- """Provides mapping from different covariate types to columns in the dataset."""
24
-
25
- static_features_cat: List[str] = field(default_factory=list)
26
- static_features_real: List[str] = field(default_factory=list)
27
- known_covariates_real: List[str] = field(default_factory=list)
28
- known_covariates_cat: List[str] = field(default_factory=list)
29
- past_covariates_real: List[str] = field(default_factory=list)
30
- past_covariates_cat: List[str] = field(default_factory=list)
31
-
32
-
33
- class ContinuousAndCategoricalFeatureGenerator(PipelineFeatureGenerator):
34
- """Generates categorical and continuous features for time series models."""
35
-
36
- def __init__(self, verbosity: int = 0, **kwargs):
37
- generators = [
38
- CategoryFeatureGenerator(minimum_cat_count=1, fillna="mode"),
39
- IdentityFeatureGenerator(infer_features_in_args={"valid_raw_types": [R_INT, R_FLOAT]}),
40
- ]
41
- super().__init__(
42
- generators=[generators],
43
- post_generators=[],
44
- pre_generators=[AsTypeFeatureGenerator(convert_bool=False)],
45
- pre_enforce_types=False,
46
- pre_drop_useless=False,
47
- verbosity=verbosity,
48
- **kwargs,
49
- )
50
-
51
-
52
- class TimeSeriesFeatureGenerator:
53
- """Takes care of preprocessing for static_features and past/known covariates.
54
-
55
- Covariates are all converted to float dtype. Static features, if present, are all converted to categorical & float
56
- dtypes.
57
- """
58
-
59
- def __init__(self, target: str, known_covariates_names: List[str]):
60
- self.target = target
61
- self._is_fit = False
62
- self.known_covariates_names = list(known_covariates_names)
63
- self.past_covariates_names = []
64
- self.static_feature_pipeline = ContinuousAndCategoricalFeatureGenerator()
65
- self.covariate_metadata: CovariateMetadata = None
66
-
67
- @property
68
- def required_column_names(self) -> List[str]:
69
- return [self.target] + list(self.known_covariates_names) + list(self.past_covariates_names)
70
-
71
- @staticmethod
72
- def _convert_numerical_features_to_float(df: pd.DataFrame, float_dtype=np.float64) -> pd.DataFrame:
73
- """In-place convert the dtype of all numerical (float or int) columns to the given float dtype."""
74
- numeric_columns = [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]
75
- df[numeric_columns] = df[numeric_columns].astype(float_dtype)
76
- return df
77
-
78
- def fit(self, data: TimeSeriesDataFrame) -> None:
79
- assert not self._is_fit, f"{self.__class__.__name__} has already been fit"
80
-
81
- self.past_covariates_names = []
82
- for column in data.columns:
83
- if column != self.target and column not in self.known_covariates_names:
84
- self.past_covariates_names.append(column)
85
-
86
- logger.info("\nProvided dataset contains following columns:")
87
- logger.info(f"\ttarget: '{self.target}'")
88
- if len(self.known_covariates_names) > 0:
89
- logger.info(f"\tknown covariates: {self.known_covariates_names}")
90
- if len(self.past_covariates_names) > 0:
91
- logger.info(f"\tpast covariates: {self.past_covariates_names}")
92
-
93
- static_features_cat = []
94
- static_features_real = []
95
- if data.static_features is not None:
96
- static = self.static_feature_pipeline.fit_transform(data.static_features)
97
- static = self._convert_numerical_features_to_float(static)
98
-
99
- unused = []
100
- for col_name in data.static_features.columns:
101
- if col_name in static.columns and static[col_name].dtype == "category":
102
- static_features_cat.append(col_name)
103
- elif col_name in static.columns and static[col_name].dtype == np.float64:
104
- static_features_real.append(col_name)
105
- else:
106
- unused.append(col_name)
107
-
108
- logger.info("Following types of static features have been inferred:")
109
- logger.info(f"\tcategorical: {static_features_cat}")
110
- logger.info(f"\tcontinuous (float): {static_features_real}")
111
- if len(unused) > 0:
112
- logger.info(f"\tremoved (uninformative columns): {unused}")
113
- logger.info(
114
- "To learn how to fix incorrectly inferred types, please see documentation for TimeSeriesPredictor.fit "
115
- )
116
-
117
- self.covariate_metadata = CovariateMetadata(
118
- static_features_cat=static_features_cat,
119
- static_features_real=static_features_real,
120
- known_covariates_real=self.known_covariates_names,
121
- past_covariates_real=self.past_covariates_names,
122
- # TODO: Categorical time-varying covariates are not yet supported
123
- known_covariates_cat=[],
124
- past_covariates_cat=[],
125
- )
126
- self._is_fit = True
127
-
128
- @staticmethod
129
- def _check_and_prepare_covariates(
130
- data: TimeSeriesDataFrame,
131
- required_column_names: List[str],
132
- data_frame_name: str,
133
- ) -> TimeSeriesDataFrame:
134
- """Select the required dataframe columns and convert them to float64 dtype."""
135
- missing_columns = pd.Index(required_column_names).difference(data.columns)
136
- if len(missing_columns) > 0:
137
- raise ValueError(
138
- f"{len(missing_columns)} columns are missing from {data_frame_name}: {reprlib.repr(missing_columns.to_list())}"
139
- )
140
- data = data[required_column_names]
141
- try:
142
- data = data.astype(np.float64)
143
- except ValueError:
144
- raise ValueError(
145
- f"Columns in {data_frame_name} must all have numeric (float or int) dtypes, "
146
- f"but in provided data they have dtypes {data.dtypes}"
147
- )
148
- return data
149
-
150
- def transform(self, data: TimeSeriesDataFrame, data_frame_name: str = "data") -> TimeSeriesDataFrame:
151
- """Transform static features and past/known covariates.
152
-
153
- Transformed data is guaranteed to match the specification (same columns / dtypes) of the data seen during fit.
154
- Extra columns not seen during fitting will be removed.
155
-
156
- If some columns are missing or are incompatible, an exception will be raised.
157
- """
158
- assert self._is_fit, f"{self.__class__.__name__} has not been fit yet"
159
- # Avoid modifying inplace
160
- data = data.copy(deep=False)
161
-
162
- data = self._check_and_prepare_covariates(
163
- data=data,
164
- required_column_names=self.required_column_names,
165
- data_frame_name=data_frame_name,
166
- )
167
-
168
- if self.static_feature_pipeline.is_fit():
169
- if data.static_features is None:
170
- raise ValueError(f"Provided {data_frame_name} must contain static_features")
171
- static_features = self.static_feature_pipeline.transform(data.static_features)
172
- data.static_features = self._convert_numerical_features_to_float(static_features)
173
- else:
174
- data.static_features = None
175
-
176
- return data
177
-
178
- def transform_future_known_covariates(
179
- self, known_covariates: Optional[TimeSeriesDataFrame]
180
- ) -> Optional[TimeSeriesDataFrame]:
181
- assert self._is_fit, f"{self.__class__.__name__} has not been fit yet"
182
- if len(self.known_covariates_names) > 0:
183
- assert known_covariates is not None, "known_covariates must be provided at prediction time"
184
- return self._check_and_prepare_covariates(
185
- known_covariates,
186
- required_column_names=self.known_covariates_names,
187
- data_frame_name="known_covariates",
188
- )
189
- else:
190
- return None
191
-
192
- def fit_transform(self, data: TimeSeriesDataFrame, data_frame_name: str = "data") -> TimeSeriesDataFrame:
193
- self.fit(data)
194
- return self.transform(data, data_frame_name=data_frame_name)