autogluon.timeseries 1.0.1b20240229__tar.gz → 1.0.1b20240302__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of autogluon.timeseries might be problematic. Click here for more details.

Files changed (60)
  1. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/PKG-INFO +1 -1
  2. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/autogluon_tabular/mlforecast.py +0 -3
  3. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/predictor.py +113 -1
  4. autogluon.timeseries-1.0.1b20240302/src/autogluon/timeseries/utils/features.py +228 -0
  5. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/version.py +1 -1
  6. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon.timeseries.egg-info/PKG-INFO +1 -1
  7. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon.timeseries.egg-info/requires.txt +3 -3
  8. autogluon.timeseries-1.0.1b20240229/src/autogluon/timeseries/utils/features.py +0 -194
  9. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/setup.cfg +0 -0
  10. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/setup.py +0 -0
  11. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/__init__.py +0 -0
  12. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/configs/__init__.py +0 -0
  13. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/configs/presets_configs.py +0 -0
  14. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/dataset/__init__.py +0 -0
  15. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/dataset/ts_dataframe.py +0 -0
  16. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/evaluator.py +0 -0
  17. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/learner.py +0 -0
  18. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/metrics/__init__.py +0 -0
  19. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/metrics/abstract.py +0 -0
  20. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/metrics/point.py +0 -0
  21. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/metrics/quantile.py +0 -0
  22. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/metrics/utils.py +0 -0
  23. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/__init__.py +0 -0
  24. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/abstract/__init__.py +0 -0
  25. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/abstract/abstract_timeseries_model.py +0 -0
  26. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/abstract/model_trial.py +0 -0
  27. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/autogluon_tabular/__init__.py +0 -0
  28. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/autogluon_tabular/utils.py +0 -0
  29. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/ensemble/__init__.py +0 -0
  30. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -0
  31. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -0
  32. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/gluonts/__init__.py +0 -0
  33. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/gluonts/abstract_gluonts.py +0 -0
  34. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
  35. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/gluonts/torch/models.py +0 -0
  36. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/local/__init__.py +0 -0
  37. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/local/abstract_local_model.py +0 -0
  38. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/local/naive.py +0 -0
  39. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/local/npts.py +0 -0
  40. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/local/statsforecast.py +0 -0
  41. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/multi_window/__init__.py +0 -0
  42. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/multi_window/multi_window_model.py +0 -0
  43. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/models/presets.py +0 -0
  44. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/splitter.py +0 -0
  45. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/trainer/__init__.py +0 -0
  46. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/trainer/abstract_trainer.py +0 -0
  47. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/trainer/auto_trainer.py +0 -0
  48. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/utils/__init__.py +0 -0
  49. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/utils/datetime/__init__.py +0 -0
  50. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/utils/datetime/base.py +0 -0
  51. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/utils/datetime/lags.py +0 -0
  52. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/utils/datetime/seasonality.py +0 -0
  53. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/utils/datetime/time_features.py +0 -0
  54. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/utils/forecast.py +0 -0
  55. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon/timeseries/utils/warning_filters.py +0 -0
  56. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon.timeseries.egg-info/SOURCES.txt +0 -0
  57. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon.timeseries.egg-info/dependency_links.txt +0 -0
  58. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon.timeseries.egg-info/namespace_packages.txt +0 -0
  59. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon.timeseries.egg-info/top_level.txt +0 -0
  60. {autogluon.timeseries-1.0.1b20240229 → autogluon.timeseries-1.0.1b20240302}/src/autogluon.timeseries.egg-info/zip-safe +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: autogluon.timeseries
3
- Version: 1.0.1b20240229
3
+ Version: 1.0.1b20240302
4
4
  Summary: AutoML for Image, Text, and Tabular Data
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -231,9 +231,6 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
231
231
  if static_features is not None:
232
232
  df = pd.merge(df, static_features, how="left", on=ITEMID, suffixes=(None, "_static_feat"))
233
233
 
234
- # Convert float64 to float32 to reduce memory usage
235
- float64_cols = list(df.select_dtypes(include="float64"))
236
- df[float64_cols] = df[float64_cols].astype("float32")
237
234
  # We assume that df is sorted by 'unique_id' inside `TimeSeriesPredictor._check_and_prepare_data_frame`
238
235
  return df.rename(columns=column_name_mapping)
239
236
 
@@ -1,11 +1,13 @@
1
1
  import json
2
2
  import logging
3
+ import math
3
4
  import os
4
5
  import pprint
5
6
  import time
6
7
  from pathlib import Path
7
8
  from typing import Any, Dict, List, Optional, Tuple, Type, Union
8
9
 
10
+ import numpy as np
9
11
  import pandas as pd
10
12
 
11
13
  from autogluon.common.utils.deprecated_utils import Deprecated
@@ -291,7 +293,7 @@ class TimeSeriesPredictor(TimeSeriesPredictorDeprecatedMixin):
291
293
  Preprocessed data in TimeSeriesDataFrame format.
292
294
  """
293
295
  df = self._to_data_frame(data, name=name)
294
- df = df.astype({self.target: float})
296
+ df = df.astype({self.target: "float32"})
295
297
  # MultiIndex.is_monotonic_increasing checks if index is sorted by ["item_id", "timestamp"]
296
298
  if not df.index.is_monotonic_increasing:
297
299
  df = df.sort_index()
@@ -1190,3 +1192,113 @@ class TimeSeriesPredictor(TimeSeriesPredictorDeprecatedMixin):
1190
1192
  quantile_levels=self.quantile_levels,
1191
1193
  )
1192
1194
  return simulation_dict
1195
+
1196
def plot(
    self,
    data: Union[TimeSeriesDataFrame, pd.DataFrame, str],
    predictions: Optional[Union[TimeSeriesDataFrame, pd.DataFrame, str]] = None,
    quantile_levels: Optional[List[float]] = None,
    item_ids: Optional[List[Union[str, int]]] = None,
    max_num_item_ids: int = 8,
    max_history_length: Optional[int] = None,
    point_forecast_column: Optional[str] = None,
    matplotlib_rc_params: Optional[dict] = None,
):
    """Plot historic time series values and the forecasts.

    Parameters
    ----------
    data : Union[TimeSeriesDataFrame, pd.DataFrame, str]
        Observed time series data.
    predictions : TimeSeriesDataFrame, optional
        Predictions generated by calling :meth:`~autogluon.timeseries.TimeSeriesPredictor.predict`.
    quantile_levels : List[float], optional
        Quantile levels for which to plot the prediction intervals. Defaults to lowest & highest quantile levels
        available in ``predictions``.
    item_ids : List[Union[str, int]], optional
        If provided, plots will only be generated for time series with these item IDs. By default (if set to
        ``None``), item IDs are selected randomly. In either case, plots are generated for at most
        ``max_num_item_ids`` time series.
    max_num_item_ids : int, default = 8
        At most this many time series will be plotted by the method.
    max_history_length : int, optional
        If provided, at most this many time steps will be shown for each time series in ``data``.
    point_forecast_column : str, optional
        Name of the column in ``predictions`` that will be plotted as the point forecast. Defaults to ``"0.5"``,
        if this column is present in ``predictions``, otherwise ``"mean"``.
    matplotlib_rc_params : dict, optional
        Dictionary describing the plot style that will be passed to [`matplotlib.pyplot.rc_context`](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.rc_context.html).
        See [matplotlib documentation](https://matplotlib.org/stable/users/explain/customizing.html#the-default-matplotlibrc-file) for the list of available options.

    Returns
    -------
    fig
        The figure object created by ``matplotlib.pyplot.subplots`` that holds one subplot per plotted item.

    Raises
    ------
    ValueError
        If ``predictions`` is provided but is not a ``TimeSeriesDataFrame`` with a ``"mean"`` column and a
        two-level index.
    """
    # Imported lazily so that matplotlib is only required when plotting is actually requested.
    import matplotlib.pyplot as plt

    data = self._check_and_prepare_data_frame(data)
    if item_ids is None:
        item_ids = list(np.random.choice(data.item_ids, size=min(max_num_item_ids, data.num_items), replace=False))
    else:
        item_ids = list(item_ids)[:max_num_item_ids]

    if predictions is not None:
        if (
            not isinstance(predictions, TimeSeriesDataFrame)
            or "mean" not in predictions.columns
            or predictions.index.nlevels != 2
        ):
            raise ValueError("predictions must be a TimeSeriesDataFrame produced by predictor.predict()")
        if point_forecast_column is None:
            point_forecast_column = "0.5" if "0.5" in predictions.columns else "mean"
        if quantile_levels is None:
            # Every column other than "mean" is interpreted as a quantile level.
            available_quantile_levels = [float(q) for q in predictions.columns if q != "mean"]
            if len(available_quantile_levels) >= 2:
                quantile_levels = [min(available_quantile_levels), max(available_quantile_levels)]
            else:
                quantile_levels = []

    # A single series gets one full-width plot; otherwise the subplots are arranged in two columns.
    if len(item_ids) == 1:
        ncols = 1
        nrows = 1
    else:
        ncols = 2
        nrows = math.ceil(len(item_ids) / ncols)

    rc_params = {
        "font.size": 10,
        "figure.figsize": [20, 3.5 * nrows],
        "figure.dpi": 100,
        "legend.loc": "upper center",
    }
    if matplotlib_rc_params is not None:
        # User-provided settings take precedence over the defaults above.
        rc_params.update(matplotlib_rc_params)

    with plt.rc_context(rc_params):
        fig, axes = plt.subplots(ncols=ncols, nrows=nrows, squeeze=False)
        fig.tight_layout(h_pad=2.5, w_pad=0.5)
        # Flatten the (nrows, ncols) grid row by row, so subplot i sits in row i // ncols, column i % ncols.
        axes = axes.ravel()

        for i, (item_id, ax) in enumerate(zip(item_ids, axes)):
            ax.set_title(item_id)
            ax.grid()
            # Label the x axis for subplots in the lowest row
            if i // ncols == nrows - 1:
                ax.set_xlabel("Time")
            # Label the y axis for subplots in the leftmost column
            if i % ncols == 0:
                ax.set_ylabel(self.target)

            ts = data.loc[item_id][self.target]
            if max_history_length is not None:
                ts = ts.iloc[-max_history_length:]
            ax.plot(ts, label="Observed", color="C0")

            if predictions is not None:
                forecast = predictions.loc[item_id]
                point_forecast = forecast[point_forecast_column]
                ax.plot(point_forecast, color="C1", label="Forecast")
                if quantile_levels is not None:
                    # Shade the band between the point forecast and each requested quantile.
                    for q in quantile_levels:
                        ax.fill_between(forecast.index, point_forecast, forecast[str(q)], color="C1", alpha=0.2)
        # Hide grid cells that have no series assigned (e.g. the last cell for an odd number of items).
        for unused_ax in axes[len(item_ids) :]:
            unused_ax.set_axis_off()
        # All subplots share the same legend entries, so a single figure-level legend is built from the first one.
        handles, labels = axes[0].get_legend_handles_labels()
        fig.legend(handles, labels, bbox_to_anchor=(0.5, 0.0), ncols=len(handles))
    return fig
@@ -0,0 +1,228 @@
1
+ import logging
2
+ import reprlib
3
+ from dataclasses import dataclass, field
4
+ from typing import List, Optional, Tuple
5
+
6
+ import pandas as pd
7
+
8
+ from autogluon.common.features.types import R_FLOAT, R_INT
9
+ from autogluon.features.generators import (
10
+ AsTypeFeatureGenerator,
11
+ CategoryFeatureGenerator,
12
+ IdentityFeatureGenerator,
13
+ PipelineFeatureGenerator,
14
+ )
15
+ from autogluon.timeseries import TimeSeriesDataFrame
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ @dataclass
21
+ class CovariateMetadata:
22
+ """Provides mapping from different covariate types to columns in the dataset."""
23
+
24
+ static_features_cat: List[str] = field(default_factory=list)
25
+ static_features_real: List[str] = field(default_factory=list)
26
+ known_covariates_real: List[str] = field(default_factory=list)
27
+ known_covariates_cat: List[str] = field(default_factory=list)
28
+ past_covariates_real: List[str] = field(default_factory=list)
29
+ past_covariates_cat: List[str] = field(default_factory=list)
30
+
31
+
32
+ class ContinuousAndCategoricalFeatureGenerator(PipelineFeatureGenerator):
33
+ """Generates categorical and continuous features for time series models."""
34
+
35
+ def __init__(self, verbosity: int = 0, minimum_cat_count=2, float_dtype: str = "float32", **kwargs):
36
+ generators = [
37
+ CategoryFeatureGenerator(minimum_cat_count=minimum_cat_count, fillna="mode"),
38
+ IdentityFeatureGenerator(infer_features_in_args={"valid_raw_types": [R_INT, R_FLOAT]}),
39
+ ]
40
+ super().__init__(
41
+ generators=[generators],
42
+ post_generators=[],
43
+ pre_generators=[AsTypeFeatureGenerator(convert_bool=False)],
44
+ pre_enforce_types=False,
45
+ pre_drop_useless=False,
46
+ verbosity=verbosity,
47
+ **kwargs,
48
+ )
49
+ self.float_dtype = float_dtype
50
+
51
+ def _convert_numerical_columns_to_float(self, df: pd.DataFrame) -> pd.DataFrame:
52
+ """Convert the dtype of all numerical (float or int) columns to the given float dtype."""
53
+ numeric_columns = [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]
54
+ return df.astype({col: self.float_dtype for col in numeric_columns})
55
+
56
+ def transform(self, X: pd.DataFrame, *args, **kwargs) -> pd.DataFrame:
57
+ if isinstance(X, TimeSeriesDataFrame):
58
+ X = pd.DataFrame(X)
59
+ return self._convert_numerical_columns_to_float(super().transform(X, *args, **kwargs))
60
+
61
+ def fit_transform(self, X: pd.DataFrame, *args, **kwargs) -> pd.DataFrame:
62
+ # PipelineFeatureGenerator does not use transform() inside fit_transform(), so we need to override both methods
63
+ if isinstance(X, TimeSeriesDataFrame):
64
+ X = pd.DataFrame(X)
65
+ return self._convert_numerical_columns_to_float(super().fit_transform(X, *args, **kwargs))
66
+
67
+
68
+ class TimeSeriesFeatureGenerator:
69
+ """Takes care of preprocessing for static_features and past/known covariates.
70
+
71
+ All covariates & static features are converted into either float32 or categorical dtype.
72
+ """
73
+
74
+ def __init__(self, target: str, known_covariates_names: List[str], float_dtype: str = "float32"):
75
+ self.target = target
76
+ self.float_dtype = float_dtype
77
+ self._is_fit = False
78
+ self.known_covariates_names = list(known_covariates_names)
79
+ self.past_covariates_names = []
80
+ self.known_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator()
81
+ self.past_covariates_pipeline = ContinuousAndCategoricalFeatureGenerator()
82
+ # Cat features with cat_count=1 are fine in static_features since they are repeated for all time steps in a TS
83
+ self.static_feature_pipeline = ContinuousAndCategoricalFeatureGenerator(minimum_cat_count=1)
84
+ self.covariate_metadata: CovariateMetadata = None
85
+
86
+ @property
87
+ def required_column_names(self) -> List[str]:
88
+ return [self.target] + list(self.known_covariates_names) + list(self.past_covariates_names)
89
+
90
+ def fit(self, data: TimeSeriesDataFrame) -> None:
91
+ assert not self._is_fit, f"{self.__class__.__name__} has already been fit"
92
+
93
+ self.past_covariates_names = []
94
+ for column in data.columns:
95
+ if column != self.target and column not in self.known_covariates_names:
96
+ self.past_covariates_names.append(column)
97
+
98
+ self._check_required_columns_are_present(
99
+ data, required_column_names=self.required_column_names, data_frame_name="train_data"
100
+ )
101
+
102
+ logger.info("\nProvided data contains following columns:")
103
+ logger.info(f"\ttarget: '{self.target}'")
104
+
105
+ if len(self.known_covariates_names) > 0:
106
+ known_covariates_df = self.known_covariates_pipeline.fit_transform(data[self.known_covariates_names])
107
+ logger.info("\tknown_covariates:")
108
+ known_covariates_cat, known_covariates_real = self._detect_and_log_column_types(known_covariates_df)
109
+ self.known_covariates_names = self.known_covariates_pipeline.features_in
110
+ else:
111
+ known_covariates_cat = []
112
+ known_covariates_real = []
113
+
114
+ if len(self.past_covariates_names) > 0:
115
+ past_covariates_df = self.past_covariates_pipeline.fit_transform(data[self.past_covariates_names])
116
+ logger.info("\tpast_covariates:")
117
+ past_covariates_cat, past_covariates_real = self._detect_and_log_column_types(past_covariates_df)
118
+ self.past_covariates_names = self.past_covariates_pipeline.features_in
119
+ else:
120
+ past_covariates_cat = []
121
+ past_covariates_real = []
122
+
123
+ ignored_covariates = data.columns.difference(
124
+ [self.target] + self.known_covariates_names + self.past_covariates_names
125
+ )
126
+
127
+ if data.static_features is not None:
128
+ static_features_df = self.static_feature_pipeline.fit_transform(data.static_features)
129
+ logger.info("\tstatic_features:")
130
+ static_features_cat, static_features_real = self._detect_and_log_column_types(static_features_df)
131
+ ignored_static_features = data.static_features.columns.difference(self.static_feature_pipeline.features_in)
132
+ else:
133
+ static_features_cat = []
134
+ static_features_real = []
135
+ ignored_static_features = []
136
+
137
+ if len(ignored_covariates) > 0 or len(ignored_static_features) > 0:
138
+ logger.info("\nAutoGluon will ignore following non-numeric/non-informative columns:")
139
+ if len(ignored_covariates) > 0:
140
+ logger.info(f"\tignored covariates: {list(ignored_covariates)}")
141
+ if len(ignored_static_features) > 0:
142
+ logger.info(f"\tignored static_features: {list(ignored_static_features)}")
143
+
144
+ if len(data.columns) > 1 or data.static_features is not None:
145
+ logger.info(
146
+ "\nTo learn how to fix incorrectly inferred types, please see documentation for TimeSeriesPredictor.fit"
147
+ )
148
+
149
+ self.covariate_metadata = CovariateMetadata(
150
+ known_covariates_cat=known_covariates_cat,
151
+ known_covariates_real=known_covariates_real,
152
+ past_covariates_cat=past_covariates_cat,
153
+ past_covariates_real=past_covariates_real,
154
+ static_features_cat=static_features_cat,
155
+ static_features_real=static_features_real,
156
+ )
157
+ self._is_fit = True
158
+
159
+ def transform(self, data: TimeSeriesDataFrame, data_frame_name: str = "data") -> TimeSeriesDataFrame:
160
+ """Transform static features and past/known covariates.
161
+
162
+ Transformed data is guaranteed to match the specification (same columns / dtypes) of the data seen during fit.
163
+ Extra columns not seen during fitting will be removed.
164
+
165
+ If some columns are missing or are incompatible, an exception will be raised.
166
+ """
167
+ assert self._is_fit, f"{self.__class__.__name__} has not been fit yet"
168
+ self._check_required_columns_are_present(
169
+ data, required_column_names=self.required_column_names, data_frame_name=data_frame_name
170
+ )
171
+ dfs = [data[[self.target]]]
172
+
173
+ if len(self.known_covariates_names) > 0:
174
+ dfs.append(self.known_covariates_pipeline.transform(data[self.known_covariates_names]))
175
+
176
+ if len(self.past_covariates_names) > 0:
177
+ dfs.append(self.past_covariates_pipeline.transform(data[self.past_covariates_names]))
178
+
179
+ if self.static_feature_pipeline.is_fit():
180
+ if data.static_features is None:
181
+ raise ValueError(f"Provided {data_frame_name} must contain static_features")
182
+ static_features = self.static_feature_pipeline.transform(data.static_features)
183
+ else:
184
+ static_features = None
185
+
186
+ return TimeSeriesDataFrame(pd.concat(dfs, axis=1), static_features=static_features)
187
+
188
+ def transform_future_known_covariates(
189
+ self, known_covariates: Optional[TimeSeriesDataFrame]
190
+ ) -> Optional[TimeSeriesDataFrame]:
191
+ assert self._is_fit, f"{self.__class__.__name__} has not been fit yet"
192
+ if len(self.known_covariates_names) > 0:
193
+ assert known_covariates is not None, "known_covariates must be provided at prediction time"
194
+ self._check_required_columns_are_present(
195
+ known_covariates, required_column_names=self.known_covariates_names, data_frame_name="known_covariates"
196
+ )
197
+ return TimeSeriesDataFrame(self.known_covariates_pipeline.transform(known_covariates))
198
+ else:
199
+ return None
200
+
201
+ def fit_transform(self, data: TimeSeriesDataFrame, data_frame_name: str = "data") -> TimeSeriesDataFrame:
202
+ self.fit(data)
203
+ return self.transform(data, data_frame_name=data_frame_name)
204
+
205
+ @staticmethod
206
+ def _detect_and_log_column_types(transformed_df: pd.DataFrame) -> Tuple[List[str], List[str]]:
207
+ """Log & return names of categorical and real-valued columns in the DataFrame."""
208
+ cat_column_names = []
209
+ real_column_names = []
210
+ for column_name, column_dtype in transformed_df.dtypes.items():
211
+ if isinstance(column_dtype, pd.CategoricalDtype):
212
+ cat_column_names.append(column_name)
213
+ elif pd.api.types.is_numeric_dtype(column_dtype):
214
+ real_column_names.append(column_name)
215
+
216
+ logger.info(f"\t\tcategorical: {reprlib.repr(cat_column_names)}")
217
+ logger.info(f"\t\tcontinuous (float): {reprlib.repr(real_column_names)}")
218
+ return cat_column_names, real_column_names
219
+
220
+ @staticmethod
221
+ def _check_required_columns_are_present(
222
+ data: TimeSeriesDataFrame, required_column_names: List[str], data_frame_name: str
223
+ ) -> None:
224
+ missing_columns = pd.Index(required_column_names).difference(data.columns)
225
+ if len(missing_columns) > 0:
226
+ raise ValueError(
227
+ f"{len(missing_columns)} columns are missing from {data_frame_name}: {reprlib.repr(missing_columns.to_list())}"
228
+ )
@@ -1,3 +1,3 @@
1
1
  """This is the autogluon version file."""
2
- __version__ = '1.0.1b20240229'
2
+ __version__ = '1.0.1b20240302'
3
3
  __lite__ = False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: autogluon.timeseries
3
- Version: 1.0.1b20240229
3
+ Version: 1.0.1b20240302
4
4
  Summary: AutoML for Image, Text, and Tabular Data
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -14,9 +14,9 @@ utilsforecast<0.0.11,>=0.0.10
14
14
  tqdm<5,>=4.38
15
15
  orjson~=3.9
16
16
  tensorboard<3,>=2.9
17
- autogluon.core[raytune]==1.0.1b20240229
18
- autogluon.common==1.0.1b20240229
19
- autogluon.tabular[catboost,lightgbm,xgboost]==1.0.1b20240229
17
+ autogluon.core[raytune]==1.0.1b20240302
18
+ autogluon.common==1.0.1b20240302
19
+ autogluon.tabular[catboost,lightgbm,xgboost]==1.0.1b20240302
20
20
 
21
21
  [all]
22
22
 
@@ -1,194 +0,0 @@
1
- import logging
2
- import reprlib
3
- from dataclasses import dataclass, field
4
- from typing import List, Optional
5
-
6
- import numpy as np
7
- import pandas as pd
8
-
9
- from autogluon.common.features.types import R_FLOAT, R_INT
10
- from autogluon.features.generators import (
11
- AsTypeFeatureGenerator,
12
- CategoryFeatureGenerator,
13
- IdentityFeatureGenerator,
14
- PipelineFeatureGenerator,
15
- )
16
- from autogluon.timeseries import TimeSeriesDataFrame
17
-
18
- logger = logging.getLogger(__name__)
19
-
20
-
21
- @dataclass
22
- class CovariateMetadata:
23
- """Provides mapping from different covariate types to columns in the dataset."""
24
-
25
- static_features_cat: List[str] = field(default_factory=list)
26
- static_features_real: List[str] = field(default_factory=list)
27
- known_covariates_real: List[str] = field(default_factory=list)
28
- known_covariates_cat: List[str] = field(default_factory=list)
29
- past_covariates_real: List[str] = field(default_factory=list)
30
- past_covariates_cat: List[str] = field(default_factory=list)
31
-
32
-
33
- class ContinuousAndCategoricalFeatureGenerator(PipelineFeatureGenerator):
34
- """Generates categorical and continuous features for time series models."""
35
-
36
- def __init__(self, verbosity: int = 0, **kwargs):
37
- generators = [
38
- CategoryFeatureGenerator(minimum_cat_count=1, fillna="mode"),
39
- IdentityFeatureGenerator(infer_features_in_args={"valid_raw_types": [R_INT, R_FLOAT]}),
40
- ]
41
- super().__init__(
42
- generators=[generators],
43
- post_generators=[],
44
- pre_generators=[AsTypeFeatureGenerator(convert_bool=False)],
45
- pre_enforce_types=False,
46
- pre_drop_useless=False,
47
- verbosity=verbosity,
48
- **kwargs,
49
- )
50
-
51
-
52
- class TimeSeriesFeatureGenerator:
53
- """Takes care of preprocessing for static_features and past/known covariates.
54
-
55
- Covariates are all converted to float dtype. Static features, if present, are all converted to categorical & float
56
- dtypes.
57
- """
58
-
59
- def __init__(self, target: str, known_covariates_names: List[str]):
60
- self.target = target
61
- self._is_fit = False
62
- self.known_covariates_names = list(known_covariates_names)
63
- self.past_covariates_names = []
64
- self.static_feature_pipeline = ContinuousAndCategoricalFeatureGenerator()
65
- self.covariate_metadata: CovariateMetadata = None
66
-
67
- @property
68
- def required_column_names(self) -> List[str]:
69
- return [self.target] + list(self.known_covariates_names) + list(self.past_covariates_names)
70
-
71
- @staticmethod
72
- def _convert_numerical_features_to_float(df: pd.DataFrame, float_dtype=np.float64) -> pd.DataFrame:
73
- """In-place convert the dtype of all numerical (float or int) columns to the given float dtype."""
74
- numeric_columns = [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]
75
- df[numeric_columns] = df[numeric_columns].astype(float_dtype)
76
- return df
77
-
78
- def fit(self, data: TimeSeriesDataFrame) -> None:
79
- assert not self._is_fit, f"{self.__class__.__name__} has already been fit"
80
-
81
- self.past_covariates_names = []
82
- for column in data.columns:
83
- if column != self.target and column not in self.known_covariates_names:
84
- self.past_covariates_names.append(column)
85
-
86
- logger.info("\nProvided dataset contains following columns:")
87
- logger.info(f"\ttarget: '{self.target}'")
88
- if len(self.known_covariates_names) > 0:
89
- logger.info(f"\tknown covariates: {self.known_covariates_names}")
90
- if len(self.past_covariates_names) > 0:
91
- logger.info(f"\tpast covariates: {self.past_covariates_names}")
92
-
93
- static_features_cat = []
94
- static_features_real = []
95
- if data.static_features is not None:
96
- static = self.static_feature_pipeline.fit_transform(data.static_features)
97
- static = self._convert_numerical_features_to_float(static)
98
-
99
- unused = []
100
- for col_name in data.static_features.columns:
101
- if col_name in static.columns and static[col_name].dtype == "category":
102
- static_features_cat.append(col_name)
103
- elif col_name in static.columns and static[col_name].dtype == np.float64:
104
- static_features_real.append(col_name)
105
- else:
106
- unused.append(col_name)
107
-
108
- logger.info("Following types of static features have been inferred:")
109
- logger.info(f"\tcategorical: {static_features_cat}")
110
- logger.info(f"\tcontinuous (float): {static_features_real}")
111
- if len(unused) > 0:
112
- logger.info(f"\tremoved (uninformative columns): {unused}")
113
- logger.info(
114
- "To learn how to fix incorrectly inferred types, please see documentation for TimeSeriesPredictor.fit "
115
- )
116
-
117
- self.covariate_metadata = CovariateMetadata(
118
- static_features_cat=static_features_cat,
119
- static_features_real=static_features_real,
120
- known_covariates_real=self.known_covariates_names,
121
- past_covariates_real=self.past_covariates_names,
122
- # TODO: Categorical time-varying covariates are not yet supported
123
- known_covariates_cat=[],
124
- past_covariates_cat=[],
125
- )
126
- self._is_fit = True
127
-
128
- @staticmethod
129
- def _check_and_prepare_covariates(
130
- data: TimeSeriesDataFrame,
131
- required_column_names: List[str],
132
- data_frame_name: str,
133
- ) -> TimeSeriesDataFrame:
134
- """Select the required dataframe columns and convert them to float64 dtype."""
135
- missing_columns = pd.Index(required_column_names).difference(data.columns)
136
- if len(missing_columns) > 0:
137
- raise ValueError(
138
- f"{len(missing_columns)} columns are missing from {data_frame_name}: {reprlib.repr(missing_columns.to_list())}"
139
- )
140
- data = data[required_column_names]
141
- try:
142
- data = data.astype(np.float64)
143
- except ValueError:
144
- raise ValueError(
145
- f"Columns in {data_frame_name} must all have numeric (float or int) dtypes, "
146
- f"but in provided data they have dtypes {data.dtypes}"
147
- )
148
- return data
149
-
150
- def transform(self, data: TimeSeriesDataFrame, data_frame_name: str = "data") -> TimeSeriesDataFrame:
151
- """Transform static features and past/known covariates.
152
-
153
- Transformed data is guaranteed to match the specification (same columns / dtypes) of the data seen during fit.
154
- Extra columns not seen during fitting will be removed.
155
-
156
- If some columns are missing or are incompatible, an exception will be raised.
157
- """
158
- assert self._is_fit, f"{self.__class__.__name__} has not been fit yet"
159
- # Avoid modifying inplace
160
- data = data.copy(deep=False)
161
-
162
- data = self._check_and_prepare_covariates(
163
- data=data,
164
- required_column_names=self.required_column_names,
165
- data_frame_name=data_frame_name,
166
- )
167
-
168
- if self.static_feature_pipeline.is_fit():
169
- if data.static_features is None:
170
- raise ValueError(f"Provided {data_frame_name} must contain static_features")
171
- static_features = self.static_feature_pipeline.transform(data.static_features)
172
- data.static_features = self._convert_numerical_features_to_float(static_features)
173
- else:
174
- data.static_features = None
175
-
176
- return data
177
-
178
- def transform_future_known_covariates(
179
- self, known_covariates: Optional[TimeSeriesDataFrame]
180
- ) -> Optional[TimeSeriesDataFrame]:
181
- assert self._is_fit, f"{self.__class__.__name__} has not been fit yet"
182
- if len(self.known_covariates_names) > 0:
183
- assert known_covariates is not None, "known_covariates must be provided at prediction time"
184
- return self._check_and_prepare_covariates(
185
- known_covariates,
186
- required_column_names=self.known_covariates_names,
187
- data_frame_name="known_covariates",
188
- )
189
- else:
190
- return None
191
-
192
- def fit_transform(self, data: TimeSeriesDataFrame, data_frame_name: str = "data") -> TimeSeriesDataFrame:
193
- self.fit(data)
194
- return self.transform(data, data_frame_name=data_frame_name)