autogluon.timeseries 0.7.0b20230301__tar.gz → 0.7.0b20230303__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of autogluon.timeseries might be problematic. Click here for more details.

Files changed (56)
  1. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/PKG-INFO +1 -1
  2. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/dataset/ts_dataframe.py +16 -10
  3. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/autogluon_tabular/tabular_model.py +118 -87
  4. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/version.py +1 -1
  5. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon.timeseries.egg-info/PKG-INFO +1 -1
  6. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon.timeseries.egg-info/requires.txt +5 -5
  7. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/setup.cfg +0 -0
  8. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/setup.py +0 -0
  9. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/__init__.py +0 -0
  10. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/__init__.py +0 -0
  11. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/configs/__init__.py +0 -0
  12. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/configs/presets_configs.py +0 -0
  13. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/dataset/__init__.py +0 -0
  14. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/evaluator.py +0 -0
  15. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/learner.py +0 -0
  16. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/__init__.py +0 -0
  17. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/abstract/__init__.py +0 -0
  18. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/abstract/abstract_timeseries_model.py +0 -0
  19. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/abstract/model_trial.py +0 -0
  20. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/autogluon_tabular/__init__.py +0 -0
  21. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/ensemble/__init__.py +0 -0
  22. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -0
  23. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -0
  24. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/gluonts/__init__.py +0 -0
  25. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/gluonts/abstract_gluonts.py +0 -0
  26. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/gluonts/mx/__init__.py +0 -0
  27. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/gluonts/mx/callback.py +0 -0
  28. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/gluonts/mx/models.py +0 -0
  29. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
  30. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/gluonts/torch/models.py +0 -0
  31. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/local/__init__.py +0 -0
  32. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/local/abstract_local_model.py +0 -0
  33. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/local/naive.py +0 -0
  34. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/local/statsforecast.py +0 -0
  35. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/local/statsmodels.py +0 -0
  36. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/presets.py +0 -0
  37. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/sktime/__init__.py +0 -0
  38. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/sktime/abstract_sktime.py +0 -0
  39. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/sktime/models.py +0 -0
  40. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/predictor.py +0 -0
  41. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/splitter.py +0 -0
  42. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/trainer/__init__.py +0 -0
  43. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/trainer/abstract_trainer.py +0 -0
  44. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/trainer/auto_trainer.py +0 -0
  45. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/utils/__init__.py +0 -0
  46. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/utils/features.py +0 -0
  47. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/utils/forecast.py +0 -0
  48. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/utils/hashing.py +0 -0
  49. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/utils/random.py +0 -0
  50. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/utils/seasonality.py +0 -0
  51. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/utils/warning_filters.py +0 -0
  52. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon.timeseries.egg-info/SOURCES.txt +0 -0
  53. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon.timeseries.egg-info/dependency_links.txt +0 -0
  54. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon.timeseries.egg-info/namespace_packages.txt +0 -0
  55. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon.timeseries.egg-info/top_level.txt +0 -0
  56. {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon.timeseries.egg-info/zip-safe +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: autogluon.timeseries
3
- Version: 0.7.0b20230301
3
+ Version: 0.7.0b20230303
4
4
  Summary: AutoML for Image, Text, and Tabular Data
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -8,6 +8,7 @@ from typing import Any, Optional, Tuple, Type
8
8
 
9
9
  import numpy as np
10
10
  import pandas as pd
11
+ from joblib.parallel import Parallel, delayed
11
12
  from pandas.core.internals import ArrayManager, BlockManager
12
13
 
13
14
  from autogluon.common.loaders import load_pd
@@ -258,24 +259,27 @@ class TimeSeriesDataFrame(pd.DataFrame):
258
259
  raise ValueError(f"all entries in index `{ITEMID}` must be of integer or string dtype")
259
260
 
260
261
  @classmethod
261
- def _construct_pandas_frame_from_iterable_dataset(cls, iterable_dataset: Iterable) -> pd.DataFrame:
262
- cls._validate_iterable(iterable_dataset)
263
-
264
- all_ts = []
265
- for i, ts in enumerate(iterable_dataset):
262
+ def _construct_pandas_frame_from_iterable_dataset(
263
+ cls, iterable_dataset: Iterable, num_cpus: int = -1
264
+ ) -> pd.DataFrame:
265
+ def load_single_item(item_id: int, ts: dict) -> pd.DataFrame:
266
266
  start_timestamp = ts["start"]
267
267
  freq = start_timestamp.freq
268
268
  if isinstance(start_timestamp, pd.Period):
269
269
  start_timestamp = start_timestamp.to_timestamp(how="S")
270
270
  target = ts["target"]
271
271
  datetime_index = tuple(pd.date_range(start_timestamp, periods=len(target), freq=freq))
272
- idx = pd.MultiIndex.from_product([(i,), datetime_index], names=[ITEMID, TIMESTAMP])
273
- ts_df = pd.Series(target, name="target", index=idx).to_frame()
274
- all_ts.append(ts_df)
272
+ idx = pd.MultiIndex.from_product([(item_id,), datetime_index], names=[ITEMID, TIMESTAMP])
273
+ return pd.Series(target, name="target", index=idx).to_frame()
274
+
275
+ cls._validate_iterable(iterable_dataset)
276
+ all_ts = Parallel(n_jobs=num_cpus)(
277
+ delayed(load_single_item)(item_id, ts) for item_id, ts in enumerate(iterable_dataset)
278
+ )
275
279
  return pd.concat(all_ts)
276
280
 
277
281
  @classmethod
278
- def from_iterable_dataset(cls, iterable_dataset: Iterable) -> pd.DataFrame:
282
+ def from_iterable_dataset(cls, iterable_dataset: Iterable, num_cpus: int = -1) -> pd.DataFrame:
279
283
  """Construct a ``TimeSeriesDataFrame`` from an Iterable of dictionaries each of which
280
284
  represents a single time series.
281
285
 
@@ -294,13 +298,15 @@ class TimeSeriesDataFrame(pd.DataFrame):
294
298
  {"target": [3, 4, 5], "start": pd.Timestamp("01-01-2019", freq='D')},
295
299
  {"target": [6, 7, 8], "start": pd.Timestamp("01-01-2019", freq='D')}
296
300
  ]
301
+ num_cpus : int, default = -1
302
+ Number of CPU cores used to process the iterable dataset in parallel. Set to -1 to use all cores.
297
303
 
298
304
  Returns
299
305
  -------
300
306
  ts_df: TimeSeriesDataFrame
301
307
  A data frame in TimeSeriesDataFrame format.
302
308
  """
303
- return cls(cls._construct_pandas_frame_from_iterable_dataset(iterable_dataset))
309
+ return cls(cls._construct_pandas_frame_from_iterable_dataset(iterable_dataset, num_cpus=num_cpus))
304
310
 
305
311
  @classmethod
306
312
  def _load_data_frame_from_file(cls, path: str) -> pd.DataFrame:
@@ -6,10 +6,10 @@ from typing import Any, Callable, Dict, List, Optional, Tuple
6
6
 
7
7
  import numpy as np
8
8
  import pandas as pd
9
- import scipy.stats
10
9
 
11
10
  # TODO: Drop GluonTS dependency
12
11
  from gluonts.time_feature import get_lags_for_frequency, time_features_from_frequency_str
12
+ from joblib.parallel import Parallel, delayed
13
13
 
14
14
  import autogluon.core as ag
15
15
  from autogluon.tabular import TabularPredictor
@@ -38,7 +38,7 @@ class AutoGluonTabularModel(AbstractTimeSeriesModel):
38
38
  max_train_size : int, default = 1_000_000
39
39
  Maximum number of rows in the training and validation sets. If the number of rows in train or validation data
40
40
  exceeds ``max_train_size``, then ``max_train_size`` many rows are subsampled from the dataframe.
41
- tabular_hyperparmeters : Dict[Dict[str, Any]], optional
41
+ tabular_hyperparameters : Dict[Dict[str, Any]], optional
42
42
  Hyperparameters dictionary passed to `TabularPredictor.fit`. Contains the names of models that should be fit.
43
43
  Defaults to ``{"XGB": {}, "CAT": {}, "GBM" :{}}``.
44
44
  """
@@ -97,7 +97,7 @@ class AutoGluonTabularModel(AbstractTimeSeriesModel):
97
97
  def _get_features_dataframe(
98
98
  self,
99
99
  data: TimeSeriesDataFrame,
100
- last_k_values: Optional[int] = None,
100
+ max_rows_per_item: Optional[int] = None,
101
101
  ) -> pd.DataFrame:
102
102
  """Generate a feature matrix used by TabularPredictor.
103
103
 
@@ -105,56 +105,32 @@ class AutoGluonTabularModel(AbstractTimeSeriesModel):
105
105
  ----------
106
106
  data : TimeSeriesDataFrame
107
107
  Dataframe containing features derived from time index & past time series values, as well as the target.
108
- last_k_values: int, optional
109
- If provided, features will be generated only for the last `last_k_values` timesteps of each time series.
108
+ max_rows_per_item: int, optional
109
+ If given, features will be generated only for the last `max_rows_per_item` timesteps of each time series.
110
110
  """
111
- # TODO: Rethink the featurization process for time series based on SotA tree-based models (scaling, rolling feautres)
112
- # TODO: More efficient featurization with tsfresh? (currently sequential over time series => slow)
113
111
 
114
- def get_lags(df: pd.DataFrame, lag_indices: List[int]) -> pd.DataFrame:
115
- """Construct a dataframe consisting of shifted copies of the original df.
116
-
117
- Parameters
118
- ----------
119
- df
120
- Original dataframe, shape [N, D]
121
- lag_indices
122
- List of lag features to compute.
123
-
124
- Returns
125
- -------
126
- lag_df
127
- Dataframe with lag features, shape [N, D * len(lag_indices)]
128
- """
129
- shifted = [df.shift(idx).add_suffix(f"_lag_{idx}") for idx in lag_indices]
130
- return pd.concat(shifted, axis=1)
131
-
132
- def apply_mask(
133
- df: pd.DataFrame, num_hidden: np.ndarray, lag_indices: np.ndarray, num_columns: int = 1
134
- ) -> pd.DataFrame:
112
+ def apply_mask(array: np.ndarray, num_hidden: np.ndarray, lag_indices: np.ndarray) -> pd.DataFrame:
135
113
  """Apply a mask that mimics the situation at prediction time when target/covariates are unknown during the
136
114
  forecast horizon.
137
115
 
138
116
  Parameters
139
117
  ----------
140
- df
141
- Dataframe to mask, shape [N, D * len(lag_indices)]
118
+ array
119
+ Array to mask, shape [N, len(lag_indices)]
142
120
  num_hidden
143
121
  Number of entries hidden in each row, shape [N]
144
122
  lag_indices
145
123
  Lag indices used to construct the dataframe
146
- num_columns
147
- D - number of columns in the original dataframe, before lag features were constructed
148
124
 
149
125
  Returns
150
126
  -------
151
- masked_df
152
- Dataframe with the masking applied, shape [N, D * len(lag_indices)]
127
+ masked_array
128
+ Array with the masking applied, shape [N, len(lag_indices)]
153
129
 
154
130
 
155
131
  For example, given the following inputs
156
132
 
157
- df = [
133
+ array = [
158
134
  [1, 1, 1, 1],
159
135
  [1, 1, 1, 1],
160
136
  [1, 1, 1, 1],
@@ -163,78 +139,133 @@ class AutoGluonTabularModel(AbstractTimeSeriesModel):
163
139
  lag_indices = [1, 2, 5, 10]
164
140
  num_columns = 1
165
141
 
166
- The resulting masked dataframe will be
142
+ The resulting masked output will be
167
143
 
168
- masked_df = [
144
+ masked_array = [
169
145
  [NaN, NaN, NaN, 1],
170
146
  [1, 1, 1, 1],
171
147
  [NaN, 1, 1, 1],
172
148
  ]
173
149
 
174
150
  """
175
- if num_columns > 1:
176
- lag_indices = np.repeat(lag_indices, num_columns)
177
- mask = num_hidden[:, None] >= lag_indices[None] # shape [len(num_hidden), len(lag_indices) * num_columns]
178
- df[mask] = np.nan
179
- return df
180
-
181
- def get_lag_features_and_target(time_series: pd.DataFrame) -> pd.DataFrame:
182
- """Construct the dataframe with lagged features and prediction target for a single time series.
151
+ mask = num_hidden[:, None] >= lag_indices[None] # shape [len(num_hidden), len(lag_indices)]
152
+ array[mask] = np.nan
153
+ return array
154
+
155
+ def get_lags(
156
+ ts: np.ndarray,
157
+ lag_indices: np.ndarray,
158
+ prediction_length: int,
159
+ max_rows_per_item: int = 100_000,
160
+ mask: bool = False,
161
+ ) -> np.ndarray:
162
+ """Generate the matrix of lag features for a single time series.
183
163
 
184
164
  Parameters
185
165
  ----------
186
- time_series
187
- Dataframe containing a single time series, including target & past/known covariates.
188
- Such time series can equivalently be obtained with data.loc[item_id] for some item_id in the dataset.
166
+ ts
167
+ Array with target or covariate values, shape [N]
168
+ lag_indices
169
+ Array with the lag indices to use for feature generation.
170
+ prediction_length
171
+ Length of the forecast horizon.
172
+ max_rows_per_item
173
+ Maximum number of rows to include in the feature matrix.
174
+ If max_rows_per_item < len(ts), the lag features will be generated only
175
+ for the *last* max_rows_per_item entries of ts.
176
+ mask
177
+ If True, a mask will be applied to some entries of the feature matrix,
178
+ mimicking the behavior at prediction time, when the ts values are not
179
+ known during the forecast horizon.
189
180
 
190
181
  Returns
191
182
  -------
192
183
  features
193
- Feature dataframe, where each row corresponds to a single timestep of the input time series.
184
+ Array with lag features, shape [min(N, max_rows_per_item), len(lag_indices)]
194
185
  """
195
- target_lags = get_lags(time_series[[self.target]], self._target_lag_indices)
196
-
197
- # Starting from the end of the time series, mask the values as if the last `prediction_length` steps weren't observed
198
- # This mimics what will happen at test time, when we simultaneously predict the next `prediction_length` values
199
- num_windows = (len(time_series) - 1) // self.prediction_length
200
- # We don't hide any past values for the first `remainder` values, otherwise the features will be all empty
201
- remainder = len(time_series) - num_windows * self.prediction_length
202
- num_hidden = np.concatenate([np.zeros(remainder), np.tile(np.arange(self.prediction_length), num_windows)])
203
- target_lags = apply_mask(target_lags, num_hidden=num_hidden, lag_indices=self._target_lag_indices)
204
- feature_dfs = [target_lags, time_series[[self.target]]]
205
-
206
- if self.metadata.past_covariates_real:
207
- past_covariates_lags = get_lags(
208
- time_series[self.metadata.past_covariates_real], self._past_covariates_lag_indices
209
- )
210
- past_covariates_lags = apply_mask(
211
- past_covariates_lags,
212
- num_hidden=num_hidden,
213
- lag_indices=self._past_covariates_lag_indices,
214
- num_columns=len(self.metadata.past_covariates_real),
215
- )
216
- feature_dfs.append(past_covariates_lags)
186
+ num_rows = min(max_rows_per_item, len(ts))
187
+ features = np.full([num_rows, len(lag_indices)], fill_value=np.nan)
188
+ for i in range(1, num_rows + 1):
189
+ target_idx = len(ts) - i
190
+ selected_lags = lag_indices[lag_indices <= target_idx]
191
+ features[num_rows - i, np.arange(len(selected_lags))] = ts[target_idx - selected_lags]
192
+ if mask:
193
+ num_windows = (len(ts) - 1) // prediction_length
194
+ # We don't hide any past values for the first `remainder` values, otherwise the features will be all empty
195
+ remainder = len(ts) - num_windows * prediction_length
196
+ num_hidden = np.concatenate([np.zeros(remainder), np.tile(np.arange(prediction_length), num_windows)])
197
+ features = apply_mask(features, num_hidden[-num_rows:], lag_indices)
198
+ return features
217
199
 
218
- if self.metadata.known_covariates_real:
219
- known_covariates_lags = get_lags(
220
- time_series[self.metadata.known_covariates_real], self._known_covariates_lag_indices
200
+ def get_lag_features(
201
+ all_series: List[np.ndarray],
202
+ lag_indices: np.ndarray,
203
+ prediction_length: int,
204
+ max_rows_per_item: int,
205
+ mask: bool,
206
+ name: str,
207
+ ):
208
+ """Generate lag features for all time series in the dataset.
209
+
210
+ See the docstring of get_lags for the description of the parameters.
211
+ """
212
+ # TODO: Expose n_jobs to the user as a hyperparameter
213
+ lags_per_item = Parallel(n_jobs=-1)(
214
+ delayed(get_lags)(
215
+ ts,
216
+ lag_indices,
217
+ prediction_length=prediction_length,
218
+ max_rows_per_item=max_rows_per_item,
219
+ mask=mask,
221
220
  )
222
- feature_dfs.append(known_covariates_lags)
223
-
224
- features = pd.concat(feature_dfs, axis=1)
225
- return features
221
+ for ts in all_series
222
+ )
223
+ features = np.concatenate(lags_per_item)
224
+ return pd.DataFrame(features, columns=[f"{name}_lag_{idx}" for idx in lag_indices])
225
+
226
+ df = pd.DataFrame(data)
227
+ all_series = [ts for _, ts in df.droplevel(TIMESTAMP).groupby(level=ITEMID, sort=False)]
228
+ if max_rows_per_item is None:
229
+ max_rows_per_item = data.num_timesteps_per_item().max()
230
+
231
+ feature_dfs = []
232
+ for column_name in df.columns:
233
+ if column_name == self.target:
234
+ mask = True
235
+ lag_indices = self._target_lag_indices
236
+ elif column_name in self.metadata.past_covariates_real:
237
+ mask = True
238
+ lag_indices = self._past_covariates_lag_indices
239
+ elif column_name in self.metadata.known_covariates_real:
240
+ mask = False
241
+ lag_indices = self._known_covariates_lag_indices
242
+ else:
243
+ raise ValueError(f"Unexpected column {column_name} is not among target or covariates.")
244
+
245
+ feature_dfs.append(
246
+ get_lag_features(
247
+ [ts[column_name].to_numpy() for ts in all_series],
248
+ lag_indices=lag_indices,
249
+ prediction_length=self.prediction_length,
250
+ max_rows_per_item=max_rows_per_item,
251
+ mask=mask,
252
+ name=column_name,
253
+ )
254
+ )
226
255
 
227
- df = pd.DataFrame(data).reset_index(level=TIMESTAMP)
228
- timestamps = pd.DatetimeIndex(df.pop(TIMESTAMP))
229
- features = df.groupby(level=ITEMID, sort=False, group_keys=False).apply(get_lag_features_and_target)
256
+ # Only the last max_rows_per_item entries for each item will be included in the feature matrix
257
+ target_with_index = df[self.target].groupby(level=ITEMID, sort=False).tail(max_rows_per_item)
258
+ feature_dfs.append(target_with_index.reset_index(drop=True))
230
259
 
231
- for time_feat in self._time_features:
232
- features[time_feat.__name__] = time_feat(timestamps)
260
+ timestamps = target_with_index.index.get_level_values(level=TIMESTAMP)
261
+ feature_dfs.append(
262
+ pd.DataFrame({time_feat.__name__: time_feat(timestamps) for time_feat in self._time_features})
263
+ )
233
264
 
234
- if last_k_values is not None:
235
- features = features.groupby(level=ITEMID, sort=False, group_keys=False).tail(last_k_values)
265
+ features = pd.concat(feature_dfs, axis=1)
236
266
 
237
267
  if data.static_features is not None:
268
+ features.index = target_with_index.index.get_level_values(level=ITEMID)
238
269
  features = pd.merge(features, data.static_features, how="left", on=ITEMID, suffixes=(None, "_static_feat"))
239
270
 
240
271
  features.reset_index(inplace=True, drop=True)
@@ -277,7 +308,7 @@ class AutoGluonTabularModel(AbstractTimeSeriesModel):
277
308
  f"train_data and val_data must have the same freq (received {train_data.freq} and {val_data.freq})"
278
309
  )
279
310
  val_data, _ = self._normalize_targets(val_data)
280
- val_df = self._get_features_dataframe(val_data, last_k_values=self.prediction_length)
311
+ val_df = self._get_features_dataframe(val_data, max_rows_per_item=self.prediction_length)
281
312
  val_df = val_df[self._available_features]
282
313
 
283
314
  if len(val_df) > max_train_size:
@@ -331,7 +362,7 @@ class AutoGluonTabularModel(AbstractTimeSeriesModel):
331
362
 
332
363
  data, scale_per_item = self._normalize_targets(data)
333
364
  data_extended = self._extend_index(data)
334
- features = self._get_features_dataframe(data_extended, last_k_values=self.prediction_length)
365
+ features = self._get_features_dataframe(data_extended, max_rows_per_item=self.prediction_length)
335
366
  features = features[self._available_features]
336
367
 
337
368
  # Predict for batches (instead of using full dataset) to avoid high memory usage
@@ -1,3 +1,3 @@
1
1
  """This is the autogluon version file."""
2
- __version__ = '0.7.0b20230301'
2
+ __version__ = '0.7.0b20230303'
3
3
  __lite__ = False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: autogluon.timeseries
3
- Version: 0.7.0b20230301
3
+ Version: 0.7.0b20230303
4
4
  Summary: AutoML for Image, Text, and Tabular Data
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -10,14 +10,14 @@ networkx<3.0,>=2.3
10
10
  statsforecast<1.5,>=1.4.0
11
11
  tqdm<5,>=4.38
12
12
  ujson<6,>=5
13
- autogluon.core[raytune]==0.7.0b20230301
14
- autogluon.common==0.7.0b20230301
15
- autogluon.tabular[catboost,lightgbm,xgboost]==0.7.0b20230301
13
+ autogluon.core[raytune]==0.7.0b20230303
14
+ autogluon.common==0.7.0b20230303
15
+ autogluon.tabular[catboost,lightgbm,xgboost]==0.7.0b20230303
16
16
 
17
17
  [all]
18
- pmdarima<1.9,>=1.8.2
19
- tbats<2,>=1.1
20
18
  sktime<0.16,>=0.14
19
+ tbats<2,>=1.1
20
+ pmdarima<1.9,>=1.8.2
21
21
 
22
22
  [sktime]
23
23
  sktime<0.16,>=0.14