autogluon.timeseries 0.7.0b20230301__tar.gz → 0.7.0b20230303__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of autogluon.timeseries might be problematic. Click here for more details.
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/PKG-INFO +1 -1
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/dataset/ts_dataframe.py +16 -10
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/autogluon_tabular/tabular_model.py +118 -87
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/version.py +1 -1
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon.timeseries.egg-info/PKG-INFO +1 -1
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon.timeseries.egg-info/requires.txt +5 -5
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/setup.cfg +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/setup.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/__init__.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/__init__.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/configs/__init__.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/configs/presets_configs.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/dataset/__init__.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/evaluator.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/learner.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/__init__.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/abstract/__init__.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/abstract/abstract_timeseries_model.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/abstract/model_trial.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/autogluon_tabular/__init__.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/ensemble/__init__.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/gluonts/__init__.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/gluonts/abstract_gluonts.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/gluonts/mx/__init__.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/gluonts/mx/callback.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/gluonts/mx/models.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/gluonts/torch/models.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/local/__init__.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/local/abstract_local_model.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/local/naive.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/local/statsforecast.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/local/statsmodels.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/presets.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/sktime/__init__.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/sktime/abstract_sktime.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/models/sktime/models.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/predictor.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/splitter.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/trainer/__init__.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/trainer/abstract_trainer.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/trainer/auto_trainer.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/utils/__init__.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/utils/features.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/utils/forecast.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/utils/hashing.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/utils/random.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/utils/seasonality.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon/timeseries/utils/warning_filters.py +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon.timeseries.egg-info/SOURCES.txt +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon.timeseries.egg-info/dependency_links.txt +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon.timeseries.egg-info/namespace_packages.txt +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon.timeseries.egg-info/top_level.txt +0 -0
- {autogluon.timeseries-0.7.0b20230301 → autogluon.timeseries-0.7.0b20230303}/src/autogluon.timeseries.egg-info/zip-safe +0 -0
|
@@ -8,6 +8,7 @@ from typing import Any, Optional, Tuple, Type
|
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
10
|
import pandas as pd
|
|
11
|
+
from joblib.parallel import Parallel, delayed
|
|
11
12
|
from pandas.core.internals import ArrayManager, BlockManager
|
|
12
13
|
|
|
13
14
|
from autogluon.common.loaders import load_pd
|
|
@@ -258,24 +259,27 @@ class TimeSeriesDataFrame(pd.DataFrame):
|
|
|
258
259
|
raise ValueError(f"all entries in index `{ITEMID}` must be of integer or string dtype")
|
|
259
260
|
|
|
260
261
|
@classmethod
|
|
261
|
-
def _construct_pandas_frame_from_iterable_dataset(
|
|
262
|
-
cls
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
for i, ts in enumerate(iterable_dataset):
|
|
262
|
+
def _construct_pandas_frame_from_iterable_dataset(
|
|
263
|
+
cls, iterable_dataset: Iterable, num_cpus: int = -1
|
|
264
|
+
) -> pd.DataFrame:
|
|
265
|
+
def load_single_item(item_id: int, ts: dict) -> pd.DataFrame:
|
|
266
266
|
start_timestamp = ts["start"]
|
|
267
267
|
freq = start_timestamp.freq
|
|
268
268
|
if isinstance(start_timestamp, pd.Period):
|
|
269
269
|
start_timestamp = start_timestamp.to_timestamp(how="S")
|
|
270
270
|
target = ts["target"]
|
|
271
271
|
datetime_index = tuple(pd.date_range(start_timestamp, periods=len(target), freq=freq))
|
|
272
|
-
idx = pd.MultiIndex.from_product([(
|
|
273
|
-
|
|
274
|
-
|
|
272
|
+
idx = pd.MultiIndex.from_product([(item_id,), datetime_index], names=[ITEMID, TIMESTAMP])
|
|
273
|
+
return pd.Series(target, name="target", index=idx).to_frame()
|
|
274
|
+
|
|
275
|
+
cls._validate_iterable(iterable_dataset)
|
|
276
|
+
all_ts = Parallel(n_jobs=num_cpus)(
|
|
277
|
+
delayed(load_single_item)(item_id, ts) for item_id, ts in enumerate(iterable_dataset)
|
|
278
|
+
)
|
|
275
279
|
return pd.concat(all_ts)
|
|
276
280
|
|
|
277
281
|
@classmethod
|
|
278
|
-
def from_iterable_dataset(cls, iterable_dataset: Iterable) -> pd.DataFrame:
|
|
282
|
+
def from_iterable_dataset(cls, iterable_dataset: Iterable, num_cpus: int = -1) -> pd.DataFrame:
|
|
279
283
|
"""Construct a ``TimeSeriesDataFrame`` from an Iterable of dictionaries each of which
|
|
280
284
|
represents a single time series.
|
|
281
285
|
|
|
@@ -294,13 +298,15 @@ class TimeSeriesDataFrame(pd.DataFrame):
|
|
|
294
298
|
{"target": [3, 4, 5], "start": pd.Timestamp("01-01-2019", freq='D')},
|
|
295
299
|
{"target": [6, 7, 8], "start": pd.Timestamp("01-01-2019", freq='D')}
|
|
296
300
|
]
|
|
301
|
+
num_cpus : int, default = -1
|
|
302
|
+
Number of CPU cores used to process the iterable dataset in parallel. Set to -1 to use all cores.
|
|
297
303
|
|
|
298
304
|
Returns
|
|
299
305
|
-------
|
|
300
306
|
ts_df: TimeSeriesDataFrame
|
|
301
307
|
A data frame in TimeSeriesDataFrame format.
|
|
302
308
|
"""
|
|
303
|
-
return cls(cls._construct_pandas_frame_from_iterable_dataset(iterable_dataset))
|
|
309
|
+
return cls(cls._construct_pandas_frame_from_iterable_dataset(iterable_dataset, num_cpus=num_cpus))
|
|
304
310
|
|
|
305
311
|
@classmethod
|
|
306
312
|
def _load_data_frame_from_file(cls, path: str) -> pd.DataFrame:
|
|
@@ -6,10 +6,10 @@ from typing import Any, Callable, Dict, List, Optional, Tuple
|
|
|
6
6
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
import pandas as pd
|
|
9
|
-
import scipy.stats
|
|
10
9
|
|
|
11
10
|
# TODO: Drop GluonTS dependency
|
|
12
11
|
from gluonts.time_feature import get_lags_for_frequency, time_features_from_frequency_str
|
|
12
|
+
from joblib.parallel import Parallel, delayed
|
|
13
13
|
|
|
14
14
|
import autogluon.core as ag
|
|
15
15
|
from autogluon.tabular import TabularPredictor
|
|
@@ -38,7 +38,7 @@ class AutoGluonTabularModel(AbstractTimeSeriesModel):
|
|
|
38
38
|
max_train_size : int, default = 1_000_000
|
|
39
39
|
Maximum number of rows in the training and validation sets. If the number of rows in train or validation data
|
|
40
40
|
exceeds ``max_train_size``, then ``max_train_size`` many rows are subsampled from the dataframe.
|
|
41
|
-
|
|
41
|
+
tabular_hyperparameters : Dict[str, Dict[str, Any]], optional
|
|
42
42
|
Hyperparameters dictionary passed to `TabularPredictor.fit`. Contains the names of models that should be fit.
|
|
43
43
|
Defaults to ``{"XGB": {}, "CAT": {}, "GBM" :{}}``.
|
|
44
44
|
"""
|
|
@@ -97,7 +97,7 @@ class AutoGluonTabularModel(AbstractTimeSeriesModel):
|
|
|
97
97
|
def _get_features_dataframe(
|
|
98
98
|
self,
|
|
99
99
|
data: TimeSeriesDataFrame,
|
|
100
|
-
|
|
100
|
+
max_rows_per_item: Optional[int] = None,
|
|
101
101
|
) -> pd.DataFrame:
|
|
102
102
|
"""Generate a feature matrix used by TabularPredictor.
|
|
103
103
|
|
|
@@ -105,56 +105,32 @@ class AutoGluonTabularModel(AbstractTimeSeriesModel):
|
|
|
105
105
|
----------
|
|
106
106
|
data : TimeSeriesDataFrame
|
|
107
107
|
Dataframe containing features derived from time index & past time series values, as well as the target.
|
|
108
|
-
|
|
109
|
-
If
|
|
108
|
+
max_rows_per_item: int, optional
|
|
109
|
+
If given, features will be generated only for the last `max_rows_per_item` timesteps of each time series.
|
|
110
110
|
"""
|
|
111
|
-
# TODO: Rethink the featurization process for time series based on SotA tree-based models (scaling, rolling features)
|
|
112
|
-
# TODO: More efficient featurization with tsfresh? (currently sequential over time series => slow)
|
|
113
111
|
|
|
114
|
-
def
|
|
115
|
-
"""Construct a dataframe consisting of shifted copies of the original df.
|
|
116
|
-
|
|
117
|
-
Parameters
|
|
118
|
-
----------
|
|
119
|
-
df
|
|
120
|
-
Original dataframe, shape [N, D]
|
|
121
|
-
lag_indices
|
|
122
|
-
List of lag features to compute.
|
|
123
|
-
|
|
124
|
-
Returns
|
|
125
|
-
-------
|
|
126
|
-
lag_df
|
|
127
|
-
Dataframe with lag features, shape [N, D * len(lag_indices)]
|
|
128
|
-
"""
|
|
129
|
-
shifted = [df.shift(idx).add_suffix(f"_lag_{idx}") for idx in lag_indices]
|
|
130
|
-
return pd.concat(shifted, axis=1)
|
|
131
|
-
|
|
132
|
-
def apply_mask(
|
|
133
|
-
df: pd.DataFrame, num_hidden: np.ndarray, lag_indices: np.ndarray, num_columns: int = 1
|
|
134
|
-
) -> pd.DataFrame:
|
|
112
|
+
def apply_mask(array: np.ndarray, num_hidden: np.ndarray, lag_indices: np.ndarray) -> pd.DataFrame:
|
|
135
113
|
"""Apply a mask that mimics the situation at prediction time when target/covariates are unknown during the
|
|
136
114
|
forecast horizon.
|
|
137
115
|
|
|
138
116
|
Parameters
|
|
139
117
|
----------
|
|
140
|
-
|
|
141
|
-
|
|
118
|
+
array
|
|
119
|
+
Array to mask, shape [N, len(lag_indices)]
|
|
142
120
|
num_hidden
|
|
143
121
|
Number of entries hidden in each row, shape [N]
|
|
144
122
|
lag_indices
|
|
145
123
|
Lag indices used to construct the dataframe
|
|
146
|
-
num_columns
|
|
147
|
-
D - number of columns in the original dataframe, before lag features were constructed
|
|
148
124
|
|
|
149
125
|
Returns
|
|
150
126
|
-------
|
|
151
|
-
|
|
152
|
-
|
|
127
|
+
masked_array
|
|
128
|
+
Array with the masking applied, shape [N, len(lag_indices)]
|
|
153
129
|
|
|
154
130
|
|
|
155
131
|
For example, given the following inputs
|
|
156
132
|
|
|
157
|
-
|
|
133
|
+
array = [
|
|
158
134
|
[1, 1, 1, 1],
|
|
159
135
|
[1, 1, 1, 1],
|
|
160
136
|
[1, 1, 1, 1],
|
|
@@ -163,78 +139,133 @@ class AutoGluonTabularModel(AbstractTimeSeriesModel):
|
|
|
163
139
|
lag_indices = [1, 2, 5, 10]
|
|
164
140
|
num_columns = 1
|
|
165
141
|
|
|
166
|
-
The resulting masked
|
|
142
|
+
The resulting masked output will be
|
|
167
143
|
|
|
168
|
-
|
|
144
|
+
masked_array = [
|
|
169
145
|
[NaN, NaN, NaN, 1],
|
|
170
146
|
[1, 1, 1, 1],
|
|
171
147
|
[NaN, 1, 1, 1],
|
|
172
148
|
]
|
|
173
149
|
|
|
174
150
|
"""
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
151
|
+
mask = num_hidden[:, None] >= lag_indices[None] # shape [len(num_hidden), len(lag_indices)]
|
|
152
|
+
array[mask] = np.nan
|
|
153
|
+
return array
|
|
154
|
+
|
|
155
|
+
def get_lags(
|
|
156
|
+
ts: np.ndarray,
|
|
157
|
+
lag_indices: np.ndarray,
|
|
158
|
+
prediction_length: int,
|
|
159
|
+
max_rows_per_item: int = 100_000,
|
|
160
|
+
mask: bool = False,
|
|
161
|
+
) -> np.ndarray:
|
|
162
|
+
"""Generate the matrix of lag features for a single time series.
|
|
183
163
|
|
|
184
164
|
Parameters
|
|
185
165
|
----------
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
166
|
+
ts
|
|
167
|
+
Array with target or covariate values, shape [N]
|
|
168
|
+
lag_indices
|
|
169
|
+
Array with the lag indices to use for feature generation.
|
|
170
|
+
prediction_length
|
|
171
|
+
Length of the forecast horizon.
|
|
172
|
+
max_rows_per_item
|
|
173
|
+
Maximum number of rows to include in the feature matrix.
|
|
174
|
+
If max_rows_per_item < len(ts), the lag features will be generated only
|
|
175
|
+
for the *last* max_rows_per_item entries of ts.
|
|
176
|
+
mask
|
|
177
|
+
If True, a mask will be applied to some entries of the feature matrix,
|
|
178
|
+
mimicking the behavior at prediction time, when the ts values are not
|
|
179
|
+
known during the forecast horizon.
|
|
189
180
|
|
|
190
181
|
Returns
|
|
191
182
|
-------
|
|
192
183
|
features
|
|
193
|
-
|
|
184
|
+
Array with lag features, shape [min(N, max_rows_per_item), len(lag_indices)]
|
|
194
185
|
"""
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
time_series[self.metadata.past_covariates_real], self._past_covariates_lag_indices
|
|
209
|
-
)
|
|
210
|
-
past_covariates_lags = apply_mask(
|
|
211
|
-
past_covariates_lags,
|
|
212
|
-
num_hidden=num_hidden,
|
|
213
|
-
lag_indices=self._past_covariates_lag_indices,
|
|
214
|
-
num_columns=len(self.metadata.past_covariates_real),
|
|
215
|
-
)
|
|
216
|
-
feature_dfs.append(past_covariates_lags)
|
|
186
|
+
num_rows = min(max_rows_per_item, len(ts))
|
|
187
|
+
features = np.full([num_rows, len(lag_indices)], fill_value=np.nan)
|
|
188
|
+
for i in range(1, num_rows + 1):
|
|
189
|
+
target_idx = len(ts) - i
|
|
190
|
+
selected_lags = lag_indices[lag_indices <= target_idx]
|
|
191
|
+
features[num_rows - i, np.arange(len(selected_lags))] = ts[target_idx - selected_lags]
|
|
192
|
+
if mask:
|
|
193
|
+
num_windows = (len(ts) - 1) // prediction_length
|
|
194
|
+
# We don't hide any past values for the first `remainder` values, otherwise the features will be all empty
|
|
195
|
+
remainder = len(ts) - num_windows * prediction_length
|
|
196
|
+
num_hidden = np.concatenate([np.zeros(remainder), np.tile(np.arange(prediction_length), num_windows)])
|
|
197
|
+
features = apply_mask(features, num_hidden[-num_rows:], lag_indices)
|
|
198
|
+
return features
|
|
217
199
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
200
|
+
def get_lag_features(
|
|
201
|
+
all_series: List[np.ndarray],
|
|
202
|
+
lag_indices: np.ndarray,
|
|
203
|
+
prediction_length: int,
|
|
204
|
+
max_rows_per_item: int,
|
|
205
|
+
mask: bool,
|
|
206
|
+
name: str,
|
|
207
|
+
):
|
|
208
|
+
"""Generate lag features for all time series in the dataset.
|
|
209
|
+
|
|
210
|
+
See the docstring of get_lags for the description of the parameters.
|
|
211
|
+
"""
|
|
212
|
+
# TODO: Expose n_jobs to the user as a hyperparameter
|
|
213
|
+
lags_per_item = Parallel(n_jobs=-1)(
|
|
214
|
+
delayed(get_lags)(
|
|
215
|
+
ts,
|
|
216
|
+
lag_indices,
|
|
217
|
+
prediction_length=prediction_length,
|
|
218
|
+
max_rows_per_item=max_rows_per_item,
|
|
219
|
+
mask=mask,
|
|
221
220
|
)
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
features =
|
|
225
|
-
return features
|
|
221
|
+
for ts in all_series
|
|
222
|
+
)
|
|
223
|
+
features = np.concatenate(lags_per_item)
|
|
224
|
+
return pd.DataFrame(features, columns=[f"{name}_lag_{idx}" for idx in lag_indices])
|
|
225
|
+
|
|
226
|
+
df = pd.DataFrame(data)
|
|
227
|
+
all_series = [ts for _, ts in df.droplevel(TIMESTAMP).groupby(level=ITEMID, sort=False)]
|
|
228
|
+
if max_rows_per_item is None:
|
|
229
|
+
max_rows_per_item = data.num_timesteps_per_item().max()
|
|
230
|
+
|
|
231
|
+
feature_dfs = []
|
|
232
|
+
for column_name in df.columns:
|
|
233
|
+
if column_name == self.target:
|
|
234
|
+
mask = True
|
|
235
|
+
lag_indices = self._target_lag_indices
|
|
236
|
+
elif column_name in self.metadata.past_covariates_real:
|
|
237
|
+
mask = True
|
|
238
|
+
lag_indices = self._past_covariates_lag_indices
|
|
239
|
+
elif column_name in self.metadata.known_covariates_real:
|
|
240
|
+
mask = False
|
|
241
|
+
lag_indices = self._known_covariates_lag_indices
|
|
242
|
+
else:
|
|
243
|
+
raise ValueError(f"Unexpected column {column_name} is not among target or covariates.")
|
|
244
|
+
|
|
245
|
+
feature_dfs.append(
|
|
246
|
+
get_lag_features(
|
|
247
|
+
[ts[column_name].to_numpy() for ts in all_series],
|
|
248
|
+
lag_indices=lag_indices,
|
|
249
|
+
prediction_length=self.prediction_length,
|
|
250
|
+
max_rows_per_item=max_rows_per_item,
|
|
251
|
+
mask=mask,
|
|
252
|
+
name=column_name,
|
|
253
|
+
)
|
|
254
|
+
)
|
|
226
255
|
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
256
|
+
# Only the last max_rows_per_item entries for each item will be included in the feature matrix
|
|
257
|
+
target_with_index = df[self.target].groupby(level=ITEMID, sort=False).tail(max_rows_per_item)
|
|
258
|
+
feature_dfs.append(target_with_index.reset_index(drop=True))
|
|
230
259
|
|
|
231
|
-
|
|
232
|
-
|
|
260
|
+
timestamps = target_with_index.index.get_level_values(level=TIMESTAMP)
|
|
261
|
+
feature_dfs.append(
|
|
262
|
+
pd.DataFrame({time_feat.__name__: time_feat(timestamps) for time_feat in self._time_features})
|
|
263
|
+
)
|
|
233
264
|
|
|
234
|
-
|
|
235
|
-
features = features.groupby(level=ITEMID, sort=False, group_keys=False).tail(last_k_values)
|
|
265
|
+
features = pd.concat(feature_dfs, axis=1)
|
|
236
266
|
|
|
237
267
|
if data.static_features is not None:
|
|
268
|
+
features.index = target_with_index.index.get_level_values(level=ITEMID)
|
|
238
269
|
features = pd.merge(features, data.static_features, how="left", on=ITEMID, suffixes=(None, "_static_feat"))
|
|
239
270
|
|
|
240
271
|
features.reset_index(inplace=True, drop=True)
|
|
@@ -277,7 +308,7 @@ class AutoGluonTabularModel(AbstractTimeSeriesModel):
|
|
|
277
308
|
f"train_data and val_data must have the same freq (received {train_data.freq} and {val_data.freq})"
|
|
278
309
|
)
|
|
279
310
|
val_data, _ = self._normalize_targets(val_data)
|
|
280
|
-
val_df = self._get_features_dataframe(val_data,
|
|
311
|
+
val_df = self._get_features_dataframe(val_data, max_rows_per_item=self.prediction_length)
|
|
281
312
|
val_df = val_df[self._available_features]
|
|
282
313
|
|
|
283
314
|
if len(val_df) > max_train_size:
|
|
@@ -331,7 +362,7 @@ class AutoGluonTabularModel(AbstractTimeSeriesModel):
|
|
|
331
362
|
|
|
332
363
|
data, scale_per_item = self._normalize_targets(data)
|
|
333
364
|
data_extended = self._extend_index(data)
|
|
334
|
-
features = self._get_features_dataframe(data_extended,
|
|
365
|
+
features = self._get_features_dataframe(data_extended, max_rows_per_item=self.prediction_length)
|
|
335
366
|
features = features[self._available_features]
|
|
336
367
|
|
|
337
368
|
# Predict for batches (instead of using full dataset) to avoid high memory usage
|
|
@@ -10,14 +10,14 @@ networkx<3.0,>=2.3
|
|
|
10
10
|
statsforecast<1.5,>=1.4.0
|
|
11
11
|
tqdm<5,>=4.38
|
|
12
12
|
ujson<6,>=5
|
|
13
|
-
autogluon.core[raytune]==0.7.
|
|
14
|
-
autogluon.common==0.7.
|
|
15
|
-
autogluon.tabular[catboost,lightgbm,xgboost]==0.7.
|
|
13
|
+
autogluon.core[raytune]==0.7.0b20230303
|
|
14
|
+
autogluon.common==0.7.0b20230303
|
|
15
|
+
autogluon.tabular[catboost,lightgbm,xgboost]==0.7.0b20230303
|
|
16
16
|
|
|
17
17
|
[all]
|
|
18
|
-
pmdarima<1.9,>=1.8.2
|
|
19
|
-
tbats<2,>=1.1
|
|
20
18
|
sktime<0.16,>=0.14
|
|
19
|
+
tbats<2,>=1.1
|
|
20
|
+
pmdarima<1.9,>=1.8.2
|
|
21
21
|
|
|
22
22
|
[sktime]
|
|
23
23
|
sktime<0.16,>=0.14
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|