autogluon.timeseries 1.3.2b20250709__py3-none-any.whl → 1.3.2b20250711__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/timeseries/models/__init__.py +2 -1
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +1 -1
- autogluon/timeseries/models/autogluon_tabular/__init__.py +2 -0
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +24 -16
- autogluon/timeseries/models/autogluon_tabular/per_step.py +453 -0
- autogluon/timeseries/models/chronos/model.py +1 -15
- autogluon/timeseries/models/chronos/pipeline/chronos.py +1 -42
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -3
- autogluon/timeseries/models/local/abstract_local_model.py +3 -3
- autogluon/timeseries/models/local/naive.py +4 -4
- autogluon/timeseries/models/local/npts.py +1 -1
- autogluon/timeseries/models/local/statsforecast.py +11 -11
- autogluon/timeseries/models/presets.py +3 -0
- autogluon/timeseries/regressor.py +1 -0
- autogluon/timeseries/utils/datetime/lags.py +1 -1
- autogluon/timeseries/version.py +1 -1
- {autogluon.timeseries-1.3.2b20250709.dist-info → autogluon.timeseries-1.3.2b20250711.dist-info}/METADATA +7 -12
- {autogluon.timeseries-1.3.2b20250709.dist-info → autogluon.timeseries-1.3.2b20250711.dist-info}/RECORD +25 -24
- /autogluon.timeseries-1.3.2b20250709-py3.9-nspkg.pth → /autogluon.timeseries-1.3.2b20250711-py3.9-nspkg.pth +0 -0
- {autogluon.timeseries-1.3.2b20250709.dist-info → autogluon.timeseries-1.3.2b20250711.dist-info}/LICENSE +0 -0
- {autogluon.timeseries-1.3.2b20250709.dist-info → autogluon.timeseries-1.3.2b20250711.dist-info}/NOTICE +0 -0
- {autogluon.timeseries-1.3.2b20250709.dist-info → autogluon.timeseries-1.3.2b20250711.dist-info}/WHEEL +0 -0
- {autogluon.timeseries-1.3.2b20250709.dist-info → autogluon.timeseries-1.3.2b20250711.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.3.2b20250709.dist-info → autogluon.timeseries-1.3.2b20250711.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.3.2b20250709.dist-info → autogluon.timeseries-1.3.2b20250711.dist-info}/zip-safe +0 -0
autogluon/timeseries/models/__init__.py CHANGED
@@ -1,4 +1,4 @@
-from .autogluon_tabular import DirectTabularModel, RecursiveTabularModel
+from .autogluon_tabular import DirectTabularModel, PerStepTabularModel, RecursiveTabularModel
 from .chronos import ChronosModel
 from .gluonts import (
     DeepARModel,
@@ -46,6 +46,7 @@ __all__ = [
     "NPTSModel",
     "NaiveModel",
     "PatchTSTModel",
+    "PerStepTabularModel",
     "RecursiveTabularModel",
     "SeasonalAverageModel",
     "SeasonalNaiveModel",
autogluon/timeseries/models/abstract/abstract_timeseries_model.py CHANGED
@@ -482,7 +482,7 @@ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
             self.covariate_regressor.fit(
                 train_data,
                 time_limit=covariate_regressor_time_limit,
-                verbosity=verbosity,
+                verbosity=verbosity - 1,
             )

         if self._get_tags()["can_use_train_data"]:
autogluon/timeseries/models/autogluon_tabular/mlforecast.py CHANGED
@@ -22,7 +22,7 @@ from autogluon.timeseries.utils.datetime import (
     get_seasonality,
     get_time_features_for_frequency,
 )
-from autogluon.timeseries.utils.warning_filters import warning_filter
+from autogluon.timeseries.utils.warning_filters import set_loggers_level, warning_filter

 from .utils import MLF_ITEMID, MLF_TARGET, MLF_TIMESTAMP

@@ -346,16 +346,18 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):
             max_num_samples=model_params["max_num_samples"],
         )

-
-
-
-
-
-
-
-
-
-
+        with set_loggers_level(regex=r"^autogluon.tabular.*", level=logging.ERROR):
+            tabular_model = self._create_tabular_model(
+                model_name=model_params["model_name"], model_hyperparameters=model_params["model_hyperparameters"]
+            )
+            tabular_model.fit(
+                X=train_df.drop(columns=[MLF_TARGET, MLF_ITEMID]),
+                y=train_df[MLF_TARGET],
+                X_val=val_df.drop(columns=[MLF_TARGET, MLF_ITEMID]),
+                y_val=val_df[MLF_TARGET],
+                time_limit=(None if time_limit is None else time_limit - (time.time() - fit_start_time)),
+                verbosity=verbosity - 1,
+            )

         # We directly insert the trained model into models_ since calling _mlf.fit_models does not support X_val, y_val
         self._mlf.models_ = {"mean": tabular_model}
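The newly imported `set_loggers_level` silences the matched loggers while the inner tabular model trains. A minimal sketch of the idea, assuming the standard `logging` registry; the real implementation lives in `autogluon.timeseries.utils.warning_filters` and may differ:

```python
import logging
import re
from contextlib import contextmanager

@contextmanager
def set_loggers_level(regex: str, level: int = logging.ERROR):
    """Temporarily set every registered logger whose name matches `regex` to `level`."""
    # logging.root.manager.loggerDict holds the names of all loggers created so far
    matched = [logging.getLogger(name) for name in logging.root.manager.loggerDict if re.match(regex, name)]
    old_levels = [lg.level for lg in matched]
    try:
        for lg in matched:
            lg.setLevel(level)
        yield
    finally:
        # restore the original levels even if the wrapped code raises
        for lg, old in zip(matched, old_levels):
            lg.setLevel(old)
```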
@@ -466,9 +468,9 @@ class AbstractMLForecastModel(AbstractTimeSeriesModel):


 class DirectTabularModel(AbstractMLForecastModel):
-    """Predict all future time series values simultaneously using
+    """Predict all future time series values simultaneously using tabular regression models.

-    A single
+    A single tabular model is used to forecast all future time series values using the following features:

     - lag features (observed time series values) based on ``freq`` of the data
     - time features (e.g., day of the week) based on the timestamp of the measurement
@@ -477,8 +479,8 @@ class DirectTabularModel(AbstractMLForecastModel):

     Features not known during the forecast horizon (e.g., future target values) are replaced by NaNs.

-    If ``eval_metric.needs_quantile``, the
-    Otherwise,
+    If ``eval_metric.needs_quantile``, the tabular regression model will be trained with ``"quantile"`` problem type.
+    Otherwise, the model will be trained with ``"regression"`` problem type, and dummy quantiles will be
     obtained by assuming that the residuals follow zero-mean normal distribution.

     Based on the `mlforecast <https://github.com/Nixtla/mlforecast>`_ library.
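The "dummy quantiles" mentioned in the docstring amount to shifting the point forecast by scaled quantiles of a zero-mean normal distribution fitted to the residuals. A hypothetical illustration of that fallback (function name and shapes are ours, not the library's):

```python
import numpy as np
from scipy.stats import norm

def dummy_quantiles(point_forecast: np.ndarray, residuals: np.ndarray, levels: list) -> np.ndarray:
    """Quantile forecasts from a point forecast, assuming residuals ~ N(0, sigma^2)."""
    sigma = residuals.std()  # zero-mean assumption: no centering term
    # broadcast to shape [num_predictions, num_levels]
    return point_forecast[:, None] + sigma * norm.ppf(levels)[None, :]

quantiles = dummy_quantiles(np.array([10.0, 12.0]), np.array([-1.0, 0.2, 0.8]), [0.1, 0.5, 0.9])
```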
@@ -530,7 +532,9 @@ class DirectTabularModel(AbstractMLForecastModel):
         """Apply a mask that mimics the situation at prediction time when target/covariates are unknown during the
         forecast horizon.
         """
-
+        # Fix seed to make the model deterministic
+        rng = np.random.default_rng(seed=123)
+        num_hidden = rng.integers(0, self.prediction_length, size=len(df))
         lag_cols = [f"lag{lag}" for lag in self._target_lags]
         mask = num_hidden[:, None] < self._target_lags[None]  # shape [len(num_hidden), len(_target_lags)]
         # use df.loc[:, lag_cols] instead of df[lag_cols] to avoid SettingWithCopyWarning
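The broadcasted comparison above builds the whole row-by-lag visibility mask in one step. A standalone example of the same pattern (values are illustrative):

```python
import numpy as np

rng = np.random.default_rng(seed=123)
target_lags = np.array([1, 2, 3, 7])     # lag feature columns: lag1, lag2, lag3, lag7
num_hidden = rng.integers(0, 4, size=5)  # each row pretends to be 0-3 steps into the horizon
mask = num_hidden[:, None] < target_lags[None]  # shape [5, 4]
# A row with num_hidden=2 keeps lag3 and lag7 (True) while lag1 and lag2 (False) get masked,
# exactly the information that would be unavailable 2 steps into the forecast horizon.
```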
@@ -635,6 +639,8 @@ class DirectTabularModel(AbstractMLForecastModel):
         return TabularModel(
             model_class=model_class,
             model_kwargs={
+                "path": "",
+                "name": model_class.__name__,
                 "hyperparameters": model_hyperparameters,
                 "problem_type": problem_type,
                 "eval_metric": eval_metric,
@@ -746,6 +752,8 @@ class RecursiveTabularModel(AbstractMLForecastModel):
         return TabularModel(
             model_class=model_class,
             model_kwargs={
+                "path": "",
+                "name": model_class.__name__,
                 "hyperparameters": model_hyperparameters,
                 "problem_type": ag.constants.REGRESSION,
                 "eval_metric": self.eval_metric.equivalent_tabular_regression_metric or "mean_absolute_error",
autogluon/timeseries/models/autogluon_tabular/per_step.py ADDED
@@ -0,0 +1,453 @@
+import logging
+import math
+import os
+import time
+from typing import Any, Callable, Dict, List, Optional, Type
+
+import numpy as np
+import pandas as pd
+from joblib import Parallel, cpu_count, delayed
+
+from autogluon.common.utils.pandas_utils import get_approximate_df_mem_usage
+from autogluon.common.utils.resource_utils import ResourceManager
+from autogluon.core.constants import QUANTILE
+from autogluon.tabular.models import AbstractModel as AbstractTabularModel
+from autogluon.tabular.registry import ag_model_registry
+from autogluon.timeseries import TimeSeriesDataFrame
+from autogluon.timeseries.dataset.ts_dataframe import ITEMID, TIMESTAMP
+from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel
+from autogluon.timeseries.utils.datetime import get_lags_for_frequency, get_time_features_for_frequency
+from autogluon.timeseries.utils.warning_filters import set_loggers_level
+
+from .utils import MLF_ITEMID, MLF_TARGET, MLF_TIMESTAMP
+
+logger = logging.getLogger(__name__)
+
+DUMMY_FREQ = "D"
+
+
+class PerStepTabularModel(AbstractTimeSeriesModel):
+    """Fit a separate tabular regression model for each time step in the forecast horizon.
+
+    Each model has access to the following features:
+
+    - lag features (observed time series values) based on ``freq`` of the data
+    - time features (e.g., day of the week) based on the timestamp of the measurement
+    - known covariates (if available)
+    - static features of each item (if available)
+
+    This model is typically much slower to fit compared to other tabular forecasting models.
+
+    This model uses `mlforecast <https://github.com/Nixtla/mlforecast>`_ under the hood for efficient preprocessing,
+    but the implementation of the per-step forecasting strategy is different from the `max_horizon` in `mlforecast`.
+
+
+    Other Parameters
+    ----------------
+    trailing_lags : List[int], default = None
+        Trailing window lags of the target that will be used as features for predictions.
+        Trailing lags are shifted per forecast step: model for step `h` uses `[lag+h for lag in trailing_lags]`.
+        If None, defaults to [1, 2, ..., 12].
+    seasonal_lags : List[int], default = None
+        Seasonal lags of the target used as features. Unlike trailing lags, seasonal lags are not shifted
+        but filtered by availability: model for step `h` uses `[lag for lag in seasonal_lags if lag > h]`.
+        If None, determined automatically based on data frequency.
+    date_features : List[Union[str, Callable]], default = None
+        Features computed from the dates. Can be pandas date attributes or functions that will take the dates as input.
+        If None, will be determined automatically based on the frequency of the data.
+    target_scaler : {"standard", "mean_abs", "min_max", "robust", None}, default = "mean_abs"
+        Scaling applied to each time series.
+    model_name : str, default = "CAT"
+        Name of the tabular regression model. See `autogluon.tabular.registry.ag_model_registry` or
+        `the documentation <https://auto.gluon.ai/stable/api/autogluon.tabular.models.html>`_ for the list of available
+        tabular models.
+    model_hyperparameters : Dict[str, Any], optional
+        Hyperparameters passed to the tabular regression model.
+    max_num_items : int or None, default = 20_000
+        If not None, the model will randomly select this many time series for training and validation.
+    max_num_samples : int or None, default = 1_000_000
+        If not None, training dataset passed to TabularPredictor will contain at most this many rows (starting from the
+        end of each time series).
+    n_jobs : int or None, default = None
+        Number of parallel jobs for fitting models across forecast horizons.
+        If None, automatically determined based on available memory to prevent OOM errors.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # We save the relative paths to per-step models. Each worker process independently saves/loads the model.
+        # This is much more efficient than passing around model objects that can get really large
+        self._relative_paths_to_models: list[str]
+        self._trailing_lags: list[int]
+        self._seasonal_lags: list[int]
+        self._date_features: list[Callable]
+        self._model_cls: Type[AbstractTabularModel]
+        self._n_jobs: int
+        self._non_boolean_real_covariates: List[str] = []
+        self._max_ts_length: Optional[int] = None
+
+    @property
+    def allowed_hyperparameters(self) -> List[str]:
+        # TODO: Differencing is currently not supported because it greatly complicates the preprocessing logic
+        return super().allowed_hyperparameters + [
+            "trailing_lags",
+            "seasonal_lags",
+            "date_features",
+            # "differences",
+            "validation_fraction",
+            "model_name",
+            "model_hyperparameters",
+            "max_num_items",
+            "max_num_samples",
+            "n_jobs",
+        ]
+
+    @property
+    def _ag_to_nixtla(self) -> dict:
+        return {self.target: MLF_TARGET, ITEMID: MLF_ITEMID, TIMESTAMP: MLF_TIMESTAMP}
+
+    def _get_default_hyperparameters(self):
+        return {
+            "model_name": "CAT",
+            "model_hyperparameters": {},
+            "target_scaler": "mean_abs",
+            "validation_fraction": 0.1,
+            "max_num_samples": 1_000_000,
+            "max_num_items": 20_000,
+        }
+
+    @staticmethod
+    def _fit_single_model(
+        train_df: pd.DataFrame,
+        path_root: str,
+        step: int,
+        model_cls: Type[AbstractTabularModel],
+        model_hyperparameters: dict,
+        validation_fraction: Optional[float],
+        quantile_levels: list[float],
+        lags: list[int],
+        date_features: list[Callable],
+        time_limit: Optional[float],
+        num_cpus: int,
+        verbosity: int,
+    ) -> str:
+        from mlforecast import MLForecast
+
+        start_time = time.monotonic()
+
+        mlf = MLForecast(models=[], freq=DUMMY_FREQ, lags=lags, date_features=date_features)
+
+        features_df = mlf.preprocess(train_df, static_features=[], dropna=False)
+        del train_df
+        del mlf
+        # Sort chronologically for efficient train/test split
+        features_df = features_df.sort_values(by=MLF_TIMESTAMP)
+        X = features_df.drop(columns=[MLF_ITEMID, MLF_TIMESTAMP, MLF_TARGET])
+        y = features_df[MLF_TARGET]
+        del features_df
+
+        y_is_valid = np.isfinite(y)
+        X, y = X[y_is_valid], y[y_is_valid]
+        X = X.replace(float("inf"), float("nan"))
+        if validation_fraction is None or validation_fraction == 0.0:
+            X_val = None
+            y_val = None
+        else:
+            assert 0 < validation_fraction < 1, "validation_fraction must be between 0.0 and 1.0"
+            num_val = math.ceil(len(X) * validation_fraction)
+            X_val, y_val = X.iloc[-num_val:], y.iloc[-num_val:]
+            X, y = X.iloc[:-num_val], y.iloc[:-num_val]
+        if len(y) == 0:
+            raise ValueError("Not enough valid target values to fit model")
+
+        elapsed = time.monotonic() - start_time
+        time_left = time_limit - elapsed if time_limit is not None else None
+        try:
+            with set_loggers_level(regex=r"^autogluon.tabular.*", level=logging.ERROR):
+                model = model_cls(
+                    path=os.path.join(path_root, f"step_{step}"),
+                    name=model_cls.__name__,  # explicitly provide name to avoid warnings
+                    problem_type=QUANTILE,
+                    eval_metric="pinball_loss",
+                    hyperparameters={**model_hyperparameters, "ag.quantile_levels": quantile_levels},
+                )
+                model.fit(
+                    X=X,
+                    y=y,
+                    X_val=X_val,
+                    y_val=y_val,
+                    time_limit=time_left,
+                    num_cpus=num_cpus,
+                    num_gpus=0,  # num_cpus is only used if num_gpus is set as well
+                    verbosity=verbosity,
+                )
+        except Exception as e:
+            raise RuntimeError(f"Failed when fitting model for {step=}") from e
+        model.save()
+        relative_path = os.path.relpath(path=model.path, start=path_root)
+        return relative_path
+
+    @staticmethod
+    def _get_n_jobs(
+        train_df: pd.DataFrame,
+        num_extra_dynamic_features: int,
+        model_cls: Type[AbstractTabularModel],
+        model_hyperparameters: dict,
+        overhead_factor: float = 2.0,
+    ) -> int:
+        """Estimate the maximum number of jobs that can be run in parallel without encountering OOM errors."""
+        mem_usage_per_column = get_approximate_df_mem_usage(train_df)
+        num_columns = len(train_df.columns)
+        mem_usage_per_job = mem_usage_per_column.sum()
+        try:
+            mem_usage_per_job += model_cls.estimate_memory_usage_static(
+                X=train_df, hyperparameters=model_hyperparameters, problem_type="regression"
+            )
+        except NotImplementedError:
+            mem_usage_per_job *= 2
+        # Extra scaling factor because the preprocessed DF will have more columns for lags + date features
+        mem_usage_per_job *= overhead_factor + (num_extra_dynamic_features + num_columns) / num_columns
+        max_jobs_by_memory = int(ResourceManager.get_available_virtual_mem() / mem_usage_per_job)
+        return max(1, max_jobs_by_memory)
+
+    def preprocess(
+        self,
+        data: TimeSeriesDataFrame,
+        known_covariates: Optional[TimeSeriesDataFrame] = None,
+        is_train: bool = False,
+        **kwargs,
+    ):
+        # TODO: Make this toggleable with a hyperparameter
+        # We add a scaled version of non-boolean known real covariates, same as in MLForecast models
+        if is_train:
+            for col in self.covariate_metadata.known_covariates_real:
+                if not set(data[col].unique()) == set([0, 1]):
+                    self._non_boolean_real_covariates.append(col)
+
+        if len(self._non_boolean_real_covariates) > 0:
+            item_ids = data.index.get_level_values(level=ITEMID)
+            scale_per_column: dict[str, pd.Series] = {}
+            columns_grouped = data[self._non_boolean_real_covariates].abs().groupby(item_ids)
+            for col in self._non_boolean_real_covariates:
+                scale_per_column[col] = columns_grouped[col].mean()
+            data = data.assign(**{f"__scaled_{col}": data[col] / scale for col, scale in scale_per_column.items()})
+            if known_covariates is not None:
+                known_covariates = known_covariates.assign(
+                    **{f"__scaled_{col}": known_covariates[col] / scale for col, scale in scale_per_column.items()}
+                )
+        data = data.astype({self.target: "float32"})
+        return data, known_covariates
+
+    def _get_train_df(
+        self, train_data: TimeSeriesDataFrame, max_num_items: Optional[int], max_num_samples: Optional[int]
+    ) -> pd.DataFrame:
+        if max_num_items is not None and train_data.num_items > max_num_items:
+            items_to_keep = train_data.item_ids.to_series().sample(n=int(max_num_items))  # noqa: F841
+            train_data = train_data.query("item_id in @items_to_keep")
+
+        if max_num_samples is not None and len(train_data) > max_num_samples:
+            max_samples_per_ts = max(200, math.ceil(max_num_samples / train_data.num_items))
+            self._max_ts_length = max_samples_per_ts + self.prediction_length
+            train_data = train_data.slice_by_timestep(-self._max_ts_length, None)
+
+        if len(self.covariate_metadata.past_covariates) > 0:
+            train_data = train_data.drop(columns=self.covariate_metadata.past_covariates)
+
+        train_df = train_data.to_data_frame().reset_index()
+        if train_data.static_features is not None:
+            train_df = pd.merge(
+                left=train_df, right=train_data.static_features, left_on=ITEMID, right_index=True, how="left"
+            )
+        train_df = train_df.rename(columns=self._ag_to_nixtla)
+        train_df = train_df.assign(**{MLF_TARGET: train_df[MLF_TARGET].fillna(float("inf"))})
+        return train_df
+
+    @staticmethod
+    def _get_lags_for_step(
+        trailing_lags: List[int],
+        seasonal_lags: List[int],
+        step: int,
+    ) -> List[int]:
+        """Get the list of lags that can be used by the model for the given step."""
+        shifted_trailing_lags = [lag + step for lag in trailing_lags]
+        # Only keep lags that are available for model predicting `step` values ahead at prediction time
+        valid_lags = [lag for lag in shifted_trailing_lags + seasonal_lags if lag > step]
+        return sorted(set(valid_lags))
+
+    def _fit(
+        self,
+        train_data: TimeSeriesDataFrame,
+        val_data: Optional[TimeSeriesDataFrame] = None,
+        time_limit: Optional[float] = None,
+        verbosity: int = 2,
+        **kwargs,
+    ) -> None:
+        self._check_fit_params()
+        self._log_unused_hyperparameters()
+        model_params = self.get_hyperparameters()
+
+        train_df = self._get_train_df(
+            train_data,
+            max_num_items=model_params["max_num_items"],
+            max_num_samples=model_params["max_num_samples"],
+        )
+
+        # Initialize MLForecast arguments
+        assert self.freq is not None
+        trailing_lags = model_params.get("trailing_lags")
+        if trailing_lags is None:
+            trailing_lags = list(range(1, 13))
+        # Ensure that lags have type list[int] and not, e.g., np.ndarray
+        self._trailing_lags = [int(lag) for lag in trailing_lags]
+        assert all(lag >= 1 for lag in self._trailing_lags), "trailing_lags must be >= 1"
+
+        seasonal_lags = model_params.get("seasonal_lags")
+        if seasonal_lags is None:
+            median_ts_length = int(train_df[MLF_ITEMID].value_counts(sort=False).median())
+            seasonal_lags = get_lags_for_frequency(self.freq, num_default_lags=0, lag_ub=median_ts_length)
+        self._seasonal_lags = [int(lag) for lag in seasonal_lags]
+        assert all(lag >= 1 for lag in self._seasonal_lags), "seasonal_lags must be >= 1"
+
+        date_features = model_params.get("date_features")
+        if date_features is None:
+            date_features = get_time_features_for_frequency(self.freq)
+        self._date_features = date_features
+
+        self._model_cls = ag_model_registry.key_to_cls(model_params["model_name"])
+        supported_problem_types = self._model_cls.supported_problem_types()
+        if supported_problem_types is not None and QUANTILE not in supported_problem_types:
+            raise ValueError(
+                f"Chosen model_name='{model_params['model_name']}' cannot be used by {self.name} because it does not "
+                f"support problem_type='quantile' ({supported_problem_types=})"
+            )
+        model_hyperparameters = model_params["model_hyperparameters"]
+        # User-provided n_jobs takes priority over the automatic estimate
+        if model_params.get("n_jobs") is not None:
+            self._n_jobs = model_params["n_jobs"]
+        else:
+            self._n_jobs = self._get_n_jobs(
+                train_df,
+                num_extra_dynamic_features=len(set(self._seasonal_lags + self._trailing_lags))
+                + len(self._date_features),
+                model_cls=self._model_cls,
+                model_hyperparameters=model_hyperparameters,
+            )
+        n_jobs = min(self._n_jobs, self.prediction_length, cpu_count(only_physical_cores=True))
+
+        num_cpus_per_model = max(cpu_count(only_physical_cores=True) // n_jobs, 1)
+        if time_limit is not None:
+            time_limit_per_model = time_limit / math.ceil(self.prediction_length / n_jobs)
+        else:
+            time_limit_per_model = None
+        model_fit_kwargs = dict(
+            train_df=train_df,
+            path_root=self.path,
+            model_cls=self._model_cls,
+            quantile_levels=self.quantile_levels,
+            validation_fraction=model_params["validation_fraction"],
+            date_features=self._date_features,
+            time_limit=time_limit_per_model,
+            num_cpus=num_cpus_per_model,
+            model_hyperparameters=model_hyperparameters.copy(),
+            verbosity=verbosity - 1,
+        )
+        logger.debug(f"Fitting models in parallel with {n_jobs=}, {num_cpus_per_model=}, {time_limit_per_model=}")
+        self._relative_paths_to_models = Parallel(n_jobs=n_jobs)(  # type: ignore
+            delayed(self._fit_single_model)(
+                step=step,
+                lags=self._get_lags_for_step(
+                    seasonal_lags=self._seasonal_lags, trailing_lags=self._trailing_lags, step=step
+                ),
+                **model_fit_kwargs,
+            )
+            for step in range(self.prediction_length)
+        )
+
+    @staticmethod
+    def _predict_with_single_model(
+        full_df: pd.DataFrame,
+        path_to_model: str,
+        model_cls: Type[AbstractTabularModel],
+        step: int,
+        prediction_length: int,
+        lags: list[int],
+        date_features: list[Callable],
+    ) -> np.ndarray:
+        """Make predictions with the model for the given step.
+
+        Returns
+        -------
+        predictions :
+            Predictions of the model for the given step. Shape: (num_items, len(quantile_levels)).
+        """
+        from mlforecast import MLForecast
+
+        mlf = MLForecast(models=[], freq=DUMMY_FREQ, lags=lags, date_features=date_features)
+
+        features_df = mlf.preprocess(full_df, static_features=[], dropna=False)
+        del mlf
+
+        end_idx_per_item = np.cumsum(features_df[MLF_ITEMID].value_counts(sort=False).to_numpy(dtype="int32"))
+        features_for_step = features_df.iloc[end_idx_per_item - (prediction_length - step)]
+        try:
+            model: AbstractTabularModel = model_cls.load(path_to_model)  # type: ignore
+        except:
+            logger.error(f"Could not load model for {step=} from {path_to_model}")
+            raise
+        predictions = model.predict(features_for_step)
+        return predictions
+
+    def _predict(
+        self,
+        data: TimeSeriesDataFrame,
+        known_covariates: TimeSeriesDataFrame | None = None,
+        **kwargs,
+    ) -> TimeSeriesDataFrame:
+        if known_covariates is not None:
+            X_df = known_covariates
+        else:
+            X_df = TimeSeriesDataFrame(
+                pd.DataFrame(float("inf"), index=self.get_forecast_horizon_index(data), columns=[self.target])
+            )
+        full_df = pd.concat([data, X_df])
+        if self._max_ts_length is not None:
+            full_df = full_df.slice_by_timestep(-(self._max_ts_length + self.prediction_length), None)
+        full_df = full_df.to_data_frame().reset_index()
+        if data.static_features is not None:
+            full_df = pd.merge(full_df, data.static_features, left_on=ITEMID, right_index=True, how="left")
+
+        full_df = (
+            full_df.rename(columns=self._ag_to_nixtla)
+            .sort_values(by=[MLF_ITEMID, MLF_TIMESTAMP])
+            .reset_index(drop=True)
+        )
+        full_df = full_df.assign(**{MLF_TARGET: full_df[MLF_TARGET].fillna(float("inf"))})
+
+        model_predict_kwargs = dict(
+            full_df=full_df,
+            prediction_length=self.prediction_length,
+            model_cls=self._model_cls,
+            date_features=self._date_features,
+        )
+        n_jobs = min(self._n_jobs, self.prediction_length, cpu_count(only_physical_cores=True))
+        predictions_per_step = Parallel(n_jobs=n_jobs)(
+            delayed(self._predict_with_single_model)(
+                step=step,
+                lags=self._get_lags_for_step(
+                    seasonal_lags=self._seasonal_lags, trailing_lags=self._trailing_lags, step=step
+                ),
+                path_to_model=os.path.join(self.path, suffix),
+                **model_predict_kwargs,
+            )
+            for step, suffix in enumerate(self._relative_paths_to_models)
+        )
+        predictions = pd.DataFrame(
+            np.stack(predictions_per_step, axis=1).reshape([-1, len(self.quantile_levels)]),
+            columns=[str(q) for q in self.quantile_levels],
+            index=self.get_forecast_horizon_index(data),
+        )
+        predictions["mean"] = predictions["0.5"]
+        return TimeSeriesDataFrame(predictions)
+
+    def _more_tags(self) -> Dict[str, Any]:
+        return {"allow_nan": True, "can_refit_full": True}
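To make the `_get_lags_for_step` rule above concrete: trailing lags shift with the forecast step, seasonal lags stay fixed, and anything not observable `step` values ahead is dropped. A small standalone replica (example lag values are illustrative):

```python
def lags_for_step(trailing_lags: list, seasonal_lags: list, step: int) -> list:
    shifted = [lag + step for lag in trailing_lags]  # trailing lags move with the step
    # keep only lags that are observable when predicting `step` values ahead
    return sorted({lag for lag in shifted + seasonal_lags if lag > step})

print(lags_for_step([1, 2, 3], [12, 24], step=0))  # [1, 2, 3, 12, 24]
print(lags_for_step([1, 2, 3], [12, 24], step=2))  # [3, 4, 5, 12, 24]
```

The per-model time budget in `_fit` follows the same batching logic: with `prediction_length=24` and `n_jobs=8`, the 24 models run in 3 parallel waves, so each model receives `time_limit / 3`.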
autogluon/timeseries/models/chronos/model.py CHANGED
@@ -3,7 +3,7 @@ import os
 import shutil
 import warnings
 from pathlib import Path
-from typing import Any, Dict, Literal, Optional, Union
+from typing import Any, Dict, Optional, Union

 import numpy as np
 import pandas as pd
@@ -295,8 +295,6 @@ class ChronosModel(AbstractTimeSeriesModel):
         pipeline = BaseChronosPipeline.from_pretrained(
             self.model_path,
             device_map=device,
-            # optimization cannot be used during fine-tuning
-            optimization_strategy=None if is_training else self.optimization_strategy,
             torch_dtype=self.torch_dtype,
         )

@@ -332,7 +330,6 @@ class ChronosModel(AbstractTimeSeriesModel):
             "torch_dtype": self.default_torch_dtype,
             "data_loader_num_workers": 0,
             "context_length": None,
-            "optimization_strategy": None,
             "fine_tune": False,
             "keep_transformers_logs": False,
             "fine_tune_lr": 1e-5,
@@ -406,17 +403,6 @@ class ChronosModel(AbstractTimeSeriesModel):
         self.device = model_params["device"]
         self.torch_dtype = model_params["torch_dtype"]
         self.data_loader_num_workers = model_params["data_loader_num_workers"]
-        self.optimization_strategy: Optional[Literal["onnx", "openvino"]] = model_params["optimization_strategy"]
-
-        if self.optimization_strategy is not None:
-            warnings.warn(
-                (
-                    "optimization_strategy is deprecated and will be removed in a future release. "
-                    "We recommend using Chronos-Bolt models for fast inference on the CPU."
-                ),
-                category=FutureWarning,
-                stacklevel=3,
-            )
         self.context_length = model_params["context_length"]

         if self.context_length is not None and self.context_length > self.maximum_context_length:
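This completes the deprecation cycle started by the removed `FutureWarning`: `optimization_strategy` is gone, and the migration path that warning recommended is a Chronos-Bolt model. A hedged sketch (the `bolt_small` alias is assumed to be available; `train_data` is a placeholder `TimeSeriesDataFrame`):

```python
from autogluon.timeseries import TimeSeriesPredictor

# Instead of Chronos + optimization_strategy="onnx"/"openvino", use a
# Chronos-Bolt variant for fast CPU inference, as the removed warning suggested.
predictor = TimeSeriesPredictor(prediction_length=24).fit(
    train_data,
    hyperparameters={"Chronos": {"model_path": "bolt_small"}},
)
```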
autogluon/timeseries/models/chronos/pipeline/chronos.py CHANGED
@@ -13,8 +13,6 @@ import torch
 import torch.nn as nn
 from transformers import AutoConfig, AutoModelForSeq2SeqLM, GenerationConfig, PreTrainedModel

-from autogluon.timeseries.utils.warning_filters import set_loggers_level
-
 from .base import BaseChronosPipeline, ForecastType

 logger = logging.getLogger("autogluon.timeseries.models.chronos")
@@ -529,7 +527,6 @@ class ChronosPipeline(BaseChronosPipeline):
         """
         kwargs = kwargs.copy()

-        optimization_strategy = kwargs.pop("optimization_strategy", None)
         context_length = kwargs.pop("context_length", None)

         config = AutoConfig.from_pretrained(*args, **kwargs)
@@ -540,45 +537,7 @@ class ChronosPipeline(BaseChronosPipeline):
         chronos_config = ChronosConfig(**config.chronos_config)

         assert chronos_config.model_type == "seq2seq"
-
-        if optimization_strategy is None:
-            inner_model = AutoModelForSeq2SeqLM.from_pretrained(*args, **kwargs)
-        else:
-            assert optimization_strategy in [
-                "onnx",
-                "openvino",
-            ], "optimization_strategy not recognized. Please provide one of `onnx` or `openvino`"
-            kwargs.pop("resume_download", None)  # Optimized pipeline does not support 'resume_download' kwargs
-            torch_dtype = kwargs.pop("torch_dtype", "auto")
-            if torch_dtype != "auto":
-                logger.warning(f"\t`torch_dtype` will be ignored for optimization_strategy {optimization_strategy}")
-
-            if optimization_strategy == "onnx":
-                try:
-                    from optimum.onnxruntime import ORTModelForSeq2SeqLM
-                except ImportError:
-                    raise ImportError(
-                        "Huggingface Optimum library must be installed with ONNX for using the `onnx` strategy. "
-                        "Please try running `pip install optimum[onnxruntime]` or use Chronos-Bolt models for "
-                        "faster performance on the CPU."
-                    )
-
-                assert kwargs.pop("device_map", "cpu") in ["cpu", "auto"], "ONNX mode only available on the CPU"
-                with set_loggers_level(regex=r"^optimum.*", level=logging.ERROR):
-                    inner_model = ORTModelForSeq2SeqLM.from_pretrained(*args, **{**kwargs, "export": True})
-            elif optimization_strategy == "openvino":
-                try:
-                    from optimum.intel import OVModelForSeq2SeqLM
-                except ImportError:
-                    raise ImportError(
-                        "Huggingface Optimum library must be installed with OpenVINO for using the `openvino` strategy. "
-                        "Please try running `pip install optimum-intel[openvino,nncf] optimum[openvino,nncf]` or use "
-                        "Chronos-Bolt models for faster performance on the CPU."
-                    )
-                with set_loggers_level(regex=r"^optimum.*", level=logging.ERROR):
-                    inner_model = OVModelForSeq2SeqLM.from_pretrained(
-                        *args, **{**kwargs, "device_map": "cpu", "export": True}
-                    )
-
+        inner_model = AutoModelForSeq2SeqLM.from_pretrained(*args, **kwargs)
         return cls(
             tokenizer=chronos_config.create_tokenizer(),
             model=ChronosPretrainedModel(config=chronos_config, model=inner_model),
autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py CHANGED
@@ -510,9 +510,6 @@ class ChronosBoltPipeline(BaseChronosPipeline):
         Supports the same arguments as ``AutoConfig`` and ``AutoModel``
         from ``transformers``.
         """
-        # if optimization_strategy is provided, pop this as it won't be used
-        kwargs.pop("optimization_strategy", None)
-
         config = AutoConfig.from_pretrained(*args, **kwargs)
         assert hasattr(config, "chronos_config"), "Not a Chronos config file"

autogluon/timeseries/models/local/abstract_local_model.py CHANGED
@@ -1,11 +1,11 @@
 import logging
 import time
-from multiprocessing import TimeoutError
+from multiprocessing import TimeoutError
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union

 import numpy as np
 import pandas as pd
-from joblib import Parallel, delayed
+from joblib import Parallel, cpu_count, delayed
 from scipy.stats import norm

 from autogluon.core.utils.exceptions import TimeLimitExceeded
@@ -19,7 +19,7 @@ logger = logging.getLogger(__name__)


 # We use the same default n_jobs across AG-TS to ensure that Joblib reuses the process pool
-AG_DEFAULT_N_JOBS = max(
+AG_DEFAULT_N_JOBS = max(cpu_count(only_physical_cores=True), 1)


 class AbstractLocalModel(AbstractTimeSeriesModel):
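The docstrings below describe `n_jobs` as either a core fraction or an absolute count. A hypothetical helper showing how such a value could be resolved against `joblib.cpu_count` (this is our reading of the documented semantics, not the library's code):

```python
from joblib import cpu_count

def resolve_n_jobs(n_jobs) -> int:
    """Resolve an int-or-float n_jobs setting to a concrete number of cores."""
    total = cpu_count(only_physical_cores=True)
    if isinstance(n_jobs, float) and 0.0 < n_jobs <= 1.0:
        return max(int(total * n_jobs), 1)  # fraction of available cores
    if isinstance(n_jobs, int) and n_jobs > 0:
        return min(n_jobs, total)           # absolute core count, capped
    raise ValueError(f"n_jobs must be a positive int or a float in (0, 1], got {n_jobs!r}")
```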
autogluon/timeseries/models/local/naive.py CHANGED
@@ -17,7 +17,7 @@ class NaiveModel(AbstractLocalModel):

     Other Parameters
     ----------------
-    n_jobs : int or float, default =
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
@@ -59,7 +59,7 @@ class SeasonalNaiveModel(AbstractLocalModel):
         specified manually by providing an integer > 1.
         If seasonal_period (inferred or provided) is equal to 1, will fall back to Naive forecast.
         Seasonality will also be disabled, if the length of the time series is < seasonal_period.
-    n_jobs : int or float, default =
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
@@ -89,7 +89,7 @@ class AverageModel(AbstractLocalModel):

     Other Parameters
     ----------------
-    n_jobs : int or float, default =
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
@@ -128,7 +128,7 @@ class SeasonalAverageModel(AbstractLocalModel):
         specified manually by providing an integer > 1.
         If seasonal_period (inferred or provided) is equal to 1, will fall back to Naive forecast.
         Seasonality will also be disabled, if the length of the time series is < seasonal_period.
-    n_jobs : int or float, default =
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
autogluon/timeseries/models/local/npts.py CHANGED
@@ -26,7 +26,7 @@ class NPTSModel(AbstractLocalModel):
         Number of samples generated by the forecast.
     num_default_time_features : int, default = 1
         Number of time features used by seasonal model.
-    n_jobs : int or float, default =
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
autogluon/timeseries/models/local/statsforecast.py CHANGED
@@ -123,7 +123,7 @@ class AutoARIMAModel(AbstractProbabilisticStatsForecastModel):
         When set to None, seasonal_period will be inferred from the frequency of the training data. Can also be
         specified manually by providing an integer > 1.
         If seasonal_period (inferred or provided) is equal to 1, seasonality will be disabled.
-    n_jobs : int or float, default =
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
@@ -201,7 +201,7 @@ class ARIMAModel(AbstractProbabilisticStatsForecastModel):
         When set to None, seasonal_period will be inferred from the frequency of the training data. Can also be
         specified manually by providing an integer > 1.
         If seasonal_period (inferred or provided) is equal to 1, seasonality will be disabled.
-    n_jobs : int or float, default =
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
@@ -257,7 +257,7 @@ class AutoETSModel(AbstractProbabilisticStatsForecastModel):
         If seasonal_period (inferred or provided) is equal to 1, seasonality will be disabled.
     damped : bool, default = False
         Whether to dampen the trend.
-    n_jobs : int or float, default =
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
@@ -320,7 +320,7 @@ class ETSModel(AutoETSModel):
         If seasonal_period (inferred or provided) is equal to 1, seasonality will be disabled.
     damped : bool, default = False
         Whether to dampen the trend.
-    n_jobs : int or float, default =
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
@@ -359,7 +359,7 @@ class DynamicOptimizedThetaModel(AbstractProbabilisticStatsForecastModel):
         When set to None, seasonal_period will be inferred from the frequency of the training data. Can also be
         specified manually by providing an integer > 1.
         If seasonal_period (inferred or provided) is equal to 1, seasonality will be disabled.
-    n_jobs : int or float, default =
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
@@ -403,7 +403,7 @@ class ThetaModel(AbstractProbabilisticStatsForecastModel):
         When set to None, seasonal_period will be inferred from the frequency of the training data. Can also be
         specified manually by providing an integer > 1.
         If seasonal_period (inferred or provided) is equal to 1, seasonality will be disabled.
-    n_jobs : int or float, default =
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
@@ -523,7 +523,7 @@ class AutoCESModel(AbstractProbabilisticStatsForecastModel):
         When set to None, seasonal_period will be inferred from the frequency of the training data. Can also be
         specified manually by providing an integer > 1.
         If seasonal_period (inferred or provided) is equal to 1, seasonality will be disabled.
-    n_jobs : int or float, default =
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
@@ -590,7 +590,7 @@ class ADIDAModel(AbstractStatsForecastIntermittentDemandModel):

     Other Parameters
     ----------------
-    n_jobs : int or float, default =
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
@@ -626,7 +626,7 @@ class CrostonModel(AbstractStatsForecastIntermittentDemandModel):
         - `"SBA"` - variant of the Croston method based on Syntetos-Boylan Approximation (based on `statsforecast.models.CrostonSBA <https://nixtla.mintlify.app/statsforecast/docs/models/crostonsba.html>`_)
         - `"optimized"` - variant of the Croston method where the smoothing parameter is optimized (based on `statsforecast.models.CrostonOptimized <https://nixtla.mintlify.app/statsforecast/docs/models/crostonoptimized.html>`_)

-    n_jobs : int or float, default =
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
@@ -678,7 +678,7 @@ class IMAPAModel(AbstractStatsForecastIntermittentDemandModel):

     Other Parameters
     ----------------
-    n_jobs : int or float, default =
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
@@ -700,7 +700,7 @@ class ZeroModel(AbstractStatsForecastIntermittentDemandModel):

     Other Parameters
     ----------------
-    n_jobs : int or float, default =
+    n_jobs : int or float, default = joblib.cpu_count(only_physical_cores=True)
         Number of CPU cores used to fit the models in parallel.
         When set to a float between 0.0 and 1.0, that fraction of available CPU cores is used.
         When set to a positive integer, that many cores are used.
autogluon/timeseries/models/presets.py CHANGED
@@ -27,6 +27,7 @@ from . import (
     NaiveModel,
     NPTSModel,
     PatchTSTModel,
+    PerStepTabularModel,
     RecursiveTabularModel,
     SeasonalAverageModel,
     SeasonalNaiveModel,
@@ -55,6 +56,7 @@ MODEL_TYPES = dict(
     WaveNet=WaveNetModel,
     RecursiveTabular=RecursiveTabularModel,
     DirectTabular=DirectTabularModel,
+    PerStepTabular=PerStepTabularModel,
     Average=AverageModel,
     SeasonalAverage=SeasonalAverageModel,
     Naive=NaiveModel,
@@ -84,6 +86,7 @@ DEFAULT_MODEL_PRIORITY = dict(
     Zero=100,
     RecursiveTabular=90,
     DirectTabular=85,
+    PerStepTabular=70,  # TODO: Update priority
     # All local models are grouped together to make sure that joblib parallel pool is reused
     NPTS=80,
     ETS=80,
autogluon/timeseries/regressor.py CHANGED
@@ -146,6 +146,7 @@ class GlobalCovariateRegressor(CovariateRegressor):
             # Has no effect since the model won't be saved to disk.
             # We provide path to avoid https://github.com/autogluon/autogluon/issues/4832
             path="",
+            name=self.model_type.__name__,
         )
         if time_limit is not None:
             time_limit_fit = self.fit_time_fraction * (time_limit - (time.monotonic() - start_time))
autogluon/timeseries/utils/datetime/lags.py CHANGED
@@ -164,7 +164,7 @@ def get_lags_for_frequency(
         raise Exception(f"Cannot get lags for unsupported frequency {freq}")

     # flatten lags list and filter
-    lags = [int(lag) for sub_list in lags for lag in sub_list if
+    lags = [int(lag) for sub_list in lags for lag in sub_list if num_default_lags < lag <= lag_ub]
     lags = list(range(1, num_default_lags + 1)) + sorted(list(set(lags)))

     return sorted(set(lags))[:num_lags]
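The restored condition keeps only flattened lags in the half-open interval `(num_default_lags, lag_ub]`; for example:

```python
# illustrative values, not taken from the library
lags = [[7, 14], [30, 365]]
num_default_lags, lag_ub = 0, 60
flat = [int(lag) for sub_list in lags for lag in sub_list if num_default_lags < lag <= lag_ub]
# flat == [7, 14, 30]; 365 exceeds lag_ub and is dropped
```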
autogluon/timeseries/version.py CHANGED

{autogluon.timeseries-1.3.2b20250709.dist-info → autogluon.timeseries-1.3.2b20250711.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: autogluon.timeseries
-Version: 1.3.2b20250709
+Version: 1.3.2b20250711
 Summary: Fast and Accurate ML in 3 Lines of Code
 Home-page: https://github.com/autogluon/autogluon
 Author: AutoGluon Community
@@ -38,8 +38,8 @@ License-File: ../LICENSE
 License-File: ../NOTICE
 Requires-Dist: joblib<2,>=1.1
 Requires-Dist: numpy<2.4.0,>=1.25.0
-Requires-Dist: scipy<1.
-Requires-Dist: pandas<2.
+Requires-Dist: scipy<1.17,>=1.5.4
+Requires-Dist: pandas<2.4.0,>=2.0.0
 Requires-Dist: torch<2.8,>=2.2
 Requires-Dist: lightning<2.8,>=2.2
 Requires-Dist: pytorch-lightning
@@ -55,16 +55,11 @@ Requires-Dist: fugue>=0.9.0
 Requires-Dist: tqdm<5,>=4.38
 Requires-Dist: orjson~=3.9
 Requires-Dist: tensorboard<3,>=2.9
-Requires-Dist: autogluon.core[raytune]==1.3.2b20250709
-Requires-Dist: autogluon.common==1.3.2b20250709
-Requires-Dist: autogluon.features==1.3.2b20250709
-Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.3.2b20250709
+Requires-Dist: autogluon.core[raytune]==1.3.2b20250711
+Requires-Dist: autogluon.common==1.3.2b20250711
+Requires-Dist: autogluon.features==1.3.2b20250711
+Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.3.2b20250711
 Provides-Extra: all
-Provides-Extra: chronos-onnx
-Requires-Dist: optimum[onnxruntime]<1.23,>=1.17; extra == "chronos-onnx"
-Provides-Extra: chronos-openvino
-Requires-Dist: optimum-intel[nncf,openvino]<1.23,>=1.15; extra == "chronos-openvino"
-Requires-Dist: optimum[nncf,openvino]<1.23,>=1.17; extra == "chronos-openvino"
 Provides-Extra: tests
 Requires-Dist: pytest; extra == "tests"
 Requires-Dist: ruff>=0.0.285; extra == "tests"
{autogluon.timeseries-1.3.2b20250709.dist-info → autogluon.timeseries-1.3.2b20250711.dist-info}/RECORD CHANGED
@@ -1,12 +1,12 @@
-autogluon.timeseries-1.3.2b20250709-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
+autogluon.timeseries-1.3.2b20250711-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
 autogluon/timeseries/__init__.py,sha256=_CrLLc1fkjen7UzWoO0Os8WZoHOgvZbHKy46I8v_4k4,304
 autogluon/timeseries/evaluator.py,sha256=l642tYfTHsl8WVIq_vV6qhgAFVFr9UuZD7gLra3A_Kc,250
 autogluon/timeseries/learner.py,sha256=pIn4YSOk0aqCWyBpIlwnAsFnG4h7PLXk8guFH3wFS-w,13923
 autogluon/timeseries/predictor.py,sha256=u4d7-xMs669g5xxqIYuvEyGQ0P6Y8IoToiyg9zUZoy4,88168
-autogluon/timeseries/regressor.py,sha256=
+autogluon/timeseries/regressor.py,sha256=G0zecniv85wr8EXlXsbiqpKYHE5KeNALHRzPp_hO5qs,12001
 autogluon/timeseries/splitter.py,sha256=yzPca9p2bWV-_VJAptUyyzQsxu-uixAdpMoGQtDzMD4,3205
 autogluon/timeseries/trainer.py,sha256=-xdGZ4v8OTA3AzMjBJ4CwGYhmKBRsY0Q-dm6YioFOmc,57977
-autogluon/timeseries/version.py,sha256=
+autogluon/timeseries/version.py,sha256=v0iakvttW3DdrV3QLVZS9POR34hhZdNc3hc0eVzEc6k,91
 autogluon/timeseries/configs/__init__.py,sha256=BTtHIPCYeGjqgOcvqb8qPD4VNX-ICKOg6wnkew1cPOE,98
 autogluon/timeseries/configs/presets_configs.py,sha256=cLat8ecLlWrI-SC5KLBDCX2SbVXaucemy2pjxJAtSY0,2543
 autogluon/timeseries/dataset/__init__.py,sha256=UvnhAN5tjgxXTHoZMQDy64YMDj4Xxa68yY7NP4vAw0o,81
@@ -16,22 +16,23 @@ autogluon/timeseries/metrics/abstract.py,sha256=BpHVmzkzM6EN63NQrDRkApIeAyrpT6Y9
 autogluon/timeseries/metrics/point.py,sha256=xllyGh11otbmUVHyIaceROPR3qyllWPQ9xlSmIGI3EI,18306
 autogluon/timeseries/metrics/quantile.py,sha256=vhmETtjPsIfVlvtILNAT6F2PtIDNPrOroy-U1FQbgw8,4632
 autogluon/timeseries/metrics/utils.py,sha256=HuDe1BNe8yJU4f_DKM913nNrUueoRaw6zhxm1-S20s0,910
-autogluon/timeseries/models/__init__.py,sha256=
-autogluon/timeseries/models/presets.py,sha256=
+autogluon/timeseries/models/__init__.py,sha256=nx61eXLCxWIb-eJXpYgCw3C7naNklh_FAaKImb8EdvI,1237
+autogluon/timeseries/models/presets.py,sha256=ejVCs1Uv6EwVn55uKYyb4ju0kFuuwlOaO0yVmwYbMgI,12314
 autogluon/timeseries/models/abstract/__init__.py,sha256=Htfkjjc3vo92RvyM8rIlQ0PLWt3jcrCKZES07UvCMV0,146
-autogluon/timeseries/models/abstract/abstract_timeseries_model.py,sha256=
+autogluon/timeseries/models/abstract/abstract_timeseries_model.py,sha256=cxAZoYeLT9KsUAHlWlCH9WVw7I_L65m8CMKZBMN7LIU,33112
 autogluon/timeseries/models/abstract/model_trial.py,sha256=ENPg_7nsdxIvaNM0o0UShZ3x8jFlRmwRc5m0fGPC0TM,3720
 autogluon/timeseries/models/abstract/tunable.py,sha256=SFl4vjkb6BfFFaRPVdftnnLYlIyCThutLHxiiAlV6tY,7168
-autogluon/timeseries/models/autogluon_tabular/__init__.py,sha256=
-autogluon/timeseries/models/autogluon_tabular/mlforecast.py,sha256=
+autogluon/timeseries/models/autogluon_tabular/__init__.py,sha256=E5fZsdFPgVdyCVyj5bGmn_lQFlCMn2NvuRLBMcCFvhM,205
+autogluon/timeseries/models/autogluon_tabular/mlforecast.py,sha256=-81pbdlvGGtEzBpBJbRt-5HgOonpRWdw6DpiGpoKNkE,37228
+autogluon/timeseries/models/autogluon_tabular/per_step.py,sha256=qCC8ed4pqm6yoW743WJ2z1Nh6WV8-Z8EVqRwX9Lz6eE,20580
 autogluon/timeseries/models/autogluon_tabular/transforms.py,sha256=aI1QJLJaOB5Xy2WA0jo6Jh25MRVyyZ8ONrqlV96kpw0,2735
 autogluon/timeseries/models/autogluon_tabular/utils.py,sha256=Fn3Vu_Q0PCtEUbtNgLp1xIblg7dOdpFlF3W5kLHgruI,63
 autogluon/timeseries/models/chronos/__init__.py,sha256=wT77HzTtmQxW3sw2k0mA5Ot6PSHivX-Uvn5fjM05EU4,60
-autogluon/timeseries/models/chronos/model.py,sha256=
+autogluon/timeseries/models/chronos/model.py,sha256=zs8tbK4CMd-MvHrN_RZJ4sPcJiiLYiGDKtwgSLl9SZY,32315
 autogluon/timeseries/models/chronos/pipeline/__init__.py,sha256=bkTR0LSKIxAaKFOr9A0HSkCtnRdikDPUPp810WOKgxE,247
 autogluon/timeseries/models/chronos/pipeline/base.py,sha256=14OAKHmio6LmO4mVom2mPGB0CvIrOjMGJzb-MVSAq-s,5596
-autogluon/timeseries/models/chronos/pipeline/chronos.py,sha256=
-autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py,sha256=
+autogluon/timeseries/models/chronos/pipeline/chronos.py,sha256=C44HGXa_eW80gnnsTgTdsD18aVH-pe-DqkxUYcQx7K4,20216
+autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py,sha256=BGov6fKr3hip_b0vVQEGAjvRFyc-bucmFPq0s8OoIwU,21410
 autogluon/timeseries/models/chronos/pipeline/utils.py,sha256=rWqT3DB9upZb7GFVMOxc-ww2EhH8bD7TmEZNi_xTAbE,13033
 autogluon/timeseries/models/ensemble/__init__.py,sha256=x2Y6dWk15XugTEWNUKq8U5z6nIjelo3UjpI-TfS13OE,159
 autogluon/timeseries/models/ensemble/abstract.py,sha256=ie-BKD4JIkQQoKqtf6sYI5Aix7dSgywFsSdeGPxoElk,5821
@@ -42,10 +43,10 @@ autogluon/timeseries/models/gluonts/abstract.py,sha256=ae-VGN2KY6W8RtzZH3wxhjUP-
 autogluon/timeseries/models/gluonts/dataset.py,sha256=I_4Rq2CXiLiiSf99WYYaRfT7NXEUmlkW1JIZnWjAdLY,5121
 autogluon/timeseries/models/gluonts/models.py,sha256=Pi_zCRkslt2-LXkZpE56aRx9J4gRCOVabqYltPtI9tE,25718
 autogluon/timeseries/models/local/__init__.py,sha256=e2UImoJhmj70E148IIObv90C_bHxgyLNk6YsS4p7pfs,701
-autogluon/timeseries/models/local/abstract_local_model.py,sha256=
-autogluon/timeseries/models/local/naive.py,sha256=
-autogluon/timeseries/models/local/npts.py,sha256=
-autogluon/timeseries/models/local/statsforecast.py,sha256=
+autogluon/timeseries/models/local/abstract_local_model.py,sha256=BVCMC0wNMwrrDfZy_SQJeEajPmYBAyUlMu4qrTkWJBQ,11535
+autogluon/timeseries/models/local/naive.py,sha256=TAiQLt3fGCQoZKjBzmlhosV2XVEZ1urtPHDhM7Mf2i8,7408
+autogluon/timeseries/models/local/npts.py,sha256=I3y5g-718TVVhAbotfJ74wvLfLQ6HfLwA_ivrEWY7Qc,4182
+autogluon/timeseries/models/local/statsforecast.py,sha256=h2ra9yWEY8DTUSPzgwS8nBKdk7dThwPjY1Os-ewRId4,33044
 autogluon/timeseries/models/multi_window/__init__.py,sha256=Bq7AT2Jxdd4WNqmjTdzeqgNiwn1NCyWp4tBIWaM-zfI,60
 autogluon/timeseries/models/multi_window/multi_window_model.py,sha256=xW55TMg7kgta-TmBpVZGcDQlBdBN_eW1z1lVNjZGhpo,11833
 autogluon/timeseries/transforms/__init__.py,sha256=fKlT4pkJ_8Gl7IUTc3uSDzt2Xow5iH5w6fPB3ePNrTg,127
@@ -57,14 +58,14 @@ autogluon/timeseries/utils/forecast.py,sha256=yK1_eNtRUPYGs0R-VWMO4c81LrTGF57ih3
 autogluon/timeseries/utils/warning_filters.py,sha256=tHvhj9y7c3MP6JrjAedc7UiFFw0_mKYziDQupw8NhiQ,2538
 autogluon/timeseries/utils/datetime/__init__.py,sha256=bTMR8jLh1LW55vHjbOr1zvWRMF_PqbvxpS-cUcNIDWI,173
 autogluon/timeseries/utils/datetime/base.py,sha256=3NdsH3NDq4cVAOSoy3XpaNixyNlbjy4DJ_YYOGuu9x4,1341
-autogluon/timeseries/utils/datetime/lags.py,sha256=
+autogluon/timeseries/utils/datetime/lags.py,sha256=dpndFOV-d-AqCTwKeQ5Dz-AfCJTeI27bxDC13QzY4y8,6003
 autogluon/timeseries/utils/datetime/seasonality.py,sha256=YK_2k8hvYIMW-sJPnjGWRtCnvIOthwA2hATB3nwVoD4,834
 autogluon/timeseries/utils/datetime/time_features.py,sha256=MjLi3zQ00uWWJtXH9oGX2GJkTbvjdSiuabSa4kcVuxE,2672
-autogluon.timeseries-1.3.2b20250709.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
-autogluon.timeseries-1.3.2b20250709.dist-info/METADATA,sha256=
-autogluon.timeseries-1.3.2b20250709.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
-autogluon.timeseries-1.3.2b20250709.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-autogluon.timeseries-1.3.2b20250709.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
-autogluon.timeseries-1.3.2b20250709.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
-autogluon.timeseries-1.3.2b20250709.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-autogluon.timeseries-1.3.2b20250709.dist-info/RECORD,,
+autogluon.timeseries-1.3.2b20250711.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
+autogluon.timeseries-1.3.2b20250711.dist-info/METADATA,sha256=IBPw5YQfeVFkFZJGhm1AO_8TdIxx8D0-AVgG6a3T9c4,12443
+autogluon.timeseries-1.3.2b20250711.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
+autogluon.timeseries-1.3.2b20250711.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+autogluon.timeseries-1.3.2b20250711.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
+autogluon.timeseries-1.3.2b20250711.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
+autogluon.timeseries-1.3.2b20250711.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+autogluon.timeseries-1.3.2b20250711.dist-info/RECORD,,
The remaining files (the py3.9-nspkg.pth and the dist-info LICENSE, NOTICE, WHEEL, namespace_packages.txt, top_level.txt, and zip-safe) are renamed for the new version without content changes.