autogluon.timeseries 1.4.1b20251010__py3-none-any.whl → 1.4.1b20251115__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of autogluon.timeseries might be problematic. Click here for more details.
- autogluon/timeseries/dataset/ts_dataframe.py +66 -53
- autogluon/timeseries/learner.py +5 -4
- autogluon/timeseries/metrics/quantile.py +1 -1
- autogluon/timeseries/metrics/utils.py +4 -4
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +28 -36
- autogluon/timeseries/models/autogluon_tabular/per_step.py +14 -5
- autogluon/timeseries/models/autogluon_tabular/transforms.py +9 -7
- autogluon/timeseries/models/chronos/model.py +101 -68
- autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +64 -32
- autogluon/timeseries/models/ensemble/__init__.py +29 -2
- autogluon/timeseries/models/ensemble/abstract.py +1 -37
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +247 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +50 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +10 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +87 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +133 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +141 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +41 -0
- autogluon/timeseries/models/ensemble/{basic.py → weighted/basic.py} +0 -10
- autogluon/timeseries/models/gluonts/abstract.py +2 -2
- autogluon/timeseries/models/gluonts/dataset.py +2 -2
- autogluon/timeseries/models/local/abstract_local_model.py +2 -2
- autogluon/timeseries/models/multi_window/multi_window_model.py +1 -1
- autogluon/timeseries/models/toto/model.py +5 -3
- autogluon/timeseries/predictor.py +10 -26
- autogluon/timeseries/regressor.py +9 -7
- autogluon/timeseries/splitter.py +1 -25
- autogluon/timeseries/trainer/ensemble_composer.py +250 -0
- autogluon/timeseries/trainer/trainer.py +124 -193
- autogluon/timeseries/trainer/utils.py +18 -0
- autogluon/timeseries/transforms/covariate_scaler.py +1 -1
- autogluon/timeseries/transforms/target_scaler.py +7 -7
- autogluon/timeseries/utils/features.py +9 -5
- autogluon/timeseries/utils/forecast.py +5 -5
- autogluon/timeseries/version.py +1 -1
- autogluon.timeseries-1.4.1b20251115-py3.9-nspkg.pth +1 -0
- {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info}/METADATA +25 -15
- {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info}/RECORD +47 -41
- {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info}/WHEEL +1 -1
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -10
- autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
- autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -544
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -580
- autogluon.timeseries-1.4.1b20251010-py3.9-nspkg.pth +0 -1
- /autogluon/timeseries/models/ensemble/{greedy.py → weighted/greedy.py} +0 -0
- {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.4.1b20251010.dist-info → autogluon_timeseries-1.4.1b20251115.dist-info}/zip-safe +0 -0
|
@@ -5,7 +5,7 @@ import time
|
|
|
5
5
|
import traceback
|
|
6
6
|
from collections import defaultdict
|
|
7
7
|
from pathlib import Path
|
|
8
|
-
from typing import Any, Literal, Optional, Type, Union
|
|
8
|
+
from typing import Any, Literal, Optional, Union
|
|
9
9
|
|
|
10
10
|
import networkx as nx
|
|
11
11
|
import numpy as np
|
|
@@ -20,18 +20,20 @@ from autogluon.core.utils.savers import save_pkl
|
|
|
20
20
|
from autogluon.timeseries import TimeSeriesDataFrame
|
|
21
21
|
from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
|
|
22
22
|
from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel, TimeSeriesModelBase
|
|
23
|
-
from autogluon.timeseries.models.ensemble import AbstractTimeSeriesEnsembleModel, GreedyEnsemble
|
|
23
|
+
from autogluon.timeseries.models.ensemble import AbstractTimeSeriesEnsembleModel
|
|
24
24
|
from autogluon.timeseries.models.multi_window import MultiWindowBacktestingModel
|
|
25
25
|
from autogluon.timeseries.splitter import AbstractWindowSplitter, ExpandingWindowSplitter
|
|
26
|
+
from autogluon.timeseries.trainer.ensemble_composer import EnsembleComposer, validate_ensemble_hyperparameters
|
|
26
27
|
from autogluon.timeseries.utils.features import (
|
|
27
28
|
ConstantReplacementFeatureImportanceTransform,
|
|
28
29
|
CovariateMetadata,
|
|
29
30
|
PermutationFeatureImportanceTransform,
|
|
30
31
|
)
|
|
31
|
-
from autogluon.timeseries.utils.warning_filters import disable_tqdm, warning_filter
|
|
32
|
+
from autogluon.timeseries.utils.warning_filters import disable_tqdm
|
|
32
33
|
|
|
33
34
|
from .model_set_builder import TrainableModelSetBuilder, contains_searchspace
|
|
34
35
|
from .prediction_cache import PredictionCache, get_prediction_cache
|
|
36
|
+
from .utils import log_scores_and_times
|
|
35
37
|
|
|
36
38
|
logger = logging.getLogger("autogluon.timeseries.trainer")
|
|
37
39
|
|
|
@@ -50,11 +52,11 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
|
50
52
|
skip_model_selection: bool = False,
|
|
51
53
|
enable_ensemble: bool = True,
|
|
52
54
|
verbosity: int = 2,
|
|
53
|
-
|
|
55
|
+
num_val_windows: Optional[int] = None,
|
|
56
|
+
val_step_size: Optional[int] = None,
|
|
54
57
|
refit_every_n_windows: Optional[int] = 1,
|
|
55
58
|
# TODO: Set cache_predictions=False by default once all models in default presets have a reasonable inference speed
|
|
56
59
|
cache_predictions: bool = True,
|
|
57
|
-
ensemble_model_type: Optional[Type] = None,
|
|
58
60
|
**kwargs,
|
|
59
61
|
):
|
|
60
62
|
super().__init__(
|
|
@@ -71,13 +73,11 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
|
71
73
|
self.skip_model_selection = skip_model_selection
|
|
72
74
|
# Ensemble cannot be fit if val_scores are not computed
|
|
73
75
|
self.enable_ensemble = enable_ensemble and not skip_model_selection
|
|
74
|
-
if ensemble_model_type is None:
|
|
75
|
-
ensemble_model_type = GreedyEnsemble
|
|
76
|
-
else:
|
|
76
|
+
if kwargs.get("ensemble_model_type") is not None:
|
|
77
77
|
logger.warning(
|
|
78
|
-
"Using a custom `ensemble_model_type` is
|
|
78
|
+
"Using a custom `ensemble_model_type` is no longer supported. Use the `ensemble_hyperparameters` "
|
|
79
|
+
"argument to `fit` instead."
|
|
79
80
|
)
|
|
80
|
-
self.ensemble_model_type: Type[AbstractTimeSeriesEnsembleModel] = ensemble_model_type
|
|
81
81
|
|
|
82
82
|
self.verbosity = verbosity
|
|
83
83
|
|
|
@@ -86,10 +86,9 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
|
86
86
|
self.model_refit_map = {}
|
|
87
87
|
|
|
88
88
|
self.eval_metric = check_get_evaluation_metric(eval_metric, prediction_length=prediction_length)
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
self.val_splitter = val_splitter
|
|
89
|
+
|
|
90
|
+
self.num_val_windows = num_val_windows
|
|
91
|
+
self.val_step_size = val_step_size
|
|
93
92
|
self.refit_every_n_windows = refit_every_n_windows
|
|
94
93
|
self.hpo_results = {}
|
|
95
94
|
|
|
@@ -259,25 +258,6 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
|
259
258
|
|
|
260
259
|
return info
|
|
261
260
|
|
|
262
|
-
def _train_single(
|
|
263
|
-
self,
|
|
264
|
-
train_data: TimeSeriesDataFrame,
|
|
265
|
-
model: AbstractTimeSeriesModel,
|
|
266
|
-
val_data: Optional[TimeSeriesDataFrame] = None,
|
|
267
|
-
time_limit: Optional[float] = None,
|
|
268
|
-
) -> AbstractTimeSeriesModel:
|
|
269
|
-
"""Train the single model and return the model object that was fitted. This method
|
|
270
|
-
does not save the resulting model."""
|
|
271
|
-
model.fit(
|
|
272
|
-
train_data=train_data,
|
|
273
|
-
val_data=val_data,
|
|
274
|
-
time_limit=time_limit,
|
|
275
|
-
verbosity=self.verbosity,
|
|
276
|
-
val_splitter=self.val_splitter,
|
|
277
|
-
refit_every_n_windows=self.refit_every_n_windows,
|
|
278
|
-
)
|
|
279
|
-
return model
|
|
280
|
-
|
|
281
261
|
def tune_model_hyperparameters(
|
|
282
262
|
self,
|
|
283
263
|
model: AbstractTimeSeriesModel,
|
|
@@ -300,7 +280,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
|
300
280
|
hyperparameter_tune_kwargs=hyperparameter_tune_kwargs,
|
|
301
281
|
time_limit=time_limit,
|
|
302
282
|
default_num_trials=default_num_trials,
|
|
303
|
-
val_splitter=self.val_splitter,
|
|
283
|
+
val_splitter=self._get_val_splitter(),
|
|
304
284
|
refit_every_n_windows=self.refit_every_n_windows,
|
|
305
285
|
)
|
|
306
286
|
total_tuning_time = time.time() - tuning_start_time
|
|
@@ -353,7 +333,15 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
|
353
333
|
logger.info(f"\tSkipping {model.name} due to lack of time remaining.")
|
|
354
334
|
return model_names_trained
|
|
355
335
|
|
|
356
|
-
model
|
|
336
|
+
model.fit(
|
|
337
|
+
train_data=train_data,
|
|
338
|
+
val_data=val_data,
|
|
339
|
+
time_limit=time_limit,
|
|
340
|
+
verbosity=self.verbosity,
|
|
341
|
+
val_splitter=self._get_val_splitter(),
|
|
342
|
+
refit_every_n_windows=self.refit_every_n_windows,
|
|
343
|
+
)
|
|
344
|
+
|
|
357
345
|
fit_end_time = time.time()
|
|
358
346
|
model.fit_time = model.fit_time or (fit_end_time - fit_start_time)
|
|
359
347
|
|
|
@@ -364,7 +352,12 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
|
364
352
|
val_data, store_val_score=True, store_predict_time=True, time_limit=time_limit
|
|
365
353
|
)
|
|
366
354
|
|
|
367
|
-
|
|
355
|
+
log_scores_and_times(
|
|
356
|
+
val_score=model.val_score,
|
|
357
|
+
fit_time=model.fit_time,
|
|
358
|
+
predict_time=model.predict_time,
|
|
359
|
+
eval_metric_name=self.eval_metric.name_with_sign,
|
|
360
|
+
)
|
|
368
361
|
|
|
369
362
|
self.save_model(model=model)
|
|
370
363
|
except TimeLimitExceeded:
|
|
@@ -380,31 +373,51 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
|
380
373
|
|
|
381
374
|
return model_names_trained
|
|
382
375
|
|
|
383
|
-
def _log_scores_and_times(
|
|
384
|
-
self,
|
|
385
|
-
val_score: Optional[float] = None,
|
|
386
|
-
fit_time: Optional[float] = None,
|
|
387
|
-
predict_time: Optional[float] = None,
|
|
388
|
-
):
|
|
389
|
-
if val_score is not None:
|
|
390
|
-
logger.info(f"\t{val_score:<7.4f}".ljust(15) + f"= Validation score ({self.eval_metric.name_with_sign})")
|
|
391
|
-
if fit_time is not None:
|
|
392
|
-
logger.info(f"\t{fit_time:<7.2f} s".ljust(15) + "= Training runtime")
|
|
393
|
-
if predict_time is not None:
|
|
394
|
-
logger.info(f"\t{predict_time:<7.2f} s".ljust(15) + "= Validation (prediction) runtime")
|
|
395
|
-
|
|
396
|
-
def _train_multi(
|
|
376
|
+
def fit(
|
|
397
377
|
self,
|
|
398
378
|
train_data: TimeSeriesDataFrame,
|
|
399
|
-
hyperparameters: Union[str, dict],
|
|
379
|
+
hyperparameters: Union[str, dict[Any, dict]],
|
|
400
380
|
val_data: Optional[TimeSeriesDataFrame] = None,
|
|
381
|
+
ensemble_hyperparameters: Optional[dict] = None,
|
|
401
382
|
hyperparameter_tune_kwargs: Optional[Union[str, dict]] = None,
|
|
402
383
|
excluded_model_types: Optional[list[str]] = None,
|
|
403
384
|
time_limit: Optional[float] = None,
|
|
404
385
|
random_seed: Optional[int] = None,
|
|
405
|
-
) -> list[str]:
|
|
386
|
+
):
|
|
387
|
+
"""Fit a set of timeseries models specified by the `hyperparameters`
|
|
388
|
+
dictionary that maps model names to their specified hyperparameters.
|
|
389
|
+
|
|
390
|
+
Parameters
|
|
391
|
+
----------
|
|
392
|
+
train_data
|
|
393
|
+
Training data for fitting time series timeseries models.
|
|
394
|
+
hyperparameters
|
|
395
|
+
A dictionary mapping selected model names, model classes or model factory to hyperparameter
|
|
396
|
+
settings. Model names should be present in `trainer.presets.DEFAULT_MODEL_NAMES`. Optionally,
|
|
397
|
+
the user may provide one of "default", "light" and "very_light" to specify presets.
|
|
398
|
+
val_data
|
|
399
|
+
Optional validation data set to report validation scores on.
|
|
400
|
+
ensemble_hyperparameters
|
|
401
|
+
A dictionary mapping ensemble names to their specified hyperparameters. Ensemble names
|
|
402
|
+
should be defined in the models.ensemble namespace. defaults to `{"GreedyEnsemble": {}}`
|
|
403
|
+
which only fits a greedy weighted ensemble with default hyperparameters. Providing an
|
|
404
|
+
empty dictionary disables ensemble training.
|
|
405
|
+
hyperparameter_tune_kwargs
|
|
406
|
+
Args for hyperparameter tuning
|
|
407
|
+
excluded_model_types
|
|
408
|
+
Names of models that should not be trained, even if listed in `hyperparameters`.
|
|
409
|
+
time_limit
|
|
410
|
+
Time limit for training
|
|
411
|
+
random_seed
|
|
412
|
+
Random seed that will be set to each model during training
|
|
413
|
+
"""
|
|
406
414
|
logger.info(f"\nStarting training. Start time is {time.strftime('%Y-%m-%d %H:%M:%S')}")
|
|
407
415
|
|
|
416
|
+
# Handle ensemble hyperparameters
|
|
417
|
+
if ensemble_hyperparameters is None:
|
|
418
|
+
ensemble_hyperparameters = {"GreedyEnsemble": {}}
|
|
419
|
+
ensemble_hyperparameters = validate_ensemble_hyperparameters(ensemble_hyperparameters)
|
|
420
|
+
|
|
408
421
|
time_start = time.time()
|
|
409
422
|
hyperparameters = copy.deepcopy(hyperparameters)
|
|
410
423
|
|
|
@@ -418,7 +431,7 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
|
418
431
|
hyperparameters=hyperparameters,
|
|
419
432
|
hyperparameter_tune=hyperparameter_tune_kwargs is not None, # TODO: remove hyperparameter_tune
|
|
420
433
|
freq=train_data.freq,
|
|
421
|
-
multi_window=self.val_splitter.num_val_windows > 0,
|
|
434
|
+
multi_window=self._get_val_splitter().num_val_windows > 0,
|
|
422
435
|
excluded_model_types=excluded_model_types,
|
|
423
436
|
)
|
|
424
437
|
|
|
@@ -487,42 +500,13 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
|
487
500
|
train_data, model=model, val_data=val_data, time_limit=time_left_for_model
|
|
488
501
|
)
|
|
489
502
|
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
if time_left_for_ensemble is not None and time_left_for_ensemble <= 0:
|
|
498
|
-
logger.info(
|
|
499
|
-
"Not fitting ensemble due to lack of time remaining. "
|
|
500
|
-
f"Time left: {time_left_for_ensemble:.1f} seconds"
|
|
501
|
-
)
|
|
502
|
-
elif len(models_available_for_ensemble) <= 1:
|
|
503
|
-
logger.info(
|
|
504
|
-
"Not fitting ensemble as "
|
|
505
|
-
+ (
|
|
506
|
-
"no models were successfully trained."
|
|
507
|
-
if not models_available_for_ensemble
|
|
508
|
-
else "only 1 model was trained."
|
|
509
|
-
)
|
|
510
|
-
)
|
|
511
|
-
else:
|
|
512
|
-
try:
|
|
513
|
-
model_names_trained.append(
|
|
514
|
-
self.fit_ensemble(
|
|
515
|
-
data_per_window=self._get_ensemble_oof_data(train_data=train_data, val_data=val_data),
|
|
516
|
-
model_names=models_available_for_ensemble,
|
|
517
|
-
time_limit=time_left_for_ensemble,
|
|
518
|
-
)
|
|
519
|
-
)
|
|
520
|
-
except Exception as err: # noqa
|
|
521
|
-
logger.error(
|
|
522
|
-
"\tWarning: Exception caused ensemble to fail during training... Skipping this model."
|
|
523
|
-
)
|
|
524
|
-
logger.error(f"\t{err}")
|
|
525
|
-
logger.debug(traceback.format_exc())
|
|
503
|
+
ensemble_names = self._fit_ensembles(
|
|
504
|
+
train_data=train_data,
|
|
505
|
+
val_data=val_data,
|
|
506
|
+
time_limit=None if time_limit is None else time_limit - (time.time() - time_start),
|
|
507
|
+
ensemble_hyperparameters=ensemble_hyperparameters,
|
|
508
|
+
)
|
|
509
|
+
model_names_trained.extend(ensemble_names)
|
|
526
510
|
|
|
527
511
|
logger.info(f"Training complete. Models trained: {model_names_trained}")
|
|
528
512
|
logger.info(f"Total runtime: {time.time() - time_start:.2f} s")
|
|
@@ -536,76 +520,66 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
|
536
520
|
|
|
537
521
|
return model_names_trained
|
|
538
522
|
|
|
539
|
-
def
|
|
540
|
-
self,
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
523
|
+
def _fit_ensembles(
|
|
524
|
+
self,
|
|
525
|
+
*,
|
|
526
|
+
train_data: TimeSeriesDataFrame,
|
|
527
|
+
val_data: Optional[TimeSeriesDataFrame],
|
|
528
|
+
time_limit: Optional[float],
|
|
529
|
+
ensemble_hyperparameters: dict,
|
|
530
|
+
) -> list[str]:
|
|
531
|
+
if not self.enable_ensemble or not ensemble_hyperparameters:
|
|
532
|
+
logger.warning("Ensemble training is disabled. Skipping ensemble training.")
|
|
533
|
+
return []
|
|
546
534
|
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
increment += 1
|
|
553
|
-
ensemble_name = f"WeightedEnsemble_{increment}"
|
|
554
|
-
return ensemble_name
|
|
535
|
+
ensemble_composer = self._get_ensemble_composer(ensemble_hyperparameters).fit(
|
|
536
|
+
train_data,
|
|
537
|
+
val_data,
|
|
538
|
+
time_limit,
|
|
539
|
+
)
|
|
555
540
|
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
) -> str:
|
|
562
|
-
logger.info("Fitting simple weighted ensemble.")
|
|
541
|
+
ensembles_trained = []
|
|
542
|
+
for _, model, base_models in ensemble_composer.iter_ensembles():
|
|
543
|
+
self._add_model(model=model, base_models=base_models)
|
|
544
|
+
self.save_model(model=model)
|
|
545
|
+
ensembles_trained.append(model.name)
|
|
563
546
|
|
|
564
|
-
|
|
565
|
-
base_model_scores = self.get_models_attribute_dict(attribute="val_score", models=self.get_model_names(0))
|
|
547
|
+
return ensembles_trained if ensembles_trained else []
|
|
566
548
|
|
|
567
|
-
|
|
568
|
-
|
|
549
|
+
def _get_val_splitter(self) -> AbstractWindowSplitter:
|
|
550
|
+
if self.num_val_windows is None:
|
|
551
|
+
val_splitter = ExpandingWindowSplitter(prediction_length=self.prediction_length)
|
|
552
|
+
else:
|
|
553
|
+
val_splitter = ExpandingWindowSplitter(
|
|
554
|
+
prediction_length=self.prediction_length,
|
|
555
|
+
num_val_windows=self.num_val_windows,
|
|
556
|
+
val_step_size=self.val_step_size,
|
|
557
|
+
)
|
|
558
|
+
return val_splitter
|
|
569
559
|
|
|
570
|
-
|
|
571
|
-
ensemble
|
|
572
|
-
|
|
560
|
+
def _get_ensemble_composer(self, ensemble_hyperparameters: dict) -> "EnsembleComposer":
|
|
561
|
+
"""Create an ensemble composer instance for delegation."""
|
|
562
|
+
return EnsembleComposer(
|
|
563
|
+
path=self.path,
|
|
564
|
+
prediction_length=self.prediction_length,
|
|
573
565
|
eval_metric=self.eval_metric,
|
|
574
566
|
target=self.target,
|
|
575
|
-
prediction_length=self.prediction_length,
|
|
576
|
-
path=self.path,
|
|
577
|
-
freq=data_per_window[0].freq,
|
|
578
567
|
quantile_levels=self.quantile_levels,
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
ensemble.fit(
|
|
583
|
-
predictions_per_window=predictions_per_window,
|
|
584
|
-
data_per_window=data_per_window,
|
|
585
|
-
model_scores=base_model_scores,
|
|
586
|
-
time_limit=time_limit,
|
|
587
|
-
)
|
|
588
|
-
ensemble.fit_time = time.time() - time_start
|
|
589
|
-
|
|
590
|
-
predict_time = 0
|
|
591
|
-
for m in ensemble.model_names:
|
|
592
|
-
predict_time += self.get_model_attribute(model=m, attribute="predict_time")
|
|
593
|
-
ensemble.predict_time = predict_time
|
|
594
|
-
|
|
595
|
-
score_per_fold = []
|
|
596
|
-
for window_idx, data in enumerate(data_per_window):
|
|
597
|
-
predictions = ensemble.predict({n: predictions_per_window[n][window_idx] for n in ensemble.model_names})
|
|
598
|
-
score_per_fold.append(self._score_with_predictions(data, predictions))
|
|
599
|
-
ensemble.val_score = float(np.mean(score_per_fold, dtype=np.float64))
|
|
600
|
-
|
|
601
|
-
self._log_scores_and_times(
|
|
602
|
-
val_score=ensemble.val_score,
|
|
603
|
-
fit_time=ensemble.fit_time,
|
|
604
|
-
predict_time=ensemble.predict_time,
|
|
568
|
+
model_graph=self.model_graph,
|
|
569
|
+
ensemble_hyperparameters=ensemble_hyperparameters,
|
|
570
|
+
window_splitter=self._get_val_splitter(),
|
|
605
571
|
)
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
572
|
+
|
|
573
|
+
def _get_validation_windows(
|
|
574
|
+
self, train_data: TimeSeriesDataFrame, val_data: Optional[TimeSeriesDataFrame]
|
|
575
|
+
) -> list[TimeSeriesDataFrame]:
|
|
576
|
+
"""If validation data is provided, return this as a single validation window. If not,
|
|
577
|
+
use the validation splitter to create a list of validation splits.
|
|
578
|
+
"""
|
|
579
|
+
if val_data is None:
|
|
580
|
+
return [val_fold for _, val_fold in self._get_val_splitter().split(train_data)]
|
|
581
|
+
else:
|
|
582
|
+
return [val_data]
|
|
609
583
|
|
|
610
584
|
def leaderboard(
|
|
611
585
|
self,
|
|
@@ -1228,46 +1202,3 @@ class TimeSeriesTrainer(AbstractTrainer[TimeSeriesModelBase]):
|
|
|
1228
1202
|
excluded_model_types=excluded_model_types,
|
|
1229
1203
|
banned_model_names=self._get_banned_model_names(),
|
|
1230
1204
|
)
|
|
1231
|
-
|
|
1232
|
-
def fit(
|
|
1233
|
-
self,
|
|
1234
|
-
train_data: TimeSeriesDataFrame,
|
|
1235
|
-
hyperparameters: Union[str, dict[Any, dict]],
|
|
1236
|
-
val_data: Optional[TimeSeriesDataFrame] = None,
|
|
1237
|
-
hyperparameter_tune_kwargs: Optional[Union[str, dict]] = None,
|
|
1238
|
-
excluded_model_types: Optional[list[str]] = None,
|
|
1239
|
-
time_limit: Optional[float] = None,
|
|
1240
|
-
random_seed: Optional[int] = None,
|
|
1241
|
-
):
|
|
1242
|
-
"""
|
|
1243
|
-
Fit a set of timeseries models specified by the `hyperparameters`
|
|
1244
|
-
dictionary that maps model names to their specified hyperparameters.
|
|
1245
|
-
|
|
1246
|
-
Parameters
|
|
1247
|
-
----------
|
|
1248
|
-
train_data
|
|
1249
|
-
Training data for fitting time series timeseries models.
|
|
1250
|
-
hyperparameters
|
|
1251
|
-
A dictionary mapping selected model names, model classes or model factory to hyperparameter
|
|
1252
|
-
settings. Model names should be present in `trainer.presets.DEFAULT_MODEL_NAMES`. Optionally,
|
|
1253
|
-
the user may provide one of "default", "light" and "very_light" to specify presets.
|
|
1254
|
-
val_data
|
|
1255
|
-
Optional validation data set to report validation scores on.
|
|
1256
|
-
hyperparameter_tune_kwargs
|
|
1257
|
-
Args for hyperparameter tuning
|
|
1258
|
-
excluded_model_types
|
|
1259
|
-
Names of models that should not be trained, even if listed in `hyperparameters`.
|
|
1260
|
-
time_limit
|
|
1261
|
-
Time limit for training
|
|
1262
|
-
random_seed
|
|
1263
|
-
Random seed that will be set to each model during training
|
|
1264
|
-
"""
|
|
1265
|
-
self._train_multi(
|
|
1266
|
-
train_data,
|
|
1267
|
-
val_data=val_data,
|
|
1268
|
-
hyperparameters=hyperparameters,
|
|
1269
|
-
hyperparameter_tune_kwargs=hyperparameter_tune_kwargs,
|
|
1270
|
-
excluded_model_types=excluded_model_types,
|
|
1271
|
-
time_limit=time_limit,
|
|
1272
|
-
random_seed=random_seed,
|
|
1273
|
-
)
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
logger = logging.getLogger("autogluon.timeseries.trainer")
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def log_scores_and_times(
|
|
8
|
+
val_score: Optional[float],
|
|
9
|
+
fit_time: Optional[float],
|
|
10
|
+
predict_time: Optional[float],
|
|
11
|
+
eval_metric_name: str,
|
|
12
|
+
):
|
|
13
|
+
if val_score is not None:
|
|
14
|
+
logger.info(f"\t{val_score:<7.4f}".ljust(15) + f"= Validation score ({eval_metric_name})")
|
|
15
|
+
if fit_time is not None:
|
|
16
|
+
logger.info(f"\t{fit_time:<7.2f} s".ljust(15) + "= Training runtime")
|
|
17
|
+
if predict_time is not None:
|
|
18
|
+
logger.info(f"\t{predict_time:<7.2f} s".ljust(15) + "= Validation (prediction) runtime")
|
|
@@ -6,7 +6,7 @@ import pandas as pd
|
|
|
6
6
|
from sklearn.compose import ColumnTransformer
|
|
7
7
|
from sklearn.preprocessing import QuantileTransformer, StandardScaler
|
|
8
8
|
|
|
9
|
-
from autogluon.timeseries.dataset
|
|
9
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
10
10
|
from autogluon.timeseries.utils.features import CovariateMetadata
|
|
11
11
|
from autogluon.timeseries.utils.warning_filters import warning_filter
|
|
12
12
|
|
|
@@ -4,7 +4,7 @@ import numpy as np
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
from typing_extensions import Self
|
|
6
6
|
|
|
7
|
-
from autogluon.timeseries.dataset
|
|
7
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class TargetScaler(Protocol):
|
|
@@ -59,12 +59,12 @@ class LocalTargetScaler(TargetScaler):
|
|
|
59
59
|
|
|
60
60
|
def transform(self, data: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
|
|
61
61
|
"""Apply scaling to the target column in the dataframe."""
|
|
62
|
-
loc, scale = self._reindex_loc_scale(item_index=data.index.get_level_values(ITEMID))
|
|
62
|
+
loc, scale = self._reindex_loc_scale(item_index=data.index.get_level_values(TimeSeriesDataFrame.ITEMID))
|
|
63
63
|
return data.assign(**{self.target: (data[self.target] - loc) / scale})
|
|
64
64
|
|
|
65
65
|
def inverse_transform(self, predictions: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
|
|
66
66
|
"""Apply inverse scaling to all columns in the predictions dataframe."""
|
|
67
|
-
loc, scale = self._reindex_loc_scale(item_index=predictions.index.get_level_values(ITEMID))
|
|
67
|
+
loc, scale = self._reindex_loc_scale(item_index=predictions.index.get_level_values(TimeSeriesDataFrame.ITEMID))
|
|
68
68
|
return predictions.assign(**{col: predictions[col] * scale + loc for col in predictions.columns})
|
|
69
69
|
|
|
70
70
|
|
|
@@ -75,7 +75,7 @@ class LocalStandardScaler(LocalTargetScaler):
|
|
|
75
75
|
"""
|
|
76
76
|
|
|
77
77
|
def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series, pd.Series]:
|
|
78
|
-
stats = target_series.groupby(level=ITEMID, sort=False).agg(["mean", "std"])
|
|
78
|
+
stats = target_series.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).agg(["mean", "std"])
|
|
79
79
|
return stats["mean"], stats["std"]
|
|
80
80
|
|
|
81
81
|
|
|
@@ -83,7 +83,7 @@ class LocalMeanAbsScaler(LocalTargetScaler):
|
|
|
83
83
|
"""Applies mean absolute scaling to each time series in the dataset."""
|
|
84
84
|
|
|
85
85
|
def _compute_loc_scale(self, target_series: pd.Series) -> tuple[Optional[pd.Series], pd.Series]:
|
|
86
|
-
scale = target_series.abs().groupby(level=ITEMID, sort=False).agg("mean")
|
|
86
|
+
scale = target_series.abs().groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).agg("mean")
|
|
87
87
|
return None, scale
|
|
88
88
|
|
|
89
89
|
|
|
@@ -94,7 +94,7 @@ class LocalMinMaxScaler(LocalTargetScaler):
|
|
|
94
94
|
"""
|
|
95
95
|
|
|
96
96
|
def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series, pd.Series]:
|
|
97
|
-
stats = target_series.abs().groupby(level=ITEMID, sort=False).agg(["min", "max"])
|
|
97
|
+
stats = target_series.abs().groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).agg(["min", "max"])
|
|
98
98
|
scale = (stats["max"] - stats["min"]).clip(lower=self.min_scale)
|
|
99
99
|
loc = stats["min"]
|
|
100
100
|
return loc, scale
|
|
@@ -118,7 +118,7 @@ class LocalRobustScaler(LocalTargetScaler):
|
|
|
118
118
|
assert 0 < self.q_min < self.q_max < 1
|
|
119
119
|
|
|
120
120
|
def _compute_loc_scale(self, target_series: pd.Series) -> tuple[pd.Series, pd.Series]:
|
|
121
|
-
grouped = target_series.groupby(level=ITEMID, sort=False)
|
|
121
|
+
grouped = target_series.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False)
|
|
122
122
|
loc = grouped.median()
|
|
123
123
|
lower = grouped.quantile(self.q_min)
|
|
124
124
|
upper = grouped.quantile(self.q_max)
|
|
@@ -14,7 +14,7 @@ from autogluon.features.generators import (
|
|
|
14
14
|
IdentityFeatureGenerator,
|
|
15
15
|
PipelineFeatureGenerator,
|
|
16
16
|
)
|
|
17
|
-
from autogluon.timeseries.dataset
|
|
17
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
18
18
|
from autogluon.timeseries.utils.warning_filters import warning_filter
|
|
19
19
|
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
@@ -415,7 +415,9 @@ class AbstractFeatureImportanceTransform:
|
|
|
415
415
|
if feature_name in self.covariate_metadata.past_covariates:
|
|
416
416
|
# we'll have to work on the history of the data alone
|
|
417
417
|
data[feature_name] = data[feature_name].copy()
|
|
418
|
-
feature_data = data[feature_name].groupby(level=ITEMID, sort=False).head(-self.prediction_length)
|
|
418
|
+
feature_data = (
|
|
419
|
+
data[feature_name].groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).head(-self.prediction_length)
|
|
420
|
+
)
|
|
419
421
|
# Silence spurious FutureWarning raised by DataFrame.update https://github.com/pandas-dev/pandas/issues/57124
|
|
420
422
|
with warning_filter():
|
|
421
423
|
data[feature_name].update(self._transform_series(feature_data, is_categorical=is_categorical))
|
|
@@ -455,7 +457,7 @@ class PermutationFeatureImportanceTransform(AbstractFeatureImportanceTransform):
|
|
|
455
457
|
rng = np.random.RandomState(self.random_seed)
|
|
456
458
|
|
|
457
459
|
if self.shuffle_type == "itemwise":
|
|
458
|
-
return feature_data.groupby(level=ITEMID, sort=False).transform(
|
|
460
|
+
return feature_data.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).transform(
|
|
459
461
|
lambda x: x.sample(frac=1, random_state=rng).values
|
|
460
462
|
)
|
|
461
463
|
elif self.shuffle_type == "naive":
|
|
@@ -483,6 +485,8 @@ class ConstantReplacementFeatureImportanceTransform(AbstractFeatureImportanceTra
|
|
|
483
485
|
|
|
484
486
|
def _transform_series(self, feature_data: pd.Series, is_categorical: bool) -> pd.Series:
|
|
485
487
|
if is_categorical:
|
|
486
|
-
return feature_data.groupby(level=ITEMID, sort=False).transform(lambda x: x.mode()[0])
|
|
488
|
+
return feature_data.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).transform(lambda x: x.mode()[0])
|
|
487
489
|
else:
|
|
488
|
-
return feature_data.groupby(level=ITEMID, sort=False).transform(
|
|
490
|
+
return feature_data.groupby(level=TimeSeriesDataFrame.ITEMID, sort=False).transform(
|
|
491
|
+
self.real_value_aggregation
|
|
492
|
+
) # type: ignore
|
|
@@ -5,7 +5,7 @@ import numpy as np
|
|
|
5
5
|
import pandas as pd
|
|
6
6
|
|
|
7
7
|
from autogluon.common.utils.deprecated_utils import Deprecated
|
|
8
|
-
from autogluon.timeseries.dataset
|
|
8
|
+
from autogluon.timeseries.dataset import TimeSeriesDataFrame
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def get_forecast_horizon_index_single_time_series(
|
|
@@ -16,7 +16,7 @@ def get_forecast_horizon_index_single_time_series(
|
|
|
16
16
|
if offset is None:
|
|
17
17
|
raise ValueError(f"Invalid frequency: {freq}")
|
|
18
18
|
start_ts = past_timestamps.max() + 1 * offset
|
|
19
|
-
return pd.date_range(start=start_ts, periods=prediction_length, freq=freq, name=TIMESTAMP)
|
|
19
|
+
return pd.date_range(start=start_ts, periods=prediction_length, freq=freq, name=TimeSeriesDataFrame.TIMESTAMP)
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
@Deprecated(
|
|
@@ -37,14 +37,14 @@ def make_future_data_frame(
|
|
|
37
37
|
"""
|
|
38
38
|
indptr = ts_dataframe.get_indptr()
|
|
39
39
|
last = ts_dataframe.index[indptr[1:] - 1].to_frame(index=False)
|
|
40
|
-
item_ids = np.repeat(last[ITEMID].to_numpy(), prediction_length)
|
|
40
|
+
item_ids = np.repeat(last[TimeSeriesDataFrame.ITEMID].to_numpy(), prediction_length)
|
|
41
41
|
|
|
42
42
|
if freq is None:
|
|
43
43
|
freq = ts_dataframe.freq
|
|
44
44
|
offset = pd.tseries.frequencies.to_offset(freq)
|
|
45
|
-
last_ts = pd.DatetimeIndex(last[TIMESTAMP])
|
|
45
|
+
last_ts = pd.DatetimeIndex(last[TimeSeriesDataFrame.TIMESTAMP])
|
|
46
46
|
# Non-vectorized offsets like BusinessDay may produce a PerformanceWarning - we filter them
|
|
47
47
|
with warnings.catch_warnings():
|
|
48
48
|
warnings.simplefilter("ignore", category=pd.errors.PerformanceWarning)
|
|
49
49
|
timestamps = np.dstack([last_ts + step * offset for step in range(1, prediction_length + 1)]).ravel() # type: ignore[operator]
|
|
50
|
-
return pd.DataFrame({ITEMID: item_ids, TIMESTAMP: timestamps})
|
|
50
|
+
return pd.DataFrame({TimeSeriesDataFrame.ITEMID: item_ids, TimeSeriesDataFrame.TIMESTAMP: timestamps})
|
autogluon/timeseries/version.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import sys, types, os;p = os.path.join(sys._getframe(1).f_locals['sitedir'], *('autogluon',));importlib = __import__('importlib.util');__import__('importlib.machinery');m = sys.modules.setdefault('autogluon', importlib.util.module_from_spec(importlib.machinery.PathFinder.find_spec('autogluon', [os.path.dirname(p)])));m = m or sys.modules.setdefault('autogluon', types.ModuleType('autogluon'));mp = (m or []) and m.__dict__.setdefault('__path__',[]);(p not in mp) and mp.append(p)
|