autogluon.timeseries 1.2.1b20250304__py3-none-any.whl → 1.2.1b20250306__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. autogluon/timeseries/models/abstract/abstract_timeseries_model.py +246 -446
  2. autogluon/timeseries/models/abstract/tunable.py +189 -0
  3. autogluon/timeseries/models/autogluon_tabular/mlforecast.py +3 -4
  4. autogluon/timeseries/models/autogluon_tabular/transforms.py +2 -2
  5. autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +8 -0
  6. autogluon/timeseries/models/ensemble/greedy_ensemble.py +4 -2
  7. autogluon/timeseries/models/multi_window/multi_window_model.py +0 -5
  8. autogluon/timeseries/models/presets.py +0 -3
  9. autogluon/timeseries/regressor.py +54 -6
  10. autogluon/timeseries/transforms/__init__.py +2 -13
  11. autogluon/timeseries/transforms/covariate_scaler.py +28 -34
  12. autogluon/timeseries/transforms/target_scaler.py +22 -5
  13. autogluon/timeseries/version.py +1 -1
  14. {autogluon.timeseries-1.2.1b20250304.dist-info → autogluon.timeseries-1.2.1b20250306.dist-info}/METADATA +4 -4
  15. {autogluon.timeseries-1.2.1b20250304.dist-info → autogluon.timeseries-1.2.1b20250306.dist-info}/RECORD +22 -21
  16. /autogluon.timeseries-1.2.1b20250304-py3.9-nspkg.pth → /autogluon.timeseries-1.2.1b20250306-py3.9-nspkg.pth +0 -0
  17. {autogluon.timeseries-1.2.1b20250304.dist-info → autogluon.timeseries-1.2.1b20250306.dist-info}/LICENSE +0 -0
  18. {autogluon.timeseries-1.2.1b20250304.dist-info → autogluon.timeseries-1.2.1b20250306.dist-info}/NOTICE +0 -0
  19. {autogluon.timeseries-1.2.1b20250304.dist-info → autogluon.timeseries-1.2.1b20250306.dist-info}/WHEEL +0 -0
  20. {autogluon.timeseries-1.2.1b20250304.dist-info → autogluon.timeseries-1.2.1b20250306.dist-info}/namespace_packages.txt +0 -0
  21. {autogluon.timeseries-1.2.1b20250304.dist-info → autogluon.timeseries-1.2.1b20250306.dist-info}/top_level.txt +0 -0
  22. {autogluon.timeseries-1.2.1b20250304.dist-info → autogluon.timeseries-1.2.1b20250306.dist-info}/zip-safe +0 -0
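
Most of this diff is a refactor of abstract_timeseries_model.py: the monolithic AbstractTimeSeriesModel is split into TimeSeriesModelBase (construction, persistence, scoring, preprocessing, tags) and the new TimeSeriesTunable mixin in tunable.py (+189 lines), which absorbs the hyperparameter-tuning machinery removed below. AbstractTimeSeriesModel now composes the two and declares _fit and _predict as abstract hooks. As a rough sketch of what a concrete model looks like against the new base (the class name and the abbreviated signatures are illustrative, not taken from the package):

    from typing import Optional

    from autogluon.timeseries.dataset import TimeSeriesDataFrame
    from autogluon.timeseries.models.abstract import AbstractTimeSeriesModel


    class MyCustomModel(AbstractTimeSeriesModel):
        def _fit(self, train_data: TimeSeriesDataFrame, time_limit: Optional[float] = None, **kwargs) -> None:
            # Subclasses must now override the abstract _fit hook (previously it
            # raised NotImplementedError); training logic goes here.
            ...

        def _predict(
            self,
            data: TimeSeriesDataFrame,
            known_covariates: Optional[TimeSeriesDataFrame] = None,
            **kwargs,
        ) -> TimeSeriesDataFrame:
            # Likewise for the abstract _predict hook; forecasting logic goes here.
            ...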
@@ -5,7 +5,7 @@ import logging
 import os
 import re
 import time
-from contextlib import nullcontext
+from abc import ABC, abstractmethod
 from typing import Any, Dict, List, Optional, Sequence, Tuple, Union

 import pandas as pd
@@ -14,35 +14,21 @@ from typing_extensions import Self
 from autogluon.common import space
 from autogluon.common.loaders import load_pkl
 from autogluon.common.savers import save_pkl
-from autogluon.common.utils.distribute_utils import DistributedContext
-from autogluon.common.utils.log_utils import DuplicateFilter
 from autogluon.common.utils.resource_utils import get_resource_manager
-from autogluon.common.utils.try_import import try_import_ray
 from autogluon.common.utils.utils import setup_outputdir
 from autogluon.core.constants import AG_ARG_PREFIX, AG_ARGS_FIT, REFIT_FULL_SUFFIX
-from autogluon.core.hpo.constants import CUSTOM_BACKEND, RAY_BACKEND
-from autogluon.core.hpo.exceptions import EmptySearchSpace
-from autogluon.core.hpo.executors import HpoExecutor, HpoExecutorFactory, RayHpoExecutor
 from autogluon.core.models import ModelBase
 from autogluon.core.utils.exceptions import TimeLimitExceeded
 from autogluon.timeseries.dataset import TimeSeriesDataFrame
 from autogluon.timeseries.metrics import TimeSeriesScorer, check_get_evaluation_metric
-from autogluon.timeseries.regressor import CovariateRegressor
-from autogluon.timeseries.transforms import (
-    CovariateScaler,
-    LocalTargetScaler,
-    get_covariate_scaler_from_name,
-    get_target_scaler_from_name,
-)
+from autogluon.timeseries.regressor import CovariateRegressor, get_covariate_regressor
+from autogluon.timeseries.transforms import CovariateScaler, TargetScaler, get_covariate_scaler, get_target_scaler
 from autogluon.timeseries.utils.features import CovariateMetadata
 from autogluon.timeseries.utils.forecast import get_forecast_horizon_index_ts_dataframe
-from autogluon.timeseries.utils.warning_filters import disable_stdout, warning_filter

-from .model_trial import model_trial, skip_hpo
+from .tunable import TimeSeriesTunable

 logger = logging.getLogger(__name__)
-dup_filter = DuplicateFilter()
-logger.addFilter(dup_filter)


 # TODO: refactor and move to util. We do not need to use "params_aux" in time series
@@ -120,9 +106,9 @@ def check_and_split_hyperparameters(
     return params, params_aux


-# TODO: refactor. remove params_aux, etc. make class inherit from ABC, make overrides and abstract
-# methods clear, change name to TimeSeriesModel, et al.
-class AbstractTimeSeriesModel(ModelBase):
+# TODO: refactor. remove params_aux, etc. make overrides and abstract
+# methods clear, et al.
+class TimeSeriesModelBase(ModelBase, ABC):
     """Abstract class for all `Model` objects in autogluon.timeseries.

     Parameters
@@ -212,19 +198,16 @@ class AbstractTimeSeriesModel(ModelBase):
         else:
             self.must_drop_median = False

+        self._user_params, self._user_params_aux = check_and_split_hyperparameters(hyperparameters)
         self._oof_predictions: Optional[List[TimeSeriesDataFrame]] = None
-        self.target_scaler: Optional[LocalTargetScaler] = None
-        self.covariate_scaler: Optional[CovariateScaler] = None
-        self.covariate_regressor: Optional[CovariateRegressor] = None
-
-        # TODO: remove the variables below
-        self.model = None

-        self._is_initialized = False
-        self._user_params, self._user_params_aux = check_and_split_hyperparameters(hyperparameters)
+        self.params: Dict[str, Any] = {}
+        self.params_aux: Dict[str, Any] = {}
+        self._init_params_aux()
+        self._init_params()
+        self._is_initialized = True

-        self.params = {}
-        self.params_aux = {}
+        # TODO: remove the variables below
         self.nondefault_params: List[str] = []

         self.fit_time: Optional[float] = None  # Time taken to fit in seconds (Training data)
@@ -234,6 +217,11 @@ class AbstractTimeSeriesModel(ModelBase):
         )
         self.val_score: Optional[float] = None  # Score with eval_metric (Validation data)

+        self.target_scaler: Optional[TargetScaler]
+        self.covariate_scaler: Optional[CovariateScaler]
+        self.covariate_regressor: Optional[CovariateRegressor]
+        self._initialize_transforms_and_regressor()
+
     def __repr__(self) -> str:
         return self.name

@@ -263,9 +251,7 @@ class AbstractTimeSeriesModel(ModelBase):
         self._oof_predictions = None

         file_path = os.path.join(path, self.model_file_name)
-        _model = self.model
         save_pkl.save(path=file_path, object=self, verbose=verbose)
-        self.model = _model

         self._oof_predictions = oof_predictions
         return path
@@ -310,21 +296,6 @@ class AbstractTimeSeriesModel(ModelBase):
             self._oof_predictions = self.load_oof_predictions(self.path)
         return self._oof_predictions

-    def _get_default_auxiliary_params(self) -> dict:
-        return dict(
-            # ratio of given time_limit to use during fit(). If time_limit == 10 and max_time_limit_ratio=0.3,
-            # time_limit would be changed to 3.
-            max_time_limit_ratio=self.default_max_time_limit_ratio,
-            # max time_limit value during fit(). If the provided time_limit is greater than this value, it will be
-            # replaced by max_time_limit. Occurs after max_time_limit_ratio is applied.
-            max_time_limit=None,
-        )
-
-    # TODO: remove
-    @classmethod
-    def _get_default_ag_args(cls) -> dict:
-        return {}
-
     def _init_params(self):
         """Initializes model hyperparameters"""
         hyperparameters = self._user_params
@@ -342,26 +313,35 @@ class AbstractTimeSeriesModel(ModelBase):
         For documentation on some of the available options and their defaults, refer to `self._get_default_auxiliary_params`.
         """
         hyperparameters_aux = self._user_params_aux or {}
-        self.params_aux = {**self._get_default_auxiliary_params(), **hyperparameters_aux}
-
-    def initialize(self) -> None:
-        if not self._is_initialized:
-            self._init_params_aux()
-            self._init_params()
-            self._initialize_transforms()
-            self._is_initialized = True
-
-    def _initialize_transforms(self) -> None:
-        self.target_scaler = self._create_target_scaler()
-        self.covariate_scaler = self._create_covariate_scaler()
-        self.covariate_regressor = self._create_covariate_regressor()
+        default_aux_params = dict(
+            # ratio of given time_limit to use during fit(). If time_limit == 10 and max_time_limit_ratio=0.3,
+            # time_limit would be changed to 3.
+            max_time_limit_ratio=self.default_max_time_limit_ratio,
+            # max time_limit value during fit(). If the provided time_limit is greater than this value, it will be
+            # replaced by max_time_limit. Occurs after max_time_limit_ratio is applied.
+            max_time_limit=None,
+        )
+        self.params_aux = {**default_aux_params, **hyperparameters_aux}
+
+    def _initialize_transforms_and_regressor(self) -> None:
+        self.target_scaler = get_target_scaler(self._get_model_params().get("target_scaler"), target=self.target)
+        self.covariate_scaler = get_covariate_scaler(
+            self._get_model_params().get("covariate_scaler"),
+            covariate_metadata=self.metadata,
+            use_static_features=self.supports_static_features,
+            use_known_covariates=self.supports_known_covariates,
+            use_past_covariates=self.supports_past_covariates,
+        )
+        self.covariate_regressor = get_covariate_regressor(
+            self._get_model_params().get("covariate_regressor"),
+            target=self.target,
+            covariate_metadata=self.metadata,
+        )

     def _get_model_params(self) -> dict:
         return self.params.copy()

     def get_params(self) -> dict:
-        # TODO: do not extract to AbstractModel if this is only used for getting a
-        # prototype of the object for HPO.
         hyperparameters = self._user_params.copy()
         if self._user_params_aux:
             hyperparameters[AG_ARGS_FIT] = self._user_params_aux.copy()
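
The three `_create_*` helpers (removed further down) are replaced by the module-level factories get_target_scaler, get_covariate_scaler, and get_covariate_regressor, resolved eagerly from the model's hyperparameters. From a user's perspective these remain plain hyperparameter entries; a sketch of how they might be passed (the "standard" scaler name and "GBM" regressor value are assumptions for illustration, not taken from this diff):

    from autogluon.timeseries import TimeSeriesPredictor

    # train_data: a TimeSeriesDataFrame prepared elsewhere
    predictor = TimeSeriesPredictor(prediction_length=24)
    predictor.fit(
        train_data,
        hyperparameters={
            # "target_scaler" and "covariate_regressor" are the two entries listed
            # in allowed_hyperparameters; the concrete values below are assumed.
            "DeepAR": {"target_scaler": "standard", "covariate_regressor": "GBM"},
        },
    )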
@@ -378,19 +358,6 @@ class AbstractTimeSeriesModel(ModelBase):
             target=self.target,
         )

-    @classmethod
-    def load_info(cls, path: str, load_model_if_required: bool = True) -> dict:
-        # TODO: remove?
-        load_path = os.path.join(path, cls.model_info_name)
-        try:
-            return load_pkl.load(path=load_path)
-        except:
-            if load_model_if_required:
-                model = cls.load(path=path, reset_paths=True)
-                return model.get_info()
-            else:
-                raise
-
     def get_info(self) -> dict:
         """
         Returns a dictionary of numerous fields describing the model.
@@ -410,6 +377,188 @@ class AbstractTimeSeriesModel(ModelBase):
         }
         return info

+    @classmethod
+    def load_info(cls, path: str, load_model_if_required: bool = True) -> dict:
+        # TODO: remove?
+        load_path = os.path.join(path, cls.model_info_name)
+        try:
+            return load_pkl.load(path=load_path)
+        except:
+            if load_model_if_required:
+                model = cls.load(path=path, reset_paths=True)
+                return model.get_info()
+            else:
+                raise
+
+    @property
+    def allowed_hyperparameters(self) -> List[str]:
+        """List of hyperparameters allowed by the model."""
+        return ["target_scaler", "covariate_regressor"]
+
+    def _score_with_predictions(
+        self,
+        data: TimeSeriesDataFrame,
+        predictions: TimeSeriesDataFrame,
+        metric: Optional[str] = None,
+    ) -> float:
+        """Compute the score measuring how well the predictions align with the data."""
+        eval_metric = self.eval_metric if metric is None else check_get_evaluation_metric(metric)
+        return eval_metric.score(
+            data=data,
+            predictions=predictions,
+            prediction_length=self.prediction_length,
+            target=self.target,
+            seasonal_period=self.eval_metric_seasonal_period,
+        )
+
+    def score(self, data: TimeSeriesDataFrame, metric: Optional[str] = None) -> float:  # type: ignore
+        """Return the evaluation scores for given metric and dataset. The last
+        `self.prediction_length` time steps of each time series in the input data set
+        will be held out and used for computing the evaluation score. Time series
+        models always return higher-is-better type scores.
+
+        Parameters
+        ----------
+        data: TimeSeriesDataFrame
+            Dataset used for scoring.
+        metric: str
+            String identifier of evaluation metric to use, from one of
+            `autogluon.timeseries.utils.metric_utils.AVAILABLE_METRICS`.
+
+        Other Parameters
+        ----------------
+        num_samples: int
+            Number of samples to use for making evaluation predictions if the probabilistic
+            forecasts are generated by forward sampling from the fitted model.
+
+        Returns
+        -------
+        score: float
+            The computed forecast evaluation score on the last `self.prediction_length`
+            time steps of each time series.
+        """
+        past_data, known_covariates = data.get_model_inputs_for_scoring(
+            prediction_length=self.prediction_length, known_covariates_names=self.metadata.known_covariates
+        )
+        predictions = self.predict(past_data, known_covariates=known_covariates)
+        return self._score_with_predictions(data=data, predictions=predictions, metric=metric)
+
+    def score_and_cache_oof(
+        self,
+        val_data: TimeSeriesDataFrame,
+        store_val_score: bool = False,
+        store_predict_time: bool = False,
+        **predict_kwargs,
+    ) -> None:
+        """Compute val_score, predict_time and cache out-of-fold (OOF) predictions."""
+        past_data, known_covariates = val_data.get_model_inputs_for_scoring(
+            prediction_length=self.prediction_length, known_covariates_names=self.metadata.known_covariates
+        )
+        predict_start_time = time.time()
+        oof_predictions = self.predict(past_data, known_covariates=known_covariates, **predict_kwargs)
+        self._oof_predictions = [oof_predictions]
+        if store_predict_time:
+            self.predict_time = time.time() - predict_start_time
+        if store_val_score:
+            self.val_score = self._score_with_predictions(val_data, oof_predictions)
+
+    def _is_gpu_available(self) -> bool:
+        return False
+
+    @staticmethod
+    def _get_system_resources() -> Dict[str, Any]:
+        resource_manager = get_resource_manager()
+        system_num_cpus = resource_manager.get_cpu_count()
+        system_num_gpus = resource_manager.get_gpu_count()
+        return {
+            "num_cpus": system_num_cpus,
+            "num_gpus": system_num_gpus,
+        }
+
+    def _get_model_base(self) -> Self:
+        return self
+
+    def preprocess(  # type: ignore
+        self,
+        data: TimeSeriesDataFrame,
+        known_covariates: Optional[TimeSeriesDataFrame] = None,
+        is_train: bool = False,
+        **kwargs,
+    ) -> Tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
+        """Method that implements model-specific preprocessing logic."""
+        return data, known_covariates
+
+    def persist(self) -> Self:
+        """Ask the model to persist its assets in memory, i.e., to predict with low latency. In practice
+        this is used for pretrained models that have to lazy-load model parameters to device memory at
+        prediction time.
+        """
+        return self
+
+    def convert_to_refit_full_via_copy(self) -> Self:
+        # save the model as a new model on disk
+        previous_name = self.name
+        self.rename(self.name + REFIT_FULL_SUFFIX)
+        refit_model_path = self.path
+        self.save(path=self.path, verbose=False)
+
+        self.rename(previous_name)
+
+        refit_model = self.load(path=refit_model_path, verbose=False)
+        refit_model.val_score = None
+        refit_model.predict_time = None
+
+        return refit_model
+
+    def convert_to_refit_full_template(self):
+        """
+        After calling this function, returned model should be able to be fit without X_val, y_val using the iterations trained by the original model.
+
+        Increase max_memory_usage_ratio by 25% to reduce the chance that the refit model will trigger NotEnoughMemoryError and skip training.
+        This can happen without the 25% increase since the refit model generally will use more training data and thus require more memory.
+        """
+        params = copy.deepcopy(self.get_params())
+
+        if "hyperparameters" not in params:
+            params["hyperparameters"] = dict()
+
+        if AG_ARGS_FIT not in params["hyperparameters"]:
+            params["hyperparameters"][AG_ARGS_FIT] = dict()
+
+        params["hyperparameters"].update(self.params_trained)
+        params["name"] = params["name"] + REFIT_FULL_SUFFIX
+        template = self.__class__(**params)
+
+        return template
+
+    def get_user_params(self) -> dict:
+        """Used to access user-specified parameters for the model before initialization."""
+        if self._user_params is None:
+            return {}
+        else:
+            return self._user_params.copy()
+
+    def _more_tags(self) -> dict:
+        """Encode model properties using tags, similar to sklearn & autogluon.tabular.
+
+        For more details, see `autogluon.core.models.abstract.AbstractModel._get_tags()` and https://scikit-learn.org/stable/_sources/developers/develop.rst.txt.
+
+        List of currently supported tags:
+        - allow_nan: Can the model handle data with missing values represented by np.nan?
+        - can_refit_full: Does it make sense to retrain the model without validation data?
+            See `autogluon.core.models.abstract._tags._DEFAULT_TAGS` for more details.
+        - can_use_train_data: Can the model use train_data if it's provided to model.fit()?
+        - can_use_val_data: Can the model use val_data if it's provided to model.fit()?
+        """
+        return {
+            "allow_nan": False,
+            "can_refit_full": False,
+            "can_use_train_data": True,
+            "can_use_val_data": False,
+        }
+
+
+class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
     def fit(  # type: ignore
         self,
         train_data: TimeSeriesDataFrame,
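
The scoring helpers (_score_with_predictions, score, score_and_cache_oof) move verbatim into TimeSeriesModelBase; their removal from the old location appears in a later hunk, so call sites are unaffected. As the docstring above notes, score holds out the last prediction_length steps of each series itself, so it is called on data that still contains those steps (model, test_data, and the metric choice below are placeholders):

    # Higher is better; "MASE" stands in for any supported metric identifier.
    val_score = model.score(test_data, metric="MASE")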
@@ -452,7 +601,6 @@ class AbstractTimeSeriesModel(ModelBase):
             The fitted model object
         """
         start_time = time.monotonic()
-        self.initialize()

         if self.target_scaler is not None:
             train_data = self.target_scaler.fit_transform(train_data)
@@ -503,28 +651,7 @@ class AbstractTimeSeriesModel(ModelBase):

         return self

-    def _preprocess_time_limit(self, time_limit: float) -> float:
-        original_time_limit = time_limit
-        max_time_limit_ratio = self.params_aux["max_time_limit_ratio"]
-        max_time_limit = self.params_aux["max_time_limit"]
-
-        time_limit *= max_time_limit_ratio
-
-        if max_time_limit is not None:
-            time_limit = min(time_limit, max_time_limit)
-
-        if original_time_limit != time_limit:
-            time_limit_og_str = f"{original_time_limit:.2f}s" if original_time_limit is not None else "None"
-            time_limit_str = f"{time_limit:.2f}s" if time_limit is not None else "None"
-            logger.debug(
-                f"\tTime limit adjusted due to model hyperparameters: "
-                f"{time_limit_og_str} -> {time_limit_str} "
-                f"(ag.max_time_limit={max_time_limit}, "
-                f"ag.max_time_limit_ratio={max_time_limit_ratio}"
-            )
-
-        return time_limit
-
+    @abstractmethod
     def _fit(  # type: ignore
         self,
         train_data: TimeSeriesDataFrame,
@@ -539,8 +666,7 @@ class AbstractTimeSeriesModel(ModelBase):
         the model training logic, `fit` additionally implements other logic such as keeping
         track of the time limit, etc.
         """
-        # TODO: Make the models respect `num_cpus` and `num_gpus` parameters
-        raise NotImplementedError
+        pass

     # TODO: perform this check inside fit() ?
     def _check_fit_params(self):
@@ -551,65 +677,9 @@ class AbstractTimeSeriesModel(ModelBase):
                 "as hyperparameters when initializing or use `hyperparameter_tune` instead."
             )

-    @property
-    def allowed_hyperparameters(self) -> List[str]:
-        """List of hyperparameters allowed by the model."""
-        return ["target_scaler", "covariate_regressor"]
-
-    def _create_target_scaler(self) -> Optional[LocalTargetScaler]:
-        """Create a LocalTargetScaler object based on the value of the `target_scaler` hyperparameter."""
-        # TODO: Add support for custom target transforms (e.g., Box-Cox, log1p, ...)
-        target_scaler_type = self._get_model_params().get("target_scaler")
-        if target_scaler_type is not None:
-            return get_target_scaler_from_name(target_scaler_type, target=self.target)
-        else:
-            return None
-
-    def _create_covariate_scaler(self) -> Optional[CovariateScaler]:
-        """Create a CovariateScaler object based on the value of the `covariate_scaler` hyperparameter."""
-        covariate_scaler_type = self._get_model_params().get("covariate_scaler")
-        if covariate_scaler_type is not None:
-            return get_covariate_scaler_from_name(
-                covariate_scaler_type,
-                metadata=self.metadata,
-                use_static_features=self.supports_static_features,
-                use_known_covariates=self.supports_known_covariates,
-                use_past_covariates=self.supports_past_covariates,
-            )
-        else:
-            return None
-
-    def _create_covariate_regressor(self) -> Optional[CovariateRegressor]:
-        """Create a CovariateRegressor object based on the value of the `covariate_regressor` hyperparameter."""
-        covariate_regressor = self._get_model_params().get("covariate_regressor")
-        if covariate_regressor is not None:
-            if len(self.metadata.known_covariates + self.metadata.static_features) == 0:
-                logger.info(
-                    "\tSkipping covariate_regressor since the dataset contains no covariates or static features."
-                )
-                return None
-            else:
-                if isinstance(covariate_regressor, str):
-                    return CovariateRegressor(covariate_regressor, target=self.target, metadata=self.metadata)
-                elif isinstance(covariate_regressor, dict):
-                    return CovariateRegressor(**covariate_regressor, target=self.target, metadata=self.metadata)
-                elif isinstance(covariate_regressor, CovariateRegressor):
-                    logger.warning(
-                        "\tUsing a custom covariate_regressor is experimental functionality that may break in the future!"
-                    )
-                    covariate_regressor.target = self.target
-                    covariate_regressor.metadata = self.metadata
-                    return covariate_regressor
-                else:
-                    raise ValueError(
-                        f"Invalid value for covariate_regressor {covariate_regressor} of type {type(covariate_regressor)}"
-                    )
-        else:
-            return None
-
     def predict(  # type: ignore
         self,
-        data: Union[TimeSeriesDataFrame, Dict[str, Optional[TimeSeriesDataFrame]]],
+        data: TimeSeriesDataFrame,
         known_covariates: Optional[TimeSeriesDataFrame] = None,
         **kwargs,
     ) -> TimeSeriesDataFrame:
@@ -682,6 +752,7 @@ class AbstractTimeSeriesModel(ModelBase):
         """For each item in the dataframe, get timestamps for the next `prediction_length` time steps into the future."""
         return get_forecast_horizon_index_ts_dataframe(data, prediction_length=self.prediction_length, freq=self.freq)

+    @abstractmethod
     def _predict(
         self,
         data: Union[TimeSeriesDataFrame, Dict[str, TimeSeriesDataFrame]],
@@ -689,203 +760,29 @@ class AbstractTimeSeriesModel(ModelBase):
         **kwargs,
     ) -> TimeSeriesDataFrame:
         """Private method for `predict`. See `predict` for documentation of arguments."""
-        raise NotImplementedError
-
-    def _score_with_predictions(
-        self,
-        data: TimeSeriesDataFrame,
-        predictions: TimeSeriesDataFrame,
-        metric: Optional[str] = None,
-    ) -> float:
-        """Compute the score measuring how well the predictions align with the data."""
-        eval_metric = self.eval_metric if metric is None else check_get_evaluation_metric(metric)
-        return eval_metric.score(
-            data=data,
-            predictions=predictions,
-            prediction_length=self.prediction_length,
-            target=self.target,
-            seasonal_period=self.eval_metric_seasonal_period,
-        )
-
-    def score(self, data: TimeSeriesDataFrame, metric: Optional[str] = None) -> float:  # type: ignore
-        """Return the evaluation scores for given metric and dataset. The last
-        `self.prediction_length` time steps of each time series in the input data set
-        will be held out and used for computing the evaluation score. Time series
-        models always return higher-is-better type scores.
-
-        Parameters
-        ----------
-        data: TimeSeriesDataFrame
-            Dataset used for scoring.
-        metric: str
-            String identifier of evaluation metric to use, from one of
-            `autogluon.timeseries.utils.metric_utils.AVAILABLE_METRICS`.
-
-        Other Parameters
-        ----------------
-        num_samples: int
-            Number of samples to use for making evaluation predictions if the probabilistic
-            forecasts are generated by forward sampling from the fitted model.
-
-        Returns
-        -------
-        score: float
-            The computed forecast evaluation score on the last `self.prediction_length`
-            time steps of each time series.
-        """
-        # TODO: align method signature in the new AbstractModel
-
-        past_data, known_covariates = data.get_model_inputs_for_scoring(
-            prediction_length=self.prediction_length, known_covariates_names=self.metadata.known_covariates
-        )
-        predictions = self.predict(past_data, known_covariates=known_covariates)
-        return self._score_with_predictions(data=data, predictions=predictions, metric=metric)
-
-    def score_and_cache_oof(
-        self,
-        val_data: TimeSeriesDataFrame,
-        store_val_score: bool = False,
-        store_predict_time: bool = False,
-        **predict_kwargs,
-    ) -> None:
-        """Compute val_score, predict_time and cache out-of-fold (OOF) predictions."""
-        past_data, known_covariates = val_data.get_model_inputs_for_scoring(
-            prediction_length=self.prediction_length, known_covariates_names=self.metadata.known_covariates
-        )
-        predict_start_time = time.time()
-        oof_predictions = self.predict(past_data, known_covariates=known_covariates, **predict_kwargs)
-        self._oof_predictions = [oof_predictions]
-        if store_predict_time:
-            self.predict_time = time.time() - predict_start_time
-        if store_val_score:
-            self.val_score = self._score_with_predictions(val_data, oof_predictions)
-
-    def _get_hpo_train_fn_kwargs(self, **train_fn_kwargs) -> dict:
-        """Update kwargs passed to model_trial depending on the model configuration.
-
-        These kwargs need to be updated, for example, by MultiWindowBacktestingModel.
-        """
-        return train_fn_kwargs
-
-    def _is_gpu_available(self) -> bool:
-        return False
-
-    @staticmethod
-    def _get_system_resources() -> Dict[str, Any]:
-        resource_manager = get_resource_manager()
-        system_num_cpus = resource_manager.get_cpu_count()
-        system_num_gpus = resource_manager.get_gpu_count()
-        return {
-            "num_cpus": system_num_cpus,
-            "num_gpus": system_num_gpus,
-        }
-
-    def hyperparameter_tune(
-        self,
-        train_data: TimeSeriesDataFrame,
-        val_data: Optional[TimeSeriesDataFrame],
-        val_splitter: Any = None,
-        default_num_trials: Optional[int] = 1,
-        refit_every_n_windows: Optional[int] = 1,
-        hyperparameter_tune_kwargs: Union[str, dict] = "auto",
-        time_limit: Optional[float] = None,
-    ) -> Tuple[Dict[str, Any], Any]:
-        hpo_executor = self._get_default_hpo_executor()
-        hpo_executor.initialize(
-            hyperparameter_tune_kwargs, default_num_trials=default_num_trials, time_limit=time_limit
-        )
+        pass

-        self.initialize()
-
-        # we use k_fold=1 to circumvent autogluon.core logic to manage resources during parallelization
-        # of different folds
-        # FIXME: we pass in self which currently does not inherit from AbstractModel
-        hpo_executor.register_resources(self, k_fold=1, **self._get_system_resources())  # type: ignore
-
-        time_start = time.time()
-        logger.debug(f"\tStarting hyperparameter tuning for {self.name}")
-        search_space = self._get_search_space()
-
-        try:
-            hpo_executor.validate_search_space(search_space, self.name)
-        except EmptySearchSpace:
-            return skip_hpo(self, train_data, val_data, time_limit=hpo_executor.time_limit)
-
-        train_path, val_path = self._save_with_data(train_data, val_data)
-
-        train_fn_kwargs = self._get_hpo_train_fn_kwargs(
-            model_cls=self.__class__,
-            init_params=self.get_params(),
-            time_start=time_start,
-            time_limit=hpo_executor.time_limit,
-            fit_kwargs=dict(
-                val_splitter=val_splitter,
-                refit_every_n_windows=refit_every_n_windows,
-            ),
-            train_path=train_path,
-            val_path=val_path,
-            hpo_executor=hpo_executor,
-        )
+    def _preprocess_time_limit(self, time_limit: float) -> float:
+        original_time_limit = time_limit
+        max_time_limit_ratio = self.params_aux["max_time_limit_ratio"]
+        max_time_limit = self.params_aux["max_time_limit"]

-        minimum_resources = self.get_minimum_resources(is_gpu_available=self._is_gpu_available())
-        hpo_context = disable_stdout if isinstance(hpo_executor, RayHpoExecutor) else nullcontext
+        time_limit *= max_time_limit_ratio

-        minimum_cpu_per_trial = minimum_resources.get("num_cpus", 1)
-        if not isinstance(minimum_cpu_per_trial, int):
-            logger.warning(
-                f"Minimum number of CPUs per trial for {self.name} is not an integer. "
-                f"Setting to 1. Minimum number of CPUs per trial: {minimum_cpu_per_trial}"
-            )
-            minimum_cpu_per_trial = 1
-
-        with hpo_context(), warning_filter():  # prevent Ray from outputting its results to stdout with print
-            hpo_executor.execute(
-                model_trial=model_trial,
-                train_fn_kwargs=train_fn_kwargs,
-                directory=self.path,
-                minimum_cpu_per_trial=minimum_cpu_per_trial,
-                minimum_gpu_per_trial=minimum_resources.get("num_gpus", 0),
-                model_estimate_memory_usage=None,
-                adapter_type="timeseries",
-            )
+        if max_time_limit is not None:
+            time_limit = min(time_limit, max_time_limit)

-        assert self.path_root is not None
-        hpo_models, analysis = hpo_executor.get_hpo_results(
-            model_name=self.name,
-            model_path_root=self.path_root,
-            time_start=time_start,
+        if original_time_limit != time_limit:
+            time_limit_og_str = f"{original_time_limit:.2f}s" if original_time_limit is not None else "None"
+            time_limit_str = f"{time_limit:.2f}s" if time_limit is not None else "None"
+            logger.debug(
+                f"\tTime limit adjusted due to model hyperparameters: "
+                f"{time_limit_og_str} -> {time_limit_str} "
+                f"(ag.max_time_limit={max_time_limit}, "
+                f"ag.max_time_limit_ratio={max_time_limit_ratio}"
             )

-        return hpo_models, analysis
-
-    @property
-    def is_ensemble(self) -> bool:
-        """Return True if the model is an ensemble model or a container of multiple models."""
-        return self._get_model_base() is self
-
-    def _get_default_hpo_executor(self) -> HpoExecutor:
-        backend = (
-            self._get_model_base()._get_hpo_backend()
-        )  # If ensemble, will use the base model to determine backend
-        if backend == RAY_BACKEND:
-            try:
-                try_import_ray()
-            except Exception as e:
-                warning_msg = f"Will use custom hpo logic because ray import failed. Reason: {str(e)}"
-                dup_filter.attach_filter_targets(warning_msg)
-                logger.warning(warning_msg)
-                backend = CUSTOM_BACKEND
-        hpo_executor = HpoExecutorFactory.get_hpo_executor(backend)()  # type: ignore
-        return hpo_executor
-
-    def _get_model_base(self) -> AbstractTimeSeriesModel:
-        return self
-
-    def _get_hpo_backend(self) -> str:
-        """Choose which backend("ray" or "custom") to use for hpo"""
-        if DistributedContext.is_distributed_mode():
-            return RAY_BACKEND
-        return CUSTOM_BACKEND
+        return time_limit

     def _get_search_space(self):
         """Sets up default search space for HPO. Each hyperparameter which user did not specify is converted from
@@ -893,100 +790,3 @@ class AbstractTimeSeriesModel(ModelBase):
         """
         params = self.params.copy()
         return params
-
-    def _save_with_data(self, train_data, val_data):
-        self.set_contexts(os.path.abspath(self.path))
-        dataset_train_filename = "dataset_train.pkl"
-        train_path = os.path.join(self.path, dataset_train_filename)
-        save_pkl.save(path=train_path, object=train_data)
-
-        dataset_val_filename = "dataset_val.pkl"
-        val_path = os.path.join(self.path, dataset_val_filename)
-        save_pkl.save(path=val_path, object=val_data)
-        return train_path, val_path
-
-    def preprocess(  # type: ignore
-        self,
-        data: TimeSeriesDataFrame,
-        known_covariates: Optional[TimeSeriesDataFrame] = None,
-        is_train: bool = False,
-        **kwargs,
-    ) -> Tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
-        """Method that implements model-specific preprocessing logic."""
-        # TODO: move to new AbstractModel
-        return data, known_covariates
-
-    def persist(self) -> Self:
-        """Ask the model to persist its assets in memory, i.e., to predict with low latency. In practice
-        this is used for pretrained models that have to lazy-load model parameters to device memory at
-        prediction time.
-        """
-        return self
-
-    def convert_to_refit_full_via_copy(self) -> Self:
-        # save the model as a new model on disk
-        previous_name = self.name
-        self.rename(self.name + REFIT_FULL_SUFFIX)
-        refit_model_path = self.path
-        self.save(path=self.path, verbose=False)
-
-        self.rename(previous_name)
-
-        refit_model = self.load(path=refit_model_path, verbose=False)
-        refit_model.val_score = None
-        refit_model.predict_time = None
-
-        return refit_model
-
-    def convert_to_refit_full_template(self):
-        """
-        After calling this function, returned model should be able to be fit without X_val, y_val using the iterations trained by the original model.
-
-        Increase max_memory_usage_ratio by 25% to reduce the chance that the refit model will trigger NotEnoughMemoryError and skip training.
-        This can happen without the 25% increase since the refit model generally will use more training data and thus require more memory.
-        """
-        params = copy.deepcopy(self.get_params())
-
-        if "hyperparameters" not in params:
-            params["hyperparameters"] = dict()
-
-        if AG_ARGS_FIT not in params["hyperparameters"]:
-            params["hyperparameters"][AG_ARGS_FIT] = dict()
-
-        # TODO: remove
-        # Increase memory limit by 25% to avoid memory restrictions during fit
-        params["hyperparameters"][AG_ARGS_FIT]["max_memory_usage_ratio"] = (
-            params["hyperparameters"][AG_ARGS_FIT].get("max_memory_usage_ratio", 1.0) * 1.25
-        )
-
-        params["hyperparameters"].update(self.params_trained)
-        params["name"] = params["name"] + REFIT_FULL_SUFFIX
-        template = self.__class__(**params)
-
-        return template
-
-    def get_user_params(self) -> dict:
-        """Used to access user-specified parameters for the model before initialization."""
-        if self._user_params is None:
-            return {}
-        else:
-            return self._user_params.copy()
-
-    def _more_tags(self) -> dict:
-        """Encode model properties using tags, similar to sklearn & autogluon.tabular.
-
-        For more details, see `autogluon.core.models.abstract.AbstractModel._get_tags()` and https://scikit-learn.org/stable/_sources/developers/develop.rst.txt.
-
-        List of currently supported tags:
-        - allow_nan: Can the model handle data with missing values represented by np.nan?
-        - can_refit_full: Does it make sense to retrain the model without validation data?
-            See `autogluon.core.models.abstract._tags._DEFAULT_TAGS` for more details.
-        - can_use_train_data: Can the model use train_data if it's provided to model.fit()?
-        - can_use_val_data: Can the model use val_data if it's provided to model.fit()?
-        """
-        return {
-            "allow_nan": False,
-            "can_refit_full": False,
-            "can_use_train_data": True,
-            "can_use_val_data": False,
-        }
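
Everything in this final removed block reappears earlier in the file under TimeSeriesModelBase, so the defaults returned by _more_tags are unchanged. A subclass that can, say, handle NaNs and be refit without validation data would override the relevant tags; a sketch (hypothetical class name, and assuming partial tag overrides are merged with the defaults as in autogluon.core):

    class MyNanTolerantModel(AbstractTimeSeriesModel):
        def _more_tags(self) -> dict:
            return {"allow_nan": True, "can_refit_full": True}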