autogluon.timeseries 1.2.1b20250305__py3-none-any.whl → 1.2.1b20250307__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,6 @@ import os
6
6
  import re
7
7
  import time
8
8
  from abc import ABC, abstractmethod
9
- from contextlib import nullcontext
10
9
  from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
11
10
 
12
11
  import pandas as pd
@@ -15,15 +14,9 @@ from typing_extensions import Self
15
14
  from autogluon.common import space
16
15
  from autogluon.common.loaders import load_pkl
17
16
  from autogluon.common.savers import save_pkl
18
- from autogluon.common.utils.distribute_utils import DistributedContext
19
- from autogluon.common.utils.log_utils import DuplicateFilter
20
17
  from autogluon.common.utils.resource_utils import get_resource_manager
21
- from autogluon.common.utils.try_import import try_import_ray
22
18
  from autogluon.common.utils.utils import setup_outputdir
23
19
  from autogluon.core.constants import AG_ARG_PREFIX, AG_ARGS_FIT, REFIT_FULL_SUFFIX
24
- from autogluon.core.hpo.constants import CUSTOM_BACKEND, RAY_BACKEND
25
- from autogluon.core.hpo.exceptions import EmptySearchSpace
26
- from autogluon.core.hpo.executors import HpoExecutor, HpoExecutorFactory, RayHpoExecutor
27
20
  from autogluon.core.models import ModelBase
28
21
  from autogluon.core.utils.exceptions import TimeLimitExceeded
29
22
  from autogluon.timeseries.dataset import TimeSeriesDataFrame
@@ -32,13 +25,10 @@ from autogluon.timeseries.regressor import CovariateRegressor, get_covariate_reg
32
25
  from autogluon.timeseries.transforms import CovariateScaler, TargetScaler, get_covariate_scaler, get_target_scaler
33
26
  from autogluon.timeseries.utils.features import CovariateMetadata
34
27
  from autogluon.timeseries.utils.forecast import get_forecast_horizon_index_ts_dataframe
35
- from autogluon.timeseries.utils.warning_filters import disable_stdout, warning_filter
36
28
 
37
- from .model_trial import model_trial, skip_hpo
29
+ from .tunable import TimeSeriesTunable
38
30
 
39
31
  logger = logging.getLogger(__name__)
40
- dup_filter = DuplicateFilter()
41
- logger.addFilter(dup_filter)
42
32
 
43
33
 
44
34
  # TODO: refactor and move to util. We do not need to use "params_aux" in time series
@@ -117,8 +107,8 @@ def check_and_split_hyperparameters(
117
107
 
118
108
 
119
109
  # TODO: refactor. remove params_aux, etc. make overrides and abstract
120
- # methods clear, change name to TimeSeriesModel, et al.
121
- class AbstractTimeSeriesModel(ModelBase, ABC):
110
+ # methods clear, et al.
111
+ class TimeSeriesModelBase(ModelBase, ABC):
122
112
  """Abstract class for all `Model` objects in autogluon.timeseries.
123
113
 
124
114
  Parameters
@@ -400,6 +390,175 @@ class AbstractTimeSeriesModel(ModelBase, ABC):
400
390
  else:
401
391
  raise
402
392
 
393
+ @property
394
+ def allowed_hyperparameters(self) -> List[str]:
395
+ """List of hyperparameters allowed by the model."""
396
+ return ["target_scaler", "covariate_regressor"]
397
+
398
+ def _score_with_predictions(
399
+ self,
400
+ data: TimeSeriesDataFrame,
401
+ predictions: TimeSeriesDataFrame,
402
+ metric: Optional[str] = None,
403
+ ) -> float:
404
+ """Compute the score measuring how well the predictions align with the data."""
405
+ eval_metric = self.eval_metric if metric is None else check_get_evaluation_metric(metric)
406
+ return eval_metric.score(
407
+ data=data,
408
+ predictions=predictions,
409
+ prediction_length=self.prediction_length,
410
+ target=self.target,
411
+ seasonal_period=self.eval_metric_seasonal_period,
412
+ )
413
+
414
+ def score(self, data: TimeSeriesDataFrame, metric: Optional[str] = None) -> float: # type: ignore
415
+ """Return the evaluation scores for given metric and dataset. The last
416
+ `self.prediction_length` time steps of each time series in the input data set
417
+ will be held out and used for computing the evaluation score. Time series
418
+ models always return higher-is-better type scores.
419
+
420
+ Parameters
421
+ ----------
422
+ data: TimeSeriesDataFrame
423
+ Dataset used for scoring.
424
+ metric: str
425
+ String identifier of evaluation metric to use, from one of
426
+ `autogluon.timeseries.utils.metric_utils.AVAILABLE_METRICS`.
427
+
428
+ Other Parameters
429
+ ----------------
430
+ num_samples: int
431
+ Number of samples to use for making evaluation predictions if the probabilistic
432
+ forecasts are generated by forward sampling from the fitted model.
433
+
434
+ Returns
435
+ -------
436
+ score: float
437
+ The computed forecast evaluation score on the last `self.prediction_length`
438
+ time steps of each time series.
439
+ """
440
+ past_data, known_covariates = data.get_model_inputs_for_scoring(
441
+ prediction_length=self.prediction_length, known_covariates_names=self.metadata.known_covariates
442
+ )
443
+ predictions = self.predict(past_data, known_covariates=known_covariates)
444
+ return self._score_with_predictions(data=data, predictions=predictions, metric=metric)
445
+
446
+ def score_and_cache_oof(
447
+ self,
448
+ val_data: TimeSeriesDataFrame,
449
+ store_val_score: bool = False,
450
+ store_predict_time: bool = False,
451
+ **predict_kwargs,
452
+ ) -> None:
453
+ """Compute val_score, predict_time and cache out-of-fold (OOF) predictions."""
454
+ past_data, known_covariates = val_data.get_model_inputs_for_scoring(
455
+ prediction_length=self.prediction_length, known_covariates_names=self.metadata.known_covariates
456
+ )
457
+ predict_start_time = time.time()
458
+ oof_predictions = self.predict(past_data, known_covariates=known_covariates, **predict_kwargs)
459
+ self._oof_predictions = [oof_predictions]
460
+ if store_predict_time:
461
+ self.predict_time = time.time() - predict_start_time
462
+ if store_val_score:
463
+ self.val_score = self._score_with_predictions(val_data, oof_predictions)
464
+
465
+ def _is_gpu_available(self) -> bool:
466
+ return False
467
+
468
+ @staticmethod
469
+ def _get_system_resources() -> Dict[str, Any]:
470
+ resource_manager = get_resource_manager()
471
+ system_num_cpus = resource_manager.get_cpu_count()
472
+ system_num_gpus = resource_manager.get_gpu_count()
473
+ return {
474
+ "num_cpus": system_num_cpus,
475
+ "num_gpus": system_num_gpus,
476
+ }
477
+
478
+ def _get_model_base(self) -> Self:
479
+ return self
480
+
481
+ def preprocess( # type: ignore
482
+ self,
483
+ data: TimeSeriesDataFrame,
484
+ known_covariates: Optional[TimeSeriesDataFrame] = None,
485
+ is_train: bool = False,
486
+ **kwargs,
487
+ ) -> Tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
488
+ """Method that implements model-specific preprocessing logic."""
489
+ return data, known_covariates
490
+
491
+ def persist(self) -> Self:
492
+ """Ask the model to persist its assets in memory, i.e., to predict with low latency. In practice
493
+ this is used for pretrained models that have to lazy-load model parameters to device memory at
494
+ prediction time.
495
+ """
496
+ return self
497
+
498
+ def convert_to_refit_full_via_copy(self) -> Self:
499
+ # save the model as a new model on disk
500
+ previous_name = self.name
501
+ self.rename(self.name + REFIT_FULL_SUFFIX)
502
+ refit_model_path = self.path
503
+ self.save(path=self.path, verbose=False)
504
+
505
+ self.rename(previous_name)
506
+
507
+ refit_model = self.load(path=refit_model_path, verbose=False)
508
+ refit_model.val_score = None
509
+ refit_model.predict_time = None
510
+
511
+ return refit_model
512
+
513
+ def convert_to_refit_full_template(self):
514
+ """
515
+ After calling this function, returned model should be able to be fit without X_val, y_val using the iterations trained by the original model.
516
+
517
+ Increase max_memory_usage_ratio by 25% to reduce the chance that the refit model will trigger NotEnoughMemoryError and skip training.
518
+ This can happen without the 25% increase since the refit model generally will use more training data and thus require more memory.
519
+ """
520
+ params = copy.deepcopy(self.get_params())
521
+
522
+ if "hyperparameters" not in params:
523
+ params["hyperparameters"] = dict()
524
+
525
+ if AG_ARGS_FIT not in params["hyperparameters"]:
526
+ params["hyperparameters"][AG_ARGS_FIT] = dict()
527
+
528
+ params["hyperparameters"].update(self.params_trained)
529
+ params["name"] = params["name"] + REFIT_FULL_SUFFIX
530
+ template = self.__class__(**params)
531
+
532
+ return template
533
+
534
+ def get_user_params(self) -> dict:
535
+ """Used to access user-specified parameters for the model before initialization."""
536
+ if self._user_params is None:
537
+ return {}
538
+ else:
539
+ return self._user_params.copy()
540
+
541
+ def _more_tags(self) -> dict:
542
+ """Encode model properties using tags, similar to sklearn & autogluon.tabular.
543
+
544
+ For more details, see `autogluon.core.models.abstract.AbstractModel._get_tags()` and https://scikit-learn.org/stable/_sources/developers/develop.rst.txt.
545
+
546
+ List of currently supported tags:
547
+ - allow_nan: Can the model handle data with missing values represented by np.nan?
548
+ - can_refit_full: Does it make sense to retrain the model without validation data?
549
+ See `autogluon.core.models.abstract._tags._DEFAULT_TAGS` for more details.
550
+ - can_use_train_data: Can the model use train_data if it's provided to model.fit()?
551
+ - can_use_val_data: Can the model use val_data if it's provided to model.fit()?
552
+ """
553
+ return {
554
+ "allow_nan": False,
555
+ "can_refit_full": False,
556
+ "can_use_train_data": True,
557
+ "can_use_val_data": False,
558
+ }
559
+
560
+
561
+ class AbstractTimeSeriesModel(TimeSeriesModelBase, TimeSeriesTunable, ABC):
403
562
  def fit( # type: ignore
404
563
  self,
405
564
  train_data: TimeSeriesDataFrame,
@@ -492,28 +651,6 @@ class AbstractTimeSeriesModel(ModelBase, ABC):
492
651
 
493
652
  return self
494
653
 
495
- def _preprocess_time_limit(self, time_limit: float) -> float:
496
- original_time_limit = time_limit
497
- max_time_limit_ratio = self.params_aux["max_time_limit_ratio"]
498
- max_time_limit = self.params_aux["max_time_limit"]
499
-
500
- time_limit *= max_time_limit_ratio
501
-
502
- if max_time_limit is not None:
503
- time_limit = min(time_limit, max_time_limit)
504
-
505
- if original_time_limit != time_limit:
506
- time_limit_og_str = f"{original_time_limit:.2f}s" if original_time_limit is not None else "None"
507
- time_limit_str = f"{time_limit:.2f}s" if time_limit is not None else "None"
508
- logger.debug(
509
- f"\tTime limit adjusted due to model hyperparameters: "
510
- f"{time_limit_og_str} -> {time_limit_str} "
511
- f"(ag.max_time_limit={max_time_limit}, "
512
- f"ag.max_time_limit_ratio={max_time_limit_ratio}"
513
- )
514
-
515
- return time_limit
516
-
517
654
  @abstractmethod
518
655
  def _fit( # type: ignore
519
656
  self,
@@ -540,14 +677,9 @@ class AbstractTimeSeriesModel(ModelBase, ABC):
540
677
  "as hyperparameters when initializing or use `hyperparameter_tune` instead."
541
678
  )
542
679
 
543
- @property
544
- def allowed_hyperparameters(self) -> List[str]:
545
- """List of hyperparameters allowed by the model."""
546
- return ["target_scaler", "covariate_regressor"]
547
-
548
680
  def predict( # type: ignore
549
681
  self,
550
- data: Union[TimeSeriesDataFrame, Dict[str, Optional[TimeSeriesDataFrame]]],
682
+ data: TimeSeriesDataFrame,
551
683
  known_covariates: Optional[TimeSeriesDataFrame] = None,
552
684
  **kwargs,
553
685
  ) -> TimeSeriesDataFrame:
@@ -630,197 +762,27 @@ class AbstractTimeSeriesModel(ModelBase, ABC):
630
762
  """Private method for `predict`. See `predict` for documentation of arguments."""
631
763
  pass
632
764
 
633
- def _score_with_predictions(
634
- self,
635
- data: TimeSeriesDataFrame,
636
- predictions: TimeSeriesDataFrame,
637
- metric: Optional[str] = None,
638
- ) -> float:
639
- """Compute the score measuring how well the predictions align with the data."""
640
- eval_metric = self.eval_metric if metric is None else check_get_evaluation_metric(metric)
641
- return eval_metric.score(
642
- data=data,
643
- predictions=predictions,
644
- prediction_length=self.prediction_length,
645
- target=self.target,
646
- seasonal_period=self.eval_metric_seasonal_period,
647
- )
648
-
649
- def score(self, data: TimeSeriesDataFrame, metric: Optional[str] = None) -> float: # type: ignore
650
- """Return the evaluation scores for given metric and dataset. The last
651
- `self.prediction_length` time steps of each time series in the input data set
652
- will be held out and used for computing the evaluation score. Time series
653
- models always return higher-is-better type scores.
654
-
655
- Parameters
656
- ----------
657
- data: TimeSeriesDataFrame
658
- Dataset used for scoring.
659
- metric: str
660
- String identifier of evaluation metric to use, from one of
661
- `autogluon.timeseries.utils.metric_utils.AVAILABLE_METRICS`.
662
-
663
- Other Parameters
664
- ----------------
665
- num_samples: int
666
- Number of samples to use for making evaluation predictions if the probabilistic
667
- forecasts are generated by forward sampling from the fitted model.
668
-
669
- Returns
670
- -------
671
- score: float
672
- The computed forecast evaluation score on the last `self.prediction_length`
673
- time steps of each time series.
674
- """
675
- past_data, known_covariates = data.get_model_inputs_for_scoring(
676
- prediction_length=self.prediction_length, known_covariates_names=self.metadata.known_covariates
677
- )
678
- predictions = self.predict(past_data, known_covariates=known_covariates)
679
- return self._score_with_predictions(data=data, predictions=predictions, metric=metric)
680
-
681
- def score_and_cache_oof(
682
- self,
683
- val_data: TimeSeriesDataFrame,
684
- store_val_score: bool = False,
685
- store_predict_time: bool = False,
686
- **predict_kwargs,
687
- ) -> None:
688
- """Compute val_score, predict_time and cache out-of-fold (OOF) predictions."""
689
- past_data, known_covariates = val_data.get_model_inputs_for_scoring(
690
- prediction_length=self.prediction_length, known_covariates_names=self.metadata.known_covariates
691
- )
692
- predict_start_time = time.time()
693
- oof_predictions = self.predict(past_data, known_covariates=known_covariates, **predict_kwargs)
694
- self._oof_predictions = [oof_predictions]
695
- if store_predict_time:
696
- self.predict_time = time.time() - predict_start_time
697
- if store_val_score:
698
- self.val_score = self._score_with_predictions(val_data, oof_predictions)
699
-
700
- def _get_hpo_train_fn_kwargs(self, **train_fn_kwargs) -> dict:
701
- """Update kwargs passed to model_trial depending on the model configuration.
702
-
703
- These kwargs need to be updated, for example, by MultiWindowBacktestingModel.
704
- """
705
- return train_fn_kwargs
706
-
707
- def _is_gpu_available(self) -> bool:
708
- return False
709
-
710
- @staticmethod
711
- def _get_system_resources() -> Dict[str, Any]:
712
- resource_manager = get_resource_manager()
713
- system_num_cpus = resource_manager.get_cpu_count()
714
- system_num_gpus = resource_manager.get_gpu_count()
715
- return {
716
- "num_cpus": system_num_cpus,
717
- "num_gpus": system_num_gpus,
718
- }
719
-
720
- def hyperparameter_tune(
721
- self,
722
- train_data: TimeSeriesDataFrame,
723
- val_data: Optional[TimeSeriesDataFrame],
724
- val_splitter: Any = None,
725
- default_num_trials: Optional[int] = 1,
726
- refit_every_n_windows: Optional[int] = 1,
727
- hyperparameter_tune_kwargs: Union[str, dict] = "auto",
728
- time_limit: Optional[float] = None,
729
- ) -> Tuple[Dict[str, Any], Any]:
730
- hpo_executor = self._get_default_hpo_executor()
731
- hpo_executor.initialize(
732
- hyperparameter_tune_kwargs, default_num_trials=default_num_trials, time_limit=time_limit
733
- )
734
-
735
- # we use k_fold=1 to circumvent autogluon.core logic to manage resources during parallelization
736
- # of different folds
737
- # FIXME: we pass in self which currently does not inherit from AbstractModel
738
- hpo_executor.register_resources(self, k_fold=1, **self._get_system_resources()) # type: ignore
739
-
740
- time_start = time.time()
741
- logger.debug(f"\tStarting hyperparameter tuning for {self.name}")
742
- search_space = self._get_search_space()
743
-
744
- try:
745
- hpo_executor.validate_search_space(search_space, self.name)
746
- except EmptySearchSpace:
747
- return skip_hpo(self, train_data, val_data, time_limit=hpo_executor.time_limit)
748
-
749
- train_path, val_path = self._save_with_data(train_data, val_data)
750
-
751
- train_fn_kwargs = self._get_hpo_train_fn_kwargs(
752
- model_cls=self.__class__,
753
- init_params=self.get_params(),
754
- time_start=time_start,
755
- time_limit=hpo_executor.time_limit,
756
- fit_kwargs=dict(
757
- val_splitter=val_splitter,
758
- refit_every_n_windows=refit_every_n_windows,
759
- ),
760
- train_path=train_path,
761
- val_path=val_path,
762
- hpo_executor=hpo_executor,
763
- )
765
+ def _preprocess_time_limit(self, time_limit: float) -> float:
766
+ original_time_limit = time_limit
767
+ max_time_limit_ratio = self.params_aux["max_time_limit_ratio"]
768
+ max_time_limit = self.params_aux["max_time_limit"]
764
769
 
765
- minimum_resources = self.get_minimum_resources(is_gpu_available=self._is_gpu_available())
766
- hpo_context = disable_stdout if isinstance(hpo_executor, RayHpoExecutor) else nullcontext
770
+ time_limit *= max_time_limit_ratio
767
771
 
768
- minimum_cpu_per_trial = minimum_resources.get("num_cpus", 1)
769
- if not isinstance(minimum_cpu_per_trial, int):
770
- logger.warning(
771
- f"Minimum number of CPUs per trial for {self.name} is not an integer. "
772
- f"Setting to 1. Minimum number of CPUs per trial: {minimum_cpu_per_trial}"
773
- )
774
- minimum_cpu_per_trial = 1
775
-
776
- with hpo_context(), warning_filter(): # prevent Ray from outputting its results to stdout with print
777
- hpo_executor.execute(
778
- model_trial=model_trial,
779
- train_fn_kwargs=train_fn_kwargs,
780
- directory=self.path,
781
- minimum_cpu_per_trial=minimum_cpu_per_trial,
782
- minimum_gpu_per_trial=minimum_resources.get("num_gpus", 0),
783
- model_estimate_memory_usage=None, # type: ignore
784
- adapter_type="timeseries",
785
- )
772
+ if max_time_limit is not None:
773
+ time_limit = min(time_limit, max_time_limit)
786
774
 
787
- assert self.path_root is not None
788
- hpo_models, analysis = hpo_executor.get_hpo_results(
789
- model_name=self.name,
790
- model_path_root=self.path_root,
791
- time_start=time_start,
775
+ if original_time_limit != time_limit:
776
+ time_limit_og_str = f"{original_time_limit:.2f}s" if original_time_limit is not None else "None"
777
+ time_limit_str = f"{time_limit:.2f}s" if time_limit is not None else "None"
778
+ logger.debug(
779
+ f"\tTime limit adjusted due to model hyperparameters: "
780
+ f"{time_limit_og_str} -> {time_limit_str} "
781
+ f"(ag.max_time_limit={max_time_limit}, "
782
+ f"ag.max_time_limit_ratio={max_time_limit_ratio}"
792
783
  )
793
784
 
794
- return hpo_models, analysis
795
-
796
- @property
797
- def is_ensemble(self) -> bool:
798
- """Return True if the model is an ensemble model or a container of multiple models."""
799
- return self._get_model_base() is self
800
-
801
- def _get_default_hpo_executor(self) -> HpoExecutor:
802
- backend = (
803
- self._get_model_base()._get_hpo_backend()
804
- ) # If ensemble, will use the base model to determine backend
805
- if backend == RAY_BACKEND:
806
- try:
807
- try_import_ray()
808
- except Exception as e:
809
- warning_msg = f"Will use custom hpo logic because ray import failed. Reason: {str(e)}"
810
- dup_filter.attach_filter_targets(warning_msg)
811
- logger.warning(warning_msg)
812
- backend = CUSTOM_BACKEND
813
- hpo_executor = HpoExecutorFactory.get_hpo_executor(backend)() # type: ignore
814
- return hpo_executor
815
-
816
- def _get_model_base(self) -> AbstractTimeSeriesModel:
817
- return self
818
-
819
- def _get_hpo_backend(self) -> str:
820
- """Choose which backend("ray" or "custom") to use for hpo"""
821
- if DistributedContext.is_distributed_mode():
822
- return RAY_BACKEND
823
- return CUSTOM_BACKEND
785
+ return time_limit
824
786
 
825
787
  def _get_search_space(self):
826
788
  """Sets up default search space for HPO. Each hyperparameter which user did not specify is converted from
@@ -828,93 +790,3 @@ class AbstractTimeSeriesModel(ModelBase, ABC):
828
790
  """
829
791
  params = self.params.copy()
830
792
  return params
831
-
832
- def _save_with_data(self, train_data, val_data):
833
- self.set_contexts(os.path.abspath(self.path))
834
- dataset_train_filename = "dataset_train.pkl"
835
- train_path = os.path.join(self.path, dataset_train_filename)
836
- save_pkl.save(path=train_path, object=train_data)
837
-
838
- dataset_val_filename = "dataset_val.pkl"
839
- val_path = os.path.join(self.path, dataset_val_filename)
840
- save_pkl.save(path=val_path, object=val_data)
841
- return train_path, val_path
842
-
843
- def preprocess( # type: ignore
844
- self,
845
- data: TimeSeriesDataFrame,
846
- known_covariates: Optional[TimeSeriesDataFrame] = None,
847
- is_train: bool = False,
848
- **kwargs,
849
- ) -> Tuple[TimeSeriesDataFrame, Optional[TimeSeriesDataFrame]]:
850
- """Method that implements model-specific preprocessing logic."""
851
- return data, known_covariates
852
-
853
- def persist(self) -> Self:
854
- """Ask the model to persist its assets in memory, i.e., to predict with low latency. In practice
855
- this is used for pretrained models that have to lazy-load model parameters to device memory at
856
- prediction time.
857
- """
858
- return self
859
-
860
- def convert_to_refit_full_via_copy(self) -> Self:
861
- # save the model as a new model on disk
862
- previous_name = self.name
863
- self.rename(self.name + REFIT_FULL_SUFFIX)
864
- refit_model_path = self.path
865
- self.save(path=self.path, verbose=False)
866
-
867
- self.rename(previous_name)
868
-
869
- refit_model = self.load(path=refit_model_path, verbose=False)
870
- refit_model.val_score = None
871
- refit_model.predict_time = None
872
-
873
- return refit_model
874
-
875
- def convert_to_refit_full_template(self):
876
- """
877
- After calling this function, returned model should be able to be fit without X_val, y_val using the iterations trained by the original model.
878
-
879
- Increase max_memory_usage_ratio by 25% to reduce the chance that the refit model will trigger NotEnoughMemoryError and skip training.
880
- This can happen without the 25% increase since the refit model generally will use more training data and thus require more memory.
881
- """
882
- params = copy.deepcopy(self.get_params())
883
-
884
- if "hyperparameters" not in params:
885
- params["hyperparameters"] = dict()
886
-
887
- if AG_ARGS_FIT not in params["hyperparameters"]:
888
- params["hyperparameters"][AG_ARGS_FIT] = dict()
889
-
890
- params["hyperparameters"].update(self.params_trained)
891
- params["name"] = params["name"] + REFIT_FULL_SUFFIX
892
- template = self.__class__(**params)
893
-
894
- return template
895
-
896
- def get_user_params(self) -> dict:
897
- """Used to access user-specified parameters for the model before initialization."""
898
- if self._user_params is None:
899
- return {}
900
- else:
901
- return self._user_params.copy()
902
-
903
- def _more_tags(self) -> dict:
904
- """Encode model properties using tags, similar to sklearn & autogluon.tabular.
905
-
906
- For more details, see `autogluon.core.models.abstract.AbstractModel._get_tags()` and https://scikit-learn.org/stable/_sources/developers/develop.rst.txt.
907
-
908
- List of currently supported tags:
909
- - allow_nan: Can the model handle data with missing values represented by np.nan?
910
- - can_refit_full: Does it make sense to retrain the model without validation data?
911
- See `autogluon.core.models.abstract._tags._DEFAULT_TAGS` for more details.
912
- - can_use_train_data: Can the model use train_data if it's provided to model.fit()?
913
- - can_use_val_data: Can the model use val_data if it's provided to model.fit()?
914
- """
915
- return {
916
- "allow_nan": False,
917
- "can_refit_full": False,
918
- "can_use_train_data": True,
919
- "can_use_val_data": False,
920
- }
@@ -0,0 +1,189 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ import time
6
+ from abc import ABC, abstractmethod
7
+ from contextlib import nullcontext
8
+ from typing import Any, Dict, Optional, Tuple, Union
9
+
10
+ from typing_extensions import Self
11
+
12
+ from autogluon.common.savers import save_pkl
13
+ from autogluon.common.utils.distribute_utils import DistributedContext
14
+ from autogluon.common.utils.log_utils import DuplicateFilter
15
+ from autogluon.common.utils.try_import import try_import_ray
16
+ from autogluon.core.hpo.constants import CUSTOM_BACKEND, RAY_BACKEND
17
+ from autogluon.core.hpo.exceptions import EmptySearchSpace
18
+ from autogluon.core.hpo.executors import HpoExecutor, HpoExecutorFactory, RayHpoExecutor
19
+ from autogluon.core.models import Tunable
20
+ from autogluon.timeseries.dataset import TimeSeriesDataFrame
21
+ from autogluon.timeseries.utils.warning_filters import disable_stdout, warning_filter
22
+
23
+ from .model_trial import model_trial, skip_hpo
24
+
25
+ logger = logging.getLogger(__name__)
26
+ dup_filter = DuplicateFilter()
27
+ logger.addFilter(dup_filter)
28
+
29
+
30
+ class TimeSeriesTunable(Tunable, ABC):
31
+ @abstractmethod
32
+ def __init__(self) -> None:
33
+ self.name: str
34
+ self.path: str
35
+ self.path_root: str
36
+
37
+ def hyperparameter_tune(
38
+ self,
39
+ train_data: TimeSeriesDataFrame,
40
+ val_data: Optional[TimeSeriesDataFrame],
41
+ val_splitter: Any = None,
42
+ default_num_trials: Optional[int] = 1,
43
+ refit_every_n_windows: Optional[int] = 1,
44
+ hyperparameter_tune_kwargs: Union[str, dict] = "auto",
45
+ time_limit: Optional[float] = None,
46
+ ) -> Tuple[Dict[str, Any], Any]:
47
+ hpo_executor = self._get_default_hpo_executor()
48
+ hpo_executor.initialize(
49
+ hyperparameter_tune_kwargs, default_num_trials=default_num_trials, time_limit=time_limit
50
+ )
51
+
52
+ # we use k_fold=1 to circumvent autogluon.core logic to manage resources during parallelization
53
+ # of different folds
54
+ # FIXME: we pass in self which currently does not inherit from AbstractModel
55
+ hpo_executor.register_resources(self, k_fold=1, **self._get_system_resources()) # type: ignore
56
+
57
+ time_start = time.time()
58
+ logger.debug(f"\tStarting hyperparameter tuning for {self.name}")
59
+ search_space = self._get_search_space()
60
+
61
+ try:
62
+ hpo_executor.validate_search_space(search_space, self.name)
63
+ except EmptySearchSpace:
64
+ return skip_hpo(self, train_data, val_data, time_limit=hpo_executor.time_limit)
65
+
66
+ train_path, val_path = self._save_with_data(train_data, val_data)
67
+
68
+ train_fn_kwargs = self._get_hpo_train_fn_kwargs(
69
+ model_cls=self.__class__,
70
+ init_params=self.get_params(),
71
+ time_start=time_start,
72
+ time_limit=hpo_executor.time_limit,
73
+ fit_kwargs=dict(
74
+ val_splitter=val_splitter,
75
+ refit_every_n_windows=refit_every_n_windows,
76
+ ),
77
+ train_path=train_path,
78
+ val_path=val_path,
79
+ hpo_executor=hpo_executor,
80
+ )
81
+
82
+ minimum_resources = self.get_minimum_resources(is_gpu_available=self._is_gpu_available())
83
+ hpo_context = disable_stdout if isinstance(hpo_executor, RayHpoExecutor) else nullcontext
84
+
85
+ minimum_cpu_per_trial = minimum_resources.get("num_cpus", 1)
86
+ if not isinstance(minimum_cpu_per_trial, int):
87
+ logger.warning(
88
+ f"Minimum number of CPUs per trial for {self.name} is not an integer. "
89
+ f"Setting to 1. Minimum number of CPUs per trial: {minimum_cpu_per_trial}"
90
+ )
91
+ minimum_cpu_per_trial = 1
92
+
93
+ with hpo_context(), warning_filter(): # prevent Ray from outputting its results to stdout with print
94
+ hpo_executor.execute(
95
+ model_trial=model_trial,
96
+ train_fn_kwargs=train_fn_kwargs,
97
+ directory=self.path,
98
+ minimum_cpu_per_trial=minimum_cpu_per_trial,
99
+ minimum_gpu_per_trial=minimum_resources.get("num_gpus", 0),
100
+ model_estimate_memory_usage=None, # type: ignore
101
+ adapter_type="timeseries",
102
+ )
103
+
104
+ assert self.path_root is not None
105
+ hpo_models, analysis = hpo_executor.get_hpo_results(
106
+ model_name=self.name,
107
+ model_path_root=self.path_root,
108
+ time_start=time_start,
109
+ )
110
+
111
+ return hpo_models, analysis
112
+
113
+ def _get_default_hpo_executor(self) -> HpoExecutor:
114
+ backend = (
115
+ self._get_model_base()._get_hpo_backend()
116
+ ) # If ensemble, will use the base model to determine backend
117
+ if backend == RAY_BACKEND:
118
+ try:
119
+ try_import_ray()
120
+ except Exception as e:
121
+ warning_msg = f"Will use custom hpo logic because ray import failed. Reason: {str(e)}"
122
+ dup_filter.attach_filter_targets(warning_msg)
123
+ logger.warning(warning_msg)
124
+ backend = CUSTOM_BACKEND
125
+ hpo_executor = HpoExecutorFactory.get_hpo_executor(backend)() # type: ignore
126
+ return hpo_executor
127
+
128
+ def _get_hpo_backend(self) -> str:
129
+ """Choose which backend("ray" or "custom") to use for hpo"""
130
+ if DistributedContext.is_distributed_mode():
131
+ return RAY_BACKEND
132
+ return CUSTOM_BACKEND
133
+
134
+ def _get_hpo_train_fn_kwargs(self, **train_fn_kwargs) -> dict:
135
+ """Update kwargs passed to model_trial depending on the model configuration.
136
+
137
+ These kwargs need to be updated, for example, by MultiWindowBacktestingModel.
138
+ """
139
+ return train_fn_kwargs
140
+
141
+ def estimate_memory_usage(self, *args, **kwargs) -> float | None:
142
+ """Return the estimated memory usage of the model. None if memory usage cannot be
143
+ estimated.
144
+ """
145
+ return None
146
+
147
+ def get_minimum_resources(self, is_gpu_available: bool = False) -> Dict[str, Union[int, float]]:
148
+ return {
149
+ "num_cpus": 1,
150
+ }
151
+
152
+ def _save_with_data(
153
+ self, train_data: TimeSeriesDataFrame, val_data: Optional[TimeSeriesDataFrame]
154
+ ) -> Tuple[str, str]:
155
+ self.path = os.path.abspath(self.path)
156
+ self.path_root = self.path.rsplit(self.name, 1)[0]
157
+
158
+ dataset_train_filename = "dataset_train.pkl"
159
+ train_path = os.path.join(self.path, dataset_train_filename)
160
+ save_pkl.save(path=train_path, object=train_data)
161
+
162
+ dataset_val_filename = "dataset_val.pkl"
163
+ val_path = os.path.join(self.path, dataset_val_filename)
164
+ save_pkl.save(path=val_path, object=val_data)
165
+ return train_path, val_path
166
+
167
+ @abstractmethod
168
+ def _get_model_base(self) -> Self:
169
+ pass
170
+
171
+ @abstractmethod
172
+ def _is_gpu_available(self) -> bool:
173
+ pass
174
+
175
+ @abstractmethod
176
+ def _get_search_space(self) -> Dict[str, Any]:
177
+ pass
178
+
179
+ @abstractmethod
180
+ def get_params(self) -> dict:
181
+ """Return a clean copy of constructor parameters that can be used to
182
+ clone the current model.
183
+ """
184
+ pass
185
+
186
+ @staticmethod
187
+ @abstractmethod
188
+ def _get_system_resources() -> Dict[str, Any]:
189
+ pass
@@ -101,7 +101,9 @@ class TimeSeriesEnsembleSelection(EnsembleSelection):
101
101
  class TimeSeriesGreedyEnsemble(AbstractTimeSeriesEnsembleModel):
102
102
  """Constructs a weighted ensemble using the greedy Ensemble Selection algorithm."""
103
103
 
104
- def __init__(self, name: str, ensemble_size: int = 100, **kwargs):
104
+ def __init__(self, name: Optional[str] = None, ensemble_size: int = 100, **kwargs):
105
+ if name is None:
106
+ name = "WeightedEnsemble"
105
107
  super().__init__(name=name, **kwargs)
106
108
  self.ensemble_size = ensemble_size
107
109
  self.model_to_weight: Dict[str, float] = {}
@@ -144,7 +146,7 @@ class TimeSeriesGreedyEnsemble(AbstractTimeSeriesEnsembleModel):
144
146
  return np.array(list(self.model_to_weight.values()), dtype=np.float64)
145
147
 
146
148
  def predict(self, data: Dict[str, Optional[TimeSeriesDataFrame]], **kwargs) -> TimeSeriesDataFrame:
147
- if set(data.keys()) != set(self.model_names):
149
+ if not set(self.model_names).issubset(set(data.keys())):
148
150
  raise ValueError(
149
151
  f"Set of models given for prediction in {self.name} differ from those provided during initialization."
150
152
  )
@@ -235,9 +235,6 @@ def get_preset_models(
235
235
  "is present in `excluded_model_types` and will be removed."
236
236
  )
237
237
  continue
238
- if "mxnet" in model.lower():
239
- logger.info(f"\tMXNet model '{model}' given in `hyperparameters` is deprecated and won't be trained. ")
240
- continue
241
238
  model_type = MODEL_TYPES[model]
242
239
  elif isinstance(model, type):
243
240
  if not issubclass(model, AbstractTimeSeriesModel):
@@ -1,4 +1,4 @@
1
1
  """This is the autogluon version file."""
2
2
 
3
- __version__ = "1.2.1b20250305"
3
+ __version__ = "1.2.1b20250307"
4
4
  __lite__ = False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: autogluon.timeseries
3
- Version: 1.2.1b20250305
3
+ Version: 1.2.1b20250307
4
4
  Summary: Fast and Accurate ML in 3 Lines of Code
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -55,9 +55,9 @@ Requires-Dist: fugue>=0.9.0
55
55
  Requires-Dist: tqdm<5,>=4.38
56
56
  Requires-Dist: orjson~=3.9
57
57
  Requires-Dist: tensorboard<3,>=2.9
58
- Requires-Dist: autogluon.core[raytune]==1.2.1b20250305
59
- Requires-Dist: autogluon.common==1.2.1b20250305
60
- Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.2.1b20250305
58
+ Requires-Dist: autogluon.core[raytune]==1.2.1b20250307
59
+ Requires-Dist: autogluon.common==1.2.1b20250307
60
+ Requires-Dist: autogluon.tabular[catboost,lightgbm,xgboost]==1.2.1b20250307
61
61
  Provides-Extra: all
62
62
  Provides-Extra: chronos-onnx
63
63
  Requires-Dist: optimum[onnxruntime]<1.20,>=1.17; extra == "chronos-onnx"
@@ -1,4 +1,4 @@
1
- autogluon.timeseries-1.2.1b20250305-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
1
+ autogluon.timeseries-1.2.1b20250307-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
2
2
  autogluon/timeseries/__init__.py,sha256=_CrLLc1fkjen7UzWoO0Os8WZoHOgvZbHKy46I8v_4k4,304
3
3
  autogluon/timeseries/evaluator.py,sha256=l642tYfTHsl8WVIq_vV6qhgAFVFr9UuZD7gLra3A_Kc,250
4
4
  autogluon/timeseries/learner.py,sha256=PDAHFlos6q5JukwRE86tKoH0zxYf3nLzy7qfD_a5NYY,13849
@@ -6,7 +6,7 @@ autogluon/timeseries/predictor.py,sha256=DgKNvDfduVyauR7MXQZk04JyT3fc5erXAGVp3XO
6
6
  autogluon/timeseries/regressor.py,sha256=3MlTpP-M1ayTZ52UQDK0wIMMFUijPep-iEyftlDdKPg,11804
7
7
  autogluon/timeseries/splitter.py,sha256=yzPca9p2bWV-_VJAptUyyzQsxu-uixAdpMoGQtDzMD4,3205
8
8
  autogluon/timeseries/trainer.py,sha256=L9FT5qERcqlWTgH9IgE6QsO0aBNj2nivRKF2Oy4UJOk,57250
9
- autogluon/timeseries/version.py,sha256=hjp__CpU1tLt2wGIEfd4tVSKrES_x0vHTkZdLuoYGnE,91
9
+ autogluon/timeseries/version.py,sha256=c5NUbrqeXmY6g27wyrvskoLAD4GR6pJJiY4KIluKRt0,91
10
10
  autogluon/timeseries/configs/__init__.py,sha256=BTtHIPCYeGjqgOcvqb8qPD4VNX-ICKOg6wnkew1cPOE,98
11
11
  autogluon/timeseries/configs/presets_configs.py,sha256=cLat8ecLlWrI-SC5KLBDCX2SbVXaucemy2pjxJAtSY0,2543
12
12
  autogluon/timeseries/dataset/__init__.py,sha256=UvnhAN5tjgxXTHoZMQDy64YMDj4Xxa68yY7NP4vAw0o,81
@@ -17,10 +17,11 @@ autogluon/timeseries/metrics/point.py,sha256=g7L8jVUKc5YVjETZ-B7syK9nZswfKxLFlkN
17
17
  autogluon/timeseries/metrics/quantile.py,sha256=eemdLbo3y2wstnVkuA-f55YXywctUmSW1EhIW4BsoH4,3965
18
18
  autogluon/timeseries/metrics/utils.py,sha256=HuDe1BNe8yJU4f_DKM913nNrUueoRaw6zhxm1-S20s0,910
19
19
  autogluon/timeseries/models/__init__.py,sha256=MYD9JJ-wUDE5B6jW6E6LU2eXQ6vflfQBvqQJkdzJa3A,1189
20
- autogluon/timeseries/models/presets.py,sha256=GezDk-p591Mlhm5UTIjKKJqQE2mnWw9rdsDYKen4zJo,12478
20
+ autogluon/timeseries/models/presets.py,sha256=qfpxoT3G3FEM2_P41nBfTXGNuLZTneCXAVa15guW5do,12292
21
21
  autogluon/timeseries/models/abstract/__init__.py,sha256=wvDsQAZIV0N3AwBeMaGItoQ82trEfnT-nol2AAOIxBg,102
22
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py,sha256=ouQJu75JydLOzWAroHWHzLhaRgkeOXOu0d5F1dh_4Yc,40598
22
+ autogluon/timeseries/models/abstract/abstract_timeseries_model.py,sha256=XlLZQjt0LRRZGcCzqKVXSpuiFTwUOoDdhNm63igdIdE,34836
23
23
  autogluon/timeseries/models/abstract/model_trial.py,sha256=ENPg_7nsdxIvaNM0o0UShZ3x8jFlRmwRc5m0fGPC0TM,3720
24
+ autogluon/timeseries/models/abstract/tunable.py,sha256=SFl4vjkb6BfFFaRPVdftnnLYlIyCThutLHxiiAlV6tY,7168
24
25
  autogluon/timeseries/models/autogluon_tabular/__init__.py,sha256=r9i6jWcyeLHYClkcMSKRVsfrkBUMxpDrTATNTBc_qgQ,136
25
26
  autogluon/timeseries/models/autogluon_tabular/mlforecast.py,sha256=HGuV6_63TnBK9RqVD-VUTbbBuxQG9lmKxo5kLQLTlug,33016
26
27
  autogluon/timeseries/models/autogluon_tabular/transforms.py,sha256=CVvNun8DKH7UQGyXU-iO2xmvBIHRQElw72gIrZ7QjkU,2504
@@ -34,7 +35,7 @@ autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py,sha256=2MJuik-YFgON
34
35
  autogluon/timeseries/models/chronos/pipeline/utils.py,sha256=dtDX5Pyu95bGv7qmqgfUc1iYowWPY84dnGN0uyqyHyQ,13131
35
36
  autogluon/timeseries/models/ensemble/__init__.py,sha256=kFr11Gmt7lQJu9Rr8HuIPphQN5l1TsoorfbJm_O3a_s,128
36
37
  autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py,sha256=LzL64JASiwkLsuFxGToXJGRItcMxq5_Ig2QP5Zm7SHw,3537
37
- autogluon/timeseries/models/ensemble/greedy_ensemble.py,sha256=UPEmNx-RSuqCXS7V093NEid_AwwEigM6AXMcZtof8vg,7230
38
+ autogluon/timeseries/models/ensemble/greedy_ensemble.py,sha256=v5A2xv4d_QynA1GWD7iqmn-VVEFpD88Oiswyp72yBCc,7321
38
39
  autogluon/timeseries/models/gluonts/__init__.py,sha256=asC1PTj4j9xMbilvk1IT1julnpeoKbv5ZNuAR6-DFgA,361
39
40
  autogluon/timeseries/models/gluonts/abstract_gluonts.py,sha256=brf2lIMHH4a_AETwyOcOBVPWqWhLxr8iolJ3Z5AR8MA,30621
40
41
  autogluon/timeseries/models/gluonts/torch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -58,11 +59,11 @@ autogluon/timeseries/utils/datetime/base.py,sha256=3NdsH3NDq4cVAOSoy3XpaNixyNlbj
58
59
  autogluon/timeseries/utils/datetime/lags.py,sha256=gQDk5_zmsY5DUWDUpSaCKYkQ9nHKKY-LsywJQRAoYSk,5988
59
60
  autogluon/timeseries/utils/datetime/seasonality.py,sha256=YK_2k8hvYIMW-sJPnjGWRtCnvIOthwA2hATB3nwVoD4,834
60
61
  autogluon/timeseries/utils/datetime/time_features.py,sha256=MjLi3zQ00uWWJtXH9oGX2GJkTbvjdSiuabSa4kcVuxE,2672
61
- autogluon.timeseries-1.2.1b20250305.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
62
- autogluon.timeseries-1.2.1b20250305.dist-info/METADATA,sha256=yPmePcrpFvseKU8tl3A7y22PYvTf5SfD_2lAahDWfU8,12684
63
- autogluon.timeseries-1.2.1b20250305.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
64
- autogluon.timeseries-1.2.1b20250305.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
65
- autogluon.timeseries-1.2.1b20250305.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
66
- autogluon.timeseries-1.2.1b20250305.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
67
- autogluon.timeseries-1.2.1b20250305.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
68
- autogluon.timeseries-1.2.1b20250305.dist-info/RECORD,,
62
+ autogluon.timeseries-1.2.1b20250307.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
63
+ autogluon.timeseries-1.2.1b20250307.dist-info/METADATA,sha256=Ws9N44bzdHBksRKp4Zwq0bPj3tq7UisxMJnfRey8Za0,12684
64
+ autogluon.timeseries-1.2.1b20250307.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
65
+ autogluon.timeseries-1.2.1b20250307.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
66
+ autogluon.timeseries-1.2.1b20250307.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
67
+ autogluon.timeseries-1.2.1b20250307.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
68
+ autogluon.timeseries-1.2.1b20250307.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
69
+ autogluon.timeseries-1.2.1b20250307.dist-info/RECORD,,