autogluon.timeseries 1.1.2b20241112__py3-none-any.whl → 1.1.2b20241114__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/timeseries/metrics/__init__.py +13 -3
- autogluon/timeseries/metrics/point.py +50 -0
- autogluon/timeseries/models/chronos/model.py +269 -12
- autogluon/timeseries/models/chronos/pipeline/base.py +14 -1
- autogluon/timeseries/models/chronos/pipeline/chronos.py +86 -19
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +8 -1
- autogluon/timeseries/models/chronos/pipeline/utils.py +239 -3
- autogluon/timeseries/models/gluonts/abstract_gluonts.py +33 -22
- autogluon/timeseries/models/gluonts/torch/models.py +39 -27
- autogluon/timeseries/version.py +1 -1
- {autogluon.timeseries-1.1.2b20241112.dist-info → autogluon.timeseries-1.1.2b20241114.dist-info}/METADATA +4 -4
- {autogluon.timeseries-1.1.2b20241112.dist-info → autogluon.timeseries-1.1.2b20241114.dist-info}/RECORD +19 -19
- /autogluon.timeseries-1.1.2b20241112-py3.8-nspkg.pth → /autogluon.timeseries-1.1.2b20241114-py3.8-nspkg.pth +0 -0
- {autogluon.timeseries-1.1.2b20241112.dist-info → autogluon.timeseries-1.1.2b20241114.dist-info}/LICENSE +0 -0
- {autogluon.timeseries-1.1.2b20241112.dist-info → autogluon.timeseries-1.1.2b20241114.dist-info}/NOTICE +0 -0
- {autogluon.timeseries-1.1.2b20241112.dist-info → autogluon.timeseries-1.1.2b20241114.dist-info}/WHEEL +0 -0
- {autogluon.timeseries-1.1.2b20241112.dist-info → autogluon.timeseries-1.1.2b20241114.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.1.2b20241112.dist-info → autogluon.timeseries-1.1.2b20241114.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.1.2b20241112.dist-info → autogluon.timeseries-1.1.2b20241114.dist-info}/zip-safe +0 -0

--- autogluon/timeseries/metrics/__init__.py
+++ autogluon/timeseries/metrics/__init__.py
@@ -2,7 +2,7 @@ from pprint import pformat
 from typing import Type, Union
 
 from .abstract import TimeSeriesScorer
-from .point import MAE, MAPE, MASE, MSE, RMSE, RMSLE, RMSSE, SMAPE, WAPE
+from .point import MAE, MAPE, MASE, MSE, RMSE, RMSLE, RMSSE, SMAPE, WAPE, WCD
 from .quantile import SQL, WQL
 
 __all__ = [
@@ -16,6 +16,7 @@ __all__ = [
     "RMSSE",
     "SQL",
     "WAPE",
+    "WCD",
     "WQL",
 ]
 
@@ -40,6 +41,11 @@ DEPRECATED_METRICS = {
     "mean_wQuantileLoss": "WQL",
 }
 
+# Experimental metrics that are not yet user facing
+EXPERIMENTAL_METRICS = {
+    "WCD": WCD,
+}
+
 
 def check_get_evaluation_metric(
     eval_metric: Union[str, TimeSeriesScorer, Type[TimeSeriesScorer], None] = None
@@ -51,12 +57,16 @@ def check_get_evaluation_metric(
         eval_metric = eval_metric()
     elif isinstance(eval_metric, str):
         eval_metric = DEPRECATED_METRICS.get(eval_metric, eval_metric)
-        if eval_metric.upper() not in AVAILABLE_METRICS:
+        metric_name = eval_metric.upper()
+        if metric_name in AVAILABLE_METRICS:
+            eval_metric = AVAILABLE_METRICS[metric_name]()
+        elif metric_name in EXPERIMENTAL_METRICS:
+            eval_metric = EXPERIMENTAL_METRICS[metric_name]()
+        else:
             raise ValueError(
                 f"Time series metric {eval_metric} not supported. Available metrics are:\n"
                 f"{pformat(sorted(AVAILABLE_METRICS.keys()))}"
             )
-        eval_metric = AVAILABLE_METRICS[eval_metric.upper()]()
     elif eval_metric is None:
         eval_metric = AVAILABLE_METRICS[DEFAULT_METRIC_NAME]()
     else:
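
As an illustration of the new lookup path (an editorial sketch, not part of the diff): the hunk above routes experimental metric names through a separate registry, so a string such as `"WCD"` now resolves even though it is not listed among the user-facing metrics. The import assumes only the `check_get_evaluation_metric` helper defined in this file.

```python
from autogluon.timeseries.metrics import check_get_evaluation_metric

# "WCD" is absent from AVAILABLE_METRICS, so it is resolved via EXPERIMENTAL_METRICS;
# the metric's __init__ emits an "experimental metric" warning (see point.py below).
metric = check_get_evaluation_metric("WCD")
print(type(metric).__name__)  # WCD

# Unknown names still raise ValueError, and the error message lists AVAILABLE_METRICS only.
```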

--- autogluon/timeseries/metrics/point.py
+++ autogluon/timeseries/metrics/point.py
@@ -1,4 +1,5 @@
 import logging
+import warnings
 from typing import Optional
 
 import numpy as np
@@ -359,3 +360,52 @@ class RMSLE(TimeSeriesScorer):
             seasonal_period=seasonal_period,
             **kwargs,
         )
+
+
+class WCD(TimeSeriesScorer):
+    r"""Weighted cumulative discrepancy.
+
+    Measures the discrepancy between the cumulative sum of the forecast and the cumulative sum of the actual values.
+
+    .. math::
+
+        \operatorname{WCD} = 2 \cdot \frac{1}{N} \frac{1}{H} \sum_{i=1}^{N} \sum_{t=T+1}^{T+H} \alpha \cdot \max(0, -d_{i, t}) + (1 - \alpha) \cdot \max(0, d_{i, t})
+
+    where :math:`d_{i, t}` is the difference between the cumulative predicted value and the cumulative actual value
+
+    .. math::
+
+        d_{i, t} = \left(\sum_{s=T+1}^t f_{i, s}\right) - \left(\sum_{s=T+1}^t y_{i, s}\right)
+
+    Parameters
+    ----------
+    alpha : float, default = 0.5
+        Values > 0.5 put a stronger penalty on underpredictions (when the cumulative forecast is below the
+        cumulative actual value). Values < 0.5 put a stronger penalty on overpredictions.
+    """
+
+    def __init__(self, alpha: float = 0.5):
+        assert 0 < alpha < 1, "alpha must be in (0, 1)"
+        self.alpha = alpha
+        self.num_items: Optional[int] = None
+        warnings.warn(
+            f"{self.name} is an experimental metric. Its behavior may change in a future version of AutoGluon."
+        )
+
+    def save_past_metrics(self, data_past: TimeSeriesDataFrame, **kwargs) -> None:
+        self.num_items = data_past.num_items
+
+    def _fast_cumsum(self, y: np.ndarray) -> np.ndarray:
+        """Compute the cumulative sum for each consecutive `prediction_length` items in the array."""
+        y = y.reshape(self.num_items, -1)
+        return np.nancumsum(y, axis=1).ravel()
+
+    def compute_metric(
+        self, data_future: TimeSeriesDataFrame, predictions: TimeSeriesDataFrame, target: str = "target", **kwargs
+    ) -> float:
+        y_true, y_pred = self._get_point_forecast_score_inputs(data_future, predictions, target=target)
+        cumsum_true = self._fast_cumsum(y_true.to_numpy())
+        cumsum_pred = self._fast_cumsum(y_pred.to_numpy())
+        diffs = cumsum_pred - cumsum_true
+        error = diffs * np.where(diffs < 0, -self.alpha, (1 - self.alpha))
+        return 2 * self._safemean(error)
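
To make the asymmetric cumulative penalty concrete, here is a small worked example that mirrors the arithmetic of `compute_metric` above on plain NumPy arrays (a standalone sketch for a single time series, so the per-item reshape is omitted; the numbers are illustrative):

```python
import numpy as np

alpha = 0.5                                  # default: symmetric penalty
y_true = np.array([10.0, 10.0, 10.0, 10.0])  # actual values, H = 4
y_pred = np.array([12.0, 9.0, 11.0, 8.0])    # point forecast

# d_t = cumulative forecast - cumulative actual
diffs = np.nancumsum(y_pred) - np.nancumsum(y_true)  # [2., 1., 2., 0.]

# alpha * max(0, -d_t) penalizes underprediction, (1 - alpha) * max(0, d_t) overprediction
error = diffs * np.where(diffs < 0, -alpha, 1 - alpha)
wcd = 2 * np.nanmean(error)
print(wcd)  # 1.25
```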

--- autogluon/timeseries/models/chronos/model.py
+++ autogluon/timeseries/models/chronos/model.py
@@ -1,5 +1,8 @@
 import logging
 import os
+import shutil
+import time
+from pathlib import Path
 from typing import Any, Dict, Literal, Optional, Union
 
 import numpy as np
@@ -72,9 +75,10 @@ MODEL_ALIASES = {
 
 
 class ChronosModel(AbstractTimeSeriesModel):
-    """Chronos pretrained time series forecasting models
+    """Chronos [Ansari2024]_ pretrained time series forecasting models which can be used for zero-shot forecasting or fine-tuned
+    in a task-specific manner. Models can be based on the original
     `ChronosModel <https://github.com/amazon-science/chronos-forecasting/blob/main/src/chronos/chronos.py>`_ implementation,
-    as well as a newer family of Chronos-Bolt models
+    as well as a newer family of Chronos-Bolt models capable of much faster inference.
 
     The original Chronos is a family of pretrained models, based on the T5 family, with number of parameters ranging between
     8M and 710M. The full collection of Chronos models is available on
@@ -88,6 +92,9 @@ class ChronosModel(AbstractTimeSeriesModel):
     time series is then fed into a T5 model for forecasting. The Chronos-Bolt variants are capable of much faster inference,
     and can all run on CPUs. Chronos-Bolt models are also available on Hugging Face <https://huggingface.co/autogluon/>`_.
 
+    Both Chronos and Chronos-Bolt variants can be fine-tuned by setting ``fine_tune=True`` and selecting appropriate
+    fine-tuning parameters such as the learning rate (``fine_tune_lr``) and max steps (``fine_tune_steps``).
+
     References
     ----------
     .. [Ansari2024] Ansari, Abdul Fatir, Stella, Lorenzo et al.
@@ -108,8 +115,8 @@ class ChronosModel(AbstractTimeSeriesModel):
     num_samples : int, default = 20
         Number of samples used during inference
     device : str, default = None
-        Device to use for inference. If None, model will use the GPU if available.
-        `small`, `base`, and `large`; inference will fail if no GPU is available.
+        Device to use for inference (and fine-tuning, if enabled). If None, model will use the GPU if available.
+        For larger model sizes `small`, `base`, and `large`; inference will fail if no GPU is available.
     context_length : int or None, default = None
         The context length to use in the model. Shorter context lengths will decrease model accuracy, but result
         in faster inference. If None, the model will infer context length from the data set length at inference
@@ -129,12 +136,34 @@ class ChronosModel(AbstractTimeSeriesModel):
     data_loader_num_workers : int, default = 0
         Number of worker processes to be used in the data loader. See documentation on ``torch.utils.data.DataLoader``
         for more information.
+    fine_tune : bool, default = False
+        If True, the pretrained model will be fine-tuned
+    fine_tune_lr: float, default = 0.0001
+        The learning rate used for fine-tuning
+    fine_tune_steps : int, default = 5000
+        The number of gradient update steps to fine-tune for
+    fine_tune_batch_size : int, default = 16
+        The batch size to use for fine-tuning
+    fine_tune_shuffle_buffer_size : int, default = 10000
+        The size of the shuffle buffer to shuffle the data during fine-tuning. If None, shuffling will
+        be turned off.
+    eval_during_fine_tune : bool, default = False
+        If True, validation will be performed during fine-tuning to select the best checkpoint.
+        Setting this argument to True may result in slower fine-tuning.
+    fine_tune_eval_max_items : int, default = 256
+        The maximum number of randomly-sampled time series to use from the validation set for evaluation
+        during fine-tuning. If None, the entire validation dataset will be used.
+    fine_tune_trainer_kwargs : dict, optional
+        Extra keyword arguments passed to ``transformers.TrainingArguments``
+    keep_transformers_logs: bool, default = False
+        If True, the logs generated by transformers will NOT be removed after fine-tuning
     """
 
     # default number of samples for prediction
     default_num_samples: int = 20
     default_model_path = "autogluon/chronos-t5-small"
     maximum_context_length = 2048
+    fine_tuned_ckpt_name: str = "fine-tuned-ckpt"
 
     def __init__(
         self,
@@ -202,6 +231,12 @@ class ChronosModel(AbstractTimeSeriesModel):
         model = load_pkl.load(path=os.path.join(path, cls.model_file_name), verbose=verbose)
         if reset_paths:
             model.set_contexts(path)
+
+        fine_tune_ckpt_path = Path(model.path) / cls.fine_tuned_ckpt_name
+        if fine_tune_ckpt_path.exists():
+            logger.debug(f"Fine-tuned checkpoint exists, setting model_path to {fine_tune_ckpt_path}")
+            model.model_path = fine_tune_ckpt_path
+
         return model
 
     def _is_gpu_available(self) -> bool:
@@ -245,7 +280,7 @@ class ChronosModel(AbstractTimeSeriesModel):
             minimum_resources["num_gpus"] = self.min_num_gpus
         return minimum_resources
 
-    def load_model_pipeline(self):
+    def load_model_pipeline(self, is_training: bool = False):
         from .pipeline import BaseChronosPipeline
 
         gpu_available = self._is_gpu_available()
@@ -262,8 +297,9 @@ class ChronosModel(AbstractTimeSeriesModel):
         pipeline = BaseChronosPipeline.from_pretrained(
             self.model_path,
             device_map=device,
+            # optimization cannot be used during fine-tuning
+            optimization_strategy=None if is_training else self.optimization_strategy,
             torch_dtype=self.torch_dtype,
-            optimization_strategy=self.optimization_strategy,
         )
 
         self.model_pipeline = pipeline
@@ -272,6 +308,59 @@ class ChronosModel(AbstractTimeSeriesModel):
         self.load_model_pipeline()
         return self
 
+    def _has_tf32(self):
+        import torch.cuda
+
+        return torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
+
+    def _get_model_params(self) -> dict:
+        """Gets params that are passed to the inner model."""
+        init_args = super()._get_model_params().copy()
+
+        init_args.setdefault("fine_tune", False)
+        init_args.setdefault("keep_transformers_logs", False)
+        init_args.setdefault("fine_tune_lr", 1e-4)
+        init_args.setdefault("fine_tune_steps", 5000)
+        init_args.setdefault("fine_tune_batch_size", self.default_batch_size)
+        init_args.setdefault("eval_during_fine_tune", False)
+        init_args.setdefault("fine_tune_eval_max_items", 256)
+        init_args.setdefault("fine_tune_shuffle_buffer_size", 10_000)
+
+        eval_during_fine_tune = init_args["eval_during_fine_tune"]
+        output_dir = Path(self.path) / "transformers_logs"
+        fine_tune_trainer_kwargs = dict(
+            output_dir=str(output_dir),
+            per_device_train_batch_size=init_args["fine_tune_batch_size"],
+            per_device_eval_batch_size=init_args["fine_tune_batch_size"],
+            learning_rate=init_args["fine_tune_lr"],
+            lr_scheduler_type="linear",
+            warmup_ratio=0.0,
+            optim="adamw_torch_fused",
+            logging_dir=str(output_dir),
+            logging_strategy="steps",
+            logging_steps=100,
+            report_to="none",
+            max_steps=init_args["fine_tune_steps"],
+            gradient_accumulation_steps=1,
+            dataloader_num_workers=self.data_loader_num_workers,
+            tf32=self._has_tf32(),
+            save_only_model=True,
+            prediction_loss_only=True,
+            save_total_limit=1,
+            save_strategy="steps" if eval_during_fine_tune else "no",
+            save_steps=100 if eval_during_fine_tune else None,
+            evaluation_strategy="steps" if eval_during_fine_tune else "no",
+            eval_steps=100 if eval_during_fine_tune else None,
+            load_best_model_at_end=True if eval_during_fine_tune else False,
+            metric_for_best_model="eval_loss" if eval_during_fine_tune else None,
+        )
+        user_fine_tune_trainer_kwargs = init_args.get("fine_tune_trainer_kwargs", {})
+        fine_tune_trainer_kwargs.update(user_fine_tune_trainer_kwargs)
+
+        init_args["fine_tune_trainer_kwargs"] = fine_tune_trainer_kwargs
+
+        return init_args
+
     def _fit(
         self,
         train_data: TimeSeriesDataFrame,
@@ -279,8 +368,171 @@ class ChronosModel(AbstractTimeSeriesModel):
         time_limit: int = None,
         **kwargs,
     ) -> None:
+        from transformers.trainer import PrinterCallback, Trainer, TrainingArguments
+
+        from .pipeline import ChronosBoltPipeline, ChronosPipeline
+        from .pipeline.utils import (
+            ChronosFineTuningDataset,
+            EvaluateAndSaveFinalStepCallback,
+            LoggerCallback,
+            TimeLimitCallback,
+        )
+
+        # TODO: Add support for fine-tuning models with context_length longer than the pretrained model
+
+        # verbosity < 3: all logs and warnings from transformers will be suppressed
+        # verbosity >= 3: progress bar and loss logs will be logged
+        # verbosity 4: everything will be logged
+        verbosity = kwargs.get("verbosity", 2)
+        for logger_name in logging.root.manager.loggerDict:
+            if "transformers" in logger_name:
+                transformers_logger = logging.getLogger(logger_name)
+                transformers_logger.setLevel(logging.ERROR if verbosity <= 3 else logging.INFO)
+
         self._check_fit_params()
-
+
+        fine_tune_args = self._get_model_params()
+        do_fine_tune = fine_tune_args["fine_tune"]
+
+        if do_fine_tune:
+            assert train_data is not None, "train_data cannot be None when fine_tune=True"
+
+        eval_during_fine_tune = val_data is not None and fine_tune_args["eval_during_fine_tune"]
+
+        start_time = time.monotonic()
+        if do_fine_tune:
+            context_length = self._get_context_length(train_data)
+            # load model pipeline to device memory
+            self.load_model_pipeline(is_training=True)
+
+            fine_tune_prediction_length = self.prediction_length
+            model_prediction_length = self.model_pipeline.inner_model.config.chronos_config["prediction_length"]
+
+            if isinstance(self.model_pipeline, ChronosPipeline):
+                pipeline_specific_trainer_kwargs = {}
+
+                # Update prediction_length of the model
+                # NOTE: We only do this for ChronosPipeline because the prediction length of ChronosBolt models
+                # is fixed due to direct multistep forecasting setup
+                self.model_pipeline.model.config.prediction_length = fine_tune_prediction_length
+                self.model_pipeline.inner_model.config.chronos_config["prediction_length"] = (
+                    fine_tune_prediction_length
+                )
+
+            elif isinstance(self.model_pipeline, ChronosBoltPipeline):
+                # custom label_names is needed for validation to work with ChronosBolt models
+                pipeline_specific_trainer_kwargs = dict(label_names=["target"])
+
+                # truncate prediction_length if it goes beyond ChronosBolt's prediction_length
+                fine_tune_prediction_length = min(model_prediction_length, self.prediction_length)
+
+                if self.prediction_length != fine_tune_prediction_length:
+                    logger.debug(
+                        f"ChronosBolt models can only be fine-tuned with a maximum prediction_length of {model_prediction_length}. "
+                        f"Fine-tuning prediction_length has been changed to {fine_tune_prediction_length}."
+                    )
+
+            fine_tune_trainer_kwargs = fine_tune_args["fine_tune_trainer_kwargs"]
+            fine_tune_trainer_kwargs["disable_tqdm"] = fine_tune_trainer_kwargs.get("disable_tqdm", (verbosity < 3))
+            fine_tune_trainer_kwargs["use_cpu"] = str(self.model_pipeline.inner_model.device) == "cpu"
+            output_dir = Path(fine_tune_trainer_kwargs["output_dir"])
+
+            if not eval_during_fine_tune:
+                # turn off eval-related trainer args
+                fine_tune_trainer_kwargs["evaluation_strategy"] = "no"
+                fine_tune_trainer_kwargs["eval_steps"] = None
+                fine_tune_trainer_kwargs["load_best_model_at_end"] = False
+                fine_tune_trainer_kwargs["metric_for_best_model"] = None
+
+            training_args = TrainingArguments(**fine_tune_trainer_kwargs, **pipeline_specific_trainer_kwargs)
+            tokenizer_train_dataset = ChronosFineTuningDataset(
+                target_df=train_data,
+                target_column=self.target,
+                context_length=context_length,
+                prediction_length=fine_tune_prediction_length,
+                # if tokenizer exists, then the data is returned in the HF-style format accepted by
+                # the original Chronos models otherwise the data is returned in ChronosBolt's format
+                tokenizer=getattr(self.model_pipeline, "tokenizer", None),
+                mode="training",
+            ).shuffle(fine_tune_args["fine_tune_shuffle_buffer_size"])
+
+            callbacks = []
+            if time_limit is not None:
+                callbacks.append(TimeLimitCallback(time_limit=time_limit))
+
+            if val_data is not None:
+                callbacks.append(EvaluateAndSaveFinalStepCallback())
+                # evaluate on a randomly-sampled subset
+                fine_tune_eval_max_items = (
+                    min(val_data.num_items, fine_tune_args["fine_tune_eval_max_items"])
+                    if fine_tune_args["fine_tune_eval_max_items"] is not None
+                    else val_data.num_items
+                )
+
+                if fine_tune_eval_max_items < val_data.num_items:
+                    eval_items = np.random.choice(
+                        val_data.item_ids.values, size=fine_tune_eval_max_items, replace=False
+                    )
+                    val_data = val_data.loc[eval_items]
+
+                tokenizer_val_dataset = ChronosFineTuningDataset(
+                    target_df=val_data,
+                    target_column=self.target,
+                    context_length=context_length,
+                    prediction_length=fine_tune_prediction_length,
+                    tokenizer=getattr(self.model_pipeline, "tokenizer", None),
+                    mode="validation",
+                )
+
+            trainer = Trainer(
+                model=self.model_pipeline.inner_model,
+                args=training_args,
+                train_dataset=tokenizer_train_dataset,
+                eval_dataset=tokenizer_val_dataset if val_data is not None else None,
+                callbacks=callbacks,
+            )
+
+            # remove PrinterCallback from callbacks which logs to the console via a print() call,
+            # so it cannot be handled by setting the log level
+            trainer.pop_callback(PrinterCallback)
+
+            if verbosity >= 3:
+                logger.warning(
+                    "Transformers logging is turned on during fine-tuning. Note that losses reported by transformers "
+                    "may not correspond to those specified via `eval_metric`."
+                )
+                trainer.add_callback(LoggerCallback())
+
+            if val_data is not None:
+                # evaluate once before training
+                zero_shot_eval_loss = trainer.evaluate()["eval_loss"]
+
+            trainer.train()
+
+            if eval_during_fine_tune:
+                # get the best eval_loss logged during fine-tuning
+                log_history_df = pd.DataFrame(trainer.state.log_history)
+                best_train_eval_loss = log_history_df["eval_loss"].min()
+            elif val_data is not None:
+                # evaluate at the end of fine-tuning
+                best_train_eval_loss = trainer.evaluate()["eval_loss"]
+
+            if val_data is None or best_train_eval_loss <= zero_shot_eval_loss:
+                fine_tuned_ckpt_path = Path(self.path) / self.fine_tuned_ckpt_name
+                logger.info(f"Saving fine-tuned model to {fine_tuned_ckpt_path}")
+                self.model_pipeline.inner_model.save_pretrained(Path(self.path) / self.fine_tuned_ckpt_name)
+            else:
+                # Reset the model to its pretrained state
+                logger.info("Validation loss worsened after fine-tuning. Reverting to the pretrained model.")
+                self.model_pipeline = None
+                self.load_model_pipeline(is_training=False)
+
+            if not fine_tune_args["keep_transformers_logs"]:
+                logger.debug(f"Removing transformers_logs directory {output_dir}")
+                shutil.rmtree(output_dir)
+
+        if time_limit is not None:
+            self.time_limit = time_limit - (time.monotonic() - start_time)  # inference time budget
 
     def _get_inference_data_loader(
         self,
@@ -305,6 +557,13 @@ class ChronosModel(AbstractTimeSeriesModel):
             on_batch=timeout_callback(seconds=time_limit),
         )
 
+    def _get_context_length(self, data: TimeSeriesDataFrame) -> int:
+        context_length = self.context_length or min(
+            data.num_timesteps_per_item().max(),
+            self.maximum_context_length,
+        )
+        return context_length
+
     def _predict(
         self,
         data: TimeSeriesDataFrame,
@@ -319,15 +578,13 @@ class ChronosModel(AbstractTimeSeriesModel):
         # Note that this is independent of the model's own context length set in the model's config file.
         # For example, if the context_length is set to 2048 here but the model expects context length
         # (according to its config.json file) of 512, it will further truncate the series during inference.
-        context_length = self.context_length or min(
-            data.num_timesteps_per_item().max(),
-            self.maximum_context_length,
-        )
+        context_length = self._get_context_length(data)
 
         with warning_filter(all_warnings=True):
             import torch
 
             if self.model_pipeline is None:
+                # FIXME: optimization_strategy is ignored when model is fine-tuned
                 # load model pipeline to device memory
                 self.load_model_pipeline()
 
@@ -366,7 +623,7 @@ class ChronosModel(AbstractTimeSeriesModel):
         return TimeSeriesDataFrame(df)
 
     def _more_tags(self) -> Dict:
-        return {"allow_nan": True}
+        return {"allow_nan": True, "can_use_val_data": self._get_model_params()["fine_tune"]}
 
     def score_and_cache_oof(
         self,
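
The `fine_tune*` options documented in the docstring hunk above are regular model hyperparameters, so they would typically be passed through `TimeSeriesPredictor`. A minimal usage sketch under that assumption follows; the toy dataset, prediction length, and shortened step count are illustrative and not taken from the diff.

```python
import pandas as pd
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

# Toy dataset: a single item with 200 hourly observations (illustrative only)
df = pd.DataFrame(
    {
        "item_id": "A",
        "timestamp": pd.date_range("2024-01-01", periods=200, freq="h"),
        "target": [float(i % 24) for i in range(200)],
    }
)
train_data = TimeSeriesDataFrame.from_data_frame(df)

predictor = TimeSeriesPredictor(prediction_length=8).fit(
    train_data,
    hyperparameters={
        "Chronos": {
            "fine_tune": True,       # enables the fine-tuning path added in this diff
            "fine_tune_lr": 1e-4,    # documented default
            "fine_tune_steps": 200,  # reduced from the default of 5000 for the toy example
        }
    },
)
predictions = predictor.predict(train_data)
```

Per the `_fit` hunk above, when validation data is available the fine-tuned checkpoint is kept only if its validation loss does not worsen relative to the zero-shot model; otherwise the pretrained pipeline is reloaded.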

--- autogluon/timeseries/models/chronos/pipeline/base.py
+++ autogluon/timeseries/models/chronos/pipeline/base.py
@@ -2,12 +2,15 @@
 
 from enum import Enum
 from pathlib import Path
-from typing import Dict, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
 
 import torch
 
 from .utils import left_pad_and_stack_1D
 
+if TYPE_CHECKING:
+    from transformers import PreTrainedModel
+
 
 class ForecastType(Enum):
     SAMPLES = "samples"
@@ -36,6 +39,16 @@ class BaseChronosPipeline(metaclass=PipelineRegistry):
         "float64": torch.float64,
     }
 
+    def __init__(self, inner_model: "PreTrainedModel"):
+        """
+        Parameters
+        ----------
+        inner_model : PreTrainedModel
+            A hugging-face transformers PreTrainedModel, e.g., T5ForConditionalGeneration
+        """
+        # for easy access to the inner HF-style model
+        self.inner_model = inner_model
+
     def _prepare_and_validate_context(self, context: Union[torch.Tensor, List[torch.Tensor]]):
         if isinstance(context, list):
             context = left_pad_and_stack_1D(context)