autogluon.timeseries 1.1.2b20241113.tar.gz → 1.1.2b20241114.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/PKG-INFO +1 -1
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/chronos/model.py +269 -12
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/chronos/pipeline/base.py +14 -1
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/chronos/pipeline/chronos.py +86 -19
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +8 -1
- autogluon.timeseries-1.1.2b20241114/src/autogluon/timeseries/models/chronos/pipeline/utils.py +338 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/gluonts/abstract_gluonts.py +33 -22
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/gluonts/torch/models.py +39 -27
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/version.py +1 -1
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon.timeseries.egg-info/PKG-INFO +1 -1
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon.timeseries.egg-info/requires.txt +3 -3
- autogluon.timeseries-1.1.2b20241113/src/autogluon/timeseries/models/chronos/pipeline/utils.py +0 -102
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/setup.cfg +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/setup.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/configs/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/configs/presets_configs.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/dataset/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/dataset/ts_dataframe.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/evaluator.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/learner.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/metrics/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/metrics/abstract.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/metrics/point.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/metrics/quantile.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/metrics/utils.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/abstract/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/abstract/abstract_timeseries_model.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/abstract/model_trial.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/autogluon_tabular/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/autogluon_tabular/mlforecast.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/autogluon_tabular/transforms.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/autogluon_tabular/utils.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/chronos/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/ensemble/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/ensemble/abstract_timeseries_ensemble.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/ensemble/greedy_ensemble.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/gluonts/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/gluonts/torch/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/local/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/local/abstract_local_model.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/local/naive.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/local/npts.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/local/statsforecast.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/multi_window/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/multi_window/multi_window_model.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/models/presets.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/predictor.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/regressor.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/splitter.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/trainer/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/trainer/abstract_trainer.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/trainer/auto_trainer.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/transforms/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/transforms/scaler.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/utils/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/utils/datetime/__init__.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/utils/datetime/base.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/utils/datetime/lags.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/utils/datetime/seasonality.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/utils/datetime/time_features.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/utils/features.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/utils/forecast.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon/timeseries/utils/warning_filters.py +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon.timeseries.egg-info/SOURCES.txt +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon.timeseries.egg-info/dependency_links.txt +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon.timeseries.egg-info/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon.timeseries.egg-info/top_level.txt +0 -0
- {autogluon.timeseries-1.1.2b20241113 → autogluon.timeseries-1.1.2b20241114}/src/autogluon.timeseries.egg-info/zip-safe +0 -0
src/autogluon/timeseries/models/chronos/model.py

@@ -1,5 +1,8 @@
 import logging
 import os
+import shutil
+import time
+from pathlib import Path
 from typing import Any, Dict, Literal, Optional, Union
 
 import numpy as np
@@ -72,9 +75,10 @@ MODEL_ALIASES = {
 
 
 class ChronosModel(AbstractTimeSeriesModel):
-    """Chronos pretrained time series forecasting models
+    """Chronos [Ansari2024]_ pretrained time series forecasting models which can be used for zero-shot forecasting or fine-tuned
+    in a task-specific manner. Models can be based on the original
     `ChronosModel <https://github.com/amazon-science/chronos-forecasting/blob/main/src/chronos/chronos.py>`_ implementation,
-    as well as a newer family of Chronos-Bolt models
+    as well as a newer family of Chronos-Bolt models capable of much faster inference.
 
     The original Chronos is a family of pretrained models, based on the T5 family, with number of parameters ranging between
     8M and 710M. The full collection of Chronos models is available on
@@ -88,6 +92,9 @@ class ChronosModel(AbstractTimeSeriesModel):
     time series is then fed into a T5 model for forecasting. The Chronos-Bolt variants are capable of much faster inference,
     and can all run on CPUs. Chronos-Bolt models are also available on Hugging Face <https://huggingface.co/autogluon/>`_.
 
+    Both Chronos and Chronos-Bolt variants can be fine-tuned by setting ``fine_tune=True`` and selecting appropriate
+    fine-tuning parameters such as the learning rate (``fine_tune_lr``) and max steps (``fine_tune_steps``).
+
     References
     ----------
     .. [Ansari2024] Ansari, Abdul Fatir, Stella, Lorenzo et al.
@@ -108,8 +115,8 @@ class ChronosModel(AbstractTimeSeriesModel):
     num_samples : int, default = 20
         Number of samples used during inference
     device : str, default = None
-        Device to use for inference. If None, model will use the GPU if available.
-        `small`, `base`, and `large`; inference will fail if no GPU is available.
+        Device to use for inference (and fine-tuning, if enabled). If None, model will use the GPU if available.
+        For larger model sizes `small`, `base`, and `large`; inference will fail if no GPU is available.
     context_length : int or None, default = None
         The context length to use in the model. Shorter context lengths will decrease model accuracy, but result
         in faster inference. If None, the model will infer context length from the data set length at inference
@@ -129,12 +136,34 @@ class ChronosModel(AbstractTimeSeriesModel):
     data_loader_num_workers : int, default = 0
         Number of worker processes to be used in the data loader. See documentation on ``torch.utils.data.DataLoader``
         for more information.
+    fine_tune : bool, default = False
+        If True, the pretrained model will be fine-tuned
+    fine_tune_lr: float, default = 0.0001
+        The learning rate used for fine-tuning
+    fine_tune_steps : int, default = 5000
+        The number of gradient update steps to fine-tune for
+    fine_tune_batch_size : int, default = 16
+        The batch size to use for fine-tuning
+    fine_tune_shuffle_buffer_size : int, default = 10000
+        The size of the shuffle buffer to shuffle the data during fine-tuning. If None, shuffling will
+        be turned off.
+    eval_during_fine_tune : bool, default = False
+        If True, validation will be performed during fine-tuning to select the best checkpoint.
+        Setting this argument to True may result in slower fine-tuning.
+    fine_tune_eval_max_items : int, default = 256
+        The maximum number of randomly-sampled time series to use from the validation set for evaluation
+        during fine-tuning. If None, the entire validation dataset will be used.
+    fine_tune_trainer_kwargs : dict, optional
+        Extra keyword arguments passed to ``transformers.TrainingArguments``
+    keep_transformers_logs: bool, default = False
+        If True, the logs generated by transformers will NOT be removed after fine-tuning
     """
 
     # default number of samples for prediction
     default_num_samples: int = 20
     default_model_path = "autogluon/chronos-t5-small"
     maximum_context_length = 2048
+    fine_tuned_ckpt_name: str = "fine-tuned-ckpt"
 
     def __init__(
         self,
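For context on how the hyperparameters documented above are consumed, here is a minimal usage sketch (not part of the diff) that passes them through TimeSeriesPredictor; the file name and parameter values are illustrative assumptions.

from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

# Hypothetical long-format dataset with item_id / timestamp / target columns.
train_data = TimeSeriesDataFrame.from_path("train.csv")

predictor = TimeSeriesPredictor(prediction_length=48).fit(
    train_data,
    hyperparameters={
        "Chronos": {
            "model_path": "autogluon/chronos-t5-small",
            "fine_tune": True,            # enable the fine-tuning path added in this diff
            "fine_tune_lr": 1e-4,
            "fine_tune_steps": 1000,
        }
    },
)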
@@ -202,6 +231,12 @@ class ChronosModel(AbstractTimeSeriesModel):
         model = load_pkl.load(path=os.path.join(path, cls.model_file_name), verbose=verbose)
         if reset_paths:
             model.set_contexts(path)
+
+        fine_tune_ckpt_path = Path(model.path) / cls.fine_tuned_ckpt_name
+        if fine_tune_ckpt_path.exists():
+            logger.debug(f"Fine-tuned checkpoint exists, setting model_path to {fine_tune_ckpt_path}")
+            model.model_path = fine_tune_ckpt_path
+
         return model
 
     def _is_gpu_available(self) -> bool:
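A rough sketch (not from the diff) of what the checkpoint lookup added to load() amounts to; the directory name below is hypothetical, only "fine-tuned-ckpt" comes from the class attribute above.

from pathlib import Path

model_dir = Path("AutogluonModels/ag-example/models/Chronos")  # hypothetical saved-model directory
fine_tune_ckpt_path = model_dir / "fine-tuned-ckpt"            # cls.fine_tuned_ckpt_name

if fine_tune_ckpt_path.exists():
    # the loaded model reads weights from the local fine-tuned checkpoint
    # instead of the original Hugging Face model_path
    model_path = fine_tune_ckpt_path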
@@ -245,7 +280,7 @@ class ChronosModel(AbstractTimeSeriesModel):
             minimum_resources["num_gpus"] = self.min_num_gpus
         return minimum_resources
 
-    def load_model_pipeline(self):
+    def load_model_pipeline(self, is_training: bool = False):
         from .pipeline import BaseChronosPipeline
 
         gpu_available = self._is_gpu_available()
@@ -262,8 +297,9 @@ class ChronosModel(AbstractTimeSeriesModel):
         pipeline = BaseChronosPipeline.from_pretrained(
             self.model_path,
             device_map=device,
+            # optimization cannot be used during fine-tuning
+            optimization_strategy=None if is_training else self.optimization_strategy,
             torch_dtype=self.torch_dtype,
-            optimization_strategy=self.optimization_strategy,
         )
 
         self.model_pipeline = pipeline
@@ -272,6 +308,59 @@ class ChronosModel(AbstractTimeSeriesModel):
         self.load_model_pipeline()
         return self
 
+    def _has_tf32(self):
+        import torch.cuda
+
+        return torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
+
+    def _get_model_params(self) -> dict:
+        """Gets params that are passed to the inner model."""
+        init_args = super()._get_model_params().copy()
+
+        init_args.setdefault("fine_tune", False)
+        init_args.setdefault("keep_transformers_logs", False)
+        init_args.setdefault("fine_tune_lr", 1e-4)
+        init_args.setdefault("fine_tune_steps", 5000)
+        init_args.setdefault("fine_tune_batch_size", self.default_batch_size)
+        init_args.setdefault("eval_during_fine_tune", False)
+        init_args.setdefault("fine_tune_eval_max_items", 256)
+        init_args.setdefault("fine_tune_shuffle_buffer_size", 10_000)
+
+        eval_during_fine_tune = init_args["eval_during_fine_tune"]
+        output_dir = Path(self.path) / "transformers_logs"
+        fine_tune_trainer_kwargs = dict(
+            output_dir=str(output_dir),
+            per_device_train_batch_size=init_args["fine_tune_batch_size"],
+            per_device_eval_batch_size=init_args["fine_tune_batch_size"],
+            learning_rate=init_args["fine_tune_lr"],
+            lr_scheduler_type="linear",
+            warmup_ratio=0.0,
+            optim="adamw_torch_fused",
+            logging_dir=str(output_dir),
+            logging_strategy="steps",
+            logging_steps=100,
+            report_to="none",
+            max_steps=init_args["fine_tune_steps"],
+            gradient_accumulation_steps=1,
+            dataloader_num_workers=self.data_loader_num_workers,
+            tf32=self._has_tf32(),
+            save_only_model=True,
+            prediction_loss_only=True,
+            save_total_limit=1,
+            save_strategy="steps" if eval_during_fine_tune else "no",
+            save_steps=100 if eval_during_fine_tune else None,
+            evaluation_strategy="steps" if eval_during_fine_tune else "no",
+            eval_steps=100 if eval_during_fine_tune else None,
+            load_best_model_at_end=True if eval_during_fine_tune else False,
+            metric_for_best_model="eval_loss" if eval_during_fine_tune else None,
+        )
+        user_fine_tune_trainer_kwargs = init_args.get("fine_tune_trainer_kwargs", {})
+        fine_tune_trainer_kwargs.update(user_fine_tune_trainer_kwargs)
+
+        init_args["fine_tune_trainer_kwargs"] = fine_tune_trainer_kwargs
+
+        return init_args
+
     def _fit(
         self,
         train_data: TimeSeriesDataFrame,
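The fine_tune_trainer_kwargs handling in _get_model_params above is a plain dict merge in which user-supplied keys override the defaults; a small self-contained illustration with made-up values:

defaults = {"learning_rate": 1e-4, "max_steps": 5000, "report_to": "none"}
user_fine_tune_trainer_kwargs = {"max_steps": 1000, "warmup_ratio": 0.1}

defaults.update(user_fine_tune_trainer_kwargs)  # same pattern as fine_tune_trainer_kwargs.update(...)
assert defaults == {"learning_rate": 1e-4, "max_steps": 1000, "report_to": "none", "warmup_ratio": 0.1}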
@@ -279,8 +368,171 @@ class ChronosModel(AbstractTimeSeriesModel):
         time_limit: int = None,
         **kwargs,
     ) -> None:
+        from transformers.trainer import PrinterCallback, Trainer, TrainingArguments
+
+        from .pipeline import ChronosBoltPipeline, ChronosPipeline
+        from .pipeline.utils import (
+            ChronosFineTuningDataset,
+            EvaluateAndSaveFinalStepCallback,
+            LoggerCallback,
+            TimeLimitCallback,
+        )
+
+        # TODO: Add support for fine-tuning models with context_length longer than the pretrained model
+
+        # verbosity < 3: all logs and warnings from transformers will be suppressed
+        # verbosity >= 3: progress bar and loss logs will be logged
+        # verbosity 4: everything will be logged
+        verbosity = kwargs.get("verbosity", 2)
+        for logger_name in logging.root.manager.loggerDict:
+            if "transformers" in logger_name:
+                transformers_logger = logging.getLogger(logger_name)
+                transformers_logger.setLevel(logging.ERROR if verbosity <= 3 else logging.INFO)
+
         self._check_fit_params()
-
+
+        fine_tune_args = self._get_model_params()
+        do_fine_tune = fine_tune_args["fine_tune"]
+
+        if do_fine_tune:
+            assert train_data is not None, "train_data cannot be None when fine_tune=True"
+
+        eval_during_fine_tune = val_data is not None and fine_tune_args["eval_during_fine_tune"]
+
+        start_time = time.monotonic()
+        if do_fine_tune:
+            context_length = self._get_context_length(train_data)
+            # load model pipeline to device memory
+            self.load_model_pipeline(is_training=True)
+
+            fine_tune_prediction_length = self.prediction_length
+            model_prediction_length = self.model_pipeline.inner_model.config.chronos_config["prediction_length"]
+
+            if isinstance(self.model_pipeline, ChronosPipeline):
+                pipeline_specific_trainer_kwargs = {}
+
+                # Update prediction_length of the model
+                # NOTE: We only do this for ChronosPipeline because the prediction length of ChronosBolt models
+                # is fixed due to direct multistep forecasting setup
+                self.model_pipeline.model.config.prediction_length = fine_tune_prediction_length
+                self.model_pipeline.inner_model.config.chronos_config["prediction_length"] = (
+                    fine_tune_prediction_length
+                )
+
+            elif isinstance(self.model_pipeline, ChronosBoltPipeline):
+                # custom label_names is needed for validation to work with ChronosBolt models
+                pipeline_specific_trainer_kwargs = dict(label_names=["target"])
+
+                # truncate prediction_length if it goes beyond ChronosBolt's prediction_length
+                fine_tune_prediction_length = min(model_prediction_length, self.prediction_length)
+
+                if self.prediction_length != fine_tune_prediction_length:
+                    logger.debug(
+                        f"ChronosBolt models can only be fine-tuned with a maximum prediction_length of {model_prediction_length}. "
+                        f"Fine-tuning prediction_length has been changed to {fine_tune_prediction_length}."
+                    )
+
+            fine_tune_trainer_kwargs = fine_tune_args["fine_tune_trainer_kwargs"]
+            fine_tune_trainer_kwargs["disable_tqdm"] = fine_tune_trainer_kwargs.get("disable_tqdm", (verbosity < 3))
+            fine_tune_trainer_kwargs["use_cpu"] = str(self.model_pipeline.inner_model.device) == "cpu"
+            output_dir = Path(fine_tune_trainer_kwargs["output_dir"])
+
+            if not eval_during_fine_tune:
+                # turn off eval-related trainer args
+                fine_tune_trainer_kwargs["evaluation_strategy"] = "no"
+                fine_tune_trainer_kwargs["eval_steps"] = None
+                fine_tune_trainer_kwargs["load_best_model_at_end"] = False
+                fine_tune_trainer_kwargs["metric_for_best_model"] = None
+
+            training_args = TrainingArguments(**fine_tune_trainer_kwargs, **pipeline_specific_trainer_kwargs)
+            tokenizer_train_dataset = ChronosFineTuningDataset(
+                target_df=train_data,
+                target_column=self.target,
+                context_length=context_length,
+                prediction_length=fine_tune_prediction_length,
+                # if tokenizer exists, then the data is returned in the HF-style format accepted by
+                # the original Chronos models otherwise the data is returned in ChronosBolt's format
+                tokenizer=getattr(self.model_pipeline, "tokenizer", None),
+                mode="training",
+            ).shuffle(fine_tune_args["fine_tune_shuffle_buffer_size"])
+
+            callbacks = []
+            if time_limit is not None:
+                callbacks.append(TimeLimitCallback(time_limit=time_limit))
+
+            if val_data is not None:
+                callbacks.append(EvaluateAndSaveFinalStepCallback())
+                # evaluate on a randomly-sampled subset
+                fine_tune_eval_max_items = (
+                    min(val_data.num_items, fine_tune_args["fine_tune_eval_max_items"])
+                    if fine_tune_args["fine_tune_eval_max_items"] is not None
+                    else val_data.num_items
+                )
+
+                if fine_tune_eval_max_items < val_data.num_items:
+                    eval_items = np.random.choice(
+                        val_data.item_ids.values, size=fine_tune_eval_max_items, replace=False
+                    )
+                    val_data = val_data.loc[eval_items]
+
+                tokenizer_val_dataset = ChronosFineTuningDataset(
+                    target_df=val_data,
+                    target_column=self.target,
+                    context_length=context_length,
+                    prediction_length=fine_tune_prediction_length,
+                    tokenizer=getattr(self.model_pipeline, "tokenizer", None),
+                    mode="validation",
+                )
+
+            trainer = Trainer(
+                model=self.model_pipeline.inner_model,
+                args=training_args,
+                train_dataset=tokenizer_train_dataset,
+                eval_dataset=tokenizer_val_dataset if val_data is not None else None,
+                callbacks=callbacks,
+            )
+
+            # remove PrinterCallback from callbacks which logs to the console via a print() call,
+            # so it cannot be handled by setting the log level
+            trainer.pop_callback(PrinterCallback)
+
+            if verbosity >= 3:
+                logger.warning(
+                    "Transformers logging is turned on during fine-tuning. Note that losses reported by transformers "
+                    "may not correspond to those specified via `eval_metric`."
+                )
+                trainer.add_callback(LoggerCallback())
+
+            if val_data is not None:
+                # evaluate once before training
+                zero_shot_eval_loss = trainer.evaluate()["eval_loss"]
+
+            trainer.train()
+
+            if eval_during_fine_tune:
+                # get the best eval_loss logged during fine-tuning
+                log_history_df = pd.DataFrame(trainer.state.log_history)
+                best_train_eval_loss = log_history_df["eval_loss"].min()
+            elif val_data is not None:
+                # evaluate at the end of fine-tuning
+                best_train_eval_loss = trainer.evaluate()["eval_loss"]
+
+            if val_data is None or best_train_eval_loss <= zero_shot_eval_loss:
+                fine_tuned_ckpt_path = Path(self.path) / self.fine_tuned_ckpt_name
+                logger.info(f"Saving fine-tuned model to {fine_tuned_ckpt_path}")
+                self.model_pipeline.inner_model.save_pretrained(Path(self.path) / self.fine_tuned_ckpt_name)
+            else:
+                # Reset the model to its pretrained state
+                logger.info("Validation loss worsened after fine-tuning. Reverting to the pretrained model.")
+                self.model_pipeline = None
+                self.load_model_pipeline(is_training=False)
+
+            if not fine_tune_args["keep_transformers_logs"]:
+                logger.debug(f"Removing transformers_logs directory {output_dir}")
+                shutil.rmtree(output_dir)
+
+        if time_limit is not None:
+            self.time_limit = time_limit - (time.monotonic() - start_time)  # inference time budget
 
     def _get_inference_data_loader(
         self,
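Condensing the tail of _fit above: fine-tuned weights are kept only if they do not hurt the validation loss, and whatever time remains is budgeted for inference. A sketch with hypothetical names and numbers:

import time

def should_keep_fine_tuned(has_val_data: bool, best_train_eval_loss: float, zero_shot_eval_loss: float) -> bool:
    # without validation data the fine-tuned checkpoint is always saved; otherwise it must
    # match or beat the zero-shot eval_loss, or the pretrained pipeline is reloaded instead
    return (not has_val_data) or best_train_eval_loss <= zero_shot_eval_loss

start_time = time.monotonic()
# ... fine-tuning would run here ...
time_limit = 600.0  # hypothetical overall budget in seconds
remaining_inference_budget = time_limit - (time.monotonic() - start_time)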
@@ -305,6 +557,13 @@ class ChronosModel(AbstractTimeSeriesModel):
             on_batch=timeout_callback(seconds=time_limit),
         )
 
+    def _get_context_length(self, data: TimeSeriesDataFrame) -> int:
+        context_length = self.context_length or min(
+            data.num_timesteps_per_item().max(),
+            self.maximum_context_length,
+        )
+        return context_length
+
     def _predict(
         self,
         data: TimeSeriesDataFrame,
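The new _get_context_length helper returns the user-specified context length if one is set, otherwise the longest series length capped at maximum_context_length; a worked example with made-up numbers:

maximum_context_length = 2048   # ChronosModel.maximum_context_length
user_context_length = None      # self.context_length, unset by the user
longest_series_length = 3500    # data.num_timesteps_per_item().max()

context_length = user_context_length or min(longest_series_length, maximum_context_length)
assert context_length == 2048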
@@ -319,15 +578,13 @@ class ChronosModel(AbstractTimeSeriesModel):
         # Note that this is independent of the model's own context length set in the model's config file.
         # For example, if the context_length is set to 2048 here but the model expects context length
         # (according to its config.json file) of 512, it will further truncate the series during inference.
-        context_length = self.context_length or min(
-            data.num_timesteps_per_item().max(),
-            self.maximum_context_length,
-        )
+        context_length = self._get_context_length(data)
 
         with warning_filter(all_warnings=True):
             import torch
 
             if self.model_pipeline is None:
+                # FIXME: optimization_strategy is ignored when model is fine-tuned
                 # load model pipeline to device memory
                 self.load_model_pipeline()
 
@@ -366,7 +623,7 @@ class ChronosModel(AbstractTimeSeriesModel):
         return TimeSeriesDataFrame(df)
 
     def _more_tags(self) -> Dict:
-        return {"allow_nan": True}
+        return {"allow_nan": True, "can_use_val_data": self._get_model_params()["fine_tune"]}
 
     def score_and_cache_oof(
         self,
src/autogluon/timeseries/models/chronos/pipeline/base.py

@@ -2,12 +2,15 @@
 
 from enum import Enum
 from pathlib import Path
-from typing import Dict, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
 
 import torch
 
 from .utils import left_pad_and_stack_1D
 
+if TYPE_CHECKING:
+    from transformers import PreTrainedModel
+
 
 class ForecastType(Enum):
     SAMPLES = "samples"
@@ -36,6 +39,16 @@ class BaseChronosPipeline(metaclass=PipelineRegistry):
         "float64": torch.float64,
     }
 
+    def __init__(self, inner_model: "PreTrainedModel"):
+        """
+        Parameters
+        ----------
+        inner_model : PreTrainedModel
+            A hugging-face transformers PreTrainedModel, e.g., T5ForConditionalGeneration
+        """
+        # for easy access to the inner HF-style model
+        self.inner_model = inner_model
+
     def _prepare_and_validate_context(self, context: Union[torch.Tensor, List[torch.Tensor]]):
         if isinstance(context, list):
             context = left_pad_and_stack_1D(context)
src/autogluon/timeseries/models/chronos/pipeline/chronos.py

@@ -65,9 +65,12 @@ class ChronosTokenizer:
     which concrete classes must implement.
     """
 
-    def
+    def context_input_transform(
+        self,
+        context: torch.Tensor,
+    ) -> Tuple:
         """
-        Turn a batch of time series into token IDs, attention
+        Turn a batch of time series into token IDs, attention mask, and tokenizer_state.
 
         Parameters
         ----------
@@ -87,9 +90,40 @@ class ChronosTokenizer:
             which input observations are not ``torch.nan`` (i.e. not
             missing nor padding).
         tokenizer_state
-            An object that
-            Contains the relevant
-
+            An object that can be passed to ``label_input_transform``
+            and ``output_transform``. Contains the relevant information
+            to decode output samples into real values,
+            such as location and scale parameters.
+        """
+        raise NotImplementedError()
+
+    def label_input_transform(self, label: torch.Tensor, tokenizer_state: Any) -> Tuple:
+        """
+        Turn a batch of label slices of time series into token IDs and attention mask
+        using the ``tokenizer_state`` provided by ``context_input_transform``.
+
+        Parameters
+        ----------
+        label
+            A tensor shaped (batch_size, time_length), containing the
+            timeseries label, i.e., the ground-truth future values.
+        tokenizer_state
+            An object returned by ``context_input_transform`` containing
+            relevant information to preprocess data, such as location and
+            scale. The nature of this depends on the specific tokenizer.
+            This is used for tokenizing the label, in order to use the same
+            scaling used to tokenize the context.
+
+        Returns
+        -------
+        token_ids
+            A tensor of integers, shaped (batch_size, time_length + 1)
+            if ``config.use_eos_token`` and (batch_size, time_length)
+            otherwise, containing token IDs for the input series.
+        attention_mask
+            A boolean tensor, same shape as ``token_ids``, indicating
+            which input observations are not ``torch.nan`` (i.e. not
+            missing nor padding).
         """
         raise NotImplementedError()
 
@@ -117,6 +151,11 @@ class ChronosTokenizer:
 
 
 class MeanScaleUniformBins(ChronosTokenizer):
+    """
+    A tokenizer that performs mean scaling and then quantizes the scaled time series into
+    uniformly-spaced bins between some bounds on the real line.
+    """
+
     def __init__(self, low_limit: float, high_limit: float, config: ChronosConfig) -> None:
         self.config = config
         self.centers = torch.linspace(
@@ -132,15 +171,15 @@ class MeanScaleUniformBins(ChronosTokenizer):
             )
         )
 
-    def
-
+    def _input_transform(
+        self, context: torch.Tensor, scale: Optional[torch.Tensor] = None
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        attention_mask = ~torch.isnan(context)
 
-        if
-
+        if scale is None:
+            scale = torch.nansum(torch.abs(context) * attention_mask, dim=-1) / torch.nansum(attention_mask, dim=-1)
+            scale[~(scale > 0)] = 1.0
 
-        attention_mask = ~torch.isnan(context)
-        scale = torch.nansum(torch.abs(context) * attention_mask, dim=-1) / torch.nansum(attention_mask, dim=-1)
-        scale[~(scale > 0)] = 1.0
         scaled_context = context / scale.unsqueeze(dim=-1)
         token_ids = (
             torch.bucketize(
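A worked example (values invented) of the mean-abs scaling performed by _input_transform above: the scale is the mean absolute value over observed entries, with NaNs masked out.

import torch

context = torch.tensor([[2.0, -4.0, float("nan"), 6.0]])
attention_mask = ~torch.isnan(context)  # [[True, True, False, True]]

scale = torch.nansum(torch.abs(context) * attention_mask, dim=-1) / torch.nansum(attention_mask, dim=-1)
# scale = (2 + 4 + 6) / 3 = 4.0
scaled_context = context / scale.unsqueeze(dim=-1)  # [[0.5, -1.0, nan, 1.5]]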
@@ -153,15 +192,42 @@ class MeanScaleUniformBins(ChronosTokenizer):
             + self.config.n_special_tokens
         )
         token_ids[~attention_mask] = self.config.pad_token_id
+        token_ids.clamp_(0, self.config.n_tokens - 1)
 
-
-
-
-
-
+        return token_ids, attention_mask, scale
+
+    def _append_eos_token(
+        self, token_ids: torch.Tensor, attention_mask: torch.Tensor
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        batch_size = token_ids.shape[0]
+        eos_tokens = torch.full((batch_size, 1), fill_value=self.config.eos_token_id)
+        token_ids = torch.concat((token_ids, eos_tokens), dim=1)
+        eos_mask = torch.full((batch_size, 1), fill_value=True)
+        attention_mask = torch.concat((attention_mask, eos_mask), dim=1)
+
+        return token_ids, attention_mask
+
+    def context_input_transform(self, context: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        length = context.shape[-1]
+
+        if length > self.config.context_length:
+            context = context[..., -self.config.context_length :]
+
+        token_ids, attention_mask, scale = self._input_transform(context=context)
+
+        if self.config.use_eos_token and self.config.model_type == "seq2seq":
+            token_ids, attention_mask = self._append_eos_token(token_ids=token_ids, attention_mask=attention_mask)
 
         return token_ids, attention_mask, scale
 
+    def label_input_transform(self, label: torch.Tensor, scale: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+        token_ids, attention_mask, _ = self._input_transform(context=label, scale=scale)
+
+        if self.config.use_eos_token:
+            token_ids, attention_mask = self._append_eos_token(token_ids=token_ids, attention_mask=attention_mask)
+
+        return token_ids, attention_mask
+
     def output_transform(self, samples: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
         scale_unsqueezed = scale.unsqueeze(-1).unsqueeze(-1)
         indices = torch.clamp(
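For reference, the _append_eos_token helper added above does nothing more than concatenate one EOS id per series and extend the mask; a self-contained illustration with made-up token ids:

import torch

eos_token_id = 1  # hypothetical; taken from config.eos_token_id in the real code
token_ids = torch.tensor([[10, 11, 12], [13, 14, 15]])
attention_mask = torch.ones_like(token_ids, dtype=torch.bool)

batch_size = token_ids.shape[0]
eos_tokens = torch.full((batch_size, 1), fill_value=eos_token_id)
token_ids = torch.concat((token_ids, eos_tokens), dim=1)  # shape (2, 4)
attention_mask = torch.concat((attention_mask, torch.full((batch_size, 1), fill_value=True)), dim=1)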
@@ -302,6 +368,7 @@ class ChronosPipeline(BaseChronosPipeline):
     forecast_type: ForecastType = ForecastType.SAMPLES
 
     def __init__(self, tokenizer, model):
+        super().__init__(inner_model=model.model)
         self.tokenizer = tokenizer
         self.model = model
 
@@ -330,7 +397,7 @@ class ChronosPipeline(BaseChronosPipeline):
         provided, and the extra 1 is for EOS.
         """
         context = self._prepare_and_validate_context(context=context)
-        token_ids, attention_mask, tokenizer_state = self.tokenizer.
+        token_ids, attention_mask, tokenizer_state = self.tokenizer.context_input_transform(context)
         embeddings = self.model.encode(
             input_ids=token_ids.to(self.model.device),
             attention_mask=attention_mask.to(self.model.device),
@@ -402,7 +469,7 @@ class ChronosPipeline(BaseChronosPipeline):
         remaining = prediction_length
 
         while remaining > 0:
-            token_ids, attention_mask, scale = self.tokenizer.
+            token_ids, attention_mask, scale = self.tokenizer.context_input_transform(context)
             samples = self.model(
                 token_ids.to(self.model.device),
                 attention_mask.to(self.model.device),
src/autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py

@@ -289,7 +289,7 @@ class ChronosBoltModelForForecasting(T5PreTrainedModel):
             # normalize target
             target, _ = self.instance_norm(target, loc_scale)
             target = target.unsqueeze(1)  # type: ignore
-            assert self.chronos_config.prediction_length
+            assert self.chronos_config.prediction_length >= target.shape[-1]
 
             target = target.to(quantile_preds.device)
             target_mask = (
@@ -297,6 +297,12 @@ class ChronosBoltModelForForecasting(T5PreTrainedModel):
             )
             target[~target_mask] = 0.0
 
+            # pad target and target_mask if they are shorter than model's prediction_length
+            if self.chronos_config.prediction_length > target.shape[-1]:
+                padding_shape = (*target.shape[:-1], self.chronos_config.prediction_length - target.shape[-1])
+                target = torch.cat([target, torch.zeros(padding_shape).to(target)], dim=-1)
+                target_mask = torch.cat([target_mask, torch.zeros(padding_shape).to(target_mask)], dim=-1)
+
             loss = (
                 2
                 * torch.abs(
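The padding block above covers fine-tuning horizons shorter than the model's fixed prediction_length: the target is zero-padded and the padded steps are masked out of the loss. A self-contained sketch with invented shapes:

import torch

model_prediction_length = 64
target = torch.randn(8, 1, 48)  # (batch, 1, horizon) after target.unsqueeze(1)
target_mask = torch.ones_like(target, dtype=torch.bool)

if model_prediction_length > target.shape[-1]:
    padding_shape = (*target.shape[:-1], model_prediction_length - target.shape[-1])
    target = torch.cat([target, torch.zeros(padding_shape).to(target)], dim=-1)
    # padded positions are False in the mask, so they do not contribute to the loss
    target_mask = torch.cat([target_mask, torch.zeros(padding_shape).to(target_mask)], dim=-1)

assert target.shape[-1] == target_mask.shape[-1] == model_prediction_length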
@@ -373,6 +379,7 @@ class ChronosBoltPipeline(BaseChronosPipeline):
     _aliases = ["PatchedT5Pipeline"]
 
     def __init__(self, model: ChronosBoltModelForForecasting):
+        super().__init__(inner_model=model)
         self.model = model
 
     @property