autogluon.timeseries 1.4.1b20251016__py3-none-any.whl → 1.4.1b20251218__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of autogluon.timeseries might be problematic.
- autogluon/timeseries/configs/hyperparameter_presets.py +7 -21
- autogluon/timeseries/configs/predictor_presets.py +23 -39
- autogluon/timeseries/dataset/ts_dataframe.py +97 -86
- autogluon/timeseries/learner.py +70 -35
- autogluon/timeseries/metrics/__init__.py +4 -4
- autogluon/timeseries/metrics/abstract.py +8 -8
- autogluon/timeseries/metrics/point.py +9 -9
- autogluon/timeseries/metrics/quantile.py +5 -5
- autogluon/timeseries/metrics/utils.py +4 -4
- autogluon/timeseries/models/__init__.py +2 -1
- autogluon/timeseries/models/abstract/abstract_timeseries_model.py +52 -39
- autogluon/timeseries/models/abstract/model_trial.py +2 -1
- autogluon/timeseries/models/abstract/tunable.py +8 -8
- autogluon/timeseries/models/autogluon_tabular/mlforecast.py +58 -62
- autogluon/timeseries/models/autogluon_tabular/per_step.py +26 -15
- autogluon/timeseries/models/autogluon_tabular/transforms.py +11 -9
- autogluon/timeseries/models/chronos/__init__.py +2 -1
- autogluon/timeseries/models/chronos/chronos2.py +395 -0
- autogluon/timeseries/models/chronos/model.py +126 -88
- autogluon/timeseries/models/chronos/{pipeline/utils.py → utils.py} +69 -37
- autogluon/timeseries/models/ensemble/__init__.py +36 -2
- autogluon/timeseries/models/ensemble/abstract.py +14 -46
- autogluon/timeseries/models/ensemble/array_based/__init__.py +3 -0
- autogluon/timeseries/models/ensemble/array_based/abstract.py +240 -0
- autogluon/timeseries/models/ensemble/array_based/models.py +185 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/__init__.py +12 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/abstract.py +88 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/linear_stacker.py +186 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/per_quantile_tabular.py +94 -0
- autogluon/timeseries/models/ensemble/array_based/regressor/tabular.py +107 -0
- autogluon/timeseries/models/ensemble/{greedy.py → ensemble_selection.py} +41 -61
- autogluon/timeseries/models/ensemble/per_item_greedy.py +172 -0
- autogluon/timeseries/models/ensemble/weighted/__init__.py +8 -0
- autogluon/timeseries/models/ensemble/weighted/abstract.py +45 -0
- autogluon/timeseries/models/ensemble/{basic.py → weighted/basic.py} +25 -22
- autogluon/timeseries/models/ensemble/weighted/greedy.py +62 -0
- autogluon/timeseries/models/gluonts/abstract.py +32 -31
- autogluon/timeseries/models/gluonts/dataset.py +11 -11
- autogluon/timeseries/models/gluonts/models.py +0 -7
- autogluon/timeseries/models/local/__init__.py +0 -7
- autogluon/timeseries/models/local/abstract_local_model.py +15 -18
- autogluon/timeseries/models/local/naive.py +2 -2
- autogluon/timeseries/models/local/npts.py +7 -1
- autogluon/timeseries/models/local/statsforecast.py +12 -12
- autogluon/timeseries/models/multi_window/multi_window_model.py +39 -24
- autogluon/timeseries/models/registry.py +3 -4
- autogluon/timeseries/models/toto/_internal/backbone/attention.py +3 -4
- autogluon/timeseries/models/toto/_internal/backbone/backbone.py +6 -6
- autogluon/timeseries/models/toto/_internal/backbone/rope.py +4 -9
- autogluon/timeseries/models/toto/_internal/backbone/rotary_embedding_torch.py +342 -0
- autogluon/timeseries/models/toto/_internal/backbone/scaler.py +2 -3
- autogluon/timeseries/models/toto/_internal/backbone/transformer.py +10 -10
- autogluon/timeseries/models/toto/_internal/dataset.py +2 -2
- autogluon/timeseries/models/toto/_internal/forecaster.py +8 -8
- autogluon/timeseries/models/toto/dataloader.py +4 -4
- autogluon/timeseries/models/toto/hf_pretrained_model.py +97 -16
- autogluon/timeseries/models/toto/model.py +35 -20
- autogluon/timeseries/predictor.py +527 -155
- autogluon/timeseries/regressor.py +27 -30
- autogluon/timeseries/splitter.py +3 -27
- autogluon/timeseries/trainer/ensemble_composer.py +444 -0
- autogluon/timeseries/trainer/model_set_builder.py +9 -9
- autogluon/timeseries/trainer/prediction_cache.py +16 -16
- autogluon/timeseries/trainer/trainer.py +300 -278
- autogluon/timeseries/trainer/utils.py +17 -0
- autogluon/timeseries/transforms/covariate_scaler.py +8 -8
- autogluon/timeseries/transforms/target_scaler.py +15 -15
- autogluon/timeseries/utils/constants.py +10 -0
- autogluon/timeseries/utils/datetime/lags.py +1 -3
- autogluon/timeseries/utils/datetime/seasonality.py +1 -3
- autogluon/timeseries/utils/features.py +31 -14
- autogluon/timeseries/utils/forecast.py +6 -7
- autogluon/timeseries/utils/timer.py +173 -0
- autogluon/timeseries/version.py +1 -1
- autogluon.timeseries-1.4.1b20251218-py3.11-nspkg.pth +1 -0
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/METADATA +39 -27
- autogluon_timeseries-1.4.1b20251218.dist-info/RECORD +103 -0
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/WHEEL +1 -1
- autogluon/timeseries/evaluator.py +0 -6
- autogluon/timeseries/models/chronos/pipeline/__init__.py +0 -10
- autogluon/timeseries/models/chronos/pipeline/base.py +0 -160
- autogluon/timeseries/models/chronos/pipeline/chronos.py +0 -544
- autogluon/timeseries/models/chronos/pipeline/chronos_bolt.py +0 -580
- autogluon.timeseries-1.4.1b20251016-py3.9-nspkg.pth +0 -1
- autogluon.timeseries-1.4.1b20251016.dist-info/RECORD +0 -90
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info/licenses}/LICENSE +0 -0
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info/licenses}/NOTICE +0 -0
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/namespace_packages.txt +0 -0
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/top_level.txt +0 -0
- {autogluon.timeseries-1.4.1b20251016.dist-info → autogluon_timeseries-1.4.1b20251218.dist-info}/zip-safe +0 -0
autogluon/timeseries/models/toto/hf_pretrained_model.py

@@ -1,5 +1,7 @@
+import json
 import logging
-
+import os
+from pathlib import Path

 from transformers import PretrainedConfig, PreTrainedModel

@@ -15,8 +17,8 @@ class TotoConfig(PretrainedConfig):
         embed_dim: int = 768,
         num_heads: int = 12,
         num_layers: int = 12,
-        output_distribution_classes:
-        output_distribution_kwargs:
+        output_distribution_classes: list[str] | None = None,
+        output_distribution_kwargs: dict | None = None,
         patch_size: int = 64,
         scale_factor_exponent: float = 10.0,
         spacewise_every_n_layers: int = 12,
@@ -69,12 +71,10 @@ class TotoPretrainedModel(PreTrainedModel):
             scale_factor_exponent=config.scale_factor_exponent,
             **getattr(config, "extra_kwargs", {}),
         )
-        self._register_load_state_dict_pre_hook(self._remap_state_dict_keys_hook)
         self.post_init()

-
-
-    ):
+    @staticmethod
+    def _remap_state_dict_keys(state_dict):
         remap = {
             "mlp.0.w12.weight": "mlp.0.weight",
             "mlp.0.w12.bias": "mlp.0.bias",
@@ -82,6 +82,7 @@ class TotoPretrainedModel(PreTrainedModel):
             "mlp.0.w3.bias": "mlp.2.bias",
         }

+        new_state = {}
         keys_to_remap = []
         for key in list(state_dict.keys()):
             for old, new in remap.items():
@@ -90,11 +91,81 @@ class TotoPretrainedModel(PreTrainedModel):
                     keys_to_remap.append((key, new_key))
                     break

+        new_state = state_dict.copy()
         for old_key, new_key in keys_to_remap:
-
+            new_state[new_key] = new_state.pop(old_key)
+
+        return new_state

     @classmethod
-    def
+    def load_from_checkpoint(
+        cls,
+        checkpoint_path,
+        device_map: str = "cpu",
+        strict=True,
+        **model_kwargs,
+    ):
+        """
+        Custom checkpoint loading. Used to load a local
+        safetensors checkpoint with an optional config.json file.
+        """
+        import safetensors.torch as safetorch
+
+        if os.path.isdir(checkpoint_path):
+            safetensors_file = os.path.join(checkpoint_path, "model.safetensors")
+        else:
+            safetensors_file = checkpoint_path
+
+        if os.path.exists(safetensors_file):
+            model_state = safetorch.load_file(safetensors_file, device=device_map)
+        else:
+            raise FileNotFoundError(f"Model checkpoint not found at: {safetensors_file}")
+
+        # Load configuration from config.json if it exists.
+        config_file = os.path.join(checkpoint_path, "config.json")
+        config = {}
+        if os.path.exists(config_file):
+            with open(config_file, "r") as f:
+                config = json.load(f)
+
+        # Merge any extra kwargs into the configuration.
+        config.update(model_kwargs)
+
+        remapped_state_dict = cls._remap_state_dict_keys(model_state)
+
+        instance = cls(**config)
+
+        # Filter out unexpected keys
+        filtered_remapped_state_dict = {
+            k: v
+            for k, v in remapped_state_dict.items()
+            if k in instance.state_dict() and not k.endswith("rotary_emb.freqs")
+        }
+
+        instance.load_state_dict(filtered_remapped_state_dict, strict=strict)
+        instance.to(device_map)  # type: ignore
+
+        return instance
+
+    @classmethod
+    def from_pretrained(
+        cls,
+        *,
+        model_id: str,
+        revision: str | None = None,
+        cache_dir: Path | str | None = None,
+        force_download: bool = False,
+        proxies: dict | None = None,
+        resume_download: bool | None = None,
+        local_files_only: bool = False,
+        token: str | bool | None = None,
+        device_map: str = "cpu",
+        strict: bool = False,
+        **model_kwargs,
+    ):
+        """Load Pytorch pretrained weights and return the loaded model."""
+        from huggingface_hub import constants, hf_hub_download
+
         transformers_logger = logging.getLogger("transformers.modeling_utils")
         original_level = transformers_logger.level

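The pre-hook registered in the old constructor is gone; key remapping now happens explicitly inside load_from_checkpoint via the static _remap_state_dict_keys. For reference, a toy trace of that renaming loop, assuming the elided match condition builds new_key with str.replace (consistent with the surrounding context lines); the remap table is abbreviated to the entries visible in the hunks, and the tensor values are placeholder strings:

# Toy reproduction of the _remap_state_dict_keys logic shown above.
remap = {
    "mlp.0.w12.weight": "mlp.0.weight",
    "mlp.0.w12.bias": "mlp.0.bias",
    "mlp.0.w3.bias": "mlp.2.bias",
}
state_dict = {
    "layers.0.mlp.0.w12.weight": "W12",  # fused-MLP name to be rewritten
    "layers.0.mlp.0.w12.bias": "b12",
    "layers.0.norm.weight": "g",         # unaffected key passes through
}

keys_to_remap = []
for key in list(state_dict.keys()):
    for old, new in remap.items():
        if old in key:  # assumed condition; the diff elides these two lines
            new_key = key.replace(old, new)
            keys_to_remap.append((key, new_key))
            break

new_state = state_dict.copy()
for old_key, new_key in keys_to_remap:
    new_state[new_key] = new_state.pop(old_key)

print(sorted(new_state))
# ['layers.0.mlp.0.bias', 'layers.0.mlp.0.weight', 'layers.0.norm.weight']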
@@ -103,13 +174,23 @@ class TotoPretrainedModel(PreTrainedModel):
             # remapping hook is only called after the initial model loading.
             transformers_logger.setLevel(logging.ERROR)

-
-
-
-
-
-
-
+            if os.path.isdir(model_id):
+                print("Loading weights from local directory")
+                model_file = os.path.join(model_id, constants.SAFETENSORS_SINGLE_FILE)
+                model = cls.load_from_checkpoint(model_file, device_map, strict, **model_kwargs)
+            else:
+                model_file = hf_hub_download(
+                    repo_id=model_id,
+                    filename=constants.SAFETENSORS_SINGLE_FILE,
+                    revision=revision,
+                    cache_dir=cache_dir,
+                    force_download=force_download,
+                    proxies=proxies,
+                    resume_download=resume_download,
+                    token=token,
+                    local_files_only=local_files_only,
+                )
+                model = cls.load_from_checkpoint(model_file, device_map, strict, **model_kwargs)
         finally:
             transformers_logger.setLevel(original_level)

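Together the two classmethods replace the hook-based loading path: from_pretrained resolves model_id as either a local directory or a Hugging Face Hub repo, fetches model.safetensors, and delegates to load_from_checkpoint, which remaps and filters the state dict before instantiating the model. A hedged usage sketch mirroring the call site in load_forecaster below; the import path is inferred from the file list above, and the repo id comes from the model docstring:

# Sketch: load Toto weights from the Hub (or from a local directory holding
# model.safetensors plus an optional config.json).
from autogluon.timeseries.models.toto.hf_pretrained_model import TotoConfig, TotoPretrainedModel

model_id = "Datadog/Toto-Open-Base-1.0"
model = TotoPretrainedModel.from_pretrained(
    model_id=model_id,
    config=TotoConfig.from_pretrained(model_id),
    device_map="cpu",
)

When model_id points at a local directory, the same call skips the download and reads model.safetensors from disk via load_from_checkpoint.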
autogluon/timeseries/models/toto/model.py

@@ -1,6 +1,6 @@
 import logging
 import os
-from typing import TYPE_CHECKING, Any,
+from typing import TYPE_CHECKING, Any, Sequence

 import numpy as np
 import pandas as pd
@@ -25,9 +25,11 @@ class TotoModel(AbstractTimeSeriesModel):
     architecture that autoregressively outputs parametric distribution forecasts. More details can be found on
     `Hugging Face <https://huggingface.co/Datadog/Toto-Open-Base-1.0>`_ and `GitHub <https://github.com/DataDog/toto>`_.

-    The AutoGluon implementation of Toto is on a port of the original implementation.
-
-
+    The AutoGluon implementation of Toto is on a port of the original implementation. AutoGluon supports Toto for
+    **inference only**, i.e., the model will not be trained or fine-tuned on the provided training data. Toto is optimized
+    for easy maintenance with the rest of the AutoGluon model zoo, and does not feature some important optimizations such
+    as xformers and flash-attention available in the original model repository. The AutoGluon implementation of Toto
+    requires a CUDA-compatible GPU.

     References
     ----------
@@ -59,12 +61,12 @@

     def __init__(
         self,
-        path:
-        name:
-        hyperparameters:
-        freq:
+        path: str | None = None,
+        name: str | None = None,
+        hyperparameters: dict[str, Any] | None = None,
+        freq: str | None = None,
         prediction_length: int = 1,
-        covariate_metadata:
+        covariate_metadata: CovariateMetadata | None = None,
         target: str = "target",
         quantile_levels: Sequence[float] = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
         eval_metric: Any = None,
@@ -85,9 +87,9 @@
             eval_metric=eval_metric,
         )

-        self._forecaster:
+        self._forecaster: TotoForecaster | None = None

-    def save(self, path:
+    def save(self, path: str | None = None, verbose: bool = True) -> str:
         forecaster = self._forecaster
         self._forecaster = None
         path = super().save(path=path, verbose=verbose)
@@ -108,7 +110,7 @@

         return torch.cuda.is_available()

-    def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str,
+    def get_minimum_resources(self, is_gpu_available: bool = False) -> dict[str, int | float]:
         return {"num_cpus": 1, "num_gpus": 1}

     def load_forecaster(self):
@@ -124,7 +126,7 @@

         hyperparameters = self.get_hyperparameters()
         pretrained_model = TotoPretrainedModel.from_pretrained(
-            self.model_path,
+            model_id=self.model_path,
             config=TotoConfig.from_pretrained(self.model_path),
             device_map=hyperparameters["device"],
         )
@@ -145,9 +147,21 @@
             "num_samples": 256,
             "device": "cuda",
             "context_length": 4096,
-            "compile_model":
+            "compile_model": False,
         }

+    def _get_sample_batch_size(self) -> int:
+        num_samples = self.get_hyperparameter("num_samples")
+        batch_size = num_samples
+        while batch_size > 32:
+            for factor in range(2, int(batch_size**0.5) + 1):
+                if batch_size % factor == 0:
+                    batch_size //= factor
+                    break
+            else:  # batch_size is prime
+                return batch_size
+        return batch_size
+
     @property
     def allowed_hyperparameters(self) -> list[str]:
         return super().allowed_hyperparameters + [
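The new _get_sample_batch_size repeatedly strips the smallest prime factor from num_samples until the value drops to 32 or below, and bails out early if it reaches a prime larger than 32. A standalone trace of the same loop:

# Standalone copy of the _get_sample_batch_size loop, traced on a few inputs.
def sample_batch_size(num_samples: int) -> int:
    batch_size = num_samples
    while batch_size > 32:
        for factor in range(2, int(batch_size**0.5) + 1):
            if batch_size % factor == 0:
                batch_size //= factor  # strip the smallest prime factor
                break
        else:  # batch_size is prime and still > 32: return it whole
            return batch_size
    return batch_size

print(sample_batch_size(256))  # 256 -> 128 -> 64 -> 32
print(sample_batch_size(100))  # 100 -> 50 -> 25
print(sample_batch_size(37))   # prime: returned unchanged

With the default num_samples=256 this yields 32; because only exact factors are removed, the result always divides num_samples evenly.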
@@ -169,10 +183,10 @@
     def _fit(
         self,
         train_data: TimeSeriesDataFrame,
-        val_data:
-        time_limit:
-        num_cpus:
-        num_gpus:
+        val_data: TimeSeriesDataFrame | None = None,
+        time_limit: float | None = None,
+        num_cpus: int | None = None,
+        num_gpus: int | None = None,
         verbosity: int = 2,
         **kwargs,
     ) -> None:
@@ -180,7 +194,7 @@
         self.load_forecaster()

     def _predict(
-        self, data: TimeSeriesDataFrame, known_covariates:
+        self, data: TimeSeriesDataFrame, known_covariates: TimeSeriesDataFrame | None = None, **kwargs
     ) -> TimeSeriesDataFrame:
         import torch

@@ -196,6 +210,7 @@
         dataset = TotoInferenceDataset(
             target_df=data.fill_missing_values("auto"),
             max_context_length=hyperparameters["context_length"],
+            target_column=self.target,
         )
         loader = TotoDataLoader(
             dataset,
@@ -212,7 +227,7 @@
             masked_timeseries,
             prediction_length=self.prediction_length,
             num_samples=hyperparameters["num_samples"],
-            samples_per_batch=
+            samples_per_batch=self._get_sample_batch_size(),
         )

         batch_means.append(forecast.mean.cpu().numpy())
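samples_per_batch caps how many of the num_samples sample paths are materialized per forward pass, trading peak GPU memory for extra passes. A minimal illustration of that chunking pattern (not the forecaster's actual internals; draw is a stand-in for one sampling pass), which also shows why the batch size produced by _get_sample_batch_size must divide num_samples:

import numpy as np

def sample_in_chunks(draw, num_samples: int, samples_per_batch: int) -> np.ndarray:
    # Draw num_samples sample paths in chunks of samples_per_batch and stack
    # them; the chunk count is exact because samples_per_batch divides
    # num_samples (guaranteed by the factor-stripping loop above).
    chunks = [draw(samples_per_batch) for _ in range(num_samples // samples_per_batch)]
    return np.concatenate(chunks, axis=0)

# Toy usage: 256 "sample paths" of length 8, drawn 32 at a time.
rng = np.random.default_rng(0)
samples = sample_in_chunks(lambda n: rng.normal(size=(n, 8)), 256, 32)
assert samples.shape == (256, 8)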