PVNet_summation-1.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,356 @@
+ """Base model for all PVNet submodels"""
+ import logging
+ import os
+ import shutil
+ import time
+ from importlib.metadata import version
+ from math import prod
+ from pathlib import Path
+
+ import hydra
+ import torch
+ import yaml
+ from huggingface_hub import ModelCard, ModelCardData, snapshot_download
+ from huggingface_hub.hf_api import HfApi
+ from safetensors.torch import load_file, save_file
+
+ from pvnet_summation.data.datamodule import SumTensorBatch
+ from pvnet_summation.utils import (
+     DATAMODULE_CONFIG_NAME,
+     FULL_CONFIG_NAME,
+     MODEL_CARD_NAME,
+     MODEL_CONFIG_NAME,
+     PYTORCH_WEIGHTS_NAME,
+ )
+
+
+ def sanitize_datamodule(config: dict) -> dict:
+     """Create a new datamodule config which keeps only the details required for inference"""
+     return {"pvnet_model": config["pvnet_model"]}
+
+
+ def download_from_hf(
+     repo_id: str,
+     filename: str | list[str],
+     revision: str,
+     cache_dir: str | None,
+     force_download: bool,
+     max_retries: int = 5,
+     wait_time: int = 10,
+ ) -> str | list[str]:
+     """Tries to download one or more files from HuggingFace up to max_retries times.
+
+     Args:
+         repo_id: HuggingFace repo ID
+         filename: Name of the file(s) to download
+         revision: Specific model revision
+         cache_dir: Cache directory
+         force_download: Whether to force a new download
+         max_retries: Maximum number of retry attempts
+         wait_time: Wait time (in seconds) before retrying
+
+     Returns:
+         The local file path(s) of the downloaded file(s)
+     """
+     for attempt in range(1, max_retries + 1):
+         try:
+             save_dir = snapshot_download(
+                 repo_id=repo_id,
+                 allow_patterns=filename,
+                 revision=revision,
+                 cache_dir=cache_dir,
+                 force_download=force_download,
+             )
+
+             if isinstance(filename, list):
+                 return [f"{save_dir}/{f}" for f in filename]
+             else:
+                 return f"{save_dir}/{filename}"
+
+         except Exception as e:
+             if attempt == max_retries:
+                 raise Exception(
+                     f"Failed to download {filename} from {repo_id} after {max_retries} attempts."
+                 ) from e
+             logging.warning(
+                 f"Attempt {attempt}/{max_retries} failed to download {filename} "
+                 f"from {repo_id}. Retrying in {wait_time} seconds..."
+             )
+             time.sleep(wait_time)
+
+
+ class HuggingfaceMixin:
+     """Mixin for saving and loading the model to and from HuggingFace"""
+
+     @classmethod
+     def from_pretrained(
+         cls,
+         model_id: str,
+         revision: str,
+         cache_dir: str | None = None,
+         force_download: bool = False,
+         strict: bool = True,
+     ) -> "BaseModel":
+         """Load pretrained Pytorch weights and return the loaded model."""
+
+         if os.path.isdir(model_id):
+             print("Loading model from local directory")
+             model_file = f"{model_id}/{PYTORCH_WEIGHTS_NAME}"
+             config_file = f"{model_id}/{MODEL_CONFIG_NAME}"
+         else:
+             print("Loading model from huggingface repo")
+
+             model_file, config_file = download_from_hf(
+                 repo_id=model_id,
+                 filename=[PYTORCH_WEIGHTS_NAME, MODEL_CONFIG_NAME],
+                 revision=revision,
+                 cache_dir=cache_dir,
+                 force_download=force_download,
+                 max_retries=5,
+                 wait_time=10,
+             )
+
+         with open(config_file, "r") as f:
+             model = hydra.utils.instantiate(yaml.safe_load(f))
+
+         state_dict = load_file(model_file)
+         model.load_state_dict(state_dict, strict=strict)  # type: ignore
+         model.eval()  # type: ignore
+
+         return model
+
+     @classmethod
+     def get_datamodule_config(
+         cls,
+         model_id: str,
+         revision: str,
+         cache_dir: str | None = None,
+         force_download: bool = False,
+     ) -> str:
+         """Fetch the datamodule config file and return its local path."""
+         if os.path.isdir(model_id):
+             print("Loading datamodule config from local directory")
+             datamodule_config_file = os.path.join(model_id, DATAMODULE_CONFIG_NAME)
+         else:
+             print("Loading datamodule config from huggingface repo")
+             datamodule_config_file = download_from_hf(
+                 repo_id=model_id,
+                 filename=DATAMODULE_CONFIG_NAME,
+                 revision=revision,
+                 cache_dir=cache_dir,
+                 force_download=force_download,
+                 max_retries=5,
+                 wait_time=10,
+             )
+
+         return datamodule_config_file
+
+     def _save_model_weights(self, save_directory: str) -> None:
+         """Save weights from a Pytorch model to a local directory."""
+         save_file(self.state_dict(), f"{save_directory}/{PYTORCH_WEIGHTS_NAME}")
+
+     def save_pretrained(
+         self,
+         save_directory: str,
+         model_config: dict,
+         wandb_repo: str,
+         wandb_id: str,
+         card_template_path: str,
+         datamodule_config_path: str,
+         experiment_config_path: str | None = None,
+         hf_repo_id: str | None = None,
+         push_to_hub: bool = False,
+     ) -> None:
+         """Save weights in a local directory or upload them to the HuggingFace hub.
+
+         Args:
+             save_directory:
+                 Path to directory in which the model weights and configuration will be saved.
+             model_config (`dict`):
+                 Model configuration specified as a key/value dictionary.
+             wandb_repo: Identifier of the repo on wandb.
+             wandb_id: Identifier of the model on wandb.
+             card_template_path: Path to the HuggingFace model card template.
+             datamodule_config_path:
+                 The path to the datamodule config.
+             experiment_config_path:
+                 The path to the full experimental config.
+             hf_repo_id:
+                 ID of your repository on the Hub. Used only if `push_to_hub=True`. Will default
+                 to the folder name if not provided.
+             push_to_hub (`bool`, *optional*, defaults to `False`):
+                 Whether or not to push your model to the HuggingFace Hub after saving it.
+         """
+
+         save_directory = Path(save_directory)
+         save_directory.mkdir(parents=True, exist_ok=True)
+
+         # Save model weights/files
+         self._save_model_weights(save_directory)
+
+         # Save the model config
+         if isinstance(model_config, dict):
+             with open(save_directory / MODEL_CONFIG_NAME, "w") as outfile:
+                 yaml.dump(model_config, outfile, sort_keys=False, default_flow_style=False)
+
+         # Sanitize and save the datamodule config
+         with open(datamodule_config_path) as cfg:
+             datamodule_config = yaml.safe_load(cfg)
+
+         datamodule_config = sanitize_datamodule(datamodule_config)
+
+         with open(save_directory / DATAMODULE_CONFIG_NAME, "w") as outfile:
+             yaml.dump(datamodule_config, outfile, sort_keys=False, default_flow_style=False)
+
+         # Save the full experimental config
+         if experiment_config_path is not None:
+             shutil.copyfile(experiment_config_path, save_directory / FULL_CONFIG_NAME)
+
+         card = self.create_hugging_face_model_card(card_template_path, wandb_repo, wandb_id)
+
+         (save_directory / MODEL_CARD_NAME).write_text(str(card))
+
+         if push_to_hub:
+             api = HfApi()
+
+             api.upload_folder(
+                 repo_id=hf_repo_id,
+                 folder_path=save_directory,
+                 repo_type="model",
+                 commit_message=f"Upload model - {wandb_id}",
+             )
+
+             # Print the most recent commit hash
+             c = api.list_repo_commits(repo_id=hf_repo_id, repo_type="model")[0]
+
+             message = (
+                 f"The latest commit is now: \n"
+                 f"    date: {c.created_at} \n"
+                 f"    commit hash: {c.commit_id}\n"
+                 f"    by: {c.authors}\n"
+                 f"    title: {c.title}\n"
+             )
+
+             print(message)
+
+     @staticmethod
+     def create_hugging_face_model_card(
+         card_template_path: str,
+         wandb_repo: str,
+         wandb_id: str,
+     ) -> ModelCard:
+         """
+         Creates a Hugging Face model card
+
+         Args:
+             card_template_path: Path to the HuggingFace model card template
+             wandb_repo: Identifier of the repo on wandb.
+             wandb_id: Identifier of the model on wandb.
+
+         Returns:
+             card: ModelCard - Hugging Face model card object
+         """
+
+         # Create the model card data
+         card_data = ModelCardData(language="en", license="mit", library_name="pytorch")
+
+         link = f"https://wandb.ai/{wandb_repo}/runs/{wandb_id}"
+         wandb_link = f" - [{link}]({link})\n"
+
+         # Find package versions for OCF packages
+         packages_to_display = ["pvnet_summation", "ocf-data-sampler"]
+         packages_and_versions = {package: version(package) for package in packages_to_display}
+
+         package_versions_markdown = ""
+         for package, v in packages_and_versions.items():
+             package_versions_markdown += f" - {package}=={v}\n"
+
+         return ModelCard.from_template(
+             card_data,
+             template_path=card_template_path,
+             wandb_link=wandb_link,
+             package_versions=package_versions_markdown,
+         )
+
+
+ class BaseModel(torch.nn.Module, HuggingfaceMixin):
+     """Abstract base class for PVNet-summation submodels"""
+
+     def __init__(
+         self,
+         output_quantiles: list[float] | None,
+         num_input_locations: int,
+         input_quantiles: list[float] | None,
+         history_minutes: int,
+         forecast_minutes: int,
+         interval_minutes: int,
+     ):
+         """Abstract base class for PVNet-summation submodels.
+
+         Args:
+             output_quantiles: A list of float (0.0, 1.0) quantiles to predict values for. If
+                 set to None the output is a single value.
+             num_input_locations: The number of input locations (e.g. number of GSPs)
+             input_quantiles: A list of float (0.0, 1.0) quantiles which PVNet predicts for. If
+                 set to None we assume PVNet predicts a single value
+             history_minutes: Length of the GSP history period in minutes
+             forecast_minutes: Length of the GSP forecast period in minutes
+             interval_minutes: The interval in minutes between each timestep in the data
+         """
+         super().__init__()
+
+         if output_quantiles is not None:
+             if output_quantiles != sorted(output_quantiles):
+                 raise ValueError("output_quantiles should be in ascending order")
+             if 0.5 not in output_quantiles:
+                 raise ValueError("Quantiles must include 0.5")
+
+         self.output_quantiles = output_quantiles
+
+         self.num_input_locations = num_input_locations
+         self.input_quantiles = input_quantiles
+
+         self.history_minutes = history_minutes
+         self.forecast_minutes = forecast_minutes
+         self.interval_minutes = interval_minutes
+
+         # Number of timesteps in the history and forecast periods
+         self.history_len = history_minutes // interval_minutes
+         self.forecast_len = forecast_minutes // interval_minutes
+
+         # Store whether the model should use quantile regression or simply predict the mean
+         self.use_quantile_regression = self.output_quantiles is not None
+
+         # Also store the final output shape
+         if self.use_quantile_regression:
+             self.output_shape = (self.forecast_len, len(output_quantiles))
+         else:
+             self.output_shape = (self.forecast_len,)
+
+         # Store the number of output features the model should predict
+         self.num_output_features = prod(self.output_shape)
+
+         # Store the expected input shape
+         if input_quantiles is None:
+             self.input_shape = (self.num_input_locations, self.forecast_len)
+         else:
+             self.input_shape = (self.num_input_locations, self.forecast_len, len(input_quantiles))
+
+     def _quantiles_to_prediction(self, y_quantiles: torch.Tensor) -> torch.Tensor:
+         """Convert a quantile prediction of the network into a point prediction.
+
+         Args:
+             y_quantiles: Quantile prediction of the network
+
+         Returns:
+             torch.Tensor: Point prediction (the median quantile)
+         """
+         # y_quantiles shape: [batch_size, seq_length, num_quantiles]
+         idx = self.output_quantiles.index(0.5)
+         return y_quantiles[..., idx]
+
+     def sum_of_locations(self, x: SumTensorBatch) -> torch.Tensor:
+         """Compute the capacity-weighted sum of the location-level predictions"""
+         if self.input_quantiles is None:
+             y_hat = x["pvnet_outputs"]
+         else:
+             idx = self.input_quantiles.index(0.5)
+             y_hat = x["pvnet_outputs"][..., idx]
+
+         return (y_hat * x["relative_capacity"].unsqueeze(-1)).sum(dim=1)
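The `sum_of_locations` method above computes a capacity-weighted sum of the per-location PVNet outputs, selecting the median quantile first when PVNet is probabilistic. Here is a minimal standalone sketch of that arithmetic, using made-up shapes (2 samples, 3 locations, 4 forecast steps) and illustrative quantiles `[0.1, 0.5, 0.9]`; the tensor keys mirror those used in the code:

```python
import torch

# Hypothetical toy batch, mirroring the "pvnet_outputs" and "relative_capacity"
# keys used by sum_of_locations: [batch, locs, horizon, quantile] and [batch, locs]
pvnet_outputs = torch.rand(2, 3, 4, 3)
relative_capacity = torch.rand(2, 3)

# Select the median quantile, as the model does when input_quantiles is set
idx = [0.1, 0.5, 0.9].index(0.5)
y_hat = pvnet_outputs[..., idx]  # -> [batch, locs, horizon]

# Weight each location by its relative capacity and sum over the location axis
national = (y_hat * relative_capacity.unsqueeze(-1)).sum(dim=1)
print(national.shape)  # torch.Size([2, 4]) -> [batch, horizon]
```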
@@ -0,0 +1,75 @@
+ """Simple model which only uses outputs of PVNet for all GSPs"""
+
+ import numpy as np
+ import torch
+ import torch.nn.functional as F
+ from torch import nn
+
+ from pvnet_summation.data.datamodule import SumTensorBatch
+ from pvnet_summation.models.base_model import BaseModel
+
+
+ class DenseModel(BaseModel):
+     """Neural network architecture based on naive dense layers
+
+     This model flattens all the features into a 1D vector before feeding them into the
+     sub-network.
+     """
+
+     def __init__(
+         self,
+         output_quantiles: list[float] | None,
+         num_input_locations: int,
+         input_quantiles: list[float] | None,
+         history_minutes: int,
+         forecast_minutes: int,
+         interval_minutes: int,
+         output_network: torch.nn.Module,
+         predict_difference_from_sum: bool = False,
+     ):
+         """Neural network architecture based on naive dense layers.
+
+         Args:
+             output_quantiles: A list of float (0.0, 1.0) quantiles to predict values for. If
+                 set to None the output is a single value.
+             num_input_locations: The number of input locations (e.g. number of GSPs)
+             input_quantiles: A list of float (0.0, 1.0) quantiles which PVNet predicts for. If
+                 set to None we assume PVNet predicts a single value
+             history_minutes: Length of the GSP history period in minutes
+             forecast_minutes: Length of the GSP forecast period in minutes
+             interval_minutes: The interval in minutes between each timestep in the data
+             output_network: A partially instantiated pytorch Module class used to predict the
+                 outturn from the flattened features.
+             predict_difference_from_sum: Whether to predict the difference from the sum of the
+                 location-level predictions; otherwise the total is predicted directly.
+         """
+
+         super().__init__(
+             output_quantiles,
+             num_input_locations,
+             input_quantiles,
+             history_minutes,
+             forecast_minutes,
+             interval_minutes,
+         )
+
+         self.predict_difference_from_sum = predict_difference_from_sum
+
+         self.model = output_network(
+             in_features=np.prod(self.input_shape),
+             out_features=self.num_output_features,
+         )
+
+         # Add a linear layer if predicting the difference from the sum
+         # - this allows the difference to be positive or negative
+         if predict_difference_from_sum:
+             self.model = nn.Sequential(
+                 self.model,
+                 nn.Linear(self.num_output_features, self.num_output_features),
+             )
+
+     def forward(self, x: SumTensorBatch) -> torch.Tensor:
+         """Run model forward"""
+
+         x_in = torch.flatten(x["pvnet_outputs"], start_dim=1)
+         out = self.model(x_in)
+
+         if self.use_quantile_regression:
+             # Reshape from [batch_size, seq_length * num_quantiles]
+             # to [batch_size, seq_length, num_quantiles]
+             out = out.reshape(out.shape[0], self.forecast_len, len(self.output_quantiles))
+
+         if self.predict_difference_from_sum:
+             loc_sum = self.sum_of_locations(x)
+
+             if self.use_quantile_regression:
+                 loc_sum = loc_sum.unsqueeze(-1)
+
+             # Leaky relu acts as a soft clip to 0
+             out = F.leaky_relu(loc_sum + out)
+
+         return out
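A quick smoke test of `DenseModel` may help clarify the expected shapes. This is a sketch, not from the package's docs: the import path, the 317-location batch, and the use of `functools.partial(nn.Linear)` as the partially instantiated `output_network` are all assumptions for illustration.

```python
import torch
from functools import partial
from torch import nn

from pvnet_summation.models.dense_model import DenseModel  # import path assumed

model = DenseModel(
    output_quantiles=[0.1, 0.5, 0.9],
    num_input_locations=317,            # e.g. number of GSPs; illustrative
    input_quantiles=[0.1, 0.5, 0.9],
    history_minutes=0,
    forecast_minutes=480,               # 16 steps at 30-minute intervals
    interval_minutes=30,
    output_network=partial(nn.Linear),  # stand-in for a real dense network
    predict_difference_from_sum=True,
)

batch = {
    "pvnet_outputs": torch.rand(2, 317, 16, 3),  # [batch, locs, horizon, quantile]
    "relative_capacity": torch.rand(2, 317),     # [batch, locs]
}
out = model(batch)
print(out.shape)  # torch.Size([2, 16, 3]) -> [batch, horizon, quantile]
```

In real training the `output_network` would be a partially instantiated multi-layer module rather than a bare linear layer; `partial(nn.Linear)` is just enough to exercise the `in_features`/`out_features` calling convention.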
@@ -0,0 +1,171 @@
+ """Neural network architecture based on dense layers applied independently at each horizon"""
+
+ import torch
+ import torch.nn.functional as F
+ from torch import nn
+
+ from pvnet_summation.data.datamodule import SumTensorBatch
+ from pvnet_summation.models.base_model import BaseModel
+
+
+ class HorizonDenseModel(BaseModel):
+     """Neural network architecture based on dense layers applied independently at each horizon"""
+
+     def __init__(
+         self,
+         output_quantiles: list[float] | None,
+         num_input_locations: int,
+         input_quantiles: list[float] | None,
+         history_minutes: int,
+         forecast_minutes: int,
+         interval_minutes: int,
+         output_network: torch.nn.Module,
+         predict_difference_from_sum: bool = False,
+         use_horizon_encoding: bool = False,
+         use_solar_position: bool = False,
+         force_non_crossing: bool = False,
+         beta: float = 3,
+     ):
+         """Neural network architecture based on dense layers applied independently at each
+         horizon.
+
+         Args:
+             output_quantiles: A list of float (0.0, 1.0) quantiles to predict values for. If
+                 set to None the output is a single value.
+             num_input_locations: The number of input locations (e.g. number of GSPs)
+             input_quantiles: A list of float (0.0, 1.0) quantiles which PVNet predicts for. If
+                 set to None we assume PVNet predicts a single value
+             history_minutes: Length of the GSP history period in minutes
+             forecast_minutes: Length of the GSP forecast period in minutes
+             interval_minutes: The interval in minutes between each timestep in the data
+             output_network: A partially instantiated pytorch Module class used to predict the
+                 outturn at each horizon.
+             predict_difference_from_sum: Whether to predict the difference from the sum of the
+                 location-level predictions; otherwise the total is predicted directly.
+             use_horizon_encoding: Whether to use the forecast horizon as an input feature
+             use_solar_position: Whether to use the solar coordinates as input features
+             force_non_crossing: If predicting quantiles, whether to predict the quantiles other
+                 than the median via the strictly positive distances between adjacent quantiles,
+                 accumulated outward from the median, so the quantiles cannot cross.
+             beta: If using force_non_crossing, the beta value to use in the softplus activation
+         """
+
+         super().__init__(
+             output_quantiles,
+             num_input_locations,
+             input_quantiles,
+             history_minutes,
+             forecast_minutes,
+             interval_minutes,
+         )
+
+         if force_non_crossing:
+             assert self.use_quantile_regression, "force_non_crossing requires output_quantiles"
+
+         self.use_horizon_encoding = use_horizon_encoding
+         self.predict_difference_from_sum = predict_difference_from_sum
+         self.force_non_crossing = force_non_crossing
+         self.beta = beta
+         self.use_solar_position = use_solar_position
+
+         in_features = 1 if self.input_quantiles is None else len(self.input_quantiles)
+         in_features = in_features * self.num_input_locations
+
+         if use_horizon_encoding:
+             in_features += 1
+
+         if use_solar_position:
+             in_features += 2
+
+         out_features = len(self.output_quantiles) if self.use_quantile_regression else 1
+
+         model = output_network(in_features=in_features, out_features=out_features)
+
+         # Add a linear layer if predicting the difference from the sum
+         # - this allows the difference to be positive or negative
+         # Also add a linear layer if applying force_non_crossing, since a softplus will be used
+         if predict_difference_from_sum or force_non_crossing:
+             model = nn.Sequential(
+                 model,
+                 nn.Linear(out_features, out_features),
+             )
+
+         self.model = model
+
+     def forward(self, x: SumTensorBatch) -> torch.Tensor:
+         """Run model forward"""
+
+         # x["pvnet_outputs"] has shape [batch, locs, horizon, (quantile)]
+         batch_size = x["pvnet_outputs"].shape[0]
+         x_in = torch.swapaxes(x["pvnet_outputs"], 1, 2)  # -> [batch, horizon, locs, (quantile)]
+         x_in = torch.flatten(x_in, start_dim=2)  # -> [batch, horizon, locs*(quantile)]
+
+         if self.use_horizon_encoding:
+             horizon_encoding = torch.linspace(
+                 start=0,
+                 end=1,
+                 steps=self.forecast_len,
+                 device=x_in.device,
+                 dtype=x_in.dtype,
+             )
+             horizon_encoding = horizon_encoding.tile((batch_size, 1)).unsqueeze(-1)
+             x_in = torch.cat([x_in, horizon_encoding], dim=2)
+
+         if self.use_solar_position:
+             x_in = torch.cat(
+                 [x_in, x["azimuth"].unsqueeze(-1), x["elevation"].unsqueeze(-1)],
+                 dim=2,
+             )
+
+         x_in = torch.flatten(x_in, start_dim=0, end_dim=1)  # -> [batch*horizon, features]
+
+         out = self.model(x_in)
+         out = out.view(batch_size, *self.output_shape)  # -> [batch, horizon, (quantile)]
+
+         if self.force_non_crossing:
+             # Get the prediction of the median
+             idx = self.output_quantiles.index(0.5)
+             if self.predict_difference_from_sum:
+                 loc_sum = self.sum_of_locations(x).unsqueeze(-1)
+                 y_median = loc_sum + out[..., idx:idx + 1]
+             else:
+                 y_median = out[..., idx:idx + 1]
+
+             # These are the differences between the remaining quantiles
+             dy_below = F.softplus(out[..., :idx], beta=self.beta)
+             dy_above = F.softplus(out[..., idx + 1:], beta=self.beta)
+
+             # Find the absolute values of the quantile predictions from the differences
+             y_below = []
+             y = y_median
+             for i in range(dy_below.shape[-1]):
+                 # We detach y to avoid the gradients caused by errors from one quantile
+                 # prediction flowing back to affect the other quantile predictions.
+                 # For example, if the 0.9 quantile prediction was too low, we don't want the
+                 # gradient to pull the 0.5 quantile prediction higher to compensate.
+                 y = y.detach() - dy_below[..., i:i + 1]
+                 y_below.append(y)
+
+             y_above = []
+             y = y_median
+             for i in range(dy_above.shape[-1]):
+                 y = y.detach() + dy_above[..., i:i + 1]
+                 y_above.append(y)
+
+             # Compile the quantile predictions in the correct order
+             out = torch.cat(y_below[::-1] + [y_median] + y_above, dim=-1)
+
+         else:
+             if self.predict_difference_from_sum:
+                 loc_sum = self.sum_of_locations(x)
+
+                 if self.use_quantile_regression:
+                     loc_sum = loc_sum.unsqueeze(-1)
+
+                 out = loc_sum + out
+
+         # Use leaky relu as a soft clip to 0
+         return F.leaky_relu(out, negative_slope=0.01)
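The non-crossing construction above can be exercised in isolation. This is a minimal sketch with arbitrary shapes: each gap between adjacent quantiles passes through a softplus, so it is strictly positive, and accumulating the gaps outward from the median (with `detach`, so one quantile's error does not drag its neighbours) yields predictions that are monotone across the quantile axis by construction.

```python
import torch
import torch.nn.functional as F

# Raw network outputs for quantiles [0.1, 0.5, 0.9]: [batch, horizon, quantile]
out = torch.randn(2, 16, 3)
idx = 1      # position of the 0.5 quantile
beta = 3.0

y_median = out[..., idx:idx + 1]

# Softplus makes each gap strictly positive
dy_below = F.softplus(out[..., :idx], beta=beta)
dy_above = F.softplus(out[..., idx + 1:], beta=beta)

# Accumulate the gaps outward from the median, detaching at each step
y_below, y = [], y_median
for i in range(dy_below.shape[-1]):
    y = y.detach() - dy_below[..., i:i + 1]
    y_below.append(y)

y_above, y = [], y_median
for i in range(dy_above.shape[-1]):
    y = y.detach() + dy_above[..., i:i + 1]
    y_above.append(y)

q = torch.cat(y_below[::-1] + [y_median] + y_above, dim=-1)
assert torch.all(q[..., :-1] <= q[..., 1:])  # quantiles never cross
```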