PVNet_summation 1.0.1__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pvnet_summation/data/datamodule.py +127 -28
- pvnet_summation/load_model.py +5 -1
- pvnet_summation/models/base_model.py +14 -3
- pvnet_summation/models/horizon_dense_model.py +171 -0
- pvnet_summation/training/train.py +38 -7
- pvnet_summation/utils.py +51 -6
- {pvnet_summation-1.0.1.dist-info → pvnet_summation-1.1.0.dist-info}/METADATA +3 -3
- pvnet_summation-1.1.0.dist-info/RECORD +19 -0
- pvnet_summation-1.0.1.dist-info/RECORD +0 -18
- {pvnet_summation-1.0.1.dist-info → pvnet_summation-1.1.0.dist-info}/WHEEL +0 -0
- {pvnet_summation-1.0.1.dist-info → pvnet_summation-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {pvnet_summation-1.0.1.dist-info → pvnet_summation-1.1.0.dist-info}/top_level.txt +0 -0
pvnet_summation/data/datamodule.py
CHANGED

@@ -1,5 +1,6 @@
 """Pytorch lightning datamodules for loading pre-saved samples and predictions."""
 
+import os
 from glob import glob
 from typing import TypeAlias
 
@@ -7,16 +8,68 @@ import numpy as np
 import pandas as pd
 import torch
 from lightning.pytorch import LightningDataModule
-from ocf_data_sampler.load.gsp import open_gsp
-from ocf_data_sampler.numpy_sample.common_types import NumpyBatch
+from ocf_data_sampler.load.gsp import get_gsp_boundaries, open_gsp
+from ocf_data_sampler.numpy_sample.common_types import NumpyBatch
+from ocf_data_sampler.numpy_sample.sun_position import calculate_azimuth_and_elevation
+from ocf_data_sampler.select.geospatial import osgb_to_lon_lat
 from ocf_data_sampler.torch_datasets.datasets.pvnet_uk import PVNetUKConcurrentDataset
 from ocf_data_sampler.utils import minutes
-from torch.utils.data import DataLoader, Dataset, default_collate
+from torch.utils.data import DataLoader, Dataset, Subset, default_collate
 from typing_extensions import override
 
 SumNumpySample: TypeAlias = dict[str, np.ndarray | NumpyBatch]
 SumTensorBatch: TypeAlias = dict[str, torch.Tensor]
 
+def get_gb_centroid_lon_lat() -> tuple[float, float]:
+    """Get the longitude and latitude of the centroid of Great Britain"""
+    row = get_gsp_boundaries("20250109").loc[0]
+    x_osgb = row.x_osgb.item()
+    y_osgb = row.y_osgb.item()
+    return osgb_to_lon_lat(x_osgb, y_osgb)
+
+LON, LAT = get_gb_centroid_lon_lat()
+
+
+def construct_sample(
+    pvnet_inputs: NumpyBatch,
+    valid_times: pd.DatetimeIndex,
+    relative_capacities: np.ndarray,
+    target: np.ndarray | None,
+    last_outturn: float | None = None,
+) -> SumNumpySample:
+    """Construct an input sample for the summation model
+
+    Args:
+        pvnet_inputs: The PVNet batch for all GSPs
+        valid_times: An array of valid times for the forecast
+        relative_capacities: Array of capacities of all GSPs normalised by the total capacity
+        target: The target national outturn. This is only needed during training.
+        last_outturn: The previous national outturn. This is only needed during training.
+    """
+
+    azimuth, elevation = calculate_azimuth_and_elevation(valid_times, LON, LAT)
+
+    sample = {
+        # NumpyBatch object with batch size = num_locations
+        "pvnet_inputs": pvnet_inputs,
+        # Shape: [time]
+        "valid_times": valid_times.values.astype(int),
+        # Shape: [num_locations]
+        "relative_capacity": relative_capacities,
+        # Shape: [time]
+        "azimuth": azimuth.astype(np.float32) / 360,
+        # Shape: [time]
+        "elevation": elevation.astype(np.float32) / 180 + 0.5,
+    }
+
+    if target is not None:
+        # Shape: [time]
+        sample["target"] = target
+    if last_outturn is not None:
+        # Shape: scalar
+        sample["last_outturn"] = last_outturn
+    return sample
+
 
 class StreamedDataset(PVNetUKConcurrentDataset):
     """A torch dataset for creating concurrent PVNet inputs and national targets."""
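Note: the solar-position features added by construct_sample above are plain degree values rescaled into roughly the unit interval. A minimal sketch with made-up numbers:

    # Hypothetical values: azimuth spans 0-360 degrees, elevation -90 to +90 degrees
    azimuth_deg = 180.0
    elevation_deg = 30.0

    azimuth_feature = azimuth_deg / 360            # 0.5
    elevation_feature = elevation_deg / 180 + 0.5  # ~0.67, mapped into [0, 1]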
@@ -37,7 +90,7 @@ class StreamedDataset(PVNetUKConcurrentDataset):
         super().__init__(config_filename, start_time, end_time, gsp_ids=None)
 
         # Load and normalise the national GSP data to use as target values
-        national_gsp_data = (
+        self.national_gsp_data = (
             open_gsp(
                 zarr_path=self.config.input_data.gsp.zarr_path,
                 boundaries_version=self.config.input_data.gsp.boundaries_version
@@ -45,8 +98,6 @@ class StreamedDataset(PVNetUKConcurrentDataset):
             .sel(gsp_id=0)
             .compute()
         )
-        self.national_gsp_data = national_gsp_data / national_gsp_data.effective_capacity_mwp
-
 
     def _get_sample(self, t0: pd.Timestamp) -> SumNumpySample:
         """Generate a concurrent PVNet sample for given init-time.
@@ -55,33 +106,32 @@ class StreamedDataset(PVNetUKConcurrentDataset):
            t0: init-time for sample
        """
 
-
-
-        location_capacities = pvnet_inputs["gsp_effective_capacity_mwp"]
+        # Get the PVNet input batch
+        pvnet_inputs: NumpyBatch = super()._get_sample(t0)
 
+        # Construct an array of valid times for each forecast horizon
         valid_times = pd.date_range(
             t0+minutes(self.config.input_data.gsp.time_resolution_minutes),
             t0+minutes(self.config.input_data.gsp.interval_end_minutes),
             freq=minutes(self.config.input_data.gsp.time_resolution_minutes)
         )
 
-
+        # Get the GSP and national capacities
+        location_capacities = pvnet_inputs["gsp_effective_capacity_mwp"]
         total_capacity = self.national_gsp_data.sel(time_utc=t0).effective_capacity_mwp.item()
-
+
+        # Calculate required inputs for the sample
         relative_capacities = location_capacities / total_capacity
-
-
-
-
-
-
-
-
-
-
-            # Shape: [num_locations]
-            "relative_capacity": relative_capacities,
-        }
+        target = self.national_gsp_data.sel(time_utc=valid_times).values / total_capacity
+        last_outturn = self.national_gsp_data.sel(time_utc=t0).values / total_capacity
+
+        return construct_sample(
+            pvnet_inputs=pvnet_inputs,
+            valid_times=valid_times,
+            relative_capacities=relative_capacities,
+            target=target,
+            last_outturn=last_outturn,
+        )
 
     @override
     def __getitem__(self, idx: int) -> SumNumpySample:
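Note: _get_sample above normalises everything by the national effective capacity, so the model works with capacity factors rather than MW. A small sketch with invented numbers:

    import numpy as np

    location_capacities = np.array([120.0, 80.0, 200.0])  # MW per GSP (made up)
    total_capacity = 400.0                                 # national effective capacity, MW

    relative_capacities = location_capacities / total_capacity  # [0.3, 0.2, 0.5]

    national_outturn = np.array([100.0, 140.0])  # MW at two valid times (made up)
    target = national_outturn / total_capacity   # [0.25, 0.35], i.e. capacity factors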
@@ -103,6 +153,8 @@ class StreamedDataModule(LightningDataModule):
         num_workers: int = 0,
         prefetch_factor: int | None = None,
         persistent_workers: bool = False,
+        seed: int | None = None,
+        dataset_pickle_dir: str | None = None,
     ):
         """Datamodule for creating concurrent PVNet inputs and national targets.
 
@@ -115,11 +167,16 @@ class StreamedDataModule(LightningDataModule):
             persistent_workers: If True, the data loader will not shut down the worker processes
                 after a dataset has been consumed once. This allows to maintain the workers Dataset
                 instances alive.
+            seed: Random seed used in shuffling datasets.
+            dataset_pickle_dir: Directory in which the val and train set will be presaved as
+                pickle objects. Setting this speeds up instantiation of multiple workers a lot.
        """
         super().__init__()
         self.configuration = configuration
         self.train_period = train_period
         self.val_period = val_period
+        self.seed = seed
+        self.dataset_pickle_dir = dataset_pickle_dir
 
         self._dataloader_kwargs = dict(
             batch_size=None,
@@ -132,17 +189,58 @@ class StreamedDataModule(LightningDataModule):
             worker_init_fn=None,
             prefetch_factor=prefetch_factor,
             persistent_workers=persistent_workers,
+            multiprocessing_context="spawn" if num_workers > 0 else None,
         )
 
+    def setup(self, stage: str | None = None):
+        """Called once to prepare the datasets."""
+
+        # This logic runs only once at the start of training, therefore the val dataset is only
+        # shuffled once
+        if self.dataset_pickle_dir is not None:
+            os.makedirs(self.dataset_pickle_dir, exist_ok=True)
+
+            train_dataset_path = f"{self.dataset_pickle_dir}/train_dataset.pkl"
+            val_dataset_path = f"{self.dataset_pickle_dir}/val_dataset.pkl"
+
+            # For safety, these pickled datasets cannot be overwritten.
+            # See: https://github.com/openclimatefix/pvnet/pull/445
+            for path in [train_dataset_path, val_dataset_path]:
+                if os.path.exists(path):
+                    raise FileExistsError(
+                        f"The pickled dataset path '{path}' already exists. Make sure that "
+                        "this can be safely deleted (i.e. not currently being used by any "
+                        "training run) and delete it manually. Else change the "
+                        "`dataset_pickle_dir` to a different directory."
+                    )
+
+        # Prepare the train dataset
+        self.train_dataset = StreamedDataset(self.configuration, *self.train_period)
+
+        # Prepare and pre-shuffle the val dataset and set seed for reproducibility
+        val_dataset = StreamedDataset(self.configuration, *self.val_period)
+        shuffled_indices = np.random.default_rng(seed=self.seed).permutation(len(val_dataset))
+        self.val_dataset = Subset(val_dataset, shuffled_indices)
+
+        if self.dataset_pickle_dir is not None:
+            self.train_dataset.presave_pickle(train_dataset_path)
+            self.train_dataset.presave_pickle(val_dataset_path)
+
+    def teardown(self, stage: str | None = None) -> None:
+        """Clean up the pickled datasets"""
+        if self.dataset_pickle_dir is not None:
+            for filename in ["val_dataset.pkl", "train_dataset.pkl"]:
+                filepath = f"{self.dataset_pickle_dir}/{filename}"
+                if os.path.exists(filepath):
+                    os.remove(filepath)
+
     def train_dataloader(self, shuffle: bool = False) -> DataLoader:
         """Construct train dataloader"""
-
-        return DataLoader(dataset, shuffle=shuffle, **self._dataloader_kwargs)
+        return DataLoader(self.train_dataset, shuffle=shuffle, **self._dataloader_kwargs)
 
     def val_dataloader(self, shuffle: bool = False) -> DataLoader:
         """Construct val dataloader"""
-
-        return DataLoader(dataset, shuffle=shuffle, **self._dataloader_kwargs)
+        return DataLoader(self.val_dataset, shuffle=shuffle, **self._dataloader_kwargs)
 
 
 class PresavedDataset(Dataset):
@@ -200,6 +298,7 @@ class PresavedDataModule(LightningDataModule):
             worker_init_fn=None,
             prefetch_factor=prefetch_factor,
             persistent_workers=persistent_workers,
+            multiprocessing_context="spawn" if num_workers > 0 else None,
         )
 
     def train_dataloader(self, shuffle: bool = True) -> DataLoader:
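Note: putting the new datamodule options together, usage might look like the sketch below; the config path, date ranges, and pickle directory are all hypothetical:

    datamodule = StreamedDataModule(
        configuration="pvnet_data_config.yaml",
        train_period=["2020-01-01", "2022-12-31"],
        val_period=["2023-01-01", "2023-12-31"],
        num_workers=4,
        seed=42,                               # fixes the one-off shuffle of the val set
        dataset_pickle_dir="/tmp/ds_pickles",  # must not already hold pickled datasets
    )
    datamodule.setup()
    train_dl = datamodule.train_dataloader(shuffle=True)
    val_dl = datamodule.val_dataloader()
    # ... iterate over the dataloaders ...
    datamodule.teardown()  # removes the pickled datasets again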
pvnet_summation/load_model.py
CHANGED

@@ -20,10 +20,14 @@ def get_model_from_checkpoints(
 ) -> tuple[torch.nn.Module, dict, str | None, str | None]:
     """Load a model from its checkpoint directory
 
+    Args:
+        checkpoint_dir_path: str path to the directory with the model files
+        val_best (optional): if True, load the best epoch model; otherwise, load the last
+
     Returns:
         tuple:
             model: nn.Module of pretrained model.
-            model_config:
+            model_config: dict of model config used to train the model.
             datamodule_config: path to datamodule used to create samples e.g. train/test split info.
             experiment_configs: path to the full experimental config.
 
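Note: based on the signature and docstring above, a call might look like this hypothetical sketch (the checkpoint directory is made up):

    model, model_config, datamodule_config, experiment_config = get_model_from_checkpoints(
        checkpoint_dir_path="checkpoints/summation_run_01",
        val_best=True,  # load the best-epoch weights rather than the last epoch
    )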
pvnet_summation/models/base_model.py
CHANGED

@@ -4,6 +4,7 @@ import os
 import shutil
 import time
 from importlib.metadata import version
+from math import prod
 from pathlib import Path
 
 import hydra
@@ -293,6 +294,12 @@ class BaseModel(torch.nn.Module, HuggingfaceMixin):
         """
         super().__init__()
 
+        if (output_quantiles is not None):
+            if output_quantiles != sorted(output_quantiles):
+                raise ValueError("output_quantiles should be in ascending order")
+            if 0.5 not in output_quantiles:
+                raise ValueError("Quantiles must include 0.5")
+
         self.output_quantiles = output_quantiles
 
         self.num_input_locations = num_input_locations
@@ -309,17 +316,21 @@ class BaseModel(torch.nn.Module, HuggingfaceMixin):
         # Store whether the model should use quantile regression or simply predict the mean
         self.use_quantile_regression = self.output_quantiles is not None
 
-        #
+        # Also store the final output shape
         if self.use_quantile_regression:
-            self.
+            self.output_shape = (self.forecast_len, len(input_quantiles))
         else:
-            self.
+            self.output_shape = (self.forecast_len,)
+
+        # Store the number of output features that the model should predict
+        self.num_output_features = prod(self.output_shape)
 
         # Store the expected input shape
         if input_quantiles is None:
             self.input_shape = (self.num_input_locations, self.forecast_len)
         else:
             self.input_shape = (self.num_input_locations, self.forecast_len, len(input_quantiles))
+
 
     def _quantiles_to_prediction(self, y_quantiles: torch.Tensor) -> torch.Tensor:
         """Convert network prediction into a point prediction.
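Note: the new validation in BaseModel rejects unsorted quantile lists and lists missing the median, and num_output_features is simply the product of the output shape. A sketch with assumed values:

    from math import prod

    output_quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]  # ascending and contains 0.5 -> accepted
    # [0.5, 0.1, 0.9] would raise ValueError: output_quantiles should be in ascending order
    # [0.1, 0.9]      would raise ValueError: Quantiles must include 0.5

    forecast_len = 16  # made-up forecast length
    output_shape = (forecast_len, len(output_quantiles))  # (16, 5)
    num_output_features = prod(output_shape)              # 80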
pvnet_summation/models/horizon_dense_model.py
ADDED

@@ -0,0 +1,171 @@
+"""Neural network architecture based on dense layers applied independently at each horizon"""
+
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+from pvnet_summation.data.datamodule import SumTensorBatch
+from pvnet_summation.models.base_model import BaseModel
+
+
+class HorizonDenseModel(BaseModel):
+    """Neural network architecture based on dense layers applied independently at each horizon.
+    """
+
+    def __init__(
+        self,
+        output_quantiles: list[float] | None,
+        num_input_locations: int,
+        input_quantiles: list[float] | None,
+        history_minutes: int,
+        forecast_minutes: int,
+        interval_minutes: int,
+        output_network: torch.nn.Module,
+        predict_difference_from_sum: bool = False,
+        use_horizon_encoding: bool = False,
+        use_solar_position: bool = False,
+        force_non_crossing: bool = False,
+        beta: float = 3,
+    ):
+        """Neural network architecture based on dense layers applied independently at each horizon.
+
+        Args:
+            output_quantiles: A list of float (0.0, 1.0) quantiles to predict values for. If set to
+                None the output is a single value.
+            num_input_locations: The number of input locations (e.g. number of GSPs)
+            input_quantiles: A list of float (0.0, 1.0) quantiles which PVNet predicts for. If set
+                to None we assume PVNet predicts a single value
+            history_minutes (int): Length of the GSP history period in minutes
+            forecast_minutes (int): Length of the GSP forecast period in minutes
+            interval_minutes: The interval in minutes between each timestep in the data
+            output_network: A partially instantiated pytorch Module class used to predict the
+                outturn at each horizon.
+            predict_difference_from_sum: Whether to predict the difference from the sum of
+                locations, else the total is predicted directly
+            use_horizon_encoding: Whether to use the forecast horizon as an input feature
+            use_solar_position: Whether to use the solar coordinates as input features
+            force_non_crossing: If predicting quantiles, whether to predict the quantiles other
+                than the median by predicting the distance between them and integrating.
+            beta: If using force_non_crossing, the beta value to use in the softplus activation
+        """
+
+        super().__init__(
+            output_quantiles,
+            num_input_locations,
+            input_quantiles,
+            history_minutes,
+            forecast_minutes,
+            interval_minutes,
+        )
+
+        if force_non_crossing:
+            assert self.use_quantile_regression
+
+        self.use_horizon_encoding = use_horizon_encoding
+        self.predict_difference_from_sum = predict_difference_from_sum
+        self.force_non_crossing = force_non_crossing
+        self.beta = beta
+        self.use_solar_position = use_solar_position
+
+        in_features = 1 if self.input_quantiles is None else len(self.input_quantiles)
+        in_features = in_features * self.num_input_locations
+
+        if use_horizon_encoding:
+            in_features += 1
+
+        if use_solar_position:
+            in_features += 2
+
+        out_features = (len(self.output_quantiles) if self.use_quantile_regression else 1)
+
+        model = output_network(in_features=in_features, out_features=out_features)
+
+        # Add linear layer if predicting difference from sum
+        # - This allows difference to be positive or negative
+        # Also add linear layer if we are applying force_non_crossing since a softplus will be used
+        if predict_difference_from_sum or force_non_crossing:
+            model = nn.Sequential(
+                model,
+                nn.Linear(out_features, out_features),
+            )
+
+        self.model = model
+
+
+    def forward(self, x: SumTensorBatch) -> torch.Tensor:
+        """Run model forward"""
+
+        # x["pvnet_outputs"] has shape [batch, locs, horizon, (quantile)]
+        batch_size = x["pvnet_outputs"].shape[0]
+        x_in = torch.swapaxes(x["pvnet_outputs"], 1, 2)  # -> [batch, horizon, locs, (quantile)]
+        x_in = torch.flatten(x_in, start_dim=2)  # -> [batch, horizon, locs*(quantile)]
+
+        if self.use_horizon_encoding:
+            horizon_encoding = torch.linspace(
+                start=0,
+                end=1,
+                steps=self.forecast_len,
+                device=x_in.device,
+                dtype=x_in.dtype,
+            )
+            horizon_encoding = horizon_encoding.tile((batch_size, 1)).unsqueeze(-1)
+            x_in = torch.cat([x_in, horizon_encoding], dim=2)
+
+        if self.use_solar_position:
+            x_in = torch.cat(
+                [x_in, x["azimuth"].unsqueeze(-1), x["elevation"].unsqueeze(-1)],
+                dim=2
+            )
+
+        x_in = torch.flatten(x_in, start_dim=0, end_dim=1)  # -> [batch*horizon, features]
+
+        out = self.model(x_in)
+        out = out.view(batch_size, *self.output_shape)  # -> [batch, horizon, (quantile)]
+
+        if self.force_non_crossing:
+
+            # Get the prediction of the median
+            idx = self.output_quantiles.index(0.5)
+            if self.predict_difference_from_sum:
+                loc_sum = self.sum_of_locations(x).unsqueeze(-1)
+                y_median = loc_sum + out[..., idx:idx+1]
+            else:
+                y_median = out[..., idx:idx+1]
+
+            # These are the differences between the remaining quantiles
+            dy_below = F.softplus(out[..., :idx], beta=self.beta)
+            dy_above = F.softplus(out[..., idx+1:], beta=self.beta)
+
+            # Find the absolute value of the quantile predictions from the differences
+            y_below = []
+            y = y_median
+            for i in range(dy_below.shape[-1]):
+                # We detach y to avoid the gradients caused by errors from one quantile
+                # prediction flowing back to affect the other quantile predictions.
+                # For example if the 0.9 quantile prediction was too low, we don't want the
+                # gradient to pull the 0.5 quantile prediction higher to compensate.
+                y = y.detach() - dy_below[..., i:i+1]
+                y_below.append(y)
+
+            y_above = []
+            y = y_median
+            for i in range(dy_above.shape[-1]):
+                y = y.detach() + dy_above[..., i:i+1]
+                y_above.append(y)
+
+            # Compile the quantile predictions in the correct order
+            out = torch.cat(y_below[::-1] + [y_median,] + y_above, dim=-1)
+
+        else:
+
+            if self.predict_difference_from_sum:
+                loc_sum = self.sum_of_locations(x)
+
+                if self.use_quantile_regression:
+                    loc_sum = loc_sum.unsqueeze(-1)
+
+                out = loc_sum + out
+
+        # Use leaky relu as a soft clip to 0
+        return F.leaky_relu(out, negative_slope=0.01)
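Note: the force_non_crossing branch above predicts the median directly and turns the remaining raw outputs into positive gaps via softplus, so stacked quantiles cannot cross. A minimal sketch of the idea (loops and detach omitted; the tensors are made up):

    import torch
    import torch.nn.functional as F

    raw = torch.tensor([[0.2, 1.5, -0.3]])  # raw outputs for quantiles [0.1, 0.5, 0.9]
    idx = 1                                  # position of the 0.5 quantile

    y_median = raw[..., idx:idx+1]                   # 1.5
    dy_below = F.softplus(raw[..., :idx], beta=3)    # positive gap below the median
    dy_above = F.softplus(raw[..., idx+1:], beta=3)  # positive gap above the median

    y_10 = y_median - dy_below[..., 0:1]  # guaranteed <= median
    y_90 = y_median + dy_above[..., 0:1]  # guaranteed >= median
    quantiles = torch.cat([y_10, y_median, y_90], dim=-1)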
pvnet_summation/training/train.py
CHANGED

@@ -7,13 +7,21 @@ import torch
 from lightning.pytorch import Callback, Trainer, seed_everything
 from lightning.pytorch.callbacks import ModelCheckpoint
 from lightning.pytorch.loggers import Logger, WandbLogger
-from ocf_data_sampler.torch_datasets.
+from ocf_data_sampler.torch_datasets.utils.torch_batch_utils import (
+    batch_to_tensor,
+    copy_batch_to_device,
+)
 from omegaconf import DictConfig, OmegaConf
 from pvnet.models import BaseModel as PVNetBaseModel
 from tqdm import tqdm
 
 from pvnet_summation.data.datamodule import PresavedDataModule, StreamedDataModule
-from pvnet_summation.utils import
+from pvnet_summation.utils import (
+    DATAMODULE_CONFIG_NAME,
+    FULL_CONFIG_NAME,
+    MODEL_CONFIG_NAME,
+    create_pvnet_model_config,
+)
 
 log = logging.getLogger(__name__)
 
@@ -21,9 +29,8 @@ log = logging.getLogger(__name__)
 def resolve_monitor_loss(output_quantiles: list | None) -> str:
     """Return the desired metric to monitor based on whether quantile regression is being used.
 
-
+    Adds the option to use
         monitor: "${resolve_monitor_loss:${model.model.output_quantiles}}"
-
     in early stopping and model checkpoint callbacks so the callbacks config does not need to be
     modified depending on whether quantile regression is being used or not.
     """
@@ -86,15 +93,33 @@ def train(config: DictConfig) -> None:
     os.makedirs(f"{save_dir}/train")
     os.makedirs(f"{save_dir}/val")
 
+    pvnet_data_config_path = f"{save_dir}/pvnet_data_config.yaml"
+
+    data_source_paths = OmegaConf.to_container(
+        config.datamodule.data_source_paths,
+        resolve=True,
+    )
+
+    create_pvnet_model_config(
+        save_path=pvnet_data_config_path,
+        repo=config.datamodule.pvnet_model.model_id,
+        commit=config.datamodule.pvnet_model.revision,
+        data_source_paths=data_source_paths,
+    )
+
     datamodule = StreamedDataModule(
-        configuration=
+        configuration=pvnet_data_config_path,
         num_workers=config.datamodule.num_workers,
         prefetch_factor=config.datamodule.prefetch_factor,
         train_period=config.datamodule.train_period,
         val_period=config.datamodule.val_period,
         persistent_workers=False,
+        seed=config.datamodule.seed,
+        dataset_pickle_dir=config.datamodule.dataset_pickle_dir,
     )
 
+    datamodule.setup()
+
     for dataloader_func, max_num_samples, split in [
         (datamodule.train_dataloader, config.datamodule.max_num_train_samples, "train",),
         (datamodule.val_dataloader, config.datamodule.max_num_val_samples, "val"),
@@ -103,7 +128,10 @@ def train(config: DictConfig) -> None:
         log.info(f"Saving {split} outputs")
         dataloader = dataloader_func(shuffle=True)
 
-
+        # If max_num_samples is set to None, use all samples
+        max_num_samples = max_num_samples or len(dataloader)
+
+        for i, sample in tqdm(zip(range(max_num_samples), dataloader), total=max_num_samples):
             # Run PVNet inputs through model
             x = copy_batch_to_device(batch_to_tensor(sample["pvnet_inputs"]), device)
             pvnet_outputs = pvnet_model(x).detach().cpu()
@@ -116,6 +144,9 @@ def train(config: DictConfig) -> None:
 
         del dataloader
 
+    datamodule.teardown()
+
+
     datamodule = PresavedDataModule(
         sample_dir=save_dir,
         batch_size=config.datamodule.batch_size,
@@ -182,4 +213,4 @@ def train(config: DictConfig) -> None:
     )
 
     # Train the model completely
-    trainer.fit(model=model, datamodule=datamodule)
+    trainer.fit(model=model, datamodule=datamodule)
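Note: the sample-capping idiom above relies on zip() stopping at the shorter iterator, so range(max_num_samples) truncates the dataloader. A toy sketch:

    samples = ["s0", "s1", "s2", "s3"]  # stand-in for a dataloader
    max_num_samples = None              # e.g. config value left unset

    max_num_samples = max_num_samples or len(samples)  # None -> use everything
    for i, sample in zip(range(max_num_samples), samples):
        print(i, sample)  # visits all four samples here; fewer if a cap was set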
pvnet_summation/utils.py
CHANGED

@@ -3,8 +3,10 @@ import logging
 
 import rich.syntax
 import rich.tree
+import yaml
 from lightning.pytorch.utilities import rank_zero_only
 from omegaconf import DictConfig, OmegaConf
+from pvnet.models.base_model import BaseModel as PVNetBaseModel
 
 logger = logging.getLogger(__name__)
 
@@ -17,11 +19,10 @@ MODEL_CARD_NAME = "README.md"
 
 
 
-def
-"""
+def maybe_apply_debug_mode(config: DictConfig) -> None:
+    """Check if debugging run is requested and force debug-friendly configuration
 
-    Controlled by main config file
-    - forcing debug friendly configuration
+    Controlled by main config file
 
     Modifies DictConfig in place.
 
@@ -52,7 +53,7 @@ def run_config_utilities(config: DictConfig) -> None:
 @rank_zero_only
 def print_config(
     config: DictConfig,
-    fields: tuple[str] = (
+    fields: tuple[str, ...] = (
         "trainer",
         "model",
         "datamodule",
@@ -66,7 +67,7 @@ def print_config(
 
     Args:
         config (DictConfig): Configuration composed by Hydra.
-        fields (
+        fields (tuple[str, ...], optional): Determines which main fields from config will
             be printed and in what order.
         resolve (bool, optional): Whether to resolve reference fields of DictConfig.
     """
@@ -85,3 +86,47 @@ def print_config(
         branch.add(rich.syntax.Syntax(branch_content, "yaml"))
 
     rich.print(tree)
+
+def populate_config_with_data_data_filepaths(config: dict, data_source_paths: dict) -> dict:
+    """Populate the data source filepaths in the config
+
+    Args:
+        config: The data config
+        data_source_paths: A dictionary of data paths for the different input sources
+    """
+
+    # Replace the GSP data path
+    config["input_data"]["gsp"]["zarr_path"] = data_source_paths["gsp"]
+
+    # Replace satellite data path if using it
+    if "satellite" in config["input_data"]:
+        if config["input_data"]["satellite"]["zarr_path"] != "":
+            config["input_data"]["satellite"]["zarr_path"] = data_source_paths["satellite"]
+
+    # NWP is nested so must be treated separately
+    if "nwp" in config["input_data"]:
+        nwp_config = config["input_data"]["nwp"]
+        for nwp_source in nwp_config.keys():
+            provider = nwp_config[nwp_source]["provider"]
+            assert provider in data_source_paths["nwp"], f"Missing NWP path: {provider}"
+            nwp_config[nwp_source]["zarr_path"] = data_source_paths["nwp"][provider]
+
+    return config
+
+
+def create_pvnet_model_config(
+    save_path: str,
+    repo: str,
+    commit: str,
+    data_source_paths: dict,
+) -> None:
+    """Create the data config needed to run the PVNet model"""
+    data_config_path = PVNetBaseModel.get_data_config(repo, revision=commit)
+
+    with open(data_config_path) as file:
+        data_config = yaml.load(file, Loader=yaml.FullLoader)
+
+    data_config = populate_config_with_data_data_filepaths(data_config, data_source_paths)
+
+    with open(save_path, "w") as file:
+        yaml.dump(data_config, file, default_flow_style=False)
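Note: from the logic above, data_source_paths is a flat dict for "gsp" and "satellite" and a provider-keyed dict for "nwp". A hypothetical example (all paths are made up):

    data_source_paths = {
        "gsp": "/data/gsp.zarr",
        "satellite": "/data/satellite.zarr",
        "nwp": {"ukv": "/data/nwp_ukv.zarr"},  # keyed by provider, per the assert above
    }

    data_config = {
        "input_data": {
            "gsp": {"zarr_path": ""},
            "satellite": {"zarr_path": "old.zarr"},  # non-empty -> will be replaced
            "nwp": {"ukv": {"provider": "ukv", "zarr_path": ""}},
        }
    }

    data_config = populate_config_with_data_data_filepaths(data_config, data_source_paths)
    # data_config["input_data"]["nwp"]["ukv"]["zarr_path"] is now "/data/nwp_ukv.zarr"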
{pvnet_summation-1.0.1.dist-info → pvnet_summation-1.1.0.dist-info}/METADATA
CHANGED

@@ -1,13 +1,13 @@
 Metadata-Version: 2.4
 Name: PVNet_summation
-Version: 1.0
+Version: 1.1.0
 Summary: PVNet_summation
 Author-email: James Fulton <info@openclimatefix.org>
-Requires-Python: >=3.
+Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: pvnet>=5.0.0
-Requires-Dist: ocf-data-sampler>=0.
+Requires-Dist: ocf-data-sampler>=0.6.0
 Requires-Dist: numpy
 Requires-Dist: pandas
 Requires-Dist: matplotlib
pvnet_summation-1.1.0.dist-info/RECORD
ADDED

@@ -0,0 +1,19 @@
+pvnet_summation/__init__.py,sha256=8bjkx2pvF7lZ2W5BiTpHr7iqpkRXc3vW5K1pxJAWaj0,22
+pvnet_summation/load_model.py,sha256=mQJXJ9p8wb25CVsm5UBGb0IL6xGZj-81iIBKHsNdQMY,2515
+pvnet_summation/optimizers.py,sha256=kuR3PUnISiAO5bSaKhq_7vqRKZ0gO5cRS4UbjmKgq1c,6472
+pvnet_summation/utils.py,sha256=JyqzDQjABCtRsdLgxr5j9K9AdmNlQhmYGenj6mKGnFY,4352
+pvnet_summation/data/__init__.py,sha256=AYJFlJ3KaAQXED0PxuuknI2lKEeFMFLJiJ9b6-H8398,81
+pvnet_summation/data/datamodule.py,sha256=Pa2iip-ALihhkAVtqDBPJZ93vh4evJwG9L9YCJiRQag,12517
+pvnet_summation/models/__init__.py,sha256=v3KMMH_bz9YGUFWsrb5Ndg-d_dgxQPw7yiFahQAag4c,103
+pvnet_summation/models/base_model.py,sha256=mxrEq8k6NAVpezLx3ORPM33OrXzRccVD2ErFkPIw8bc,12496
+pvnet_summation/models/dense_model.py,sha256=vh3Hrm-n7apgVkta_RtQ5mdxb6jiJNFm3ObWukSBgdU,2305
+pvnet_summation/models/horizon_dense_model.py,sha256=8NfJiO4upQT8ksqwDn1Jkct5-nrbs_EKfKBseVRay1U,7011
+pvnet_summation/training/__init__.py,sha256=2fbydXPJFk527DUGPlNV0Teaqvu4WNp8hgcODwHJFEw,110
+pvnet_summation/training/lightning_module.py,sha256=t16gcAc4Fmi1g26dhQwQOm4qe2mwnTfEBbOyH_BFZ4o,8695
+pvnet_summation/training/plots.py,sha256=VZHyzI6UvCEd4nmXiJCF1FiVlpDyFHTxX6_rc0vmJrU,2248
+pvnet_summation/training/train.py,sha256=ze4LCr4XvJ18NjiZhR9KslVf_5HoC1xjGIhBcfw8u5E,8000
+pvnet_summation-1.1.0.dist-info/licenses/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
+pvnet_summation-1.1.0.dist-info/METADATA,sha256=uy-zlQ8IyRNgM27nYxL207m41MrlHfuCPDzp0474-e8,3720
+pvnet_summation-1.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pvnet_summation-1.1.0.dist-info/top_level.txt,sha256=5fWJ75RKtpaHUdLG_-2oDCInXeq4r1aMCxkZp5Wy-LQ,16
+pvnet_summation-1.1.0.dist-info/RECORD,,
pvnet_summation-1.0.1.dist-info/RECORD
REMOVED

@@ -1,18 +0,0 @@
-pvnet_summation/__init__.py,sha256=8bjkx2pvF7lZ2W5BiTpHr7iqpkRXc3vW5K1pxJAWaj0,22
-pvnet_summation/load_model.py,sha256=GfreRSaKVTWjV9fnJGNYjp09wrpZwaTunHijdff6cyc,2338
-pvnet_summation/optimizers.py,sha256=kuR3PUnISiAO5bSaKhq_7vqRKZ0gO5cRS4UbjmKgq1c,6472
-pvnet_summation/utils.py,sha256=G7l2iZK8qNWEau27pJYPvGOLSzPaSttFrGwr75yTlPQ,2628
-pvnet_summation/data/__init__.py,sha256=AYJFlJ3KaAQXED0PxuuknI2lKEeFMFLJiJ9b6-H8398,81
-pvnet_summation/data/datamodule.py,sha256=dexqqz9CHsH2c7ehgOTnJw5LjlOTNCvNhDZsFOVwy1g,8072
-pvnet_summation/models/__init__.py,sha256=v3KMMH_bz9YGUFWsrb5Ndg-d_dgxQPw7yiFahQAag4c,103
-pvnet_summation/models/base_model.py,sha256=qtsbH8WqrRUQdWpBdeLJ3yz3dlhUeLFUKzVvX7uiopo,12074
-pvnet_summation/models/dense_model.py,sha256=vh3Hrm-n7apgVkta_RtQ5mdxb6jiJNFm3ObWukSBgdU,2305
-pvnet_summation/training/__init__.py,sha256=2fbydXPJFk527DUGPlNV0Teaqvu4WNp8hgcODwHJFEw,110
-pvnet_summation/training/lightning_module.py,sha256=t16gcAc4Fmi1g26dhQwQOm4qe2mwnTfEBbOyH_BFZ4o,8695
-pvnet_summation/training/plots.py,sha256=VZHyzI6UvCEd4nmXiJCF1FiVlpDyFHTxX6_rc0vmJrU,2248
-pvnet_summation/training/train.py,sha256=qBzSCsBMsJpbbBx3laVfOSdBSTCBF7XBWl_AZglbsKQ,7171
-pvnet_summation-1.0.1.dist-info/licenses/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
-pvnet_summation-1.0.1.dist-info/METADATA,sha256=fIi2uaWV8-ihgZFMGxIoIQSa2-mHCa5u6-UYcP8fipA,3721
-pvnet_summation-1.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-pvnet_summation-1.0.1.dist-info/top_level.txt,sha256=5fWJ75RKtpaHUdLG_-2oDCInXeq4r1aMCxkZp5Wy-LQ,16
-pvnet_summation-1.0.1.dist-info/RECORD,,

{pvnet_summation-1.0.1.dist-info → pvnet_summation-1.1.0.dist-info}/WHEEL
File without changes

{pvnet_summation-1.0.1.dist-info → pvnet_summation-1.1.0.dist-info}/licenses/LICENSE
File without changes

{pvnet_summation-1.0.1.dist-info → pvnet_summation-1.1.0.dist-info}/top_level.txt
File without changes