PVNet_summation 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,216 @@
1
+ """Training"""
2
+ import logging
3
+ import os
4
+
5
+ import hydra
6
+ import torch
7
+ from lightning.pytorch import Callback, Trainer, seed_everything
8
+ from lightning.pytorch.callbacks import ModelCheckpoint
9
+ from lightning.pytorch.loggers import Logger, WandbLogger
10
+ from ocf_data_sampler.torch_datasets.utils.torch_batch_utils import (
11
+ batch_to_tensor,
12
+ copy_batch_to_device,
13
+ )
14
+ from omegaconf import DictConfig, OmegaConf
15
+ from pvnet.models import BaseModel as PVNetBaseModel
16
+ from tqdm import tqdm
17
+
18
+ from pvnet_summation.data.datamodule import PresavedDataModule, StreamedDataModule
19
+ from pvnet_summation.utils import (
20
+ DATAMODULE_CONFIG_NAME,
21
+ FULL_CONFIG_NAME,
22
+ MODEL_CONFIG_NAME,
23
+ create_pvnet_model_config,
24
+ )
25
+
26
+ log = logging.getLogger(__name__)
27
+
28
+
29
+ def resolve_monitor_loss(output_quantiles: list | None) -> str:
30
+ """Return the desired metric to monitor based on whether quantile regression is being used.
31
+
32
+ Adds the option to use
33
+ monitor: "${resolve_monitor_loss:${model.model.output_quantiles}}"
34
+ in early stopping and model checkpoint callbacks so the callbacks config does not need to be
35
+ modified depending on whether quantile regression is being used or not.
36
+ """
37
+ if output_quantiles is None:
38
+ return "MAE/val"
39
+ else:
40
+ return "quantile_loss/val"
41
+
42
+
43
+ OmegaConf.register_new_resolver("resolve_monitor_loss", resolve_monitor_loss)
44
+
45
+
46
def train(config: DictConfig) -> None:
    """Contains training pipeline.

    Instantiates all PyTorch Lightning objects from config, pre-computes and caches the
    PVNet predictions that the summation model consumes, then trains the summation model
    on the cached outputs.

    Args:
        config (DictConfig): Configuration composed by Hydra.
    """

    # Get the pre-trained PVNet model whose outputs the summation model will combine
    pvnet_model = PVNetBaseModel.from_pretrained(
        model_id=config.datamodule.pvnet_model.model_id,
        revision=config.datamodule.pvnet_model.revision
    )

    # Enable adding new keys to config
    OmegaConf.set_struct(config, False)
    # Set summation model parameters to align with the input PVNet model
    config.model.model.history_minutes = pvnet_model.history_minutes
    config.model.model.forecast_minutes = pvnet_model.forecast_minutes
    config.model.model.interval_minutes = pvnet_model.interval_minutes
    config.model.model.num_input_locations = len(pvnet_model.location_id_mapping)
    config.model.model.input_quantiles = pvnet_model.output_quantiles
    OmegaConf.set_struct(config, True)

    # Set seed for random number generators in pytorch, numpy and python.random
    if "seed" in config:
        seed_everything(config.seed, workers=True)

    # Compute and save the PVNet predictions before training the summation model.
    # Outputs are cached per (model_id, revision) so repeat runs can reuse them.
    save_dir = (
        f"{config.sample_save_dir}/{config.datamodule.pvnet_model.model_id}"
        f"/{config.datamodule.pvnet_model.revision}"
    )

    if os.path.isdir(save_dir):
        log.info(
            f"PVNet output directory already exists: {save_dir}\n"
            "Skipping saving new outputs. The existing saved outputs will be loaded."
        )
    else:
        log.info(f"Saving PVNet outputs to {save_dir}")

        # Move to device and disable gradients for inference
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        pvnet_model.to(device).requires_grad_(False)

        # These would raise if the directories existed, but the isdir() check above
        # guarantees they don't
        os.makedirs(f"{save_dir}/train")
        os.makedirs(f"{save_dir}/val")

        pvnet_data_config_path = f"{save_dir}/pvnet_data_config.yaml"

        data_source_paths = OmegaConf.to_container(
            config.datamodule.data_source_paths,
            resolve=True,
        )

        # Write a data config pointing the PVNet model at the local data sources
        create_pvnet_model_config(
            save_path=pvnet_data_config_path,
            repo=config.datamodule.pvnet_model.model_id,
            commit=config.datamodule.pvnet_model.revision,
            data_source_paths=data_source_paths,
        )

        datamodule = StreamedDataModule(
            configuration=pvnet_data_config_path,
            num_workers=config.datamodule.num_workers,
            prefetch_factor=config.datamodule.prefetch_factor,
            train_period=config.datamodule.train_period,
            val_period=config.datamodule.val_period,
            persistent_workers=False,
            seed=config.datamodule.seed,
            dataset_pickle_dir=config.datamodule.dataset_pickle_dir,
        )

        datamodule.setup()

        for dataloader_func, max_num_samples, split in [
            (datamodule.train_dataloader, config.datamodule.max_num_train_samples, "train",),
            (datamodule.val_dataloader, config.datamodule.max_num_val_samples, "val"),
        ]:

            log.info(f"Saving {split} outputs")
            # NOTE(review): shuffle=True is applied to the val split too — presumably so
            # that a random subset is kept when max_num_val_samples truncates it; confirm
            dataloader = dataloader_func(shuffle=True)

            # If max_num_samples set to None use all samples
            max_num_samples = max_num_samples or len(dataloader)

            for i, sample in tqdm(zip(range(max_num_samples), dataloader), total=max_num_samples):
                # Run PVNet inputs through model
                x = copy_batch_to_device(batch_to_tensor(sample["pvnet_inputs"]), device)
                pvnet_outputs = pvnet_model(x).detach().cpu()

                # Create version of sample without the PVNet inputs and save
                sample_to_save = {k: v.clone() for k, v in sample.items() if k!="pvnet_inputs"}

                sample_to_save["pvnet_outputs"] = pvnet_outputs
                torch.save(sample_to_save, f"{save_dir}/{split}/{i:06}.pt")

            del dataloader

        datamodule.teardown()

    # Train the summation model on the pre-saved PVNet outputs
    datamodule = PresavedDataModule(
        sample_dir=save_dir,
        batch_size=config.datamodule.batch_size,
        num_workers=config.datamodule.num_workers,
        prefetch_factor=config.datamodule.prefetch_factor,
        persistent_workers=config.datamodule.persistent_workers,
    )

    # Init lightning loggers
    loggers: list[Logger] = []
    if "logger" in config:
        for _, lg_conf in config.logger.items():
            loggers.append(hydra.utils.instantiate(lg_conf))

    # Init lightning callbacks
    callbacks: list[Callback] = []
    if "callbacks" in config:
        for _, cb_conf in config.callbacks.items():
            callbacks.append(hydra.utils.instantiate(cb_conf))

    # Align the wandb id with the checkpoint path
    # - only works if wandb logger and model checkpoint used
    # - this makes it easy to push the model to huggingface
    use_wandb_logger = False
    for logger in loggers:
        if isinstance(logger, WandbLogger):
            use_wandb_logger = True
            wandb_logger = logger
            break

    # Set the output directory based on the wandb-id of the run
    if use_wandb_logger:
        for callback in callbacks:
            if isinstance(callback, ModelCheckpoint):
                # Calling the .experiment property instantiates a wandb run
                wandb_id = wandb_logger.experiment.id

                # Save the run results to the expected parent folder but with the folder name
                # set by the wandb ID
                save_dir = f"{os.path.dirname(callback.dirpath)}/{wandb_id}"

                callback.dirpath = save_dir

                # Save the model config
                os.makedirs(save_dir, exist_ok=True)
                OmegaConf.save(config.model, f"{save_dir}/{MODEL_CONFIG_NAME}")

                # Save the datamodule config
                OmegaConf.save(config.datamodule, f"{save_dir}/{DATAMODULE_CONFIG_NAME}")

                # Save the full hydra config to the output directory and to wandb
                OmegaConf.save(config, f"{save_dir}/{FULL_CONFIG_NAME}")
                wandb_logger.experiment.save(f"{save_dir}/{FULL_CONFIG_NAME}", base_path=save_dir)


    # Init lightning model
    model = hydra.utils.instantiate(config.model)

    trainer: Trainer = hydra.utils.instantiate(
        config.trainer,
        logger=loggers,
        _convert_="partial",
        callbacks=callbacks,
    )

    # Train the model completely
    trainer.fit(model=model, datamodule=datamodule)
@@ -0,0 +1,132 @@
1
+ """Utils"""
2
+ import logging
3
+
4
+ import rich.syntax
5
+ import rich.tree
6
+ import yaml
7
+ from lightning.pytorch.utilities import rank_zero_only
8
+ from omegaconf import DictConfig, OmegaConf
9
+ from pvnet.models.base_model import BaseModel as PVNetBaseModel
10
+
11
logger = logging.getLogger(__name__)


# Canonical filenames used when saving and loading model artifacts, shared across
# the training and model-export code paths
PYTORCH_WEIGHTS_NAME = "model_weights.safetensors"
MODEL_CONFIG_NAME = "model_config.yaml"
DATAMODULE_CONFIG_NAME = "datamodule_config.yaml"
FULL_CONFIG_NAME = "full_experiment_config.yaml"
MODEL_CARD_NAME = "README.md"
19
+
20
+
21
+
22
def maybe_apply_debug_mode(config: DictConfig) -> None:
    """Check if a debugging run is requested and force a debug-friendly configuration.

    Controlled by the main config file.

    Modifies DictConfig in place.

    Args:
        config (DictConfig): Configuration composed by Hydra.
    """

    # Temporarily allow new keys to be added to the config
    OmegaConf.set_struct(config, False)

    # Force debugger friendly configuration if <config.trainer.fast_dev_run=True>
    if config.trainer.get("fast_dev_run"):
        logger.info("Forcing debugger friendly configuration! <config.trainer.fast_dev_run=True>")

        trainer_conf = config.trainer
        datamodule_conf = config.datamodule

        # Debuggers don't play well with GPUs or multiprocessing, so disable both
        if trainer_conf.get("gpus"):
            trainer_conf.gpus = 0
        if datamodule_conf.get("pin_memory"):
            datamodule_conf.pin_memory = False
        if datamodule_conf.get("num_workers"):
            datamodule_conf.num_workers = 0
        if datamodule_conf.get("prefetch_factor"):
            datamodule_conf.prefetch_factor = None

    # Disable adding new keys to config again
    OmegaConf.set_struct(config, True)
51
+
52
+
53
@rank_zero_only
def print_config(
    config: DictConfig,
    fields: tuple[str, ...] = (
        "trainer",
        "model",
        "datamodule",
        "callbacks",
        "logger",
        "seed",
    ),
    resolve: bool = True,
) -> None:
    """Prints content of DictConfig using Rich library and its tree structure.

    Args:
        config (DictConfig): Configuration composed by Hydra.
        fields (tuple[str, ...], optional): Determines which main fields from config will
            be printed and in what order.
        resolve (bool, optional): Whether to resolve reference fields of DictConfig.
    """

    tree_style = "dim"
    tree = rich.tree.Tree("CONFIG", style=tree_style, guide_style=tree_style)

    for field in fields:
        branch = tree.add(field, style=tree_style, guide_style=tree_style)

        section = config.get(field)
        # DictConfig sections are rendered as YAML; anything else falls back to str()
        if isinstance(section, DictConfig):
            rendered = OmegaConf.to_yaml(section, resolve=resolve)
        else:
            rendered = str(section)

        branch.add(rich.syntax.Syntax(rendered, "yaml"))

    rich.print(tree)
89
+
90
def populate_config_with_data_data_filepaths(config: dict, data_source_paths: dict) -> dict:
    """Populate the data source filepaths in the config.

    Args:
        config: The data config
        data_source_paths: A dictionary of data paths for the different input sources

    Returns:
        The config with its data source paths filled in (also modified in place).
    """

    input_data = config["input_data"]

    # The GSP data path is always replaced
    input_data["gsp"]["zarr_path"] = data_source_paths["gsp"]

    # The satellite path is only replaced when a satellite source is configured with a
    # non-empty path
    if "satellite" in input_data and input_data["satellite"]["zarr_path"] != "":
        input_data["satellite"]["zarr_path"] = data_source_paths["satellite"]

    # The NWP section is nested one level deeper, so each source must be handled in turn
    if "nwp" in input_data:
        for source_config in input_data["nwp"].values():
            provider = source_config["provider"]
            assert provider in data_source_paths["nwp"], f"Missing NWP path: {provider}"
            source_config["zarr_path"] = data_source_paths["nwp"][provider]

    return config
115
+
116
+
117
def create_pvnet_model_config(
    save_path: str,
    repo: str,
    commit: str,
    data_source_paths: dict,
) -> None:
    """Create the data config needed to run the PVNet model.

    Args:
        save_path: Path where the populated data config will be written.
        repo: The huggingface repo ID (or local directory) of the PVNet model.
        commit: The model revision/commit whose data config should be used.
        data_source_paths: A dictionary of data paths for the different input sources.
    """

    # Fetch the data config the PVNet model was trained with
    data_config_path = PVNetBaseModel.get_data_config(repo, revision=commit)

    with open(data_config_path) as file:
        data_config = yaml.load(file, Loader=yaml.FullLoader)

    # Point the config at the locally available data sources
    populated_config = populate_config_with_data_data_filepaths(data_config, data_source_paths)

    # Write the populated config back out for the datamodule to consume
    with open(save_path, "w") as file:
        yaml.dump(populated_config, file, default_flow_style=False)
@@ -0,0 +1,100 @@
1
+ Metadata-Version: 2.4
2
+ Name: PVNet_summation
3
+ Version: 1.1.2
4
+ Summary: PVNet_summation
5
+ Author-email: James Fulton <info@openclimatefix.org>
6
+ Requires-Python: <3.14,>=3.11
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: pvnet>=5.0.0
10
+ Requires-Dist: ocf-data-sampler>=0.6.0
11
+ Requires-Dist: numpy
12
+ Requires-Dist: pandas
13
+ Requires-Dist: matplotlib
14
+ Requires-Dist: xarray
15
+ Requires-Dist: torch>=2.0.0
16
+ Requires-Dist: lightning
17
+ Requires-Dist: typer
18
+ Requires-Dist: wandb
19
+ Requires-Dist: huggingface-hub
20
+ Requires-Dist: tqdm
21
+ Requires-Dist: omegaconf
22
+ Requires-Dist: hydra-core
23
+ Requires-Dist: rich
24
+ Requires-Dist: safetensors
25
+ Dynamic: license-file
26
+
27
+ # PVNet summation
28
+ [![ease of contribution: hard](https://img.shields.io/badge/ease%20of%20contribution:%20hard-bb2629)](https://github.com/openclimatefix/ocf-meta-repo?tab=readme-ov-file#overview-of-ocfs-nowcasting-repositories)
29
+
30
+ This project is used for training a model to sum the GSP predictions of [PVNet](https://github.com/openclimatefix/pvnet) into a national estimate.
31
+
32
+ Using the summation model to sum the GSP predictions rather than doing a simple sum increases the accuracy of the national predictions and can be configured to produce estimates of the uncertainty range of the national estimate. See the [PVNet](https://github.com/openclimatefix/pvnet) repo for more details and our paper.
33
+
34
+
35
+ ## Setup / Installation
36
+
37
+ ```bash
38
+ git clone https://github.com/openclimatefix/PVNet_summation
39
+ cd PVNet_summation
40
+ pip install .
41
+ ```
42
+
43
+ ### Additional development dependencies
44
+
45
+ ```bash
46
+ pip install ".[dev]"
47
+ ```
48
+
49
+ ## Getting started with running PVNet summation
50
+
51
+ In order to run PVNet summation, we assume that you are already set up with
52
+ [PVNet](https://github.com/openclimatefix/pvnet) and have a trained PVNet model already available either locally or pushed to HuggingFace.
53
+
54
+ Before running any code, copy the example configuration to a configs directory:
55
+
56
+ ```
57
+ cp -r configs.example configs
58
+ ```
59
+
60
+ You will be making local amendments to these configs.
61
+
62
+ ### Datasets
63
+
64
+ The datasets required are the same as documented in
65
+ [PVNet](https://github.com/openclimatefix/pvnet). The only addition is that you will need PVLive
66
+ data for the national sum i.e. GSP ID 0.
67
+
68
+
69
+ ### Training PVNet_summation
70
+
71
+ How PVNet_summation is run is determined by the extensive configuration in the config files. The
72
+ configs are stored in `configs.example`.
73
+
74
+ Make sure to update the following config files before training your model:
75
+
76
+
77
+ 1. At the very start of training we loop over all of the input samples and make predictions for them using PVNet. These predictions are saved to disk and will be loaded in the training loop for more efficient training. In `configs/config.yaml` update `sample_save_dir` to set where the predictions will be saved to.
78
+
79
+ 2. In `configs/datamodule/default.yaml`:
80
+ - Update `pvnet_model.model_id` and `pvnet_model.revision` to point to the Huggingface commit or local directory where the exported PVNet model is.
81
+ - Update `configuration` to point to a data configuration compatible with the PVNet model whose outputs will be fed into the summation model.
82
+ - Set `train_period` and `val_period` to control the time ranges of the train and val period
83
+ - Optionally set `max_num_train_samples` and `max_num_val_samples` to limit the number of possible train and validation examples which will be used.
84
+
85
+ 3. In `configs/model/default.yaml`:
86
+ - Update the hyperparameters and structure of the summation model
87
+ 4. In `configs/trainer/default.yaml`:
88
+ - Set `accelerator: 0` if running on a system without a supported GPU
89
+
90
+
91
+ Assuming you have updated the configs, you should now be able to run:
92
+
93
+ ```
94
+ python run.py
95
+ ```
96
+
97
+
98
+ ## Testing
99
+
100
+ You can use `python -m pytest tests` to run tests
@@ -0,0 +1,19 @@
1
+ pvnet_summation/__init__.py,sha256=8bjkx2pvF7lZ2W5BiTpHr7iqpkRXc3vW5K1pxJAWaj0,22
2
+ pvnet_summation/load_model.py,sha256=mQJXJ9p8wb25CVsm5UBGb0IL6xGZj-81iIBKHsNdQMY,2515
3
+ pvnet_summation/optimizers.py,sha256=kuR3PUnISiAO5bSaKhq_7vqRKZ0gO5cRS4UbjmKgq1c,6472
4
+ pvnet_summation/utils.py,sha256=JyqzDQjABCtRsdLgxr5j9K9AdmNlQhmYGenj6mKGnFY,4352
5
+ pvnet_summation/data/__init__.py,sha256=AYJFlJ3KaAQXED0PxuuknI2lKEeFMFLJiJ9b6-H8398,81
6
+ pvnet_summation/data/datamodule.py,sha256=Pa2iip-ALihhkAVtqDBPJZ93vh4evJwG9L9YCJiRQag,12517
7
+ pvnet_summation/models/__init__.py,sha256=v3KMMH_bz9YGUFWsrb5Ndg-d_dgxQPw7yiFahQAag4c,103
8
+ pvnet_summation/models/base_model.py,sha256=mxrEq8k6NAVpezLx3ORPM33OrXzRccVD2ErFkPIw8bc,12496
9
+ pvnet_summation/models/dense_model.py,sha256=vh3Hrm-n7apgVkta_RtQ5mdxb6jiJNFm3ObWukSBgdU,2305
10
+ pvnet_summation/models/horizon_dense_model.py,sha256=8NfJiO4upQT8ksqwDn1Jkct5-nrbs_EKfKBseVRay1U,7011
11
+ pvnet_summation/training/__init__.py,sha256=2fbydXPJFk527DUGPlNV0Teaqvu4WNp8hgcODwHJFEw,110
12
+ pvnet_summation/training/lightning_module.py,sha256=IMwayobtjA69Blz8v6dxhG31-GgovB9kBqUZJ5A5qRA,9926
13
+ pvnet_summation/training/plots.py,sha256=wjiNh1bH6FQa9rf4Y9Xtp1jyks1bzGJG2-8936I_Dk0,2475
14
+ pvnet_summation/training/train.py,sha256=ze4LCr4XvJ18NjiZhR9KslVf_5HoC1xjGIhBcfw8u5E,8000
15
+ pvnet_summation-1.1.2.dist-info/licenses/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
16
+ pvnet_summation-1.1.2.dist-info/METADATA,sha256=zTBNEYtw5n-s_kAloLYcQPka1Ql_9uMbw5zRIYadeiM,3726
17
+ pvnet_summation-1.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
18
+ pvnet_summation-1.1.2.dist-info/top_level.txt,sha256=5fWJ75RKtpaHUdLG_-2oDCInXeq4r1aMCxkZp5Wy-LQ,16
19
+ pvnet_summation-1.1.2.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Open Climate Fix
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ pvnet_summation