PyPI - PVNet - Versions diffs - 5.0.7__tar.gz → 5.0.8__tar.gz - Mend

PVNet 5.0.7 (tar.gz) → 5.0.8 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)

{pvnet-5.0.7 → pvnet-5.0.8}/PKG-INFO RENAMED Viewed

@@ -1,12 +1,12 @@
 Metadata-Version: 2.4
 Name: PVNet
-Version: 5.0.7
+Version: 5.0.8
 Summary: PVNet
 Author-email: Peter Dudfield <info@openclimatefix.org>
-Requires-Python: >=3.10
+Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: ocf-data-sampler>=0.2.32
+Requires-Dist: ocf-data-sampler>=0.5.20
 Requires-Dist: numpy
 Requires-Dist: pandas
 Requires-Dist: matplotlib

{pvnet-5.0.7 → pvnet-5.0.8}/PVNet.egg-info/PKG-INFO RENAMED Viewed

@@ -1,12 +1,12 @@
 Metadata-Version: 2.4
 Name: PVNet
-Version: 5.0.7
+Version: 5.0.8
 Summary: PVNet
 Author-email: Peter Dudfield <info@openclimatefix.org>
-Requires-Python: >=3.10
+Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: ocf-data-sampler>=0.2.32
+Requires-Dist: ocf-data-sampler>=0.5.20
 Requires-Dist: numpy
 Requires-Dist: pandas
 Requires-Dist: matplotlib

{pvnet-5.0.7 → pvnet-5.0.8}/PVNet.egg-info/requires.txt RENAMED Viewed

@@ -1,4 +1,4 @@
-ocf-data-sampler>=0.2.32
+ocf-data-sampler>=0.5.20
 numpy
 pandas
 matplotlib

{pvnet-5.0.7 → pvnet-5.0.8}/pvnet/data/base_datamodule.py RENAMED Viewed

@@ -1,5 +1,6 @@
 """ Data module for pytorch lightning """
+import os
 from glob import glob
 import numpy as np
@@ -77,6 +78,7 @@ class BasePresavedDataModule(LightningDataModule):
             worker_init_fn=None,
             prefetch_factor=prefetch_factor,
             persistent_workers=persistent_workers,
+            multiprocessing_context="spawn" if num_workers>0 else None,
         )
     def _get_premade_samples_dataset(self, subdir: str) -> Dataset:
@@ -107,7 +109,7 @@ class BaseStreamedDataModule(LightningDataModule):
         train_period: list[str | None] = [None, None],
         val_period: list[str | None] = [None, None],
         seed: int | None = None,
+        dataset_pickle_dir: str | None = None,
     ):
         """Base Datamodule for streaming samples.
@@ -124,6 +126,8 @@ class BaseStreamedDataModule(LightningDataModule):
             train_period: Date range filter for train dataloader.
             val_period: Date range filter for val dataloader.
             seed: Random seed used in shuffling datasets.
+            dataset_pickle_dir: Directory in which the val and train set will be presaved as
+                pickle objects. Setting this speeds up instantiation of multiple workers a lot.
         """
         super().__init__()
@@ -131,6 +135,7 @@ class BaseStreamedDataModule(LightningDataModule):
         self.train_period = train_period
         self.val_period = val_period
         self.seed = seed
+        self.dataset_pickle_dir = dataset_pickle_dir
         self._common_dataloader_kwargs = dict(
             batch_size=batch_size,
@@ -143,6 +148,7 @@ class BaseStreamedDataModule(LightningDataModule):
             worker_init_fn=None,
             prefetch_factor=prefetch_factor,
             persistent_workers=persistent_workers,
+            multiprocessing_context="spawn" if num_workers>0 else None,
         )
     def setup(self, stage: str | None = None):
@@ -160,6 +166,33 @@ class BaseStreamedDataModule(LightningDataModule):
             shuffled_indices = np.random.default_rng(seed=self.seed).permutation(len(val_dataset))
             self.val_dataset = Subset(val_dataset, shuffled_indices)
+            if self.dataset_pickle_dir is not None:
+                os.makedirs(self.dataset_pickle_dir, exist_ok=True)
+                train_dataset_path = f"{self.dataset_pickle_dir}/train_dataset.pkl"
+                val_dataset_path = f"{self.dataset_pickle_dir}/val_dataset.pkl"
+                # For safety, these pickled datasets cannot be overwritten.
+                # See: https://github.com/openclimatefix/pvnet/pull/445
+                for path in [train_dataset_path, val_dataset_path]:
+                    if os.path.exists(path):
+                        raise FileExistsError(
+                            f"The pickled dataset path '{path}' already exists. Make sure that "
+                            "this can be safely deleted (i.e. not currently being used by any "
+                            "training run) and delete it manually. Else change the "
+                            "`dataset_pickle_dir` to a different directory."
+                        )
+                self.train_dataset.presave_pickle(train_dataset_path)
+                self.train_dataset.presave_pickle(val_dataset_path)
+    def teardown(self, stage: str | None = None) -> None:
+        """Clean up the pickled datasets"""
+        if self.dataset_pickle_dir is not None:
+            for filename in ["val_dataset.pkl", "train_dataset.pkl"]:
+                filepath = f"{self.dataset_pickle_dir}/{filename}"
+                if os.path.exists(filepath):
+                    os.remove(filepath)
     def _get_streamed_samples_dataset(
         self,

{pvnet-5.0.7 → pvnet-5.0.8}/pvnet/training/lightning_module.py RENAMED Viewed

@@ -223,7 +223,9 @@ class PVNetLightningModule(pl.LightningModule):
             plot_name = f"val_forecast_samples/sample_set_{plot_num}"
-            self.logger.experiment.log({plot_name: wandb.Image(fig)})
+            # Disabled for testing or using no logger
+            if self.logger:
+                self.logger.experiment.log({plot_name: wandb.Image(fig)})
             plt.close(fig)

{pvnet-5.0.7 → pvnet-5.0.8}/pyproject.toml RENAMED Viewed

@@ -9,10 +9,10 @@ authors = [{name="Peter Dudfield", email="info@openclimatefix.org"}]
 dynamic = ["version"]
 license={file="LICENCE"}
 readme = {file="README.md", content-type="text/markdown"}
-requires-python = ">=3.10"
+requires-python = ">=3.11"
 dependencies = [
-    "ocf-data-sampler>=0.2.32",
+    "ocf-data-sampler>=0.5.20",
     "numpy",
     "pandas",
     "matplotlib",

pvnet-5.0.8/tests/test_end2end.py ADDED Viewed

@@ -0,0 +1,32 @@
+import lightning
+from pvnet.data import  UKRegionalStreamedDataModule
+from pvnet.training.lightning_module import PVNetLightningModule
+from pvnet.optimizers import EmbAdamWReduceLROnPlateau
+def test_model_trainer_fit(session_tmp_path, uk_data_config_path, late_fusion_model):
+    """Test end-to-end training."""
+    datamodule = UKRegionalStreamedDataModule(
+        configuration=uk_data_config_path,
+        batch_size=2,
+        num_workers=2,
+        prefetch_factor=None,
+        dataset_pickle_dir=f"{session_tmp_path}/dataset_pickles"
+    )
+    ligtning_model = PVNetLightningModule(
+        model=late_fusion_model,
+        optimizer=EmbAdamWReduceLROnPlateau(),
+    )
+    # Train the model for two batches
+    trainer = lightning.Trainer(
+        max_epochs=2,
+        limit_val_batches=2,
+        limit_train_batches=2,
+        accelerator="cpu",
+        logger=False,
+        enable_checkpointing=False,
+    )
+    trainer.fit(model=ligtning_model, datamodule=datamodule)

pvnet-5.0.7/tests/test_end2end.py DELETED Viewed

@@ -1,21 +0,0 @@
-import lightning
-from pvnet.training.lightning_module import PVNetLightningModule
-from pvnet.optimizers import EmbAdamWReduceLROnPlateau
-def test_model_trainer_fit(late_fusion_model, uk_streamed_datamodule):
-    """Test end-to-end training."""
-    ligtning_model = PVNetLightningModule(
-        model=late_fusion_model,
-        optimizer=EmbAdamWReduceLROnPlateau(),
-    )
-    # Get a sample batch for testing
-    batch = next(iter(uk_streamed_datamodule.train_dataloader()))
-    # Run a forward pass to verify the training module works with the data
-    y = late_fusion_model(batch)
-    # Train the model for one batch
-    trainer = lightning.Trainer(fast_dev_run=True, accelerator="cpu")
-    trainer.fit(model=ligtning_model, datamodule=uk_streamed_datamodule)