PVNet 5.0.7__tar.gz → 5.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {pvnet-5.0.7 → pvnet-5.0.8}/PKG-INFO +3 -3
  2. {pvnet-5.0.7 → pvnet-5.0.8}/PVNet.egg-info/PKG-INFO +3 -3
  3. {pvnet-5.0.7 → pvnet-5.0.8}/PVNet.egg-info/requires.txt +1 -1
  4. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/data/base_datamodule.py +34 -1
  5. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/training/lightning_module.py +3 -1
  6. {pvnet-5.0.7 → pvnet-5.0.8}/pyproject.toml +2 -2
  7. pvnet-5.0.8/tests/test_end2end.py +32 -0
  8. pvnet-5.0.7/tests/test_end2end.py +0 -21
  9. {pvnet-5.0.7 → pvnet-5.0.8}/LICENSE +0 -0
  10. {pvnet-5.0.7 → pvnet-5.0.8}/PVNet.egg-info/SOURCES.txt +0 -0
  11. {pvnet-5.0.7 → pvnet-5.0.8}/PVNet.egg-info/dependency_links.txt +0 -0
  12. {pvnet-5.0.7 → pvnet-5.0.8}/PVNet.egg-info/top_level.txt +0 -0
  13. {pvnet-5.0.7 → pvnet-5.0.8}/README.md +0 -0
  14. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/__init__.py +0 -0
  15. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/data/__init__.py +0 -0
  16. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/data/site_datamodule.py +0 -0
  17. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/data/uk_regional_datamodule.py +0 -0
  18. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/load_model.py +0 -0
  19. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/models/__init__.py +0 -0
  20. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/models/base_model.py +0 -0
  21. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/models/ensemble.py +0 -0
  22. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/models/late_fusion/__init__.py +0 -0
  23. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/models/late_fusion/basic_blocks.py +0 -0
  24. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/models/late_fusion/encoders/__init__.py +0 -0
  25. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/models/late_fusion/encoders/basic_blocks.py +0 -0
  26. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/models/late_fusion/encoders/encoders3d.py +0 -0
  27. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/models/late_fusion/late_fusion.py +0 -0
  28. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/models/late_fusion/linear_networks/__init__.py +0 -0
  29. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/models/late_fusion/linear_networks/basic_blocks.py +0 -0
  30. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/models/late_fusion/linear_networks/networks.py +0 -0
  31. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/models/late_fusion/site_encoders/__init__.py +0 -0
  32. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/models/late_fusion/site_encoders/basic_blocks.py +0 -0
  33. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/models/late_fusion/site_encoders/encoders.py +0 -0
  34. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/optimizers.py +0 -0
  35. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/training/__init__.py +0 -0
  36. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/training/plots.py +0 -0
  37. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/training/train.py +0 -0
  38. {pvnet-5.0.7 → pvnet-5.0.8}/pvnet/utils.py +0 -0
  39. {pvnet-5.0.7 → pvnet-5.0.8}/setup.cfg +0 -0
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PVNet
3
- Version: 5.0.7
3
+ Version: 5.0.8
4
4
  Summary: PVNet
5
5
  Author-email: Peter Dudfield <info@openclimatefix.org>
6
- Requires-Python: >=3.10
6
+ Requires-Python: >=3.11
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
- Requires-Dist: ocf-data-sampler>=0.2.32
9
+ Requires-Dist: ocf-data-sampler>=0.5.20
10
10
  Requires-Dist: numpy
11
11
  Requires-Dist: pandas
12
12
  Requires-Dist: matplotlib
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PVNet
3
- Version: 5.0.7
3
+ Version: 5.0.8
4
4
  Summary: PVNet
5
5
  Author-email: Peter Dudfield <info@openclimatefix.org>
6
- Requires-Python: >=3.10
6
+ Requires-Python: >=3.11
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
- Requires-Dist: ocf-data-sampler>=0.2.32
9
+ Requires-Dist: ocf-data-sampler>=0.5.20
10
10
  Requires-Dist: numpy
11
11
  Requires-Dist: pandas
12
12
  Requires-Dist: matplotlib
@@ -1,4 +1,4 @@
1
- ocf-data-sampler>=0.2.32
1
+ ocf-data-sampler>=0.5.20
2
2
  numpy
3
3
  pandas
4
4
  matplotlib
@@ -1,5 +1,6 @@
1
1
  """ Data module for pytorch lightning """
2
2
 
3
+ import os
3
4
  from glob import glob
4
5
 
5
6
  import numpy as np
@@ -77,6 +78,7 @@ class BasePresavedDataModule(LightningDataModule):
77
78
  worker_init_fn=None,
78
79
  prefetch_factor=prefetch_factor,
79
80
  persistent_workers=persistent_workers,
81
+ multiprocessing_context="spawn" if num_workers>0 else None,
80
82
  )
81
83
 
82
84
  def _get_premade_samples_dataset(self, subdir: str) -> Dataset:
@@ -107,7 +109,7 @@ class BaseStreamedDataModule(LightningDataModule):
107
109
  train_period: list[str | None] = [None, None],
108
110
  val_period: list[str | None] = [None, None],
109
111
  seed: int | None = None,
110
-
112
+ dataset_pickle_dir: str | None = None,
111
113
  ):
112
114
  """Base Datamodule for streaming samples.
113
115
 
@@ -124,6 +126,8 @@ class BaseStreamedDataModule(LightningDataModule):
124
126
  train_period: Date range filter for train dataloader.
125
127
  val_period: Date range filter for val dataloader.
126
128
  seed: Random seed used in shuffling datasets.
129
+ dataset_pickle_dir: Directory in which the val and train set will be presaved as
130
+ pickle objects. Setting this speeds up instantiation of multiple workers a lot.
127
131
  """
128
132
  super().__init__()
129
133
 
@@ -131,6 +135,7 @@ class BaseStreamedDataModule(LightningDataModule):
131
135
  self.train_period = train_period
132
136
  self.val_period = val_period
133
137
  self.seed = seed
138
+ self.dataset_pickle_dir = dataset_pickle_dir
134
139
 
135
140
  self._common_dataloader_kwargs = dict(
136
141
  batch_size=batch_size,
@@ -143,6 +148,7 @@ class BaseStreamedDataModule(LightningDataModule):
143
148
  worker_init_fn=None,
144
149
  prefetch_factor=prefetch_factor,
145
150
  persistent_workers=persistent_workers,
151
+ multiprocessing_context="spawn" if num_workers>0 else None,
146
152
  )
147
153
 
148
154
  def setup(self, stage: str | None = None):
@@ -160,6 +166,33 @@ class BaseStreamedDataModule(LightningDataModule):
160
166
 
161
167
  shuffled_indices = np.random.default_rng(seed=self.seed).permutation(len(val_dataset))
162
168
  self.val_dataset = Subset(val_dataset, shuffled_indices)
169
+
170
+ if self.dataset_pickle_dir is not None:
171
+ os.makedirs(self.dataset_pickle_dir, exist_ok=True)
172
+ train_dataset_path = f"{self.dataset_pickle_dir}/train_dataset.pkl"
173
+ val_dataset_path = f"{self.dataset_pickle_dir}/val_dataset.pkl"
174
+
175
+ # For safety, these pickled datasets cannot be overwritten.
176
+ # See: https://github.com/openclimatefix/pvnet/pull/445
177
+ for path in [train_dataset_path, val_dataset_path]:
178
+ if os.path.exists(path):
179
+ raise FileExistsError(
180
+ f"The pickled dataset path '{path}' already exists. Make sure that "
181
+ "this can be safely deleted (i.e. not currently being used by any "
182
+ "training run) and delete it manually. Else change the "
183
+ "`dataset_pickle_dir` to a different directory."
184
+ )
185
+
186
+ self.train_dataset.presave_pickle(train_dataset_path)
187
+ self.train_dataset.presave_pickle(val_dataset_path)
188
+
189
+ def teardown(self, stage: str | None = None) -> None:
190
+ """Clean up the pickled datasets"""
191
+ if self.dataset_pickle_dir is not None:
192
+ for filename in ["val_dataset.pkl", "train_dataset.pkl"]:
193
+ filepath = f"{self.dataset_pickle_dir}/{filename}"
194
+ if os.path.exists(filepath):
195
+ os.remove(filepath)
163
196
 
164
197
  def _get_streamed_samples_dataset(
165
198
  self,
@@ -223,7 +223,9 @@ class PVNetLightningModule(pl.LightningModule):
223
223
 
224
224
  plot_name = f"val_forecast_samples/sample_set_{plot_num}"
225
225
 
226
- self.logger.experiment.log({plot_name: wandb.Image(fig)})
226
+ # Disabled for testing or using no logger
227
+ if self.logger:
228
+ self.logger.experiment.log({plot_name: wandb.Image(fig)})
227
229
 
228
230
  plt.close(fig)
229
231
 
@@ -9,10 +9,10 @@ authors = [{name="Peter Dudfield", email="info@openclimatefix.org"}]
9
9
  dynamic = ["version"]
10
10
  license={file="LICENCE"}
11
11
  readme = {file="README.md", content-type="text/markdown"}
12
- requires-python = ">=3.10"
12
+ requires-python = ">=3.11"
13
13
 
14
14
  dependencies = [
15
- "ocf-data-sampler>=0.2.32",
15
+ "ocf-data-sampler>=0.5.20",
16
16
  "numpy",
17
17
  "pandas",
18
18
  "matplotlib",
@@ -0,0 +1,32 @@
1
+ import lightning
2
+ from pvnet.data import UKRegionalStreamedDataModule
3
+ from pvnet.training.lightning_module import PVNetLightningModule
4
+ from pvnet.optimizers import EmbAdamWReduceLROnPlateau
5
+
6
+
7
+ def test_model_trainer_fit(session_tmp_path, uk_data_config_path, late_fusion_model):
8
+ """Test end-to-end training."""
9
+
10
+ datamodule = UKRegionalStreamedDataModule(
11
+ configuration=uk_data_config_path,
12
+ batch_size=2,
13
+ num_workers=2,
14
+ prefetch_factor=None,
15
+ dataset_pickle_dir=f"{session_tmp_path}/dataset_pickles"
16
+ )
17
+
18
+ ligtning_model = PVNetLightningModule(
19
+ model=late_fusion_model,
20
+ optimizer=EmbAdamWReduceLROnPlateau(),
21
+ )
22
+
23
+ # Train the model for two batches
24
+ trainer = lightning.Trainer(
25
+ max_epochs=2,
26
+ limit_val_batches=2,
27
+ limit_train_batches=2,
28
+ accelerator="cpu",
29
+ logger=False,
30
+ enable_checkpointing=False,
31
+ )
32
+ trainer.fit(model=ligtning_model, datamodule=datamodule)
@@ -1,21 +0,0 @@
1
- import lightning
2
- from pvnet.training.lightning_module import PVNetLightningModule
3
- from pvnet.optimizers import EmbAdamWReduceLROnPlateau
4
-
5
- def test_model_trainer_fit(late_fusion_model, uk_streamed_datamodule):
6
- """Test end-to-end training."""
7
-
8
- ligtning_model = PVNetLightningModule(
9
- model=late_fusion_model,
10
- optimizer=EmbAdamWReduceLROnPlateau(),
11
- )
12
-
13
- # Get a sample batch for testing
14
- batch = next(iter(uk_streamed_datamodule.train_dataloader()))
15
-
16
- # Run a forward pass to verify the training module works with the data
17
- y = late_fusion_model(batch)
18
-
19
- # Train the model for one batch
20
- trainer = lightning.Trainer(fast_dev_run=True, accelerator="cpu")
21
- trainer.fit(model=ligtning_model, datamodule=uk_streamed_datamodule)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes