PVNet 4.1.30__tar.gz → 5.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. {pvnet-4.1.30 → pvnet-5.0.0}/PKG-INFO +26 -32
  2. {pvnet-4.1.30 → pvnet-5.0.0}/PVNet.egg-info/PKG-INFO +26 -32
  3. pvnet-5.0.0/PVNet.egg-info/SOURCES.txt +36 -0
  4. {pvnet-4.1.30 → pvnet-5.0.0}/PVNet.egg-info/requires.txt +1 -2
  5. {pvnet-4.1.30 → pvnet-5.0.0}/README.md +24 -29
  6. {pvnet-4.1.30 → pvnet-5.0.0}/pvnet/data/base_datamodule.py +5 -9
  7. {pvnet-4.1.30 → pvnet-5.0.0}/pvnet/data/site_datamodule.py +1 -2
  8. {pvnet-4.1.30 → pvnet-5.0.0}/pvnet/data/uk_regional_datamodule.py +1 -2
  9. {pvnet-4.1.30 → pvnet-5.0.0}/pvnet/load_model.py +33 -19
  10. pvnet-5.0.0/pvnet/models/__init__.py +4 -0
  11. pvnet-5.0.0/pvnet/models/base_model.py +511 -0
  12. {pvnet-4.1.30 → pvnet-5.0.0}/pvnet/models/ensemble.py +4 -6
  13. pvnet-5.0.0/pvnet/models/late_fusion/__init__.py +1 -0
  14. {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/basic_blocks.py +2 -2
  15. {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/encoders/basic_blocks.py +3 -2
  16. {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/encoders/encoders3d.py +12 -12
  17. pvnet-4.1.30/pvnet/models/multimodal/multimodal.py → pvnet-5.0.0/pvnet/models/late_fusion/late_fusion.py +43 -68
  18. {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/linear_networks/basic_blocks.py +4 -4
  19. {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/linear_networks/networks.py +10 -9
  20. {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/site_encoders/basic_blocks.py +2 -1
  21. {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/site_encoders/encoders.py +21 -18
  22. {pvnet-4.1.30 → pvnet-5.0.0}/pvnet/optimizers.py +66 -46
  23. pvnet-5.0.0/pvnet/training/__init__.py +2 -0
  24. pvnet-5.0.0/pvnet/training/lightning_module.py +347 -0
  25. pvnet-5.0.0/pvnet/training/plots.py +86 -0
  26. pvnet-5.0.0/pvnet/training/train.py +144 -0
  27. pvnet-5.0.0/pvnet/utils.py +86 -0
  28. {pvnet-4.1.30 → pvnet-5.0.0}/pyproject.toml +3 -12
  29. pvnet-5.0.0/tests/test_end2end.py +21 -0
  30. pvnet-4.1.30/PVNet.egg-info/SOURCES.txt +0 -35
  31. pvnet-4.1.30/pvnet/models/__init__.py +0 -1
  32. pvnet-4.1.30/pvnet/models/base_model.py +0 -959
  33. pvnet-4.1.30/pvnet/models/multimodal/__init__.py +0 -1
  34. pvnet-4.1.30/pvnet/models/multimodal/unimodal_teacher.py +0 -447
  35. pvnet-4.1.30/pvnet/models/utils.py +0 -121
  36. pvnet-4.1.30/pvnet/training.py +0 -140
  37. pvnet-4.1.30/pvnet/utils.py +0 -234
  38. pvnet-4.1.30/tests/test_end2end.py +0 -13
  39. {pvnet-4.1.30 → pvnet-5.0.0}/LICENSE +0 -0
  40. {pvnet-4.1.30 → pvnet-5.0.0}/PVNet.egg-info/dependency_links.txt +0 -0
  41. {pvnet-4.1.30 → pvnet-5.0.0}/PVNet.egg-info/top_level.txt +0 -0
  42. {pvnet-4.1.30 → pvnet-5.0.0}/pvnet/__init__.py +0 -0
  43. {pvnet-4.1.30 → pvnet-5.0.0}/pvnet/data/__init__.py +0 -0
  44. {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/encoders/__init__.py +0 -0
  45. {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/linear_networks/__init__.py +0 -0
  46. {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/site_encoders/__init__.py +0 -0
  47. {pvnet-4.1.30 → pvnet-5.0.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PVNet
3
- Version: 4.1.30
3
+ Version: 5.0.0
4
4
  Summary: PVNet
5
5
  Author-email: Peter Dudfield <info@openclimatefix.org>
6
6
  Requires-Python: >=3.10
@@ -15,8 +15,6 @@ Requires-Dist: h5netcdf
15
15
  Requires-Dist: torch>=2.0.0
16
16
  Requires-Dist: lightning
17
17
  Requires-Dist: torchvision
18
- Requires-Dist: pytest
19
- Requires-Dist: pytest-cov
20
18
  Requires-Dist: typer
21
19
  Requires-Dist: sqlalchemy
22
20
  Requires-Dist: fsspec[s3]
@@ -27,9 +25,10 @@ Requires-Dist: omegaconf
27
25
  Requires-Dist: hydra-core
28
26
  Requires-Dist: rich
29
27
  Requires-Dist: einops
28
+ Requires-Dist: safetensors
30
29
  Dynamic: license-file
31
30
 
32
- # PVNet 2.1
31
+ # PVNet
33
32
  <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
34
33
  [![All Contributors](https://img.shields.io/badge/all_contributors-19-orange.svg?style=flat-square)](#contributors-)
35
34
  <!-- ALL-CONTRIBUTORS-BADGE:END -->
@@ -40,39 +39,34 @@ Dynamic: license-file
40
39
 
41
40
  This project is used for training PVNet and running PVNet on live data.
42
41
 
43
- PVNet2 is a multi-modal late-fusion model that largely inherits the same architecture from
44
- [PVNet1.0](https://github.com/openclimatefix/predict_pv_yield). The NWP (Numerical Weather Prediction) and
45
- satellite data are sent through some neural network which encodes them down to
46
- 1D intermediate representations. These are concatenated together with the GSP (Grid Supply Point)
47
- output history, the calculated solar coordinates (azimuth and elevation) and the
48
- GSP ID which has been put through an embedding layer. This 1D concatenated
49
- feature vector is put through an output network which outputs predictions of the
50
- future GSP yield. National forecasts are made by adding all the GSP forecasts
51
- together.
42
+ PVNet is a multi-modal late-fusion model for predicting renewable energy generation from weather
43
+ data. The NWP (Numerical Weather Prediction) and satellite data are sent through a neural network
44
+ which encodes them down to 1D intermediate representations. These are concatenated together with
45
+ recent generation, the calculated solar coordinates (azimuth and elevation) and the location ID
46
+ which has been put through an embedding layer. This 1D concatenated feature vector is put through
47
+ an output network which outputs predictions of the future energy yield.
52
48
 
53
49
 
54
50
  ## Experiments
55
51
 
56
- Our paper based on this repo was accepted into the Tackling Climate Change with Machine Learning workshop at ICLR 2024 and can be viewed [here](https://www.climatechange.ai/papers/iclr2024/46).
57
-
58
- Some slightly more structured notes on deliberate experiments we have performed with PVNet are [here](https://docs.google.com/document/d/1VumDwWd8YAfvXbOtJEv3ZJm_FHQDzrKXR0jU9vnvGQg).
59
-
60
- Some very rough, early working notes on this model are
61
- [here](https://docs.google.com/document/d/1fbkfkBzp16WbnCg7RDuRDvgzInA6XQu3xh4NCjV-WDA). These are now somewhat out of date.
52
+ Our paper based on this repo was accepted into the Tackling Climate Change with Machine Learning
53
+ workshop at ICLR 2024 and can be viewed [here](https://www.climatechange.ai/papers/iclr2024/46).
62
54
 
55
+ Some more structured notes on experiments we have performed with PVNet are
56
+ [here](https://docs.google.com/document/d/1VumDwWd8YAfvXbOtJEv3ZJm_FHQDzrKXR0jU9vnvGQg).
63
57
 
64
58
 
65
59
  ## Setup / Installation
66
60
 
67
61
  ```bash
68
- git clone https://github.com/openclimatefix/PVNet.git
62
+ git clone git@github.com:openclimatefix/PVNet.git
69
63
  cd PVNet
70
64
  pip install .
71
65
  ```
72
66
 
73
67
  The commit history is extensive. To save download time, use a depth of 1:
74
68
  ```bash
75
- git clone --depth 1 https://github.com/openclimatefix/PVNet.git
69
+ git clone --depth 1 git@github.com:openclimatefix/PVNet.git
76
70
  ```
77
71
  This means only the latest commit and its associated files will be downloaded.
78
72
 
@@ -130,7 +124,7 @@ here: https://huggingface.co/datasets/openclimatefix/uk_pv
130
124
 
131
125
  Outside the PVNet repo, clone the ocf-data-sampler repo and exit the conda env created for PVNet: https://github.com/openclimatefix/ocf-data-sampler
132
126
  ```bash
133
- git clone https://github.com/openclimatefix/ocf-data-sampler.git
127
+ git clone git@github.com:openclimatefix/ocf-data-sampler.git
134
128
  conda create -n ocf-data-sampler python=3.11
135
129
  ```
136
130
 
@@ -146,7 +140,8 @@ Then exit this environment, and enter back into the pvnet conda environment and
146
140
  pip install -e <PATH-TO-ocf-data-sampler-REPO>
147
141
  ```
148
142
 
149
- If you install the local version of `ocf-data-sampler` that is more recent than the version specified in PVNet, you might receive a warning. However, it should still function correctly.
143
+ If you install the local version of `ocf-data-sampler` that is more recent than the version
144
+ specified in `PVNet`, it is not guaranteed to function properly with this library.
150
145
 
151
146
  ## Pre-saving samples of data for training/validation of PVNet
152
147
 
@@ -205,14 +200,14 @@ Files stored in multiple locations can be added as a list. For example, in the `
205
200
 
206
201
  ```yaml
207
202
  satellite:
208
- satellite_zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
203
+ zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
209
204
  ```
210
205
 
211
206
  Or to satellite data hosted by Google:
212
207
 
213
208
  ```yaml
214
209
  satellite:
215
- satellite_zarr_paths:
210
+ zarr_path:
216
211
  - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr"
217
212
  - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2021_nonhrv.zarr"
218
213
  ```
@@ -227,13 +222,13 @@ files. The configs stored in `PVNet/configs.example` should work with samples cr
227
222
 
228
223
  Make sure to update the following config files before training your model:
229
224
 
230
- 1. In `configs/datamodule/local_presaved_samples.yaml`:
225
+ 1. In `configs/datamodule/presaved_samples.yaml`:
231
226
  - update `sample_dir` to point to the directory you stored your samples in during sample creation
232
- 2. In `configs/model/local_multimodal.yaml`:
227
+ 2. In `configs/model/late_fusion.yaml`:
233
228
  - update the list of encoders to reflect the data sources you are using. If you are using different NWP sources, the encoders for these should follow the same structure with two important updates:
234
229
  - `in_channels`: number of variables your NWP source supplies
235
230
  - `image_size_pixels`: spatial crop of your NWP data. It depends on the spatial resolution of your NWP; should match `image_size_pixels_height` and/or `image_size_pixels_width` in `datamodule/configuration/site_example_configuration.yaml` for the NWP, unless transformations such as coarsening were applied (e.g. as for ECMWF data)
236
- 3. In `configs/local_trainer.yaml`:
231
+ 3. In `configs/trainer/default.yaml`:
237
232
  - set `accelerator: 0` if running on a system without a supported GPU
238
233
 
239
234
  If creating copies of the config files instead of modifying existing ones, update `defaults` in the main `./configs/config.yaml` file to use
@@ -241,11 +236,10 @@ your customised config files:
241
236
 
242
237
  ```yaml
243
238
  defaults:
244
- - trainer: local_trainer.yaml
245
- - model: local_multimodal.yaml
246
- - datamodule: local_presaved_samples.yaml
239
+ - trainer: default.yaml
240
+ - model: late_fusion.yaml
241
+ - datamodule: presaved_samples.yaml
247
242
  - callbacks: null
248
- - logger: csv.yaml
249
243
  - experiment: null
250
244
  - hparams_search: null
251
245
  - hydra: default.yaml
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PVNet
3
- Version: 4.1.30
3
+ Version: 5.0.0
4
4
  Summary: PVNet
5
5
  Author-email: Peter Dudfield <info@openclimatefix.org>
6
6
  Requires-Python: >=3.10
@@ -15,8 +15,6 @@ Requires-Dist: h5netcdf
15
15
  Requires-Dist: torch>=2.0.0
16
16
  Requires-Dist: lightning
17
17
  Requires-Dist: torchvision
18
- Requires-Dist: pytest
19
- Requires-Dist: pytest-cov
20
18
  Requires-Dist: typer
21
19
  Requires-Dist: sqlalchemy
22
20
  Requires-Dist: fsspec[s3]
@@ -27,9 +25,10 @@ Requires-Dist: omegaconf
27
25
  Requires-Dist: hydra-core
28
26
  Requires-Dist: rich
29
27
  Requires-Dist: einops
28
+ Requires-Dist: safetensors
30
29
  Dynamic: license-file
31
30
 
32
- # PVNet 2.1
31
+ # PVNet
33
32
  <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
34
33
  [![All Contributors](https://img.shields.io/badge/all_contributors-19-orange.svg?style=flat-square)](#contributors-)
35
34
  <!-- ALL-CONTRIBUTORS-BADGE:END -->
@@ -40,39 +39,34 @@ Dynamic: license-file
40
39
 
41
40
  This project is used for training PVNet and running PVNet on live data.
42
41
 
43
- PVNet2 is a multi-modal late-fusion model that largely inherits the same architecture from
44
- [PVNet1.0](https://github.com/openclimatefix/predict_pv_yield). The NWP (Numerical Weather Prediction) and
45
- satellite data are sent through some neural network which encodes them down to
46
- 1D intermediate representations. These are concatenated together with the GSP (Grid Supply Point)
47
- output history, the calculated solar coordinates (azimuth and elevation) and the
48
- GSP ID which has been put through an embedding layer. This 1D concatenated
49
- feature vector is put through an output network which outputs predictions of the
50
- future GSP yield. National forecasts are made by adding all the GSP forecasts
51
- together.
42
+ PVNet is a multi-modal late-fusion model for predicting renewable energy generation from weather
43
+ data. The NWP (Numerical Weather Prediction) and satellite data are sent through a neural network
44
+ which encodes them down to 1D intermediate representations. These are concatenated together with
45
+ recent generation, the calculated solar coordinates (azimuth and elevation) and the location ID
46
+ which has been put through an embedding layer. This 1D concatenated feature vector is put through
47
+ an output network which outputs predictions of the future energy yield.
52
48
 
53
49
 
54
50
  ## Experiments
55
51
 
56
- Our paper based on this repo was accepted into the Tackling Climate Change with Machine Learning workshop at ICLR 2024 and can be viewed [here](https://www.climatechange.ai/papers/iclr2024/46).
57
-
58
- Some slightly more structured notes on deliberate experiments we have performed with PVNet are [here](https://docs.google.com/document/d/1VumDwWd8YAfvXbOtJEv3ZJm_FHQDzrKXR0jU9vnvGQg).
59
-
60
- Some very rough, early working notes on this model are
61
- [here](https://docs.google.com/document/d/1fbkfkBzp16WbnCg7RDuRDvgzInA6XQu3xh4NCjV-WDA). These are now somewhat out of date.
52
+ Our paper based on this repo was accepted into the Tackling Climate Change with Machine Learning
53
+ workshop at ICLR 2024 and can be viewed [here](https://www.climatechange.ai/papers/iclr2024/46).
62
54
 
55
+ Some more structured notes on experiments we have performed with PVNet are
56
+ [here](https://docs.google.com/document/d/1VumDwWd8YAfvXbOtJEv3ZJm_FHQDzrKXR0jU9vnvGQg).
63
57
 
64
58
 
65
59
  ## Setup / Installation
66
60
 
67
61
  ```bash
68
- git clone https://github.com/openclimatefix/PVNet.git
62
+ git clone git@github.com:openclimatefix/PVNet.git
69
63
  cd PVNet
70
64
  pip install .
71
65
  ```
72
66
 
73
67
  The commit history is extensive. To save download time, use a depth of 1:
74
68
  ```bash
75
- git clone --depth 1 https://github.com/openclimatefix/PVNet.git
69
+ git clone --depth 1 git@github.com:openclimatefix/PVNet.git
76
70
  ```
77
71
  This means only the latest commit and its associated files will be downloaded.
78
72
 
@@ -130,7 +124,7 @@ here: https://huggingface.co/datasets/openclimatefix/uk_pv
130
124
 
131
125
  Outside the PVNet repo, clone the ocf-data-sampler repo and exit the conda env created for PVNet: https://github.com/openclimatefix/ocf-data-sampler
132
126
  ```bash
133
- git clone https://github.com/openclimatefix/ocf-data-sampler.git
127
+ git clone git@github.com:openclimatefix/ocf-data-sampler.git
134
128
  conda create -n ocf-data-sampler python=3.11
135
129
  ```
136
130
 
@@ -146,7 +140,8 @@ Then exit this environment, and enter back into the pvnet conda environment and
146
140
  pip install -e <PATH-TO-ocf-data-sampler-REPO>
147
141
  ```
148
142
 
149
- If you install the local version of `ocf-data-sampler` that is more recent than the version specified in PVNet, you might receive a warning. However, it should still function correctly.
143
+ If you install the local version of `ocf-data-sampler` that is more recent than the version
144
+ specified in `PVNet`, it is not guaranteed to function properly with this library.
150
145
 
151
146
  ## Pre-saving samples of data for training/validation of PVNet
152
147
 
@@ -205,14 +200,14 @@ Files stored in multiple locations can be added as a list. For example, in the `
205
200
 
206
201
  ```yaml
207
202
  satellite:
208
- satellite_zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
203
+ zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
209
204
  ```
210
205
 
211
206
  Or to satellite data hosted by Google:
212
207
 
213
208
  ```yaml
214
209
  satellite:
215
- satellite_zarr_paths:
210
+ zarr_path:
216
211
  - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr"
217
212
  - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2021_nonhrv.zarr"
218
213
  ```
@@ -227,13 +222,13 @@ files. The configs stored in `PVNet/configs.example` should work with samples cr
227
222
 
228
223
  Make sure to update the following config files before training your model:
229
224
 
230
- 1. In `configs/datamodule/local_presaved_samples.yaml`:
225
+ 1. In `configs/datamodule/presaved_samples.yaml`:
231
226
  - update `sample_dir` to point to the directory you stored your samples in during sample creation
232
- 2. In `configs/model/local_multimodal.yaml`:
227
+ 2. In `configs/model/late_fusion.yaml`:
233
228
  - update the list of encoders to reflect the data sources you are using. If you are using different NWP sources, the encoders for these should follow the same structure with two important updates:
234
229
  - `in_channels`: number of variables your NWP source supplies
235
230
  - `image_size_pixels`: spatial crop of your NWP data. It depends on the spatial resolution of your NWP; should match `image_size_pixels_height` and/or `image_size_pixels_width` in `datamodule/configuration/site_example_configuration.yaml` for the NWP, unless transformations such as coarsening were applied (e.g. as for ECMWF data)
236
- 3. In `configs/local_trainer.yaml`:
231
+ 3. In `configs/trainer/default.yaml`:
237
232
  - set `accelerator: 0` if running on a system without a supported GPU
238
233
 
239
234
  If creating copies of the config files instead of modifying existing ones, update `defaults` in the main `./configs/config.yaml` file to use
@@ -241,11 +236,10 @@ your customised config files:
241
236
 
242
237
  ```yaml
243
238
  defaults:
244
- - trainer: local_trainer.yaml
245
- - model: local_multimodal.yaml
246
- - datamodule: local_presaved_samples.yaml
239
+ - trainer: default.yaml
240
+ - model: late_fusion.yaml
241
+ - datamodule: presaved_samples.yaml
247
242
  - callbacks: null
248
- - logger: csv.yaml
249
243
  - experiment: null
250
244
  - hparams_search: null
251
245
  - hydra: default.yaml
@@ -0,0 +1,36 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ PVNet.egg-info/PKG-INFO
5
+ PVNet.egg-info/SOURCES.txt
6
+ PVNet.egg-info/dependency_links.txt
7
+ PVNet.egg-info/requires.txt
8
+ PVNet.egg-info/top_level.txt
9
+ pvnet/__init__.py
10
+ pvnet/load_model.py
11
+ pvnet/optimizers.py
12
+ pvnet/utils.py
13
+ pvnet/data/__init__.py
14
+ pvnet/data/base_datamodule.py
15
+ pvnet/data/site_datamodule.py
16
+ pvnet/data/uk_regional_datamodule.py
17
+ pvnet/models/__init__.py
18
+ pvnet/models/base_model.py
19
+ pvnet/models/ensemble.py
20
+ pvnet/models/late_fusion/__init__.py
21
+ pvnet/models/late_fusion/basic_blocks.py
22
+ pvnet/models/late_fusion/late_fusion.py
23
+ pvnet/models/late_fusion/encoders/__init__.py
24
+ pvnet/models/late_fusion/encoders/basic_blocks.py
25
+ pvnet/models/late_fusion/encoders/encoders3d.py
26
+ pvnet/models/late_fusion/linear_networks/__init__.py
27
+ pvnet/models/late_fusion/linear_networks/basic_blocks.py
28
+ pvnet/models/late_fusion/linear_networks/networks.py
29
+ pvnet/models/late_fusion/site_encoders/__init__.py
30
+ pvnet/models/late_fusion/site_encoders/basic_blocks.py
31
+ pvnet/models/late_fusion/site_encoders/encoders.py
32
+ pvnet/training/__init__.py
33
+ pvnet/training/lightning_module.py
34
+ pvnet/training/plots.py
35
+ pvnet/training/train.py
36
+ tests/test_end2end.py
@@ -7,8 +7,6 @@ h5netcdf
7
7
  torch>=2.0.0
8
8
  lightning
9
9
  torchvision
10
- pytest
11
- pytest-cov
12
10
  typer
13
11
  sqlalchemy
14
12
  fsspec[s3]
@@ -19,3 +17,4 @@ omegaconf
19
17
  hydra-core
20
18
  rich
21
19
  einops
20
+ safetensors
@@ -1,4 +1,4 @@
1
- # PVNet 2.1
1
+ # PVNet
2
2
  <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
3
3
  [![All Contributors](https://img.shields.io/badge/all_contributors-19-orange.svg?style=flat-square)](#contributors-)
4
4
  <!-- ALL-CONTRIBUTORS-BADGE:END -->
@@ -9,39 +9,34 @@
9
9
 
10
10
  This project is used for training PVNet and running PVNet on live data.
11
11
 
12
- PVNet2 is a multi-modal late-fusion model that largely inherits the same architecture from
13
- [PVNet1.0](https://github.com/openclimatefix/predict_pv_yield). The NWP (Numerical Weather Prediction) and
14
- satellite data are sent through some neural network which encodes them down to
15
- 1D intermediate representations. These are concatenated together with the GSP (Grid Supply Point)
16
- output history, the calculated solar coordinates (azimuth and elevation) and the
17
- GSP ID which has been put through an embedding layer. This 1D concatenated
18
- feature vector is put through an output network which outputs predictions of the
19
- future GSP yield. National forecasts are made by adding all the GSP forecasts
20
- together.
12
+ PVNet is a multi-modal late-fusion model for predicting renewable energy generation from weather
13
+ data. The NWP (Numerical Weather Prediction) and satellite data are sent through a neural network
14
+ which encodes them down to 1D intermediate representations. These are concatenated together with
15
+ recent generation, the calculated solar coordinates (azimuth and elevation) and the location ID
16
+ which has been put through an embedding layer. This 1D concatenated feature vector is put through
17
+ an output network which outputs predictions of the future energy yield.
21
18
 
22
19
 
23
20
  ## Experiments
24
21
 
25
- Our paper based on this repo was accepted into the Tackling Climate Change with Machine Learning workshop at ICLR 2024 and can be viewed [here](https://www.climatechange.ai/papers/iclr2024/46).
26
-
27
- Some slightly more structured notes on deliberate experiments we have performed with PVNet are [here](https://docs.google.com/document/d/1VumDwWd8YAfvXbOtJEv3ZJm_FHQDzrKXR0jU9vnvGQg).
28
-
29
- Some very rough, early working notes on this model are
30
- [here](https://docs.google.com/document/d/1fbkfkBzp16WbnCg7RDuRDvgzInA6XQu3xh4NCjV-WDA). These are now somewhat out of date.
22
+ Our paper based on this repo was accepted into the Tackling Climate Change with Machine Learning
23
+ workshop at ICLR 2024 and can be viewed [here](https://www.climatechange.ai/papers/iclr2024/46).
31
24
 
25
+ Some more structured notes on experiments we have performed with PVNet are
26
+ [here](https://docs.google.com/document/d/1VumDwWd8YAfvXbOtJEv3ZJm_FHQDzrKXR0jU9vnvGQg).
32
27
 
33
28
 
34
29
  ## Setup / Installation
35
30
 
36
31
  ```bash
37
- git clone https://github.com/openclimatefix/PVNet.git
32
+ git clone git@github.com:openclimatefix/PVNet.git
38
33
  cd PVNet
39
34
  pip install .
40
35
  ```
41
36
 
42
37
  The commit history is extensive. To save download time, use a depth of 1:
43
38
  ```bash
44
- git clone --depth 1 https://github.com/openclimatefix/PVNet.git
39
+ git clone --depth 1 git@github.com:openclimatefix/PVNet.git
45
40
  ```
46
41
  This means only the latest commit and its associated files will be downloaded.
47
42
 
@@ -99,7 +94,7 @@ here: https://huggingface.co/datasets/openclimatefix/uk_pv
99
94
 
100
95
  Outside the PVNet repo, clone the ocf-data-sampler repo and exit the conda env created for PVNet: https://github.com/openclimatefix/ocf-data-sampler
101
96
  ```bash
102
- git clone https://github.com/openclimatefix/ocf-data-sampler.git
97
+ git clone git@github.com:openclimatefix/ocf-data-sampler.git
103
98
  conda create -n ocf-data-sampler python=3.11
104
99
  ```
105
100
 
@@ -115,7 +110,8 @@ Then exit this environment, and enter back into the pvnet conda environment and
115
110
  pip install -e <PATH-TO-ocf-data-sampler-REPO>
116
111
  ```
117
112
 
118
- If you install the local version of `ocf-data-sampler` that is more recent than the version specified in PVNet, you might receive a warning. However, it should still function correctly.
113
+ If you install the local version of `ocf-data-sampler` that is more recent than the version
114
+ specified in `PVNet`, it is not guaranteed to function properly with this library.
119
115
 
120
116
  ## Pre-saving samples of data for training/validation of PVNet
121
117
 
@@ -174,14 +170,14 @@ Files stored in multiple locations can be added as a list. For example, in the `
174
170
 
175
171
  ```yaml
176
172
  satellite:
177
- satellite_zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
173
+ zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
178
174
  ```
179
175
 
180
176
  Or to satellite data hosted by Google:
181
177
 
182
178
  ```yaml
183
179
  satellite:
184
- satellite_zarr_paths:
180
+ zarr_path:
185
181
  - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr"
186
182
  - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2021_nonhrv.zarr"
187
183
  ```
@@ -196,13 +192,13 @@ files. The configs stored in `PVNet/configs.example` should work with samples cr
196
192
 
197
193
  Make sure to update the following config files before training your model:
198
194
 
199
- 1. In `configs/datamodule/local_presaved_samples.yaml`:
195
+ 1. In `configs/datamodule/presaved_samples.yaml`:
200
196
  - update `sample_dir` to point to the directory you stored your samples in during sample creation
201
- 2. In `configs/model/local_multimodal.yaml`:
197
+ 2. In `configs/model/late_fusion.yaml`:
202
198
  - update the list of encoders to reflect the data sources you are using. If you are using different NWP sources, the encoders for these should follow the same structure with two important updates:
203
199
  - `in_channels`: number of variables your NWP source supplies
204
200
  - `image_size_pixels`: spatial crop of your NWP data. It depends on the spatial resolution of your NWP; should match `image_size_pixels_height` and/or `image_size_pixels_width` in `datamodule/configuration/site_example_configuration.yaml` for the NWP, unless transformations such as coarsening were applied (e.g. as for ECMWF data)
205
- 3. In `configs/local_trainer.yaml`:
201
+ 3. In `configs/trainer/default.yaml`:
206
202
  - set `accelerator: 0` if running on a system without a supported GPU
207
203
 
208
204
  If creating copies of the config files instead of modifying existing ones, update `defaults` in the main `./configs/config.yaml` file to use
@@ -210,11 +206,10 @@ your customised config files:
210
206
 
211
207
  ```yaml
212
208
  defaults:
213
- - trainer: local_trainer.yaml
214
- - model: local_multimodal.yaml
215
- - datamodule: local_presaved_samples.yaml
209
+ - trainer: default.yaml
210
+ - model: late_fusion.yaml
211
+ - datamodule: presaved_samples.yaml
216
212
  - callbacks: null
217
- - logger: csv.yaml
218
213
  - experiment: null
219
214
  - hparams_search: null
220
215
  - hydra: default.yaml
@@ -5,16 +5,12 @@ from glob import glob
5
5
  import torch
6
6
  from lightning.pytorch import LightningDataModule
7
7
  from ocf_data_sampler.numpy_sample.collate import stack_np_samples_into_batch
8
- from ocf_data_sampler.torch_datasets.sample.base import (
9
- NumpyBatch,
10
- SampleBase,
11
- TensorBatch,
12
- batch_to_tensor,
13
- )
8
+ from ocf_data_sampler.numpy_sample.common_types import NumpySample, TensorBatch
9
+ from ocf_data_sampler.torch_datasets.sample.base import SampleBase, batch_to_tensor
14
10
  from torch.utils.data import DataLoader, Dataset, Subset
15
11
 
16
12
 
17
- def collate_fn(samples: list[NumpyBatch]) -> TensorBatch:
13
+ def collate_fn(samples: list[NumpySample]) -> TensorBatch:
18
14
  """Convert a list of NumpySample samples to a tensor batch"""
19
15
  return batch_to_tensor(stack_np_samples_into_batch(samples))
20
16
 
@@ -32,10 +28,10 @@ class PresavedSamplesDataset(Dataset):
32
28
  self.sample_paths = glob(f"{sample_dir}/*")
33
29
  self.sample_class = sample_class
34
30
 
35
- def __len__(self):
31
+ def __len__(self) -> int:
36
32
  return len(self.sample_paths)
37
33
 
38
- def __getitem__(self, idx):
34
+ def __getitem__(self, idx) -> NumpySample:
39
35
  sample = self.sample_class.load(self.sample_paths[idx])
40
36
  return sample.to_numpy()
41
37
 
@@ -15,8 +15,7 @@ class SitePresavedDataModule(BasePresavedDataModule):
15
15
  """Datamodule for loading pre-saved samples."""
16
16
 
17
17
  def _get_premade_samples_dataset(self, subdir: str) -> Dataset:
18
- split_dir = f"{self.sample_dir}/{subdir}"
19
- return PresavedSamplesDataset(split_dir, SiteSample)
18
+ return PresavedSamplesDataset(f"{self.sample_dir}/{subdir}", SiteSample)
20
19
 
21
20
 
22
21
  class SiteStreamedDataModule(BaseStreamedDataModule):
@@ -15,8 +15,7 @@ class UKRegionalPresavedDataModule(BasePresavedDataModule):
15
15
  """Datamodule for loading pre-saved samples."""
16
16
 
17
17
  def _get_premade_samples_dataset(self, subdir: str) -> Dataset:
18
- split_dir = f"{self.sample_dir}/{subdir}"
19
- return PresavedSamplesDataset(split_dir, UKRegionalSample)
18
+ return PresavedSamplesDataset(f"{self.sample_dir}/{subdir}", UKRegionalSample)
20
19
 
21
20
 
22
21
  class UKRegionalStreamedDataModule(BaseStreamedDataModule):
@@ -2,17 +2,16 @@
2
2
 
3
3
  import glob
4
4
  import os
5
- from typing import Any
6
5
 
7
6
  import hydra
8
7
  import torch
9
- from pyaml_env import parse_config
8
+ import yaml
10
9
 
11
10
  from pvnet.models.ensemble import Ensemble
12
- from pvnet.models.multimodal.unimodal_teacher import Model as UMTModel
13
11
  from pvnet.utils import (
14
12
  DATA_CONFIG_NAME,
15
13
  DATAMODULE_CONFIG_NAME,
14
+ FULL_CONFIG_NAME,
16
15
  MODEL_CONFIG_NAME,
17
16
  )
18
17
 
@@ -20,7 +19,7 @@ from pvnet.utils import (
20
19
  def get_model_from_checkpoints(
21
20
  checkpoint_dir_paths: list[str],
22
21
  val_best: bool = True,
23
- ) -> tuple[torch.nn.Module, dict[str, Any] | str, str | None, str | None]:
22
+ ) -> tuple[torch.nn.Module, dict, str, str | None, str | None]:
24
23
  """Load a model from its checkpoint directory
25
24
 
26
25
  Returns:
@@ -29,6 +28,7 @@ def get_model_from_checkpoints(
29
28
  model_config: path to model config used to train the model.
30
29
  data_config: path to data config used to create samples for the model.
31
30
  datamodule_config: path to datamodule used to create samples e.g train/test split info.
31
+ experiment_configs: path to the full experimental config.
32
32
 
33
33
  """
34
34
  is_ensemble = len(checkpoint_dir_paths) > 1
@@ -37,12 +37,15 @@ def get_model_from_checkpoints(
37
37
  models = []
38
38
  data_configs = []
39
39
  datamodule_configs = []
40
+ experiment_configs = []
40
41
 
41
42
  for path in checkpoint_dir_paths:
42
- # Load the model
43
- model_config = parse_config(f"{path}/{MODEL_CONFIG_NAME}")
44
43
 
45
- model = hydra.utils.instantiate(model_config)
44
+ # Load lightning training module
45
+ with open(f"{path}/{MODEL_CONFIG_NAME}") as cfg:
46
+ model_config = yaml.load(cfg, Loader=yaml.FullLoader)
47
+
48
+ lightning_module = hydra.utils.instantiate(model_config)
46
49
 
47
50
  if val_best:
48
51
  # Only one epoch (best) saved per model
@@ -52,33 +55,40 @@ def get_model_from_checkpoints(
52
55
  f"Found {len(files)} checkpoints @ {path}/epoch*.ckpt. Expected one."
53
56
  )
54
57
  # TODO: Loading with weights_only=False is not recommended
55
- checkpoint = torch.load(files[0], map_location="cpu", weights_only=False)
58
+ checkpoint = torch.load(files[0], map_location="cpu", weights_only=True)
56
59
  else:
57
- checkpoint = torch.load(f"{path}/last.ckpt", map_location="cpu", weights_only=False)
58
-
59
- model.load_state_dict(state_dict=checkpoint["state_dict"])
60
+ checkpoint = torch.load(f"{path}/last.ckpt", map_location="cpu", weights_only=True)
60
61
 
61
- if isinstance(model, UMTModel):
62
- model, model_config = model.convert_to_multimodal_model(model_config)
62
+ lightning_module.load_state_dict(state_dict=checkpoint["state_dict"])
63
63
 
64
- model_configs.append(model_config)
65
- models.append(model)
64
+ # Extract the model from the lightning module
65
+ models.append(lightning_module.model)
66
+ model_configs.append(model_config["model"])
66
67
 
67
- # Check for data config
68
+ # Store the data config used for the model
68
69
  data_config = f"{path}/{DATA_CONFIG_NAME}"
69
70
 
70
71
  if os.path.isfile(data_config):
71
72
  data_configs.append(data_config)
72
73
  else:
73
- data_configs.append(None)
74
+ raise FileNotFoundError(f"File {data_config} does not exist")
74
75
 
75
- # check for datamodule config
76
+ # Check for datamodule config
77
+ # This only exists if the model was trained with presaved samples
76
78
  datamodule_config = f"{path}/{DATAMODULE_CONFIG_NAME}"
77
79
  if os.path.isfile(datamodule_config):
78
80
  datamodule_configs.append(datamodule_config)
79
81
  else:
80
82
  datamodule_configs.append(None)
81
83
 
84
+ # Check for experiment config
85
+ # For backwards compatibility - this might not always exist
86
+ experiment_config = f"{path}/{FULL_CONFIG_NAME}"
87
+ if os.path.isfile(datamodule_config):
88
+ experiment_configs.append(experiment_config)
89
+ else:
90
+ experiment_configs.append(None)
91
+
82
92
  if is_ensemble:
83
93
  model_config = {
84
94
  "_target_": "pvnet.models.ensemble.Ensemble",
@@ -90,7 +100,11 @@ def get_model_from_checkpoints(
90
100
  model_config = model_configs[0]
91
101
  model = models[0]
92
102
 
103
+ # Assume if using an ensemble that the members were trained on the same input data
93
104
  data_config = data_configs[0]
94
105
  datamodule_config = datamodule_configs[0]
95
106
 
96
- return model, model_config, data_config, datamodule_config
107
+ # TODO: How should we save the experimental configs if we had an ensemble?
108
+ experiment_config = experiment_configs[0]
109
+
110
+ return model, model_config, data_config, datamodule_config, experiment_config
@@ -0,0 +1,4 @@
1
+ """Models for PVNet"""
2
+ from .base_model import BaseModel
3
+ from .ensemble import Ensemble
4
+ from .late_fusion.late_fusion import LateFusionModel