PVNet 4.1.30__tar.gz → 5.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pvnet-4.1.30 → pvnet-5.0.0}/PKG-INFO +26 -32
- {pvnet-4.1.30 → pvnet-5.0.0}/PVNet.egg-info/PKG-INFO +26 -32
- pvnet-5.0.0/PVNet.egg-info/SOURCES.txt +36 -0
- {pvnet-4.1.30 → pvnet-5.0.0}/PVNet.egg-info/requires.txt +1 -2
- {pvnet-4.1.30 → pvnet-5.0.0}/README.md +24 -29
- {pvnet-4.1.30 → pvnet-5.0.0}/pvnet/data/base_datamodule.py +5 -9
- {pvnet-4.1.30 → pvnet-5.0.0}/pvnet/data/site_datamodule.py +1 -2
- {pvnet-4.1.30 → pvnet-5.0.0}/pvnet/data/uk_regional_datamodule.py +1 -2
- {pvnet-4.1.30 → pvnet-5.0.0}/pvnet/load_model.py +33 -19
- pvnet-5.0.0/pvnet/models/__init__.py +4 -0
- pvnet-5.0.0/pvnet/models/base_model.py +511 -0
- {pvnet-4.1.30 → pvnet-5.0.0}/pvnet/models/ensemble.py +4 -6
- pvnet-5.0.0/pvnet/models/late_fusion/__init__.py +1 -0
- {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/basic_blocks.py +2 -2
- {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/encoders/basic_blocks.py +3 -2
- {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/encoders/encoders3d.py +12 -12
- pvnet-4.1.30/pvnet/models/multimodal/multimodal.py → pvnet-5.0.0/pvnet/models/late_fusion/late_fusion.py +43 -68
- {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/linear_networks/basic_blocks.py +4 -4
- {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/linear_networks/networks.py +10 -9
- {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/site_encoders/basic_blocks.py +2 -1
- {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/site_encoders/encoders.py +21 -18
- {pvnet-4.1.30 → pvnet-5.0.0}/pvnet/optimizers.py +66 -46
- pvnet-5.0.0/pvnet/training/__init__.py +2 -0
- pvnet-5.0.0/pvnet/training/lightning_module.py +347 -0
- pvnet-5.0.0/pvnet/training/plots.py +86 -0
- pvnet-5.0.0/pvnet/training/train.py +144 -0
- pvnet-5.0.0/pvnet/utils.py +86 -0
- {pvnet-4.1.30 → pvnet-5.0.0}/pyproject.toml +3 -12
- pvnet-5.0.0/tests/test_end2end.py +21 -0
- pvnet-4.1.30/PVNet.egg-info/SOURCES.txt +0 -35
- pvnet-4.1.30/pvnet/models/__init__.py +0 -1
- pvnet-4.1.30/pvnet/models/base_model.py +0 -959
- pvnet-4.1.30/pvnet/models/multimodal/__init__.py +0 -1
- pvnet-4.1.30/pvnet/models/multimodal/unimodal_teacher.py +0 -447
- pvnet-4.1.30/pvnet/models/utils.py +0 -121
- pvnet-4.1.30/pvnet/training.py +0 -140
- pvnet-4.1.30/pvnet/utils.py +0 -234
- pvnet-4.1.30/tests/test_end2end.py +0 -13
- {pvnet-4.1.30 → pvnet-5.0.0}/LICENSE +0 -0
- {pvnet-4.1.30 → pvnet-5.0.0}/PVNet.egg-info/dependency_links.txt +0 -0
- {pvnet-4.1.30 → pvnet-5.0.0}/PVNet.egg-info/top_level.txt +0 -0
- {pvnet-4.1.30 → pvnet-5.0.0}/pvnet/__init__.py +0 -0
- {pvnet-4.1.30 → pvnet-5.0.0}/pvnet/data/__init__.py +0 -0
- {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/encoders/__init__.py +0 -0
- {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/linear_networks/__init__.py +0 -0
- {pvnet-4.1.30/pvnet/models/multimodal → pvnet-5.0.0/pvnet/models/late_fusion}/site_encoders/__init__.py +0 -0
- {pvnet-4.1.30 → pvnet-5.0.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: PVNet
|
|
3
|
-
Version:
|
|
3
|
+
Version: 5.0.0
|
|
4
4
|
Summary: PVNet
|
|
5
5
|
Author-email: Peter Dudfield <info@openclimatefix.org>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -15,8 +15,6 @@ Requires-Dist: h5netcdf
|
|
|
15
15
|
Requires-Dist: torch>=2.0.0
|
|
16
16
|
Requires-Dist: lightning
|
|
17
17
|
Requires-Dist: torchvision
|
|
18
|
-
Requires-Dist: pytest
|
|
19
|
-
Requires-Dist: pytest-cov
|
|
20
18
|
Requires-Dist: typer
|
|
21
19
|
Requires-Dist: sqlalchemy
|
|
22
20
|
Requires-Dist: fsspec[s3]
|
|
@@ -27,9 +25,10 @@ Requires-Dist: omegaconf
|
|
|
27
25
|
Requires-Dist: hydra-core
|
|
28
26
|
Requires-Dist: rich
|
|
29
27
|
Requires-Dist: einops
|
|
28
|
+
Requires-Dist: safetensors
|
|
30
29
|
Dynamic: license-file
|
|
31
30
|
|
|
32
|
-
# PVNet
|
|
31
|
+
# PVNet
|
|
33
32
|
<!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
|
|
34
33
|
[](#contributors-)
|
|
35
34
|
<!-- ALL-CONTRIBUTORS-BADGE:END -->
|
|
@@ -40,39 +39,34 @@ Dynamic: license-file
|
|
|
40
39
|
|
|
41
40
|
This project is used for training PVNet and running PVNet on live data.
|
|
42
41
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
feature vector is put through an output network which outputs predictions of the
|
|
50
|
-
future GSP yield. National forecasts are made by adding all the GSP forecasts
|
|
51
|
-
together.
|
|
42
|
+
PVNet is a multi-modal late-fusion model for predicting renewable energy generation from weather
|
|
43
|
+
data. The NWP (Numerical Weather Prediction) and satellite data are sent through a neural network
|
|
44
|
+
which encodes them down to 1D intermediate representations. These are concatenated together with
|
|
45
|
+
recent generation, the calculated solar coordinates (azimuth and elevation) and the location ID
|
|
46
|
+
which has been put through an embedding layer. This 1D concatenated feature vector is put through
|
|
47
|
+
an output network which outputs predictions of the future energy yield.
|
|
52
48
|
|
|
53
49
|
|
|
54
50
|
## Experiments
|
|
55
51
|
|
|
56
|
-
Our paper based on this repo was accepted into the Tackling Climate Change with Machine Learning
|
|
57
|
-
|
|
58
|
-
Some slightly more structured notes on deliberate experiments we have performed with PVNet are [here](https://docs.google.com/document/d/1VumDwWd8YAfvXbOtJEv3ZJm_FHQDzrKXR0jU9vnvGQg).
|
|
59
|
-
|
|
60
|
-
Some very rough, early working notes on this model are
|
|
61
|
-
[here](https://docs.google.com/document/d/1fbkfkBzp16WbnCg7RDuRDvgzInA6XQu3xh4NCjV-WDA). These are now somewhat out of date.
|
|
52
|
+
Our paper based on this repo was accepted into the Tackling Climate Change with Machine Learning
|
|
53
|
+
workshop at ICLR 2024 and can be viewed [here](https://www.climatechange.ai/papers/iclr2024/46).
|
|
62
54
|
|
|
55
|
+
Some more structured notes on experiments we have performed with PVNet are
|
|
56
|
+
[here](https://docs.google.com/document/d/1VumDwWd8YAfvXbOtJEv3ZJm_FHQDzrKXR0jU9vnvGQg).
|
|
63
57
|
|
|
64
58
|
|
|
65
59
|
## Setup / Installation
|
|
66
60
|
|
|
67
61
|
```bash
|
|
68
|
-
git clone
|
|
62
|
+
git clone git@github.com:openclimatefix/PVNet.git
|
|
69
63
|
cd PVNet
|
|
70
64
|
pip install .
|
|
71
65
|
```
|
|
72
66
|
|
|
73
67
|
The commit history is extensive. To save download time, use a depth of 1:
|
|
74
68
|
```bash
|
|
75
|
-
git clone --depth 1
|
|
69
|
+
git clone --depth 1 git@github.com:openclimatefix/PVNet.git
|
|
76
70
|
```
|
|
77
71
|
This means only the latest commit and its associated files will be downloaded.
|
|
78
72
|
|
|
@@ -130,7 +124,7 @@ here: https://huggingface.co/datasets/openclimatefix/uk_pv
|
|
|
130
124
|
|
|
131
125
|
Outside the PVNet repo, clone the ocf-data-sampler repo and exit the conda env created for PVNet: https://github.com/openclimatefix/ocf-data-sampler
|
|
132
126
|
```bash
|
|
133
|
-
git clone
|
|
127
|
+
git clone git@github.com:openclimatefix/ocf-data-sampler.git
|
|
134
128
|
conda create -n ocf-data-sampler python=3.11
|
|
135
129
|
```
|
|
136
130
|
|
|
@@ -146,7 +140,8 @@ Then exit this environment, and enter back into the pvnet conda environment and
|
|
|
146
140
|
pip install -e <PATH-TO-ocf-data-sampler-REPO>
|
|
147
141
|
```
|
|
148
142
|
|
|
149
|
-
If you install the local version of `ocf-data-sampler` that is more recent than the version
|
|
143
|
+
If you install the local version of `ocf-data-sampler` that is more recent than the version
|
|
144
|
+
specified in `PVNet` it is not guaranteed to function properly with this library.
|
|
150
145
|
|
|
151
146
|
## Pre-saving samples of data for training/validation of PVNet
|
|
152
147
|
|
|
@@ -205,14 +200,14 @@ Files stored in multiple locations can be added as a list. For example, in the `
|
|
|
205
200
|
|
|
206
201
|
```yaml
|
|
207
202
|
satellite:
|
|
208
|
-
|
|
203
|
+
zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
|
|
209
204
|
```
|
|
210
205
|
|
|
211
206
|
Or to satellite data hosted by Google:
|
|
212
207
|
|
|
213
208
|
```yaml
|
|
214
209
|
satellite:
|
|
215
|
-
|
|
210
|
+
zarr_path:
|
|
216
211
|
- "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr"
|
|
217
212
|
- "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2021_nonhrv.zarr"
|
|
218
213
|
```
|
|
@@ -227,13 +222,13 @@ files. The configs stored in `PVNet/configs.example` should work with samples cr
|
|
|
227
222
|
|
|
228
223
|
Make sure to update the following config files before training your model:
|
|
229
224
|
|
|
230
|
-
1. In `configs/datamodule/
|
|
225
|
+
1. In `configs/datamodule/presaved_samples.yaml`:
|
|
231
226
|
- update `sample_dir` to point to the directory you stored your samples in during sample creation
|
|
232
|
-
2. In `configs/model/
|
|
227
|
+
2. In `configs/model/late_fusion.yaml`:
|
|
233
228
|
- update the list of encoders to reflect the data sources you are using. If you are using different NWP sources, the encoders for these should follow the same structure with two important updates:
|
|
234
229
|
- `in_channels`: number of variables your NWP source supplies
|
|
235
230
|
- `image_size_pixels`: spatial crop of your NWP data. It depends on the spatial resolution of your NWP; should match `image_size_pixels_height` and/or `image_size_pixels_width` in `datamodule/configuration/site_example_configuration.yaml` for the NWP, unless transformations such as coarsening were applied (e.g. as for ECMWF data)
|
|
236
|
-
3. In `configs/
|
|
231
|
+
3. In `configs/trainer/default.yaml`:
|
|
237
232
|
- set `accelerator: 0` if running on a system without a supported GPU
|
|
238
233
|
|
|
239
234
|
If creating copies of the config files instead of modifying existing ones, update `defaults` in the main `./configs/config.yaml` file to use
|
|
@@ -241,11 +236,10 @@ your customised config files:
|
|
|
241
236
|
|
|
242
237
|
```yaml
|
|
243
238
|
defaults:
|
|
244
|
-
- trainer:
|
|
245
|
-
- model:
|
|
246
|
-
- datamodule:
|
|
239
|
+
- trainer: default.yaml
|
|
240
|
+
- model: late_fusion.yaml
|
|
241
|
+
- datamodule: presaved_samples.yaml
|
|
247
242
|
- callbacks: null
|
|
248
|
-
- logger: csv.yaml
|
|
249
243
|
- experiment: null
|
|
250
244
|
- hparams_search: null
|
|
251
245
|
- hydra: default.yaml
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: PVNet
|
|
3
|
-
Version:
|
|
3
|
+
Version: 5.0.0
|
|
4
4
|
Summary: PVNet
|
|
5
5
|
Author-email: Peter Dudfield <info@openclimatefix.org>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -15,8 +15,6 @@ Requires-Dist: h5netcdf
|
|
|
15
15
|
Requires-Dist: torch>=2.0.0
|
|
16
16
|
Requires-Dist: lightning
|
|
17
17
|
Requires-Dist: torchvision
|
|
18
|
-
Requires-Dist: pytest
|
|
19
|
-
Requires-Dist: pytest-cov
|
|
20
18
|
Requires-Dist: typer
|
|
21
19
|
Requires-Dist: sqlalchemy
|
|
22
20
|
Requires-Dist: fsspec[s3]
|
|
@@ -27,9 +25,10 @@ Requires-Dist: omegaconf
|
|
|
27
25
|
Requires-Dist: hydra-core
|
|
28
26
|
Requires-Dist: rich
|
|
29
27
|
Requires-Dist: einops
|
|
28
|
+
Requires-Dist: safetensors
|
|
30
29
|
Dynamic: license-file
|
|
31
30
|
|
|
32
|
-
# PVNet
|
|
31
|
+
# PVNet
|
|
33
32
|
<!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
|
|
34
33
|
[](#contributors-)
|
|
35
34
|
<!-- ALL-CONTRIBUTORS-BADGE:END -->
|
|
@@ -40,39 +39,34 @@ Dynamic: license-file
|
|
|
40
39
|
|
|
41
40
|
This project is used for training PVNet and running PVNet on live data.
|
|
42
41
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
feature vector is put through an output network which outputs predictions of the
|
|
50
|
-
future GSP yield. National forecasts are made by adding all the GSP forecasts
|
|
51
|
-
together.
|
|
42
|
+
PVNet is a multi-modal late-fusion model for predicting renewable energy generation from weather
|
|
43
|
+
data. The NWP (Numerical Weather Prediction) and satellite data are sent through a neural network
|
|
44
|
+
which encodes them down to 1D intermediate representations. These are concatenated together with
|
|
45
|
+
recent generation, the calculated solar coordinates (azimuth and elevation) and the location ID
|
|
46
|
+
which has been put through an embedding layer. This 1D concatenated feature vector is put through
|
|
47
|
+
an output network which outputs predictions of the future energy yield.
|
|
52
48
|
|
|
53
49
|
|
|
54
50
|
## Experiments
|
|
55
51
|
|
|
56
|
-
Our paper based on this repo was accepted into the Tackling Climate Change with Machine Learning
|
|
57
|
-
|
|
58
|
-
Some slightly more structured notes on deliberate experiments we have performed with PVNet are [here](https://docs.google.com/document/d/1VumDwWd8YAfvXbOtJEv3ZJm_FHQDzrKXR0jU9vnvGQg).
|
|
59
|
-
|
|
60
|
-
Some very rough, early working notes on this model are
|
|
61
|
-
[here](https://docs.google.com/document/d/1fbkfkBzp16WbnCg7RDuRDvgzInA6XQu3xh4NCjV-WDA). These are now somewhat out of date.
|
|
52
|
+
Our paper based on this repo was accepted into the Tackling Climate Change with Machine Learning
|
|
53
|
+
workshop at ICLR 2024 and can be viewed [here](https://www.climatechange.ai/papers/iclr2024/46).
|
|
62
54
|
|
|
55
|
+
Some more structured notes on experiments we have performed with PVNet are
|
|
56
|
+
[here](https://docs.google.com/document/d/1VumDwWd8YAfvXbOtJEv3ZJm_FHQDzrKXR0jU9vnvGQg).
|
|
63
57
|
|
|
64
58
|
|
|
65
59
|
## Setup / Installation
|
|
66
60
|
|
|
67
61
|
```bash
|
|
68
|
-
git clone
|
|
62
|
+
git clone git@github.com:openclimatefix/PVNet.git
|
|
69
63
|
cd PVNet
|
|
70
64
|
pip install .
|
|
71
65
|
```
|
|
72
66
|
|
|
73
67
|
The commit history is extensive. To save download time, use a depth of 1:
|
|
74
68
|
```bash
|
|
75
|
-
git clone --depth 1
|
|
69
|
+
git clone --depth 1 git@github.com:openclimatefix/PVNet.git
|
|
76
70
|
```
|
|
77
71
|
This means only the latest commit and its associated files will be downloaded.
|
|
78
72
|
|
|
@@ -130,7 +124,7 @@ here: https://huggingface.co/datasets/openclimatefix/uk_pv
|
|
|
130
124
|
|
|
131
125
|
Outside the PVNet repo, clone the ocf-data-sampler repo and exit the conda env created for PVNet: https://github.com/openclimatefix/ocf-data-sampler
|
|
132
126
|
```bash
|
|
133
|
-
git clone
|
|
127
|
+
git clone git@github.com:openclimatefix/ocf-data-sampler.git
|
|
134
128
|
conda create -n ocf-data-sampler python=3.11
|
|
135
129
|
```
|
|
136
130
|
|
|
@@ -146,7 +140,8 @@ Then exit this environment, and enter back into the pvnet conda environment and
|
|
|
146
140
|
pip install -e <PATH-TO-ocf-data-sampler-REPO>
|
|
147
141
|
```
|
|
148
142
|
|
|
149
|
-
If you install the local version of `ocf-data-sampler` that is more recent than the version
|
|
143
|
+
If you install the local version of `ocf-data-sampler` that is more recent than the version
|
|
144
|
+
specified in `PVNet` it is not guaranteed to function properly with this library.
|
|
150
145
|
|
|
151
146
|
## Pre-saving samples of data for training/validation of PVNet
|
|
152
147
|
|
|
@@ -205,14 +200,14 @@ Files stored in multiple locations can be added as a list. For example, in the `
|
|
|
205
200
|
|
|
206
201
|
```yaml
|
|
207
202
|
satellite:
|
|
208
|
-
|
|
203
|
+
zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
|
|
209
204
|
```
|
|
210
205
|
|
|
211
206
|
Or to satellite data hosted by Google:
|
|
212
207
|
|
|
213
208
|
```yaml
|
|
214
209
|
satellite:
|
|
215
|
-
|
|
210
|
+
zarr_path:
|
|
216
211
|
- "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr"
|
|
217
212
|
- "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2021_nonhrv.zarr"
|
|
218
213
|
```
|
|
@@ -227,13 +222,13 @@ files. The configs stored in `PVNet/configs.example` should work with samples cr
|
|
|
227
222
|
|
|
228
223
|
Make sure to update the following config files before training your model:
|
|
229
224
|
|
|
230
|
-
1. In `configs/datamodule/
|
|
225
|
+
1. In `configs/datamodule/presaved_samples.yaml`:
|
|
231
226
|
- update `sample_dir` to point to the directory you stored your samples in during sample creation
|
|
232
|
-
2. In `configs/model/
|
|
227
|
+
2. In `configs/model/late_fusion.yaml`:
|
|
233
228
|
- update the list of encoders to reflect the data sources you are using. If you are using different NWP sources, the encoders for these should follow the same structure with two important updates:
|
|
234
229
|
- `in_channels`: number of variables your NWP source supplies
|
|
235
230
|
- `image_size_pixels`: spatial crop of your NWP data. It depends on the spatial resolution of your NWP; should match `image_size_pixels_height` and/or `image_size_pixels_width` in `datamodule/configuration/site_example_configuration.yaml` for the NWP, unless transformations such as coarsening were applied (e.g. as for ECMWF data)
|
|
236
|
-
3. In `configs/
|
|
231
|
+
3. In `configs/trainer/default.yaml`:
|
|
237
232
|
- set `accelerator: 0` if running on a system without a supported GPU
|
|
238
233
|
|
|
239
234
|
If creating copies of the config files instead of modifying existing ones, update `defaults` in the main `./configs/config.yaml` file to use
|
|
@@ -241,11 +236,10 @@ your customised config files:
|
|
|
241
236
|
|
|
242
237
|
```yaml
|
|
243
238
|
defaults:
|
|
244
|
-
- trainer:
|
|
245
|
-
- model:
|
|
246
|
-
- datamodule:
|
|
239
|
+
- trainer: default.yaml
|
|
240
|
+
- model: late_fusion.yaml
|
|
241
|
+
- datamodule: presaved_samples.yaml
|
|
247
242
|
- callbacks: null
|
|
248
|
-
- logger: csv.yaml
|
|
249
243
|
- experiment: null
|
|
250
244
|
- hparams_search: null
|
|
251
245
|
- hydra: default.yaml
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
PVNet.egg-info/PKG-INFO
|
|
5
|
+
PVNet.egg-info/SOURCES.txt
|
|
6
|
+
PVNet.egg-info/dependency_links.txt
|
|
7
|
+
PVNet.egg-info/requires.txt
|
|
8
|
+
PVNet.egg-info/top_level.txt
|
|
9
|
+
pvnet/__init__.py
|
|
10
|
+
pvnet/load_model.py
|
|
11
|
+
pvnet/optimizers.py
|
|
12
|
+
pvnet/utils.py
|
|
13
|
+
pvnet/data/__init__.py
|
|
14
|
+
pvnet/data/base_datamodule.py
|
|
15
|
+
pvnet/data/site_datamodule.py
|
|
16
|
+
pvnet/data/uk_regional_datamodule.py
|
|
17
|
+
pvnet/models/__init__.py
|
|
18
|
+
pvnet/models/base_model.py
|
|
19
|
+
pvnet/models/ensemble.py
|
|
20
|
+
pvnet/models/late_fusion/__init__.py
|
|
21
|
+
pvnet/models/late_fusion/basic_blocks.py
|
|
22
|
+
pvnet/models/late_fusion/late_fusion.py
|
|
23
|
+
pvnet/models/late_fusion/encoders/__init__.py
|
|
24
|
+
pvnet/models/late_fusion/encoders/basic_blocks.py
|
|
25
|
+
pvnet/models/late_fusion/encoders/encoders3d.py
|
|
26
|
+
pvnet/models/late_fusion/linear_networks/__init__.py
|
|
27
|
+
pvnet/models/late_fusion/linear_networks/basic_blocks.py
|
|
28
|
+
pvnet/models/late_fusion/linear_networks/networks.py
|
|
29
|
+
pvnet/models/late_fusion/site_encoders/__init__.py
|
|
30
|
+
pvnet/models/late_fusion/site_encoders/basic_blocks.py
|
|
31
|
+
pvnet/models/late_fusion/site_encoders/encoders.py
|
|
32
|
+
pvnet/training/__init__.py
|
|
33
|
+
pvnet/training/lightning_module.py
|
|
34
|
+
pvnet/training/plots.py
|
|
35
|
+
pvnet/training/train.py
|
|
36
|
+
tests/test_end2end.py
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# PVNet
|
|
1
|
+
# PVNet
|
|
2
2
|
<!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
|
|
3
3
|
[](#contributors-)
|
|
4
4
|
<!-- ALL-CONTRIBUTORS-BADGE:END -->
|
|
@@ -9,39 +9,34 @@
|
|
|
9
9
|
|
|
10
10
|
This project is used for training PVNet and running PVNet on live data.
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
feature vector is put through an output network which outputs predictions of the
|
|
19
|
-
future GSP yield. National forecasts are made by adding all the GSP forecasts
|
|
20
|
-
together.
|
|
12
|
+
PVNet is a multi-modal late-fusion model for predicting renewable energy generation from weather
|
|
13
|
+
data. The NWP (Numerical Weather Prediction) and satellite data are sent through a neural network
|
|
14
|
+
which encodes them down to 1D intermediate representations. These are concatenated together with
|
|
15
|
+
recent generation, the calculated solar coordinates (azimuth and elevation) and the location ID
|
|
16
|
+
which has been put through an embedding layer. This 1D concatenated feature vector is put through
|
|
17
|
+
an output network which outputs predictions of the future energy yield.
|
|
21
18
|
|
|
22
19
|
|
|
23
20
|
## Experiments
|
|
24
21
|
|
|
25
|
-
Our paper based on this repo was accepted into the Tackling Climate Change with Machine Learning
|
|
26
|
-
|
|
27
|
-
Some slightly more structured notes on deliberate experiments we have performed with PVNet are [here](https://docs.google.com/document/d/1VumDwWd8YAfvXbOtJEv3ZJm_FHQDzrKXR0jU9vnvGQg).
|
|
28
|
-
|
|
29
|
-
Some very rough, early working notes on this model are
|
|
30
|
-
[here](https://docs.google.com/document/d/1fbkfkBzp16WbnCg7RDuRDvgzInA6XQu3xh4NCjV-WDA). These are now somewhat out of date.
|
|
22
|
+
Our paper based on this repo was accepted into the Tackling Climate Change with Machine Learning
|
|
23
|
+
workshop at ICLR 2024 and can be viewed [here](https://www.climatechange.ai/papers/iclr2024/46).
|
|
31
24
|
|
|
25
|
+
Some more structured notes on experiments we have performed with PVNet are
|
|
26
|
+
[here](https://docs.google.com/document/d/1VumDwWd8YAfvXbOtJEv3ZJm_FHQDzrKXR0jU9vnvGQg).
|
|
32
27
|
|
|
33
28
|
|
|
34
29
|
## Setup / Installation
|
|
35
30
|
|
|
36
31
|
```bash
|
|
37
|
-
git clone
|
|
32
|
+
git clone git@github.com:openclimatefix/PVNet.git
|
|
38
33
|
cd PVNet
|
|
39
34
|
pip install .
|
|
40
35
|
```
|
|
41
36
|
|
|
42
37
|
The commit history is extensive. To save download time, use a depth of 1:
|
|
43
38
|
```bash
|
|
44
|
-
git clone --depth 1
|
|
39
|
+
git clone --depth 1 git@github.com:openclimatefix/PVNet.git
|
|
45
40
|
```
|
|
46
41
|
This means only the latest commit and its associated files will be downloaded.
|
|
47
42
|
|
|
@@ -99,7 +94,7 @@ here: https://huggingface.co/datasets/openclimatefix/uk_pv
|
|
|
99
94
|
|
|
100
95
|
Outside the PVNet repo, clone the ocf-data-sampler repo and exit the conda env created for PVNet: https://github.com/openclimatefix/ocf-data-sampler
|
|
101
96
|
```bash
|
|
102
|
-
git clone
|
|
97
|
+
git clone git@github.com:openclimatefix/ocf-data-sampler.git
|
|
103
98
|
conda create -n ocf-data-sampler python=3.11
|
|
104
99
|
```
|
|
105
100
|
|
|
@@ -115,7 +110,8 @@ Then exit this environment, and enter back into the pvnet conda environment and
|
|
|
115
110
|
pip install -e <PATH-TO-ocf-data-sampler-REPO>
|
|
116
111
|
```
|
|
117
112
|
|
|
118
|
-
If you install the local version of `ocf-data-sampler` that is more recent than the version
|
|
113
|
+
If you install the local version of `ocf-data-sampler` that is more recent than the version
|
|
114
|
+
specified in `PVNet` it is not guaranteed to function properly with this library.
|
|
119
115
|
|
|
120
116
|
## Pre-saving samples of data for training/validation of PVNet
|
|
121
117
|
|
|
@@ -174,14 +170,14 @@ Files stored in multiple locations can be added as a list. For example, in the `
|
|
|
174
170
|
|
|
175
171
|
```yaml
|
|
176
172
|
satellite:
|
|
177
|
-
|
|
173
|
+
zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
|
|
178
174
|
```
|
|
179
175
|
|
|
180
176
|
Or to satellite data hosted by Google:
|
|
181
177
|
|
|
182
178
|
```yaml
|
|
183
179
|
satellite:
|
|
184
|
-
|
|
180
|
+
zarr_path:
|
|
185
181
|
- "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr"
|
|
186
182
|
- "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2021_nonhrv.zarr"
|
|
187
183
|
```
|
|
@@ -196,13 +192,13 @@ files. The configs stored in `PVNet/configs.example` should work with samples cr
|
|
|
196
192
|
|
|
197
193
|
Make sure to update the following config files before training your model:
|
|
198
194
|
|
|
199
|
-
1. In `configs/datamodule/
|
|
195
|
+
1. In `configs/datamodule/presaved_samples.yaml`:
|
|
200
196
|
- update `sample_dir` to point to the directory you stored your samples in during sample creation
|
|
201
|
-
2. In `configs/model/
|
|
197
|
+
2. In `configs/model/late_fusion.yaml`:
|
|
202
198
|
- update the list of encoders to reflect the data sources you are using. If you are using different NWP sources, the encoders for these should follow the same structure with two important updates:
|
|
203
199
|
- `in_channels`: number of variables your NWP source supplies
|
|
204
200
|
- `image_size_pixels`: spatial crop of your NWP data. It depends on the spatial resolution of your NWP; should match `image_size_pixels_height` and/or `image_size_pixels_width` in `datamodule/configuration/site_example_configuration.yaml` for the NWP, unless transformations such as coarsening were applied (e.g. as for ECMWF data)
|
|
205
|
-
3. In `configs/
|
|
201
|
+
3. In `configs/trainer/default.yaml`:
|
|
206
202
|
- set `accelerator: 0` if running on a system without a supported GPU
|
|
207
203
|
|
|
208
204
|
If creating copies of the config files instead of modifying existing ones, update `defaults` in the main `./configs/config.yaml` file to use
|
|
@@ -210,11 +206,10 @@ your customised config files:
|
|
|
210
206
|
|
|
211
207
|
```yaml
|
|
212
208
|
defaults:
|
|
213
|
-
- trainer:
|
|
214
|
-
- model:
|
|
215
|
-
- datamodule:
|
|
209
|
+
- trainer: default.yaml
|
|
210
|
+
- model: late_fusion.yaml
|
|
211
|
+
- datamodule: presaved_samples.yaml
|
|
216
212
|
- callbacks: null
|
|
217
|
-
- logger: csv.yaml
|
|
218
213
|
- experiment: null
|
|
219
214
|
- hparams_search: null
|
|
220
215
|
- hydra: default.yaml
|
|
@@ -5,16 +5,12 @@ from glob import glob
|
|
|
5
5
|
import torch
|
|
6
6
|
from lightning.pytorch import LightningDataModule
|
|
7
7
|
from ocf_data_sampler.numpy_sample.collate import stack_np_samples_into_batch
|
|
8
|
-
from ocf_data_sampler.
|
|
9
|
-
|
|
10
|
-
SampleBase,
|
|
11
|
-
TensorBatch,
|
|
12
|
-
batch_to_tensor,
|
|
13
|
-
)
|
|
8
|
+
from ocf_data_sampler.numpy_sample.common_types import NumpySample, TensorBatch
|
|
9
|
+
from ocf_data_sampler.torch_datasets.sample.base import SampleBase, batch_to_tensor
|
|
14
10
|
from torch.utils.data import DataLoader, Dataset, Subset
|
|
15
11
|
|
|
16
12
|
|
|
17
|
-
def collate_fn(samples: list[
|
|
13
|
+
def collate_fn(samples: list[NumpySample]) -> TensorBatch:
|
|
18
14
|
"""Convert a list of NumpySample samples to a tensor batch"""
|
|
19
15
|
return batch_to_tensor(stack_np_samples_into_batch(samples))
|
|
20
16
|
|
|
@@ -32,10 +28,10 @@ class PresavedSamplesDataset(Dataset):
|
|
|
32
28
|
self.sample_paths = glob(f"{sample_dir}/*")
|
|
33
29
|
self.sample_class = sample_class
|
|
34
30
|
|
|
35
|
-
def __len__(self):
|
|
31
|
+
def __len__(self) -> int:
|
|
36
32
|
return len(self.sample_paths)
|
|
37
33
|
|
|
38
|
-
def __getitem__(self, idx):
|
|
34
|
+
def __getitem__(self, idx) -> NumpySample:
|
|
39
35
|
sample = self.sample_class.load(self.sample_paths[idx])
|
|
40
36
|
return sample.to_numpy()
|
|
41
37
|
|
|
@@ -15,8 +15,7 @@ class SitePresavedDataModule(BasePresavedDataModule):
|
|
|
15
15
|
"""Datamodule for loading pre-saved samples."""
|
|
16
16
|
|
|
17
17
|
def _get_premade_samples_dataset(self, subdir: str) -> Dataset:
|
|
18
|
-
|
|
19
|
-
return PresavedSamplesDataset(split_dir, SiteSample)
|
|
18
|
+
return PresavedSamplesDataset(f"{self.sample_dir}/{subdir}", SiteSample)
|
|
20
19
|
|
|
21
20
|
|
|
22
21
|
class SiteStreamedDataModule(BaseStreamedDataModule):
|
|
@@ -15,8 +15,7 @@ class UKRegionalPresavedDataModule(BasePresavedDataModule):
|
|
|
15
15
|
"""Datamodule for loading pre-saved samples."""
|
|
16
16
|
|
|
17
17
|
def _get_premade_samples_dataset(self, subdir: str) -> Dataset:
|
|
18
|
-
|
|
19
|
-
return PresavedSamplesDataset(split_dir, UKRegionalSample)
|
|
18
|
+
return PresavedSamplesDataset(f"{self.sample_dir}/{subdir}", UKRegionalSample)
|
|
20
19
|
|
|
21
20
|
|
|
22
21
|
class UKRegionalStreamedDataModule(BaseStreamedDataModule):
|
|
@@ -2,17 +2,16 @@
|
|
|
2
2
|
|
|
3
3
|
import glob
|
|
4
4
|
import os
|
|
5
|
-
from typing import Any
|
|
6
5
|
|
|
7
6
|
import hydra
|
|
8
7
|
import torch
|
|
9
|
-
|
|
8
|
+
import yaml
|
|
10
9
|
|
|
11
10
|
from pvnet.models.ensemble import Ensemble
|
|
12
|
-
from pvnet.models.multimodal.unimodal_teacher import Model as UMTModel
|
|
13
11
|
from pvnet.utils import (
|
|
14
12
|
DATA_CONFIG_NAME,
|
|
15
13
|
DATAMODULE_CONFIG_NAME,
|
|
14
|
+
FULL_CONFIG_NAME,
|
|
16
15
|
MODEL_CONFIG_NAME,
|
|
17
16
|
)
|
|
18
17
|
|
|
@@ -20,7 +19,7 @@ from pvnet.utils import (
|
|
|
20
19
|
def get_model_from_checkpoints(
|
|
21
20
|
checkpoint_dir_paths: list[str],
|
|
22
21
|
val_best: bool = True,
|
|
23
|
-
) -> tuple[torch.nn.Module, dict
|
|
22
|
+
) -> tuple[torch.nn.Module, dict, str, str | None, str | None]:
|
|
24
23
|
"""Load a model from its checkpoint directory
|
|
25
24
|
|
|
26
25
|
Returns:
|
|
@@ -29,6 +28,7 @@ def get_model_from_checkpoints(
|
|
|
29
28
|
model_config: path to model config used to train the model.
|
|
30
29
|
data_config: path to data config used to create samples for the model.
|
|
31
30
|
datamodule_config: path to datamodule used to create samples e.g train/test split info.
|
|
31
|
+
experiment_configs: path to the full experimental config.
|
|
32
32
|
|
|
33
33
|
"""
|
|
34
34
|
is_ensemble = len(checkpoint_dir_paths) > 1
|
|
@@ -37,12 +37,15 @@ def get_model_from_checkpoints(
|
|
|
37
37
|
models = []
|
|
38
38
|
data_configs = []
|
|
39
39
|
datamodule_configs = []
|
|
40
|
+
experiment_configs = []
|
|
40
41
|
|
|
41
42
|
for path in checkpoint_dir_paths:
|
|
42
|
-
# Load the model
|
|
43
|
-
model_config = parse_config(f"{path}/{MODEL_CONFIG_NAME}")
|
|
44
43
|
|
|
45
|
-
|
|
44
|
+
# Load lightning training module
|
|
45
|
+
with open(f"{path}/{MODEL_CONFIG_NAME}") as cfg:
|
|
46
|
+
model_config = yaml.load(cfg, Loader=yaml.FullLoader)
|
|
47
|
+
|
|
48
|
+
lightning_module = hydra.utils.instantiate(model_config)
|
|
46
49
|
|
|
47
50
|
if val_best:
|
|
48
51
|
# Only one epoch (best) saved per model
|
|
@@ -52,33 +55,40 @@ def get_model_from_checkpoints(
|
|
|
52
55
|
f"Found {len(files)} checkpoints @ {path}/epoch*.ckpt. Expected one."
|
|
53
56
|
)
|
|
54
57
|
# TODO: Loading with weights_only=False is not recommended
|
|
55
|
-
checkpoint = torch.load(files[0], map_location="cpu", weights_only=
|
|
58
|
+
checkpoint = torch.load(files[0], map_location="cpu", weights_only=True)
|
|
56
59
|
else:
|
|
57
|
-
checkpoint = torch.load(f"{path}/last.ckpt", map_location="cpu", weights_only=
|
|
58
|
-
|
|
59
|
-
model.load_state_dict(state_dict=checkpoint["state_dict"])
|
|
60
|
+
checkpoint = torch.load(f"{path}/last.ckpt", map_location="cpu", weights_only=True)
|
|
60
61
|
|
|
61
|
-
|
|
62
|
-
model, model_config = model.convert_to_multimodal_model(model_config)
|
|
62
|
+
lightning_module.load_state_dict(state_dict=checkpoint["state_dict"])
|
|
63
63
|
|
|
64
|
-
|
|
65
|
-
models.append(model)
|
|
64
|
+
# Extract the model from the lightning module
|
|
65
|
+
models.append(lightning_module.model)
|
|
66
|
+
model_configs.append(model_config["model"])
|
|
66
67
|
|
|
67
|
-
#
|
|
68
|
+
# Store the data config used for the model
|
|
68
69
|
data_config = f"{path}/{DATA_CONFIG_NAME}"
|
|
69
70
|
|
|
70
71
|
if os.path.isfile(data_config):
|
|
71
72
|
data_configs.append(data_config)
|
|
72
73
|
else:
|
|
73
|
-
|
|
74
|
+
raise FileNotFoundError(f"File {data_config} does not exist")
|
|
74
75
|
|
|
75
|
-
#
|
|
76
|
+
# Check for datamodule config
|
|
77
|
+
# This only exists if the model was trained with presaved samples
|
|
76
78
|
datamodule_config = f"{path}/{DATAMODULE_CONFIG_NAME}"
|
|
77
79
|
if os.path.isfile(datamodule_config):
|
|
78
80
|
datamodule_configs.append(datamodule_config)
|
|
79
81
|
else:
|
|
80
82
|
datamodule_configs.append(None)
|
|
81
83
|
|
|
84
|
+
# Check for experiment config
|
|
85
|
+
# For backwards compatibility - this might always exist
|
|
86
|
+
experiment_config = f"{path}/{FULL_CONFIG_NAME}"
|
|
87
|
+
if os.path.isfile(datamodule_config):
|
|
88
|
+
experiment_configs.append(experiment_config)
|
|
89
|
+
else:
|
|
90
|
+
experiment_configs.append(None)
|
|
91
|
+
|
|
82
92
|
if is_ensemble:
|
|
83
93
|
model_config = {
|
|
84
94
|
"_target_": "pvnet.models.ensemble.Ensemble",
|
|
@@ -90,7 +100,11 @@ def get_model_from_checkpoints(
|
|
|
90
100
|
model_config = model_configs[0]
|
|
91
101
|
model = models[0]
|
|
92
102
|
|
|
103
|
+
# Assume if using an ensemble that the members were trained on the same input data
|
|
93
104
|
data_config = data_configs[0]
|
|
94
105
|
datamodule_config = datamodule_configs[0]
|
|
95
106
|
|
|
96
|
-
|
|
107
|
+
# TODO: How should we save the experimental configs if we had an ensemble?
|
|
108
|
+
experiment_config = experiment_configs[0]
|
|
109
|
+
|
|
110
|
+
return model, model_config, data_config, datamodule_config, experiment_config
|