PVNet_summation-1.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,356 @@
+ """Base model for all PVNet submodels"""
+ import logging
+ import os
+ import shutil
+ import time
+ from importlib.metadata import version
+ from math import prod
+ from pathlib import Path
+
+ import hydra
+ import torch
+ import yaml
+ from huggingface_hub import ModelCard, ModelCardData, snapshot_download
+ from huggingface_hub.hf_api import HfApi
+ from safetensors.torch import load_file, save_file
+
+ from pvnet_summation.data.datamodule import SumTensorBatch
+ from pvnet_summation.utils import (
+     DATAMODULE_CONFIG_NAME,
+     FULL_CONFIG_NAME,
+     MODEL_CARD_NAME,
+     MODEL_CONFIG_NAME,
+     PYTORCH_WEIGHTS_NAME,
+ )
+
+
+ def sanitize_datamodule(config: dict) -> dict:
+     """Create a new datamodule config which keeps only the details required for inference"""
+     return {"pvnet_model": config["pvnet_model"]}
+
+
+ def download_from_hf(
+     repo_id: str,
+     filename: str | list[str],
+     revision: str,
+     cache_dir: str | None,
+     force_download: bool,
+     max_retries: int = 5,
+     wait_time: int = 10,
+ ) -> str | list[str]:
+     """Tries to download one or more files from HuggingFace up to max_retries times.
+
+     Args:
+         repo_id: HuggingFace repo ID
+         filename: Name of the file(s) to download
+         revision: Specific model revision
+         cache_dir: Cache directory
+         force_download: Whether to force a new download
+         max_retries: Maximum number of retry attempts
+         wait_time: Wait time (in seconds) before retrying
+
+     Returns:
+         The local file path(s) of the downloaded file(s)
+     """
+     for attempt in range(1, max_retries + 1):
+         try:
+             save_dir = snapshot_download(
+                 repo_id=repo_id,
+                 allow_patterns=filename,
+                 revision=revision,
+                 cache_dir=cache_dir,
+                 force_download=force_download,
+             )
+
+             if isinstance(filename, list):
+                 return [f"{save_dir}/{f}" for f in filename]
+             else:
+                 return f"{save_dir}/{filename}"
+
+         except Exception as e:
+             if attempt == max_retries:
+                 raise Exception(
+                     f"Failed to download {filename} from {repo_id} after {max_retries} attempts."
+                 ) from e
+             logging.warning(
+                 f"Attempt {attempt}/{max_retries} failed to download {filename} "
+                 f"from {repo_id}. Retrying in {wait_time} seconds..."
+             )
+             time.sleep(wait_time)
+
+
+ class HuggingfaceMixin:
+     """Mixin for saving and loading the model to and from HuggingFace"""
+
+     @classmethod
+     def from_pretrained(
+         cls,
+         model_id: str,
+         revision: str,
+         cache_dir: str | None = None,
+         force_download: bool = False,
+         strict: bool = True,
+     ) -> "BaseModel":
+         """Load pretrained Pytorch weights and return the loaded model."""
+
+         if os.path.isdir(model_id):
+             print("Loading model from local directory")
+             model_file = f"{model_id}/{PYTORCH_WEIGHTS_NAME}"
+             config_file = f"{model_id}/{MODEL_CONFIG_NAME}"
+         else:
+             print("Loading model from huggingface repo")
+
+             model_file, config_file = download_from_hf(
+                 repo_id=model_id,
+                 filename=[PYTORCH_WEIGHTS_NAME, MODEL_CONFIG_NAME],
+                 revision=revision,
+                 cache_dir=cache_dir,
+                 force_download=force_download,
+                 max_retries=5,
+                 wait_time=10,
+             )
+
+         with open(config_file, "r") as f:
+             model = hydra.utils.instantiate(yaml.safe_load(f))
+
+         state_dict = load_file(model_file)
+         model.load_state_dict(state_dict, strict=strict)  # type: ignore
+         model.eval()  # type: ignore
+
+         return model
+
+     @classmethod
+     def get_datamodule_config(
+         cls,
+         model_id: str,
+         revision: str,
+         cache_dir: str | None = None,
+         force_download: bool = False,
+     ) -> str:
+         """Fetch the datamodule config file and return its local path."""
+         if os.path.isdir(model_id):
+             print("Loading datamodule config from local directory")
+             datamodule_config_file = os.path.join(model_id, DATAMODULE_CONFIG_NAME)
+         else:
+             print("Loading datamodule config from huggingface repo")
+             datamodule_config_file = download_from_hf(
+                 repo_id=model_id,
+                 filename=DATAMODULE_CONFIG_NAME,
+                 revision=revision,
+                 cache_dir=cache_dir,
+                 force_download=force_download,
+                 max_retries=5,
+                 wait_time=10,
+             )
+
+         return datamodule_config_file
+
+     def _save_model_weights(self, save_directory: str) -> None:
+         """Save weights from a Pytorch model to a local directory."""
+         save_file(self.state_dict(), f"{save_directory}/{PYTORCH_WEIGHTS_NAME}")
+
+     def save_pretrained(
+         self,
+         save_directory: str,
+         model_config: dict,
+         wandb_repo: str,
+         wandb_id: str,
+         card_template_path: str,
+         datamodule_config_path: str,
+         experiment_config_path: str | None = None,
+         hf_repo_id: str | None = None,
+         push_to_hub: bool = False,
+     ) -> None:
+         """Save weights in a local directory or upload them to the HuggingFace hub.
+
+         Args:
+             save_directory:
+                 Path to directory in which the model weights and configuration will be saved.
+             model_config (`dict`):
+                 Model configuration specified as a key/value dictionary.
+             wandb_repo: Identifier of the repo on wandb.
+             wandb_id: Identifier of the model on wandb.
+             card_template_path: Path to the HuggingFace model card template.
+             datamodule_config_path:
+                 The path to the datamodule config.
+             experiment_config_path:
+                 The path to the full experimental config.
+             hf_repo_id:
+                 ID of your repository on the Hub. Used only if `push_to_hub=True`. Will default
+                 to the folder name if not provided.
+             push_to_hub (`bool`, *optional*, defaults to `False`):
+                 Whether or not to push your model to the HuggingFace Hub after saving it.
+         """
+
+         save_directory = Path(save_directory)
+         save_directory.mkdir(parents=True, exist_ok=True)
+
+         # Save model weights/files
+         self._save_model_weights(save_directory)
+
+         # Save the model config
+         if isinstance(model_config, dict):
+             with open(save_directory / MODEL_CONFIG_NAME, "w") as outfile:
+                 yaml.dump(model_config, outfile, sort_keys=False, default_flow_style=False)
+
+         # Sanitize and save the datamodule config
+         with open(datamodule_config_path) as cfg:
+             datamodule_config = yaml.safe_load(cfg)
+
+         datamodule_config = sanitize_datamodule(datamodule_config)
+
+         with open(save_directory / DATAMODULE_CONFIG_NAME, "w") as outfile:
+             yaml.dump(datamodule_config, outfile, sort_keys=False, default_flow_style=False)
+
+         # Save the full experimental config
+         if experiment_config_path is not None:
+             shutil.copyfile(experiment_config_path, save_directory / FULL_CONFIG_NAME)
+
+         card = self.create_hugging_face_model_card(card_template_path, wandb_repo, wandb_id)
+
+         (save_directory / MODEL_CARD_NAME).write_text(str(card))
+
+         if push_to_hub:
+             api = HfApi()
+
+             api.upload_folder(
+                 repo_id=hf_repo_id,
+                 folder_path=save_directory,
+                 repo_type="model",
+                 commit_message=f"Upload model - {wandb_id}",
+             )
+
+             # Print the most recent commit hash
+             c = api.list_repo_commits(repo_id=hf_repo_id, repo_type="model")[0]
+
+             message = (
+                 f"The latest commit is now: \n"
+                 f"    date: {c.created_at} \n"
+                 f"    commit hash: {c.commit_id}\n"
+                 f"    by: {c.authors}\n"
+                 f"    title: {c.title}\n"
+             )
+
+             print(message)
+
+     @staticmethod
+     def create_hugging_face_model_card(
+         card_template_path: str,
+         wandb_repo: str,
+         wandb_id: str,
+     ) -> ModelCard:
+         """
+         Creates a Hugging Face model card
+
+         Args:
+             card_template_path: Path to the HuggingFace model card template
+             wandb_repo: Identifier of the repo on wandb.
+             wandb_id: Identifier of the model on wandb.
+
+         Returns:
+             card: ModelCard - Hugging Face model card object
+         """
+
+         # Create the model card data
+         card_data = ModelCardData(language="en", license="mit", library_name="pytorch")
+
+         link = f"https://wandb.ai/{wandb_repo}/runs/{wandb_id}"
+         wandb_link = f" - [{link}]({link})\n"
+
+         # Find package versions for OCF packages
+         packages_to_display = ["pvnet_summation", "ocf-data-sampler"]
+         packages_and_versions = {package: version(package) for package in packages_to_display}
+
+         package_versions_markdown = ""
+         for package, v in packages_and_versions.items():
+             package_versions_markdown += f" - {package}=={v}\n"
+
+         return ModelCard.from_template(
+             card_data,
+             template_path=card_template_path,
+             wandb_link=wandb_link,
+             package_versions=package_versions_markdown,
+         )
+
+
+ class BaseModel(torch.nn.Module, HuggingfaceMixin):
+     """Abstract base class for PVNet-summation submodels"""
+
+     def __init__(
+         self,
+         output_quantiles: list[float] | None,
+         num_input_locations: int,
+         input_quantiles: list[float] | None,
+         history_minutes: int,
+         forecast_minutes: int,
+         interval_minutes: int,
+     ):
+         """Abstract base class for PVNet-summation submodels.
+
+         Args:
+             output_quantiles: A list of float (0.0, 1.0) quantiles to predict values for. If
+                 set to None the output is a single value.
+             num_input_locations: The number of input locations (e.g. number of GSPs)
+             input_quantiles: A list of float (0.0, 1.0) quantiles which PVNet predicts for. If
+                 set to None we assume PVNet predicts a single value
+             history_minutes: Length of the GSP history period in minutes
+             forecast_minutes: Length of the GSP forecast period in minutes
+             interval_minutes: The interval in minutes between each timestep in the data
+         """
+         super().__init__()
+
+         if output_quantiles is not None:
+             if output_quantiles != sorted(output_quantiles):
+                 raise ValueError("output_quantiles should be in ascending order")
+             if 0.5 not in output_quantiles:
+                 raise ValueError("Quantiles must include 0.5")
+
+         self.output_quantiles = output_quantiles
+
+         self.num_input_locations = num_input_locations
+         self.input_quantiles = input_quantiles
+
+         self.history_minutes = history_minutes
+         self.forecast_minutes = forecast_minutes
+         self.interval_minutes = interval_minutes
+
+         # Number of timesteps in the history and forecast periods
+         self.history_len = history_minutes // interval_minutes
+         self.forecast_len = forecast_minutes // interval_minutes
+
+         # Store whether the model should use quantile regression or simply predict the mean
+         self.use_quantile_regression = self.output_quantiles is not None
+
+         # Also store the final output shape
+         if self.use_quantile_regression:
+             self.output_shape = (self.forecast_len, len(output_quantiles))
+         else:
+             self.output_shape = (self.forecast_len,)
+
+         # Store the number of output features the model should predict
+         self.num_output_features = prod(self.output_shape)
+
+         # Store the expected input shape
+         if input_quantiles is None:
+             self.input_shape = (self.num_input_locations, self.forecast_len)
+         else:
+             self.input_shape = (self.num_input_locations, self.forecast_len, len(input_quantiles))
+
+     def _quantiles_to_prediction(self, y_quantiles: torch.Tensor) -> torch.Tensor:
+         """Convert a quantile prediction of the network into a point prediction.
+
+         Args:
+             y_quantiles: Quantile prediction of the network
+
+         Returns:
+             torch.Tensor: Point prediction (the median quantile)
+         """
+         # y_quantiles shape: [batch_size, seq_length, num_quantiles]
+         idx = self.output_quantiles.index(0.5)
+         return y_quantiles[..., idx]
+
+     def sum_of_locations(self, x: SumTensorBatch) -> torch.Tensor:
+         """Compute the capacity-weighted sum of the location-level predictions"""
+         if self.input_quantiles is None:
+             y_hat = x["pvnet_outputs"]
+         else:
+             idx = self.input_quantiles.index(0.5)
+             y_hat = x["pvnet_outputs"][..., idx]
+
+         return (y_hat * x["relative_capacity"].unsqueeze(-1)).sum(dim=1)
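The `sum_of_locations` method above computes a capacity-weighted sum of the per-location PVNet outputs, selecting the median quantile first when PVNet is probabilistic. Here is a minimal standalone sketch of that arithmetic, using made-up shapes (2 samples, 3 locations, 4 forecast steps) and illustrative quantiles `[0.1, 0.5, 0.9]`; the tensor keys mirror those used in the code:

```python
import torch

# Hypothetical toy batch, mirroring the "pvnet_outputs" and "relative_capacity"
# keys used by sum_of_locations: [batch, locs, horizon, quantile] and [batch, locs]
pvnet_outputs = torch.rand(2, 3, 4, 3)
relative_capacity = torch.rand(2, 3)

# Select the median quantile, as the model does when input_quantiles is set
idx = [0.1, 0.5, 0.9].index(0.5)
y_hat = pvnet_outputs[..., idx]  # -> [batch, locs, horizon]

# Weight each location by its relative capacity and sum over the location axis
national = (y_hat * relative_capacity.unsqueeze(-1)).sum(dim=1)
print(national.shape)  # torch.Size([2, 4]) -> [batch, horizon]
```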
@@ -0,0 +1,75 @@
+ """Simple model which only uses outputs of PVNet for all GSPs"""
+
+ import numpy as np
+ import torch
+ import torch.nn.functional as F
+ from torch import nn
+
+ from pvnet_summation.data.datamodule import SumTensorBatch
+ from pvnet_summation.models.base_model import BaseModel
+
+
+ class DenseModel(BaseModel):
+     """Neural network architecture based on naive dense layers
+
+     This model flattens all the features into a 1D vector before feeding them into the
+     sub-network.
+     """
+
+     def __init__(
+         self,
+         output_quantiles: list[float] | None,
+         num_input_locations: int,
+         input_quantiles: list[float] | None,
+         history_minutes: int,
+         forecast_minutes: int,
+         interval_minutes: int,
+         output_network: torch.nn.Module,
+         predict_difference_from_sum: bool = False,
+     ):
+         """Neural network architecture based on naive dense layers.
+
+         Args:
+             output_quantiles: A list of float (0.0, 1.0) quantiles to predict values for. If
+                 set to None the output is a single value.
+             num_input_locations: The number of input locations (e.g. number of GSPs)
+             input_quantiles: A list of float (0.0, 1.0) quantiles which PVNet predicts for. If
+                 set to None we assume PVNet predicts a single value
+             history_minutes: Length of the GSP history period in minutes
+             forecast_minutes: Length of the GSP forecast period in minutes
+             interval_minutes: The interval in minutes between each timestep in the data
+             output_network: A partially instantiated pytorch Module class used to predict the
+                 outturn from the flattened features.
+             predict_difference_from_sum: Whether to predict the difference from the sum of the
+                 location-level predictions; otherwise the total is predicted directly.
+         """
+
+         super().__init__(
+             output_quantiles,
+             num_input_locations,
+             input_quantiles,
+             history_minutes,
+             forecast_minutes,
+             interval_minutes,
+         )
+
+         self.predict_difference_from_sum = predict_difference_from_sum
+
+         self.model = output_network(
+             in_features=np.prod(self.input_shape),
+             out_features=self.num_output_features,
+         )
+
+         # Add a linear layer if predicting the difference from the sum
+         # - this allows the difference to be positive or negative
+         if predict_difference_from_sum:
+             self.model = nn.Sequential(
+                 self.model,
+                 nn.Linear(self.num_output_features, self.num_output_features),
+             )
+
+     def forward(self, x: SumTensorBatch) -> torch.Tensor:
+         """Run model forward"""
+
+         x_in = torch.flatten(x["pvnet_outputs"], start_dim=1)
+         out = self.model(x_in)
+
+         if self.use_quantile_regression:
+             # Reshape from [batch_size, seq_length * num_quantiles]
+             # to [batch_size, seq_length, num_quantiles]
+             out = out.reshape(out.shape[0], self.forecast_len, len(self.output_quantiles))
+
+         if self.predict_difference_from_sum:
+             loc_sum = self.sum_of_locations(x)
+
+             if self.use_quantile_regression:
+                 loc_sum = loc_sum.unsqueeze(-1)
+
+             # Leaky relu acts as a soft clip to 0
+             out = F.leaky_relu(loc_sum + out)
+
+         return out
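A quick smoke test of `DenseModel` may help clarify the expected shapes. This is a sketch, not from the package's docs: the import path, the 317-location batch, and the use of `functools.partial(nn.Linear)` as the partially instantiated `output_network` are all assumptions for illustration.

```python
import torch
from functools import partial
from torch import nn

from pvnet_summation.models.dense_model import DenseModel  # import path assumed

model = DenseModel(
    output_quantiles=[0.1, 0.5, 0.9],
    num_input_locations=317,            # e.g. number of GSPs; illustrative
    input_quantiles=[0.1, 0.5, 0.9],
    history_minutes=0,
    forecast_minutes=480,               # 16 steps at 30-minute intervals
    interval_minutes=30,
    output_network=partial(nn.Linear),  # stand-in for a real dense network
    predict_difference_from_sum=True,
)

batch = {
    "pvnet_outputs": torch.rand(2, 317, 16, 3),  # [batch, locs, horizon, quantile]
    "relative_capacity": torch.rand(2, 317),     # [batch, locs]
}
out = model(batch)
print(out.shape)  # torch.Size([2, 16, 3]) -> [batch, horizon, quantile]
```

In real training the `output_network` would be a partially instantiated multi-layer module rather than a bare linear layer; `partial(nn.Linear)` is just enough to exercise the `in_features`/`out_features` calling convention.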
@@ -0,0 +1,171 @@
+ """Neural network architecture based on dense layers applied independently at each horizon"""
+
+ import torch
+ import torch.nn.functional as F
+ from torch import nn
+
+ from pvnet_summation.data.datamodule import SumTensorBatch
+ from pvnet_summation.models.base_model import BaseModel
+
+
+ class HorizonDenseModel(BaseModel):
+     """Neural network architecture based on dense layers applied independently at each horizon"""
+
+     def __init__(
+         self,
+         output_quantiles: list[float] | None,
+         num_input_locations: int,
+         input_quantiles: list[float] | None,
+         history_minutes: int,
+         forecast_minutes: int,
+         interval_minutes: int,
+         output_network: torch.nn.Module,
+         predict_difference_from_sum: bool = False,
+         use_horizon_encoding: bool = False,
+         use_solar_position: bool = False,
+         force_non_crossing: bool = False,
+         beta: float = 3,
+     ):
+         """Neural network architecture based on dense layers applied independently at each
+         horizon.
+
+         Args:
+             output_quantiles: A list of float (0.0, 1.0) quantiles to predict values for. If
+                 set to None the output is a single value.
+             num_input_locations: The number of input locations (e.g. number of GSPs)
+             input_quantiles: A list of float (0.0, 1.0) quantiles which PVNet predicts for. If
+                 set to None we assume PVNet predicts a single value
+             history_minutes: Length of the GSP history period in minutes
+             forecast_minutes: Length of the GSP forecast period in minutes
+             interval_minutes: The interval in minutes between each timestep in the data
+             output_network: A partially instantiated pytorch Module class used to predict the
+                 outturn at each horizon.
+             predict_difference_from_sum: Whether to predict the difference from the sum of the
+                 location-level predictions; otherwise the total is predicted directly.
+             use_horizon_encoding: Whether to use the forecast horizon as an input feature
+             use_solar_position: Whether to use the solar coordinates as input features
+             force_non_crossing: If predicting quantiles, whether to predict the quantiles other
+                 than the median via the strictly positive distances between adjacent quantiles,
+                 accumulated outward from the median, so the quantiles cannot cross.
+             beta: If using force_non_crossing, the beta value to use in the softplus activation
+         """
+
+         super().__init__(
+             output_quantiles,
+             num_input_locations,
+             input_quantiles,
+             history_minutes,
+             forecast_minutes,
+             interval_minutes,
+         )
+
+         if force_non_crossing:
+             assert self.use_quantile_regression, "force_non_crossing requires output_quantiles"
+
+         self.use_horizon_encoding = use_horizon_encoding
+         self.predict_difference_from_sum = predict_difference_from_sum
+         self.force_non_crossing = force_non_crossing
+         self.beta = beta
+         self.use_solar_position = use_solar_position
+
+         in_features = 1 if self.input_quantiles is None else len(self.input_quantiles)
+         in_features = in_features * self.num_input_locations
+
+         if use_horizon_encoding:
+             in_features += 1
+
+         if use_solar_position:
+             in_features += 2
+
+         out_features = len(self.output_quantiles) if self.use_quantile_regression else 1
+
+         model = output_network(in_features=in_features, out_features=out_features)
+
+         # Add a linear layer if predicting the difference from the sum
+         # - this allows the difference to be positive or negative
+         # Also add a linear layer if applying force_non_crossing, since a softplus will be used
+         if predict_difference_from_sum or force_non_crossing:
+             model = nn.Sequential(
+                 model,
+                 nn.Linear(out_features, out_features),
+             )
+
+         self.model = model
+
+     def forward(self, x: SumTensorBatch) -> torch.Tensor:
+         """Run model forward"""
+
+         # x["pvnet_outputs"] has shape [batch, locs, horizon, (quantile)]
+         batch_size = x["pvnet_outputs"].shape[0]
+         x_in = torch.swapaxes(x["pvnet_outputs"], 1, 2)  # -> [batch, horizon, locs, (quantile)]
+         x_in = torch.flatten(x_in, start_dim=2)  # -> [batch, horizon, locs*(quantile)]
+
+         if self.use_horizon_encoding:
+             horizon_encoding = torch.linspace(
+                 start=0,
+                 end=1,
+                 steps=self.forecast_len,
+                 device=x_in.device,
+                 dtype=x_in.dtype,
+             )
+             horizon_encoding = horizon_encoding.tile((batch_size, 1)).unsqueeze(-1)
+             x_in = torch.cat([x_in, horizon_encoding], dim=2)
+
+         if self.use_solar_position:
+             x_in = torch.cat(
+                 [x_in, x["azimuth"].unsqueeze(-1), x["elevation"].unsqueeze(-1)],
+                 dim=2,
+             )
+
+         x_in = torch.flatten(x_in, start_dim=0, end_dim=1)  # -> [batch*horizon, features]
+
+         out = self.model(x_in)
+         out = out.view(batch_size, *self.output_shape)  # -> [batch, horizon, (quantile)]
+
+         if self.force_non_crossing:
+             # Get the prediction of the median
+             idx = self.output_quantiles.index(0.5)
+             if self.predict_difference_from_sum:
+                 loc_sum = self.sum_of_locations(x).unsqueeze(-1)
+                 y_median = loc_sum + out[..., idx:idx + 1]
+             else:
+                 y_median = out[..., idx:idx + 1]
+
+             # These are the differences between the remaining quantiles
+             dy_below = F.softplus(out[..., :idx], beta=self.beta)
+             dy_above = F.softplus(out[..., idx + 1:], beta=self.beta)
+
+             # Find the absolute values of the quantile predictions from the differences
+             y_below = []
+             y = y_median
+             for i in range(dy_below.shape[-1]):
+                 # We detach y to avoid the gradients caused by errors from one quantile
+                 # prediction flowing back to affect the other quantile predictions.
+                 # For example, if the 0.9 quantile prediction was too low, we don't want the
+                 # gradient to pull the 0.5 quantile prediction higher to compensate.
+                 y = y.detach() - dy_below[..., i:i + 1]
+                 y_below.append(y)
+
+             y_above = []
+             y = y_median
+             for i in range(dy_above.shape[-1]):
+                 y = y.detach() + dy_above[..., i:i + 1]
+                 y_above.append(y)
+
+             # Compile the quantile predictions in the correct order
+             out = torch.cat(y_below[::-1] + [y_median] + y_above, dim=-1)
+
+         else:
+             if self.predict_difference_from_sum:
+                 loc_sum = self.sum_of_locations(x)
+
+                 if self.use_quantile_regression:
+                     loc_sum = loc_sum.unsqueeze(-1)
+
+                 out = loc_sum + out
+
+         # Use leaky relu as a soft clip to 0
+         return F.leaky_relu(out, negative_slope=0.01)
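The non-crossing construction above can be exercised in isolation. This is a minimal sketch with arbitrary shapes: each gap between adjacent quantiles passes through a softplus, so it is strictly positive, and accumulating the gaps outward from the median (with `detach`, so one quantile's error does not drag its neighbours) yields predictions that are monotone across the quantile axis by construction.

```python
import torch
import torch.nn.functional as F

# Raw network outputs for quantiles [0.1, 0.5, 0.9]: [batch, horizon, quantile]
out = torch.randn(2, 16, 3)
idx = 1      # position of the 0.5 quantile
beta = 3.0

y_median = out[..., idx:idx + 1]

# Softplus makes each gap strictly positive
dy_below = F.softplus(out[..., :idx], beta=beta)
dy_above = F.softplus(out[..., idx + 1:], beta=beta)

# Accumulate the gaps outward from the median, detaching at each step
y_below, y = [], y_median
for i in range(dy_below.shape[-1]):
    y = y.detach() - dy_below[..., i:i + 1]
    y_below.append(y)

y_above, y = [], y_median
for i in range(dy_above.shape[-1]):
    y = y.detach() + dy_above[..., i:i + 1]
    y_above.append(y)

q = torch.cat(y_below[::-1] + [y_median] + y_above, dim=-1)
assert torch.all(q[..., :-1] <= q[..., 1:])  # quantiles never cross
```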