ocf-data-sampler 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocf-data-sampler might be problematic.
- ocf_data_sampler/config/model.py +6 -0
- ocf_data_sampler/data/uk_gsp_locations_20250109.csv +333 -0
- ocf_data_sampler/load/gsp.py +37 -8
- ocf_data_sampler/load/load_dataset.py +8 -3
- ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +18 -12
- ocf_data_sampler/torch_datasets/sample/uk_regional.py +129 -43
- ocf_data_sampler/torch_datasets/utils/validation_utils.py +39 -0
- {ocf_data_sampler-0.2.15.dist-info → ocf_data_sampler-0.2.17.dist-info}/METADATA +1 -1
- {ocf_data_sampler-0.2.15.dist-info → ocf_data_sampler-0.2.17.dist-info}/RECORD +13 -11
- {ocf_data_sampler-0.2.15.dist-info → ocf_data_sampler-0.2.17.dist-info}/WHEEL +1 -1
- scripts/download_gsp_location_data.py +95 -0
- /ocf_data_sampler/data/{uk_gsp_locations.csv → uk_gsp_locations_20220314.csv} +0 -0
- {ocf_data_sampler-0.2.15.dist-info → ocf_data_sampler-0.2.17.dist-info}/top_level.txt +0 -0
ocf_data_sampler/load/gsp.py
CHANGED
@@ -6,25 +6,54 @@ import pandas as pd
 import xarray as xr


-def
+def get_gsp_boundaries(version: str) -> pd.DataFrame:
+    """Get the GSP boundaries for a given version.
+
+    Args:
+        version: Version of the GSP boundaries to use. Options are "20220314" or "20250109".
+
+    Returns:
+        pd.DataFrame: The GSP boundaries
+    """
+    if version not in ["20220314", "20250109"]:
+        raise ValueError(
+            "Invalid version. Options are '20220314' or '20250109'.",
+        )
+
+    return pd.read_csv(
+        files("ocf_data_sampler.data").joinpath(f"uk_gsp_locations_{version}.csv"),
+        index_col="gsp_id",
+    )
+
+
+def open_gsp(zarr_path: str, boundaries_version: str = "20220314") -> xr.DataArray:
     """Open the GSP data.

     Args:
         zarr_path: Path to the GSP zarr data
+        boundaries_version: Version of the GSP boundaries to use. Options are "20220314" or
+            "20250109".

     Returns:
         xr.DataArray: The opened GSP data
     """
-    ds = xr.open_zarr(zarr_path)
-
-    ds = ds.rename({"datetime_gmt": "time_utc"})
-
     # Load UK GSP locations
-    df_gsp_loc =
-
-
+    df_gsp_loc = get_gsp_boundaries(boundaries_version)
+
+    # Open the GSP generation data
+    ds = (
+        xr.open_zarr(zarr_path)
+        .rename({"datetime_gmt": "time_utc"})
     )

+    if not (ds.gsp_id.isin(df_gsp_loc.index)).all():
+        raise ValueError(
"Some GSP IDs in the GSP generation data are available in the locations file.",
+        )
+
+    # Select the locations by the GSP IDs in the generation data
+    df_gsp_loc = df_gsp_loc.loc[ds.gsp_id.values]
+
     # Add locations and capacities as coordinates for each GSP and datetime
     ds = ds.assign_coords(
         x_osgb=(df_gsp_loc.x_osgb.to_xarray()),
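
As a rough usage sketch of the new loader (the "gsp.zarr" path below is illustrative and not part of the package):

from ocf_data_sampler.load.gsp import get_gsp_boundaries, open_gsp

# Boundary locations alone, indexed by gsp_id (gsp_id 0 is the national total)
df_loc = get_gsp_boundaries("20250109")
print(df_loc[["x_osgb", "y_osgb"]].head())

# Generation data with locations and capacities attached as coordinates;
# boundaries_version defaults to "20220314" if omitted
da_gsp = open_gsp(zarr_path="gsp.zarr", boundaries_version="20250109")
print(da_gsp.sizes)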

ocf_data_sampler/load/load_dataset.py
CHANGED
@@ -6,8 +6,10 @@ from ocf_data_sampler.config import InputData
 from ocf_data_sampler.load import open_gsp, open_nwp, open_sat_data, open_site


-def get_dataset_dict(
-
+def get_dataset_dict(
+    input_config: InputData,
+    gsp_ids: list[int] | None = None,
+) -> dict[str, dict[xr.DataArray] | xr.DataArray]:
     """Construct dictionary of all of the input data sources.

     Args:
@@ -19,7 +21,10 @@ def get_dataset_dict(input_config: InputData, gsp_ids: list[int] | None = None)\
     # Load GSP data unless the path is None
     if input_config.gsp and input_config.gsp.zarr_path:

-        da_gsp = open_gsp(
+        da_gsp = open_gsp(
+            zarr_path=input_config.gsp.zarr_path,
+            boundaries_version=input_config.gsp.boundaries_version,
+        ).compute()

         if gsp_ids is None:
             # Remove national (gsp_id=0)
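
A sketch of how the extended signature is likely consumed; "config.yaml" is an illustrative path and the "gsp" dictionary key is an assumption not shown in this diff:

from ocf_data_sampler.config import load_yaml_configuration
from ocf_data_sampler.load.load_dataset import get_dataset_dict

config = load_yaml_configuration("config.yaml")  # illustrative path
datasets = get_dataset_dict(config.input_data, gsp_ids=[1, 2, 3])
da_gsp = datasets["gsp"]  # key name assumed, not shown in this diff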

ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py
CHANGED
@@ -1,7 +1,5 @@
 """Torch dataset for UK PVNet."""

-from importlib.resources import files
-
 import numpy as np
 import pandas as pd
 import xarray as xr
@@ -9,6 +7,7 @@ from torch.utils.data import Dataset
 from typing_extensions import override

 from ocf_data_sampler.config import Configuration, load_yaml_configuration
+from ocf_data_sampler.load.gsp import get_gsp_boundaries
 from ocf_data_sampler.load.load_dataset import get_dataset_dict
 from ocf_data_sampler.numpy_sample import (
     convert_gsp_to_numpy_sample,
@@ -47,22 +46,26 @@ def compute(xarray_dict: dict) -> dict:
     return xarray_dict


-def get_gsp_locations(
+def get_gsp_locations(
+    gsp_ids: list[int] | None = None,
+    version: str = "20220314",
+) -> list[Location]:
     """Get list of locations of all GSPs.

     Args:
-        gsp_ids: List of GSP IDs to include. Defaults to all
+        gsp_ids: List of GSP IDs to include. Defaults to all GSPs except national
+        version: Version of GSP boundaries to use. Defaults to "20220314"
     """
+    df_gsp_loc = get_gsp_boundaries(version)
+
+    # Default GSP IDs is all except national (gsp_id=0)
     if gsp_ids is None:
-        gsp_ids =
+        gsp_ids = df_gsp_loc.index.values
+        gsp_ids = gsp_ids[gsp_ids != 0]

-
+    df_gsp_loc = df_gsp_loc.loc[gsp_ids]

-
-    df_gsp_loc = pd.read_csv(
-        files("ocf_data_sampler.data").joinpath("uk_gsp_locations.csv"),
-        index_col="gsp_id",
-    )
+    locations = []

     for gsp_id in gsp_ids:
         locations.append(
@@ -108,7 +111,10 @@ class AbstractPVNetUKDataset(Dataset):
         valid_t0_times = valid_t0_times[valid_t0_times <= pd.Timestamp(end_time)]

         # Construct list of locations to sample from
-        self.locations = get_gsp_locations(
+        self.locations = get_gsp_locations(
+            gsp_ids,
+            version=config.input_data.gsp.boundaries_version,
+        )
         self.valid_t0_times = valid_t0_times

         # Assign config and input data to self
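
The refactored location helper can also be called directly; a minimal sketch assuming the package is installed:

from ocf_data_sampler.torch_datasets.datasets.pvnet_uk import get_gsp_locations

# All non-national GSPs, using the new 2025 boundary set
locations = get_gsp_locations(version="20250109")
print(len(locations))

# A specific subset, keeping the default "20220314" boundaries
subset = get_gsp_locations(gsp_ids=[1, 2, 3])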

ocf_data_sampler/torch_datasets/sample/uk_regional.py
CHANGED
@@ -1,5 +1,7 @@
 """PVNet UK Regional sample implementation for dataset handling and visualisation."""

+import logging
+
 import torch
 from typing_extensions import override

@@ -14,8 +16,11 @@ from ocf_data_sampler.torch_datasets.sample.base import SampleBase
 from ocf_data_sampler.torch_datasets.utils.validation_utils import (
     calculate_expected_shapes,
     check_dimensions,
+    validation_warning,
 )

+logger = logging.getLogger(__name__)
+

 class UKRegionalSample(SampleBase):
     """Handles UK Regional PVNet data operations."""
@@ -50,14 +55,27 @@ class UKRegionalSample(SampleBase):
         # TODO: We should move away from using torch.load(..., weights_only=False)
         return cls(torch.load(path, weights_only=False))

-    def validate_sample(self, config: Configuration) ->
-        """Validates
+    def validate_sample(self, config: Configuration) -> dict:
+        """Validates the sample, logging warnings and raising errors.
+
+        Checks that the sample has the expected structure and data shapes based
+        on the provided configuration. Critical issues (missing required data,
+        shape mismatches) will raise a ValueError. Non-critical issues (e.g.,
+        unexpected data components found) will be logged as warnings using
+        the standard Python logging module.

         Args:
-            config: Configuration
+            config: Configuration object defining expected shapes and required fields.

         Returns:
-
+            dict: A dictionary indicating success: `{"valid": True}`.
+                If validation fails due to a critical issue, an exception is raised
+                instead of returning. Warnings encountered are logged.
+
+        Raises:
+            TypeError: If `config` is not a Configuration object.
+            ValueError: For critical validation failures like missing expected data,
+                incorrect data shapes, or missing required NWP providers.
         """
         if not isinstance(config, Configuration):
             raise TypeError("config must be Configuration object")
@@ -67,78 +85,146 @@ class UKRegionalSample(SampleBase):

         # Check GSP shape if specified
         gsp_key = GSPSampleKey.gsp
-        # Check if GSP data is expected but missing
         if gsp_key in expected_shapes and gsp_key not in self._data:
             raise ValueError(f"Configuration expects GSP data ('{gsp_key}') but is missing.")

-
-
-
-
-
-
-
-
+        if gsp_key in self._data:
+            if gsp_key in expected_shapes:
+                gsp_data = self._data[gsp_key]
+                check_dimensions(
+                    actual_shape=gsp_data.shape,
+                    expected_shape=expected_shapes[gsp_key],
+                    name="GSP",
+                )
+            else:
+                validation_warning(
+                    message=f"GSP data ('{gsp_key}') is present but not expected in configuration.",
+                    warning_type="unexpected_component",
+                    component=str(gsp_key),
+                )

-        # Checks for NWP data
+        # Checks for NWP data
         nwp_key = NWPSampleKey.nwp
         if nwp_key in expected_shapes and nwp_key not in self._data:
             raise ValueError(f"Configuration expects NWP data ('{nwp_key}') but is missing.")

-        # Check NWP structure and shapes if data exists
         if nwp_key in self._data:
             nwp_data_all_providers = self._data[nwp_key]
             if not isinstance(nwp_data_all_providers, dict):
                 raise ValueError(f"NWP data ('{nwp_key}') should be a dictionary.")

-
-
-
-                    raise ValueError(f"Missing array key in NWP data for provider '{provider}'.")
+            if nwp_key in expected_shapes:
+                expected_providers = set(expected_shapes[nwp_key].keys())
+                actual_providers = set(nwp_data_all_providers.keys())

-
-
-
-
+                unexpected_providers = actual_providers - expected_providers
+                if unexpected_providers:
+                    validation_warning(
+                        message=f"Unexpected NWP providers found: {list(unexpected_providers)}",
+                        warning_type="unexpected_provider",
+                        providers=list(unexpected_providers),
+                    )
+
+                missing_expected_providers = expected_providers - actual_providers
+                if missing_expected_providers:
+                    raise ValueError(
+                        f"Expected NWP providers are missing from the data: "
+                        f"{list(missing_expected_providers)}",
+                    )
+
+                for provider in expected_shapes[nwp_key]:
+                    provider_data = nwp_data_all_providers[provider]
+
+                    if "nwp" not in provider_data:
+                        error_msg = (
+                            f"Missing array key 'nwp' in NWP data for provider '{provider}'."
+                        )
+                        raise ValueError(error_msg)

+                    nwp_array = provider_data["nwp"]
                     check_dimensions(
-                        actual_shape=
-                        expected_shape=
+                        actual_shape=nwp_array.shape,
+                        expected_shape=expected_shapes[nwp_key][provider],
                         name=f"NWP data ({provider})",
                     )
+            else:
+                validation_warning(
+                    message=(
+                        f"NWP data ('{nwp_key}') is present but not expected "
+                        "in configuration."
+                    ),
+                    warning_type="unexpected_component",
+                    component=str(nwp_key),
+                )

         # Validate satellite data
         sat_key = SatelliteSampleKey.satellite_actual
-        # Check if Satellite data is expected but missing
         if sat_key in expected_shapes and sat_key not in self._data:
             raise ValueError(f"Configuration expects Satellite data ('{sat_key}') but is missing.")

-
-
-
-
-
-
-
-
+        if sat_key in self._data:
+            if sat_key in expected_shapes:
+                sat_data = self._data[sat_key]
+                check_dimensions(
+                    actual_shape=sat_data.shape,
+                    expected_shape=expected_shapes[sat_key],
+                    name="Satellite data",
+                )
+            else:
+                validation_warning(
+                    message=(
+                        f"Satellite data ('{sat_key}') is present but not expected "
+                        "in configuration."
+                    ),
+                    warning_type="unexpected_component",
+                    component=str(sat_key),
+                )

         # Validate solar coordinates data
         solar_keys = ["solar_azimuth", "solar_elevation"]
-        # Check if solar coordinate is expected but missing
         for solar_key in solar_keys:
+            solar_name = solar_key.replace("_", " ").title()
             if solar_key in expected_shapes and solar_key not in self._data:
                 raise ValueError(f"Configuration expects {solar_key} data but is missing.")

-
-
-
-
-
-
-
+            if solar_key in self._data:
+                if solar_key in expected_shapes:
+                    solar_data = self._data[solar_key]
+                    check_dimensions(
+                        actual_shape=solar_data.shape,
+                        expected_shape=expected_shapes[solar_key],
+                        name=f"{solar_name} data",
+                    )
+                else:
+                    validation_warning(
+                        message=(
+                            f"{solar_name} data is present but not expected "
+                            "in configuration."
+                        ),
+                        warning_type="unexpected_component",
+                        component=solar_key,
+                    )
+
+        # Check for potentially unexpected components
+        checked_keys = {gsp_key, nwp_key, sat_key} | set(solar_keys)
+        all_present_keys = set(self._data.keys())
+        unexpected_present_keys = all_present_keys - set(expected_shapes.keys())
+
+        for key in unexpected_present_keys:
+            if key not in checked_keys:
+                validation_warning(
+                    message=(
+                        f"Unexpected component '{key}' is present in data but not defined "
+                        "in configuration's expected shapes."
+                    ),
+                    warning_type="unexpected_component",
+                    component=str(key),
                 )

-        return
+        return {
+            "valid": True,
+        }
+

     @override
     def plot(self) -> None:
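
How the reworked validation is presumably exercised; a minimal sketch in which "sample.pt" and "config.yaml" are illustrative paths:

import logging

import torch

from ocf_data_sampler.config import load_yaml_configuration
from ocf_data_sampler.torch_datasets.sample.uk_regional import UKRegionalSample

logging.basicConfig(level=logging.WARNING)  # surfaces the non-critical validation warnings

config = load_yaml_configuration("config.yaml")  # illustrative path
sample = UKRegionalSample(torch.load("sample.pt", weights_only=False))  # illustrative path

# Raises TypeError/ValueError on critical problems, otherwise returns {"valid": True}
print(sample.validate_sample(config))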

ocf_data_sampler/torch_datasets/utils/validation_utils.py
CHANGED
@@ -1,8 +1,13 @@
 """Validate sample shape against expected shape - utility function."""

+import logging
+from typing import Any
+
 from ocf_data_sampler.config import Configuration
 from ocf_data_sampler.numpy_sample import GSPSampleKey, NWPSampleKey, SatelliteSampleKey

+logger = logging.getLogger(__name__)
+

 def check_dimensions(
     actual_shape: tuple[int, ...],
@@ -93,6 +98,40 @@ def calculate_expected_shapes(
     return expected_shapes


+def validation_warning(
+    message: str,
+    warning_type: str,
+    *,
+    component: str | None = None,
+    providers: list[str] | None = None,
+) -> dict[str, Any]:
+    """Constructs warning details and logs a standard warning message.
+
+    Args:
+        message: The base warning message string.
+        warning_type: The category of the warning (e.g., 'unexpected_component').
+        component: Optional component identifier (e.g., 'gsp').
+        providers: Optional list of provider names (e.g., ['ukv']).
+
+    Returns:
+        None - This function now directly logs the warning.
+    """
+    warning_info: dict[str, Any] = {"type": warning_type, "message": message}
+    log_message_parts = [message]
+    log_message_parts.append(f"(Type: {warning_type}")
+
+    if component is not None:
+        warning_info["component"] = component
+        log_message_parts.append(f", Component: {component}")
+    if providers is not None:
+        warning_info["providers"] = providers
+        log_message_parts.append(f", Providers: {providers}")
+
+    log_message_parts.append(")")
+    log_message = " ".join(log_message_parts)
+    logger.warning(log_message)
+
+
 def _calculate_time_steps(start_minutes: int, end_minutes: int, resolution_minutes: int) -> int:
     """Calculate number of time steps based on interval and resolution.

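
A sketch of the helper in isolation; the logged format shown below is derived from the string joining above rather than from documented behaviour:

import logging

from ocf_data_sampler.torch_datasets.utils.validation_utils import validation_warning

logging.basicConfig(level=logging.WARNING)

validation_warning(
    message="Unexpected NWP providers found: ['ecmwf']",
    warning_type="unexpected_provider",
    providers=["ecmwf"],
)
# Logs roughly:
#   Unexpected NWP providers found: ['ecmwf'] (Type: unexpected_provider , Providers: ['ecmwf'] )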

{ocf_data_sampler-0.2.15.dist-info → ocf_data_sampler-0.2.17.dist-info}/RECORD
CHANGED
@@ -2,12 +2,13 @@ ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,
 ocf_data_sampler/utils.py,sha256=DjuneGGisl08ENvPZV_lrcX4b2NCKJC1ZpXgIpxuQi4,290
 ocf_data_sampler/config/__init__.py,sha256=O29mbH0XG2gIY1g3BaveGCnpBO2SFqdu-qzJ7a6evl0,223
 ocf_data_sampler/config/load.py,sha256=LL-7wemI8o4KPkx35j-wQ3HjsMvDgqXr7G46IcASfnU,632
-ocf_data_sampler/config/model.py,sha256=
+ocf_data_sampler/config/model.py,sha256=SyjtlSK6gzQHWUfgX3VNKYLODyiKuD0Mu4hlm9GoHeg,10427
 ocf_data_sampler/config/save.py,sha256=m8SPw5rXjkMm1rByjh3pK5StdBi4e8ysnn3jQopdRaI,1064
-ocf_data_sampler/data/
+ocf_data_sampler/data/uk_gsp_locations_20220314.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
+ocf_data_sampler/data/uk_gsp_locations_20250109.csv,sha256=XZISFatnbpO9j8LwaxNKFzQSjs6hcHFsV8a9uDDpy2E,9055334
 ocf_data_sampler/load/__init__.py,sha256=-vQP9g0UOWdVbjEGyVX_ipa7R1btmiETIKAf6aw4d78,201
-ocf_data_sampler/load/gsp.py,sha256=
-ocf_data_sampler/load/load_dataset.py,sha256=
+ocf_data_sampler/load/gsp.py,sha256=UfPxwHw2Dw2xYSO5Al28oTamgnEM_n_4bYXsqGwY5Tc,1884
+ocf_data_sampler/load/load_dataset.py,sha256=sIi0nkijR_-1fRfW5JcXNTR0ccGbpkhxb7JX_zjJ-W4,1956
 ocf_data_sampler/load/satellite.py,sha256=E7Ln7Y60Qr1RTV-_R71YoxXQM-Ca7Y1faIo3oKB2eFk,2292
 ocf_data_sampler/load/site.py,sha256=zOzlWk6pYZBB5daqG8URGksmDXWKrkutUvN8uALAIh8,1468
 ocf_data_sampler/load/utils.py,sha256=sZ0-zzconcLkVQwAkCYrqKDo98Hrh5ChdiQJv5Bh91g,2040
@@ -38,22 +39,23 @@ ocf_data_sampler/select/location.py,sha256=AZvGR8y62opiW7zACGXjoOtBEWRfSLOZIA73O
 ocf_data_sampler/select/select_spatial_slice.py,sha256=liAqIa-Amj58pOqx5r16i99HURj9oQ41j7gnPgRDQP4,8201
 ocf_data_sampler/select/select_time_slice.py,sha256=HeHbwZ0CP03x0-LaJtpbSdtpLufwVTR73p6wH6O_PS8,5513
 ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=jfJSFcR0eO1AqeH7S3KnGjsBqVZT5w3oyi784PUR6Q0,146
-ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=
+ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=ZV2FoMPxFU2aPTWipj9HhJhGfrEg9MYOJRNR8aFcmvs,12613
 ocf_data_sampler/torch_datasets/datasets/site.py,sha256=nRUlhXQQGVrTuBmE1QnwXAUsPTXz0dsezlQjwK71jIQ,17641
 ocf_data_sampler/torch_datasets/sample/__init__.py,sha256=GL84vdZl_SjHDGVyh9Uekx2XhPYuZ0dnO3l6f6KXnHI,100
 ocf_data_sampler/torch_datasets/sample/base.py,sha256=cQ1oIyhdmlotejZK8B3Cw6MNvpdnBPD8G_o2h7Ye4Vc,2206
 ocf_data_sampler/torch_datasets/sample/site.py,sha256=ZUEgn50g-GmqujOEtezNILF7wjokF80sDAA4OOldcRI,1268
-ocf_data_sampler/torch_datasets/sample/uk_regional.py,sha256=
+ocf_data_sampler/torch_datasets/sample/uk_regional.py,sha256=Xx5cBYUyaM6PGUWQ76MHT9hwj6IJ7WAOxbpmYFbJGhc,10483
 ocf_data_sampler/torch_datasets/utils/__init__.py,sha256=N7i_hHtWUDiJqsiJoDx4T_QuiYOuvIyulPrn6xEA4TY,309
 ocf_data_sampler/torch_datasets/utils/channel_dict_to_dataarray.py,sha256=un2IiyoAmTDIymdeMiPU899_86iCDMD-oIifjHlNyqw,555
 ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py,sha256=we7BTxRH7B7jKayDT7YfNyfI3zZClz2Bk-HXKQIokgU,956
 ocf_data_sampler/torch_datasets/utils/spatial_slice_for_dataset.py,sha256=Hvz0wHSWMYYamf2oHNiGlzJcM4cAH6pL_7ZEvIBL2dE,1882
 ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py,sha256=1DN6VsWWdLvkpJxodZtBRDUgC4vJE2td_RP5J3ZqPNw,4268
 ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=xcy75cVxl0WrglnX5YUAFjXXlO2GwEBHWyqo8TDuiOA,4714
-ocf_data_sampler/torch_datasets/utils/validation_utils.py,sha256=
+ocf_data_sampler/torch_datasets/utils/validation_utils.py,sha256=YqmT-lExWlI8_ul3l0EP73Ik002fStr_bhsZh9mQqEU,4735
+scripts/download_gsp_location_data.py,sha256=rRDXMoqX-RYY4jPdxhdlxJGhWdl6r245F5UARgKV6P4,3121
 scripts/refactor_site.py,sha256=skzvsPP0Cn9yTKndzkilyNcGz4DZ88ctvCJ0XrBdc2A,3135
 utils/compute_icon_mean_stddev.py,sha256=a1oWMRMnny39rV-dvu8rcx85sb4bXzPFrR1gkUr4Jpg,2296
-ocf_data_sampler-0.2.
-ocf_data_sampler-0.2.
-ocf_data_sampler-0.2.
-ocf_data_sampler-0.2.
+ocf_data_sampler-0.2.17.dist-info/METADATA,sha256=OKEhg6yBn1fCJKsWOBngnCXVSSd5G5VvOnck0J8bXxw,11581
+ocf_data_sampler-0.2.17.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
+ocf_data_sampler-0.2.17.dist-info/top_level.txt,sha256=LEFU4Uk-PEo72QGLAfnVZIUEm37Q8mKuMeg_Xk-p33g,31
+ocf_data_sampler-0.2.17.dist-info/RECORD,,

scripts/download_gsp_location_data.py
ADDED
@@ -0,0 +1,95 @@
+"""This script downloads the GSP location data from the Neso API and saves it to a CSV file.
+
+This script was used to create the `uk_gsp_locations_20250109.csv` file in the `data` directory.
+"""
+
+import io
+import os
+import tempfile
+import zipfile
+
+import geopandas as gpd
+import pandas as pd
+import requests
+
+SAVE_PATH = "uk_gsp_locations_20250109.csv"
+
+# --- Configuration ---
+GSP_REGIONS_URL = (
+    "https://api.neso.energy/dataset/2810092e-d4b2-472f-b955-d8bea01f9ec0/"
+    "resource/d95e8c1b-9cd9-41dd-aacb-4b53b8c07c20/download/gsp_regions_20250109.zip"
+)
+# This is the path to the OSGB version of the boundaries. The lon-lats version can be found at:
+# Proj_4326/GSP_regions_4326_20250109.geojson
+GSP_REGIONS_GEOJSON_PATH_IN_ZIP = "Proj_27700/GSP_regions_27700_20250109.geojson"
+GSP_NAME_MAP_URL = "https://api.pvlive.uk/pvlive/api/v4/gsp_list"
+SAVE_PATH = "uk_gsp_locations_20250109.csv"
+# --- End Configuration ---
+
+
+with tempfile.TemporaryDirectory() as tmpdirname:
+
+    # Download the GSP regions
+    response_regions = requests.get(GSP_REGIONS_URL, timeout=30)
+    response_regions.raise_for_status()
+
+    # Unzip
+    with zipfile.ZipFile(io.BytesIO(response_regions.content)) as z:
+        geojson_extract_path = os.path.join(tmpdirname, GSP_REGIONS_GEOJSON_PATH_IN_ZIP)
+        z.extract(GSP_REGIONS_GEOJSON_PATH_IN_ZIP, tmpdirname)
+
+    # Load the GSP regions
+    df_bound = gpd.read_file(geojson_extract_path)
+
+# Download the GSP name mapping
+response_map = requests.get(GSP_NAME_MAP_URL, timeout=10)
+response_map.raise_for_status()
+
+# Load the GSP name mapping
+gsp_name_map = response_map.json()
+df_gsp_name_map = (
+    pd.DataFrame(data=gsp_name_map["data"], columns=gsp_name_map["meta"])
+    .drop("pes_id", axis=1)
+)
+
+
+def combine_gsps(gdf: gpd.GeoDataFrame) -> gpd.GeoSeries:
+    """Combine GSPs which have been split into multiple rows."""
+    # If only one row for the GSP name then just return the row
+    if len(gdf) == 1:
+        return gdf.iloc[0]
+
+    # If multiple rows for the GSP then get union of the GSP shapes
+    else:
+        return gpd.GeoSeries(gdf.unary_union, index=["geometry"], crs=gdf.crs)
+
+
+# Combine GSPs which have been split into multiple rows
+df_bound = (
+    df_bound.groupby("GSPs")
+    .apply(combine_gsps, include_groups=False)
+    .reset_index()
+)
+
+# Add the PVLive GSP ID for each GSP
+df_bound = (
+    df_bound.merge(df_gsp_name_map, left_on="GSPs", right_on="gsp_name")
+    .drop("GSPs", axis=1)
+)
+
+# Add the national GSP - this is the union of all GSPs
+national_boundaries = gpd.GeoDataFrame(
+    [["NATIONAL", df_bound.unary_union, 0]],
+    columns=["gsp_name", "geometry", "gsp_id"],
+    crs=df_bound.crs,
+)
+
+df_bound = pd.concat([national_boundaries, df_bound], ignore_index=True)
+
+# Add the coordinates for the centroid of each GSP
+df_bound["x_osgb"] = df_bound.geometry.centroid.x
+df_bound["y_osgb"] = df_bound.geometry.centroid.y
+
+# Reorder columns, sort by gsp_id (increasing) and save
+columns = ["gsp_id", "gsp_name", "geometry", "x_osgb", "y_osgb"]
+df_bound[columns].sort_values("gsp_id").to_csv(SAVE_PATH, index=False)
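
The output can be sanity-checked after running the script; a minimal sketch whose column expectations follow the `columns` list above:

import pandas as pd

df = pd.read_csv("uk_gsp_locations_20250109.csv", index_col="gsp_id")
assert list(df.columns) == ["gsp_name", "geometry", "x_osgb", "y_osgb"]
assert 0 in df.index  # national row added by the script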

/ocf_data_sampler/data/{uk_gsp_locations.csv → uk_gsp_locations_20220314.csv}
File without changes

{ocf_data_sampler-0.2.15.dist-info → ocf_data_sampler-0.2.17.dist-info}/top_level.txt
File without changes