ocf-data-sampler 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of ocf-data-sampler has been flagged as potentially problematic.

@@ -6,25 +6,54 @@ import pandas as pd
 import xarray as xr
 
 
-def open_gsp(zarr_path: str) -> xr.DataArray:
+def get_gsp_boundaries(version: str) -> pd.DataFrame:
+    """Get the GSP boundaries for a given version.
+
+    Args:
+        version: Version of the GSP boundaries to use. Options are "20220314" or "20250109".
+
+    Returns:
+        pd.DataFrame: The GSP boundaries
+    """
+    if version not in ["20220314", "20250109"]:
+        raise ValueError(
+            "Invalid version. Options are '20220314' or '20250109'.",
+        )
+
+    return pd.read_csv(
+        files("ocf_data_sampler.data").joinpath(f"uk_gsp_locations_{version}.csv"),
+        index_col="gsp_id",
+    )
+
+
+def open_gsp(zarr_path: str, boundaries_version: str = "20220314") -> xr.DataArray:
     """Open the GSP data.
 
     Args:
         zarr_path: Path to the GSP zarr data
+        boundaries_version: Version of the GSP boundaries to use. Options are "20220314" or
+            "20250109".
 
     Returns:
         xr.DataArray: The opened GSP data
     """
-    ds = xr.open_zarr(zarr_path)
-
-    ds = ds.rename({"datetime_gmt": "time_utc"})
-
     # Load UK GSP locations
-    df_gsp_loc = pd.read_csv(
-        files("ocf_data_sampler.data").joinpath("uk_gsp_locations.csv"),
-        index_col="gsp_id",
+    df_gsp_loc = get_gsp_boundaries(boundaries_version)
+
+    # Open the GSP generation data
+    ds = (
+        xr.open_zarr(zarr_path)
+        .rename({"datetime_gmt": "time_utc"})
     )
 
+    if not (ds.gsp_id.isin(df_gsp_loc.index)).all():
+        raise ValueError(
+            "Some GSP IDs in the GSP generation data are not available in the locations file.",
+        )
+
+    # Select the locations by the GSP IDs in the generation data
+    df_gsp_loc = df_gsp_loc.loc[ds.gsp_id.values]
+
     # Add locations and capacities as coordinates for each GSP and datetime
     ds = ds.assign_coords(
         x_osgb=(df_gsp_loc.x_osgb.to_xarray()),
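
For orientation, a minimal sketch of the new call sites; the zarr path below is a placeholder, not part of the package:

from ocf_data_sampler.load.gsp import get_gsp_boundaries, open_gsp

# Load a packaged boundary set directly; unknown versions raise ValueError
df_loc = get_gsp_boundaries("20250109")

# Open GSP generation data against a chosen boundary version
# ("gsp.zarr" is a placeholder path)
da_gsp = open_gsp(zarr_path="gsp.zarr", boundaries_version="20250109")
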
@@ -6,8 +6,10 @@ from ocf_data_sampler.config import InputData
 from ocf_data_sampler.load import open_gsp, open_nwp, open_sat_data, open_site
 
 
-def get_dataset_dict(input_config: InputData, gsp_ids: list[int] | None = None)\
-    -> dict[str, dict[xr.DataArray] | xr.DataArray]:
+def get_dataset_dict(
+    input_config: InputData,
+    gsp_ids: list[int] | None = None,
+) -> dict[str, dict[xr.DataArray] | xr.DataArray]:
     """Construct dictionary of all of the input data sources.
 
     Args:
@@ -19,7 +21,10 @@ def get_dataset_dict(input_config: InputData, gsp_ids: list[int] | None = None)\
     # Load GSP data unless the path is None
     if input_config.gsp and input_config.gsp.zarr_path:
 
-        da_gsp = open_gsp(zarr_path=input_config.gsp.zarr_path).compute()
+        da_gsp = open_gsp(
+            zarr_path=input_config.gsp.zarr_path,
+            boundaries_version=input_config.gsp.boundaries_version,
+        ).compute()
 
         if gsp_ids is None:
             # Remove national (gsp_id=0)
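
get_dataset_dict now threads the boundaries version through from the config rather than taking it explicitly. A hedged sketch of the call site; the YAML path is a placeholder, and the gsp section of the config is assumed to define boundaries_version (consistent with the config/model.py change in the RECORD below):

from ocf_data_sampler.config import load_yaml_configuration
from ocf_data_sampler.load.load_dataset import get_dataset_dict

# "config.yaml" is a placeholder path
config = load_yaml_configuration("config.yaml")
datasets = get_dataset_dict(config.input_data, gsp_ids=[1, 2, 3])
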
@@ -1,7 +1,5 @@
 """Torch dataset for UK PVNet."""
 
-from importlib.resources import files
-
 import numpy as np
 import pandas as pd
 import xarray as xr
@@ -9,6 +7,7 @@ from torch.utils.data import Dataset
 from typing_extensions import override
 
 from ocf_data_sampler.config import Configuration, load_yaml_configuration
+from ocf_data_sampler.load.gsp import get_gsp_boundaries
 from ocf_data_sampler.load.load_dataset import get_dataset_dict
 from ocf_data_sampler.numpy_sample import (
     convert_gsp_to_numpy_sample,
@@ -47,22 +46,26 @@ def compute(xarray_dict: dict) -> dict:
     return xarray_dict
 
 
-def get_gsp_locations(gsp_ids: list[int] | None = None) -> list[Location]:
+def get_gsp_locations(
+    gsp_ids: list[int] | None = None,
+    version: str = "20220314",
+) -> list[Location]:
     """Get list of locations of all GSPs.
 
     Args:
-        gsp_ids: List of GSP IDs to include. Defaults to all
+        gsp_ids: List of GSP IDs to include. Defaults to all GSPs except national
+        version: Version of GSP boundaries to use. Defaults to "20220314"
     """
+    df_gsp_loc = get_gsp_boundaries(version)
+
+    # Default GSP IDs is all except national (gsp_id=0)
     if gsp_ids is None:
-        gsp_ids = list(range(1, 318))
+        gsp_ids = df_gsp_loc.index.values
+        gsp_ids = gsp_ids[gsp_ids != 0]
 
-    locations = []
+    df_gsp_loc = df_gsp_loc.loc[gsp_ids]
 
-    # Load UK GSP locations
-    df_gsp_loc = pd.read_csv(
-        files("ocf_data_sampler.data").joinpath("uk_gsp_locations.csv"),
-        index_col="gsp_id",
-    )
+    locations = []
 
     for gsp_id in gsp_ids:
         locations.append(
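
For reference, a sketch of the reworked helper; the import path follows the RECORD entry for pvnet_uk.py, and the GSP IDs in the subset call are arbitrary examples:

from ocf_data_sampler.torch_datasets.datasets.pvnet_uk import get_gsp_locations

# All GSPs except national (gsp_id=0), using the default 2022 boundaries
locations = get_gsp_locations()

# An explicit subset against the newer boundary set
subset = get_gsp_locations(gsp_ids=[1, 140, 317], version="20250109")
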
@@ -108,7 +111,10 @@ class AbstractPVNetUKDataset(Dataset):
         valid_t0_times = valid_t0_times[valid_t0_times <= pd.Timestamp(end_time)]
 
         # Construct list of locations to sample from
-        self.locations = get_gsp_locations(gsp_ids)
+        self.locations = get_gsp_locations(
+            gsp_ids,
+            version=config.input_data.gsp.boundaries_version,
+        )
         self.valid_t0_times = valid_t0_times
 
         # Assign config and input data to self
@@ -1,5 +1,7 @@
 """PVNet UK Regional sample implementation for dataset handling and visualisation."""
 
+import logging
+
 import torch
 from typing_extensions import override
 
@@ -14,8 +16,11 @@ from ocf_data_sampler.torch_datasets.sample.base import SampleBase
 from ocf_data_sampler.torch_datasets.utils.validation_utils import (
     calculate_expected_shapes,
     check_dimensions,
+    validation_warning,
 )
 
+logger = logging.getLogger(__name__)
+
 
 class UKRegionalSample(SampleBase):
     """Handles UK Regional PVNet data operations."""
@@ -50,14 +55,27 @@ class UKRegionalSample(SampleBase):
         # TODO: We should move away from using torch.load(..., weights_only=False)
         return cls(torch.load(path, weights_only=False))
 
-    def validate_sample(self, config: Configuration) -> bool:
-        """Validates that the sample has the expected structure and data shapes.
+    def validate_sample(self, config: Configuration) -> dict:
+        """Validates the sample, logging warnings and raising errors.
+
+        Checks that the sample has the expected structure and data shapes based
+        on the provided configuration. Critical issues (missing required data,
+        shape mismatches) will raise a ValueError. Non-critical issues (e.g.,
+        unexpected data components found) will be logged as warnings using
+        the standard Python logging module.
 
         Args:
-            config: Configuration dict with expected shapes and required fields.
+            config: Configuration object defining expected shapes and required fields.
 
         Returns:
-            bool: True if validation passes, otherwise raises an exception.
+            dict: A dictionary indicating success: `{"valid": True}`.
+                If validation fails due to a critical issue, an exception is raised
+                instead of returning. Warnings encountered are logged.
+
+        Raises:
+            TypeError: If `config` is not a Configuration object.
+            ValueError: For critical validation failures like missing expected data,
+                incorrect data shapes, or missing required NWP providers.
         """
         if not isinstance(config, Configuration):
            raise TypeError("config must be Configuration object")
@@ -67,78 +85,146 @@ class UKRegionalSample(SampleBase):
 
         # Check GSP shape if specified
         gsp_key = GSPSampleKey.gsp
-        # Check if GSP data is expected but missing
         if gsp_key in expected_shapes and gsp_key not in self._data:
             raise ValueError(f"Configuration expects GSP data ('{gsp_key}') but is missing.")
 
-        # Check GSP shape if data exists and is expected
-        if gsp_key in self._data and gsp_key in expected_shapes:
-            gsp_data = self._data[gsp_key]
-            check_dimensions(
-                actual_shape=gsp_data.shape,
-                expected_shape=expected_shapes[gsp_key],
-                name="GSP",
-            )
+        if gsp_key in self._data:
+            if gsp_key in expected_shapes:
+                gsp_data = self._data[gsp_key]
+                check_dimensions(
+                    actual_shape=gsp_data.shape,
+                    expected_shape=expected_shapes[gsp_key],
+                    name="GSP",
+                )
+            else:
+                validation_warning(
+                    message=f"GSP data ('{gsp_key}') is present but not expected in configuration.",
+                    warning_type="unexpected_component",
+                    component=str(gsp_key),
+                )
 
-        # Checks for NWP data - nested structure
+        # Checks for NWP data
         nwp_key = NWPSampleKey.nwp
         if nwp_key in expected_shapes and nwp_key not in self._data:
             raise ValueError(f"Configuration expects NWP data ('{nwp_key}') but is missing.")
 
-        # Check NWP structure and shapes if data exists
         if nwp_key in self._data:
             nwp_data_all_providers = self._data[nwp_key]
             if not isinstance(nwp_data_all_providers, dict):
                 raise ValueError(f"NWP data ('{nwp_key}') should be a dictionary.")
 
-            # Loop through providers present in actual data
-            for provider, provider_data in nwp_data_all_providers.items():
-                if "nwp" not in provider_data:
-                    raise ValueError(f"Missing array key in NWP data for provider '{provider}'.")
+            if nwp_key in expected_shapes:
+                expected_providers = set(expected_shapes[nwp_key].keys())
+                actual_providers = set(nwp_data_all_providers.keys())
 
-                if nwp_key in expected_shapes and provider in expected_shapes[nwp_key]:
-                    nwp_array = provider_data["nwp"]
-                    actual_shape = nwp_array.shape
-                    expected_shape = expected_shapes[nwp_key][provider]
+                unexpected_providers = actual_providers - expected_providers
+                if unexpected_providers:
+                    validation_warning(
+                        message=f"Unexpected NWP providers found: {list(unexpected_providers)}",
+                        warning_type="unexpected_provider",
+                        providers=list(unexpected_providers),
+                    )
+
+                missing_expected_providers = expected_providers - actual_providers
+                if missing_expected_providers:
+                    raise ValueError(
+                        f"Expected NWP providers are missing from the data: "
+                        f"{list(missing_expected_providers)}",
+                    )
+
+                for provider in expected_shapes[nwp_key]:
+                    provider_data = nwp_data_all_providers[provider]
+
+                    if "nwp" not in provider_data:
+                        error_msg = (
+                            f"Missing array key 'nwp' in NWP data for provider '{provider}'."
+                        )
+                        raise ValueError(error_msg)
 
+                    nwp_array = provider_data["nwp"]
                     check_dimensions(
-                        actual_shape=actual_shape,
-                        expected_shape=expected_shape,
+                        actual_shape=nwp_array.shape,
+                        expected_shape=expected_shapes[nwp_key][provider],
                         name=f"NWP data ({provider})",
                     )
+            else:
+                validation_warning(
+                    message=(
+                        f"NWP data ('{nwp_key}') is present but not expected "
+                        "in configuration."
+                    ),
+                    warning_type="unexpected_component",
+                    component=str(nwp_key),
+                )
 
         # Validate satellite data
         sat_key = SatelliteSampleKey.satellite_actual
-        # Check if Satellite data is expected but missing
         if sat_key in expected_shapes and sat_key not in self._data:
             raise ValueError(f"Configuration expects Satellite data ('{sat_key}') but is missing.")
 
-        # Check satellite shape if data exists and is expected
-        if sat_key in self._data and sat_key in expected_shapes:
-            sat_data = self._data[sat_key]
-            check_dimensions(
-                actual_shape=sat_data.shape,
-                expected_shape=expected_shapes[sat_key],
-                name="Satellite data",
-            )
+        if sat_key in self._data:
+            if sat_key in expected_shapes:
+                sat_data = self._data[sat_key]
+                check_dimensions(
+                    actual_shape=sat_data.shape,
+                    expected_shape=expected_shapes[sat_key],
+                    name="Satellite data",
+                )
+            else:
+                validation_warning(
+                    message=(
+                        f"Satellite data ('{sat_key}') is present but not expected "
+                        "in configuration."
+                    ),
+                    warning_type="unexpected_component",
+                    component=str(sat_key),
+                )
 
         # Validate solar coordinates data
         solar_keys = ["solar_azimuth", "solar_elevation"]
-        # Check if solar coordinate is expected but missing
         for solar_key in solar_keys:
+            solar_name = solar_key.replace("_", " ").title()
             if solar_key in expected_shapes and solar_key not in self._data:
                 raise ValueError(f"Configuration expects {solar_key} data but is missing.")
 
-            # Check solar coordinate shape if data exists and is expected
-            if solar_key in self._data and solar_key in expected_shapes:
-                solar_data = self._data[solar_key]
-                check_dimensions(
-                    actual_shape=solar_data.shape,
-                    expected_shape=expected_shapes[solar_key],
-                    name=f"{solar_key.replace('_', ' ').title()} data",
+            if solar_key in self._data:
+                if solar_key in expected_shapes:
+                    solar_data = self._data[solar_key]
+                    check_dimensions(
+                        actual_shape=solar_data.shape,
+                        expected_shape=expected_shapes[solar_key],
+                        name=f"{solar_name} data",
+                    )
+                else:
+                    validation_warning(
+                        message=(
+                            f"{solar_name} data is present but not expected "
+                            "in configuration."
+                        ),
+                        warning_type="unexpected_component",
+                        component=solar_key,
+                    )
+
+        # Check for potentially unexpected components
+        checked_keys = {gsp_key, nwp_key, sat_key} | set(solar_keys)
+        all_present_keys = set(self._data.keys())
+        unexpected_present_keys = all_present_keys - set(expected_shapes.keys())
+
+        for key in unexpected_present_keys:
+            if key not in checked_keys:
+                validation_warning(
+                    message=(
+                        f"Unexpected component '{key}' is present in data but not defined "
+                        "in configuration's expected shapes."
+                    ),
+                    warning_type="unexpected_component",
+                    component=str(key),
                 )
 
-        return True
+        return {
+            "valid": True,
+        }
+
 
     @override
     def plot(self) -> None:
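
Since validate_sample now returns a dict rather than a bare True, callers that checked the boolean need a small update. A sketch of the new contract; the file paths are placeholders, and the name of the loading classmethod (load) is an assumption based on the torch.load call shown above:

import logging

from ocf_data_sampler.config import load_yaml_configuration
from ocf_data_sampler.torch_datasets.sample.uk_regional import UKRegionalSample

logging.basicConfig(level=logging.WARNING)  # surfaces the new non-critical warnings

config = load_yaml_configuration("config.yaml")  # placeholder path
sample = UKRegionalSample.load("sample.pt")      # assumed classmethod, placeholder path

try:
    result = sample.validate_sample(config)
    assert result == {"valid": True}
except ValueError as err:
    print(f"Critical validation failure: {err}")
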
@@ -1,8 +1,13 @@
 """Validate sample shape against expected shape - utility function."""
 
+import logging
+from typing import Any
+
 from ocf_data_sampler.config import Configuration
 from ocf_data_sampler.numpy_sample import GSPSampleKey, NWPSampleKey, SatelliteSampleKey
 
+logger = logging.getLogger(__name__)
+
 
 def check_dimensions(
     actual_shape: tuple[int, ...],
@@ -93,6 +98,40 @@ def calculate_expected_shapes(
     return expected_shapes
 
 
+def validation_warning(
+    message: str,
+    warning_type: str,
+    *,
+    component: str | None = None,
+    providers: list[str] | None = None,
+) -> None:
+    """Constructs warning details and logs a standard warning message.
+
+    Args:
+        message: The base warning message string.
+        warning_type: The category of the warning (e.g., 'unexpected_component').
+        component: Optional component identifier (e.g., 'gsp').
+        providers: Optional list of provider names (e.g., ['ukv']).
+
+    Returns:
+        None: This function directly logs the warning.
+    """
+    # Structured details of the warning, assembled alongside the log message
+    warning_info: dict[str, Any] = {"type": warning_type, "message": message}
+    log_message_parts = [message]
+    log_message_parts.append(f"(Type: {warning_type}")
+
+    if component is not None:
+        warning_info["component"] = component
+        log_message_parts.append(f", Component: {component}")
+    if providers is not None:
+        warning_info["providers"] = providers
+        log_message_parts.append(f", Providers: {providers}")
+
+    log_message_parts.append(")")
+    log_message = " ".join(log_message_parts)
+    logger.warning(log_message)
+
+
 def _calculate_time_steps(start_minutes: int, end_minutes: int, resolution_minutes: int) -> int:
     """Calculate number of time steps based on interval and resolution.
 
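
validation_warning only logs (it does not raise or return details); a minimal sketch of a call and, roughly, the message it emits. Logger configuration is up to the caller:

import logging

from ocf_data_sampler.torch_datasets.utils.validation_utils import validation_warning

logging.basicConfig(level=logging.WARNING)

# Logs a single warning roughly of the form:
# "Unexpected NWP providers found: ['ecmwf'] (Type: unexpected_provider , Providers: ['ecmwf'] )"
validation_warning(
    message="Unexpected NWP providers found: ['ecmwf']",
    warning_type="unexpected_provider",
    providers=["ecmwf"],
)
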
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ocf-data-sampler
-Version: 0.2.15
+Version: 0.2.17
 Author: James Fulton, Peter Dudfield
 Author-email: Open Climate Fix team <info@openclimatefix.org>
 License: MIT License
@@ -2,12 +2,13 @@ ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,
 ocf_data_sampler/utils.py,sha256=DjuneGGisl08ENvPZV_lrcX4b2NCKJC1ZpXgIpxuQi4,290
 ocf_data_sampler/config/__init__.py,sha256=O29mbH0XG2gIY1g3BaveGCnpBO2SFqdu-qzJ7a6evl0,223
 ocf_data_sampler/config/load.py,sha256=LL-7wemI8o4KPkx35j-wQ3HjsMvDgqXr7G46IcASfnU,632
-ocf_data_sampler/config/model.py,sha256=pb02qtCmWhJhrU3_T_gUzC7i2_JcO8xGwwhKGd8yMuk,10209
+ocf_data_sampler/config/model.py,sha256=SyjtlSK6gzQHWUfgX3VNKYLODyiKuD0Mu4hlm9GoHeg,10427
 ocf_data_sampler/config/save.py,sha256=m8SPw5rXjkMm1rByjh3pK5StdBi4e8ysnn3jQopdRaI,1064
-ocf_data_sampler/data/uk_gsp_locations.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
+ocf_data_sampler/data/uk_gsp_locations_20220314.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
+ocf_data_sampler/data/uk_gsp_locations_20250109.csv,sha256=XZISFatnbpO9j8LwaxNKFzQSjs6hcHFsV8a9uDDpy2E,9055334
 ocf_data_sampler/load/__init__.py,sha256=-vQP9g0UOWdVbjEGyVX_ipa7R1btmiETIKAf6aw4d78,201
-ocf_data_sampler/load/gsp.py,sha256=keB3Nv_CNK1P6pS9Kdfc8PoZXTI1_YFN-spsvEv_Ewc,899
-ocf_data_sampler/load/load_dataset.py,sha256=Cn-yz7RgHR2HkH3xQM1njivVEkp8rZC3KXXgcidwuME,1863
+ocf_data_sampler/load/gsp.py,sha256=UfPxwHw2Dw2xYSO5Al28oTamgnEM_n_4bYXsqGwY5Tc,1884
+ocf_data_sampler/load/load_dataset.py,sha256=sIi0nkijR_-1fRfW5JcXNTR0ccGbpkhxb7JX_zjJ-W4,1956
 ocf_data_sampler/load/satellite.py,sha256=E7Ln7Y60Qr1RTV-_R71YoxXQM-Ca7Y1faIo3oKB2eFk,2292
 ocf_data_sampler/load/site.py,sha256=zOzlWk6pYZBB5daqG8URGksmDXWKrkutUvN8uALAIh8,1468
 ocf_data_sampler/load/utils.py,sha256=sZ0-zzconcLkVQwAkCYrqKDo98Hrh5ChdiQJv5Bh91g,2040
@@ -38,22 +39,23 @@ ocf_data_sampler/select/location.py,sha256=AZvGR8y62opiW7zACGXjoOtBEWRfSLOZIA73O
 ocf_data_sampler/select/select_spatial_slice.py,sha256=liAqIa-Amj58pOqx5r16i99HURj9oQ41j7gnPgRDQP4,8201
 ocf_data_sampler/select/select_time_slice.py,sha256=HeHbwZ0CP03x0-LaJtpbSdtpLufwVTR73p6wH6O_PS8,5513
 ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=jfJSFcR0eO1AqeH7S3KnGjsBqVZT5w3oyi784PUR6Q0,146
-ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=9BJ4wVcZUMEzStVCbbWrf2eK8WPpV9SoeOQviZktHAc,12355
+ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=ZV2FoMPxFU2aPTWipj9HhJhGfrEg9MYOJRNR8aFcmvs,12613
 ocf_data_sampler/torch_datasets/datasets/site.py,sha256=nRUlhXQQGVrTuBmE1QnwXAUsPTXz0dsezlQjwK71jIQ,17641
 ocf_data_sampler/torch_datasets/sample/__init__.py,sha256=GL84vdZl_SjHDGVyh9Uekx2XhPYuZ0dnO3l6f6KXnHI,100
 ocf_data_sampler/torch_datasets/sample/base.py,sha256=cQ1oIyhdmlotejZK8B3Cw6MNvpdnBPD8G_o2h7Ye4Vc,2206
 ocf_data_sampler/torch_datasets/sample/site.py,sha256=ZUEgn50g-GmqujOEtezNILF7wjokF80sDAA4OOldcRI,1268
-ocf_data_sampler/torch_datasets/sample/uk_regional.py,sha256=8hDgaMg5Vb6eYitqYiljpAeTeTemwsYaRpZn7_3_XjI,7013
+ocf_data_sampler/torch_datasets/sample/uk_regional.py,sha256=Xx5cBYUyaM6PGUWQ76MHT9hwj6IJ7WAOxbpmYFbJGhc,10483
 ocf_data_sampler/torch_datasets/utils/__init__.py,sha256=N7i_hHtWUDiJqsiJoDx4T_QuiYOuvIyulPrn6xEA4TY,309
 ocf_data_sampler/torch_datasets/utils/channel_dict_to_dataarray.py,sha256=un2IiyoAmTDIymdeMiPU899_86iCDMD-oIifjHlNyqw,555
 ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py,sha256=we7BTxRH7B7jKayDT7YfNyfI3zZClz2Bk-HXKQIokgU,956
 ocf_data_sampler/torch_datasets/utils/spatial_slice_for_dataset.py,sha256=Hvz0wHSWMYYamf2oHNiGlzJcM4cAH6pL_7ZEvIBL2dE,1882
 ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py,sha256=1DN6VsWWdLvkpJxodZtBRDUgC4vJE2td_RP5J3ZqPNw,4268
 ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=xcy75cVxl0WrglnX5YUAFjXXlO2GwEBHWyqo8TDuiOA,4714
-ocf_data_sampler/torch_datasets/utils/validation_utils.py,sha256=2fwW-kpsMM2a-FWBG0YBT_r2LDIhhn7WokQ7GWvgx6U,3504
+ocf_data_sampler/torch_datasets/utils/validation_utils.py,sha256=YqmT-lExWlI8_ul3l0EP73Ik002fStr_bhsZh9mQqEU,4735
+scripts/download_gsp_location_data.py,sha256=rRDXMoqX-RYY4jPdxhdlxJGhWdl6r245F5UARgKV6P4,3121
 scripts/refactor_site.py,sha256=skzvsPP0Cn9yTKndzkilyNcGz4DZ88ctvCJ0XrBdc2A,3135
 utils/compute_icon_mean_stddev.py,sha256=a1oWMRMnny39rV-dvu8rcx85sb4bXzPFrR1gkUr4Jpg,2296
-ocf_data_sampler-0.2.15.dist-info/METADATA,sha256=tg-DIt5MElINQMTTHFhKuzAbgUB9slyRCDDE-xzaDkc,11581
-ocf_data_sampler-0.2.15.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-ocf_data_sampler-0.2.15.dist-info/top_level.txt,sha256=LEFU4Uk-PEo72QGLAfnVZIUEm37Q8mKuMeg_Xk-p33g,31
-ocf_data_sampler-0.2.15.dist-info/RECORD,,
+ocf_data_sampler-0.2.17.dist-info/METADATA,sha256=OKEhg6yBn1fCJKsWOBngnCXVSSd5G5VvOnck0J8bXxw,11581
+ocf_data_sampler-0.2.17.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
+ocf_data_sampler-0.2.17.dist-info/top_level.txt,sha256=LEFU4Uk-PEo72QGLAfnVZIUEm37Q8mKuMeg_Xk-p33g,31
+ocf_data_sampler-0.2.17.dist-info/RECORD,,
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (78.1.0)
+Generator: setuptools (79.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
@@ -0,0 +1,93 @@
+"""This script downloads the GSP location data from the NESO API and saves it to a CSV file.
+
+This script was used to create the `uk_gsp_locations_20250109.csv` file in the `data` directory.
+"""
+
+import io
+import os
+import tempfile
+import zipfile
+
+import geopandas as gpd
+import pandas as pd
+import requests
+
+# --- Configuration ---
+GSP_REGIONS_URL = (
+    "https://api.neso.energy/dataset/2810092e-d4b2-472f-b955-d8bea01f9ec0/"
+    "resource/d95e8c1b-9cd9-41dd-aacb-4b53b8c07c20/download/gsp_regions_20250109.zip"
+)
+# This is the path to the OSGB version of the boundaries. The lon-lat version can be found at:
+# Proj_4326/GSP_regions_4326_20250109.geojson
+GSP_REGIONS_GEOJSON_PATH_IN_ZIP = "Proj_27700/GSP_regions_27700_20250109.geojson"
+GSP_NAME_MAP_URL = "https://api.pvlive.uk/pvlive/api/v4/gsp_list"
+SAVE_PATH = "uk_gsp_locations_20250109.csv"
+# --- End Configuration ---
+
+
+with tempfile.TemporaryDirectory() as tmpdirname:
+
+    # Download the GSP regions
+    response_regions = requests.get(GSP_REGIONS_URL, timeout=30)
+    response_regions.raise_for_status()
+
+    # Unzip
+    with zipfile.ZipFile(io.BytesIO(response_regions.content)) as z:
+        geojson_extract_path = os.path.join(tmpdirname, GSP_REGIONS_GEOJSON_PATH_IN_ZIP)
+        z.extract(GSP_REGIONS_GEOJSON_PATH_IN_ZIP, tmpdirname)
+
+    # Load the GSP regions
+    df_bound = gpd.read_file(geojson_extract_path)
+
+# Download the GSP name mapping
+response_map = requests.get(GSP_NAME_MAP_URL, timeout=10)
+response_map.raise_for_status()
+
+# Load the GSP name mapping
+gsp_name_map = response_map.json()
+df_gsp_name_map = (
+    pd.DataFrame(data=gsp_name_map["data"], columns=gsp_name_map["meta"])
+    .drop("pes_id", axis=1)
+)
+
+
+def combine_gsps(gdf: gpd.GeoDataFrame) -> gpd.GeoSeries:
+    """Combine GSPs which have been split into multiple rows."""
+    # If only one row for the GSP name then just return the row
+    if len(gdf) == 1:
+        return gdf.iloc[0]
+
+    # If multiple rows for the GSP then get union of the GSP shapes
+    else:
+        return gpd.GeoSeries(gdf.unary_union, index=["geometry"], crs=gdf.crs)
+
+
+# Combine GSPs which have been split into multiple rows
+df_bound = (
+    df_bound.groupby("GSPs")
+    .apply(combine_gsps, include_groups=False)
+    .reset_index()
+)
+
+# Add the PVLive GSP ID for each GSP
+df_bound = (
+    df_bound.merge(df_gsp_name_map, left_on="GSPs", right_on="gsp_name")
+    .drop("GSPs", axis=1)
+)
+
+# Add the national GSP - this is the union of all GSPs
+national_boundaries = gpd.GeoDataFrame(
+    [["NATIONAL", df_bound.unary_union, 0]],
+    columns=["gsp_name", "geometry", "gsp_id"],
+    crs=df_bound.crs,
+)
+
+df_bound = pd.concat([national_boundaries, df_bound], ignore_index=True)
+
+# Add the coordinates for the centroid of each GSP
+df_bound["x_osgb"] = df_bound.geometry.centroid.x
+df_bound["y_osgb"] = df_bound.geometry.centroid.y
+
+# Reorder columns, sort by gsp_id (increasing) and save
+columns = ["gsp_id", "gsp_name", "geometry", "x_osgb", "y_osgb"]
+df_bound[columns].sort_values("gsp_id").to_csv(SAVE_PATH, index=False)
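
A quick sanity check of the script's output, assuming it is run from the directory containing the freshly written CSV:

import pandas as pd

df = pd.read_csv("uk_gsp_locations_20250109.csv", index_col="gsp_id")

# The national row (gsp_id=0) sorts first, followed by the individual GSPs
assert df.loc[0, "gsp_name"] == "NATIONAL"
print(df[["x_osgb", "y_osgb"]].head())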