ocf-data-sampler 0.5.41 (py3-none-any.whl) → 0.6.1 (py3-none-any.whl)

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. See the registry's page for this release for more details.

@@ -25,11 +25,20 @@ def open_site(generation_file_path: str, metadata_file_path: str) -> xr.DataArra
25
25
  metadata_df = metadata_df.reindex(generation_ds.site_id.values)
26
26
 
27
27
  # Assign coordinates to the Dataset using the aligned metadata
28
- generation_ds = generation_ds.assign_coords(
29
- latitude=("site_id", metadata_df["latitude"].values),
30
- longitude=("site_id", metadata_df["longitude"].values),
31
- capacity_kwp=("site_id", metadata_df["capacity_kwp"].values),
32
- )
28
+ # Check if variable capacity was passed with the generation data
29
+ # If not assign static capacity from metadata
30
+ if hasattr(generation_ds,"capacity_kwp"):
31
+ generation_ds = generation_ds.assign_coords(
32
+ latitude=(metadata_df.latitude.to_xarray()),
33
+ longitude=(metadata_df.longitude.to_xarray()),
34
+ capacity_kwp=generation_ds.capacity_kwp,
35
+ )
36
+ else:
37
+ generation_ds = generation_ds.assign_coords(
38
+ latitude=(metadata_df.latitude.to_xarray()),
39
+ longitude=(metadata_df.longitude.to_xarray()),
40
+ capacity_kwp=(metadata_df.capacity_kwp.to_xarray()),
41
+ )
33
42
 
34
43
  # Sanity checks, to prevent inf or negative values
35
44
  # Note NaNs are allowed in generation_kw as can have non overlapping time periods for sites
@@ -1,36 +1,9 @@
1
- """Base class for handling flat/nested data structures with NWP consideration."""
2
-
3
- from abc import ABC, abstractmethod
1
+ """Functions to convert batches to tensors and move them to a given device."""
4
2
 
5
3
  import numpy as np
6
4
  import torch
7
5
 
8
- from ocf_data_sampler.numpy_sample.common_types import NumpyBatch, NumpySample, TensorBatch
9
-
10
-
11
- class SampleBase(ABC):
12
- """Abstract base class for all sample types."""
13
-
14
- @abstractmethod
15
- def to_numpy(self) -> NumpySample:
16
- """Convert sample data to numpy format."""
17
- raise NotImplementedError
18
-
19
- @abstractmethod
20
- def plot(self) -> None:
21
- """Create a visualisation of the data."""
22
- raise NotImplementedError
23
-
24
- @abstractmethod
25
- def save(self, path: str) -> None:
26
- """Saves the sample to disk in the implementations' required format."""
27
- raise NotImplementedError
28
-
29
- @classmethod
30
- @abstractmethod
31
- def load(cls, path: str) -> "SampleBase":
32
- """Load a sample from disk from the implementations' format."""
33
- raise NotImplementedError
6
+ from ocf_data_sampler.numpy_sample.common_types import NumpyBatch, TensorBatch
34
7
 
35
8
 
36
9
  def batch_to_tensor(batch: NumpyBatch) -> TensorBatch:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocf-data-sampler
3
- Version: 0.5.41
3
+ Version: 0.6.1
4
4
  Author: James Fulton, Peter Dudfield
5
5
  Author-email: Open Climate Fix team <info@openclimatefix.org>
6
6
  License: MIT License
@@ -11,7 +11,7 @@ ocf_data_sampler/load/gsp.py,sha256=zsQ39dZBS45qd86lGfCZUjheLRTtMzIUozj-j8c87UQ,
11
11
  ocf_data_sampler/load/load_dataset.py,sha256=K8rWykjII-3g127If7WRRFivzHNx3SshCvZj4uQlf28,2089
12
12
  ocf_data_sampler/load/open_xarray_tensorstore.py,sha256=YglCBeKa4mSjUU5qlcMOLZXUtFrPFWVKDeKHLjs_YbA,6353
13
13
  ocf_data_sampler/load/satellite.py,sha256=5o5SfcplQfZFlm3JJq73j8_m_cWKpFtKk0tTKGjjCuE,1856
14
- ocf_data_sampler/load/site.py,sha256=bpFABjpvlstn6yJ6OPVlPZms-CjJdxNwCkQafpnj0Ik,2539
14
+ ocf_data_sampler/load/site.py,sha256=ZH85pgLTXTxa8zPHdJ-Y9Yy2ORDc3vRX2uD1I6uDI6g,2939
15
15
  ocf_data_sampler/load/utils.py,sha256=AGL0aOOQPrgqNBTjlBtR7Qg1PyQov3DFJo-y198u8pY,2044
16
16
  ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
17
17
  ocf_data_sampler/load/nwp/nwp.py,sha256=3nAsbw2yjmaF0YEHbCDciPLuIRBGxGuvixlMgKNc3zU,3570
@@ -44,10 +44,6 @@ ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=o0SsEXXZ6k9iL__5_RN1
44
44
  ocf_data_sampler/torch_datasets/datasets/picklecache.py,sha256=b8T5lgKfiPXLwuVQuFpCQBlU-HNBrA-Z-eSwYICKvsQ,1350
45
45
  ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=TDSraKUYE2wA5YF9SYO2RmAIbDJtcvfTtxr7WmYDszg,12385
46
46
  ocf_data_sampler/torch_datasets/datasets/site.py,sha256=IFMVy8c887mfT2NmcKX8ocm1OvlBhvzJNye_scfBMFQ,15265
47
- ocf_data_sampler/torch_datasets/sample/__init__.py,sha256=GL84vdZl_SjHDGVyh9Uekx2XhPYuZ0dnO3l6f6KXnHI,100
48
- ocf_data_sampler/torch_datasets/sample/base.py,sha256=cQ1oIyhdmlotejZK8B3Cw6MNvpdnBPD8G_o2h7Ye4Vc,2206
49
- ocf_data_sampler/torch_datasets/sample/site.py,sha256=40NwNTqjL1WVhPdwe02zDHHfDLG2u_bvCfRCtGAtFc0,1466
50
- ocf_data_sampler/torch_datasets/sample/uk_regional.py,sha256=GtChjvbFB-BqkGftgZyh8jgTUX9IostYeyaSoVMCzp0,10481
51
47
  ocf_data_sampler/torch_datasets/utils/__init__.py,sha256=4l1VcEmxHInU9G66zrimNMa8WcyKUASQST_iF9QfxUw,457
52
48
  ocf_data_sampler/torch_datasets/utils/add_alterate_coordinate_projections.py,sha256=w6Q4TyxNyl7PKAbhqiXvqOpnqIjwmOUcGREIvPNGYlQ,2666
53
49
  ocf_data_sampler/torch_datasets/utils/config_normalization_values_to_dicts.py,sha256=SGt1H2nXcaj44ND14-gHzvA7dkLfgjTacCq7rOkRGwg,1991
@@ -55,11 +51,12 @@ ocf_data_sampler/torch_datasets/utils/diff_nwp_data.py,sha256=o7NpKWxKHhwMbol3xB
55
51
  ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py,sha256=VkQv4wJVihObh_OiSuwKqV_w2lEOweaYgJPkm075CZc,2132
56
52
  ocf_data_sampler/torch_datasets/utils/spatial_slice_for_dataset.py,sha256=Hvz0wHSWMYYamf2oHNiGlzJcM4cAH6pL_7ZEvIBL2dE,1882
57
53
  ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py,sha256=1r1J2KNSo1_imN9gpVf5AupJaZ7VSnSevS1o_wck440,3925
54
+ ocf_data_sampler/torch_datasets/utils/torch_batch_utils.py,sha256=yL_24FOKGs9rYg9V3ZTc3LXd8_9N6EFz2yxnxPt_bi8,1434
58
55
  ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=xcy75cVxl0WrglnX5YUAFjXXlO2GwEBHWyqo8TDuiOA,4714
59
56
  ocf_data_sampler/torch_datasets/utils/validation_utils.py,sha256=YqmT-lExWlI8_ul3l0EP73Ik002fStr_bhsZh9mQqEU,4735
60
57
  scripts/download_gsp_location_data.py,sha256=rRDXMoqX-RYY4jPdxhdlxJGhWdl6r245F5UARgKV6P4,3121
61
58
  scripts/refactor_site.py,sha256=skzvsPP0Cn9yTKndzkilyNcGz4DZ88ctvCJ0XrBdc2A,3135
62
- ocf_data_sampler-0.5.41.dist-info/METADATA,sha256=-wCdU7dBURk_1dkDIuNOPZjfVkanwkbtHhfkSxhkOVg,13541
63
- ocf_data_sampler-0.5.41.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
64
- ocf_data_sampler-0.5.41.dist-info/top_level.txt,sha256=deUxqmsONNAGZDNbsntbXH7BRA1MqWaUeAJrCo6q_xA,25
65
- ocf_data_sampler-0.5.41.dist-info/RECORD,,
59
+ ocf_data_sampler-0.6.1.dist-info/METADATA,sha256=w0MKJCWR-a9VPWtGxWVP3_h2t5-V4ny1PiofevouGHY,13540
60
+ ocf_data_sampler-0.6.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
61
+ ocf_data_sampler-0.6.1.dist-info/top_level.txt,sha256=deUxqmsONNAGZDNbsntbXH7BRA1MqWaUeAJrCo6q_xA,25
62
+ ocf_data_sampler-0.6.1.dist-info/RECORD,,
@@ -1,3 +0,0 @@
1
- from .base import SampleBase
2
- from .uk_regional import UKRegionalSample
3
- from .site import SiteSample
@@ -1,48 +0,0 @@
1
- """PVNet Site sample implementation for netCDF data handling and conversion."""
2
-
3
- import torch
4
- from typing_extensions import override
5
-
6
- from ocf_data_sampler.numpy_sample.common_types import NumpySample
7
-
8
- from .base import SampleBase
9
-
10
-
11
- # TODO this is now similar to the UKRegionalSample
12
- # We should consider just having one Sample class for all datasets
13
- class SiteSample(SampleBase):
14
- """Handles SiteSample specific operations."""
15
-
16
- def __init__(self, data: NumpySample) -> None:
17
- """Initializes the SiteSample object with the given NumpySample."""
18
- self._data = data
19
-
20
- @override
21
- def to_numpy(self) -> NumpySample:
22
- return self._data
23
-
24
- @override
25
- def save(self, path: str) -> None:
26
- """Saves sample to the specified path in pickle format."""
27
- # Saves to pickle format
28
- torch.save(self._data, path)
29
-
30
- @classmethod
31
- @override
32
- def load(cls, path: str) -> "SiteSample":
33
- """Loads sample from the specified path.
34
-
35
- Args:
36
- path: Path to the saved sample file.
37
-
38
- Returns:
39
- A SiteSample instance with the loaded data.
40
- """
41
- # Loads from .pt format
42
- # TODO: We should move away from using torch.load(..., weights_only=False)
43
- return cls(torch.load(path, weights_only=False))
44
-
45
- @override
46
- def plot(self) -> None:
47
- # TODO - placeholder for now
48
- raise NotImplementedError("Plotting not yet implemented for SiteSample")
@@ -1,262 +0,0 @@
1
- """PVNet UK Regional sample implementation for dataset handling and visualisation."""
2
-
3
- import logging
4
-
5
- import torch
6
- from typing_extensions import override
7
-
8
- from ocf_data_sampler.config import Configuration
9
- from ocf_data_sampler.numpy_sample import (
10
- GSPSampleKey,
11
- NWPSampleKey,
12
- SatelliteSampleKey,
13
- )
14
- from ocf_data_sampler.numpy_sample.common_types import NumpySample
15
- from ocf_data_sampler.torch_datasets.sample.base import SampleBase
16
- from ocf_data_sampler.torch_datasets.utils.validation_utils import (
17
- calculate_expected_shapes,
18
- check_dimensions,
19
- validation_warning,
20
- )
21
-
22
- logger = logging.getLogger(__name__)
23
-
24
-
25
- class UKRegionalSample(SampleBase):
26
- """Handles UK Regional PVNet data operations."""
27
-
28
- def __init__(self, data: NumpySample) -> None:
29
- """Initialises UK Regional sample with data."""
30
- self._data = data
31
-
32
- @override
33
- def to_numpy(self) -> NumpySample:
34
- """Returns the data as a NumPy sample."""
35
- return self._data
36
-
37
- @override
38
- def save(self, path: str) -> None:
39
- """Saves sample to the specified path in pickle format."""
40
- # Saves to pickle format
41
- torch.save(self._data, path)
42
-
43
- @classmethod
44
- @override
45
- def load(cls, path: str) -> "UKRegionalSample":
46
- """Loads sample from the specified path.
47
-
48
- Args:
49
- path: Path to the saved sample file.
50
-
51
- Returns:
52
- A UKRegionalSample instance with the loaded data.
53
- """
54
- # Loads from .pt format
55
- # TODO: We should move away from using torch.load(..., weights_only=False)
56
- return cls(torch.load(path, weights_only=False))
57
-
58
- def validate_sample(self, config: Configuration) -> dict:
59
- """Validates the sample, logging warnings and raising errors.
60
-
61
- Checks that the sample has the expected structure and data shapes based
62
- on the provided configuration. Critical issues (missing required data,
63
- shape mismatches) will raise a ValueError. Non-critical issues (e.g.,
64
- unexpected data components found) will be logged as warnings using
65
- the standard Python logging module.
66
-
67
- Args:
68
- config: Configuration object defining expected shapes and required fields.
69
-
70
- Returns:
71
- dict: A dictionary indicating success: `{"valid": True}`.
72
- If validation fails due to a critical issue, an exception is raised
73
- instead of returning. Warnings encountered are logged.
74
-
75
- Raises:
76
- TypeError: If `config` is not a Configuration object.
77
- ValueError: For critical validation failures like missing expected data,
78
- incorrect data shapes, or missing required NWP providers.
79
- """
80
- if not isinstance(config, Configuration):
81
- raise TypeError("config must be Configuration object")
82
-
83
- # Calculate expected shapes from configuration
84
- expected_shapes = calculate_expected_shapes(config)
85
-
86
- # Check GSP shape if specified
87
- gsp_key = GSPSampleKey.gsp
88
- if gsp_key in expected_shapes and gsp_key not in self._data:
89
- raise ValueError(f"Configuration expects GSP data ('{gsp_key}') but is missing.")
90
-
91
- if gsp_key in self._data:
92
- if gsp_key in expected_shapes:
93
- gsp_data = self._data[gsp_key]
94
- check_dimensions(
95
- actual_shape=gsp_data.shape,
96
- expected_shape=expected_shapes[gsp_key],
97
- name="GSP",
98
- )
99
- else:
100
- validation_warning(
101
- message=f"GSP data ('{gsp_key}') is present but not expected in configuration.",
102
- warning_type="unexpected_component",
103
- component=str(gsp_key),
104
- )
105
-
106
- # Checks for NWP data
107
- nwp_key = NWPSampleKey.nwp
108
- if nwp_key in expected_shapes and nwp_key not in self._data:
109
- raise ValueError(f"Configuration expects NWP data ('{nwp_key}') but is missing.")
110
-
111
- if nwp_key in self._data:
112
- nwp_data_all_providers = self._data[nwp_key]
113
- if not isinstance(nwp_data_all_providers, dict):
114
- raise ValueError(f"NWP data ('{nwp_key}') should be a dictionary.")
115
-
116
- if nwp_key in expected_shapes:
117
- expected_providers = set(expected_shapes[nwp_key].keys())
118
- actual_providers = set(nwp_data_all_providers.keys())
119
-
120
- unexpected_providers = actual_providers - expected_providers
121
- if unexpected_providers:
122
- validation_warning(
123
- message=f"Unexpected NWP providers found: {list(unexpected_providers)}",
124
- warning_type="unexpected_provider",
125
- providers=list(unexpected_providers),
126
- )
127
-
128
- missing_expected_providers = expected_providers - actual_providers
129
- if missing_expected_providers:
130
- raise ValueError(
131
- f"Expected NWP providers are missing from the data: "
132
- f"{list(missing_expected_providers)}",
133
- )
134
-
135
- for provider in expected_shapes[nwp_key]:
136
- provider_data = nwp_data_all_providers[provider]
137
-
138
- if "nwp" not in provider_data:
139
- error_msg = (
140
- f"Missing array key 'nwp' in NWP data for provider '{provider}'."
141
- )
142
- raise ValueError(error_msg)
143
-
144
- nwp_array = provider_data["nwp"]
145
- check_dimensions(
146
- actual_shape=nwp_array.shape,
147
- expected_shape=expected_shapes[nwp_key][provider],
148
- name=f"NWP data ({provider})",
149
- )
150
- else:
151
- validation_warning(
152
- message=(
153
- f"NWP data ('{nwp_key}') is present but not expected "
154
- "in configuration."
155
- ),
156
- warning_type="unexpected_component",
157
- component=str(nwp_key),
158
- )
159
-
160
- # Validate satellite data
161
- sat_key = SatelliteSampleKey.satellite_actual
162
- if sat_key in expected_shapes and sat_key not in self._data:
163
- raise ValueError(f"Configuration expects Satellite data ('{sat_key}') but is missing.")
164
-
165
- if sat_key in self._data:
166
- if sat_key in expected_shapes:
167
- sat_data = self._data[sat_key]
168
- check_dimensions(
169
- actual_shape=sat_data.shape,
170
- expected_shape=expected_shapes[sat_key],
171
- name="Satellite data",
172
- )
173
- else:
174
- validation_warning(
175
- message=(
176
- f"Satellite data ('{sat_key}') is present but not expected "
177
- "in configuration."
178
- ),
179
- warning_type="unexpected_component",
180
- component=str(sat_key),
181
- )
182
-
183
- # Validate solar coordinates data
184
- solar_keys = ["solar_azimuth", "solar_elevation"]
185
- for solar_key in solar_keys:
186
- solar_name = solar_key.replace("_", " ").title()
187
- if solar_key in expected_shapes and solar_key not in self._data:
188
- raise ValueError(f"Configuration expects {solar_key} data but is missing.")
189
-
190
- if solar_key in self._data:
191
- if solar_key in expected_shapes:
192
- solar_data = self._data[solar_key]
193
- check_dimensions(
194
- actual_shape=solar_data.shape,
195
- expected_shape=expected_shapes[solar_key],
196
- name=f"{solar_name} data",
197
- )
198
- else:
199
- validation_warning(
200
- message=(
201
- f"{solar_name} data is present but not expected "
202
- "in configuration."
203
- ),
204
- warning_type="unexpected_component",
205
- component=solar_key,
206
- )
207
-
208
- # Check for potentially unexpected components
209
- checked_keys = {gsp_key, nwp_key, sat_key} | set(solar_keys)
210
- all_present_keys = set(self._data.keys())
211
- unexpected_present_keys = all_present_keys - set(expected_shapes.keys())
212
-
213
- for key in unexpected_present_keys:
214
- if key not in checked_keys:
215
- validation_warning(
216
- message=(
217
- f"Unexpected component '{key}' is present in data but not defined "
218
- "in configuration's expected shapes."
219
- ),
220
- warning_type="unexpected_component",
221
- component=str(key),
222
- )
223
-
224
- return {
225
- "valid": True,
226
- }
227
-
228
-
229
- @override
230
- def plot(self) -> None:
231
- """Plots the sample data for visualization."""
232
- from matplotlib import pyplot as plt
233
-
234
- _, axes = plt.subplots(2, 2, figsize=(12, 8))
235
-
236
- if NWPSampleKey.nwp in self._data:
237
- first_nwp = next(iter(self._data[NWPSampleKey.nwp].values()))
238
- if "nwp" in first_nwp:
239
- axes[0, 1].imshow(first_nwp["nwp"][0])
240
- title = "NWP (First Channel)"
241
- if NWPSampleKey.channel_names in first_nwp:
242
- channel_names = first_nwp[NWPSampleKey.channel_names]
243
- if channel_names:
244
- title = f"NWP: {channel_names[0]}"
245
- axes[0, 1].set_title(title)
246
-
247
- if GSPSampleKey.gsp in self._data:
248
- axes[0, 0].plot(self._data[GSPSampleKey.gsp])
249
- axes[0, 0].set_title("GSP Generation")
250
-
251
- if "solar_azimuth" in self._data and "solar_elevation" in self._data:
252
- axes[1, 1].plot(self._data["solar_azimuth"], label="Azimuth")
253
- axes[1, 1].plot(self._data["solar_elevation"], label="Elevation")
254
- axes[1, 1].set_title("Solar Position")
255
- axes[1, 1].legend()
256
-
257
- if SatelliteSampleKey.satellite_actual in self._data:
258
- axes[1, 0].imshow(self._data[SatelliteSampleKey.satellite_actual])
259
- axes[1, 0].set_title("Satellite Data")
260
-
261
- plt.tight_layout()
262
- plt.show()