ocf-data-sampler 0.1.11__py3-none-any.whl → 0.1.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocf-data-sampler might be problematic. Click here for more details.
- ocf_data_sampler/config/load.py +3 -3
- ocf_data_sampler/config/model.py +146 -64
- ocf_data_sampler/config/save.py +5 -4
- ocf_data_sampler/load/gsp.py +6 -5
- ocf_data_sampler/load/load_dataset.py +5 -6
- ocf_data_sampler/load/nwp/nwp.py +17 -5
- ocf_data_sampler/load/nwp/providers/ecmwf.py +6 -7
- ocf_data_sampler/load/nwp/providers/gfs.py +36 -0
- ocf_data_sampler/load/nwp/providers/icon.py +46 -0
- ocf_data_sampler/load/nwp/providers/ukv.py +4 -5
- ocf_data_sampler/load/nwp/providers/utils.py +3 -1
- ocf_data_sampler/load/satellite.py +9 -10
- ocf_data_sampler/load/site.py +10 -6
- ocf_data_sampler/load/utils.py +21 -16
- ocf_data_sampler/numpy_sample/collate.py +10 -9
- ocf_data_sampler/numpy_sample/datetime_features.py +3 -5
- ocf_data_sampler/numpy_sample/gsp.py +12 -14
- ocf_data_sampler/numpy_sample/nwp.py +12 -12
- ocf_data_sampler/numpy_sample/satellite.py +9 -9
- ocf_data_sampler/numpy_sample/site.py +5 -8
- ocf_data_sampler/numpy_sample/sun_position.py +16 -21
- ocf_data_sampler/sample/base.py +15 -17
- ocf_data_sampler/sample/site.py +13 -20
- ocf_data_sampler/sample/uk_regional.py +29 -35
- ocf_data_sampler/select/dropout.py +16 -14
- ocf_data_sampler/select/fill_time_periods.py +15 -5
- ocf_data_sampler/select/find_contiguous_time_periods.py +88 -75
- ocf_data_sampler/select/geospatial.py +63 -54
- ocf_data_sampler/select/location.py +16 -51
- ocf_data_sampler/select/select_spatial_slice.py +105 -89
- ocf_data_sampler/select/select_time_slice.py +71 -58
- ocf_data_sampler/select/spatial_slice_for_dataset.py +7 -6
- ocf_data_sampler/select/time_slice_for_dataset.py +17 -16
- ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +140 -131
- ocf_data_sampler/torch_datasets/datasets/site.py +152 -112
- ocf_data_sampler/torch_datasets/utils/__init__.py +3 -0
- ocf_data_sampler/torch_datasets/utils/channel_dict_to_dataarray.py +11 -0
- ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +6 -2
- ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +23 -22
- ocf_data_sampler/utils.py +3 -1
- {ocf_data_sampler-0.1.11.dist-info → ocf_data_sampler-0.1.17.dist-info}/METADATA +7 -18
- ocf_data_sampler-0.1.17.dist-info/RECORD +56 -0
- {ocf_data_sampler-0.1.11.dist-info → ocf_data_sampler-0.1.17.dist-info}/WHEEL +1 -1
- {ocf_data_sampler-0.1.11.dist-info → ocf_data_sampler-0.1.17.dist-info}/top_level.txt +1 -1
- scripts/refactor_site.py +63 -33
- utils/compute_icon_mean_stddev.py +72 -0
- ocf_data_sampler/constants.py +0 -222
- ocf_data_sampler/torch_datasets/utils/validate_channels.py +0 -82
- ocf_data_sampler-0.1.11.dist-info/LICENSE +0 -21
- ocf_data_sampler-0.1.11.dist-info/RECORD +0 -82
- tests/__init__.py +0 -0
- tests/config/test_config.py +0 -113
- tests/config/test_load.py +0 -7
- tests/config/test_save.py +0 -28
- tests/conftest.py +0 -319
- tests/load/test_load_gsp.py +0 -15
- tests/load/test_load_nwp.py +0 -21
- tests/load/test_load_satellite.py +0 -17
- tests/load/test_load_sites.py +0 -14
- tests/numpy_sample/test_collate.py +0 -21
- tests/numpy_sample/test_datetime_features.py +0 -37
- tests/numpy_sample/test_gsp.py +0 -38
- tests/numpy_sample/test_nwp.py +0 -13
- tests/numpy_sample/test_satellite.py +0 -40
- tests/numpy_sample/test_sun_position.py +0 -81
- tests/select/test_dropout.py +0 -69
- tests/select/test_fill_time_periods.py +0 -28
- tests/select/test_find_contiguous_time_periods.py +0 -202
- tests/select/test_location.py +0 -67
- tests/select/test_select_spatial_slice.py +0 -154
- tests/select/test_select_time_slice.py +0 -275
- tests/test_sample/test_base.py +0 -164
- tests/test_sample/test_site_sample.py +0 -165
- tests/test_sample/test_uk_regional_sample.py +0 -136
- tests/torch_datasets/test_merge_and_fill_utils.py +0 -40
- tests/torch_datasets/test_pvnet_uk.py +0 -154
- tests/torch_datasets/test_site.py +0 -226
- tests/torch_datasets/test_validate_channels_utils.py +0 -78
|
@@ -1,165 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Site class testing - SiteSample
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import pytest
|
|
6
|
-
import numpy as np
|
|
7
|
-
import xarray as xr
|
|
8
|
-
import pandas as pd
|
|
9
|
-
|
|
10
|
-
from pathlib import Path
|
|
11
|
-
from xarray import Dataset
|
|
12
|
-
|
|
13
|
-
from ocf_data_sampler.sample.site import SiteSample
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
@pytest.fixture
|
|
17
|
-
def sample_data():
|
|
18
|
-
""" Fixture creation with sample data """
|
|
19
|
-
|
|
20
|
-
# Time periods specified
|
|
21
|
-
init_time = pd.Timestamp("2023-01-01 00:00")
|
|
22
|
-
target_times = pd.date_range("2023-01-01 00:00", periods=4, freq="1h")
|
|
23
|
-
sat_times = pd.date_range("2023-01-01 00:00", periods=7, freq="5min")
|
|
24
|
-
site_times = pd.date_range("2023-01-01 00:00", periods=4, freq="30min")
|
|
25
|
-
|
|
26
|
-
# Defined steps for NWP data
|
|
27
|
-
steps = [(t - init_time) for t in target_times]
|
|
28
|
-
|
|
29
|
-
# Create the sample dataset
|
|
30
|
-
return Dataset(
|
|
31
|
-
data_vars={
|
|
32
|
-
'nwp-ukv': (
|
|
33
|
-
["nwp-ukv__target_time_utc", "nwp-ukv__channel",
|
|
34
|
-
"nwp-ukv__y_osgb", "nwp-ukv__x_osgb"],
|
|
35
|
-
np.random.rand(4, 1, 2, 2)
|
|
36
|
-
),
|
|
37
|
-
'satellite': (
|
|
38
|
-
["satellite__time_utc", "satellite__channel",
|
|
39
|
-
"satellite__y_geostationary", "satellite__x_geostationary"],
|
|
40
|
-
np.random.rand(7, 1, 2, 2)
|
|
41
|
-
),
|
|
42
|
-
'site': (["site__time_utc"], np.random.rand(4))
|
|
43
|
-
},
|
|
44
|
-
coords={
|
|
45
|
-
# NWP coords
|
|
46
|
-
'nwp-ukv__target_time_utc': target_times,
|
|
47
|
-
'nwp-ukv__channel': ['dswrf'],
|
|
48
|
-
'nwp-ukv__y_osgb': [0, 1],
|
|
49
|
-
'nwp-ukv__x_osgb': [0, 1],
|
|
50
|
-
'nwp-ukv__init_time_utc': init_time,
|
|
51
|
-
'nwp-ukv__step': ('nwp-ukv__target_time_utc', steps),
|
|
52
|
-
|
|
53
|
-
# Sat coords
|
|
54
|
-
'satellite__time_utc': sat_times,
|
|
55
|
-
'satellite__channel': ['vis'],
|
|
56
|
-
'satellite__y_geostationary': [0, 1],
|
|
57
|
-
'satellite__x_geostationary': [0, 1],
|
|
58
|
-
|
|
59
|
-
# Site coords
|
|
60
|
-
'site__time_utc': site_times,
|
|
61
|
-
'site__capacity_kwp': 1000.0,
|
|
62
|
-
'site__site_id': 1,
|
|
63
|
-
'site__longitude': -3.5,
|
|
64
|
-
'site__latitude': 51.5,
|
|
65
|
-
|
|
66
|
-
# Site features as coords
|
|
67
|
-
'site__solar_azimuth': ('site__time_utc', np.random.rand(4)),
|
|
68
|
-
'site__solar_elevation': ('site__time_utc', np.random.rand(4)),
|
|
69
|
-
'site__date_cos': ('site__time_utc', np.random.rand(4)),
|
|
70
|
-
'site__date_sin': ('site__time_utc', np.random.rand(4)),
|
|
71
|
-
'site__time_cos': ('site__time_utc', np.random.rand(4)),
|
|
72
|
-
'site__time_sin': ('site__time_utc', np.random.rand(4))
|
|
73
|
-
}
|
|
74
|
-
)
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
def test_site_sample_with_data(sample_data):
|
|
78
|
-
""" Testing of defined sample with actual data """
|
|
79
|
-
sample = SiteSample(sample_data)
|
|
80
|
-
|
|
81
|
-
# Assert data structure
|
|
82
|
-
assert isinstance(sample._data, Dataset)
|
|
83
|
-
|
|
84
|
-
# Assert dimensions / shapes
|
|
85
|
-
expected_dims = {
|
|
86
|
-
"satellite__x_geostationary",
|
|
87
|
-
"site__time_utc",
|
|
88
|
-
"nwp-ukv__target_time_utc",
|
|
89
|
-
"nwp-ukv__x_osgb",
|
|
90
|
-
"satellite__channel",
|
|
91
|
-
"satellite__y_geostationary",
|
|
92
|
-
"satellite__time_utc",
|
|
93
|
-
"nwp-ukv__channel",
|
|
94
|
-
"nwp-ukv__y_osgb",
|
|
95
|
-
}
|
|
96
|
-
assert set(sample._data.dims) == expected_dims
|
|
97
|
-
assert sample._data["satellite"].values.shape == (7, 1, 2, 2)
|
|
98
|
-
assert sample._data["nwp-ukv"].values.shape == (4, 1, 2, 2)
|
|
99
|
-
assert sample._data["site"].values.shape == (4,)
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
def test_save_load(tmp_path, sample_data):
|
|
103
|
-
""" Save and load functionality """
|
|
104
|
-
sample = SiteSample(sample_data)
|
|
105
|
-
filepath = tmp_path / "test_sample.nc"
|
|
106
|
-
sample.save(filepath)
|
|
107
|
-
|
|
108
|
-
# Assert file exists and has content
|
|
109
|
-
assert filepath.exists()
|
|
110
|
-
assert filepath.stat().st_size > 0
|
|
111
|
-
|
|
112
|
-
# Load and verify
|
|
113
|
-
loaded = SiteSample.load(filepath)
|
|
114
|
-
assert isinstance(loaded, SiteSample)
|
|
115
|
-
assert isinstance(loaded._data, Dataset)
|
|
116
|
-
|
|
117
|
-
# Compare original / loaded data
|
|
118
|
-
xr.testing.assert_identical(sample._data, loaded._data)
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
def test_invalid_data_type():
|
|
122
|
-
""" Handling of invalid data types """
|
|
123
|
-
|
|
124
|
-
with pytest.raises(TypeError, match="Data must be xarray Dataset"):
|
|
125
|
-
_ = SiteSample({"invalid": "data"})
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
def test_to_numpy(sample_data):
|
|
129
|
-
""" To numpy conversion """
|
|
130
|
-
sample = SiteSample(sample_data)
|
|
131
|
-
numpy_data = sample.to_numpy()
|
|
132
|
-
|
|
133
|
-
# Assert structure
|
|
134
|
-
assert isinstance(numpy_data, dict)
|
|
135
|
-
assert 'site' in numpy_data
|
|
136
|
-
assert 'nwp' in numpy_data
|
|
137
|
-
|
|
138
|
-
# Check site - numpy array instead of dict
|
|
139
|
-
site_data = numpy_data['site']
|
|
140
|
-
assert isinstance(site_data, np.ndarray)
|
|
141
|
-
assert site_data.ndim == 1
|
|
142
|
-
assert len(site_data) == 4
|
|
143
|
-
assert np.all(site_data >= 0) and np.all(site_data <= 1)
|
|
144
|
-
|
|
145
|
-
# Check NWP
|
|
146
|
-
assert 'ukv' in numpy_data['nwp']
|
|
147
|
-
nwp_data = numpy_data['nwp']['ukv']
|
|
148
|
-
assert 'nwp' in nwp_data
|
|
149
|
-
assert nwp_data['nwp'].shape == (4, 1, 2, 2)
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
def test_data_consistency(sample_data):
|
|
153
|
-
""" Consistency of data across operations """
|
|
154
|
-
sample = SiteSample(sample_data)
|
|
155
|
-
numpy_data = sample.to_numpy()
|
|
156
|
-
|
|
157
|
-
# Assert components remain consistent after conversion above
|
|
158
|
-
assert numpy_data['nwp']['ukv']['nwp'].shape == (4, 1, 2, 2)
|
|
159
|
-
assert 'site' in numpy_data
|
|
160
|
-
|
|
161
|
-
# Update site data checks to expect numpy array
|
|
162
|
-
assert isinstance(numpy_data['site'], np.ndarray)
|
|
163
|
-
assert numpy_data['site'].shape == (4,)
|
|
164
|
-
assert np.all(numpy_data['site'] >= 0)
|
|
165
|
-
assert np.all(numpy_data['site'] <= 1)
|
|
@@ -1,136 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
UK Regional class testing - UKRegionalSample
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import pytest
|
|
6
|
-
import numpy as np
|
|
7
|
-
import torch
|
|
8
|
-
import tempfile
|
|
9
|
-
|
|
10
|
-
from ocf_data_sampler.numpy_sample import (
|
|
11
|
-
GSPSampleKey,
|
|
12
|
-
SatelliteSampleKey,
|
|
13
|
-
NWPSampleKey
|
|
14
|
-
)
|
|
15
|
-
|
|
16
|
-
from ocf_data_sampler.sample.uk_regional import UKRegionalSample
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
@pytest.fixture
|
|
20
|
-
def pvnet_config_filename(tmp_path):
|
|
21
|
-
""" Minimal config file - testing """
|
|
22
|
-
config_content = """
|
|
23
|
-
input_data:
|
|
24
|
-
gsp:
|
|
25
|
-
zarr_path: ""
|
|
26
|
-
time_resolution_minutes: 30
|
|
27
|
-
interval_start_minutes: -180
|
|
28
|
-
interval_end_minutes: 0
|
|
29
|
-
nwp:
|
|
30
|
-
ukv:
|
|
31
|
-
zarr_path: ""
|
|
32
|
-
image_size_pixels_height: 64
|
|
33
|
-
image_size_pixels_width: 64
|
|
34
|
-
time_resolution_minutes: 60
|
|
35
|
-
interval_start_minutes: -180
|
|
36
|
-
interval_end_minutes: 0
|
|
37
|
-
channels: ["t", "dswrf"]
|
|
38
|
-
provider: "ukv"
|
|
39
|
-
satellite:
|
|
40
|
-
zarr_path: ""
|
|
41
|
-
image_size_pixels_height: 64
|
|
42
|
-
image_size_pixels_width: 64
|
|
43
|
-
time_resolution_minutes: 30
|
|
44
|
-
interval_start_minutes: -180
|
|
45
|
-
interval_end_minutes: 0
|
|
46
|
-
channels: ["HRV"]
|
|
47
|
-
"""
|
|
48
|
-
config_file = tmp_path / "test_config.yaml"
|
|
49
|
-
config_file.write_text(config_content)
|
|
50
|
-
return str(config_file)
|
|
51
|
-
|
|
52
|
-
@pytest.fixture
|
|
53
|
-
def numpy_sample():
|
|
54
|
-
""" Synthetic data generation """
|
|
55
|
-
|
|
56
|
-
# Field / spatial coordinates
|
|
57
|
-
nwp_data = {
|
|
58
|
-
'nwp': np.random.rand(4, 1, 2, 2),
|
|
59
|
-
'x': np.array([1, 2]),
|
|
60
|
-
'y': np.array([1, 2]),
|
|
61
|
-
NWPSampleKey.channel_names: ['test_channel']
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
return {
|
|
65
|
-
'nwp': {
|
|
66
|
-
'ukv': nwp_data
|
|
67
|
-
},
|
|
68
|
-
GSPSampleKey.gsp: np.random.rand(7),
|
|
69
|
-
SatelliteSampleKey.satellite_actual: np.random.rand(7, 1, 2, 2),
|
|
70
|
-
GSPSampleKey.solar_azimuth: np.random.rand(7),
|
|
71
|
-
GSPSampleKey.solar_elevation: np.random.rand(7)
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
def test_sample_save_load(numpy_sample):
|
|
76
|
-
sample = UKRegionalSample(numpy_sample)
|
|
77
|
-
|
|
78
|
-
with tempfile.NamedTemporaryFile(suffix='.pt') as tf:
|
|
79
|
-
sample.save(tf.name)
|
|
80
|
-
loaded = UKRegionalSample.load(tf.name)
|
|
81
|
-
|
|
82
|
-
assert set(loaded._data.keys()) == set(sample._data.keys())
|
|
83
|
-
assert isinstance(loaded._data['nwp'], dict)
|
|
84
|
-
assert 'ukv' in loaded._data['nwp']
|
|
85
|
-
|
|
86
|
-
assert loaded._data[GSPSampleKey.gsp].shape == (7,)
|
|
87
|
-
assert loaded._data[SatelliteSampleKey.satellite_actual].shape == (7, 1, 2, 2)
|
|
88
|
-
assert loaded._data[GSPSampleKey.solar_azimuth].shape == (7,)
|
|
89
|
-
assert loaded._data[GSPSampleKey.solar_elevation].shape == (7,)
|
|
90
|
-
|
|
91
|
-
np.testing.assert_array_almost_equal(
|
|
92
|
-
loaded._data[GSPSampleKey.gsp],
|
|
93
|
-
sample._data[GSPSampleKey.gsp]
|
|
94
|
-
)
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
def test_load_corrupted_file():
|
|
98
|
-
""" Test loading - corrupted / empty file """
|
|
99
|
-
|
|
100
|
-
with tempfile.NamedTemporaryFile(suffix='.pt') as tf:
|
|
101
|
-
with open(tf.name, 'wb') as f:
|
|
102
|
-
f.write(b'corrupted data')
|
|
103
|
-
|
|
104
|
-
with pytest.raises(Exception):
|
|
105
|
-
UKRegionalSample.load(tf.name)
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
def test_to_numpy():
|
|
109
|
-
""" To numpy conversion check """
|
|
110
|
-
|
|
111
|
-
data = {
|
|
112
|
-
'nwp': {
|
|
113
|
-
'ukv': {
|
|
114
|
-
'nwp': np.random.rand(4, 1, 2, 2),
|
|
115
|
-
'x': np.array([1, 2]),
|
|
116
|
-
'y': np.array([1, 2])
|
|
117
|
-
}
|
|
118
|
-
},
|
|
119
|
-
GSPSampleKey.gsp: np.random.rand(7),
|
|
120
|
-
SatelliteSampleKey.satellite_actual: np.random.rand(7, 1, 2, 2),
|
|
121
|
-
GSPSampleKey.solar_azimuth: np.random.rand(7),
|
|
122
|
-
GSPSampleKey.solar_elevation: np.random.rand(7)
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
sample = UKRegionalSample(data)
|
|
126
|
-
|
|
127
|
-
numpy_data = sample.to_numpy()
|
|
128
|
-
|
|
129
|
-
# Check returned data matches
|
|
130
|
-
assert numpy_data == sample._data
|
|
131
|
-
assert len(numpy_data) == len(sample._data)
|
|
132
|
-
|
|
133
|
-
# Assert specific keys and types
|
|
134
|
-
assert 'nwp' in numpy_data
|
|
135
|
-
assert isinstance(numpy_data['nwp']['ukv']['nwp'], np.ndarray)
|
|
136
|
-
assert numpy_data[GSPSampleKey.gsp].shape == (7,)
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
from ocf_data_sampler.torch_datasets.utils.merge_and_fill_utils import (
|
|
4
|
-
merge_dicts,
|
|
5
|
-
fill_nans_in_arrays,
|
|
6
|
-
)
|
|
7
|
-
|
|
8
|
-
def test_merge_dicts():
|
|
9
|
-
"""Test merge_dicts function"""
|
|
10
|
-
dict1 = {"a": 1, "b": 2}
|
|
11
|
-
dict2 = {"c": 3, "d": 4}
|
|
12
|
-
dict3 = {"e": 5}
|
|
13
|
-
|
|
14
|
-
result = merge_dicts([dict1, dict2, dict3])
|
|
15
|
-
assert result == {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5}
|
|
16
|
-
|
|
17
|
-
# Test key overwriting
|
|
18
|
-
dict4 = {"a": 10, "f": 6}
|
|
19
|
-
result = merge_dicts([dict1, dict4])
|
|
20
|
-
assert result["a"] == 10
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def test_fill_nans_in_arrays():
|
|
24
|
-
"""Test the fill_nans_in_arrays function"""
|
|
25
|
-
array_with_nans = np.array([1.0, np.nan, 3.0, np.nan])
|
|
26
|
-
nested_dict = {
|
|
27
|
-
"array1": array_with_nans,
|
|
28
|
-
"nested": {
|
|
29
|
-
"array2": np.array([np.nan, 2.0, np.nan, 4.0])
|
|
30
|
-
},
|
|
31
|
-
"string_key": "not_an_array"
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
result = fill_nans_in_arrays(nested_dict)
|
|
35
|
-
|
|
36
|
-
assert np.array_equal(result["array1"], np.array([1.0, 0.0, 3.0, 0.0]))
|
|
37
|
-
assert np.array_equal(result["nested"]["array2"], np.array([0.0, 2.0, 0.0, 4.0]))
|
|
38
|
-
assert result["string_key"] == "not_an_array"
|
|
39
|
-
|
|
40
|
-
|
|
@@ -1,154 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import pandas as pd
|
|
3
|
-
import xarray as xr
|
|
4
|
-
import dask.array
|
|
5
|
-
|
|
6
|
-
from ocf_data_sampler.config import load_yaml_configuration, save_yaml_configuration
|
|
7
|
-
from ocf_data_sampler.torch_datasets.datasets.pvnet_uk import (
|
|
8
|
-
PVNetUKRegionalDataset,
|
|
9
|
-
PVNetUKConcurrentDataset,
|
|
10
|
-
process_and_combine_datasets,
|
|
11
|
-
compute,
|
|
12
|
-
)
|
|
13
|
-
from ocf_data_sampler.select.location import Location
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def test_process_and_combine_datasets(pvnet_config_filename, ds_nwp_ukv_time_sliced):
|
|
17
|
-
|
|
18
|
-
config = load_yaml_configuration(pvnet_config_filename)
|
|
19
|
-
|
|
20
|
-
t0 = pd.Timestamp("2024-01-01 00:00")
|
|
21
|
-
location = Location(coordinate_system="osgb", x=1234, y=5678, id=1)
|
|
22
|
-
|
|
23
|
-
sat_data = xr.DataArray(
|
|
24
|
-
np.random.rand(7, 1, 2, 2),
|
|
25
|
-
dims=["time_utc", "channel", "y", "x"],
|
|
26
|
-
coords={
|
|
27
|
-
"time_utc": pd.date_range("2024-01-01 00:00", periods=7, freq="5min"),
|
|
28
|
-
"channel": ["HRV"],
|
|
29
|
-
"x_geostationary": (["y", "x"], np.array([[1, 2], [1, 2]])),
|
|
30
|
-
"y_geostationary": (["y", "x"], np.array([[1, 1], [2, 2]]))
|
|
31
|
-
}
|
|
32
|
-
)
|
|
33
|
-
|
|
34
|
-
dataset_dict = {
|
|
35
|
-
"nwp": {"ukv": ds_nwp_ukv_time_sliced},
|
|
36
|
-
"sat": sat_data
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
sample = process_and_combine_datasets(dataset_dict, config, t0, location)
|
|
40
|
-
|
|
41
|
-
assert isinstance(sample, dict)
|
|
42
|
-
assert "nwp" in sample
|
|
43
|
-
assert sample["satellite_actual"].shape == sat_data.shape
|
|
44
|
-
assert sample["nwp"]["ukv"]["nwp"].shape == ds_nwp_ukv_time_sliced.shape
|
|
45
|
-
assert "gsp_id" in sample
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def test_compute():
|
|
49
|
-
"""Test compute function with dask array"""
|
|
50
|
-
da_dask = xr.DataArray(dask.array.random.random((5, 5)))
|
|
51
|
-
|
|
52
|
-
# Create a nested dictionary with dask array
|
|
53
|
-
lazy_data_dict = {
|
|
54
|
-
"array1": da_dask,
|
|
55
|
-
"nested": {
|
|
56
|
-
"array2": da_dask
|
|
57
|
-
}
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
computed_data_dict = compute(lazy_data_dict)
|
|
61
|
-
|
|
62
|
-
# Assert that the result is no longer lazy
|
|
63
|
-
assert isinstance(computed_data_dict["array1"].data, np.ndarray)
|
|
64
|
-
assert isinstance(computed_data_dict["nested"]["array2"].data, np.ndarray)
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def test_pvnet_uk_regional_dataset(pvnet_config_filename):
|
|
68
|
-
|
|
69
|
-
# Create dataset object
|
|
70
|
-
dataset = PVNetUKRegionalDataset(pvnet_config_filename)
|
|
71
|
-
|
|
72
|
-
assert len(dataset.locations) == 317 # Number of regional GSPs
|
|
73
|
-
# NB. I have not checked the value (39 below) is in fact correct
|
|
74
|
-
assert len(dataset.valid_t0_times) == 39
|
|
75
|
-
assert len(dataset) == 317*39
|
|
76
|
-
|
|
77
|
-
# Generate a sample
|
|
78
|
-
sample = dataset[0]
|
|
79
|
-
|
|
80
|
-
assert isinstance(sample, dict)
|
|
81
|
-
|
|
82
|
-
for key in [
|
|
83
|
-
"nwp", "satellite_actual", "gsp",
|
|
84
|
-
"gsp_solar_azimuth", "gsp_solar_elevation",
|
|
85
|
-
]:
|
|
86
|
-
assert key in sample
|
|
87
|
-
|
|
88
|
-
for nwp_source in ["ukv"]:
|
|
89
|
-
assert nwp_source in sample["nwp"]
|
|
90
|
-
|
|
91
|
-
# Check the shape of the data is correct
|
|
92
|
-
# 30 minutes of 5 minute data (inclusive), one channel, 2x2 pixels
|
|
93
|
-
assert sample["satellite_actual"].shape == (7, 1, 2, 2)
|
|
94
|
-
# 3 hours of 60 minute data (inclusive), one channel, 2x2 pixels
|
|
95
|
-
assert sample["nwp"]["ukv"]["nwp"].shape == (4, 1, 2, 2)
|
|
96
|
-
# 3 hours of 30 minute data (inclusive)
|
|
97
|
-
assert sample["gsp"].shape == (7,)
|
|
98
|
-
# Solar angles have same shape as GSP data
|
|
99
|
-
assert sample["gsp_solar_azimuth"].shape == (7,)
|
|
100
|
-
assert sample["gsp_solar_elevation"].shape == (7,)
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
def test_pvnet_no_gsp(tmp_path, pvnet_config_filename):
|
|
104
|
-
|
|
105
|
-
# Create new config without GSP inputs
|
|
106
|
-
config = load_yaml_configuration(pvnet_config_filename)
|
|
107
|
-
config.input_data.gsp.zarr_path = ''
|
|
108
|
-
new_config_path = tmp_path / "pvnet_config_no_gsp.yaml"
|
|
109
|
-
save_yaml_configuration(config, new_config_path)
|
|
110
|
-
|
|
111
|
-
# Create dataset object
|
|
112
|
-
dataset = PVNetUKRegionalDataset(new_config_path)
|
|
113
|
-
|
|
114
|
-
# Generate a sample
|
|
115
|
-
_ = dataset[0]
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
def test_pvnet_uk_concurrent_dataset(pvnet_config_filename):
|
|
119
|
-
|
|
120
|
-
# Create dataset object using a limited set of GSPs for test
|
|
121
|
-
gsp_ids = [1,2,3]
|
|
122
|
-
num_gsps = len(gsp_ids)
|
|
123
|
-
|
|
124
|
-
dataset = PVNetUKConcurrentDataset(pvnet_config_filename, gsp_ids=gsp_ids)
|
|
125
|
-
|
|
126
|
-
assert len(dataset.locations) == num_gsps # Number of regional GSPs
|
|
127
|
-
# NB. I have not checked the value (39 below) is in fact correct
|
|
128
|
-
assert len(dataset.valid_t0_times) == 39
|
|
129
|
-
assert len(dataset) == 39
|
|
130
|
-
|
|
131
|
-
# Generate a sample
|
|
132
|
-
sample = dataset[0]
|
|
133
|
-
|
|
134
|
-
assert isinstance(sample, dict)
|
|
135
|
-
|
|
136
|
-
for key in [
|
|
137
|
-
"nwp", "satellite_actual", "gsp",
|
|
138
|
-
"gsp_solar_azimuth", "gsp_solar_elevation",
|
|
139
|
-
]:
|
|
140
|
-
assert key in sample
|
|
141
|
-
|
|
142
|
-
for nwp_source in ["ukv"]:
|
|
143
|
-
assert nwp_source in sample["nwp"]
|
|
144
|
-
|
|
145
|
-
# Check the shape of the data is correct
|
|
146
|
-
# 30 minutes of 5 minute data (inclusive), one channel, 2x2 pixels
|
|
147
|
-
assert sample["satellite_actual"].shape == (num_gsps, 7, 1, 2, 2)
|
|
148
|
-
# 3 hours of 60 minute data (inclusive), one channel, 2x2 pixels
|
|
149
|
-
assert sample["nwp"]["ukv"]["nwp"].shape == (num_gsps, 4, 1, 2, 2)
|
|
150
|
-
# 3 hours of 30 minute data (inclusive)
|
|
151
|
-
assert sample["gsp"].shape == (num_gsps, 7,)
|
|
152
|
-
# Solar angles have same shape as GSP data
|
|
153
|
-
assert sample["gsp_solar_azimuth"].shape == (num_gsps, 7,)
|
|
154
|
-
assert sample["gsp_solar_elevation"].shape == (num_gsps, 7,)
|