ocf-data-sampler 0.1.10__py3-none-any.whl → 0.1.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocf-data-sampler might be problematic. Click here for more details.
- ocf_data_sampler/config/load.py +3 -3
- ocf_data_sampler/config/model.py +86 -72
- ocf_data_sampler/config/save.py +5 -4
- ocf_data_sampler/constants.py +140 -12
- ocf_data_sampler/load/gsp.py +6 -5
- ocf_data_sampler/load/load_dataset.py +5 -6
- ocf_data_sampler/load/nwp/nwp.py +17 -5
- ocf_data_sampler/load/nwp/providers/ecmwf.py +6 -7
- ocf_data_sampler/load/nwp/providers/gfs.py +36 -0
- ocf_data_sampler/load/nwp/providers/icon.py +46 -0
- ocf_data_sampler/load/nwp/providers/ukv.py +4 -5
- ocf_data_sampler/load/nwp/providers/utils.py +3 -1
- ocf_data_sampler/load/satellite.py +27 -36
- ocf_data_sampler/load/site.py +11 -7
- ocf_data_sampler/load/utils.py +21 -16
- ocf_data_sampler/numpy_sample/collate.py +10 -9
- ocf_data_sampler/numpy_sample/datetime_features.py +3 -5
- ocf_data_sampler/numpy_sample/gsp.py +15 -13
- ocf_data_sampler/numpy_sample/nwp.py +17 -23
- ocf_data_sampler/numpy_sample/satellite.py +17 -14
- ocf_data_sampler/numpy_sample/site.py +8 -7
- ocf_data_sampler/numpy_sample/sun_position.py +19 -25
- ocf_data_sampler/sample/__init__.py +0 -7
- ocf_data_sampler/sample/base.py +23 -44
- ocf_data_sampler/sample/site.py +25 -69
- ocf_data_sampler/sample/uk_regional.py +52 -103
- ocf_data_sampler/select/dropout.py +42 -27
- ocf_data_sampler/select/fill_time_periods.py +15 -3
- ocf_data_sampler/select/find_contiguous_time_periods.py +87 -75
- ocf_data_sampler/select/geospatial.py +63 -54
- ocf_data_sampler/select/location.py +16 -51
- ocf_data_sampler/select/select_spatial_slice.py +105 -89
- ocf_data_sampler/select/select_time_slice.py +71 -58
- ocf_data_sampler/select/spatial_slice_for_dataset.py +7 -6
- ocf_data_sampler/select/time_slice_for_dataset.py +17 -16
- ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +126 -118
- ocf_data_sampler/torch_datasets/datasets/site.py +135 -101
- ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +6 -2
- ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +23 -22
- ocf_data_sampler/torch_datasets/utils/validate_channels.py +23 -19
- ocf_data_sampler/utils.py +3 -1
- {ocf_data_sampler-0.1.10.dist-info → ocf_data_sampler-0.1.16.dist-info}/METADATA +7 -18
- ocf_data_sampler-0.1.16.dist-info/RECORD +56 -0
- {ocf_data_sampler-0.1.10.dist-info → ocf_data_sampler-0.1.16.dist-info}/WHEEL +1 -1
- {ocf_data_sampler-0.1.10.dist-info → ocf_data_sampler-0.1.16.dist-info}/top_level.txt +1 -1
- scripts/refactor_site.py +62 -33
- utils/compute_icon_mean_stddev.py +72 -0
- ocf_data_sampler-0.1.10.dist-info/LICENSE +0 -21
- ocf_data_sampler-0.1.10.dist-info/RECORD +0 -82
- tests/__init__.py +0 -0
- tests/config/test_config.py +0 -113
- tests/config/test_load.py +0 -7
- tests/config/test_save.py +0 -28
- tests/conftest.py +0 -286
- tests/load/test_load_gsp.py +0 -15
- tests/load/test_load_nwp.py +0 -21
- tests/load/test_load_satellite.py +0 -17
- tests/load/test_load_sites.py +0 -14
- tests/numpy_sample/test_collate.py +0 -21
- tests/numpy_sample/test_datetime_features.py +0 -37
- tests/numpy_sample/test_gsp.py +0 -38
- tests/numpy_sample/test_nwp.py +0 -52
- tests/numpy_sample/test_satellite.py +0 -40
- tests/numpy_sample/test_sun_position.py +0 -81
- tests/select/test_dropout.py +0 -75
- tests/select/test_fill_time_periods.py +0 -28
- tests/select/test_find_contiguous_time_periods.py +0 -202
- tests/select/test_location.py +0 -67
- tests/select/test_select_spatial_slice.py +0 -154
- tests/select/test_select_time_slice.py +0 -275
- tests/test_sample/test_base.py +0 -164
- tests/test_sample/test_site_sample.py +0 -195
- tests/test_sample/test_uk_regional_sample.py +0 -163
- tests/torch_datasets/test_merge_and_fill_utils.py +0 -40
- tests/torch_datasets/test_pvnet_uk.py +0 -167
- tests/torch_datasets/test_site.py +0 -226
- tests/torch_datasets/test_validate_channels_utils.py +0 -78
tests/config/test_load.py
DELETED
tests/config/test_save.py
DELETED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
"""Tests for configuration saving functionality."""
|
|
2
|
-
import os
|
|
3
|
-
from ocf_data_sampler.config import Configuration, save_yaml_configuration, load_yaml_configuration
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def test_save_yaml_configuration_basic(tmp_path):
|
|
7
|
-
"""Save an empty configuration object"""
|
|
8
|
-
config = Configuration()
|
|
9
|
-
|
|
10
|
-
filepath = f"{tmp_path}/config.yaml"
|
|
11
|
-
save_yaml_configuration(config, filepath)
|
|
12
|
-
|
|
13
|
-
assert os.path.exists(filepath)
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def test_save_load_yaml_configuration(tmp_path, test_config_filename):
|
|
17
|
-
"""Make sure a saved configuration is the same after loading"""
|
|
18
|
-
|
|
19
|
-
# Start with this config
|
|
20
|
-
initial_config = load_yaml_configuration(test_config_filename)
|
|
21
|
-
|
|
22
|
-
# Save it
|
|
23
|
-
filepath = f"{tmp_path}/config.yaml"
|
|
24
|
-
save_yaml_configuration(initial_config, filepath)
|
|
25
|
-
|
|
26
|
-
# Load it and check it is still the same
|
|
27
|
-
loaded_config = load_yaml_configuration(filepath)
|
|
28
|
-
assert loaded_config == initial_config
|
tests/conftest.py
DELETED
|
@@ -1,286 +0,0 @@
|
|
|
1
|
-
import pytest
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
import numpy as np
|
|
5
|
-
import pandas as pd
|
|
6
|
-
import xarray as xr
|
|
7
|
-
import dask.array
|
|
8
|
-
|
|
9
|
-
from ocf_data_sampler.config.model import Site
|
|
10
|
-
from ocf_data_sampler.config import load_yaml_configuration, save_yaml_configuration
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
_top_test_directory = os.path.dirname(os.path.realpath(__file__))
|
|
14
|
-
|
|
15
|
-
@pytest.fixture()
|
|
16
|
-
def test_config_filename():
|
|
17
|
-
return f"{_top_test_directory}/test_data/configs/test_config.yaml"
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
@pytest.fixture(scope="session")
|
|
21
|
-
def config_filename():
|
|
22
|
-
return f"{_top_test_directory}/test_data/configs/pvnet_test_config.yaml"
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
@pytest.fixture(scope="session")
|
|
26
|
-
def session_tmp_path(tmp_path_factory):
|
|
27
|
-
return tmp_path_factory.mktemp("data")
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
@pytest.fixture(scope="session")
|
|
31
|
-
def sat_zarr_path(session_tmp_path):
|
|
32
|
-
|
|
33
|
-
# Define coords for satellite-like dataset
|
|
34
|
-
variables = [
|
|
35
|
-
'IR_016', 'IR_039', 'IR_087', 'IR_097', 'IR_108', 'IR_120',
|
|
36
|
-
'IR_134', 'VIS006', 'VIS008', 'WV_062', 'WV_073',
|
|
37
|
-
]
|
|
38
|
-
x = np.linspace(start=15002, stop=-1824245, num=100)
|
|
39
|
-
y = np.linspace(start=4191563, stop=5304712, num=100)
|
|
40
|
-
times = pd.date_range("2023-01-01 00:00", "2023-01-01 23:55", freq="5min")
|
|
41
|
-
|
|
42
|
-
area_string = (
|
|
43
|
-
"""msg_seviri_rss_3km:
|
|
44
|
-
description: MSG SEVIRI Rapid Scanning Service area definition with 3 km resolution
|
|
45
|
-
projection:
|
|
46
|
-
proj: geos
|
|
47
|
-
lon_0: 9.5
|
|
48
|
-
h: 35785831
|
|
49
|
-
x_0: 0
|
|
50
|
-
y_0: 0
|
|
51
|
-
a: 6378169
|
|
52
|
-
rf: 295.488065897014
|
|
53
|
-
no_defs: null
|
|
54
|
-
type: crs
|
|
55
|
-
shape:
|
|
56
|
-
height: 298
|
|
57
|
-
width: 615
|
|
58
|
-
area_extent:
|
|
59
|
-
lower_left_xy: [28503.830075263977, 5090183.970808983]
|
|
60
|
-
upper_right_xy: [-1816744.1169023514, 4196063.827395439]
|
|
61
|
-
units: m
|
|
62
|
-
"""
|
|
63
|
-
)
|
|
64
|
-
|
|
65
|
-
# Create satellite-like data with some NaNs
|
|
66
|
-
data = dask.array.zeros(
|
|
67
|
-
shape=(len(variables), len(times), len(y), len(x)),
|
|
68
|
-
chunks=(-1, 10, -1, -1),
|
|
69
|
-
dtype=np.float32
|
|
70
|
-
)
|
|
71
|
-
data [:, 10, :, :] = np.nan
|
|
72
|
-
|
|
73
|
-
ds = xr.DataArray(
|
|
74
|
-
data=data,
|
|
75
|
-
coords=dict(
|
|
76
|
-
variable=variables,
|
|
77
|
-
time=times,
|
|
78
|
-
y_geostationary=y,
|
|
79
|
-
x_geostationary=x,
|
|
80
|
-
),
|
|
81
|
-
attrs=dict(area=area_string),
|
|
82
|
-
).to_dataset(name="data")
|
|
83
|
-
|
|
84
|
-
# Save temporarily as a zarr
|
|
85
|
-
zarr_path = session_tmp_path / "test_sat.zarr"
|
|
86
|
-
ds.to_zarr(zarr_path)
|
|
87
|
-
|
|
88
|
-
yield zarr_path
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
@pytest.fixture(scope="session")
|
|
92
|
-
def ds_nwp_ukv():
|
|
93
|
-
init_times = pd.date_range(start="2023-01-01 00:00", freq="180min", periods=24 * 7)
|
|
94
|
-
steps = pd.timedelta_range("0h", "10h", freq="1h")
|
|
95
|
-
|
|
96
|
-
x = np.linspace(-239_000, 857_000, 50)
|
|
97
|
-
y = np.linspace(-183_000, 1225_000, 100)
|
|
98
|
-
variables = ["si10", "dswrf", "t", "prate"]
|
|
99
|
-
|
|
100
|
-
coords = (
|
|
101
|
-
("init_time", init_times),
|
|
102
|
-
("variable", variables),
|
|
103
|
-
("step", steps),
|
|
104
|
-
("x", x),
|
|
105
|
-
("y", y),
|
|
106
|
-
)
|
|
107
|
-
|
|
108
|
-
nwp_array_shape = tuple(len(coord_values) for _, coord_values in coords)
|
|
109
|
-
|
|
110
|
-
nwp_data = xr.DataArray(
|
|
111
|
-
np.random.uniform(0, 200, size=nwp_array_shape).astype(np.float32),
|
|
112
|
-
coords=coords,
|
|
113
|
-
)
|
|
114
|
-
return nwp_data.to_dataset(name="UKV")
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
@pytest.fixture(scope="session")
|
|
118
|
-
def nwp_ukv_zarr_path(session_tmp_path, ds_nwp_ukv):
|
|
119
|
-
ds = ds_nwp_ukv.chunk(
|
|
120
|
-
{
|
|
121
|
-
"init_time": 1,
|
|
122
|
-
"step": -1,
|
|
123
|
-
"variable": -1,
|
|
124
|
-
"x": 50,
|
|
125
|
-
"y": 50,
|
|
126
|
-
}
|
|
127
|
-
)
|
|
128
|
-
zarr_path = session_tmp_path / "ukv_nwp.zarr"
|
|
129
|
-
ds.to_zarr(zarr_path)
|
|
130
|
-
yield zarr_path
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
@pytest.fixture(scope="session")
|
|
134
|
-
def ds_nwp_ecmwf():
|
|
135
|
-
init_times = pd.date_range(start="2023-01-01 00:00", freq="6h", periods=24 * 7)
|
|
136
|
-
steps = pd.timedelta_range("0h", "14h", freq="1h")
|
|
137
|
-
|
|
138
|
-
lons = np.arange(-12, 3)
|
|
139
|
-
lats = np.arange(48, 60)
|
|
140
|
-
variables = ["t2m","dswrf", "mcc"]
|
|
141
|
-
|
|
142
|
-
coords = (
|
|
143
|
-
("init_time", init_times),
|
|
144
|
-
("variable", variables),
|
|
145
|
-
("step", steps),
|
|
146
|
-
("longitude", lons),
|
|
147
|
-
("latitude", lats),
|
|
148
|
-
)
|
|
149
|
-
|
|
150
|
-
nwp_array_shape = tuple(len(coord_values) for _, coord_values in coords)
|
|
151
|
-
|
|
152
|
-
nwp_data = xr.DataArray(
|
|
153
|
-
np.random.uniform(0, 200, size=nwp_array_shape).astype(np.float32),
|
|
154
|
-
coords=coords,
|
|
155
|
-
)
|
|
156
|
-
return nwp_data.to_dataset(name="ECMWF_UK")
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
@pytest.fixture(scope="session")
|
|
160
|
-
def nwp_ecmwf_zarr_path(session_tmp_path, ds_nwp_ecmwf):
|
|
161
|
-
ds = ds_nwp_ecmwf.chunk(
|
|
162
|
-
{
|
|
163
|
-
"init_time": 1,
|
|
164
|
-
"step": -1,
|
|
165
|
-
"variable": -1,
|
|
166
|
-
"longitude": 50,
|
|
167
|
-
"latitude": 50,
|
|
168
|
-
}
|
|
169
|
-
)
|
|
170
|
-
|
|
171
|
-
zarr_path = session_tmp_path / "ukv_ecmwf.zarr"
|
|
172
|
-
ds.to_zarr(zarr_path)
|
|
173
|
-
yield zarr_path
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
@pytest.fixture(scope="session")
|
|
177
|
-
def ds_uk_gsp():
|
|
178
|
-
times = pd.date_range("2023-01-01 00:00", "2023-01-02 00:00", freq="30min")
|
|
179
|
-
gsp_ids = np.arange(0, 318)
|
|
180
|
-
capacity = np.ones((len(times), len(gsp_ids)))
|
|
181
|
-
generation = np.random.uniform(0, 200, size=(len(times), len(gsp_ids))).astype(np.float32)
|
|
182
|
-
|
|
183
|
-
coords = (
|
|
184
|
-
("datetime_gmt", times),
|
|
185
|
-
("gsp_id", gsp_ids),
|
|
186
|
-
)
|
|
187
|
-
|
|
188
|
-
da_cap = xr.DataArray(
|
|
189
|
-
capacity,
|
|
190
|
-
coords=coords,
|
|
191
|
-
)
|
|
192
|
-
|
|
193
|
-
da_gen = xr.DataArray(
|
|
194
|
-
generation,
|
|
195
|
-
coords=coords,
|
|
196
|
-
)
|
|
197
|
-
|
|
198
|
-
return xr.Dataset({
|
|
199
|
-
"capacity_mwp": da_cap,
|
|
200
|
-
"installedcapacity_mwp": da_cap,
|
|
201
|
-
"generation_mw":da_gen
|
|
202
|
-
})
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
@pytest.fixture(scope="session")
|
|
206
|
-
def data_sites(session_tmp_path) -> Site:
|
|
207
|
-
"""
|
|
208
|
-
Make fake data for sites
|
|
209
|
-
Returns: filename for netcdf file, and csv metadata
|
|
210
|
-
"""
|
|
211
|
-
times = pd.date_range("2023-01-01 00:00", "2023-01-02 00:00", freq="30min")
|
|
212
|
-
site_ids = list(range(0,10))
|
|
213
|
-
capacity_kwp_1d = np.array([0.1,1.1,4,6,8,9,15,2,3,4])
|
|
214
|
-
# these are quite specific for the fake satellite data
|
|
215
|
-
longitude = np.arange(-4, -3, 0.1)
|
|
216
|
-
latitude = np.arange(51, 52, 0.1)
|
|
217
|
-
|
|
218
|
-
generation = np.random.uniform(0, 200, size=(len(times), len(site_ids))).astype(np.float32)
|
|
219
|
-
|
|
220
|
-
# repeat capacity in new dims len(times) times
|
|
221
|
-
capacity_kwp = (np.tile(capacity_kwp_1d, len(times))).reshape(len(times),10)
|
|
222
|
-
|
|
223
|
-
coords = (
|
|
224
|
-
("time_utc", times),
|
|
225
|
-
("site_id", site_ids),
|
|
226
|
-
)
|
|
227
|
-
|
|
228
|
-
da_cap = xr.DataArray(
|
|
229
|
-
capacity_kwp,
|
|
230
|
-
coords=coords,
|
|
231
|
-
)
|
|
232
|
-
|
|
233
|
-
da_gen = xr.DataArray(
|
|
234
|
-
generation,
|
|
235
|
-
coords=coords,
|
|
236
|
-
)
|
|
237
|
-
|
|
238
|
-
# metadata
|
|
239
|
-
meta_df = pd.DataFrame(columns=[], data = [])
|
|
240
|
-
meta_df['site_id'] = site_ids
|
|
241
|
-
meta_df['capacity_kwp'] = capacity_kwp_1d
|
|
242
|
-
meta_df['longitude'] = longitude
|
|
243
|
-
meta_df['latitude'] = latitude
|
|
244
|
-
|
|
245
|
-
generation = xr.Dataset({
|
|
246
|
-
"capacity_kwp": da_cap,
|
|
247
|
-
"generation_kw": da_gen,
|
|
248
|
-
})
|
|
249
|
-
|
|
250
|
-
filename = f"{session_tmp_path}/sites.netcdf"
|
|
251
|
-
filename_csv = f"{session_tmp_path}/sites_metadata.csv"
|
|
252
|
-
generation.to_netcdf(filename)
|
|
253
|
-
meta_df.to_csv(filename_csv)
|
|
254
|
-
|
|
255
|
-
site = Site(
|
|
256
|
-
file_path=filename,
|
|
257
|
-
metadata_file_path=filename_csv,
|
|
258
|
-
interval_start_minutes=-30,
|
|
259
|
-
interval_end_minutes=60,
|
|
260
|
-
time_resolution_minutes=30,
|
|
261
|
-
)
|
|
262
|
-
|
|
263
|
-
yield site
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
@pytest.fixture(scope="session")
|
|
267
|
-
def uk_gsp_zarr_path(session_tmp_path, ds_uk_gsp):
|
|
268
|
-
zarr_path = session_tmp_path / "uk_gsp.zarr"
|
|
269
|
-
ds_uk_gsp.to_zarr(zarr_path)
|
|
270
|
-
yield zarr_path
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
@pytest.fixture()
|
|
274
|
-
def pvnet_config_filename(
|
|
275
|
-
tmp_path, config_filename, nwp_ukv_zarr_path, uk_gsp_zarr_path, sat_zarr_path
|
|
276
|
-
):
|
|
277
|
-
|
|
278
|
-
# adjust config to point to the zarr file
|
|
279
|
-
config = load_yaml_configuration(config_filename)
|
|
280
|
-
config.input_data.nwp["ukv"].zarr_path = nwp_ukv_zarr_path
|
|
281
|
-
config.input_data.satellite.zarr_path = sat_zarr_path
|
|
282
|
-
config.input_data.gsp.zarr_path = uk_gsp_zarr_path
|
|
283
|
-
|
|
284
|
-
filename = f"{tmp_path}/configuration.yaml"
|
|
285
|
-
save_yaml_configuration(config, filename)
|
|
286
|
-
return filename
|
tests/load/test_load_gsp.py
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
from ocf_data_sampler.load.gsp import open_gsp
|
|
2
|
-
import xarray as xr
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
def test_open_gsp(uk_gsp_zarr_path):
|
|
6
|
-
da = open_gsp(uk_gsp_zarr_path)
|
|
7
|
-
|
|
8
|
-
assert isinstance(da, xr.DataArray)
|
|
9
|
-
assert da.dims == ("time_utc", "gsp_id")
|
|
10
|
-
|
|
11
|
-
assert "nominal_capacity_mwp" in da.coords
|
|
12
|
-
assert "effective_capacity_mwp" in da.coords
|
|
13
|
-
assert "x_osgb" in da.coords
|
|
14
|
-
assert "y_osgb" in da.coords
|
|
15
|
-
assert da.shape == (49, 318)
|
tests/load/test_load_nwp.py
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
import pandas as pd
|
|
2
|
-
from xarray import DataArray
|
|
3
|
-
import numpy as np
|
|
4
|
-
|
|
5
|
-
from ocf_data_sampler.load.nwp import open_nwp
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def test_load_ukv(nwp_ukv_zarr_path):
|
|
9
|
-
da = open_nwp(zarr_path=nwp_ukv_zarr_path, provider="ukv")
|
|
10
|
-
assert isinstance(da, DataArray)
|
|
11
|
-
assert da.dims == ("init_time_utc", "step", "channel", "x_osgb", "y_osgb")
|
|
12
|
-
assert da.shape == (24 * 7, 11, 4, 50, 100)
|
|
13
|
-
assert np.issubdtype(da.dtype, np.number)
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def test_load_ecmwf(nwp_ecmwf_zarr_path):
|
|
17
|
-
da = open_nwp(zarr_path=nwp_ecmwf_zarr_path, provider="ecmwf")
|
|
18
|
-
assert isinstance(da, DataArray)
|
|
19
|
-
assert da.dims == ("init_time_utc", "step", "channel", "longitude", "latitude")
|
|
20
|
-
assert da.shape == (24 * 7, 15, 3, 15, 12)
|
|
21
|
-
assert np.issubdtype(da.dtype, np.number)
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
from ocf_data_sampler.load.satellite import open_sat_data
|
|
2
|
-
import xarray as xr
|
|
3
|
-
import numpy as np
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def test_open_satellite(sat_zarr_path):
|
|
7
|
-
da = open_sat_data(zarr_path=sat_zarr_path)
|
|
8
|
-
|
|
9
|
-
assert isinstance(da, xr.DataArray)
|
|
10
|
-
assert da.dims == ("time_utc", "channel", "x_geostationary", "y_geostationary")
|
|
11
|
-
# 288 is 1 days of data at 5 minutes intervals, 12 * 24
|
|
12
|
-
# There are 11 channels
|
|
13
|
-
# There are 100 x 100 pixels
|
|
14
|
-
assert da.shape == (288, 11, 100, 100)
|
|
15
|
-
assert np.issubdtype(da.dtype, np.number)
|
|
16
|
-
|
|
17
|
-
|
tests/load/test_load_sites.py
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
from ocf_data_sampler.load.site import open_site
|
|
2
|
-
import xarray as xr
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
def test_open_site(data_sites):
|
|
6
|
-
da = open_site(data_sites.file_path, data_sites.metadata_file_path)
|
|
7
|
-
|
|
8
|
-
assert isinstance(da, xr.DataArray)
|
|
9
|
-
assert da.dims == ("time_utc", "site_id")
|
|
10
|
-
|
|
11
|
-
assert "capacity_kwp" in da.coords
|
|
12
|
-
assert "latitude" in da.coords
|
|
13
|
-
assert "longitude" in da.coords
|
|
14
|
-
assert da.shape == (49, 10)
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
from ocf_data_sampler.numpy_sample.collate import stack_np_samples_into_batch
|
|
2
|
-
from ocf_data_sampler.torch_datasets.datasets.pvnet_uk import PVNetUKRegionalDataset
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
def test_stack_np_samples_into_batch(pvnet_config_filename):
|
|
6
|
-
|
|
7
|
-
# Create dataset object
|
|
8
|
-
dataset = PVNetUKRegionalDataset(pvnet_config_filename)
|
|
9
|
-
|
|
10
|
-
# Generate 2 samples
|
|
11
|
-
sample1 = dataset[0]
|
|
12
|
-
sample2 = dataset[1]
|
|
13
|
-
|
|
14
|
-
batch = stack_np_samples_into_batch([sample1, sample2])
|
|
15
|
-
|
|
16
|
-
assert isinstance(batch, dict)
|
|
17
|
-
assert "nwp" in batch
|
|
18
|
-
assert isinstance(batch["nwp"], dict)
|
|
19
|
-
assert "ukv" in batch["nwp"]
|
|
20
|
-
assert "gsp" in batch
|
|
21
|
-
assert "satellite_actual" in batch
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import pandas as pd
|
|
3
|
-
import pytest
|
|
4
|
-
|
|
5
|
-
from ocf_data_sampler.numpy_sample.datetime_features import make_datetime_numpy_dict
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def test_calculate_azimuth_and_elevation():
|
|
9
|
-
|
|
10
|
-
# Pick the day of the summer solstice
|
|
11
|
-
datetimes = pd.to_datetime(["2024-06-20 12:00", "2024-06-20 12:30", "2024-06-20 13:00"])
|
|
12
|
-
|
|
13
|
-
# Calculate sun angles
|
|
14
|
-
datetime_features = make_datetime_numpy_dict(datetimes)
|
|
15
|
-
|
|
16
|
-
assert len(datetime_features) == 4
|
|
17
|
-
|
|
18
|
-
assert len(datetime_features["wind_date_sin"]) == len(datetimes)
|
|
19
|
-
assert (datetime_features["wind_date_cos"] != datetime_features["wind_date_sin"]).all()
|
|
20
|
-
|
|
21
|
-
# assert all values are between -1 and 1
|
|
22
|
-
assert all(np.abs(datetime_features["wind_date_sin"]) <= 1)
|
|
23
|
-
assert all(np.abs(datetime_features["wind_date_cos"]) <= 1)
|
|
24
|
-
assert all(np.abs(datetime_features["wind_time_sin"]) <= 1)
|
|
25
|
-
assert all(np.abs(datetime_features["wind_time_cos"]) <= 1)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def test_make_datetime_numpy_batch_custom_key_prefix():
|
|
29
|
-
# Test function correctly applies custom prefix to dict keys
|
|
30
|
-
datetimes = pd.to_datetime(["2024-06-20 12:00", "2024-06-20 12:30", "2024-06-20 13:00"])
|
|
31
|
-
key_prefix = "solar"
|
|
32
|
-
|
|
33
|
-
datetime_features = make_datetime_numpy_dict(datetimes, key_prefix=key_prefix)
|
|
34
|
-
|
|
35
|
-
# Assert dict contains expected quantity of keys and verify starting with custom prefix
|
|
36
|
-
assert len(datetime_features) == 4
|
|
37
|
-
assert all(key.startswith(key_prefix) for key in datetime_features.keys())
|
tests/numpy_sample/test_gsp.py
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
from ocf_data_sampler.load.gsp import open_gsp
|
|
2
|
-
import numpy as np
|
|
3
|
-
|
|
4
|
-
from ocf_data_sampler.numpy_sample import convert_gsp_to_numpy_sample, GSPSampleKey
|
|
5
|
-
|
|
6
|
-
def test_convert_gsp_to_numpy_sample(uk_gsp_zarr_path):
|
|
7
|
-
|
|
8
|
-
da = (
|
|
9
|
-
open_gsp(uk_gsp_zarr_path)
|
|
10
|
-
.isel(time_utc=slice(0, 10))
|
|
11
|
-
.sel(gsp_id=1)
|
|
12
|
-
)
|
|
13
|
-
|
|
14
|
-
numpy_sample = convert_gsp_to_numpy_sample(da)
|
|
15
|
-
|
|
16
|
-
# Test data structure
|
|
17
|
-
assert isinstance(numpy_sample, dict), "Should be dict"
|
|
18
|
-
assert set(numpy_sample.keys()).issubset({
|
|
19
|
-
GSPSampleKey.gsp,
|
|
20
|
-
GSPSampleKey.nominal_capacity_mwp,
|
|
21
|
-
GSPSampleKey.effective_capacity_mwp,
|
|
22
|
-
GSPSampleKey.time_utc,
|
|
23
|
-
}), "Unexpected keys"
|
|
24
|
-
|
|
25
|
-
# Assert data content and capacity values
|
|
26
|
-
assert np.array_equal(numpy_sample[GSPSampleKey.gsp], da.values), "GSP values mismatch"
|
|
27
|
-
assert isinstance(numpy_sample[GSPSampleKey.time_utc], np.ndarray), "Time UTC should be numpy array"
|
|
28
|
-
assert numpy_sample[GSPSampleKey.time_utc].dtype == float, "Time UTC should be float type"
|
|
29
|
-
assert numpy_sample[GSPSampleKey.nominal_capacity_mwp] == da.isel(time_utc=0)["nominal_capacity_mwp"].values
|
|
30
|
-
assert numpy_sample[GSPSampleKey.effective_capacity_mwp] == da.isel(time_utc=0)["effective_capacity_mwp"].values
|
|
31
|
-
|
|
32
|
-
# Test with t0_idx
|
|
33
|
-
t0_idx = 5
|
|
34
|
-
numpy_sample_with_t0 = convert_gsp_to_numpy_sample(da, t0_idx=t0_idx)
|
|
35
|
-
assert numpy_sample_with_t0[GSPSampleKey.t0_idx] == t0_idx, "t0_idx not correctly set"
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
tests/numpy_sample/test_nwp.py
DELETED
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import pandas as pd
|
|
3
|
-
import xarray as xr
|
|
4
|
-
|
|
5
|
-
import pytest
|
|
6
|
-
|
|
7
|
-
from ocf_data_sampler.numpy_sample import convert_nwp_to_numpy_sample, NWPSampleKey
|
|
8
|
-
|
|
9
|
-
@pytest.fixture(scope="module")
|
|
10
|
-
def da_nwp_like():
|
|
11
|
-
"""Create dummy data which looks like time-sliced NWP data"""
|
|
12
|
-
|
|
13
|
-
t0 = pd.to_datetime("2024-01-02 00:00")
|
|
14
|
-
|
|
15
|
-
x = np.arange(-100, 100, 10)
|
|
16
|
-
y = np.arange(-100, 100, 10)
|
|
17
|
-
steps = pd.timedelta_range("0h", "8h", freq="1h")
|
|
18
|
-
target_times = t0 + steps
|
|
19
|
-
|
|
20
|
-
channels = ["t", "dswrf"]
|
|
21
|
-
init_times = pd.to_datetime([t0]*len(steps))
|
|
22
|
-
|
|
23
|
-
# Create dummy time-sliced NWP data
|
|
24
|
-
da_nwp = xr.DataArray(
|
|
25
|
-
np.random.normal(size=(len(target_times), len(channels), len(x), len(y))),
|
|
26
|
-
coords=dict(
|
|
27
|
-
target_times_utc=(["target_times_utc"], target_times),
|
|
28
|
-
channel=(["channel"], channels),
|
|
29
|
-
x_osgb=(["x_osgb"], x),
|
|
30
|
-
y_osgb=(["y_osgb"], y),
|
|
31
|
-
)
|
|
32
|
-
)
|
|
33
|
-
|
|
34
|
-
# Add extra non-coordinate dimensions
|
|
35
|
-
da_nwp = da_nwp.assign_coords(
|
|
36
|
-
init_time_utc=("target_times_utc", init_times),
|
|
37
|
-
step=("target_times_utc", steps),
|
|
38
|
-
)
|
|
39
|
-
|
|
40
|
-
return da_nwp
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def test_convert_nwp_to_numpy_sample(da_nwp_like):
|
|
44
|
-
|
|
45
|
-
# Call the function
|
|
46
|
-
numpy_sample = convert_nwp_to_numpy_sample(da_nwp_like)
|
|
47
|
-
|
|
48
|
-
# Assert the output type
|
|
49
|
-
assert isinstance(numpy_sample, dict)
|
|
50
|
-
|
|
51
|
-
# Assert the shape of the numpy sample
|
|
52
|
-
assert (numpy_sample[NWPSampleKey.nwp] == da_nwp_like.values).all()
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
import numpy as np
|
|
3
|
-
import pandas as pd
|
|
4
|
-
import xarray as xr
|
|
5
|
-
|
|
6
|
-
import pytest
|
|
7
|
-
|
|
8
|
-
from ocf_data_sampler.numpy_sample import convert_satellite_to_numpy_sample, SatelliteSampleKey
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
@pytest.fixture(scope="module")
|
|
12
|
-
def da_sat_like():
|
|
13
|
-
"""Create dummy data which looks like satellite data"""
|
|
14
|
-
x = np.arange(-100, 100, 10)
|
|
15
|
-
y = np.arange(-100, 100, 10)
|
|
16
|
-
datetimes = pd.date_range("2024-01-01 12:00", "2024-01-01 12:30", freq="5min")
|
|
17
|
-
channels = ["VIS008", "IR016"]
|
|
18
|
-
|
|
19
|
-
da_sat = xr.DataArray(
|
|
20
|
-
np.random.normal(size=(len(datetimes), len(channels), len(x), len(y))),
|
|
21
|
-
coords=dict(
|
|
22
|
-
time_utc=(["time_utc"], datetimes),
|
|
23
|
-
channel=(["channel"], channels),
|
|
24
|
-
x_geostationary=(["x_geostationary"], x),
|
|
25
|
-
y_geostationary=(["y_geostationary"], y),
|
|
26
|
-
)
|
|
27
|
-
)
|
|
28
|
-
return da_sat
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def test_convert_satellite_to_numpy_sample(da_sat_like):
|
|
32
|
-
|
|
33
|
-
# Call the function
|
|
34
|
-
numpy_sample = convert_satellite_to_numpy_sample(da_sat_like)
|
|
35
|
-
|
|
36
|
-
# Assert the output type
|
|
37
|
-
assert isinstance(numpy_sample, dict)
|
|
38
|
-
|
|
39
|
-
# Assert the shape of the numpy sample
|
|
40
|
-
assert (numpy_sample[SatelliteSampleKey.satellite_actual] == da_sat_like.values).all()
|
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import pandas as pd
|
|
3
|
-
import pytest
|
|
4
|
-
|
|
5
|
-
from ocf_data_sampler.numpy_sample.sun_position import (
|
|
6
|
-
calculate_azimuth_and_elevation, make_sun_position_numpy_sample
|
|
7
|
-
)
|
|
8
|
-
|
|
9
|
-
from ocf_data_sampler.numpy_sample import GSPSampleKey
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
@pytest.mark.parametrize("lat", [0, 5, 10, 23.5])
|
|
13
|
-
def test_calculate_azimuth_and_elevation(lat):
|
|
14
|
-
|
|
15
|
-
# Pick the day of the summer solstice
|
|
16
|
-
datetimes = pd.to_datetime(["2024-06-20 12:00"])
|
|
17
|
-
|
|
18
|
-
# Calculate sun angles
|
|
19
|
-
azimuth, elevation = calculate_azimuth_and_elevation(datetimes, lon=0, lat=lat)
|
|
20
|
-
|
|
21
|
-
assert len(azimuth)==len(datetimes)
|
|
22
|
-
assert len(elevation)==len(datetimes)
|
|
23
|
-
|
|
24
|
-
# elevation should be close to (90 - (23.5-lat) degrees
|
|
25
|
-
assert np.abs(elevation - (90-23.5+lat)) < 1
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def test_calculate_azimuth_and_elevation_random():
|
|
29
|
-
"""Test that the function produces the expected range of azimuths and elevations"""
|
|
30
|
-
|
|
31
|
-
# Set seed so we know the test should pass
|
|
32
|
-
np.random.seed(0)
|
|
33
|
-
|
|
34
|
-
# Pick the day of the summer solstice
|
|
35
|
-
datetimes = pd.to_datetime(["2024-06-20 12:00"])
|
|
36
|
-
|
|
37
|
-
# Pick 100 random locations and measure their azimuth and elevations
|
|
38
|
-
azimuths = []
|
|
39
|
-
elevations = []
|
|
40
|
-
|
|
41
|
-
for _ in range(100):
|
|
42
|
-
|
|
43
|
-
lon = np.random.uniform(low=0, high=360)
|
|
44
|
-
lat = np.random.uniform(low=-90, high=90)
|
|
45
|
-
|
|
46
|
-
# Calculate sun angles
|
|
47
|
-
azimuth, elevation = calculate_azimuth_and_elevation(datetimes, lon=lon, lat=lat)
|
|
48
|
-
|
|
49
|
-
azimuths.append(azimuth.item())
|
|
50
|
-
elevations.append(elevation.item())
|
|
51
|
-
|
|
52
|
-
azimuths = np.array(azimuths)
|
|
53
|
-
elevations = np.array(elevations)
|
|
54
|
-
|
|
55
|
-
assert (0<=azimuths).all() and (azimuths<=360).all()
|
|
56
|
-
assert (-90<=elevations).all() and (elevations<=90).all()
|
|
57
|
-
|
|
58
|
-
# Azimuth range is [0, 360]
|
|
59
|
-
assert azimuths.min() < 30
|
|
60
|
-
assert azimuths.max() > 330
|
|
61
|
-
|
|
62
|
-
# Elevation range is [-90, 90]
|
|
63
|
-
assert elevations.min() < -70
|
|
64
|
-
assert elevations.max() > 70
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def test_make_sun_position_numpy_sample():
|
|
68
|
-
|
|
69
|
-
datetimes = pd.date_range("2024-06-20 12:00", "2024-06-20 16:00", freq="30min")
|
|
70
|
-
lon, lat = 0, 51.5
|
|
71
|
-
|
|
72
|
-
sample = make_sun_position_numpy_sample(datetimes, lon, lat, key_prefix="gsp")
|
|
73
|
-
|
|
74
|
-
assert GSPSampleKey.solar_elevation in sample
|
|
75
|
-
assert GSPSampleKey.solar_azimuth in sample
|
|
76
|
-
|
|
77
|
-
# The solar coords are normalised in the function
|
|
78
|
-
assert (sample[GSPSampleKey.solar_elevation]>=0).all()
|
|
79
|
-
assert (sample[GSPSampleKey.solar_elevation]<=1).all()
|
|
80
|
-
assert (sample[GSPSampleKey.solar_azimuth]>=0).all()
|
|
81
|
-
assert (sample[GSPSampleKey.solar_azimuth]<=1).all()
|