ocf-data-sampler 0.0.19__py3-none-any.whl → 0.0.22__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
Potentially problematic release.
- ocf_data_sampler/config/__init__.py +5 -0
- ocf_data_sampler/config/load.py +33 -0
- ocf_data_sampler/config/model.py +249 -0
- ocf_data_sampler/config/save.py +36 -0
- ocf_data_sampler/constants.py +135 -0
- ocf_data_sampler/numpy_batch/gsp.py +21 -8
- ocf_data_sampler/numpy_batch/nwp.py +13 -3
- ocf_data_sampler/numpy_batch/satellite.py +15 -8
- ocf_data_sampler/numpy_batch/sun_position.py +5 -6
- ocf_data_sampler/select/dropout.py +2 -2
- ocf_data_sampler/select/geospatial.py +118 -0
- ocf_data_sampler/select/location.py +62 -0
- ocf_data_sampler/select/select_spatial_slice.py +5 -14
- ocf_data_sampler/torch_datasets/pvnet_uk_regional.py +16 -20
- ocf_data_sampler-0.0.22.dist-info/METADATA +88 -0
- ocf_data_sampler-0.0.22.dist-info/RECORD +54 -0
- {ocf_data_sampler-0.0.19.dist-info → ocf_data_sampler-0.0.22.dist-info}/WHEEL +1 -1
- tests/config/test_config.py +152 -0
- tests/conftest.py +6 -1
- tests/load/test_load_gsp.py +15 -0
- tests/load/test_load_nwp.py +21 -0
- tests/load/test_load_satellite.py +17 -0
- tests/numpy_batch/test_gsp.py +22 -0
- tests/numpy_batch/test_nwp.py +54 -0
- tests/numpy_batch/test_satellite.py +42 -0
- tests/numpy_batch/test_sun_position.py +81 -0
- tests/select/test_dropout.py +75 -0
- tests/select/test_fill_time_periods.py +28 -0
- tests/select/test_find_contiguous_time_periods.py +202 -0
- tests/select/test_location.py +67 -0
- tests/select/test_select_spatial_slice.py +154 -0
- tests/select/test_select_time_slice.py +284 -0
- tests/torch_datasets/test_pvnet_uk_regional.py +74 -0
- ocf_data_sampler-0.0.19.dist-info/METADATA +0 -22
- ocf_data_sampler-0.0.19.dist-info/RECORD +0 -32
- {ocf_data_sampler-0.0.19.dist-info → ocf_data_sampler-0.0.22.dist-info}/LICENSE +0 -0
- {ocf_data_sampler-0.0.19.dist-info → ocf_data_sampler-0.0.22.dist-info}/top_level.txt +0 -0
tests/load/test_load_nwp.py
@@ -0,0 +1,21 @@
+import pandas as pd
+from xarray import DataArray
+import numpy as np
+
+from ocf_data_sampler.load.nwp import open_nwp
+
+
+def test_load_ukv(nwp_ukv_zarr_path):
+    da = open_nwp(zarr_path=nwp_ukv_zarr_path, provider="ukv")
+    assert isinstance(da, DataArray)
+    assert da.dims == ("init_time_utc", "step", "channel", "x_osgb", "y_osgb")
+    assert da.shape == (24 * 7, 11, 4, 50, 100)
+    assert np.issubdtype(da.dtype, np.number)
+
+
+def test_load_ecmwf(nwp_ecmwf_zarr_path):
+    da = open_nwp(zarr_path=nwp_ecmwf_zarr_path, provider="ecmwf")
+    assert isinstance(da, DataArray)
+    assert da.dims == ("init_time_utc", "step", "channel", "longitude", "latitude")
+    assert da.shape == (24 * 7, 15, 3, 15, 12)
+    assert np.issubdtype(da.dtype, np.number)
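A brief usage sketch for the loader exercised above (illustration only, not part of the diff; the Zarr path and channel name are hypothetical stand-ins):

from ocf_data_sampler.load.nwp import open_nwp

# Returns an xarray DataArray with dims
# (init_time_utc, step, channel, x_osgb, y_osgb) for the "ukv" provider
da = open_nwp(zarr_path="path/to/nwp_ukv.zarr", provider="ukv")  # hypothetical path

# e.g. select one channel and the first step of the latest init time
da_latest = da.sel(channel="t").isel(init_time_utc=-1, step=0)  # "t" is a hypothetical channel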
tests/load/test_load_satellite.py
@@ -0,0 +1,17 @@
+from ocf_data_sampler.load.satellite import open_sat_data
+import xarray as xr
+import numpy as np
+
+
+def test_open_satellite(sat_zarr_path):
+    da = open_sat_data(zarr_path=sat_zarr_path)
+
+    assert isinstance(da, xr.DataArray)
+    assert da.dims == ("time_utc", "channel", "x_geostationary", "y_geostationary")
+    # 576 is 2 days of data at 5 minute intervals: 12 * 24 * 2
+    # There are 11 channels
+    # There are 49 x 20 pixels
+    assert da.shape == (576, 11, 49, 20)
+    assert np.issubdtype(da.dtype, np.number)
+
+
tests/numpy_batch/test_gsp.py
@@ -0,0 +1,22 @@
+from ocf_data_sampler.load.gsp import open_gsp
+
+from ocf_data_sampler.numpy_batch import convert_gsp_to_numpy_batch
+from ocf_data_sampler.numpy_batch.gsp import GSPBatchKey
+
+def test_convert_gsp_to_numpy_batch(uk_gsp_zarr_path):
+
+    da = (
+        open_gsp(uk_gsp_zarr_path)
+        .isel(time_utc=slice(0, 10))
+        .sel(gsp_id=1)
+    )
+
+    # Call the function
+    numpy_batch = convert_gsp_to_numpy_batch(da)
+
+    # Assert the output type
+    assert isinstance(numpy_batch, dict)
+
+    # Assert the values in the numpy batch match the data
+    assert (numpy_batch[GSPBatchKey.gsp] == da.values).all()
+
tests/numpy_batch/test_nwp.py
@@ -0,0 +1,54 @@
+import numpy as np
+import pandas as pd
+import xarray as xr
+
+import pytest
+
+from ocf_data_sampler.numpy_batch import convert_nwp_to_numpy_batch
+
+from ocf_data_sampler.numpy_batch.nwp import NWPBatchKey
+
+@pytest.fixture(scope="module")
+def da_nwp_like():
+    """Create dummy data which looks like time-sliced NWP data"""
+
+    t0 = pd.to_datetime("2024-01-02 00:00")
+
+    x = np.arange(-100, 100, 10)
+    y = np.arange(-100, 100, 10)
+    steps = pd.timedelta_range("0H", "8H", freq="1H")
+    target_times = t0 + steps
+
+    channels = ["t", "dswrf"]
+    init_times = pd.to_datetime([t0] * len(steps))
+
+    # Create dummy time-sliced NWP data
+    da_nwp = xr.DataArray(
+        np.random.normal(size=(len(target_times), len(channels), len(x), len(y))),
+        coords=dict(
+            target_times_utc=(["target_times_utc"], target_times),
+            channel=(["channel"], channels),
+            x_osgb=(["x_osgb"], x),
+            y_osgb=(["y_osgb"], y),
+        )
+    )
+
+    # Add the init time and step as extra non-dimension coordinates
+    da_nwp = da_nwp.assign_coords(
+        init_time_utc=("target_times_utc", init_times),
+        step=("target_times_utc", steps),
+    )
+
+    return da_nwp
+
+
+def test_convert_nwp_to_numpy_batch(da_nwp_like):
+
+    # Call the function
+    numpy_batch = convert_nwp_to_numpy_batch(da_nwp_like)
+
+    # Assert the output type
+    assert isinstance(numpy_batch, dict)
+
+    # Assert the values in the numpy batch match the data
+    assert (numpy_batch[NWPBatchKey.nwp] == da_nwp_like.values).all()
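The fixture above encodes the "time-sliced" NWP layout: the array is indexed by target (valid) time, while the originating init time and forecast step ride along as non-dimension coordinates, so target_times_utc equals init_time_utc plus step pointwise. A small consistency check of that invariant (a sketch, not part of the diff):

def check_time_sliced(da):
    # Each valid time must equal its init time plus its forecast step
    assert (da.init_time_utc + da.step == da.target_times_utc).all()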
tests/numpy_batch/test_satellite.py
@@ -0,0 +1,42 @@
+
+import numpy as np
+import pandas as pd
+import xarray as xr
+
+import pytest
+
+from ocf_data_sampler.numpy_batch import convert_satellite_to_numpy_batch
+
+from ocf_data_sampler.numpy_batch.satellite import SatelliteBatchKey
+
+
+@pytest.fixture(scope="module")
+def da_sat_like():
+    """Create dummy data which looks like satellite data"""
+    x = np.arange(-100, 100, 10)
+    y = np.arange(-100, 100, 10)
+    datetimes = pd.date_range("2024-01-01 12:00", "2024-01-01 12:30", freq="5min")
+    channels = ["VIS008", "IR016"]
+
+    da_sat = xr.DataArray(
+        np.random.normal(size=(len(datetimes), len(channels), len(x), len(y))),
+        coords=dict(
+            time_utc=(["time_utc"], datetimes),
+            channel=(["channel"], channels),
+            x_geostationary=(["x_geostationary"], x),
+            y_geostationary=(["y_geostationary"], y),
+        )
+    )
+    return da_sat
+
+
+def test_convert_satellite_to_numpy_batch(da_sat_like):
+
+    # Call the function
+    numpy_batch = convert_satellite_to_numpy_batch(da_sat_like)
+
+    # Assert the output type
+    assert isinstance(numpy_batch, dict)
+
+    # Assert the values in the numpy batch match the data
+    assert (numpy_batch[SatelliteBatchKey.satellite_actual] == da_sat_like.values).all()
tests/numpy_batch/test_sun_position.py
@@ -0,0 +1,81 @@
+import numpy as np
+import pandas as pd
+import pytest
+
+from ocf_data_sampler.numpy_batch.sun_position import (
+    calculate_azimuth_and_elevation, make_sun_position_numpy_batch
+)
+
+from ocf_data_sampler.numpy_batch.gsp import GSPBatchKey
+
+
+@pytest.mark.parametrize("lat", [0, 5, 10, 23.5])
+def test_calculate_azimuth_and_elevation(lat):
+
+    # Pick the day of the summer solstice
+    datetimes = pd.to_datetime(["2024-06-20 12:00"])
+
+    # Calculate sun angles
+    azimuth, elevation = calculate_azimuth_and_elevation(datetimes, lon=0, lat=lat)
+
+    assert len(azimuth) == len(datetimes)
+    assert len(elevation) == len(datetimes)
+
+    # Elevation should be close to 90 - (23.5 - lat) degrees
+    assert np.abs(elevation - (90 - 23.5 + lat)) < 1
+
+
+def test_calculate_azimuth_and_elevation_random():
+    """Test that the function produces the expected range of azimuths and elevations"""
+
+    # Set seed so we know the test should pass
+    np.random.seed(0)
+
+    # Pick the day of the summer solstice
+    datetimes = pd.to_datetime(["2024-06-20 12:00"])
+
+    # Pick 100 random locations and measure their azimuths and elevations
+    azimuths = []
+    elevations = []
+
+    for _ in range(100):
+
+        lon = np.random.uniform(low=0, high=360)
+        lat = np.random.uniform(low=-90, high=90)
+
+        # Calculate sun angles
+        azimuth, elevation = calculate_azimuth_and_elevation(datetimes, lon=lon, lat=lat)
+
+        azimuths.append(azimuth.item())
+        elevations.append(elevation.item())
+
+    azimuths = np.array(azimuths)
+    elevations = np.array(elevations)
+
+    assert (0 <= azimuths).all() and (azimuths <= 360).all()
+    assert (-90 <= elevations).all() and (elevations <= 90).all()
+
+    # Azimuth range is [0, 360]
+    assert azimuths.min() < 30
+    assert azimuths.max() > 330
+
+    # Elevation range is [-90, 90]
+    assert elevations.min() < -70
+    assert elevations.max() > 70
+
+
+def test_make_sun_position_numpy_batch():
+
+    datetimes = pd.date_range("2024-06-20 12:00", "2024-06-20 16:00", freq="30min")
+    lon, lat = 0, 51.5
+
+    batch = make_sun_position_numpy_batch(datetimes, lon, lat, key_prefix="gsp")
+
+    assert GSPBatchKey.gsp_solar_elevation in batch
+    assert GSPBatchKey.gsp_solar_azimuth in batch
+
+    # The solar coords are normalised in the function
+    assert (batch[GSPBatchKey.gsp_solar_elevation] >= 0).all()
+    assert (batch[GSPBatchKey.gsp_solar_elevation] <= 1).all()
+    assert (batch[GSPBatchKey.gsp_solar_azimuth] >= 0).all()
+    assert (batch[GSPBatchKey.gsp_solar_azimuth] <= 1).all()
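Why the solstice test expects elevation ≈ 90 - (23.5 - lat): at local solar noon on the June solstice the subsolar point sits at roughly 23.5°N, so for a latitude at or below the Tropic of Cancer the solar elevation is 90° minus the angular distance to that point. A quick check of the parametrized latitudes (a sketch, not part of the diff):

for lat in [0, 5, 10, 23.5]:
    # Angular distance from the subsolar point (23.5°N) at solar noon
    expected_elevation = 90 - (23.5 - lat)
    print(f"lat={lat:>4}: expected elevation ≈ {expected_elevation:.1f}°")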
tests/select/test_dropout.py
@@ -0,0 +1,75 @@
+from ocf_data_sampler.select.dropout import draw_dropout_time, apply_dropout_time
+
+import numpy as np
+import pandas as pd
+import xarray as xr
+
+import pytest
+
+
+@pytest.fixture(scope="module")
+def da_sample():
+    """Create dummy data which looks like satellite data"""
+
+    datetimes = pd.date_range("2024-01-01 12:00", "2024-01-01 13:00", freq="5min")
+
+    da_sat = xr.DataArray(
+        np.random.normal(size=(len(datetimes),)),
+        coords=dict(
+            time_utc=(["time_utc"], datetimes),
+        )
+    )
+    return da_sat
+
+
+def test_draw_dropout_time():
+    t0 = pd.Timestamp("2021-01-01 04:00:00")
+
+    dropout_timedeltas = pd.to_timedelta([-30, -60], unit="min")
+    dropout_time = draw_dropout_time(t0, dropout_timedeltas, dropout_frac=1)
+
+    assert isinstance(dropout_time, pd.Timestamp)
+    assert dropout_time - t0 in dropout_timedeltas
+
+
+def test_draw_dropout_time_partial():
+    t0 = pd.Timestamp("2021-01-01 04:00:00")
+
+    dropout_timedeltas = pd.to_timedelta([-30, -60], unit="min")
+
+    dropouts = set()
+
+    # Loop over 100 draws to have a very high probability of seeing all outcomes.
+    # The chance of this failing by chance is approx ((2/3)^100) * 3 = 7e-18
+    for _ in range(100):
+        dropouts.add(draw_dropout_time(t0, dropout_timedeltas, dropout_frac=2/3))
+
+    # Check all expected dropouts are present
+    assert dropouts == {None} | set(t0 + dt for dt in dropout_timedeltas)
+
+
+def test_draw_dropout_time_none():
+    t0 = pd.Timestamp("2021-01-01 04:00:00")
+
+    # No dropout timedeltas
+    dropout_time = draw_dropout_time(t0, dropout_timedeltas=None, dropout_frac=1)
+    assert dropout_time is None
+
+    # Dropout fraction is 0
+    dropout_timedeltas = [pd.Timedelta(-30, "min")]
+    dropout_time = draw_dropout_time(t0, dropout_timedeltas=dropout_timedeltas, dropout_frac=0)
+    assert dropout_time is None
+
+    # No dropout timedeltas and dropout fraction is 0
+    dropout_time = draw_dropout_time(t0, dropout_timedeltas=None, dropout_frac=0)
+    assert dropout_time is None
+
+
+@pytest.mark.parametrize("t0_str", ["12:00", "12:30", "13:00"])
+def test_apply_dropout_time(da_sample, t0_str):
+    dropout_time = pd.Timestamp(f"2024-01-01 {t0_str}")
+
+    da_dropout = apply_dropout_time(da_sample, dropout_time)
+
+    assert da_dropout.sel(time_utc=slice(None, dropout_time)).notnull().all()
+    assert da_dropout.sel(time_utc=slice(dropout_time + pd.Timedelta(5, "min"), None)).isnull().all()
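A minimal sketch of how the two functions under test chain together (illustration only; the DataArray construction mirrors the da_sample fixture above):

import numpy as np
import pandas as pd
import xarray as xr

from ocf_data_sampler.select.dropout import draw_dropout_time, apply_dropout_time

datetimes = pd.date_range("2024-01-01 12:00", "2024-01-01 13:00", freq="5min")
da = xr.DataArray(
    np.random.normal(size=(len(datetimes),)),
    coords=dict(time_utc=(["time_utc"], datetimes)),
)

t0 = pd.Timestamp("2024-01-01 13:00")
# With dropout_frac < 1 the draw may return None, meaning "no dropout"
dropout_time = draw_dropout_time(
    t0, dropout_timedeltas=pd.to_timedelta([-30, -60], unit="min"), dropout_frac=0.5
)
if dropout_time is not None:
    # NaN-out all values after the drawn dropout time
    da = apply_dropout_time(da, dropout_time)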
tests/select/test_fill_time_periods.py
@@ -0,0 +1,28 @@
+import pandas as pd
+
+from ocf_data_sampler.select.fill_time_periods import fill_time_periods
+
+def test_fill_time_periods():
+    time_periods = pd.DataFrame(
+        {
+            "start_dt": [
+                "2021-01-01 04:10:00", "2021-01-01 09:00:00",
+                "2021-01-01 09:15:00", "2021-01-01 12:00:00"
+            ],
+            "end_dt": [
+                "2021-01-01 06:00:00", "2021-01-01 09:00:00",
+                "2021-01-01 09:20:00", "2021-01-01 14:45:00"
+            ],
+        }
+    )
+    freq = pd.Timedelta("30min")
+    filled_time_periods = fill_time_periods(time_periods, freq)
+
+    expected_times = [
+        "04:30", "05:00", "05:30", "06:00", "09:00", "12:00",
+        "12:30", "13:00", "13:30", "14:00", "14:30"
+    ]
+
+    expected_times = pd.DatetimeIndex([f"2021-01-01 {t}" for t in expected_times])
+
+    pd.testing.assert_index_equal(filled_time_periods, expected_times)
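The expected output shows the semantics: each [start_dt, end_dt] period is filled with timestamps on a clock-aligned 30-minute grid, keeping only grid points that fall inside the period (the 09:15-09:20 period contains no grid point, so it contributes nothing). One plausible re-implementation of that behaviour, for illustration only and not the package's code:

import pandas as pd

def fill_time_periods_sketch(time_periods: pd.DataFrame, freq: pd.Timedelta) -> pd.DatetimeIndex:
    times = []
    for _, row in time_periods.iterrows():
        # First on-grid timestamp at or after the period start
        start = pd.Timestamp(row["start_dt"]).ceil(freq)
        times.extend(pd.date_range(start, row["end_dt"], freq=freq))
    return pd.DatetimeIndex(times)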
tests/select/test_find_contiguous_time_periods.py
@@ -0,0 +1,202 @@
+import pandas as pd
+
+from ocf_data_sampler.select.find_contiguous_time_periods import (
+    find_contiguous_t0_periods, find_contiguous_t0_periods_nwp,
+    intersection_of_multiple_dataframes_of_periods,
+)
+
+
+
+def test_find_contiguous_t0_periods():
+
+    # Create 5-minutely data timestamps
+    freq = pd.Timedelta(5, "min")
+    history_duration = pd.Timedelta(60, "min")
+    forecast_duration = pd.Timedelta(15, "min")
+
+    datetimes = (
+        pd.date_range("2023-01-01 12:00", "2023-01-01 17:00", freq=freq)
+        .delete([5, 6, 30])
+    )
+
+    periods = find_contiguous_t0_periods(
+        datetimes=datetimes,
+        history_duration=history_duration,
+        forecast_duration=forecast_duration,
+        sample_period_duration=freq,
+    )
+
+    expected_results = pd.DataFrame(
+        {
+            "start_dt": pd.to_datetime(
+                [
+                    "2023-01-01 13:35",
+                    "2023-01-01 15:35",
+                ]
+            ),
+            "end_dt": pd.to_datetime(
+                [
+                    "2023-01-01 14:10",
+                    "2023-01-01 16:45",
+                ]
+            ),
+        },
+    )
+
+    assert periods.equals(expected_results)
+
+
+def test_find_contiguous_t0_periods_nwp():
+
+    # These are the expected results of the test
+    expected_results = [
+        pd.DataFrame(
+            {
+                "start_dt": pd.to_datetime(["2023-01-01 03:00", "2023-01-02 03:00"]),
+                "end_dt": pd.to_datetime(["2023-01-01 21:00", "2023-01-03 06:00"]),
+            },
+        ),
+        pd.DataFrame(
+            {
+                "start_dt": pd.to_datetime(
+                    [
+                        "2023-01-01 05:00",
+                        "2023-01-02 05:00",
+                    ]
+                ),
+                "end_dt": pd.to_datetime(
+                    [
+                        "2023-01-01 21:00",
+                        "2023-01-03 06:00",
+                    ]
+                ),
+            },
+        ),
+        pd.DataFrame(
+            {
+                "start_dt": pd.to_datetime(
+                    [
+                        "2023-01-01 05:00",
+                        "2023-01-02 05:00",
+                        "2023-01-02 14:00",
+                    ]
+                ),
+                "end_dt": pd.to_datetime(
+                    [
+                        "2023-01-01 18:00",
+                        "2023-01-02 09:00",
+                        "2023-01-03 03:00",
+                    ]
+                ),
+            },
+        ),
+        pd.DataFrame(
+            {
+                "start_dt": pd.to_datetime(
+                    [
+                        "2023-01-01 05:00",
+                        "2023-01-01 11:00",
+                        "2023-01-02 05:00",
+                        "2023-01-02 14:00",
+                    ]
+                ),
+                "end_dt": pd.to_datetime(
+                    [
+                        "2023-01-01 06:00",
+                        "2023-01-01 15:00",
+                        "2023-01-02 06:00",
+                        "2023-01-03 00:00",
+                    ]
+                ),
+            },
+        ),
+        pd.DataFrame(
+            {
+                "start_dt": pd.to_datetime(
+                    [
+                        "2023-01-01 06:00",
+                        "2023-01-01 12:00",
+                        "2023-01-02 06:00",
+                        "2023-01-02 15:00",
+                    ]
+                ),
+                "end_dt": pd.to_datetime(
+                    [
+                        "2023-01-01 09:00",
+                        "2023-01-01 18:00",
+                        "2023-01-02 09:00",
+                        "2023-01-03 03:00",
+                    ]
+                ),
+            },
+        ),
+    ]
+
+    # Create 3-hourly init times with a few timestamps missing
+    freq = pd.Timedelta(3, "h")
+
+    datetimes = (
+        pd.date_range("2023-01-01 03:00", "2023-01-02 21:00", freq=freq)
+        .delete([1, 4, 5, 6, 7, 9, 10])
+    )
+
+    # Choose some history durations and max stalenesses
+    history_durations_hr = [0, 2, 2, 2, 2]
+    max_stalenesses_hr = [9, 9, 6, 3, 6]
+    max_dropouts_hr = [0, 0, 0, 0, 3]
+
+    for i in range(len(expected_results)):
+        history_duration = pd.Timedelta(history_durations_hr[i], "h")
+        max_staleness = pd.Timedelta(max_stalenesses_hr[i], "h")
+        max_dropout = pd.Timedelta(max_dropouts_hr[i], "h")
+
+        time_periods = find_contiguous_t0_periods_nwp(
+            datetimes=datetimes,
+            history_duration=history_duration,
+            max_staleness=max_staleness,
+            max_dropout=max_dropout,
+        )
+
+        # Check if results are as expected
+        assert time_periods.equals(expected_results[i])
+
+
+def test_intersection_of_multiple_dataframes_of_periods():
+    periods_1 = pd.DataFrame(
+        {
+            "start_dt": pd.to_datetime(["2023-01-01 05:00", "2023-01-01 14:10"]),
+            "end_dt": pd.to_datetime(["2023-01-01 13:35", "2023-01-01 18:00"]),
+        },
+    )
+
+    periods_2 = pd.DataFrame(
+        {
+            "start_dt": pd.to_datetime(["2023-01-01 12:00"]),
+            "end_dt": pd.to_datetime(["2023-01-02 00:00"]),
+        },
+    )
+
+    periods_3 = pd.DataFrame(
+        {
+            "start_dt": pd.to_datetime(["2023-01-01 00:00", "2023-01-01 13:00"]),
+            "end_dt": pd.to_datetime(["2023-01-01 12:30", "2023-01-01 23:00"]),
+        },
+    )
+
+    expected_result = pd.DataFrame(
+        {
+            "start_dt": pd.to_datetime(
+                ["2023-01-01 12:00", "2023-01-01 13:00", "2023-01-01 14:10"]
+            ),
+            "end_dt": pd.to_datetime(
+                ["2023-01-01 12:30", "2023-01-01 13:35", "2023-01-01 18:00"]
+            ),
+        },
+    )
+
+    overlapping_periods = intersection_of_multiple_dataframes_of_periods(
+        [periods_1, periods_2, periods_3]
+    )
+
+    # Check if results are as expected
+    assert overlapping_periods.equals(expected_result)
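The intersection semantics tested above can be read from the example: a time range survives only where it is covered by every input DataFrame. A naive O(n·m) pairwise sketch of the same idea (illustration only, not the package's implementation); folding it over the list of DataFrames reproduces the expected_result above:

import pandas as pd

def intersect_two_period_dataframes(a: pd.DataFrame, b: pd.DataFrame) -> pd.DataFrame:
    rows = []
    for _, ra in a.iterrows():
        for _, rb in b.iterrows():
            start = max(ra["start_dt"], rb["start_dt"])
            end = min(ra["end_dt"], rb["end_dt"])
            if start <= end:  # keep non-empty overlaps (instants allowed)
                rows.append({"start_dt": start, "end_dt": end})
    return pd.DataFrame(rows)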
tests/select/test_location.py
@@ -0,0 +1,67 @@
+from ocf_data_sampler.select.location import Location
+import pytest
+
+
+def test_make_valid_location_object_with_default_coordinate_system():
+    x, y = -1000.5, 50000
+    location = Location(x=x, y=y)
+    assert location.x == x, "location.x value not set correctly"
+    assert location.y == y, "location.y value not set correctly"
+    assert (
+        location.coordinate_system == "osgb"
+    ), "location.coordinate_system value not set correctly"
+
+
+def test_make_valid_location_object_with_osgb_coordinate_system():
+    x, y, coordinate_system = 1.2, 22.9, "osgb"
+    location = Location(x=x, y=y, coordinate_system=coordinate_system)
+    assert location.x == x, "location.x value not set correctly"
+    assert location.y == y, "location.y value not set correctly"
+    assert (
+        location.coordinate_system == coordinate_system
+    ), "location.coordinate_system value not set correctly"
+
+
+def test_make_valid_location_object_with_lon_lat_coordinate_system():
+    x, y, coordinate_system = 1.2, 1.2, "lon_lat"
+    location = Location(x=x, y=y, coordinate_system=coordinate_system)
+    assert location.x == x, "location.x value not set correctly"
+    assert location.y == y, "location.y value not set correctly"
+    assert (
+        location.coordinate_system == coordinate_system
+    ), "location.coordinate_system value not set correctly"
+
+
+def test_make_invalid_location_object_with_invalid_osgb_x():
+    x, y, coordinate_system = 10000000, 1.2, "osgb"
+    with pytest.raises(ValueError) as err:
+        _ = Location(x=x, y=y, coordinate_system=coordinate_system)
+    assert err.typename == "ValidationError"
+
+
+def test_make_invalid_location_object_with_invalid_osgb_y():
+    x, y, coordinate_system = 2.5, 10000000, "osgb"
+    with pytest.raises(ValueError) as err:
+        _ = Location(x=x, y=y, coordinate_system=coordinate_system)
+    assert err.typename == "ValidationError"
+
+
+def test_make_invalid_location_object_with_invalid_lon_lat_x():
+    x, y, coordinate_system = 200, 1.2, "lon_lat"
+    with pytest.raises(ValueError) as err:
+        _ = Location(x=x, y=y, coordinate_system=coordinate_system)
+    assert err.typename == "ValidationError"
+
+
+def test_make_invalid_location_object_with_invalid_lon_lat_y():
+    x, y, coordinate_system = 2.5, -200, "lon_lat"
+    with pytest.raises(ValueError) as err:
+        _ = Location(x=x, y=y, coordinate_system=coordinate_system)
+    assert err.typename == "ValidationError"
+
+
+def test_make_invalid_location_object_with_invalid_coordinate_system():
+    x, y, coordinate_system = 2.5, 1000, "abcd"
+    with pytest.raises(ValueError) as err:
+        _ = Location(x=x, y=y, coordinate_system=coordinate_system)
+    assert err.typename == "ValidationError"