ocf-data-sampler 0.1.11__py3-none-any.whl → 0.1.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ocf-data-sampler was flagged as possibly problematic.
- ocf_data_sampler/config/load.py +3 -3
- ocf_data_sampler/config/model.py +73 -61
- ocf_data_sampler/config/save.py +5 -4
- ocf_data_sampler/constants.py +140 -12
- ocf_data_sampler/load/gsp.py +6 -5
- ocf_data_sampler/load/load_dataset.py +5 -6
- ocf_data_sampler/load/nwp/nwp.py +17 -5
- ocf_data_sampler/load/nwp/providers/ecmwf.py +6 -7
- ocf_data_sampler/load/nwp/providers/gfs.py +36 -0
- ocf_data_sampler/load/nwp/providers/icon.py +46 -0
- ocf_data_sampler/load/nwp/providers/ukv.py +4 -5
- ocf_data_sampler/load/nwp/providers/utils.py +3 -1
- ocf_data_sampler/load/satellite.py +9 -10
- ocf_data_sampler/load/site.py +10 -6
- ocf_data_sampler/load/utils.py +21 -16
- ocf_data_sampler/numpy_sample/collate.py +10 -9
- ocf_data_sampler/numpy_sample/datetime_features.py +3 -5
- ocf_data_sampler/numpy_sample/gsp.py +12 -14
- ocf_data_sampler/numpy_sample/nwp.py +12 -12
- ocf_data_sampler/numpy_sample/satellite.py +9 -9
- ocf_data_sampler/numpy_sample/site.py +5 -8
- ocf_data_sampler/numpy_sample/sun_position.py +16 -21
- ocf_data_sampler/sample/base.py +15 -17
- ocf_data_sampler/sample/site.py +13 -20
- ocf_data_sampler/sample/uk_regional.py +29 -35
- ocf_data_sampler/select/dropout.py +16 -14
- ocf_data_sampler/select/fill_time_periods.py +15 -5
- ocf_data_sampler/select/find_contiguous_time_periods.py +88 -75
- ocf_data_sampler/select/geospatial.py +63 -54
- ocf_data_sampler/select/location.py +16 -51
- ocf_data_sampler/select/select_spatial_slice.py +105 -89
- ocf_data_sampler/select/select_time_slice.py +71 -58
- ocf_data_sampler/select/spatial_slice_for_dataset.py +7 -6
- ocf_data_sampler/select/time_slice_for_dataset.py +17 -16
- ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +126 -118
- ocf_data_sampler/torch_datasets/datasets/site.py +135 -101
- ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +6 -2
- ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +23 -22
- ocf_data_sampler/torch_datasets/utils/validate_channels.py +23 -19
- ocf_data_sampler/utils.py +3 -1
- {ocf_data_sampler-0.1.11.dist-info → ocf_data_sampler-0.1.16.dist-info}/METADATA +7 -18
- ocf_data_sampler-0.1.16.dist-info/RECORD +56 -0
- {ocf_data_sampler-0.1.11.dist-info → ocf_data_sampler-0.1.16.dist-info}/WHEEL +1 -1
- {ocf_data_sampler-0.1.11.dist-info → ocf_data_sampler-0.1.16.dist-info}/top_level.txt +1 -1
- scripts/refactor_site.py +62 -33
- utils/compute_icon_mean_stddev.py +72 -0
- ocf_data_sampler-0.1.11.dist-info/LICENSE +0 -21
- ocf_data_sampler-0.1.11.dist-info/RECORD +0 -82
- tests/__init__.py +0 -0
- tests/config/test_config.py +0 -113
- tests/config/test_load.py +0 -7
- tests/config/test_save.py +0 -28
- tests/conftest.py +0 -319
- tests/load/test_load_gsp.py +0 -15
- tests/load/test_load_nwp.py +0 -21
- tests/load/test_load_satellite.py +0 -17
- tests/load/test_load_sites.py +0 -14
- tests/numpy_sample/test_collate.py +0 -21
- tests/numpy_sample/test_datetime_features.py +0 -37
- tests/numpy_sample/test_gsp.py +0 -38
- tests/numpy_sample/test_nwp.py +0 -13
- tests/numpy_sample/test_satellite.py +0 -40
- tests/numpy_sample/test_sun_position.py +0 -81
- tests/select/test_dropout.py +0 -69
- tests/select/test_fill_time_periods.py +0 -28
- tests/select/test_find_contiguous_time_periods.py +0 -202
- tests/select/test_location.py +0 -67
- tests/select/test_select_spatial_slice.py +0 -154
- tests/select/test_select_time_slice.py +0 -275
- tests/test_sample/test_base.py +0 -164
- tests/test_sample/test_site_sample.py +0 -165
- tests/test_sample/test_uk_regional_sample.py +0 -136
- tests/torch_datasets/test_merge_and_fill_utils.py +0 -40
- tests/torch_datasets/test_pvnet_uk.py +0 -154
- tests/torch_datasets/test_site.py +0 -226
- tests/torch_datasets/test_validate_channels_utils.py +0 -78
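
The headline additions in this release are the new GFS and ICON NWP providers, the much larger set of normalisation constants in constants.py, and the new utils/compute_icon_mean_stddev.py helper. That script is not shown in this diff; the sketch below only illustrates the general pattern such a helper typically follows, computing per-channel mean and standard deviation over an NWP dataset with xarray. The zarr path, variable name and dimension names are placeholders, not the library's actual API.

# Illustrative sketch only: per-channel normalisation statistics for an NWP dataset.
# "icon_eu.zarr", the variable name and the "channel" dimension are assumptions,
# not necessarily what utils/compute_icon_mean_stddev.py uses.
import xarray as xr

ds = xr.open_zarr("icon_eu.zarr")   # placeholder path
da = ds["icon_eu_data"]             # placeholder variable name

# Reduce over every dimension except the channel dimension
reduce_dims = [d for d in da.dims if d != "channel"]
means = da.mean(dim=reduce_dims).compute()
stds = da.std(dim=reduce_dims).compute()

for channel in means["channel"].values:
    print(f"{channel}: mean={float(means.sel(channel=channel)):.3f}, "
          f"std={float(stds.sel(channel=channel)):.3f}")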
tests/select/test_dropout.py
DELETED

@@ -1,69 +0,0 @@
from ocf_data_sampler.select.dropout import draw_dropout_time, apply_dropout_time

import numpy as np
import pandas as pd
import xarray as xr

import pytest


@pytest.fixture(scope="module")
def da_sample():
    """Create dummy data which looks like satellite data"""

    datetimes = pd.date_range("2024-01-01 12:00", "2024-01-01 13:00", freq="5min")

    da_sat = xr.DataArray(
        np.random.normal(size=(len(datetimes))),
        coords=dict(time_utc=datetimes)
    )
    return da_sat


def test_draw_dropout_time():
    t0 = pd.Timestamp("2021-01-01 04:00:00")

    dropout_timedeltas = pd.to_timedelta([-30, -60], unit="min")
    dropout_time = draw_dropout_time(t0, dropout_timedeltas, dropout_frac=1)

    assert isinstance(dropout_time, pd.Timestamp)
    assert (dropout_time-t0) in dropout_timedeltas


def test_draw_dropout_time_partial():
    t0 = pd.Timestamp("2021-01-01 04:00:00")

    dropout_timedeltas = pd.to_timedelta([-30, -60], unit="min")

    dropouts = set()

    # Loop over 1000 to have very high probability of seeing all dropouts
    # The chances of this failing by chance are approx ((2/3)^100)*3 = 7e-18
    for _ in range(100):
        dropouts.add(draw_dropout_time(t0, dropout_timedeltas, dropout_frac=2/3))

    # Check all expected dropouts are present
    dropouts == {None} | set(t0 + dt for dt in dropout_timedeltas)


def test_draw_dropout_time_null():
    t0 = pd.Timestamp("2021-01-01 04:00:00")

    # Dropout fraction is 0
    dropout_timedeltas = [pd.Timedelta(-30, "min")]
    dropout_time = draw_dropout_time(t0, dropout_timedeltas=dropout_timedeltas, dropout_frac=0)
    assert dropout_time==t0

    # No dropout timedeltas and dropout fraction is 0
    dropout_time = draw_dropout_time(t0, dropout_timedeltas=[], dropout_frac=0)
    assert dropout_time==t0


@pytest.mark.parametrize("t0_str", ["12:00", "12:30", "13:00"])
def test_apply_dropout_time(da_sample, t0_str):
    dropout_time = pd.Timestamp(f"2024-01-01 {t0_str}")

    da_dropout = apply_dropout_time(da_sample, dropout_time)

    assert da_dropout.sel(time_utc=slice(None, dropout_time)).notnull().all()
    assert da_dropout.sel(time_utc=slice(dropout_time+pd.Timedelta(5, "min"), None)).isnull().all()
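
The deleted test above pins down the behaviour of the two dropout helpers: draw_dropout_time returns t0 shifted by one of the offered timedeltas (or t0 unchanged when dropout is disabled), and apply_dropout_time NaN-masks every timestep after the drawn dropout time. A minimal sketch of that masking semantic in plain xarray, illustrative rather than the package's own implementation:

# Sketch of the masking behaviour asserted in the deleted test; not the
# implementation in ocf_data_sampler.select.dropout.
import numpy as np
import pandas as pd
import xarray as xr


def mask_after(da: xr.DataArray, dropout_time: pd.Timestamp) -> xr.DataArray:
    """Keep values at or before `dropout_time`; set later timesteps to NaN."""
    return da.where(da.time_utc <= dropout_time)


times = pd.date_range("2024-01-01 12:00", "2024-01-01 13:00", freq="5min")
da = xr.DataArray(np.random.normal(size=len(times)), coords={"time_utc": times})

masked = mask_after(da, pd.Timestamp("2024-01-01 12:30"))
assert masked.sel(time_utc=slice(None, "2024-01-01 12:30")).notnull().all()
assert masked.sel(time_utc=slice("2024-01-01 12:35", None)).isnull().all()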

tests/select/test_fill_time_periods.py
DELETED

@@ -1,28 +0,0 @@
import pandas as pd

from ocf_data_sampler.select.fill_time_periods import fill_time_periods

def test_fill_time_periods():
    time_periods = pd.DataFrame(
        {
            "start_dt": [
                "2021-01-01 04:10:00", "2021-01-01 09:00:00",
                "2021-01-01 09:15:00", "2021-01-01 12:00:00"
            ],
            "end_dt": [
                "2021-01-01 06:00:00", "2021-01-01 09:00:00",
                "2021-01-01 09:20:00", "2021-01-01 14:45:00"
            ],
        }
    )
    freq = pd.Timedelta("30min")
    filled_time_periods = fill_time_periods(time_periods, freq)

    expected_times = [
        "04:30", "05:00", "05:30", "06:00", "09:00", "12:00",
        "12:30", "13:00", "13:30", "14:00", "14:30"
    ]

    expected_times = pd.DatetimeIndex([f"2021-01-01 {t}" for t in expected_times])

    pd.testing.assert_index_equal(filled_time_periods, expected_times)
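
The expected values in the deleted test above imply the contract of fill_time_periods: each [start_dt, end_dt] row is expanded into timestamps on the requested frequency grid, from the start time rounded up to the grid through the end time inclusive, so a period too short to contain a grid point (such as 09:15–09:20 at 30-minute frequency) contributes nothing. A hedged re-implementation of that contract in plain pandas:

# Sketch of the behaviour implied by the deleted test; not the package's own code.
import pandas as pd


def fill_time_periods_sketch(time_periods: pd.DataFrame, freq: str) -> pd.DatetimeIndex:
    """Expand each [start_dt, end_dt] row into timestamps on the `freq` grid."""
    stamps = []
    for row in time_periods.itertuples():
        start = pd.Timestamp(row.start_dt).ceil(freq)  # first grid point >= start_dt
        stamps.extend(pd.date_range(start, row.end_dt, freq=freq))
    return pd.DatetimeIndex(sorted(stamps))


periods = pd.DataFrame(
    {
        "start_dt": ["2021-01-01 04:10", "2021-01-01 09:15"],
        "end_dt": ["2021-01-01 06:00", "2021-01-01 09:20"],
    }
)
# First row yields 04:30, 05:00, 05:30, 06:00; the second contains no grid point
print(fill_time_periods_sketch(periods, "30min"))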

tests/select/test_find_contiguous_time_periods.py
DELETED

@@ -1,202 +0,0 @@
import pandas as pd

from ocf_data_sampler.select.find_contiguous_time_periods import (
    find_contiguous_t0_periods, find_contiguous_t0_periods_nwp,
    intersection_of_multiple_dataframes_of_periods,
)



def test_find_contiguous_t0_periods():

    # Create 5-minutely data timestamps
    freq = pd.Timedelta(5, "min")
    interval_start = pd.Timedelta(-60, "min")
    interval_end = pd.Timedelta(15, "min")

    datetimes = (
        pd.date_range("2023-01-01 12:00", "2023-01-01 17:00", freq=freq)
        .delete([5, 6, 30])
    )

    periods = find_contiguous_t0_periods(
        datetimes=datetimes,
        interval_start=interval_start,
        interval_end=interval_end,
        sample_period_duration=freq,
    )

    expected_results = pd.DataFrame(
        {
            "start_dt": pd.to_datetime(
                [
                    "2023-01-01 13:35",
                    "2023-01-01 15:35",
                ]
            ),
            "end_dt": pd.to_datetime(
                [
                    "2023-01-01 14:10",
                    "2023-01-01 16:45",
                ]
            ),
        },
    )

    assert periods.equals(expected_results)


def test_find_contiguous_t0_periods_nwp():

    # These are the expected results of the test
    expected_results = [
        pd.DataFrame(
            {
                "start_dt": pd.to_datetime(["2023-01-01 03:00", "2023-01-02 03:00"]),
                "end_dt": pd.to_datetime(["2023-01-01 21:00", "2023-01-03 06:00"]),
            },
        ),
        pd.DataFrame(
            {
                "start_dt": pd.to_datetime(
                    [
                        "2023-01-01 05:00",
                        "2023-01-02 05:00",
                    ]
                ),
                "end_dt": pd.to_datetime(
                    [
                        "2023-01-01 21:00",
                        "2023-01-03 06:00",
                    ]
                ),
            },
        ),
        pd.DataFrame(
            {
                "start_dt": pd.to_datetime(
                    [
                        "2023-01-01 05:00",
                        "2023-01-02 05:00",
                        "2023-01-02 14:00",
                    ]
                ),
                "end_dt": pd.to_datetime(
                    [
                        "2023-01-01 18:00",
                        "2023-01-02 09:00",
                        "2023-01-03 03:00",
                    ]
                ),
            },
        ),
        pd.DataFrame(
            {
                "start_dt": pd.to_datetime(
                    [
                        "2023-01-01 05:00",
                        "2023-01-01 11:00",
                        "2023-01-02 05:00",
                        "2023-01-02 14:00",
                    ]
                ),
                "end_dt": pd.to_datetime(
                    [
                        "2023-01-01 06:00",
                        "2023-01-01 15:00",
                        "2023-01-02 06:00",
                        "2023-01-03 00:00",
                    ]
                ),
            },
        ),
        pd.DataFrame(
            {
                "start_dt": pd.to_datetime(
                    [
                        "2023-01-01 06:00",
                        "2023-01-01 12:00",
                        "2023-01-02 06:00",
                        "2023-01-02 15:00",
                    ]
                ),
                "end_dt": pd.to_datetime(
                    [
                        "2023-01-01 09:00",
                        "2023-01-01 18:00",
                        "2023-01-02 09:00",
                        "2023-01-03 03:00",
                    ]
                ),
            },
        ),
    ]

    # Create 3-hourly init times with a few time stamps missing
    freq = pd.Timedelta(3, "h")

    init_times = (
        pd.date_range("2023-01-01 03:00", "2023-01-02 21:00", freq=freq)
        .delete([1, 4, 5, 6, 7, 9, 10])
    )

    # Choose some history durations and max stalenesses
    history_durations_hr = [0, 2, 2, 2, 2]
    max_stalenesses_hr = [9, 9, 6, 3, 6]
    max_dropouts_hr = [0, 0, 0, 0, 3]

    for i in range(len(expected_results)):
        interval_start = pd.Timedelta(-history_durations_hr[i], "h")
        max_staleness = pd.Timedelta(max_stalenesses_hr[i], "h")
        max_dropout = pd.Timedelta(max_dropouts_hr[i], "h")

        time_periods = find_contiguous_t0_periods_nwp(
            init_times=init_times,
            interval_start=interval_start,
            max_staleness=max_staleness,
            max_dropout=max_dropout,
        )

        # Check if results are as expected
        assert time_periods.equals(expected_results[i])


def test_intersection_of_multiple_dataframes_of_periods():
    periods_1 = pd.DataFrame(
        {
            "start_dt": pd.to_datetime(["2023-01-01 05:00", "2023-01-01 14:10"]),
            "end_dt": pd.to_datetime(["2023-01-01 13:35", "2023-01-01 18:00"]),
        },
    )

    periods_2 = pd.DataFrame(
        {
            "start_dt": pd.to_datetime(["2023-01-01 12:00"]),
            "end_dt": pd.to_datetime(["2023-01-02 00:00"]),
        },
    )

    periods_3 = pd.DataFrame(
        {
            "start_dt": pd.to_datetime(["2023-01-01 00:00", "2023-01-01 13:00"]),
            "end_dt": pd.to_datetime(["2023-01-01 12:30", "2023-01-01 23:00"]),
        },
    )

    expected_result = pd.DataFrame(
        {
            "start_dt": pd.to_datetime(
                ["2023-01-01 12:00", "2023-01-01 13:00", "2023-01-01 14:10"]
            ),
            "end_dt": pd.to_datetime([
                "2023-01-01 12:30", "2023-01-01 13:35", "2023-01-01 18:00"]
            ),
        },
    )

    overlaping_periods = intersection_of_multiple_dataframes_of_periods(
        [periods_1, periods_2, periods_3]
    )

    # Check if results are as expected
    assert overlaping_periods.equals(expected_result)
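
The final test above fixes the semantics of intersection_of_multiple_dataframes_of_periods: the output contains exactly the time ranges covered by every input frame, which can be built by repeatedly intersecting pairs of [start_dt, end_dt] intervals. A compact sketch of that pairwise intersection, illustrative only:

# Illustrative pairwise interval intersection; not the package's implementation.
import pandas as pd


def intersect_two(a: pd.DataFrame, b: pd.DataFrame) -> pd.DataFrame:
    """Intersect two frames of [start_dt, end_dt] periods."""
    rows = []
    for a_row in a.itertuples():
        for b_row in b.itertuples():
            start = max(a_row.start_dt, b_row.start_dt)
            end = min(a_row.end_dt, b_row.end_dt)
            if start < end:  # keep non-empty overlaps (zero-length handling may differ)
                rows.append({"start_dt": start, "end_dt": end})
    if not rows:
        return pd.DataFrame(columns=["start_dt", "end_dt"])
    return pd.DataFrame(rows).sort_values("start_dt").reset_index(drop=True)


def intersect_many(frames: list[pd.DataFrame]) -> pd.DataFrame:
    result = frames[0]
    for frame in frames[1:]:
        result = intersect_two(result, frame)
    return result

Applied to periods_1, periods_2 and periods_3 from the test above, intersect_many reproduces the same three rows as expected_result.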
tests/select/test_location.py
DELETED

@@ -1,67 +0,0 @@
from ocf_data_sampler.select.location import Location
import pytest


def test_make_valid_location_object_with_default_coordinate_system():
    x, y = -1000.5, 50000
    location = Location(x=x, y=y)
    assert location.x == x, "location.x value not set correctly"
    assert location.y == y, "location.x value not set correctly"
    assert (
        location.coordinate_system == "osgb"
    ), "location.coordinate_system value not set correctly"


def test_make_valid_location_object_with_osgb_coordinate_system():
    x, y, coordinate_system = 1.2, 22.9, "osgb"
    location = Location(x=x, y=y, coordinate_system=coordinate_system)
    assert location.x == x, "location.x value not set correctly"
    assert location.y == y, "location.x value not set correctly"
    assert (
        location.coordinate_system == coordinate_system
    ), "location.coordinate_system value not set correctly"


def test_make_valid_location_object_with_lon_lat_coordinate_system():
    x, y, coordinate_system = 1.2, 1.2, "lon_lat"
    location = Location(x=x, y=y, coordinate_system=coordinate_system)
    assert location.x == x, "location.x value not set correctly"
    assert location.y == y, "location.x value not set correctly"
    assert (
        location.coordinate_system == coordinate_system
    ), "location.coordinate_system value not set correctly"


def test_make_invalid_location_object_with_invalid_osgb_x():
    x, y, coordinate_system = 10000000, 1.2, "osgb"
    with pytest.raises(ValueError) as err:
        _ = Location(x=x, y=y, coordinate_system=coordinate_system)
    assert err.typename == "ValidationError"


def test_make_invalid_location_object_with_invalid_osgb_y():
    x, y, coordinate_system = 2.5, 10000000, "osgb"
    with pytest.raises(ValueError) as err:
        _ = Location(x=x, y=y, coordinate_system=coordinate_system)
    assert err.typename == "ValidationError"


def test_make_invalid_location_object_with_invalid_lon_lat_x():
    x, y, coordinate_system = 200, 1.2, "lon_lat"
    with pytest.raises(ValueError) as err:
        _ = Location(x=x, y=y, coordinate_system=coordinate_system)
    assert err.typename == "ValidationError"


def test_make_invalid_location_object_with_invalid_lon_lat_y():
    x, y, coordinate_system = 2.5, -200, "lon_lat"
    with pytest.raises(ValueError) as err:
        _ = Location(x=x, y=y, coordinate_system=coordinate_system)
    assert err.typename == "ValidationError"


def test_make_invalid_location_object_with_invalid_coordinate_system():
    x, y, coordinate_system = 2.5, 1000, "abcd"
    with pytest.raises(ValueError) as err:
        _ = Location(x=x, y=y, coordinate_system=coordinate_system)
    assert err.typename == "ValidationError"
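
These tests document the Location contract: coordinate_system defaults to "osgb", and x/y values outside the valid range for the chosen system (or an unknown system name) raise a pydantic ValidationError. A rough pydantic sketch under those assumptions follows; the exact bounds used in ocf_data_sampler.select.location may differ, and the real model also accepts an "idx" system used in the next test file.

# Hedged sketch of a Location-like model; the bounds below are assumptions drawn
# from the deleted tests, not the values used in ocf_data_sampler.select.location.
from pydantic import BaseModel, model_validator

# Assumed ranges per coordinate system: (x_min, x_max, y_min, y_max)
BOUNDS = {
    "osgb": (-1_000_000, 2_000_000, -1_000_000, 2_000_000),
    "lon_lat": (-180, 180, -90, 90),
}


class LocationSketch(BaseModel):
    x: float
    y: float
    coordinate_system: str = "osgb"

    @model_validator(mode="after")
    def check_bounds(self) -> "LocationSketch":
        if self.coordinate_system not in BOUNDS:
            raise ValueError(f"Unknown coordinate system: {self.coordinate_system}")
        x_min, x_max, y_min, y_max = BOUNDS[self.coordinate_system]
        if not (x_min <= self.x <= x_max and y_min <= self.y <= y_max):
            raise ValueError("x/y outside the valid range for this coordinate system")
        return self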

tests/select/test_select_spatial_slice.py
DELETED

@@ -1,154 +0,0 @@
import numpy as np
import xarray as xr
from ocf_data_sampler.select.location import Location
import pytest

from ocf_data_sampler.select.select_spatial_slice import (
    select_spatial_slice_pixels, _get_idx_of_pixel_closest_to_poi
)

@pytest.fixture(scope="module")
def da():
    # Create dummy data
    x = np.arange(-100, 100)
    y = np.arange(-100, 100)

    da = xr.DataArray(
        np.random.normal(size=(len(x), len(y))),
        coords=dict(
            x_osgb=(["x_osgb"], x),
            y_osgb=(["y_osgb"], y),
        )
    )
    return da


def test_get_idx_of_pixel_closest_to_poi(da):

    idx_location = _get_idx_of_pixel_closest_to_poi(
        da,
        location=Location(x=10, y=10, coordinate_system="osgb"),
    )

    assert idx_location.coordinate_system == "idx"
    assert idx_location.x == 110
    assert idx_location.y == 110


def test_select_spatial_slice_pixels(da):

    # Select window which lies within x-y bounds of the data
    da_sliced = select_spatial_slice_pixels(
        da,
        location=Location(x=-90, y=-80, coordinate_system="osgb"),
        width_pixels=10,
        height_pixels=10,
        allow_partial_slice=True,
    )

    assert isinstance(da_sliced, xr.DataArray)
    assert (da_sliced.x_osgb.values == np.arange(-95, -85)).all()
    assert (da_sliced.y_osgb.values == np.arange(-85, -75)).all()
    # No padding in this case so no NaNs
    assert not da_sliced.isnull().any()

    # Select window where the edge of the window lies right on the edge of the data
    da_sliced = select_spatial_slice_pixels(
        da,
        location=Location(x=-90, y=-80, coordinate_system="osgb"),
        width_pixels=20,
        height_pixels=20,
        allow_partial_slice=True,
    )

    assert isinstance(da_sliced, xr.DataArray)
    assert (da_sliced.x_osgb.values == np.arange(-100, -80)).all()
    assert (da_sliced.y_osgb.values == np.arange(-90, -70)).all()
    # No padding in this case so no NaNs
    assert not da_sliced.isnull().any()

    # Select window which is partially outside the boundary of the data - padded on left
    da_sliced = select_spatial_slice_pixels(
        da,
        location=Location(x=-90, y=-80, coordinate_system="osgb"),
        width_pixels=30,
        height_pixels=30,
        allow_partial_slice=True,
    )

    assert isinstance(da_sliced, xr.DataArray)
    assert (da_sliced.x_osgb.values == np.arange(-105, -75)).all()
    assert (da_sliced.y_osgb.values == np.arange(-95, -65)).all()
    # Data has been padded on left by 5 NaN pixels
    assert da_sliced.isnull().sum() == 5*len(da_sliced.y_osgb)

    # Select window which is partially outside the boundary of the data - padded on right
    da_sliced = select_spatial_slice_pixels(
        da,
        location=Location(x=90, y=-80, coordinate_system="osgb"),
        width_pixels=30,
        height_pixels=30,
        allow_partial_slice=True,
    )

    assert isinstance(da_sliced, xr.DataArray)
    assert (da_sliced.x_osgb.values == np.arange(75, 105)).all()
    assert (da_sliced.y_osgb.values == np.arange(-95, -65)).all()
    # Data has been padded on right by 5 NaN pixels
    assert da_sliced.isnull().sum() == 5*len(da_sliced.y_osgb)

    location = Location(x=-90, y=-0, coordinate_system="osgb")

    # Select window which is partially outside the boundary of the data - padded on top
    da_sliced = select_spatial_slice_pixels(
        da,
        location=Location(x=-90, y=95, coordinate_system="osgb"),
        width_pixels=20,
        height_pixels=20,
        allow_partial_slice=True,
    )

    assert isinstance(da_sliced, xr.DataArray)
    assert (da_sliced.x_osgb.values == np.arange(-100, -80)).all()
    assert (da_sliced.y_osgb.values == np.arange(85, 105)).all()
    # Data has been padded on top by 5 NaN pixels
    assert da_sliced.isnull().sum() == 5*len(da_sliced.x_osgb)

    # Select window which is partially outside the boundary of the data - padded on bottom
    da_sliced = select_spatial_slice_pixels(
        da,
        location=Location(x=-90, y=-95, coordinate_system="osgb"),
        width_pixels=20,
        height_pixels=20,
        allow_partial_slice=True,
    )

    assert isinstance(da_sliced, xr.DataArray)
    assert (da_sliced.x_osgb.values == np.arange(-100, -80)).all()
    assert (da_sliced.y_osgb.values == np.arange(-105, -85)).all()
    # Data has been padded on bottom by 5 NaN pixels
    assert da_sliced.isnull().sum() == 5*len(da_sliced.x_osgb)

    # Select window which is partially outside the boundary of the data - padded right and bottom
    da_sliced = select_spatial_slice_pixels(
        da,
        location=Location(x=90, y=-80, coordinate_system="osgb"),
        width_pixels=50,
        height_pixels=50,
        allow_partial_slice=True,
    )

    assert isinstance(da_sliced, xr.DataArray)
    assert (da_sliced.x_osgb.values == np.arange(65, 115)).all()
    assert (da_sliced.y_osgb.values == np.arange(-105, -55)).all()
    # Data has been padded on right by 15 pixels and bottom by 5 NaN pixels
    assert da_sliced.isnull().sum() == 15*len(da_sliced.y_osgb) + 5*len(da_sliced.x_osgb) - 15*5
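
test_get_idx_of_pixel_closest_to_poi above shows what the helper returns: the array index of the pixel whose coordinate is nearest the point of interest, so x=10 on the -100..99 dummy grid maps to index 110. The core of that lookup is a one-line nearest-neighbour search per axis, sketched here for illustration:

# Minimal nearest-pixel lookup, illustrating the behaviour asserted above.
import numpy as np

x_osgb = np.arange(-100, 100)  # same dummy grid as the deleted test fixture
y_osgb = np.arange(-100, 100)

poi_x, poi_y = 10, 10
x_idx = int(np.argmin(np.abs(x_osgb - poi_x)))
y_idx = int(np.argmin(np.abs(y_osgb - poi_y)))

assert (x_idx, y_idx) == (110, 110)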