ocf-data-sampler 0.0.50__py3-none-any.whl → 0.0.52__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocf_data_sampler/constants.py +48 -0
- ocf_data_sampler/select/select_time_slice.py +7 -36
- ocf_data_sampler/select/time_slice_for_dataset.py +3 -7
- {ocf_data_sampler-0.0.50.dist-info → ocf_data_sampler-0.0.52.dist-info}/METADATA +3 -2
- {ocf_data_sampler-0.0.50.dist-info → ocf_data_sampler-0.0.52.dist-info}/RECORD +9 -9
- tests/select/test_select_time_slice.py +9 -6
- {ocf_data_sampler-0.0.50.dist-info → ocf_data_sampler-0.0.52.dist-info}/LICENSE +0 -0
- {ocf_data_sampler-0.0.50.dist-info → ocf_data_sampler-0.0.52.dist-info}/WHEEL +0 -0
- {ocf_data_sampler-0.0.50.dist-info → ocf_data_sampler-0.0.52.dist-info}/top_level.txt +0 -0
ocf_data_sampler/constants.py
CHANGED

@@ -5,6 +5,7 @@ import numpy as np
 NWP_PROVIDERS = [
     "ukv",
     "ecmwf",
+    "gfs"
 ]
 
 
@@ -127,13 +128,60 @@ ECMWF_MEAN = {
 ECMWF_STD = _to_data_array(ECMWF_STD)
 ECMWF_MEAN = _to_data_array(ECMWF_MEAN)
 
+# ------ GFS
+GFS_STD = {
+    "dlwrf": 96.305916,
+    "dswrf": 246.18533,
+    "hcc": 42.525383,
+    "lcc": 44.3732,
+    "mcc": 43.150745,
+    "prate": 0.00010159573,
+    "r": 25.440672,
+    "sde": 0.43345627,
+    "t": 22.825893,
+    "tcc": 41.030598,
+    "u10": 5.470838,
+    "u100": 6.8899174,
+    "v10": 4.7401133,
+    "v100": 6.076132,
+    "vis": 8294.022,
+    "u": 10.614556,
+    "v": 7.176398,
+}
+
+GFS_MEAN = {
+    "dlwrf": 298.342,
+    "dswrf": 168.12321,
+    "hcc": 35.272,
+    "lcc": 43.578342,
+    "mcc": 33.738823,
+    "prate": 2.8190969e-05,
+    "r": 18.359747,
+    "sde": 0.36937004,
+    "t": 278.5223,
+    "tcc": 66.841606,
+    "u10": -0.0022310058,
+    "u100": 0.0823025,
+    "v10": 0.06219831,
+    "v100": 0.0797807,
+    "vis": 19628.32,
+    "u": 11.645444,
+    "v": 0.12330122,
+}
+
+GFS_STD = _to_data_array(GFS_STD)
+GFS_MEAN = _to_data_array(GFS_MEAN)
+
+
 NWP_STDS = NWPStatDict(
     ukv=UKV_STD,
     ecmwf=ECMWF_STD,
+    gfs=GFS_STD
 )
 NWP_MEANS = NWPStatDict(
     ukv=UKV_MEAN,
     ecmwf=ECMWF_MEAN,
+    gfs=GFS_MEAN
 )
 
 # ------ Satellite
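
The GFS statistics above follow the same pattern as the existing UKV and ECMWF constants: per-channel means and standard deviations, converted with `_to_data_array` and exposed through `NWP_MEANS` and `NWP_STDS`. A minimal standalone sketch of how such statistics are typically consumed for z-score normalisation; the `to_data_array` helper and the `da_gfs` array below are illustrative stand-ins, not the package's own `_to_data_array` or data pipeline:

    import numpy as np
    import xarray as xr

    # Two of the GFS statistics introduced in this version (values copied from the diff above)
    gfs_mean = {"t": 278.5223, "dswrf": 168.12321}
    gfs_std = {"t": 22.825893, "dswrf": 246.18533}

    def to_data_array(d: dict) -> xr.DataArray:
        # Illustrative stand-in for the package's _to_data_array helper
        return xr.DataArray(list(d.values()), coords={"channel": list(d.keys())}, dims="channel")

    # Hypothetical GFS data with a matching "channel" coordinate
    da_gfs = xr.DataArray(
        np.random.rand(4, 2),
        dims=["step", "channel"],
        coords={"channel": ["t", "dswrf"]},
    )

    # Per-channel z-score normalisation, broadcast over the "channel" dimension
    da_norm = (da_gfs - to_data_array(gfs_mean)) / to_data_array(gfs_std)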

ocf_data_sampler/select/select_time_slice.py
CHANGED

@@ -2,40 +2,14 @@ import xarray as xr
 import pandas as pd
 import numpy as np
 
-
-def _sel_fillnan(
-    da: xr.DataArray,
-    start_dt: pd.Timestamp,
-    end_dt: pd.Timestamp,
-    sample_period_duration: pd.Timedelta,
-) -> xr.DataArray:
-    """Select a time slice from a DataArray, filling missing times with NaNs."""
-    requested_times = pd.date_range(start_dt, end_dt, freq=sample_period_duration)
-    return da.reindex(time_utc=requested_times)
-
-
-def _sel_default(
-    da: xr.DataArray,
-    start_dt: pd.Timestamp,
-    end_dt: pd.Timestamp,
-    sample_period_duration: pd.Timedelta,
-) -> xr.DataArray:
-    """Select a time slice from a DataArray, without filling missing times."""
-    return da.sel(time_utc=slice(start_dt, end_dt))
-
-
 def select_time_slice(
     ds: xr.DataArray,
     t0: pd.Timestamp,
     interval_start: pd.Timedelta,
     interval_end: pd.Timedelta,
     sample_period_duration: pd.Timedelta,
-    fill_selection: bool = False,
 ):
     """Select a time slice from a Dataset or DataArray."""
-
-    _sel = _sel_fillnan if fill_selection else _sel_default
-
     t0_datetime_utc = pd.Timestamp(t0)
     start_dt = t0_datetime_utc + interval_start
     end_dt = t0_datetime_utc + interval_end
@@ -43,8 +17,7 @@ def select_time_slice(
     start_dt = start_dt.ceil(sample_period_duration)
     end_dt = end_dt.ceil(sample_period_duration)
 
-    return
-
+    return ds.sel(time_utc=slice(start_dt, end_dt))
 
 def select_time_slice_nwp(
     da: xr.DataArray,
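
With `_sel_fillnan`, `_sel_default`, and the `fill_selection` flag removed above, `select_time_slice` now always returns a plain label-based slice, so a request that extends past the available data comes back clipped rather than padded with NaNs. A small sketch, on illustrative data, of the new behaviour and of how a caller that relied on the old `fill_selection=True` padding could reproduce it afterwards with `reindex`:

    import numpy as np
    import pandas as pd
    import xarray as xr

    # Illustrative array: 5-minutely data covering one hour
    times = pd.date_range("2024-01-01 00:00", "2024-01-01 01:00", freq="5min")
    da = xr.DataArray(np.arange(len(times)), dims="time_utc", coords={"time_utc": times})

    start_dt = pd.Timestamp("2024-01-01 00:50")
    end_dt = pd.Timestamp("2024-01-01 01:20")  # extends past the data

    # New behaviour: the slice is clipped to the times that exist
    clipped = da.sel(time_utc=slice(start_dt, end_dt))

    # Old fill_selection=True behaviour, reproduced outside the function:
    # reindex onto the full requested range, leaving NaNs for missing times
    requested = pd.date_range(start_dt, end_dt, freq="5min")
    padded = clipped.reindex(time_utc=requested)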

@@ -57,7 +30,6 @@ def select_time_slice_nwp(
     accum_channels: list[str] = [],
     channel_dim_name: str = "channel",
 ):
-
     if dropout_timedeltas is not None:
         assert all(
             [t < pd.Timedelta(0) for t in dropout_timedeltas]
@@ -66,8 +38,7 @@ def select_time_slice_nwp(
     assert 0 <= dropout_frac <= 1
     consider_dropout = (dropout_timedeltas is not None) and dropout_frac > 0
 
-
-    # The accumatation and non-accumulation channels
+    # The accumatation and non-accumulation channels
     accum_channels = np.intersect1d(
         da[channel_dim_name].values, accum_channels
     )
@@ -100,19 +71,19 @@ def select_time_slice_nwp(
 
     # Find the required steps for all target times
     steps = target_times - selected_init_times
-
+
     # We want one timestep for each target_time_hourly (obviously!) If we simply do
     # nwp.sel(init_time=init_times, step=steps) then we'll get the *product* of
     # init_times and steps, which is not what # we want! Instead, we use xarray's
     # vectorized-indexing mode by using a DataArray indexer. See the last example here:
     # https://docs.xarray.dev/en/latest/user-guide/indexing.html#more-advanced-indexing
+
     coords = {"target_time_utc": target_times}
     init_time_indexer = xr.DataArray(selected_init_times, coords=coords)
     step_indexer = xr.DataArray(steps, coords=coords)
 
     if len(accum_channels) == 0:
         da_sel = da.sel(step=step_indexer, init_time_utc=init_time_indexer)
-
     else:
         # First minimise the size of the dataset we are diffing
         # - find the init times we are slicing from
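
The comment in the hunk above explains the key trick in `select_time_slice_nwp`: passing plain arrays of init times and steps to `.sel` would select their Cartesian product, whereas DataArray indexers that share a `target_time_utc` dimension select one (init time, step) pair per target time. A self-contained illustration of that xarray vectorised-indexing behaviour on toy data (none of the arrays below come from the sampler itself):

    import numpy as np
    import pandas as pd
    import xarray as xr

    # Toy NWP-like array: 4 init times, hourly steps out to 12 hours
    init_times = pd.date_range("2024-01-01", periods=4, freq=pd.Timedelta(hours=6))
    steps = pd.timedelta_range("0h", "12h", freq=pd.Timedelta(hours=1))
    da = xr.DataArray(
        np.random.rand(len(init_times), len(steps)),
        dims=["init_time_utc", "step"],
        coords={"init_time_utc": init_times, "step": steps},
    )

    # One (init time, step) pair per target time
    target_times = pd.date_range("2024-01-01 06:00", periods=3, freq=pd.Timedelta(hours=1))
    selected_init_times = pd.DatetimeIndex([init_times[1]] * len(target_times))
    wanted_steps = target_times - selected_init_times

    coords = {"target_time_utc": target_times}
    init_time_indexer = xr.DataArray(selected_init_times, dims="target_time_utc", coords=coords)
    step_indexer = xr.DataArray(wanted_steps, dims="target_time_utc", coords=coords)

    # Vectorised selection: one value per target time, not a 3 x 3 product
    da_sel = da.sel(init_time_utc=init_time_indexer, step=step_indexer)
    assert da_sel.dims == ("target_time_utc",) and da_sel.shape == (3,)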

@@ -136,14 +107,14 @@ def select_time_slice_nwp(
 
         # Slice out the channels which need to be diffed
         da_accum = da_min.sel({channel_dim_name: accum_channels})
-
+
         # Take the diff and slice requested data
         da_accum = da_accum.diff(dim="step", label="lower")
         da_sel_accum = da_accum.sel(step=step_indexer, init_time_utc=init_time_indexer)
 
         # Join diffed and non-diffed variables
         da_sel = xr.concat([da_sel_non_accum, da_sel_accum], dim=channel_dim_name)
-
+
         # Reorder the variable back to the original order
         da_sel = da_sel.sel({channel_dim_name: da[channel_dim_name].values})
 
@@ -153,4 +124,4 @@ def select_time_slice_nwp(
         for v in da_sel[channel_dim_name].values
     ]
 
-    return da_sel
+    return da_sel

ocf_data_sampler/select/time_slice_for_dataset.py
CHANGED

@@ -6,7 +6,6 @@ from ocf_data_sampler.select.dropout import draw_dropout_time, apply_dropout_tim
 from ocf_data_sampler.select.select_time_slice import select_time_slice_nwp, select_time_slice
 from ocf_data_sampler.utils import minutes
 
-
 def slice_datasets_by_time(
     datasets_dict: dict,
     t0: pd.Timestamp,
@@ -23,11 +22,9 @@ def slice_datasets_by_time(
     sliced_datasets_dict = {}
 
     if "nwp" in datasets_dict:
-
         sliced_datasets_dict["nwp"] = {}
-
+
         for nwp_key, da_nwp in datasets_dict["nwp"].items():
-
             nwp_config = config.input_data.nwp[nwp_key]
 
             sliced_datasets_dict["nwp"][nwp_key] = select_time_slice_nwp(
@@ -42,7 +39,6 @@ def slice_datasets_by_time(
             )
 
     if "sat" in datasets_dict:
-
         sat_config = config.input_data.satellite
 
         sliced_datasets_dict["sat"] = select_time_slice(
@@ -76,7 +72,7 @@ def slice_datasets_by_time(
             interval_start=minutes(gsp_config.time_resolution_minutes),
             interval_end=minutes(gsp_config.interval_end_minutes),
         )
-
+
         sliced_datasets_dict["gsp"] = select_time_slice(
             datasets_dict["gsp"],
             t0,
@@ -96,7 +92,7 @@ def slice_datasets_by_time(
             sliced_datasets_dict["gsp"],
             gsp_dropout_time
         )
-
+
     if "site" in datasets_dict:
         site_config = config.input_data.site
 

{ocf_data_sampler-0.0.50.dist-info → ocf_data_sampler-0.0.52.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ocf_data_sampler
-Version: 0.0.50
+Version: 0.0.52
 Summary: Sample from weather data for renewable energy prediction
 Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
 Author-email: info@openclimatefix.org
@@ -56,7 +56,7 @@ Requires-Dist: mkdocs-material>=8.0; extra == "docs"
 # ocf-data-sampler
 
 <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
-[](#contributors-)
 <!-- ALL-CONTRIBUTORS-BADGE:END -->
 
 [](https://github.com/openclimatefix/ocf-data-sampler/tags)
@@ -136,6 +136,7 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
       <td align="center" valign="top" width="14.28%"><a href="https://timothyajaniportfolio-b6v3zq29k-timthegreat.vercel.app/"><img src="https://avatars.githubusercontent.com/u/60073728?v=4?s=100" width="100px;" alt="Ajani Timothy"/><br /><sub><b>Ajani Timothy</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=Tim1119" title="Code">💻</a></td>
       <td align="center" valign="top" width="14.28%"><a href="https://rupeshmangalam.vercel.app/"><img src="https://avatars.githubusercontent.com/u/91172425?v=4?s=100" width="100px;" alt="Rupesh Mangalam"/><br /><sub><b>Rupesh Mangalam</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=RupeshMangalam21" title="Code">💻</a></td>
       <td align="center" valign="top" width="14.28%"><a href="http://siddharth7113.github.io"><img src="https://avatars.githubusercontent.com/u/114160268?v=4?s=100" width="100px;" alt="Siddharth"/><br /><sub><b>Siddharth</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=siddharth7113" title="Code">💻</a></td>
+      <td align="center" valign="top" width="14.28%"><a href="https://github.com/Sachin-G13"><img src="https://avatars.githubusercontent.com/u/190184500?v=4?s=100" width="100px;" alt="Sachin-G13"/><br /><sub><b>Sachin-G13</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=Sachin-G13" title="Code">💻</a></td>
     </tr>
   </tbody>
 </table>

{ocf_data_sampler-0.0.50.dist-info → ocf_data_sampler-0.0.52.dist-info}/RECORD
CHANGED

@@ -1,5 +1,5 @@
 ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-ocf_data_sampler/constants.py,sha256=
+ocf_data_sampler/constants.py,sha256=S4-pkGdKj9HxQcMpI0iL90Ob_37Dloejn3c-niqdar0,5063
 ocf_data_sampler/utils.py,sha256=rKA0BHAyAG4f90zEcgxp25EEYrXS-aOVNzttZ6Mzv2k,250
 ocf_data_sampler/config/__init__.py,sha256=YXnAkgHViHB26hSsjiv32b6EbpG-A1kKTkARJf0_RkY,212
 ocf_data_sampler/config/load.py,sha256=4f7vPHAIAmd-55tPxoIzn7F_TI_ue4NxkDcLPoVWl0g,943
@@ -33,9 +33,9 @@ ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=q7IaNfX95A3z9XHqb
 ocf_data_sampler/select/geospatial.py,sha256=4xL-9y674jjoaXeqE52NHCHVfknciE4OEGsZtn9DvP4,4911
 ocf_data_sampler/select/location.py,sha256=26Y5ZjfFngShBwXieuWSoOA-RLaRzci4TTmcDk3Wg7U,2015
 ocf_data_sampler/select/select_spatial_slice.py,sha256=WNxwur9Q5oetvogATw8-hNejDuEwrXHzuZIovFDjNJA,11488
-ocf_data_sampler/select/select_time_slice.py,sha256=
+ocf_data_sampler/select/select_time_slice.py,sha256=9M-yvDv9K77XfEys_OIR31_aVB56sNWk3BnCnkCgcPI,4725
 ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=3tRrMBXr7s4CnClbVSIq7hpls3H4Y3qYTDwswcxCCCE,1763
-ocf_data_sampler/select/time_slice_for_dataset.py,sha256=
+ocf_data_sampler/select/time_slice_for_dataset.py,sha256=P7cAARfDzjttGDvpKt2zuA4WkLoTmSXy_lBpI8RiA6k,4249
 ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=nJUa2KzVa84ZoM0PT2AbDz26ennmAYc7M7WJVfypPMs,85
 ocf_data_sampler/torch_datasets/datasets/pvnet_uk_regional.py,sha256=xxeX4Js9LQpydehi3BS7k9psqkYGzgJuM17uTYux40M,8742
 ocf_data_sampler/torch_datasets/datasets/site.py,sha256=v7plMF_WJPkfwnJAUFf_8gXAy8SXE5Og_fgZMEm4c20,15257
@@ -61,13 +61,13 @@ tests/select/test_fill_time_periods.py,sha256=o59f2YRe5b0vJrG3B0aYZkYeHnpNk4s6EJ
 tests/select/test_find_contiguous_time_periods.py,sha256=kOga_V7er5We7ewMARXaKdM3agOhsvZYx8inXtUn1PM,5976
 tests/select/test_location.py,sha256=_WZk2FPYeJ-nIfCJS6Sp_yaVEEo7m31DmMFoZzgyCts,2712
 tests/select/test_select_spatial_slice.py,sha256=7EX9b6g-pMdACQx3yefjs5do2s-Rho2UmKevV4oglsU,5147
-tests/select/test_select_time_slice.py,sha256=
+tests/select/test_select_time_slice.py,sha256=nYrdlmZlGEygJKiE26bADiluNPN1qt5kD4FrI2vtxUw,9686
 tests/torch_datasets/conftest.py,sha256=eRCzHE7cxS4AoskExkCGFDBeqItktAYNAdkfpMoFCeE,629
 tests/torch_datasets/test_merge_and_fill_utils.py,sha256=ueA0A7gZaWEgNdsU8p3CnKuvSnlleTUjEhSw2HUUROM,1229
 tests/torch_datasets/test_pvnet_uk_regional.py,sha256=FCiFueeFqrsXe7gWguSjBz5ZeUrvyhGbGw81gaVvkHM,5087
 tests/torch_datasets/test_site.py,sha256=0gT_7k086BBnxqbvOayiUeI-vzJsYXlx3KvACC0c6lk,6114
-ocf_data_sampler-0.0.
-ocf_data_sampler-0.0.
-ocf_data_sampler-0.0.
-ocf_data_sampler-0.0.
-ocf_data_sampler-0.0.
+ocf_data_sampler-0.0.52.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
+ocf_data_sampler-0.0.52.dist-info/METADATA,sha256=lUeJfMdOioAdIfntynW_Ql5Azncfd32jMGKW9DPgkFc,12143
+ocf_data_sampler-0.0.52.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ocf_data_sampler-0.0.52.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
+ocf_data_sampler-0.0.52.dist-info/RECORD,,

tests/select/test_select_time_slice.py
CHANGED

@@ -86,11 +86,15 @@ def test_select_time_slice_out_of_bounds(da_sat_like, t0_str):
     freq = pd.Timedelta("5min")
 
     # The data is available between these times
-    min_time = da_sat_like.time_utc.min()
-    max_time = da_sat_like.time_utc.max()
-
-    # Expect to return these timestamps
-    expected_datetimes = pd.date_range(
+    min_time = pd.Timestamp(da_sat_like.time_utc.min().item())
+    max_time = pd.Timestamp(da_sat_like.time_utc.max().item())
+
+    # Expect to return these timestamps within the requested range
+    expected_datetimes = pd.date_range(
+        max(t0 + interval_start, min_time),
+        min(t0 + interval_end, max_time),
+        freq=freq,
+    )
 
     # Make the partially out of bounds selection
     sat_sample = select_time_slice(
@@ -99,7 +103,6 @@ def test_select_time_slice_out_of_bounds(da_sat_like, t0_str):
         interval_start=interval_start,
         interval_end=interval_end,
         sample_period_duration=freq,
-        fill_selection=True
     )
 
     # Check the returned times are as expected
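
The updated test mirrors the `select_time_slice` change: without `fill_selection`, an out-of-bounds request is expected to come back clipped to the data's own time range, and converting the 0-d xarray min/max values to plain `pd.Timestamp`s keeps the `max()`/`min()` comparisons and the `pd.date_range` arguments as ordinary timestamps. A brief sketch of that expectation on illustrative data (the values below are not the test fixture):

    import numpy as np
    import pandas as pd
    import xarray as xr

    # Illustrative data covering 00:00-01:00 at 5-minute resolution
    times = pd.date_range("2024-01-01 00:00", "2024-01-01 01:00", freq="5min")
    da = xr.DataArray(np.arange(len(times)), dims="time_utc", coords={"time_utc": times})

    # Convert the 0-d xarray min/max into plain pandas Timestamps
    min_time = pd.Timestamp(da.time_utc.min().item())
    max_time = pd.Timestamp(da.time_utc.max().item())

    # A request running past the end of the data should come back clipped
    t0 = pd.Timestamp("2024-01-01 00:45")
    interval_start, interval_end = pd.Timedelta("-15min"), pd.Timedelta("30min")
    expected_datetimes = pd.date_range(
        max(t0 + interval_start, min_time),
        min(t0 + interval_end, max_time),
        freq="5min",
    )
    assert expected_datetimes[-1] == max_time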

{ocf_data_sampler-0.0.50.dist-info → ocf_data_sampler-0.0.52.dist-info}/LICENSE
File without changes

{ocf_data_sampler-0.0.50.dist-info → ocf_data_sampler-0.0.52.dist-info}/WHEEL
File without changes

{ocf_data_sampler-0.0.50.dist-info → ocf_data_sampler-0.0.52.dist-info}/top_level.txt
File without changes