ocf-data-sampler 0.0.10__tar.gz → 0.0.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocf-data-sampler might be problematic. Click here for more details.
- {ocf_data_sampler-0.0.10/ocf_data_sampler.egg-info → ocf_data_sampler-0.0.12}/PKG-INFO +1 -1
- ocf_data_sampler-0.0.12/ocf_data_sampler/load/nwp/providers/ecmwf.py +37 -0
- ocf_data_sampler-0.0.12/ocf_data_sampler/load/nwp/providers/ukv.py +45 -0
- ocf_data_sampler-0.0.12/ocf_data_sampler/load/nwp/providers/utils.py +34 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/torch_datasets/pvnet_uk_regional.py +14 -7
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12/ocf_data_sampler.egg-info}/PKG-INFO +1 -1
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler.egg-info/SOURCES.txt +4 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/setup.py +1 -1
- ocf_data_sampler-0.0.12/tests/__init__.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/LICENSE +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/MANIFEST.in +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/README.md +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/__init__.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/load/__init__.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/load/gsp.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/load/nwp/__init__.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/load/nwp/nwp.py +0 -0
- {ocf_data_sampler-0.0.10/tests → ocf_data_sampler-0.0.12/ocf_data_sampler/load/nwp/providers}/__init__.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/load/satellite.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/load/utils.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/numpy_batch/__init__.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/numpy_batch/gsp.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/numpy_batch/nwp.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/numpy_batch/satellite.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/numpy_batch/sun_position.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/select/__init__.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/select/dropout.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/select/fill_time_periods.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/select/select_time_slice.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/torch_datasets/__init__.py +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler.egg-info/requires.txt +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler.egg-info/top_level.txt +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/requirements.txt +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/setup.cfg +0 -0
- {ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/tests/conftest.py +0 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""ECMWF provider loaders"""
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import xarray as xr
|
|
4
|
+
from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
|
|
5
|
+
from ocf_data_sampler.load.utils import check_time_unique_increasing, make_spatial_coords_increasing
|
|
6
|
+
|
|
7
|
+
def open_ifs(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
    """Load the ECMWF IFS NWP data from one or more zarr stores.

    Args:
        zarr_path: A single zarr path, or a list of zarr paths, to open

    Returns:
        The `ECMWF_UK` variable of the opened dataset as an Xarray DataArray, with
        dimensions ordered (init_time_utc, step, channel, longitude, latitude)
    """
    dataset = open_zarr_paths(zarr_path)

    # Standardise the dimension names
    name_map = {
        "init_time": "init_time_utc",
        "variable": "channel",
    }
    dataset = dataset.rename(name_map)

    # Check the timestamps are unique and increasing
    check_time_unique_increasing(dataset.init_time_utc)

    # Make sure the spatial coords are in increasing order
    dataset = make_spatial_coords_increasing(
        dataset,
        x_coord="longitude",
        y_coord="latitude",
    )

    dim_order = ("init_time_utc", "step", "channel", "longitude", "latitude")
    dataset = dataset.transpose(*dim_order)

    # TODO: should we control the dtype of the DataArray?
    return dataset.ECMWF_UK
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""UKV provider loaders"""
|
|
2
|
+
|
|
3
|
+
import xarray as xr
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
|
|
8
|
+
from ocf_data_sampler.load.utils import check_time_unique_increasing, make_spatial_coords_increasing
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def open_ukv(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
    """Load the UKV NWP data from one or more zarr stores.

    Args:
        zarr_path: A single zarr path, or a list of zarr paths, to open

    Returns:
        The `UKV` variable of the opened dataset as an Xarray DataArray, with
        dimensions ordered (init_time_utc, step, channel, x_osgb, y_osgb)
    """
    dataset = open_zarr_paths(zarr_path)

    # Standardise the dimension names
    name_map = {
        "init_time": "init_time_utc",
        "variable": "channel",
        "x": "x_osgb",
        "y": "y_osgb",
    }
    dataset = dataset.rename(name_map)

    # Check the timestamps are unique and increasing
    check_time_unique_increasing(dataset.init_time_utc)

    # Make sure the spatial coords are in increasing order
    dataset = make_spatial_coords_increasing(
        dataset,
        x_coord="x_osgb",
        y_coord="y_osgb",
    )

    dim_order = ("init_time_utc", "step", "channel", "x_osgb", "y_osgb")
    dataset = dataset.transpose(*dim_order)

    # TODO: should we control the dtype of the DataArray?
    return dataset.UKV
|
|
44
|
+
|
|
45
|
+
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
import xarray as xr
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def open_zarr_paths(
    zarr_path: Path | str | list[Path] | list[str],
    time_dim: str = "init_time"
) -> xr.Dataset:
    """Open NWP data stored in one or more zarr stores.

    Args:
        zarr_path: Path to the zarr(s) to open. A list/tuple of paths, or a single
            path containing a "*" wildcard, is opened as a multi-file dataset
        time_dim: Name of the time dimension. Only used in the multi-file case, where
            the stores are concatenated along it and the result is sorted by it

    Returns:
        The opened Xarray Dataset
    """
    # Use isinstance rather than an exact type comparison so that subclasses of
    # list/tuple are also recognised as a collection of paths
    is_multi_file = isinstance(zarr_path, (list, tuple)) or "*" in str(zarr_path)

    if is_multi_file:
        ds = xr.open_mfdataset(
            zarr_path,
            engine="zarr",
            concat_dim=time_dim,
            combine="nested",
            chunks="auto",
        ).sortby(time_dim)
    else:
        ds = xr.open_dataset(
            zarr_path,
            engine="zarr",
            consolidated=True,
            mode="r",
            chunks="auto",
        )
    return ds
|
|
@@ -114,7 +114,7 @@ def find_valid_t0_times(
|
|
|
114
114
|
|
|
115
115
|
assert set(datasets_dict.keys()).issubset({"nwp", "sat", "gsp"})
|
|
116
116
|
|
|
117
|
-
contiguous_time_periods =
|
|
117
|
+
contiguous_time_periods: dict[str: pd.DataFrame] = {} # Used to store contiguous time periods from each data source
|
|
118
118
|
|
|
119
119
|
if "nwp" in datasets_dict:
|
|
120
120
|
for nwp_key, nwp_config in config.input_data.nwp.items():
|
|
@@ -158,7 +158,7 @@ def find_valid_t0_times(
|
|
|
158
158
|
max_dropout=max_dropout,
|
|
159
159
|
)
|
|
160
160
|
|
|
161
|
-
contiguous_time_periods
|
|
161
|
+
contiguous_time_periods[f'nwp_{nwp_key}'] = time_periods
|
|
162
162
|
|
|
163
163
|
if "sat" in datasets_dict:
|
|
164
164
|
sat_config = config.input_data.satellite
|
|
@@ -170,7 +170,7 @@ def find_valid_t0_times(
|
|
|
170
170
|
forecast_duration=minutes(sat_config.forecast_minutes),
|
|
171
171
|
)
|
|
172
172
|
|
|
173
|
-
contiguous_time_periods
|
|
173
|
+
contiguous_time_periods['sat'] = time_periods
|
|
174
174
|
|
|
175
175
|
# GSP always assumed to be in data
|
|
176
176
|
gsp_config = config.input_data.gsp
|
|
@@ -182,15 +182,22 @@ def find_valid_t0_times(
|
|
|
182
182
|
forecast_duration=minutes(gsp_config.forecast_minutes),
|
|
183
183
|
)
|
|
184
184
|
|
|
185
|
-
contiguous_time_periods
|
|
185
|
+
contiguous_time_periods['gsp'] = time_periods
|
|
186
|
+
|
|
187
|
+
# just get the values (not the keys)
|
|
188
|
+
contiguous_time_periods_values = list(contiguous_time_periods.values())
|
|
186
189
|
|
|
187
190
|
# Find joint overlapping contiguous time periods
|
|
188
|
-
if len(
|
|
191
|
+
if len(contiguous_time_periods_values) > 1:
|
|
189
192
|
valid_time_periods = intersection_of_multiple_dataframes_of_periods(
|
|
190
|
-
|
|
193
|
+
contiguous_time_periods_values
|
|
191
194
|
)
|
|
192
195
|
else:
|
|
193
|
-
valid_time_periods =
|
|
196
|
+
valid_time_periods = contiguous_time_periods_values[0]
|
|
197
|
+
|
|
198
|
+
# check there are some valid time periods
|
|
199
|
+
if len(valid_time_periods.keys()) == 0:
|
|
200
|
+
raise ValueError(f"No valid time periods found, {contiguous_time_periods=}")
|
|
194
201
|
|
|
195
202
|
# Fill out the contiguous time periods to get the t0 times
|
|
196
203
|
valid_t0_times = fill_time_periods(
|
|
@@ -16,6 +16,10 @@ ocf_data_sampler/load/satellite.py
|
|
|
16
16
|
ocf_data_sampler/load/utils.py
|
|
17
17
|
ocf_data_sampler/load/nwp/__init__.py
|
|
18
18
|
ocf_data_sampler/load/nwp/nwp.py
|
|
19
|
+
ocf_data_sampler/load/nwp/providers/__init__.py
|
|
20
|
+
ocf_data_sampler/load/nwp/providers/ecmwf.py
|
|
21
|
+
ocf_data_sampler/load/nwp/providers/ukv.py
|
|
22
|
+
ocf_data_sampler/load/nwp/providers/utils.py
|
|
19
23
|
ocf_data_sampler/numpy_batch/__init__.py
|
|
20
24
|
ocf_data_sampler/numpy_batch/gsp.py
|
|
21
25
|
ocf_data_sampler/numpy_batch/nwp.py
|
|
@@ -10,7 +10,7 @@ install_requires = (this_directory / "requirements.txt").read_text().splitlines(
|
|
|
10
10
|
|
|
11
11
|
setup(
|
|
12
12
|
name="ocf_data_sampler",
|
|
13
|
-
version="0.0.
|
|
13
|
+
version="0.0.12",
|
|
14
14
|
license="MIT",
|
|
15
15
|
description="Sample from weather data for renewable energy prediction",
|
|
16
16
|
author="James Fulton, Peter Dudfield, and the Open Climate Fix team",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/data/uk_gsp_locations.csv
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/numpy_batch/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/numpy_batch/satellite.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/numpy_batch/sun_position.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/select/fill_time_periods.py
RENAMED
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/select/select_spatial_slice.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/select/select_time_slice.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler/torch_datasets/__init__.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.0.10 → ocf_data_sampler-0.0.12}/ocf_data_sampler.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|