ocf-data-sampler 0.1.5__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ocf_data_sampler-0.1.5/ocf_data_sampler.egg-info → ocf_data_sampler-0.1.6}/PKG-INFO +1 -1
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/config/__init__.py +1 -1
- ocf_data_sampler-0.1.6/ocf_data_sampler/config/load.py +22 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/config/model.py +10 -20
- ocf_data_sampler-0.1.6/ocf_data_sampler/config/save.py +31 -0
- ocf_data_sampler-0.1.6/ocf_data_sampler/load/__init__.py +5 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/gsp.py +10 -6
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/load_dataset.py +15 -17
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/nwp/nwp.py +3 -4
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/nwp/providers/ecmwf.py +6 -17
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/nwp/providers/ukv.py +1 -9
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/nwp/providers/utils.py +1 -5
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/satellite.py +4 -8
- ocf_data_sampler-0.1.6/ocf_data_sampler/load/site.py +37 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/numpy_sample/collate.py +3 -4
- ocf_data_sampler-0.1.6/ocf_data_sampler/numpy_sample/datetime_features.py +38 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/find_contiguous_time_periods.py +2 -2
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +2 -2
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/torch_datasets/datasets/site.py +1 -1
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6/ocf_data_sampler.egg-info}/PKG-INFO +1 -1
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler.egg-info/SOURCES.txt +1 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/pyproject.toml +1 -1
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/config/test_config.py +1 -47
- ocf_data_sampler-0.1.6/tests/config/test_load.py +7 -0
- ocf_data_sampler-0.1.6/tests/config/test_save.py +28 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/load/test_load_sites.py +1 -1
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/numpy_sample/test_datetime_features.py +0 -10
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/torch_datasets/test_site.py +3 -3
- ocf_data_sampler-0.1.5/ocf_data_sampler/config/load.py +0 -33
- ocf_data_sampler-0.1.5/ocf_data_sampler/config/save.py +0 -84
- ocf_data_sampler-0.1.5/ocf_data_sampler/load/__init__.py +0 -1
- ocf_data_sampler-0.1.5/ocf_data_sampler/load/site.py +0 -30
- ocf_data_sampler-0.1.5/ocf_data_sampler/numpy_sample/datetime_features.py +0 -46
- ocf_data_sampler-0.1.5/tests/config/test_save.py +0 -37
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/LICENSE +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/MANIFEST.in +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/README.md +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/__init__.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/constants.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/nwp/__init__.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/utils.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/numpy_sample/site.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/sample/__init__.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/sample/base.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/sample/site.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/sample/uk_regional.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/__init__.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/dropout.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/fill_time_periods.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/geospatial.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/location.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/select_time_slice.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/spatial_slice_for_dataset.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/time_slice_for_dataset.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/utils.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler.egg-info/requires.txt +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler.egg-info/top_level.txt +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/scripts/refactor_site.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/setup.cfg +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/__init__.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/conftest.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/load/test_load_gsp.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/load/test_load_nwp.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/load/test_load_satellite.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/numpy_sample/test_collate.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/numpy_sample/test_gsp.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/numpy_sample/test_nwp.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/numpy_sample/test_satellite.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/numpy_sample/test_sun_position.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/select/test_dropout.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/select/test_fill_time_periods.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/select/test_find_contiguous_time_periods.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/select/test_location.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/select/test_select_spatial_slice.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/select/test_select_time_slice.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/test_sample/test_base.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/test_sample/test_site_sample.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/test_sample/test_uk_regional_sample.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/torch_datasets/test_merge_and_fill_utils.py +0 -0
- {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/torch_datasets/test_pvnet_uk.py +0 -0
{ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/config/__init__.py
RENAMED
@@ -1,5 +1,5 @@
 """Configuration model"""
 
-from ocf_data_sampler.config.model import Configuration
+from ocf_data_sampler.config.model import Configuration, InputData
 from ocf_data_sampler.config.save import save_yaml_configuration
 from ocf_data_sampler.config.load import load_yaml_configuration
ocf_data_sampler-0.1.6/ocf_data_sampler/config/load.py
ADDED
@@ -0,0 +1,22 @@
+"""Load configuration from a yaml file"""
+
+import fsspec
+from pyaml_env import parse_config
+from ocf_data_sampler.config import Configuration
+
+
+def load_yaml_configuration(filename: str) -> Configuration:
+    """
+    Load a yaml file which has a configuration in it
+
+    Args:
+        filename: the yaml file name that you want to load. Will load from local, AWS, or GCP
+            depending on the protocol suffix (e.g. 's3://bucket/config.yaml').
+
+    Returns: pydantic class
+
+    """
+    with fsspec.open(filename, mode="r") as stream:
+        configuration = parse_config(data=stream)
+
+    return Configuration(**configuration)
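A minimal usage sketch of the new loader (the config path here is hypothetical; `load_yaml_configuration` is re-exported from `ocf_data_sampler.config` per the `__init__.py` diff above):

    from ocf_data_sampler.config import load_yaml_configuration

    # Environment variables in the YAML are resolved by pyaml_env's parse_config
    config = load_yaml_configuration("s3://bucket/config.yaml")  # hypothetical path
    print(type(config))  # ocf_data_sampler.config.model.Configuration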
{ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/config/model.py
RENAMED
@@ -1,16 +1,10 @@
 """Configuration model for the dataset.
 
-All paths must include the protocol prefix. For local files,
-it's sufficient to just start with a '/'. For aws, start with 's3://',
-for gcp start with 'gs://'.
 
-
+Absolute or relative zarr filepath(s). Prefix with a protocol like s3:// to read from alternative filesystems.
 
-from ocf_data_sampler.config import Configuration
-config = Configuration(**config_dict)
 """
 
-import logging
 from typing import Dict, List, Optional
 from typing_extensions import Self
 
@@ -18,10 +12,6 @@ from pydantic import BaseModel, Field, RootModel, field_validator, ValidationInfo
 
 from ocf_data_sampler.constants import NWP_PROVIDERS
 
-logger = logging.getLogger(__name__)
-
-providers = ["pvoutput.org", "solar_sheffield_passiv"]
-
 
 class Base(BaseModel):
     """Pydantic Base model where no extras can be added"""
@@ -79,8 +69,6 @@ class TimeWindowMixin(Base):
         return v
 
 
-
-# noinspection PyMethodParameters
 class DropoutMixin(Base):
     """Mixin class, to add dropout minutes"""
 
@@ -137,7 +125,8 @@ class Satellite(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
 
     zarr_path: str | tuple[str] | list[str] = Field(
         ...,
-        description="
+        description="Absolute or relative zarr filepath(s). Prefix with a protocol like s3:// "
+        "to read from alternative filesystems.",
     )
 
     channels: list[str] = Field(
@@ -145,13 +134,13 @@ class Satellite(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
     )
 
 
-# noinspection PyMethodParameters
 class NWP(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
     """NWP configuration model"""
 
     zarr_path: str | tuple[str] | list[str] = Field(
         ...,
-        description="
+        description="Absolute or relative zarr filepath(s). Prefix with a protocol like s3:// "
+        "to read from alternative filesystems.",
     )
 
     channels: list[str] = Field(
@@ -175,7 +164,6 @@ class NWP(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
         """Validate 'provider'"""
         if v.lower() not in NWP_PROVIDERS:
             message = f"NWP provider {v} is not in {NWP_PROVIDERS}"
-            logger.warning(message)
             raise Exception(message)
         return v
 
@@ -209,7 +197,11 @@ class MultiNWP(RootModel):
 class GSP(TimeWindowMixin, DropoutMixin):
     """GSP configuration model"""
 
-    zarr_path: str = Field(
+    zarr_path: str = Field(
+        ...,
+        description="Absolute or relative zarr filepath. Prefix with a protocol like s3:// "
+        "to read from alternative filesystems.",
+    )
 
 
 class Site(TimeWindowMixin, DropoutMixin):
@@ -228,8 +220,6 @@ class Site(TimeWindowMixin, DropoutMixin):
     # TODO validate the csv for metadata
 
 
-
-# noinspection PyPep8Naming
 class InputData(Base):
     """Input data model"""
 
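The multi-line `description` strings above rely on Python's implicit string concatenation inside the `Field(...)` call. A standalone sketch of the same pattern (the `Example` model is illustrative, not part of the package):

    from pydantic import BaseModel, Field

    class Example(BaseModel):
        zarr_path: str = Field(
            ...,
            description="Absolute or relative zarr filepath. Prefix with a protocol like s3:// "
            "to read from alternative filesystems.",
        )

    # pydantic v2: field metadata is available via model_fields
    print(Example.model_fields["zarr_path"].description)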
ocf_data_sampler-0.1.6/ocf_data_sampler/config/save.py
ADDED
@@ -0,0 +1,31 @@
+"""Save functions for the configuration model.
+
+This module provides functionality to save configuration objects to YAML files,
+supporting local and cloud storage locations.
+"""
+
+import json
+import fsspec
+import yaml
+import os
+
+from ocf_data_sampler.config import Configuration
+
+def save_yaml_configuration(configuration: Configuration, filename: str) -> None:
+    """Save a configuration object to a YAML file.
+
+    Args:
+        configuration: Configuration object containing the settings to save
+        filename: Destination path for the YAML file. Can be a local path or
+            cloud storage URL (e.g., 'gs://', 's3://'). For local paths,
+            absolute paths are recommended.
+    """
+
+    if os.path.exists(filename):
+        raise FileExistsError(f"File already exists: {filename}")
+
+    # Serialize configuration to JSON-compatible dictionary
+    config_dict = json.loads(configuration.model_dump_json())
+
+    with fsspec.open(filename, mode='w') as yaml_file:
+        yaml.safe_dump(config_dict, yaml_file, default_flow_style=False)
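A round-trip sketch, assuming both helpers are imported from `ocf_data_sampler.config` (paths are hypothetical):

    from ocf_data_sampler.config import load_yaml_configuration, save_yaml_configuration

    config = load_yaml_configuration("/abs/path/config.yaml")
    # Saving refuses to overwrite: an existing local target raises FileExistsError
    save_yaml_configuration(config, "/abs/path/config_copy.yaml")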
{ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/gsp.py
RENAMED
@@ -1,16 +1,21 @@
-from pathlib import Path
 import pkg_resources
 
 import pandas as pd
 import xarray as xr
 
 
-def open_gsp(zarr_path: str | Path) -> xr.DataArray:
+def open_gsp(zarr_path: str) -> xr.DataArray:
+    """Open the GSP data
+
+    Args:
+        zarr_path: Path to the GSP zarr data
+
+    Returns:
+        xr.DataArray: The opened GSP data
+    """
 
-    # Load GSP generation xr.Dataset
     ds = xr.open_zarr(zarr_path)
 
-    # Rename to standard time name
     ds = ds.rename({"datetime_gmt": "time_utc"})
 
     # Load UK GSP locations
@@ -19,13 +24,12 @@ def open_gsp(zarr_path: str | Path) -> xr.DataArray:
         index_col="gsp_id",
     )
 
-    # Add coordinates
+    # Add locations and capacities as coordinates for each GSP and datetime
     ds = ds.assign_coords(
         x_osgb=(df_gsp_loc.x_osgb.to_xarray()),
         y_osgb=(df_gsp_loc.y_osgb.to_xarray()),
         nominal_capacity_mwp=ds.installedcapacity_mwp,
         effective_capacity_mwp=ds.capacity_mwp,
-
     )
 
     return ds.generation_mw
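A usage sketch, assuming `open_gsp` is re-exported from `ocf_data_sampler.load` (the new `load/__init__.py` and the `load_dataset.py` diff below both import it from there) and using a hypothetical path:

    from ocf_data_sampler.load import open_gsp

    da_gsp = open_gsp(zarr_path="/data/pv_gsp.zarr")
    # x_osgb, y_osgb and the capacity coordinates are attached per gsp_id
    print(da_gsp.coords)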
{ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/load_dataset.py
RENAMED
@@ -1,36 +1,31 @@
 """ Loads all data sources """
 import xarray as xr
 
-from ocf_data_sampler.config import Configuration
-from ocf_data_sampler.load.gsp import open_gsp
-from ocf_data_sampler.load.nwp import open_nwp
-from ocf_data_sampler.load.satellite import open_sat_data
-from ocf_data_sampler.load.site import open_site
+from ocf_data_sampler.config import InputData
+from ocf_data_sampler.load import open_nwp, open_gsp, open_sat_data, open_site
 
 
-def get_dataset_dict(config: Configuration) -> dict[str, dict[xr.DataArray]]:
+def get_dataset_dict(input_config: InputData) -> dict[str, dict[xr.DataArray] | xr.DataArray]:
     """Construct dictionary of all of the input data sources
 
     Args:
-
+        input_config: InputData configuration object
     """
 
-    in_config = config.input_data
-
     datasets_dict = {}
 
     # Load GSP data unless the path is None
-    if
-        da_gsp = open_gsp(zarr_path=
+    if input_config.gsp and input_config.gsp.zarr_path:
+        da_gsp = open_gsp(zarr_path=input_config.gsp.zarr_path).compute()
 
         # Remove national GSP
         datasets_dict["gsp"] = da_gsp.sel(gsp_id=slice(1, None))
 
     # Load NWP data if in config
-    if
+    if input_config.nwp:
 
         datasets_dict["nwp"] = {}
-        for nwp_source, nwp_config in
+        for nwp_source, nwp_config in input_config.nwp.items():
 
             da_nwp = open_nwp(nwp_config.zarr_path, provider=nwp_config.provider)
 
@@ -39,8 +34,8 @@ def get_dataset_dict(config: Configuration) -> dict[str, dict[xr.DataArray]]:
             datasets_dict["nwp"][nwp_source] = da_nwp
 
     # Load satellite data if in config
-    if
-        sat_config =
+    if input_config.satellite:
+        sat_config = input_config.satellite
 
         da_sat = open_sat_data(sat_config.zarr_path)
 
@@ -48,8 +43,11 @@ def get_dataset_dict(config: Configuration) -> dict[str, dict[xr.DataArray]]:
 
         datasets_dict["sat"] = da_sat
 
-    if
-        da_sites = open_site(
+    if input_config.site:
+        da_sites = open_site(
+            generation_file_path=input_config.site.file_path,
+            metadata_file_path=input_config.site.metadata_file_path,
+        )
         datasets_dict["site"] = da_sites
 
     return datasets_dict
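Callers now pass the `input_data` sub-config rather than the whole `Configuration`, as the torch dataset diffs below show. A sketch (config path hypothetical):

    from ocf_data_sampler.config import load_yaml_configuration
    from ocf_data_sampler.load.load_dataset import get_dataset_dict

    config = load_yaml_configuration("config.yaml")
    datasets_dict = get_dataset_dict(config.input_data)
    print(datasets_dict.keys())  # subset of {"gsp", "nwp", "sat", "site"}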
{ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/nwp/nwp.py
RENAMED
@@ -1,15 +1,14 @@
-from pathlib import Path
 import xarray as xr
 
 from ocf_data_sampler.load.nwp.providers.ukv import open_ukv
 from ocf_data_sampler.load.nwp.providers.ecmwf import open_ifs
 
 
-def open_nwp(zarr_path:
-    """Opens NWP
+def open_nwp(zarr_path: str | list[str], provider: str) -> xr.DataArray:
+    """Opens NWP zarr
 
     Args:
-        zarr_path:
+        zarr_path: path to the zarr file
         provider: NWP provider
     """
 
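A dispatch sketch, assuming the provider strings match those in NWP_PROVIDERS (e.g. "ukv" and "ecmwf"; the path is hypothetical):

    from ocf_data_sampler.load import open_nwp

    da_nwp = open_nwp("/data/nwp.zarr", provider="ecmwf")  # presumably routed to open_ifs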
{ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/nwp/providers/ecmwf.py
RENAMED
@@ -1,5 +1,5 @@
 """ECMWF provider loaders"""
-
+
 import xarray as xr
 from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
 from ocf_data_sampler.load.utils import (
@@ -9,7 +9,7 @@ from ocf_data_sampler.load.utils import (
 )
 
 
-def open_ifs(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
+def open_ifs(zarr_path: str | list[str]) -> xr.DataArray:
     """
     Opens the ECMWF IFS NWP data
 
@@ -19,25 +19,14 @@ def open_ifs(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
     Returns:
         Xarray DataArray of the NWP data
     """
-    # Open the data
-    ds = open_zarr_paths(zarr_path)
-
-    # Rename
-    ds = ds.rename(
-        {
-            "init_time": "init_time_utc",
-        }
-    )
 
-
-
-
-
+    ds = open_zarr_paths(zarr_path)
+
+    # LEGACY SUPPORT - rename variable to channel if it exists
+    ds = ds.rename({"init_time": "init_time_utc", "variable": "channel"})
 
-    # Check the timestamps are unique and increasing
     check_time_unique_increasing(ds.init_time_utc)
 
-    # Make sure the spatial coords are in increasing order
     ds = make_spatial_coords_increasing(ds, x_coord="longitude", y_coord="latitude")
 
     ds = ds.transpose("init_time_utc", "step", "channel", "longitude", "latitude")
{ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/nwp/providers/ukv.py
RENAMED
@@ -2,8 +2,6 @@
 
 import xarray as xr
 
-from pathlib import Path
-
 from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
 from ocf_data_sampler.load.utils import (
     check_time_unique_increasing,
@@ -12,7 +10,7 @@ from ocf_data_sampler.load.utils import (
 )
 
 
-def open_ukv(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
+def open_ukv(zarr_path: str | list[str]) -> xr.DataArray:
     """
     Opens the NWP data
 
@@ -22,10 +20,8 @@ def open_ukv(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
     Returns:
         Xarray DataArray of the NWP data
     """
-    # Open the data
     ds = open_zarr_paths(zarr_path)
 
-    # Rename
     ds = ds.rename(
         {
             "init_time": "init_time_utc",
@@ -35,15 +31,11 @@ def open_ukv(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
         }
     )
 
-    # Check the timestamps are unique and increasing
     check_time_unique_increasing(ds.init_time_utc)
 
-    # Make sure the spatial coords are in increasing order
     ds = make_spatial_coords_increasing(ds, x_coord="x_osgb", y_coord="y_osgb")
 
     ds = ds.transpose("init_time_utc", "step", "channel", "x_osgb", "y_osgb")
 
     # TODO: should we control the dtype of the DataArray?
     return get_xr_data_array_from_xr_dataset(ds)
-
-
{ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/nwp/providers/utils.py
RENAMED
@@ -1,11 +1,7 @@
-from pathlib import Path
 import xarray as xr
 
 
-def open_zarr_paths(
-    zarr_path: Path | str | list[Path] | list[str],
-    time_dim: str = "init_time"
-) -> xr.Dataset:
+def open_zarr_paths(zarr_path: str | list[str], time_dim: str = "init_time") -> xr.Dataset:
     """Opens the NWP data
 
     Args:
{ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/satellite.py
RENAMED
@@ -1,7 +1,6 @@
 """Satellite loader"""
 
 import subprocess
-from pathlib import Path
 
 import xarray as xr
 from ocf_data_sampler.load.utils import (
@@ -11,7 +10,7 @@ from ocf_data_sampler.load.utils import (
 )
 
 
-def _get_single_sat_data(zarr_path: Path | str) -> xr.Dataset:
+def _get_single_sat_data(zarr_path: str) -> xr.Dataset:
     """Helper function to open a Zarr from either a local or GCP path.
 
     Args:
@@ -50,7 +49,7 @@ def _get_single_sat_data(zarr_path: Path | str) -> xr.Dataset:
     return ds
 
 
-def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
+def open_sat_data(zarr_path: str | list[str]) -> xr.DataArray:
     """Lazily opens the Zarr store.
 
     Args:
@@ -69,7 +68,6 @@ def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
     else:
         ds = _get_single_sat_data(zarr_path)
 
-    # Rename dimensions
     ds = ds.rename(
         {
             "variable": "channel",
@@ -77,13 +75,11 @@ def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
         }
     )
 
-    # Check timestamps
     check_time_unique_increasing(ds.time_utc)
 
-    # Ensure spatial coordinates are sorted
     ds = make_spatial_coords_increasing(ds, x_coord="x_geostationary", y_coord="y_geostationary")
-
+
     ds = ds.transpose("time_utc", "channel", "x_geostationary", "y_geostationary")
+
     # TODO: should we control the dtype of the DataArray?
-
     return get_xr_data_array_from_xr_dataset(ds)
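A brief sketch of the loader's output layout, with a hypothetical path:

    from ocf_data_sampler.load import open_sat_data

    da_sat = open_sat_data("/data/satellite.zarr")
    print(da_sat.dims)  # ("time_utc", "channel", "x_geostationary", "y_geostationary")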
ocf_data_sampler-0.1.6/ocf_data_sampler/load/site.py
ADDED
@@ -0,0 +1,37 @@
+import numpy as np
+import pandas as pd
+import xarray as xr
+
+
+def open_site(generation_file_path: str, metadata_file_path: str) -> xr.DataArray:
+    """Open a site's generation data and metadata.
+
+    Args:
+        generation_file_path: Path to the site generation netcdf data
+        metadata_file_path: Path to the site csv metadata
+
+    Returns:
+        xr.DataArray: The opened site generation data
+    """
+
+    generation_ds = xr.open_dataset(generation_file_path)
+
+    metadata_df = pd.read_csv(metadata_file_path, index_col="site_id")
+
+    assert metadata_df.index.is_unique
+
+    # Ensure metadata aligns with the site_id dimension in data_ds
+    metadata_df = metadata_df.reindex(generation_ds.site_id.values)
+
+    # Assign coordinates to the Dataset using the aligned metadata
+    generation_ds = generation_ds.assign_coords(
+        latitude=("site_id", metadata_df["latitude"].values),
+        longitude=("site_id", metadata_df["longitude"].values),
+        capacity_kwp=("site_id", metadata_df["capacity_kwp"].values),
+    )
+
+    # Sanity checks
+    assert np.isfinite(generation_ds.capacity_kwp.values).all()
+    assert (generation_ds.capacity_kwp.values > 0).all()
+
+    return generation_ds.generation_kw
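A usage sketch with hypothetical file paths; the metadata CSV is assumed to carry a unique `site_id` index plus `latitude`, `longitude` and `capacity_kwp` columns, as the asserts above require:

    from ocf_data_sampler.load import open_site

    da_sites = open_site(
        generation_file_path="/data/sites_generation.nc",
        metadata_file_path="/data/sites_metadata.csv",
    )
    print(da_sites.coords["capacity_kwp"])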
{ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/numpy_sample/collate.py
RENAMED
@@ -45,11 +45,12 @@ def stack_np_samples_into_batch(dict_list: list[dict]) -> dict:
     return batch
 
 
-def _key_is_constant(key: str):
+def _key_is_constant(key: str) -> bool:
+    """Check if a key is for value which is constant for all samples"""
     return key.endswith("t0_idx") or key.endswith("channel_names")
 
 
-def stack_data_list(data_list: list, key: str):
+def stack_data_list(data_list: list, key: str) -> np.ndarray:
     """Stack a sequence of data elements along a new axis
 
     Args:
@@ -57,8 +58,6 @@ def stack_data_list(data_list: list, key: str):
         key: string identifying the data type
     """
     if _key_is_constant(key):
-        # These are always the same for all examples.
         return data_list[0]
     else:
         return np.stack(data_list)
-
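A worked sketch of the stacking behaviour (the key names are illustrative):

    import numpy as np
    from ocf_data_sampler.numpy_sample.collate import stack_data_list

    arrs = [np.zeros((2, 3)), np.ones((2, 3))]
    print(stack_data_list(arrs, key="nwp_data").shape)  # (2, 2, 3): new leading batch axis
    # Keys ending in "t0_idx" or "channel_names" are constant across samples,
    # so only the first element is returned
    print(stack_data_list([5, 5], key="gsp_t0_idx"))  # 5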
ocf_data_sampler-0.1.6/ocf_data_sampler/numpy_sample/datetime_features.py
ADDED
@@ -0,0 +1,38 @@
+"""Functions to create trigonometric date and time inputs"""
+
+import numpy as np
+import pandas as pd
+
+
+def _get_date_time_in_pi(dt: pd.DatetimeIndex) -> tuple[np.ndarray, np.ndarray]:
+    """Create positional embeddings for the datetimes in radians
+
+    Args:
+        dt: DatetimeIndex to create radian embeddings for
+
+    Returns:
+        Tuple of numpy arrays containing radian coordinates for date and time
+    """
+
+    day_of_year = dt.dayofyear
+    minute_of_day = dt.minute + dt.hour * 60
+
+    time_in_pi = (2 * np.pi) * (minute_of_day / (24 * 60))
+    date_in_pi = (2 * np.pi) * (day_of_year / 365)
+
+    return date_in_pi, time_in_pi
+
+
+def make_datetime_numpy_dict(datetimes: pd.DatetimeIndex, key_prefix: str = "wind") -> dict:
+    """ Creates dictionary of cyclical datetime features - encoded """
+
+    date_in_pi, time_in_pi = _get_date_time_in_pi(datetimes)
+
+    time_numpy_sample = {}
+
+    time_numpy_sample[key_prefix + "_date_sin"] = np.sin(date_in_pi)
+    time_numpy_sample[key_prefix + "_date_cos"] = np.cos(date_in_pi)
+    time_numpy_sample[key_prefix + "_time_sin"] = np.sin(time_in_pi)
+    time_numpy_sample[key_prefix + "_time_cos"] = np.cos(time_in_pi)
+
+    return time_numpy_sample
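A worked example of the cyclical encoding: at 12:00 the time angle is pi (sin 0, cos -1), at 18:00 it is 3*pi/2 (sin -1, cos ~0):

    import pandas as pd
    from ocf_data_sampler.numpy_sample.datetime_features import make_datetime_numpy_dict

    datetimes = pd.to_datetime(["2024-06-21 12:00", "2024-06-21 18:00"])
    sample = make_datetime_numpy_dict(datetimes, key_prefix="wind")
    print(sample["wind_time_sin"].round(6))  # [ 0. -1.]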
{ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/find_contiguous_time_periods.py
RENAMED
@@ -2,6 +2,7 @@
 
 import numpy as np
 import pandas as pd
+from ocf_data_sampler.load.utils import check_time_unique_increasing
 
 
 
@@ -28,8 +29,7 @@ def find_contiguous_time_periods(
     # Sanity checks.
     assert len(datetimes) > 0
     assert min_seq_length > 1
-
-    assert datetimes.is_unique
+    check_time_unique_increasing(datetimes)
 
     # Find indices of gaps larger than max_gap:
     gap_mask = pd.TimedeltaIndex(np.diff(datetimes)) > max_gap_duration
{ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py
RENAMED
@@ -187,7 +187,7 @@ class PVNetUKRegionalDataset(Dataset):
 
         config = load_yaml_configuration(config_filename)
 
-        datasets_dict = get_dataset_dict(config)
+        datasets_dict = get_dataset_dict(config.input_data)
 
         # Get t0 times where all input data is available
         valid_t0_times = find_valid_t0_times(datasets_dict, config)
@@ -295,7 +295,7 @@ class PVNetUKConcurrentDataset(Dataset):
 
         config = load_yaml_configuration(config_filename)
 
-        datasets_dict = get_dataset_dict(config)
+        datasets_dict = get_dataset_dict(config.input_data)
 
         # Get t0 times where all input data is available
         valid_t0_times = find_valid_t0_times(datasets_dict, config)
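From the call sites above, dataset construction is unchanged for users; a sketch, assuming the constructor's first argument is the config filename (path hypothetical):

    from ocf_data_sampler.torch_datasets.datasets.pvnet_uk import PVNetUKRegionalDataset

    dataset = PVNetUKRegionalDataset("config.yaml")
    sample = dataset[0]  # standard torch Dataset indexing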
{ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/torch_datasets/datasets/site.py
RENAMED
@@ -47,7 +47,7 @@ class SitesDataset(Dataset):
         """
 
         config: Configuration = load_yaml_configuration(config_filename)
-        datasets_dict = get_dataset_dict(config)
+        datasets_dict = get_dataset_dict(config.input_data)
 
         # Assign config and input data to self
         self.datasets_dict = datasets_dict
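The same pattern applies to the sites dataset; a sketch under the same assumption:

    from ocf_data_sampler.torch_datasets.datasets.site import SitesDataset

    dataset = SitesDataset("config.yaml")  # hypothetical config path
    sample = dataset[0]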