ocf-data-sampler 0.0.19__py3-none-any.whl → 0.0.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocf-data-sampler might be problematic. Click here for more details.
- ocf_data_sampler/config/__init__.py +5 -0
- ocf_data_sampler/config/load.py +33 -0
- ocf_data_sampler/config/model.py +246 -0
- ocf_data_sampler/config/save.py +73 -0
- ocf_data_sampler/constants.py +173 -0
- ocf_data_sampler/load/load_dataset.py +55 -0
- ocf_data_sampler/load/nwp/providers/ecmwf.py +5 -2
- ocf_data_sampler/load/site.py +30 -0
- ocf_data_sampler/numpy_sample/__init__.py +8 -0
- ocf_data_sampler/numpy_sample/collate.py +75 -0
- ocf_data_sampler/numpy_sample/gsp.py +34 -0
- ocf_data_sampler/numpy_sample/nwp.py +42 -0
- ocf_data_sampler/numpy_sample/satellite.py +30 -0
- ocf_data_sampler/numpy_sample/site.py +30 -0
- ocf_data_sampler/{numpy_batch → numpy_sample}/sun_position.py +9 -10
- ocf_data_sampler/select/__init__.py +8 -1
- ocf_data_sampler/select/dropout.py +4 -3
- ocf_data_sampler/select/find_contiguous_time_periods.py +40 -75
- ocf_data_sampler/select/geospatial.py +160 -0
- ocf_data_sampler/select/location.py +62 -0
- ocf_data_sampler/select/select_spatial_slice.py +13 -16
- ocf_data_sampler/select/select_time_slice.py +24 -33
- ocf_data_sampler/select/spatial_slice_for_dataset.py +53 -0
- ocf_data_sampler/select/time_slice_for_dataset.py +125 -0
- ocf_data_sampler/torch_datasets/__init__.py +2 -1
- ocf_data_sampler/torch_datasets/process_and_combine.py +131 -0
- ocf_data_sampler/torch_datasets/pvnet_uk_regional.py +11 -425
- ocf_data_sampler/torch_datasets/site.py +405 -0
- ocf_data_sampler/torch_datasets/valid_time_periods.py +116 -0
- ocf_data_sampler/utils.py +10 -0
- ocf_data_sampler-0.0.43.dist-info/METADATA +154 -0
- ocf_data_sampler-0.0.43.dist-info/RECORD +71 -0
- {ocf_data_sampler-0.0.19.dist-info → ocf_data_sampler-0.0.43.dist-info}/WHEEL +1 -1
- {ocf_data_sampler-0.0.19.dist-info → ocf_data_sampler-0.0.43.dist-info}/top_level.txt +1 -0
- scripts/refactor_site.py +50 -0
- tests/config/test_config.py +161 -0
- tests/config/test_save.py +37 -0
- tests/conftest.py +86 -1
- tests/load/test_load_gsp.py +15 -0
- tests/load/test_load_nwp.py +21 -0
- tests/load/test_load_satellite.py +17 -0
- tests/load/test_load_sites.py +14 -0
- tests/numpy_sample/test_collate.py +26 -0
- tests/numpy_sample/test_gsp.py +38 -0
- tests/numpy_sample/test_nwp.py +52 -0
- tests/numpy_sample/test_satellite.py +40 -0
- tests/numpy_sample/test_sun_position.py +81 -0
- tests/select/test_dropout.py +75 -0
- tests/select/test_fill_time_periods.py +28 -0
- tests/select/test_find_contiguous_time_periods.py +202 -0
- tests/select/test_location.py +67 -0
- tests/select/test_select_spatial_slice.py +154 -0
- tests/select/test_select_time_slice.py +272 -0
- tests/torch_datasets/conftest.py +18 -0
- tests/torch_datasets/test_process_and_combine.py +126 -0
- tests/torch_datasets/test_pvnet_uk_regional.py +59 -0
- tests/torch_datasets/test_site.py +129 -0
- ocf_data_sampler/numpy_batch/__init__.py +0 -7
- ocf_data_sampler/numpy_batch/gsp.py +0 -20
- ocf_data_sampler/numpy_batch/nwp.py +0 -33
- ocf_data_sampler/numpy_batch/satellite.py +0 -23
- ocf_data_sampler-0.0.19.dist-info/METADATA +0 -22
- ocf_data_sampler-0.0.19.dist-info/RECORD +0 -32
- {ocf_data_sampler-0.0.19.dist-info → ocf_data_sampler-0.0.43.dist-info}/LICENSE +0 -0
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
from ocf_data_sampler.torch_datasets import SitesDataset
|
|
4
|
+
from ocf_data_sampler.torch_datasets.site import convert_from_dataset_to_dict_datasets
|
|
5
|
+
import numpy as np
|
|
6
|
+
from xarray import Dataset, DataArray
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_site(site_config_filename):
    """Build a SitesDataset from the fixture config and check the layout of one sample."""
    sites = SitesDataset(site_config_filename)

    # TODO check 41
    assert len(sites) == 10 * 41

    # Pull the first sample out of the dataset
    first = sites[0]
    assert isinstance(first, Dataset)

    # Dimension names and data variables the combined sample must expose
    wanted_dims = {
        'satellite__x_geostationary', 'site__time_utc', 'nwp-ukv__target_time_utc',
        'nwp-ukv__x_osgb', 'satellite__channel', 'satellite__y_geostationary',
        'satellite__time_utc', 'nwp-ukv__channel', 'nwp-ukv__y_osgb',
    }
    wanted_vars = {"nwp-ukv", "satellite", "site"}

    found_dims = set(first.dims)
    assert found_dims == wanted_dims, f"Missing or extra dimensions: {found_dims ^ wanted_dims}"

    found_vars = set(first.data_vars)
    assert found_vars == wanted_vars, f"Missing or extra data variables: {found_vars ^ wanted_vars}"

    # Expected array shape per data variable, checked in this order
    wanted_shapes = {
        # 30 minutes of 5 minute data (inclusive), one channel, 2x2 pixels
        "satellite": (7, 1, 2, 2),
        # 3 hours of 60 minute data (inclusive), one channel, 2x2 pixels
        "nwp-ukv": (4, 1, 2, 2),
        # 1.5 hours of 30 minute data (inclusive)
        "site": (4,),
    }
    for var_name, shape in wanted_shapes.items():
        assert first[var_name].values.shape == shape
|
|
40
|
+
|
|
41
|
+
def test_site_time_filter_start(site_config_filename):
    """With start_time="2024-01-01" the dataset is expected to contain no samples."""
    late_start = SitesDataset(site_config_filename, start_time="2024-01-01")

    assert len(late_start) == 0
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_site_time_filter_end(site_config_filename):
    """With end_time="2000-01-01" the dataset is expected to contain no samples."""
    early_end = SitesDataset(site_config_filename, end_time="2000-01-01")

    assert len(early_end) == 0
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def test_site_get_sample(site_config_filename):
    """Request a sample by explicit t0 and site id; the test passes if the call does not raise."""
    sites = SitesDataset(site_config_filename)

    assert len(sites) == 410

    # The returned sample is not inspected, only that it can be produced
    noon = pd.Timestamp("2023-01-01 12:00")
    sites.get_sample(t0=noon, site_id=1)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_convert_from_dataset_to_dict_datasets(site_config_filename):
    """Convert one xarray sample into a dict and check its top-level keys."""
    sites = SitesDataset(site_config_filename)

    # A single sample is enough for the conversion check
    as_dataset = sites[0]
    as_dict = convert_from_dataset_to_dict_datasets(as_dataset)

    assert isinstance(as_dict, dict)

    print(as_dict.keys())

    # Every top-level source must be present after conversion
    missing = [name for name in ("nwp", "satellite", "site") if name not in as_dict]
    assert not missing
|
|
81
|
+
|
|
82
|
+
def test_process_and_combine_site_sample_dict(site_config_filename):
    """Feed a hand-built NWP + site dict through the combiner and check the resulting Dataset."""
    sites = SitesDataset(site_config_filename)

    # Minimal random payloads; NWP values are drawn before site values
    nwp_values = np.random.rand(4, 1, 2, 2)  # Single channel
    site_values = np.random.rand(197)

    ukv = DataArray(
        nwp_values,
        dims=["time_utc", "channel", "y", "x"],
        coords={
            "time_utc": pd.date_range("2024-01-01 00:00", periods=4, freq="h"),
            "channel": ["dswrf"],  # Single channel
        },
    )
    site = DataArray(
        site_values,
        dims=["time_utc"],
        coords={
            "time_utc": pd.date_range("2024-01-01 00:00", periods=197, freq="15min"),
            "capacity_kwp": 1000,
            "site_id": 1,
            "longitude": -3.5,
            "latitude": 51.5,
        },
    )
    site_dict = {"nwp": {"ukv": ukv}, "site": site}
    print(f"Input site_dict: {site_dict}")

    # Run the method under test
    result = sites.process_and_combine_site_sample_dict(site_dict)

    # Output must be a non-empty xarray Dataset
    assert isinstance(result, Dataset), "Result should be an xarray.Dataset"
    assert len(result.data_vars) > 0, "Dataset should contain data variables"

    # Both inputs must come back as named data variables
    for var_name in ("nwp-ukv", "site"):
        assert var_name in result.data_vars, f"Expected variable '{var_name}' not found"

    # Shapes must match the arrays that were fed in
    nwp_shape = result["nwp-ukv"].shape
    assert nwp_shape == (4, 1, 2, 2), f"Unexpected shape for nwp-ukv : {nwp_shape}"
    site_shape = result["site"].shape
    assert site_shape == (197,), f"Unexpected shape for site: {site_shape}"
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
"""Convert GSP to Numpy Batch"""
|
|
2
|
-
|
|
3
|
-
import xarray as xr
|
|
4
|
-
from ocf_datapipes.batch import BatchKey, NumpyBatch
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def convert_gsp_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> NumpyBatch:
    """Pack a GSP DataArray into a NumpyBatch dictionary.

    Args:
        da: DataArray with a `time_utc` dimension that also carries
            `nominal_capacity_mwp` and `effective_capacity_mwp`.
        t0_idx: Optional index stored under `BatchKey.gsp_t0_idx`; the key is
            left out when this is None.

    Returns:
        Dictionary keyed by `BatchKey` members holding the raw values, the two
        capacities and the time axis cast to float.
    """
    # Both capacities are read from the first time step only
    first_step = da.isel(time_utc=0)

    batch: NumpyBatch = {}
    batch[BatchKey.gsp] = da.values
    batch[BatchKey.gsp_nominal_capacity_mwp] = first_step["nominal_capacity_mwp"].values
    batch[BatchKey.gsp_effective_capacity_mwp] = first_step["effective_capacity_mwp"].values
    batch[BatchKey.gsp_time_utc] = da["time_utc"].values.astype(float)

    if t0_idx is None:
        return batch

    batch[BatchKey.gsp_t0_idx] = t0_idx
    return batch
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
"""Convert NWP to NumpyBatch"""
|
|
2
|
-
|
|
3
|
-
import pandas as pd
|
|
4
|
-
import xarray as xr
|
|
5
|
-
|
|
6
|
-
from ocf_datapipes.batch import NWPBatchKey, NWPNumpyBatch
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def convert_nwp_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> NWPNumpyBatch:
    """Pack an NWP DataArray into an NWPNumpyBatch dictionary.

    Args:
        da: DataArray exposing `channel`, `init_time_utc` and `step`; the
            `target_time_utc`, `y_osgb` and `x_osgb` coordinates are copied
            only when present.
        t0_idx: Optional index stored under `NWPBatchKey.nwp_t0_idx`; the key
            is left out when this is None.

    Returns:
        Dictionary keyed by `NWPBatchKey` members.
    """
    # Step is divided by one hour and cast to int
    step_hours = (da.step.values / pd.Timedelta("1h")).astype(int)

    batch: NWPNumpyBatch = {}
    batch[NWPBatchKey.nwp] = da.values
    batch[NWPBatchKey.nwp_channel_names] = da.channel.values
    batch[NWPBatchKey.nwp_init_time_utc] = da.init_time_utc.values.astype(float)
    batch[NWPBatchKey.nwp_step] = step_hours

    coord_names = da.coords
    if "target_time_utc" in coord_names:
        batch[NWPBatchKey.nwp_target_time_utc] = da.target_time_utc.values.astype(float)

    # TODO: Do we need this at all? Especially since it is only present in UKV data
    if "y_osgb" in coord_names:
        batch[NWPBatchKey.nwp_y_osgb] = da["y_osgb"].values
    if "x_osgb" in coord_names:
        batch[NWPBatchKey.nwp_x_osgb] = da["x_osgb"].values

    if t0_idx is None:
        return batch

    batch[NWPBatchKey.nwp_t0_idx] = t0_idx
    return batch
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
"""Convert Satellite to NumpyBatch"""
|
|
2
|
-
import xarray as xr
|
|
3
|
-
|
|
4
|
-
from ocf_datapipes.batch import BatchKey, NumpyBatch
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def convert_satellite_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> NumpyBatch:
    """Pack a satellite DataArray into a NumpyBatch dictionary.

    Args:
        da: DataArray exposing `time_utc`, `x_geostationary` and
            `y_geostationary`.
        t0_idx: Optional index stored under `BatchKey.satellite_t0_idx`; the
            key is left out when this is None.

    Returns:
        Dictionary keyed by `BatchKey` members holding the raw values, the
        time axis cast to float and the two geostationary axes.
    """
    batch: NumpyBatch = {}
    batch[BatchKey.satellite_actual] = da.values
    batch[BatchKey.satellite_time_utc] = da.time_utc.values.astype(float)

    # Unlike the time axis, the spatial axes are copied without a cast
    batch[BatchKey.satellite_x_geostationary] = da["x_geostationary"].values
    batch[BatchKey.satellite_y_geostationary] = da["y_geostationary"].values

    if t0_idx is None:
        return batch

    batch[BatchKey.satellite_t0_idx] = t0_idx
    return batch
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: ocf_data_sampler
|
|
3
|
-
Version: 0.0.19
|
|
4
|
-
Summary: Sample from weather data for renewable energy prediction
|
|
5
|
-
Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
|
|
6
|
-
Author-email: info@openclimatefix.org
|
|
7
|
-
License: MIT
|
|
8
|
-
Description-Content-Type: text/markdown
|
|
9
|
-
License-File: LICENSE
|
|
10
|
-
Requires-Dist: numpy
|
|
11
|
-
Requires-Dist: pandas
|
|
12
|
-
Requires-Dist: xarray
|
|
13
|
-
Requires-Dist: zarr
|
|
14
|
-
Requires-Dist: dask
|
|
15
|
-
Requires-Dist: ocf-blosc2
|
|
16
|
-
Requires-Dist: ocf-datapipes ==3.3.39
|
|
17
|
-
Requires-Dist: pvlib
|
|
18
|
-
|
|
19
|
-
# OCF Data Sampler
|
|
20
|
-
[Overview of OCF's nowcasting repositories](https://github.com/openclimatefix/ocf-meta-repo?tab=readme-ov-file#overview-of-ocfs-nowcasting-repositories)
|
|
21
|
-
|
|
22
|
-
A repo for sampling from weather data for renewable energy prediction
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
2
|
-
ocf_data_sampler/data/uk_gsp_locations.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
|
|
3
|
-
ocf_data_sampler/load/__init__.py,sha256=MjgfxilTzyz1RYFoBEeAXmE9hyjknLvdmlHPmlAoiQY,44
|
|
4
|
-
ocf_data_sampler/load/gsp.py,sha256=Gcr1JVUOPKhFRDCSHtfPDjxx0BtyyEhXrZvGEKLPJ5I,759
|
|
5
|
-
ocf_data_sampler/load/satellite.py,sha256=3KlA1fx4SwxdzM-jC1WRaONXO0D6m0WxORnEnwUnZrA,2967
|
|
6
|
-
ocf_data_sampler/load/utils.py,sha256=EQGvVWlGMoSOdbDYuMfVAa0v6wmAOPmHIAemdrTB5v4,1406
|
|
7
|
-
ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
|
|
8
|
-
ocf_data_sampler/load/nwp/nwp.py,sha256=O4QnajEZem8BvBgTcYYDBhRhgqPYuJkolHmpMRmrXEA,610
|
|
9
|
-
ocf_data_sampler/load/nwp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
-
ocf_data_sampler/load/nwp/providers/ecmwf.py,sha256=vW-p3vCyQ-CofKo555-gE7VDi5hlpjtjTLfHqWF0HEE,1175
|
|
11
|
-
ocf_data_sampler/load/nwp/providers/ukv.py,sha256=79Bm7q-K_GJPYMy62SUIZbRWRF4-tIaB1dYPEgLD9vo,1207
|
|
12
|
-
ocf_data_sampler/load/nwp/providers/utils.py,sha256=Sy2exG1wpXLLhMXYdsfR-DZMR3txG1_bBmBdchlc-yA,848
|
|
13
|
-
ocf_data_sampler/numpy_batch/__init__.py,sha256=mrtqwbGik5Zc9MYP5byfCTBm08wMtS2XnTsypC4fPMo,245
|
|
14
|
-
ocf_data_sampler/numpy_batch/gsp.py,sha256=3gwSj0k29JyA8_09zovB8f8Pr-dVhCuMSO1-k4QKAOg,668
|
|
15
|
-
ocf_data_sampler/numpy_batch/nwp.py,sha256=Rv0yfDj902Z2oCwdlRjOs3Kh-F5Fgxjjylh99-lQ9ws,1105
|
|
16
|
-
ocf_data_sampler/numpy_batch/satellite.py,sha256=e6eoNmiiHtzZbDVtBolFzDuE3qwhHN6bL9H86emAUsk,732
|
|
17
|
-
ocf_data_sampler/numpy_batch/sun_position.py,sha256=UW6-WtjrKdCkcguolHUDSLhYFfarknQzzjlCX8YdEOM,1700
|
|
18
|
-
ocf_data_sampler/select/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
19
|
-
ocf_data_sampler/select/dropout.py,sha256=3aDqlL3U9kzzIkIHV44h5_ZbfOUSg1iChHKingXk7-s,1064
|
|
20
|
-
ocf_data_sampler/select/fill_time_periods.py,sha256=iTtMjIPFYG5xtUYYedAFBLjTWWUa7t7WQ0-yksWf0-E,440
|
|
21
|
-
ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=6ioB8LeFpFNBMgKDxrgG3zqzNjkBF_jlV9yye2ZYT2E,11925
|
|
22
|
-
ocf_data_sampler/select/select_spatial_slice.py,sha256=7BSzOFPMSBWpBWXSajWTfI8luUVsSgh4zN-rkr-AuUs,11470
|
|
23
|
-
ocf_data_sampler/select/select_time_slice.py,sha256=41cch1fQr59fZgv7UHsNGc3OvoynrixT3bmr3_1d7cU,6628
|
|
24
|
-
ocf_data_sampler/torch_datasets/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
25
|
-
ocf_data_sampler/torch_datasets/pvnet_uk_regional.py,sha256=NGrq5e6NOX8npz_45io7gVlx6eYfI-AW0rxcQjSOBQE,19167
|
|
26
|
-
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
|
-
tests/conftest.py,sha256=OcArgF60paroZQqoP7xExRBF34nEyMuXd7dS7hD6p3w,5393
|
|
28
|
-
ocf_data_sampler-0.0.19.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
|
|
29
|
-
ocf_data_sampler-0.0.19.dist-info/METADATA,sha256=rmnF4WJbPaLoeBFmJ-ZWgwY4FpoxcND8ZvQHMrnv8b8,801
|
|
30
|
-
ocf_data_sampler-0.0.19.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
31
|
-
ocf_data_sampler-0.0.19.dist-info/top_level.txt,sha256=KaQn5qzkJGJP6hKWqsVAc9t0cMLjVvSTk8-kTrW79SA,23
|
|
32
|
-
ocf_data_sampler-0.0.19.dist-info/RECORD,,
|
|
File without changes
|