ocf-data-sampler 0.0.19__py3-none-any.whl → 0.0.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (64)
  1. ocf_data_sampler/config/__init__.py +5 -0
  2. ocf_data_sampler/config/load.py +33 -0
  3. ocf_data_sampler/config/model.py +246 -0
  4. ocf_data_sampler/config/save.py +73 -0
  5. ocf_data_sampler/constants.py +173 -0
  6. ocf_data_sampler/load/load_dataset.py +55 -0
  7. ocf_data_sampler/load/nwp/providers/ecmwf.py +5 -2
  8. ocf_data_sampler/load/site.py +30 -0
  9. ocf_data_sampler/numpy_sample/__init__.py +8 -0
  10. ocf_data_sampler/numpy_sample/collate.py +77 -0
  11. ocf_data_sampler/numpy_sample/gsp.py +34 -0
  12. ocf_data_sampler/numpy_sample/nwp.py +42 -0
  13. ocf_data_sampler/numpy_sample/satellite.py +30 -0
  14. ocf_data_sampler/numpy_sample/site.py +30 -0
  15. ocf_data_sampler/{numpy_batch → numpy_sample}/sun_position.py +9 -10
  16. ocf_data_sampler/select/__init__.py +8 -1
  17. ocf_data_sampler/select/dropout.py +4 -3
  18. ocf_data_sampler/select/find_contiguous_time_periods.py +40 -75
  19. ocf_data_sampler/select/geospatial.py +160 -0
  20. ocf_data_sampler/select/location.py +62 -0
  21. ocf_data_sampler/select/select_spatial_slice.py +13 -16
  22. ocf_data_sampler/select/select_time_slice.py +24 -33
  23. ocf_data_sampler/select/spatial_slice_for_dataset.py +53 -0
  24. ocf_data_sampler/select/time_slice_for_dataset.py +125 -0
  25. ocf_data_sampler/torch_datasets/__init__.py +2 -1
  26. ocf_data_sampler/torch_datasets/process_and_combine.py +131 -0
  27. ocf_data_sampler/torch_datasets/pvnet_uk_regional.py +11 -425
  28. ocf_data_sampler/torch_datasets/site.py +405 -0
  29. ocf_data_sampler/torch_datasets/valid_time_periods.py +116 -0
  30. ocf_data_sampler/utils.py +10 -0
  31. ocf_data_sampler-0.0.42.dist-info/METADATA +153 -0
  32. ocf_data_sampler-0.0.42.dist-info/RECORD +71 -0
  33. {ocf_data_sampler-0.0.19.dist-info → ocf_data_sampler-0.0.42.dist-info}/WHEEL +1 -1
  34. {ocf_data_sampler-0.0.19.dist-info → ocf_data_sampler-0.0.42.dist-info}/top_level.txt +1 -0
  35. scripts/refactor_site.py +50 -0
  36. tests/config/test_config.py +161 -0
  37. tests/config/test_save.py +37 -0
  38. tests/conftest.py +86 -1
  39. tests/load/test_load_gsp.py +15 -0
  40. tests/load/test_load_nwp.py +21 -0
  41. tests/load/test_load_satellite.py +17 -0
  42. tests/load/test_load_sites.py +14 -0
  43. tests/numpy_sample/test_collate.py +26 -0
  44. tests/numpy_sample/test_gsp.py +38 -0
  45. tests/numpy_sample/test_nwp.py +52 -0
  46. tests/numpy_sample/test_satellite.py +40 -0
  47. tests/numpy_sample/test_sun_position.py +81 -0
  48. tests/select/test_dropout.py +75 -0
  49. tests/select/test_fill_time_periods.py +28 -0
  50. tests/select/test_find_contiguous_time_periods.py +202 -0
  51. tests/select/test_location.py +67 -0
  52. tests/select/test_select_spatial_slice.py +154 -0
  53. tests/select/test_select_time_slice.py +272 -0
  54. tests/torch_datasets/conftest.py +18 -0
  55. tests/torch_datasets/test_process_and_combine.py +126 -0
  56. tests/torch_datasets/test_pvnet_uk_regional.py +59 -0
  57. tests/torch_datasets/test_site.py +129 -0
  58. ocf_data_sampler/numpy_batch/__init__.py +0 -7
  59. ocf_data_sampler/numpy_batch/gsp.py +0 -20
  60. ocf_data_sampler/numpy_batch/nwp.py +0 -33
  61. ocf_data_sampler/numpy_batch/satellite.py +0 -23
  62. ocf_data_sampler-0.0.19.dist-info/METADATA +0 -22
  63. ocf_data_sampler-0.0.19.dist-info/RECORD +0 -32
  64. {ocf_data_sampler-0.0.19.dist-info → ocf_data_sampler-0.0.42.dist-info}/LICENSE +0 -0
@@ -0,0 +1,129 @@
1
+ import pandas as pd
2
+
3
+ from ocf_data_sampler.torch_datasets import SitesDataset
4
+ from ocf_data_sampler.torch_datasets.site import convert_from_dataset_to_dict_datasets
5
+ import numpy as np
6
+ from xarray import Dataset, DataArray
7
+
8
+
9
+ def test_site(site_config_filename):
10
+
11
+ # Create dataset object
12
+ dataset = SitesDataset(site_config_filename)
13
+
14
+ assert len(dataset) == 10 * 41
15
+ # TODO check 41
16
+
17
+ # Generate a sample
18
+ sample = dataset[0]
19
+
20
+ assert isinstance(sample, Dataset)
21
+
22
+ # Expected dimensions and data variables
23
+ expected_dims = {'satellite__x_geostationary', 'site__time_utc', 'nwp-ukv__target_time_utc',
24
+ 'nwp-ukv__x_osgb', 'satellite__channel', 'satellite__y_geostationary',
25
+ 'satellite__time_utc', 'nwp-ukv__channel', 'nwp-ukv__y_osgb'}
26
+ expected_data_vars = {"nwp-ukv", "satellite", "site"}
27
+
28
+ # Check dimensions
29
+ assert set(sample.dims) == expected_dims, f"Missing or extra dimensions: {set(sample.dims) ^ expected_dims}"
30
+ # Check data variables
31
+ assert set(sample.data_vars) == expected_data_vars, f"Missing or extra data variables: {set(sample.data_vars) ^ expected_data_vars}"
32
+
33
+ # check the shape of the data is correct
34
+ # 30 minutes of 5 minute data (inclusive), one channel, 2x2 pixels
35
+ assert sample["satellite"].values.shape == (7, 1, 2, 2)
36
+ # 3 hours of 60 minute data (inclusive), one channel, 2x2 pixels
37
+ assert sample["nwp-ukv"].values.shape == (4, 1, 2, 2)
38
+ # 1.5 hours of 30 minute data (inclusive)
39
+ assert sample["site"].values.shape == (4,)
40
+
41
+ def test_site_time_filter_start(site_config_filename):
42
+
43
+ # Create dataset object
44
+ dataset = SitesDataset(site_config_filename, start_time="2024-01-01")
45
+
46
+ assert len(dataset) == 0
47
+
48
+
49
+ def test_site_time_filter_end(site_config_filename):
50
+
51
+ # Create dataset object
52
+ dataset = SitesDataset(site_config_filename, end_time="2000-01-01")
53
+
54
+ assert len(dataset) == 0
55
+
56
+
57
+ def test_site_get_sample(site_config_filename):
58
+
59
+ # Create dataset object
60
+ dataset = SitesDataset(site_config_filename)
61
+
62
+ assert len(dataset) == 410
63
+ sample = dataset.get_sample(t0=pd.Timestamp("2023-01-01 12:00"), site_id=1)
64
+
65
+
66
+ def test_convert_from_dataset_to_dict_datasets(site_config_filename):
67
+ # Create dataset object
68
+ dataset = SitesDataset(site_config_filename)
69
+
70
+ # Generate two samples
71
+ sample_xr = dataset[0]
72
+
73
+ sample = convert_from_dataset_to_dict_datasets(sample_xr)
74
+
75
+ assert isinstance(sample, dict)
76
+
77
+ print(sample.keys())
78
+
79
+ for key in ["nwp", "satellite", "site"]:
80
+ assert key in sample
81
+
82
+ def test_process_and_combine_site_sample_dict(site_config_filename):
83
+ # Load config
84
+ # config = load_yaml_configuration(pvnet_config_filename)
85
+ site_ds = SitesDataset(site_config_filename)
86
+ # Specify minimal structure for testing
87
+ raw_nwp_values = np.random.rand(4, 1, 2, 2) # Single channel
88
+ fake_site_values = np.random.rand(197)
89
+ site_dict = {
90
+ "nwp": {
91
+ "ukv": DataArray(
92
+ raw_nwp_values,
93
+ dims=["time_utc", "channel", "y", "x"],
94
+ coords={
95
+ "time_utc": pd.date_range("2024-01-01 00:00", periods=4, freq="h"),
96
+ "channel": ["dswrf"], # Single channel
97
+ },
98
+ )
99
+ },
100
+ "site": DataArray(
101
+ fake_site_values,
102
+ dims=["time_utc"],
103
+ coords={
104
+ "time_utc": pd.date_range("2024-01-01 00:00", periods=197, freq="15min"),
105
+ "capacity_kwp": 1000,
106
+ "site_id": 1,
107
+ "longitude": -3.5,
108
+ "latitude": 51.5
109
+ }
110
+ )
111
+ }
112
+ print(f"Input site_dict: {site_dict}")
113
+
114
+ # Call function
115
+ result = site_ds.process_and_combine_site_sample_dict(site_dict)
116
+
117
+ # Assert to validate output structure
118
+ assert isinstance(result, Dataset), "Result should be an xarray.Dataset"
119
+ assert len(result.data_vars) > 0, "Dataset should contain data variables"
120
+
121
+ # Validate variable via assertion and shape of such
122
+ expected_variables = ["nwp-ukv", "site"]
123
+ for expected_variable in expected_variables:
124
+ assert expected_variable in result.data_vars, f"Expected variable '{expected_variable}' not found"
125
+
126
+ nwp_result = result["nwp-ukv"]
127
+ assert nwp_result.shape == (4, 1, 2, 2), f"Unexpected shape for nwp-ukv : {nwp_result.shape}"
128
+ site_result = result["site"]
129
+ assert site_result.shape == (197,), f"Unexpected shape for site: {site_result.shape}"
@@ -1,7 +0,0 @@
1
- """Conversion from Xarray to NumpyBatch"""
2
-
3
- from .gsp import convert_gsp_to_numpy_batch
4
- from .nwp import convert_nwp_to_numpy_batch
5
- from .satellite import convert_satellite_to_numpy_batch
6
- from .sun_position import make_sun_position_numpy_batch
7
-
@@ -1,20 +0,0 @@
1
- """Convert GSP to Numpy Batch"""
2
-
3
- import xarray as xr
4
- from ocf_datapipes.batch import BatchKey, NumpyBatch
5
-
6
-
7
- def convert_gsp_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> NumpyBatch:
8
- """Convert from Xarray to NumpyBatch"""
9
-
10
- example: NumpyBatch = {
11
- BatchKey.gsp: da.values,
12
- BatchKey.gsp_nominal_capacity_mwp: da.isel(time_utc=0)["nominal_capacity_mwp"].values,
13
- BatchKey.gsp_effective_capacity_mwp: da.isel(time_utc=0)["effective_capacity_mwp"].values,
14
- BatchKey.gsp_time_utc: da["time_utc"].values.astype(float),
15
- }
16
-
17
- if t0_idx is not None:
18
- example[BatchKey.gsp_t0_idx] = t0_idx
19
-
20
- return example
@@ -1,33 +0,0 @@
1
- """Convert NWP to NumpyBatch"""
2
-
3
- import pandas as pd
4
- import xarray as xr
5
-
6
- from ocf_datapipes.batch import NWPBatchKey, NWPNumpyBatch
7
-
8
-
9
- def convert_nwp_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> NWPNumpyBatch:
10
- """Convert from Xarray to NWP NumpyBatch"""
11
-
12
- example: NWPNumpyBatch = {
13
- NWPBatchKey.nwp: da.values,
14
- NWPBatchKey.nwp_channel_names: da.channel.values,
15
- NWPBatchKey.nwp_init_time_utc: da.init_time_utc.values.astype(float),
16
- NWPBatchKey.nwp_step: (da.step.values / pd.Timedelta("1h")).astype(int),
17
- }
18
-
19
- if "target_time_utc" in da.coords:
20
- example[NWPBatchKey.nwp_target_time_utc] = da.target_time_utc.values.astype(float)
21
-
22
- # TODO: Do we need this at all? Especially since it is only present in UKV data
23
- for batch_key, dataset_key in (
24
- (NWPBatchKey.nwp_y_osgb, "y_osgb"),
25
- (NWPBatchKey.nwp_x_osgb, "x_osgb"),
26
- ):
27
- if dataset_key in da.coords:
28
- example[batch_key] = da[dataset_key].values
29
-
30
- if t0_idx is not None:
31
- example[NWPBatchKey.nwp_t0_idx] = t0_idx
32
-
33
- return example
@@ -1,23 +0,0 @@
1
- """Convert Satellite to NumpyBatch"""
2
- import xarray as xr
3
-
4
- from ocf_datapipes.batch import BatchKey, NumpyBatch
5
-
6
-
7
- def convert_satellite_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> NumpyBatch:
8
- """Convert from Xarray to NumpyBatch"""
9
- example: NumpyBatch = {
10
- BatchKey.satellite_actual: da.values,
11
- BatchKey.satellite_time_utc: da.time_utc.values.astype(float),
12
- }
13
-
14
- for batch_key, dataset_key in (
15
- (BatchKey.satellite_x_geostationary, "x_geostationary"),
16
- (BatchKey.satellite_y_geostationary, "y_geostationary"),
17
- ):
18
- example[batch_key] = da[dataset_key].values
19
-
20
- if t0_idx is not None:
21
- example[BatchKey.satellite_t0_idx] = t0_idx
22
-
23
- return example
@@ -1,22 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: ocf_data_sampler
3
- Version: 0.0.19
4
- Summary: Sample from weather data for renewable energy prediction
5
- Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
- Author-email: info@openclimatefix.org
7
- License: MIT
8
- Description-Content-Type: text/markdown
9
- License-File: LICENSE
10
- Requires-Dist: numpy
11
- Requires-Dist: pandas
12
- Requires-Dist: xarray
13
- Requires-Dist: zarr
14
- Requires-Dist: dask
15
- Requires-Dist: ocf-blosc2
16
- Requires-Dist: ocf-datapipes ==3.3.39
17
- Requires-Dist: pvlib
18
-
19
- # OCF Data Sampler
20
- [![ease of contribution: easy](https://img.shields.io/badge/ease%20of%20contribution:%20easy-32bd50)](https://github.com/openclimatefix/ocf-meta-repo?tab=readme-ov-file#overview-of-ocfs-nowcasting-repositories)
21
-
22
- A repo for sampling from weather data for renewable energy prediction
@@ -1,32 +0,0 @@
1
- ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
2
- ocf_data_sampler/data/uk_gsp_locations.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
3
- ocf_data_sampler/load/__init__.py,sha256=MjgfxilTzyz1RYFoBEeAXmE9hyjknLvdmlHPmlAoiQY,44
4
- ocf_data_sampler/load/gsp.py,sha256=Gcr1JVUOPKhFRDCSHtfPDjxx0BtyyEhXrZvGEKLPJ5I,759
5
- ocf_data_sampler/load/satellite.py,sha256=3KlA1fx4SwxdzM-jC1WRaONXO0D6m0WxORnEnwUnZrA,2967
6
- ocf_data_sampler/load/utils.py,sha256=EQGvVWlGMoSOdbDYuMfVAa0v6wmAOPmHIAemdrTB5v4,1406
7
- ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
8
- ocf_data_sampler/load/nwp/nwp.py,sha256=O4QnajEZem8BvBgTcYYDBhRhgqPYuJkolHmpMRmrXEA,610
9
- ocf_data_sampler/load/nwp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- ocf_data_sampler/load/nwp/providers/ecmwf.py,sha256=vW-p3vCyQ-CofKo555-gE7VDi5hlpjtjTLfHqWF0HEE,1175
11
- ocf_data_sampler/load/nwp/providers/ukv.py,sha256=79Bm7q-K_GJPYMy62SUIZbRWRF4-tIaB1dYPEgLD9vo,1207
12
- ocf_data_sampler/load/nwp/providers/utils.py,sha256=Sy2exG1wpXLLhMXYdsfR-DZMR3txG1_bBmBdchlc-yA,848
13
- ocf_data_sampler/numpy_batch/__init__.py,sha256=mrtqwbGik5Zc9MYP5byfCTBm08wMtS2XnTsypC4fPMo,245
14
- ocf_data_sampler/numpy_batch/gsp.py,sha256=3gwSj0k29JyA8_09zovB8f8Pr-dVhCuMSO1-k4QKAOg,668
15
- ocf_data_sampler/numpy_batch/nwp.py,sha256=Rv0yfDj902Z2oCwdlRjOs3Kh-F5Fgxjjylh99-lQ9ws,1105
16
- ocf_data_sampler/numpy_batch/satellite.py,sha256=e6eoNmiiHtzZbDVtBolFzDuE3qwhHN6bL9H86emAUsk,732
17
- ocf_data_sampler/numpy_batch/sun_position.py,sha256=UW6-WtjrKdCkcguolHUDSLhYFfarknQzzjlCX8YdEOM,1700
18
- ocf_data_sampler/select/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
19
- ocf_data_sampler/select/dropout.py,sha256=3aDqlL3U9kzzIkIHV44h5_ZbfOUSg1iChHKingXk7-s,1064
20
- ocf_data_sampler/select/fill_time_periods.py,sha256=iTtMjIPFYG5xtUYYedAFBLjTWWUa7t7WQ0-yksWf0-E,440
21
- ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=6ioB8LeFpFNBMgKDxrgG3zqzNjkBF_jlV9yye2ZYT2E,11925
22
- ocf_data_sampler/select/select_spatial_slice.py,sha256=7BSzOFPMSBWpBWXSajWTfI8luUVsSgh4zN-rkr-AuUs,11470
23
- ocf_data_sampler/select/select_time_slice.py,sha256=41cch1fQr59fZgv7UHsNGc3OvoynrixT3bmr3_1d7cU,6628
24
- ocf_data_sampler/torch_datasets/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
25
- ocf_data_sampler/torch_datasets/pvnet_uk_regional.py,sha256=NGrq5e6NOX8npz_45io7gVlx6eYfI-AW0rxcQjSOBQE,19167
26
- tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
- tests/conftest.py,sha256=OcArgF60paroZQqoP7xExRBF34nEyMuXd7dS7hD6p3w,5393
28
- ocf_data_sampler-0.0.19.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
29
- ocf_data_sampler-0.0.19.dist-info/METADATA,sha256=rmnF4WJbPaLoeBFmJ-ZWgwY4FpoxcND8ZvQHMrnv8b8,801
30
- ocf_data_sampler-0.0.19.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
31
- ocf_data_sampler-0.0.19.dist-info/top_level.txt,sha256=KaQn5qzkJGJP6hKWqsVAc9t0cMLjVvSTk8-kTrW79SA,23
32
- ocf_data_sampler-0.0.19.dist-info/RECORD,,