ocf-data-sampler 0.0.21__tar.gz → 0.0.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (62)
  1. {ocf_data_sampler-0.0.21/ocf_data_sampler.egg-info → ocf_data_sampler-0.0.22}/PKG-INFO +7 -2
  2. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/config/model.py +1 -1
  3. ocf_data_sampler-0.0.22/ocf_data_sampler/constants.py +135 -0
  4. ocf_data_sampler-0.0.22/ocf_data_sampler/numpy_batch/gsp.py +33 -0
  5. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/numpy_batch/nwp.py +13 -3
  6. ocf_data_sampler-0.0.22/ocf_data_sampler/numpy_batch/satellite.py +30 -0
  7. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/numpy_batch/sun_position.py +5 -6
  8. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/torch_datasets/pvnet_uk_regional.py +15 -18
  9. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22/ocf_data_sampler.egg-info}/PKG-INFO +7 -2
  10. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler.egg-info/SOURCES.txt +1 -0
  11. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler.egg-info/requires.txt +6 -1
  12. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/pyproject.toml +8 -3
  13. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/tests/numpy_batch/test_gsp.py +2 -3
  14. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/tests/numpy_batch/test_nwp.py +1 -1
  15. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/tests/numpy_batch/test_satellite.py +2 -2
  16. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/tests/numpy_batch/test_sun_position.py +8 -8
  17. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/tests/select/test_select_spatial_slice.py +1 -1
  18. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/tests/torch_datasets/test_pvnet_uk_regional.py +11 -9
  19. ocf_data_sampler-0.0.21/ocf_data_sampler/numpy_batch/gsp.py +0 -20
  20. ocf_data_sampler-0.0.21/ocf_data_sampler/numpy_batch/satellite.py +0 -23
  21. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/LICENSE +0 -0
  22. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/MANIFEST.in +0 -0
  23. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/README.md +0 -0
  24. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/__init__.py +0 -0
  25. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/config/__init__.py +0 -0
  26. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/config/load.py +0 -0
  27. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/config/save.py +0 -0
  28. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
  29. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/__init__.py +0 -0
  30. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/gsp.py +0 -0
  31. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  32. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/nwp/nwp.py +0 -0
  33. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  34. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
  35. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
  36. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
  37. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/satellite.py +0 -0
  38. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/utils.py +0 -0
  39. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/numpy_batch/__init__.py +0 -0
  40. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/select/__init__.py +0 -0
  41. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/select/dropout.py +0 -0
  42. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  43. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  44. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/select/geospatial.py +0 -0
  45. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/select/location.py +0 -0
  46. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  47. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/select/select_time_slice.py +0 -0
  48. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler/torch_datasets/__init__.py +0 -0
  49. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  50. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  51. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/setup.cfg +0 -0
  52. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/tests/__init__.py +0 -0
  53. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/tests/config/test_config.py +0 -0
  54. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/tests/conftest.py +0 -0
  55. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/tests/load/test_load_gsp.py +0 -0
  56. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/tests/load/test_load_nwp.py +0 -0
  57. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/tests/load/test_load_satellite.py +0 -0
  58. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/tests/select/test_dropout.py +0 -0
  59. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/tests/select/test_fill_time_periods.py +0 -0
  60. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/tests/select/test_find_contiguous_time_periods.py +0 -0
  61. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/tests/select/test_location.py +0 -0
  62. {ocf_data_sampler-0.0.21 → ocf_data_sampler-0.0.22}/tests/select/test_select_time_slice.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.21
3
+ Version: 0.0.22
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -36,14 +36,19 @@ Classifier: Operating System :: POSIX :: Linux
36
36
  Requires-Python: >=3.8
37
37
  Description-Content-Type: text/markdown
38
38
  License-File: LICENSE
39
+ Requires-Dist: torch
39
40
  Requires-Dist: numpy
40
41
  Requires-Dist: pandas
41
42
  Requires-Dist: xarray
42
43
  Requires-Dist: zarr
43
44
  Requires-Dist: dask
44
45
  Requires-Dist: ocf_blosc2
45
- Requires-Dist: ocf_datapipes==3.3.39
46
46
  Requires-Dist: pvlib
47
+ Requires-Dist: pydantic
48
+ Requires-Dist: pyproj
49
+ Requires-Dist: pathy
50
+ Requires-Dist: pyaml_env
51
+ Requires-Dist: pyresample
47
52
  Provides-Extra: docs
48
53
  Requires-Dist: mkdocs>=1.2; extra == "docs"
49
54
  Requires-Dist: mkdocs-material>=8.0; extra == "docs"
@@ -15,7 +15,7 @@ from typing import Dict, List, Optional
15
15
  from typing_extensions import Self
16
16
 
17
17
  from pydantic import BaseModel, Field, RootModel, field_validator, ValidationInfo, model_validator
18
- from ocf_datapipes.utils.consts import NWP_PROVIDERS
18
+ from ocf_data_sampler.constants import NWP_PROVIDERS
19
19
 
20
20
  logger = logging.getLogger(__name__)
21
21
 
@@ -0,0 +1,135 @@
1
+ import xarray as xr
2
+ import numpy as np
3
+
4
+
5
+ NWP_PROVIDERS = [
6
+ "ukv",
7
+ "ecmwf",
8
+ ]
9
+
10
+
11
+ def _to_data_array(d):
12
+ return xr.DataArray(
13
+ [d[k] for k in d.keys()],
14
+ coords={"channel": [k for k in d.keys()]},
15
+ ).astype(np.float32)
16
+
17
+
18
+ class NWPStatDict(dict):
19
+ """Custom dictionary class to hold NWP normalization stats"""
20
+
21
+ def __getitem__(self, key):
22
+ if key not in NWP_PROVIDERS:
23
+ raise KeyError(f"{key} is not a supported NWP provider - {NWP_PROVIDERS}")
24
+ elif key in self.keys():
25
+ return super().__getitem__(key)
26
+ else:
27
+ raise KeyError(
28
+ f"Values for {key} not yet available in ocf-data-sampler {list(self.keys())}"
29
+ )
30
+
31
+ # ------ UKV
32
+ # Means and std computed WITH version_7 and higher, MetOffice values
33
+ UKV_STD = {
34
+ "cdcb": 2126.99350113,
35
+ "lcc": 39.33210726,
36
+ "mcc": 41.91144559,
37
+ "hcc": 38.07184418,
38
+ "sde": 0.1029753,
39
+ "hcct": 18382.63958991,
40
+ "dswrf": 190.47216887,
41
+ "dlwrf": 39.45988077,
42
+ "h": 1075.77812282,
43
+ "t": 4.38818501,
44
+ "r": 11.45012499,
45
+ "dpt": 4.57250482,
46
+ "vis": 21578.97975625,
47
+ "si10": 3.94718813,
48
+ "wdir10": 94.08407495,
49
+ "prmsl": 1252.71790539,
50
+ "prate": 0.00021497,
51
+ }
52
+ UKV_MEAN = {
53
+ "cdcb": 1412.26599062,
54
+ "lcc": 50.08362643,
55
+ "mcc": 40.88984494,
56
+ "hcc": 29.11949682,
57
+ "sde": 0.00289545,
58
+ "hcct": -18345.97478167,
59
+ "dswrf": 111.28265039,
60
+ "dlwrf": 325.03130139,
61
+ "h": 2096.51991356,
62
+ "t": 283.64913206,
63
+ "r": 81.79229501,
64
+ "dpt": 280.54379901,
65
+ "vis": 32262.03285118,
66
+ "si10": 6.88348448,
67
+ "wdir10": 199.41891636,
68
+ "prmsl": 101321.61574029,
69
+ "prate": 3.45793433e-05,
70
+ }
71
+
72
+ UKV_STD = _to_data_array(UKV_STD)
73
+ UKV_MEAN = _to_data_array(UKV_MEAN)
74
+
75
+ # ------ ECMWF
76
+ # These were calculated from 100 random init times of UK data from 2020-2023
77
+ ECMWF_STD = {
78
+ "dlwrf": 15855867.0,
79
+ "dswrf": 13025427.0,
80
+ "duvrs": 1445635.25,
81
+ "hcc": 0.42244860529899597,
82
+ "lcc": 0.3791404366493225,
83
+ "mcc": 0.38039860129356384,
84
+ "prate": 9.81039775069803e-05,
85
+ "sde": 0.000913831521756947,
86
+ "sr": 16294988.0,
87
+ "t2m": 3.692270040512085,
88
+ "tcc": 0.37487083673477173,
89
+ "u10": 5.531515598297119,
90
+ "u100": 7.2320556640625,
91
+ "u200": 8.049470901489258,
92
+ "v10": 5.411230564117432,
93
+ "v100": 6.944501876831055,
94
+ "v200": 7.561611652374268,
95
+ "diff_dlwrf": 131942.03125,
96
+ "diff_dswrf": 715366.3125,
97
+ "diff_duvrs": 81605.25,
98
+ "diff_sr": 818950.6875,
99
+ }
100
+ ECMWF_MEAN = {
101
+ "dlwrf": 27187026.0,
102
+ "dswrf": 11458988.0,
103
+ "duvrs": 1305651.25,
104
+ "hcc": 0.3961029052734375,
105
+ "lcc": 0.44901806116104126,
106
+ "mcc": 0.3288780450820923,
107
+ "prate": 3.108070450252853e-05,
108
+ "sde": 8.107526082312688e-05,
109
+ "sr": 12905302.0,
110
+ "t2m": 283.48333740234375,
111
+ "tcc": 0.7049227356910706,
112
+ "u10": 1.7677178382873535,
113
+ "u100": 2.393547296524048,
114
+ "u200": 2.7963004112243652,
115
+ "v10": 0.985887885093689,
116
+ "v100": 1.4244288206100464,
117
+ "v200": 1.6010299921035767,
118
+ "diff_dlwrf": 1136464.0,
119
+ "diff_dswrf": 420584.6875,
120
+ "diff_duvrs": 48265.4765625,
121
+ "diff_sr": 469169.5,
122
+ }
123
+
124
+ ECMWF_STD = _to_data_array(ECMWF_STD)
125
+ ECMWF_MEAN = _to_data_array(ECMWF_MEAN)
126
+
127
+ NWP_STDS = NWPStatDict(
128
+ ukv=UKV_STD,
129
+ ecmwf=ECMWF_STD,
130
+ )
131
+ NWP_MEANS = NWPStatDict(
132
+ ukv=UKV_MEAN,
133
+ ecmwf=ECMWF_MEAN,
134
+ )
135
+
@@ -0,0 +1,33 @@
1
+ """Convert GSP to Numpy Batch"""
2
+
3
+ import xarray as xr
4
+
5
+
6
+ class GSPBatchKey:
7
+
8
+ gsp = 'gsp'
9
+ gsp_nominal_capacity_mwp = 'gsp_nominal_capacity_mwp'
10
+ gsp_effective_capacity_mwp = 'gsp_effective_capacity_mwp'
11
+ gsp_time_utc = 'gsp_time_utc'
12
+ gsp_t0_idx = 'gsp_t0_idx'
13
+ gsp_solar_azimuth = 'gsp_solar_azimuth'
14
+ gsp_solar_elevation = 'gsp_solar_elevation'
15
+ gsp_id = 'gsp_id'
16
+ gsp_x_osgb = 'gsp_x_osgb'
17
+ gsp_y_osgb = 'gsp_y_osgb'
18
+
19
+
20
+ def convert_gsp_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> dict:
21
+ """Convert from Xarray to NumpyBatch"""
22
+
23
+ example = {
24
+ GSPBatchKey.gsp: da.values,
25
+ GSPBatchKey.gsp_nominal_capacity_mwp: da.isel(time_utc=0)["nominal_capacity_mwp"].values,
26
+ GSPBatchKey.gsp_effective_capacity_mwp: da.isel(time_utc=0)["effective_capacity_mwp"].values,
27
+ GSPBatchKey.gsp_time_utc: da["time_utc"].values.astype(float),
28
+ }
29
+
30
+ if t0_idx is not None:
31
+ example[GSPBatchKey.gsp_t0_idx] = t0_idx
32
+
33
+ return example
@@ -3,13 +3,23 @@
3
3
  import pandas as pd
4
4
  import xarray as xr
5
5
 
6
- from ocf_datapipes.batch import NWPBatchKey, NWPNumpyBatch
7
6
 
7
+ class NWPBatchKey:
8
8
 
9
- def convert_nwp_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> NWPNumpyBatch:
9
+ nwp = 'nwp'
10
+ nwp_channel_names = 'nwp_channel_names'
11
+ nwp_init_time_utc = 'nwp_init_time_utc'
12
+ nwp_step = 'nwp_step'
13
+ nwp_target_time_utc = 'nwp_target_time_utc'
14
+ nwp_t0_idx = 'nwp_t0_idx'
15
+ nwp_y_osgb = 'nwp_y_osgb'
16
+ nwp_x_osgb = 'nwp_x_osgb'
17
+
18
+
19
+ def convert_nwp_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> dict:
10
20
  """Convert from Xarray to NWP NumpyBatch"""
11
21
 
12
- example: NWPNumpyBatch = {
22
+ example = {
13
23
  NWPBatchKey.nwp: da.values,
14
24
  NWPBatchKey.nwp_channel_names: da.channel.values,
15
25
  NWPBatchKey.nwp_init_time_utc: da.init_time_utc.values.astype(float),
@@ -0,0 +1,30 @@
1
+ """Convert Satellite to NumpyBatch"""
2
+ import xarray as xr
3
+
4
+
5
+ class SatelliteBatchKey:
6
+
7
+ satellite_actual = 'satellite_actual'
8
+ satellite_time_utc = 'satellite_time_utc'
9
+ satellite_x_geostationary = 'satellite_x_geostationary'
10
+ satellite_y_geostationary = 'satellite_y_geostationary'
11
+ satellite_t0_idx = 'satellite_t0_idx'
12
+
13
+
14
+ def convert_satellite_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> dict:
15
+ """Convert from Xarray to NumpyBatch"""
16
+ example = {
17
+ SatelliteBatchKey.satellite_actual: da.values,
18
+ SatelliteBatchKey.satellite_time_utc: da.time_utc.values.astype(float),
19
+ }
20
+
21
+ for batch_key, dataset_key in (
22
+ (SatelliteBatchKey.satellite_x_geostationary, "x_geostationary"),
23
+ (SatelliteBatchKey.satellite_y_geostationary, "y_geostationary"),
24
+ ):
25
+ example[batch_key] = da[dataset_key].values
26
+
27
+ if t0_idx is not None:
28
+ example[SatelliteBatchKey.satellite_t0_idx] = t0_idx
29
+
30
+ return example
@@ -2,7 +2,6 @@
2
2
  import pvlib
3
3
  import numpy as np
4
4
  import pandas as pd
5
- from ocf_datapipes.batch import BatchKey, NumpyBatch
6
5
 
7
6
 
8
7
  def calculate_azimuth_and_elevation(
@@ -37,8 +36,8 @@ def make_sun_position_numpy_batch(
37
36
  datetimes: pd.DatetimeIndex,
38
37
  lon: float,
39
38
  lat: float,
40
- key_preffix: str = "gsp"
41
- ) -> NumpyBatch:
39
+ key_prefix: str = "gsp"
40
+ ) -> dict:
42
41
  """Creates NumpyBatch with standardized solar coordinates
43
42
 
44
43
  Args:
@@ -58,9 +57,9 @@ def make_sun_position_numpy_batch(
58
57
  elevation = elevation / 180 + 0.5
59
58
 
60
59
  # Make NumpyBatch
61
- sun_numpy_batch: NumpyBatch = {
62
- BatchKey[key_preffix + "_solar_azimuth"]: azimuth,
63
- BatchKey[key_preffix + "_solar_elevation"]: elevation,
60
+ sun_numpy_batch = {
61
+ key_prefix + "_solar_azimuth": azimuth,
62
+ key_prefix + "_solar_elevation": elevation,
64
63
  }
65
64
 
66
65
  return sun_numpy_batch
@@ -28,17 +28,14 @@ from ocf_data_sampler.numpy_batch import (
28
28
 
29
29
 
30
30
  from ocf_data_sampler.config import Configuration, load_yaml_configuration
31
- from ocf_datapipes.batch import BatchKey, NumpyBatch
31
+ from ocf_data_sampler.numpy_batch.nwp import NWPBatchKey
32
+ from ocf_data_sampler.numpy_batch.gsp import GSPBatchKey
32
33
 
33
- from ocf_datapipes.utils.location import Location
34
- from ocf_datapipes.utils.geospatial import osgb_to_lon_lat
34
+ from ocf_data_sampler.select.location import Location
35
+ from ocf_data_sampler.select.geospatial import osgb_to_lon_lat
35
36
 
36
- from ocf_datapipes.utils.consts import (
37
- NWP_MEANS,
38
- NWP_STDS,
39
- )
37
+ from ocf_data_sampler.constants import NWP_MEANS, NWP_STDS
40
38
 
41
- from ocf_datapipes.training.common import concat_xr_time_utc, normalize_gsp
42
39
 
43
40
 
44
41
 
@@ -343,7 +340,7 @@ def slice_datasets_by_time(
343
340
  return sliced_datasets_dict
344
341
 
345
342
 
346
- def fill_nans_in_arrays(batch: NumpyBatch) -> NumpyBatch:
343
+ def fill_nans_in_arrays(batch: dict) -> dict:
347
344
  """Fills all NaN values in each np.ndarray in the batch dictionary with zeros.
348
345
 
349
346
  Operation is performed in-place on the batch.
@@ -375,7 +372,7 @@ def process_and_combine_datasets(
375
372
  config: Configuration,
376
373
  t0: pd.Timedelta,
377
374
  location: Location,
378
- ) -> NumpyBatch:
375
+ ) -> dict:
379
376
  """Normalize and convert data to numpy arrays"""
380
377
 
381
378
  numpy_modalities = []
@@ -392,7 +389,7 @@ def process_and_combine_datasets(
392
389
  nwp_numpy_modalities[nwp_key] = convert_nwp_to_numpy_batch(da_nwp)
393
390
 
394
391
  # Combine the NWPs into NumpyBatch
395
- numpy_modalities.append({BatchKey.nwp: nwp_numpy_modalities})
392
+ numpy_modalities.append({NWPBatchKey.nwp: nwp_numpy_modalities})
396
393
 
397
394
  if "sat" in dataset_dict:
398
395
  # Satellite is already in the range [0-1] so no need to standardise
@@ -404,8 +401,8 @@ def process_and_combine_datasets(
404
401
  gsp_config = config.input_data.gsp
405
402
 
406
403
  if "gsp" in dataset_dict:
407
- da_gsp = concat_xr_time_utc([dataset_dict["gsp"], dataset_dict["gsp_future"]])
408
- da_gsp = normalize_gsp(da_gsp)
404
+ da_gsp = xr.concat([dataset_dict["gsp"], dataset_dict["gsp_future"]], dim="time_utc")
405
+ da_gsp = da_gsp / da_gsp.effective_capacity_mwp
409
406
 
410
407
  numpy_modalities.append(
411
408
  convert_gsp_to_numpy_batch(
@@ -428,9 +425,9 @@ def process_and_combine_datasets(
428
425
  # Add coordinate data
429
426
  # TODO: Do we need all of these?
430
427
  numpy_modalities.append({
431
- BatchKey.gsp_id: location.id,
432
- BatchKey.gsp_x_osgb: location.x,
433
- BatchKey.gsp_y_osgb: location.y,
428
+ GSPBatchKey.gsp_id: location.id,
429
+ GSPBatchKey.gsp_x_osgb: location.x,
430
+ GSPBatchKey.gsp_y_osgb: location.y,
434
431
  })
435
432
 
436
433
  # Combine all the modalities and fill NaNs
@@ -538,7 +535,7 @@ class PVNetUKRegionalDataset(Dataset):
538
535
  return len(self.index_pairs)
539
536
 
540
537
 
541
- def _get_sample(self, t0: pd.Timestamp, location: Location) -> NumpyBatch:
538
+ def _get_sample(self, t0: pd.Timestamp, location: Location) -> dict:
542
539
  """Generate the PVNet sample for given coordinates
543
540
 
544
541
  Args:
@@ -565,7 +562,7 @@ class PVNetUKRegionalDataset(Dataset):
565
562
  return self._get_sample(t0, location)
566
563
 
567
564
 
568
- def get_sample(self, t0: pd.Timestamp, gsp_id: int) -> NumpyBatch:
565
+ def get_sample(self, t0: pd.Timestamp, gsp_id: int) -> dict:
569
566
  """Generate a sample for the given coordinates.
570
567
 
571
568
  Useful for users to generate samples by GSP ID.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.21
3
+ Version: 0.0.22
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -36,14 +36,19 @@ Classifier: Operating System :: POSIX :: Linux
36
36
  Requires-Python: >=3.8
37
37
  Description-Content-Type: text/markdown
38
38
  License-File: LICENSE
39
+ Requires-Dist: torch
39
40
  Requires-Dist: numpy
40
41
  Requires-Dist: pandas
41
42
  Requires-Dist: xarray
42
43
  Requires-Dist: zarr
43
44
  Requires-Dist: dask
44
45
  Requires-Dist: ocf_blosc2
45
- Requires-Dist: ocf_datapipes==3.3.39
46
46
  Requires-Dist: pvlib
47
+ Requires-Dist: pydantic
48
+ Requires-Dist: pyproj
49
+ Requires-Dist: pathy
50
+ Requires-Dist: pyaml_env
51
+ Requires-Dist: pyresample
47
52
  Provides-Extra: docs
48
53
  Requires-Dist: mkdocs>=1.2; extra == "docs"
49
54
  Requires-Dist: mkdocs-material>=8.0; extra == "docs"
@@ -3,6 +3,7 @@ MANIFEST.in
3
3
  README.md
4
4
  pyproject.toml
5
5
  ocf_data_sampler/__init__.py
6
+ ocf_data_sampler/constants.py
6
7
  ocf_data_sampler.egg-info/PKG-INFO
7
8
  ocf_data_sampler.egg-info/SOURCES.txt
8
9
  ocf_data_sampler.egg-info/dependency_links.txt
@@ -1,11 +1,16 @@
1
+ torch
1
2
  numpy
2
3
  pandas
3
4
  xarray
4
5
  zarr
5
6
  dask
6
7
  ocf_blosc2
7
- ocf_datapipes==3.3.39
8
8
  pvlib
9
+ pydantic
10
+ pyproj
11
+ pathy
12
+ pyaml_env
13
+ pyresample
9
14
 
10
15
  [docs]
11
16
  mkdocs>=1.2
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ocf_data_sampler"
7
- version = "0.0.21"
7
+ version = "0.0.22"
8
8
  license = { file = "LICENSE" }
9
9
  readme = "README.md"
10
10
  description = "Sample from weather data for renewable energy prediction"
@@ -18,14 +18,19 @@ maintainers = [
18
18
  ]
19
19
 
20
20
  dependencies = [ # Migration from requirements.txt
21
+ "torch",
21
22
  "numpy",
22
23
  "pandas",
23
24
  "xarray",
24
25
  "zarr",
25
26
  "dask",
26
27
  "ocf_blosc2",
27
- "ocf_datapipes==3.3.39",
28
- "pvlib"
28
+ "pvlib",
29
+ "pydantic",
30
+ "pyproj",
31
+ "pathy",
32
+ "pyaml_env",
33
+ "pyresample"
29
34
  ]
30
35
 
31
36
  keywords = [ # I've added some keywords, but please provide feedback if you'd like them changed!
@@ -1,8 +1,7 @@
1
- from ocf_datapipes.batch import BatchKey
2
1
  from ocf_data_sampler.load.gsp import open_gsp
3
2
 
4
3
  from ocf_data_sampler.numpy_batch import convert_gsp_to_numpy_batch
5
-
4
+ from ocf_data_sampler.numpy_batch.gsp import GSPBatchKey
6
5
 
7
6
  def test_convert_gsp_to_numpy_batch(uk_gsp_zarr_path):
8
7
 
@@ -19,5 +18,5 @@ def test_convert_gsp_to_numpy_batch(uk_gsp_zarr_path):
19
18
  assert isinstance(numpy_batch, dict)
20
19
 
21
20
  # Assert the shape of the numpy batch
22
- assert (numpy_batch[BatchKey.gsp] == da.values).all()
21
+ assert (numpy_batch[GSPBatchKey.gsp] == da.values).all()
23
22
 
@@ -6,7 +6,7 @@ import pytest
6
6
 
7
7
  from ocf_data_sampler.numpy_batch import convert_nwp_to_numpy_batch
8
8
 
9
- from ocf_datapipes.batch import NWPBatchKey
9
+ from ocf_data_sampler.numpy_batch.nwp import NWPBatchKey
10
10
 
11
11
  @pytest.fixture(scope="module")
12
12
  def da_nwp_like():
@@ -7,7 +7,7 @@ import pytest
7
7
 
8
8
  from ocf_data_sampler.numpy_batch import convert_satellite_to_numpy_batch
9
9
 
10
- from ocf_datapipes.batch import BatchKey
10
+ from ocf_data_sampler.numpy_batch.satellite import SatelliteBatchKey
11
11
 
12
12
 
13
13
  @pytest.fixture(scope="module")
@@ -39,4 +39,4 @@ def test_convert_satellite_to_numpy_batch(da_sat_like):
39
39
  assert isinstance(numpy_batch, dict)
40
40
 
41
41
  # Assert the shape of the numpy batch
42
- assert (numpy_batch[BatchKey.satellite_actual] == da_sat_like.values).all()
42
+ assert (numpy_batch[SatelliteBatchKey.satellite_actual] == da_sat_like.values).all()
@@ -6,7 +6,7 @@ from ocf_data_sampler.numpy_batch.sun_position import (
6
6
  calculate_azimuth_and_elevation, make_sun_position_numpy_batch
7
7
  )
8
8
 
9
- from ocf_datapipes.batch import NumpyBatch, BatchKey
9
+ from ocf_data_sampler.numpy_batch.gsp import GSPBatchKey
10
10
 
11
11
 
12
12
  @pytest.mark.parametrize("lat", [0, 5, 10, 23.5])
@@ -69,13 +69,13 @@ def test_make_sun_position_numpy_batch():
69
69
  datetimes = pd.date_range("2024-06-20 12:00", "2024-06-20 16:00", freq="30min")
70
70
  lon, lat = 0, 51.5
71
71
 
72
- batch = make_sun_position_numpy_batch(datetimes, lon, lat, key_preffix="gsp")
72
+ batch = make_sun_position_numpy_batch(datetimes, lon, lat, key_prefix="gsp")
73
73
 
74
- assert BatchKey.gsp_solar_elevation in batch
75
- assert BatchKey.gsp_solar_azimuth in batch
74
+ assert GSPBatchKey.gsp_solar_elevation in batch
75
+ assert GSPBatchKey.gsp_solar_azimuth in batch
76
76
 
77
77
  # The solar coords are normalised in the function
78
- assert (batch[BatchKey.gsp_solar_elevation]>=0).all()
79
- assert (batch[BatchKey.gsp_solar_elevation]<=1).all()
80
- assert (batch[BatchKey.gsp_solar_azimuth]>=0).all()
81
- assert (batch[BatchKey.gsp_solar_azimuth]<=1).all()
78
+ assert (batch[GSPBatchKey.gsp_solar_elevation]>=0).all()
79
+ assert (batch[GSPBatchKey.gsp_solar_elevation]<=1).all()
80
+ assert (batch[GSPBatchKey.gsp_solar_azimuth]>=0).all()
81
+ assert (batch[GSPBatchKey.gsp_solar_azimuth]<=1).all()
@@ -1,6 +1,6 @@
1
1
  import numpy as np
2
2
  import xarray as xr
3
- from ocf_datapipes.utils import Location
3
+ from ocf_data_sampler.select.location import Location
4
4
  import pytest
5
5
 
6
6
  from ocf_data_sampler.select.select_spatial_slice import (
@@ -3,7 +3,9 @@ import tempfile
3
3
 
4
4
  from ocf_data_sampler.torch_datasets.pvnet_uk_regional import PVNetUKRegionalDataset
5
5
  from ocf_data_sampler.config import load_yaml_configuration, save_yaml_configuration
6
- from ocf_datapipes.batch import BatchKey, NWPBatchKey
6
+ from ocf_data_sampler.numpy_batch.nwp import NWPBatchKey
7
+ from ocf_data_sampler.numpy_batch.gsp import GSPBatchKey
8
+ from ocf_data_sampler.numpy_batch.satellite import SatelliteBatchKey
7
9
 
8
10
 
9
11
  @pytest.fixture()
@@ -36,24 +38,24 @@ def test_pvnet(pvnet_config_filename):
36
38
  assert isinstance(sample, dict)
37
39
 
38
40
  for key in [
39
- BatchKey.nwp, BatchKey.satellite_actual, BatchKey.gsp,
40
- BatchKey.gsp_solar_azimuth, BatchKey.gsp_solar_elevation,
41
+ NWPBatchKey.nwp, SatelliteBatchKey.satellite_actual, GSPBatchKey.gsp,
42
+ GSPBatchKey.gsp_solar_azimuth, GSPBatchKey.gsp_solar_elevation,
41
43
  ]:
42
44
  assert key in sample
43
45
 
44
46
  for nwp_source in ["ukv"]:
45
- assert nwp_source in sample[BatchKey.nwp]
47
+ assert nwp_source in sample[NWPBatchKey.nwp]
46
48
 
47
49
  # check the shape of the data is correct
48
50
  # 30 minutes of 5 minute data (inclusive), one channel, 2x2 pixels
49
- assert sample[BatchKey.satellite_actual].shape == (7, 1, 2, 2)
51
+ assert sample[SatelliteBatchKey.satellite_actual].shape == (7, 1, 2, 2)
50
52
  # 3 hours of 60 minute data (inclusive), one channel, 2x2 pixels
51
- assert sample[BatchKey.nwp]["ukv"][NWPBatchKey.nwp].shape == (4, 1, 2, 2)
53
+ assert sample[NWPBatchKey.nwp]["ukv"][NWPBatchKey.nwp].shape == (4, 1, 2, 2)
52
54
  # 3 hours of 30 minute data (inclusive)
53
- assert sample[BatchKey.gsp].shape == (7,)
55
+ assert sample[GSPBatchKey.gsp].shape == (7,)
54
56
  # Solar angles have same shape as GSP data
55
- assert sample[BatchKey.gsp_solar_azimuth].shape == (7,)
56
- assert sample[BatchKey.gsp_solar_elevation].shape == (7,)
57
+ assert sample[GSPBatchKey.gsp_solar_azimuth].shape == (7,)
58
+ assert sample[GSPBatchKey.gsp_solar_elevation].shape == (7,)
57
59
 
58
60
  def test_pvnet_no_gsp(pvnet_config_filename):
59
61
 
@@ -1,20 +0,0 @@
1
- """Convert GSP to Numpy Batch"""
2
-
3
- import xarray as xr
4
- from ocf_datapipes.batch import BatchKey, NumpyBatch
5
-
6
-
7
- def convert_gsp_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> NumpyBatch:
8
- """Convert from Xarray to NumpyBatch"""
9
-
10
- example: NumpyBatch = {
11
- BatchKey.gsp: da.values,
12
- BatchKey.gsp_nominal_capacity_mwp: da.isel(time_utc=0)["nominal_capacity_mwp"].values,
13
- BatchKey.gsp_effective_capacity_mwp: da.isel(time_utc=0)["effective_capacity_mwp"].values,
14
- BatchKey.gsp_time_utc: da["time_utc"].values.astype(float),
15
- }
16
-
17
- if t0_idx is not None:
18
- example[BatchKey.gsp_t0_idx] = t0_idx
19
-
20
- return example
@@ -1,23 +0,0 @@
1
- """Convert Satellite to NumpyBatch"""
2
- import xarray as xr
3
-
4
- from ocf_datapipes.batch import BatchKey, NumpyBatch
5
-
6
-
7
- def convert_satellite_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> NumpyBatch:
8
- """Convert from Xarray to NumpyBatch"""
9
- example: NumpyBatch = {
10
- BatchKey.satellite_actual: da.values,
11
- BatchKey.satellite_time_utc: da.time_utc.values.astype(float),
12
- }
13
-
14
- for batch_key, dataset_key in (
15
- (BatchKey.satellite_x_geostationary, "x_geostationary"),
16
- (BatchKey.satellite_y_geostationary, "y_geostationary"),
17
- ):
18
- example[batch_key] = da[dataset_key].values
19
-
20
- if t0_idx is not None:
21
- example[BatchKey.satellite_t0_idx] = t0_idx
22
-
23
- return example