ocf-data-sampler 0.0.19__tar.gz → 0.0.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (68) hide show
  1. ocf_data_sampler-0.0.22/PKG-INFO +88 -0
  2. ocf_data_sampler-0.0.22/README.md +33 -0
  3. ocf_data_sampler-0.0.22/ocf_data_sampler/config/__init__.py +5 -0
  4. ocf_data_sampler-0.0.22/ocf_data_sampler/config/load.py +33 -0
  5. ocf_data_sampler-0.0.22/ocf_data_sampler/config/model.py +249 -0
  6. ocf_data_sampler-0.0.22/ocf_data_sampler/config/save.py +36 -0
  7. ocf_data_sampler-0.0.22/ocf_data_sampler/constants.py +135 -0
  8. ocf_data_sampler-0.0.22/ocf_data_sampler/numpy_batch/gsp.py +33 -0
  9. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/numpy_batch/nwp.py +13 -3
  10. ocf_data_sampler-0.0.22/ocf_data_sampler/numpy_batch/satellite.py +30 -0
  11. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/numpy_batch/sun_position.py +5 -6
  12. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/select/dropout.py +2 -2
  13. ocf_data_sampler-0.0.22/ocf_data_sampler/select/geospatial.py +118 -0
  14. ocf_data_sampler-0.0.22/ocf_data_sampler/select/location.py +62 -0
  15. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/select/select_spatial_slice.py +5 -14
  16. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/torch_datasets/pvnet_uk_regional.py +16 -20
  17. ocf_data_sampler-0.0.22/ocf_data_sampler.egg-info/PKG-INFO +88 -0
  18. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler.egg-info/SOURCES.txt +24 -3
  19. ocf_data_sampler-0.0.22/ocf_data_sampler.egg-info/requires.txt +17 -0
  20. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler.egg-info/top_level.txt +1 -0
  21. ocf_data_sampler-0.0.22/pyproject.toml +65 -0
  22. ocf_data_sampler-0.0.22/tests/config/test_config.py +152 -0
  23. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/tests/conftest.py +6 -1
  24. ocf_data_sampler-0.0.22/tests/load/test_load_gsp.py +15 -0
  25. ocf_data_sampler-0.0.22/tests/load/test_load_nwp.py +21 -0
  26. ocf_data_sampler-0.0.22/tests/load/test_load_satellite.py +17 -0
  27. ocf_data_sampler-0.0.22/tests/numpy_batch/test_gsp.py +22 -0
  28. ocf_data_sampler-0.0.22/tests/numpy_batch/test_nwp.py +54 -0
  29. ocf_data_sampler-0.0.22/tests/numpy_batch/test_satellite.py +42 -0
  30. ocf_data_sampler-0.0.22/tests/numpy_batch/test_sun_position.py +81 -0
  31. ocf_data_sampler-0.0.22/tests/select/test_dropout.py +75 -0
  32. ocf_data_sampler-0.0.22/tests/select/test_fill_time_periods.py +28 -0
  33. ocf_data_sampler-0.0.22/tests/select/test_find_contiguous_time_periods.py +202 -0
  34. ocf_data_sampler-0.0.22/tests/select/test_location.py +67 -0
  35. ocf_data_sampler-0.0.22/tests/select/test_select_spatial_slice.py +154 -0
  36. ocf_data_sampler-0.0.22/tests/select/test_select_time_slice.py +284 -0
  37. ocf_data_sampler-0.0.22/tests/torch_datasets/test_pvnet_uk_regional.py +74 -0
  38. ocf_data_sampler-0.0.19/PKG-INFO +0 -22
  39. ocf_data_sampler-0.0.19/README.md +0 -4
  40. ocf_data_sampler-0.0.19/ocf_data_sampler/numpy_batch/gsp.py +0 -20
  41. ocf_data_sampler-0.0.19/ocf_data_sampler/numpy_batch/satellite.py +0 -23
  42. ocf_data_sampler-0.0.19/ocf_data_sampler.egg-info/PKG-INFO +0 -22
  43. ocf_data_sampler-0.0.19/ocf_data_sampler.egg-info/requires.txt +0 -8
  44. ocf_data_sampler-0.0.19/requirements.txt +0 -8
  45. ocf_data_sampler-0.0.19/setup.py +0 -24
  46. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/LICENSE +0 -0
  47. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/MANIFEST.in +0 -0
  48. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/__init__.py +0 -0
  49. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
  50. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/__init__.py +0 -0
  51. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/gsp.py +0 -0
  52. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  53. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/nwp/nwp.py +0 -0
  54. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  55. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
  56. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
  57. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
  58. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/satellite.py +0 -0
  59. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/load/utils.py +0 -0
  60. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/numpy_batch/__init__.py +0 -0
  61. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/select/__init__.py +0 -0
  62. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  63. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  64. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/select/select_time_slice.py +0 -0
  65. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler/torch_datasets/__init__.py +0 -0
  66. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  67. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/setup.cfg +0 -0
  68. {ocf_data_sampler-0.0.19 → ocf_data_sampler-0.0.22}/tests/__init__.py +0 -0
@@ -0,0 +1,88 @@
1
+ Metadata-Version: 2.1
2
+ Name: ocf_data_sampler
3
+ Version: 0.0.22
4
+ Summary: Sample from weather data for renewable energy prediction
5
+ Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
+ Author-email: info@openclimatefix.org
7
+ Maintainer: Open Climate Fix Ltd
8
+ License: MIT License
9
+
10
+ Copyright (c) 2023 Open Climate Fix
11
+
12
+ Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ of this software and associated documentation files (the "Software"), to deal
14
+ in the Software without restriction, including without limitation the rights
15
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ copies of the Software, and to permit persons to whom the Software is
17
+ furnished to do so, subject to the following conditions:
18
+
19
+ The above copyright notice and this permission notice shall be included in all
20
+ copies or substantial portions of the Software.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28
+ SOFTWARE.
29
+
30
+ Project-URL: homepage, https://github.com/openclimatefix
31
+ Project-URL: repository, https://github.com/openclimatefix/ocf-data-sampler
32
+ Keywords: weather data,renewable energy prediction,sample weather data
33
+ Classifier: License :: OSI Approved :: MIT License
34
+ Classifier: Programming Language :: Python :: 3.8
35
+ Classifier: Operating System :: POSIX :: Linux
36
+ Requires-Python: >=3.8
37
+ Description-Content-Type: text/markdown
38
+ License-File: LICENSE
39
+ Requires-Dist: torch
40
+ Requires-Dist: numpy
41
+ Requires-Dist: pandas
42
+ Requires-Dist: xarray
43
+ Requires-Dist: zarr
44
+ Requires-Dist: dask
45
+ Requires-Dist: ocf_blosc2
46
+ Requires-Dist: pvlib
47
+ Requires-Dist: pydantic
48
+ Requires-Dist: pyproj
49
+ Requires-Dist: pathy
50
+ Requires-Dist: pyaml_env
51
+ Requires-Dist: pyresample
52
+ Provides-Extra: docs
53
+ Requires-Dist: mkdocs>=1.2; extra == "docs"
54
+ Requires-Dist: mkdocs-material>=8.0; extra == "docs"
55
+
56
+ # OCF Data Sampler
57
+ <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
58
+ [![All Contributors](https://img.shields.io/badge/all_contributors-5-orange.svg?style=flat-square)](#contributors-)
59
+ <!-- ALL-CONTRIBUTORS-BADGE:END -->
60
+ [![ease of contribution: easy](https://img.shields.io/badge/ease%20of%20contribution:%20easy-32bd50)](https://github.com/openclimatefix/ocf-meta-repo?tab=readme-ov-file#overview-of-ocfs-nowcasting-repositories)
61
+
62
+ A repo for sampling from weather data for renewable energy prediction
63
+
64
+ ## Contributors ✨
65
+
66
+ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)):
67
+
68
+ <!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section -->
69
+ <!-- prettier-ignore-start -->
70
+ <!-- markdownlint-disable -->
71
+ <table>
72
+ <tbody>
73
+ <tr>
74
+ <td align="center" valign="top" width="14.28%"><a href="https://github.com/dfulu"><img src="https://avatars.githubusercontent.com/u/41546094?v=4?s=100" width="100px;" alt="James Fulton"/><br /><sub><b>James Fulton</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=dfulu" title="Code">💻</a></td>
75
+ <td align="center" valign="top" width="14.28%"><a href="https://github.com/AUdaltsova"><img src="https://avatars.githubusercontent.com/u/43303448?v=4?s=100" width="100px;" alt="Alexandra Udaltsova"/><br /><sub><b>Alexandra Udaltsova</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=AUdaltsova" title="Code">💻</a></td>
76
+ <td align="center" valign="top" width="14.28%"><a href="https://github.com/Sukh-P"><img src="https://avatars.githubusercontent.com/u/42407101?v=4?s=100" width="100px;" alt="Sukhil Patel"/><br /><sub><b>Sukhil Patel</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=Sukh-P" title="Code">💻</a></td>
77
+ <td align="center" valign="top" width="14.28%"><a href="https://github.com/peterdudfield"><img src="https://avatars.githubusercontent.com/u/34686298?v=4?s=100" width="100px;" alt="Peter Dudfield"/><br /><sub><b>Peter Dudfield</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=peterdudfield" title="Code">💻</a></td>
78
+ <td align="center" valign="top" width="14.28%"><a href="https://github.com/VikramsDataScience"><img src="https://avatars.githubusercontent.com/u/45002417?v=4?s=100" width="100px;" alt="Vikram Pande"/><br /><sub><b>Vikram Pande</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=VikramsDataScience" title="Code">💻</a></td>
79
+ </tr>
80
+ </tbody>
81
+ </table>
82
+
83
+ <!-- markdownlint-restore -->
84
+ <!-- prettier-ignore-end -->
85
+
86
+ <!-- ALL-CONTRIBUTORS-LIST:END -->
87
+
88
+ This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome!
@@ -0,0 +1,33 @@
1
+ # OCF Data Sampler
2
+ <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
3
+ [![All Contributors](https://img.shields.io/badge/all_contributors-5-orange.svg?style=flat-square)](#contributors-)
4
+ <!-- ALL-CONTRIBUTORS-BADGE:END -->
5
+ [![ease of contribution: easy](https://img.shields.io/badge/ease%20of%20contribution:%20easy-32bd50)](https://github.com/openclimatefix/ocf-meta-repo?tab=readme-ov-file#overview-of-ocfs-nowcasting-repositories)
6
+
7
+ A repo for sampling from weather data for renewable energy prediction
8
+
9
+ ## Contributors ✨
10
+
11
+ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)):
12
+
13
+ <!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section -->
14
+ <!-- prettier-ignore-start -->
15
+ <!-- markdownlint-disable -->
16
+ <table>
17
+ <tbody>
18
+ <tr>
19
+ <td align="center" valign="top" width="14.28%"><a href="https://github.com/dfulu"><img src="https://avatars.githubusercontent.com/u/41546094?v=4?s=100" width="100px;" alt="James Fulton"/><br /><sub><b>James Fulton</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=dfulu" title="Code">💻</a></td>
20
+ <td align="center" valign="top" width="14.28%"><a href="https://github.com/AUdaltsova"><img src="https://avatars.githubusercontent.com/u/43303448?v=4?s=100" width="100px;" alt="Alexandra Udaltsova"/><br /><sub><b>Alexandra Udaltsova</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=AUdaltsova" title="Code">💻</a></td>
21
+ <td align="center" valign="top" width="14.28%"><a href="https://github.com/Sukh-P"><img src="https://avatars.githubusercontent.com/u/42407101?v=4?s=100" width="100px;" alt="Sukhil Patel"/><br /><sub><b>Sukhil Patel</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=Sukh-P" title="Code">💻</a></td>
22
+ <td align="center" valign="top" width="14.28%"><a href="https://github.com/peterdudfield"><img src="https://avatars.githubusercontent.com/u/34686298?v=4?s=100" width="100px;" alt="Peter Dudfield"/><br /><sub><b>Peter Dudfield</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=peterdudfield" title="Code">💻</a></td>
23
+ <td align="center" valign="top" width="14.28%"><a href="https://github.com/VikramsDataScience"><img src="https://avatars.githubusercontent.com/u/45002417?v=4?s=100" width="100px;" alt="Vikram Pande"/><br /><sub><b>Vikram Pande</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=VikramsDataScience" title="Code">💻</a></td>
24
+ </tr>
25
+ </tbody>
26
+ </table>
27
+
28
+ <!-- markdownlint-restore -->
29
+ <!-- prettier-ignore-end -->
30
+
31
+ <!-- ALL-CONTRIBUTORS-LIST:END -->
32
+
33
+ This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome!
@@ -0,0 +1,5 @@
1
+ """Configuration model"""
2
+
3
+ from ocf_data_sampler.config.model import Configuration
4
+ from ocf_data_sampler.config.save import save_yaml_configuration
5
+ from ocf_data_sampler.config.load import load_yaml_configuration
@@ -0,0 +1,33 @@
1
+ """Loading configuration functions.
2
+
3
+ Example:
4
+
5
+ from ocf_data_sampler.config import load_yaml_configuration
6
+ configuration = load_yaml_configuration(filename)
7
+ """
8
+
9
+ import fsspec
10
+ from pathy import Pathy
11
+ from pyaml_env import parse_config
12
+
13
+ from ocf_data_sampler.config import Configuration
14
+
15
+
16
def load_yaml_configuration(filename: str | Pathy) -> Configuration:
    """Read a yaml configuration file and build a `Configuration` from it.

    Args:
        filename: the yaml file to read. It is opened with `fsspec.open`, so it can be
            loaded from local, AWS, or GCP depending on the protocol prefix of the path
            (e.g. 's3://bucket/config.yaml').

    Returns:
        The pydantic `Configuration` built from the contents of the file.
    """
    with fsspec.open(filename, mode="r") as yaml_stream:
        # Parsing with `parse_config` means we can load ENVs in the yaml file
        raw_settings = parse_config(data=yaml_stream)

    # Turn the parsed mapping into the pydantic class
    return Configuration(**raw_settings)
@@ -0,0 +1,249 @@
1
+ """Configuration model for the dataset.
2
+
3
+ All paths must include the protocol prefix. For local files,
4
+ it's sufficient to just start with a '/'. For aws, start with 's3://',
5
+ for gcp start with 'gs://'.
6
+
7
+ Example:
8
+
9
+ from ocf_data_sampler.config import Configuration
10
+ config = Configuration(**config_dict)
11
+ """
12
+
13
+ import logging
14
+ from typing import Dict, List, Optional
15
+ from typing_extensions import Self
16
+
17
+ from pydantic import BaseModel, Field, RootModel, field_validator, ValidationInfo, model_validator
18
+ from ocf_data_sampler.constants import NWP_PROVIDERS
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ providers = ["pvoutput.org", "solar_sheffield_passiv"]
23
+
24
+
25
class Base(BaseModel):
    """Pydantic Base model where no extras can be added"""

    # Pydantic V2 reads model settings from `model_config`; the nested `class Config`
    # form is the deprecated V1 spelling. "forbid" rejects any keyword argument that
    # does not correspond to a declared field.
    model_config = {"extra": "forbid"}
32
+
33
+
34
class General(Base):
    """Descriptive metadata (name and description) for a configuration"""

    # Both fields have placeholder defaults, so this section may be omitted entirely.
    name: str = Field(
        default="example",
        description="The name of this configuration file.",
    )
    description: str = Field(
        default="example configuration",
        description="Description of this configuration file",
    )
41
+
42
+
43
class DataSourceMixin(Base):
    """Mixin adding the forecast and history window lengths, both in minutes"""

    # No default is supplied, so both fields are required; `ge=0` rejects negative values.
    forecast_minutes: int = Field(
        ge=0, description="how many minutes to forecast in the future. "
    )
    history_minutes: int = Field(
        ge=0, description="how many historic minutes to use. "
    )
56
+
57
+
58
+ # noinspection PyMethodParameters
59
class DropoutMixin(Base):
    """Mixin class, to add dropout minutes

    Adds two fields: the candidate dropout timedeltas and the fraction of samples that
    dropout is applied to. The two must be configured together: either both are
    set (fraction > 0 and a list of timedeltas) or neither is.
    """

    dropout_timedeltas_minutes: Optional[List[int]] = Field(
        default=None,
        description="List of possible minutes before t0 where data availability may start. Must be "
        "negative or zero.",
    )

    dropout_fraction: float = Field(0, description="Chance of dropout being applied to each sample")

    @field_validator("dropout_timedeltas_minutes")
    @classmethod
    def dropout_timedeltas_minutes_negative(
        cls, v: Optional[List[int]]
    ) -> Optional[List[int]]:
        """Validate 'dropout_timedeltas_minutes': every entry must be <= 0"""
        # Raise explicitly instead of using `assert`: assertions are removed when Python
        # runs with -O, which would silently disable this check.
        if v is not None and any(m > 0 for m in v):
            raise ValueError("Dropout timedeltas must be negative")
        return v

    @field_validator("dropout_fraction")
    @classmethod
    def dropout_fraction_valid(cls, v: float) -> float:
        """Validate 'dropout_fraction': must lie in the closed interval [0, 1]"""
        if not 0 <= v <= 1:
            raise ValueError("Dropout fraction must be between 0 and 1")
        return v

    @model_validator(mode="after")
    def dropout_instructions_consistent(self) -> Self:
        """Check that the dropout fraction and the dropout timedeltas agree

        Raises:
            ValueError: if timedeltas are given while the fraction is 0, or if the
                fraction is non-zero while no timedeltas are given.
        """
        if self.dropout_fraction == 0:
            if self.dropout_timedeltas_minutes is not None:
                raise ValueError("To use dropout timedeltas dropout fraction should be > 0")
        else:
            if self.dropout_timedeltas_minutes is None:
                raise ValueError("To dropout fraction > 0 requires a list of dropout timedeltas")
        return self
93
+
94
+
95
+ # noinspection PyMethodParameters
96
class TimeResolutionMixin(Base):
    """Mixin adding the temporal resolution of a data source"""

    # Required field: no default is supplied.
    time_resolution_minutes: int = Field(
        description="The temporal resolution of the data in minutes"
    )
103
+
104
+
105
class Satellite(DataSourceMixin, TimeResolutionMixin, DropoutMixin):
    """Configuration of the satellite data source

    Every field declared here is required (none has a default).
    """

    # Todo: remove 'satellite' from names
    satellite_zarr_path: str | tuple[str] | list[str] = Field(
        description="The path or list of paths which hold the satellite zarr"
    )
    satellite_channels: list[str] = Field(
        description="the satellite channels that are used"
    )

    # Size of the region of interest, in pixels
    satellite_image_size_pixels_height: int = Field(
        description=(
            "The number of pixels of the height of the region of interest for non-HRV satellite channels."
        )
    )
    satellite_image_size_pixels_width: int = Field(
        description=(
            "The number of pixels of the width of the region of interest for non-HRV satellite channels."
        )
    )

    live_delay_minutes: int = Field(
        description="The expected delay in minutes of the satellite data"
    )
131
+
132
+
133
+ # noinspection PyMethodParameters
134
class NWP(DataSourceMixin, TimeResolutionMixin, DropoutMixin):
    """NWP configuration model

    On top of the mixin fields this declares where the NWP zarr lives, which channels
    to use, the spatial crop size and the provider. Validators check that the provider
    is one of `NWP_PROVIDERS` and that the forecast/history durations are whole
    multiples of the time resolution.
    """

    nwp_zarr_path: str | tuple[str] | list[str] = Field(
        ...,
        description="The path which holds the NWP zarr",
    )
    nwp_channels: list[str] = Field(
        ..., description="the channels used in the nwp data"
    )
    nwp_accum_channels: list[str] = Field([], description="the nwp channels which need to be diffed")
    nwp_image_size_pixels_height: int = Field(..., description="The size of NWP spacial crop in pixels")
    nwp_image_size_pixels_width: int = Field(..., description="The size of NWP spacial crop in pixels")

    nwp_provider: str = Field(..., description="The provider of the NWP data")

    max_staleness_minutes: Optional[int] = Field(
        None,
        description="Sets a limit on how stale an NWP init time is allowed to be whilst still being"
        " used to construct an example. If set to None, then the max staleness is set according to"
        " the maximum forecast horizon of the NWP and the requested forecast length.",
    )

    @field_validator("nwp_provider")
    @classmethod
    def validate_nwp_provider(cls, v: str) -> str:
        """Validate 'nwp_provider'

        The comparison against `NWP_PROVIDERS` is case-insensitive, but the value is
        returned exactly as it was given (it is not lower-cased).
        """
        if v.lower() not in NWP_PROVIDERS:
            message = f"NWP provider {v} is not in {NWP_PROVIDERS}"
            logger.warning(message)
            # ValueError (not a bare Exception) so pydantic reports a ValidationError
            raise ValueError(message)
        return v

    # Todo: put into time mixin when moving intervals there
    @field_validator("forecast_minutes")
    @classmethod
    def forecast_minutes_divide_by_time_resolution(cls, v: int, info: ValidationInfo) -> int:
        """Validate that 'forecast_minutes' is a multiple of 'time_resolution_minutes'"""
        # `info.data` holds the already-validated fields. If 'time_resolution_minutes' is
        # missing from it (it was not supplied or failed its own validation) the check is
        # skipped so that error is reported rather than a KeyError from here.
        # NOTE(review): relies on 'time_resolution_minutes' being validated before this field.
        resolution = info.data.get("time_resolution_minutes")
        if resolution is not None and v % resolution != 0:
            message = "Forecast duration must be divisible by time resolution"
            logger.error(message)
            raise ValueError(message)
        return v

    @field_validator("history_minutes")
    @classmethod
    def history_minutes_divide_by_time_resolution(cls, v: int, info: ValidationInfo) -> int:
        """Validate that 'history_minutes' is a multiple of 'time_resolution_minutes'"""
        # See `forecast_minutes_divide_by_time_resolution` for why `.get` is used.
        resolution = info.data.get("time_resolution_minutes")
        if resolution is not None and v % resolution != 0:
            message = "History duration must be divisible by time resolution"
            logger.error(message)
            raise ValueError(message)
        return v
183
+
184
+
185
class MultiNWP(RootModel):
    """Configuration for multiple NWPs

    A thin dictionary-like wrapper around `root`, which maps a name to the `NWP`
    configuration stored under it. Entries can be read with item access
    (`multi["ukv"]`) or attribute access (`multi.ukv`).
    """

    root: Dict[str, NWP]

    def __getattr__(self, item):
        """Return the NWP configuration stored under the key `item`

        Python only calls this after normal attribute lookup has failed.

        Raises:
            AttributeError: if `item` is not a key of `root`.
        """
        # Reaching here with item == "root" means `root` itself is not set on the
        # instance; touching `self.root` below would re-enter this method forever.
        if item == "root":
            raise AttributeError(item)
        try:
            return self.root[item]
        except KeyError:
            # `__getattr__` must signal a missing attribute with AttributeError so that
            # `hasattr()` and `getattr(obj, name, default)` behave correctly.
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {item!r}"
            ) from None

    def __getitem__(self, item):
        """Return the NWP configuration stored under the key `item`"""
        return self.root[item]

    def __len__(self):
        """Return the number of NWP configurations held"""
        return len(self.root)

    def __iter__(self):
        """Iterate over the keys of `root`, as a dictionary does"""
        return iter(self.root)

    def keys(self):
        """Returns dictionary-like keys"""
        return self.root.keys()

    def items(self):
        """Returns dictionary-like items"""
        return self.root.items()
209
+
210
+
211
+ # noinspection PyMethodParameters
212
class GSP(DataSourceMixin, TimeResolutionMixin, DropoutMixin):
    """GSP configuration model

    Adds the GSP zarr path to the mixin fields and checks that the forecast/history
    durations are whole multiples of the time resolution.
    """

    gsp_zarr_path: str = Field(..., description="The path which holds the GSP zarr")

    @field_validator("forecast_minutes")
    @classmethod
    def forecast_minutes_divide_by_time_resolution(cls, v: int, info: ValidationInfo) -> int:
        """Validate that 'forecast_minutes' is a multiple of 'time_resolution_minutes'"""
        # `info.data` holds the already-validated fields. If 'time_resolution_minutes' is
        # missing from it (it was not supplied or failed its own validation) the check is
        # skipped so that error is reported rather than a KeyError from here.
        # NOTE(review): relies on 'time_resolution_minutes' being validated before this field.
        resolution = info.data.get("time_resolution_minutes")
        if resolution is not None and v % resolution != 0:
            message = "Forecast duration must be divisible by time resolution"
            logger.error(message)
            # ValueError (not a bare Exception) so pydantic reports a ValidationError
            raise ValueError(message)
        return v

    @field_validator("history_minutes")
    @classmethod
    def history_minutes_divide_by_time_resolution(cls, v: int, info: ValidationInfo) -> int:
        """Validate that 'history_minutes' is a multiple of 'time_resolution_minutes'"""
        # See `forecast_minutes_divide_by_time_resolution` for why `.get` is used.
        resolution = info.data.get("time_resolution_minutes")
        if resolution is not None and v % resolution != 0:
            message = "History duration must be divisible by time resolution"
            logger.error(message)
            raise ValueError(message)
        return v
232
+
233
+
234
+ # noinspection PyPep8Naming
235
class InputData(Base):
    """Input data model: one optional section per data source"""

    # Each source defaults to None, i.e. it is left out unless configured.
    satellite: Satellite | None = None
    nwp: MultiNWP | None = None
    gsp: GSP | None = None
243
+
244
+
245
class Configuration(Base):
    """Top-level configuration model for the dataset"""

    # Both sections fall back to a default-constructed instance when omitted.
    general: General = Field(default=General())
    input_data: InputData = Field(default=InputData())
@@ -0,0 +1,36 @@
1
+ """Save functions for the configuration model.
2
+
3
+ Example:
4
+
5
+ from ocf_data_sampler.config import save_yaml_configuration
6
+ save_yaml_configuration(config, filename)
7
+ """
8
+
9
+ import json
10
+
11
+ import fsspec
12
+ import yaml
13
+ from pathy import Pathy
14
+
15
+ from ocf_data_sampler.config import Configuration
16
+
17
+
18
def save_yaml_configuration(
    configuration: Configuration, filename: str | Pathy
):
    """
    Save the configuration to a yaml file.

    The file is opened with `fsspec.open`, so it will save to GCP, AWS, or local,
    depending on the protocol prefix of `filename`.

    Args:
        configuration: the configuration to save
        filename: where to write the yaml file

    Raises:
        ValueError: if `filename` is None
    """
    # `Configuration` has no `output_data` section from which a default location could
    # be derived, so the destination must always be given explicitly.
    if filename is None:
        raise ValueError("A filename is required to save the configuration")

    # make a dictionary from the configuration,
    # Note that we make the object json'able first, so that it can be saved to a yaml file
    d = json.loads(configuration.model_dump_json())

    # save to a yaml file
    with fsspec.open(filename, "w") as yaml_file:
        yaml.safe_dump(d, yaml_file, default_flow_style=False)
@@ -0,0 +1,135 @@
1
+ import xarray as xr
2
+ import numpy as np
3
+
4
+
5
+ NWP_PROVIDERS = [
6
+ "ukv",
7
+ "ecmwf",
8
+ ]
9
+
10
+
11
def _to_data_array(d):
    """Turn a {channel name: value} dict into a float32 DataArray with a "channel" coord"""
    channel_names = list(d.keys())
    channel_values = list(d.values())
    stats = xr.DataArray(channel_values, coords={"channel": channel_names})
    return stats.astype(np.float32)
16
+
17
+
18
class NWPStatDict(dict):
    """Custom dictionary class to hold NWP normalization stats

    Item lookup raises a `KeyError` with a descriptive message, distinguishing a
    provider that is not supported at all from a supported provider for which this
    dictionary holds no values.
    """

    def __getitem__(self, key):
        # Unknown provider name
        if key not in NWP_PROVIDERS:
            raise KeyError(f"{key} is not a supported NWP provider - {NWP_PROVIDERS}")

        # Supported provider, but nothing stored for it here
        if key not in self:
            raise KeyError(
                f"Values for {key} not yet available in ocf-data-sampler {list(self.keys())}"
            )

        return super().__getitem__(key)
30
+
31
+ # ------ UKV
32
+ # Means and std computed WITH version_7 and higher, MetOffice values
33
+ UKV_STD = {
34
+ "cdcb": 2126.99350113,
35
+ "lcc": 39.33210726,
36
+ "mcc": 41.91144559,
37
+ "hcc": 38.07184418,
38
+ "sde": 0.1029753,
39
+ "hcct": 18382.63958991,
40
+ "dswrf": 190.47216887,
41
+ "dlwrf": 39.45988077,
42
+ "h": 1075.77812282,
43
+ "t": 4.38818501,
44
+ "r": 11.45012499,
45
+ "dpt": 4.57250482,
46
+ "vis": 21578.97975625,
47
+ "si10": 3.94718813,
48
+ "wdir10": 94.08407495,
49
+ "prmsl": 1252.71790539,
50
+ "prate": 0.00021497,
51
+ }
52
+ UKV_MEAN = {
53
+ "cdcb": 1412.26599062,
54
+ "lcc": 50.08362643,
55
+ "mcc": 40.88984494,
56
+ "hcc": 29.11949682,
57
+ "sde": 0.00289545,
58
+ "hcct": -18345.97478167,
59
+ "dswrf": 111.28265039,
60
+ "dlwrf": 325.03130139,
61
+ "h": 2096.51991356,
62
+ "t": 283.64913206,
63
+ "r": 81.79229501,
64
+ "dpt": 280.54379901,
65
+ "vis": 32262.03285118,
66
+ "si10": 6.88348448,
67
+ "wdir10": 199.41891636,
68
+ "prmsl": 101321.61574029,
69
+ "prate": 3.45793433e-05,
70
+ }
71
+
72
+ UKV_STD = _to_data_array(UKV_STD)
73
+ UKV_MEAN = _to_data_array(UKV_MEAN)
74
+
75
+ # ------ ECMWF
76
+ # These were calculated from 100 random init times of UK data from 2020-2023
77
+ ECMWF_STD = {
78
+ "dlwrf": 15855867.0,
79
+ "dswrf": 13025427.0,
80
+ "duvrs": 1445635.25,
81
+ "hcc": 0.42244860529899597,
82
+ "lcc": 0.3791404366493225,
83
+ "mcc": 0.38039860129356384,
84
+ "prate": 9.81039775069803e-05,
85
+ "sde": 0.000913831521756947,
86
+ "sr": 16294988.0,
87
+ "t2m": 3.692270040512085,
88
+ "tcc": 0.37487083673477173,
89
+ "u10": 5.531515598297119,
90
+ "u100": 7.2320556640625,
91
+ "u200": 8.049470901489258,
92
+ "v10": 5.411230564117432,
93
+ "v100": 6.944501876831055,
94
+ "v200": 7.561611652374268,
95
+ "diff_dlwrf": 131942.03125,
96
+ "diff_dswrf": 715366.3125,
97
+ "diff_duvrs": 81605.25,
98
+ "diff_sr": 818950.6875,
99
+ }
100
+ ECMWF_MEAN = {
101
+ "dlwrf": 27187026.0,
102
+ "dswrf": 11458988.0,
103
+ "duvrs": 1305651.25,
104
+ "hcc": 0.3961029052734375,
105
+ "lcc": 0.44901806116104126,
106
+ "mcc": 0.3288780450820923,
107
+ "prate": 3.108070450252853e-05,
108
+ "sde": 8.107526082312688e-05,
109
+ "sr": 12905302.0,
110
+ "t2m": 283.48333740234375,
111
+ "tcc": 0.7049227356910706,
112
+ "u10": 1.7677178382873535,
113
+ "u100": 2.393547296524048,
114
+ "u200": 2.7963004112243652,
115
+ "v10": 0.985887885093689,
116
+ "v100": 1.4244288206100464,
117
+ "v200": 1.6010299921035767,
118
+ "diff_dlwrf": 1136464.0,
119
+ "diff_dswrf": 420584.6875,
120
+ "diff_duvrs": 48265.4765625,
121
+ "diff_sr": 469169.5,
122
+ }
123
+
124
+ ECMWF_STD = _to_data_array(ECMWF_STD)
125
+ ECMWF_MEAN = _to_data_array(ECMWF_MEAN)
126
+
127
+ NWP_STDS = NWPStatDict(
128
+ ukv=UKV_STD,
129
+ ecmwf=ECMWF_STD,
130
+ )
131
+ NWP_MEANS = NWPStatDict(
132
+ ukv=UKV_MEAN,
133
+ ecmwf=ECMWF_MEAN,
134
+ )
135
+
@@ -0,0 +1,33 @@
1
+ """Convert GSP to Numpy Batch"""
2
+
3
+ import xarray as xr
4
+
5
+
6
class GSPBatchKey:
    """String keys used for the GSP entries of a numpy batch

    Each attribute's value is identical to its own name.
    """

    # Data and capacities
    gsp = "gsp"
    gsp_nominal_capacity_mwp = "gsp_nominal_capacity_mwp"
    gsp_effective_capacity_mwp = "gsp_effective_capacity_mwp"

    # Time
    gsp_time_utc = "gsp_time_utc"
    gsp_t0_idx = "gsp_t0_idx"

    # Solar position
    gsp_solar_azimuth = "gsp_solar_azimuth"
    gsp_solar_elevation = "gsp_solar_elevation"

    # Identity and location
    gsp_id = "gsp_id"
    gsp_x_osgb = "gsp_x_osgb"
    gsp_y_osgb = "gsp_y_osgb"
18
+
19
+
20
def convert_gsp_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> dict:
    """Convert a GSP DataArray into a dictionary keyed by `GSPBatchKey` names

    Args:
        da: the GSP data. It is read along `time_utc` and must expose
            `nominal_capacity_mwp` and `effective_capacity_mwp`.
        t0_idx: optional t0 index; stored under `gsp_t0_idx` only when it is not None.

    Returns:
        A dict holding the data values, the two capacities taken at the first
        `time_utc` step, and the `time_utc` values cast to float.
    """
    # Capacities are read from the first time step only
    first_step = da.isel(time_utc=0)

    example = {}
    example[GSPBatchKey.gsp] = da.values
    example[GSPBatchKey.gsp_nominal_capacity_mwp] = first_step["nominal_capacity_mwp"].values
    example[GSPBatchKey.gsp_effective_capacity_mwp] = first_step["effective_capacity_mwp"].values
    example[GSPBatchKey.gsp_time_utc] = da["time_utc"].values.astype(float)

    if t0_idx is None:
        return example

    example[GSPBatchKey.gsp_t0_idx] = t0_idx
    return example
@@ -3,13 +3,23 @@
3
3
  import pandas as pd
4
4
  import xarray as xr
5
5
 
6
- from ocf_datapipes.batch import NWPBatchKey, NWPNumpyBatch
7
6
 
7
+ class NWPBatchKey:
8
8
 
9
- def convert_nwp_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> NWPNumpyBatch:
9
+ nwp = 'nwp'
10
+ nwp_channel_names = 'nwp_channel_names'
11
+ nwp_init_time_utc = 'nwp_init_time_utc'
12
+ nwp_step = 'nwp_step'
13
+ nwp_target_time_utc = 'nwp_target_time_utc'
14
+ nwp_t0_idx = 'nwp_t0_idx'
15
+ nwp_y_osgb = 'nwp_y_osgb'
16
+ nwp_x_osgb = 'nwp_x_osgb'
17
+
18
+
19
+ def convert_nwp_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> dict:
10
20
  """Convert from Xarray to NWP NumpyBatch"""
11
21
 
12
- example: NWPNumpyBatch = {
22
+ example = {
13
23
  NWPBatchKey.nwp: da.values,
14
24
  NWPBatchKey.nwp_channel_names: da.channel.values,
15
25
  NWPBatchKey.nwp_init_time_utc: da.init_time_utc.values.astype(float),
@@ -0,0 +1,30 @@
1
+ """Convert Satellite to NumpyBatch"""
2
+ import xarray as xr
3
+
4
+
5
class SatelliteBatchKey:
    """String keys used for the satellite entries of a numpy batch

    Each attribute's value is identical to its own name.
    """

    # Data
    satellite_actual = "satellite_actual"

    # Time
    satellite_time_utc = "satellite_time_utc"
    satellite_t0_idx = "satellite_t0_idx"

    # Spatial coordinates
    satellite_x_geostationary = "satellite_x_geostationary"
    satellite_y_geostationary = "satellite_y_geostationary"
12
+
13
+
14
def convert_satellite_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> dict:
    """Convert a satellite DataArray into a dictionary keyed by `SatelliteBatchKey` names

    Args:
        da: the satellite data. It must expose `time_utc`, `x_geostationary` and
            `y_geostationary`.
        t0_idx: optional t0 index; stored under `satellite_t0_idx` only when it is
            not None.

    Returns:
        A dict holding the data values, the `time_utc` values cast to float, and the
        x/y geostationary coordinate values.
    """
    example = {}
    example[SatelliteBatchKey.satellite_actual] = da.values
    example[SatelliteBatchKey.satellite_time_utc] = da.time_utc.values.astype(float)

    # Spatial coordinates are copied across unchanged
    example[SatelliteBatchKey.satellite_x_geostationary] = da["x_geostationary"].values
    example[SatelliteBatchKey.satellite_y_geostationary] = da["y_geostationary"].values

    if t0_idx is None:
        return example

    example[SatelliteBatchKey.satellite_t0_idx] = t0_idx
    return example