ocf-data-sampler 0.0.18__tar.gz → 0.0.42__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocf-data-sampler might be problematic. Click here for more details.
- ocf_data_sampler-0.0.42/PKG-INFO +153 -0
- ocf_data_sampler-0.0.42/README.md +98 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/config/__init__.py +5 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/config/load.py +33 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/config/model.py +246 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/config/save.py +73 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/constants.py +173 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/load/load_dataset.py +55 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler/load/nwp/providers/ecmwf.py +5 -2
- ocf_data_sampler-0.0.42/ocf_data_sampler/load/site.py +30 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/numpy_sample/__init__.py +8 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/numpy_sample/collate.py +77 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/numpy_sample/gsp.py +34 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/numpy_sample/nwp.py +42 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/numpy_sample/satellite.py +30 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/numpy_sample/site.py +30 -0
- {ocf_data_sampler-0.0.18/ocf_data_sampler/numpy_batch → ocf_data_sampler-0.0.42/ocf_data_sampler/numpy_sample}/sun_position.py +9 -10
- ocf_data_sampler-0.0.42/ocf_data_sampler/select/__init__.py +8 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler/select/dropout.py +4 -3
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler/select/find_contiguous_time_periods.py +40 -75
- ocf_data_sampler-0.0.42/ocf_data_sampler/select/geospatial.py +160 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/select/location.py +62 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler/select/select_spatial_slice.py +13 -16
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler/select/select_time_slice.py +24 -33
- ocf_data_sampler-0.0.42/ocf_data_sampler/select/spatial_slice_for_dataset.py +53 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/select/time_slice_for_dataset.py +125 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/torch_datasets/__init__.py +2 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/torch_datasets/process_and_combine.py +131 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/torch_datasets/pvnet_uk_regional.py +170 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/torch_datasets/site.py +405 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/torch_datasets/valid_time_periods.py +116 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler/utils.py +10 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler.egg-info/PKG-INFO +153 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler.egg-info/SOURCES.txt +75 -0
- ocf_data_sampler-0.0.42/ocf_data_sampler.egg-info/requires.txt +17 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler.egg-info/top_level.txt +2 -0
- ocf_data_sampler-0.0.42/pyproject.toml +65 -0
- ocf_data_sampler-0.0.42/scripts/refactor_site.py +50 -0
- ocf_data_sampler-0.0.42/tests/config/test_config.py +161 -0
- ocf_data_sampler-0.0.42/tests/config/test_save.py +37 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/tests/conftest.py +86 -1
- ocf_data_sampler-0.0.42/tests/load/test_load_gsp.py +15 -0
- ocf_data_sampler-0.0.42/tests/load/test_load_nwp.py +21 -0
- ocf_data_sampler-0.0.42/tests/load/test_load_satellite.py +17 -0
- ocf_data_sampler-0.0.42/tests/load/test_load_sites.py +14 -0
- ocf_data_sampler-0.0.42/tests/numpy_sample/test_collate.py +26 -0
- ocf_data_sampler-0.0.42/tests/numpy_sample/test_gsp.py +38 -0
- ocf_data_sampler-0.0.42/tests/numpy_sample/test_nwp.py +52 -0
- ocf_data_sampler-0.0.42/tests/numpy_sample/test_satellite.py +40 -0
- ocf_data_sampler-0.0.42/tests/numpy_sample/test_sun_position.py +81 -0
- ocf_data_sampler-0.0.42/tests/select/test_dropout.py +75 -0
- ocf_data_sampler-0.0.42/tests/select/test_fill_time_periods.py +28 -0
- ocf_data_sampler-0.0.42/tests/select/test_find_contiguous_time_periods.py +202 -0
- ocf_data_sampler-0.0.42/tests/select/test_location.py +67 -0
- ocf_data_sampler-0.0.42/tests/select/test_select_spatial_slice.py +154 -0
- ocf_data_sampler-0.0.42/tests/select/test_select_time_slice.py +272 -0
- ocf_data_sampler-0.0.42/tests/torch_datasets/conftest.py +18 -0
- ocf_data_sampler-0.0.42/tests/torch_datasets/test_process_and_combine.py +126 -0
- ocf_data_sampler-0.0.42/tests/torch_datasets/test_pvnet_uk_regional.py +59 -0
- ocf_data_sampler-0.0.42/tests/torch_datasets/test_site.py +129 -0
- ocf_data_sampler-0.0.18/PKG-INFO +0 -22
- ocf_data_sampler-0.0.18/README.md +0 -4
- ocf_data_sampler-0.0.18/ocf_data_sampler/numpy_batch/__init__.py +0 -7
- ocf_data_sampler-0.0.18/ocf_data_sampler/numpy_batch/gsp.py +0 -20
- ocf_data_sampler-0.0.18/ocf_data_sampler/numpy_batch/nwp.py +0 -33
- ocf_data_sampler-0.0.18/ocf_data_sampler/numpy_batch/satellite.py +0 -23
- ocf_data_sampler-0.0.18/ocf_data_sampler/select/__init__.py +0 -1
- ocf_data_sampler-0.0.18/ocf_data_sampler/torch_datasets/__init__.py +0 -1
- ocf_data_sampler-0.0.18/ocf_data_sampler/torch_datasets/pvnet_uk_regional.py +0 -578
- ocf_data_sampler-0.0.18/ocf_data_sampler.egg-info/PKG-INFO +0 -22
- ocf_data_sampler-0.0.18/ocf_data_sampler.egg-info/SOURCES.txt +0 -37
- ocf_data_sampler-0.0.18/ocf_data_sampler.egg-info/requires.txt +0 -8
- ocf_data_sampler-0.0.18/requirements.txt +0 -8
- ocf_data_sampler-0.0.18/setup.py +0 -24
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/LICENSE +0 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/MANIFEST.in +0 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler/__init__.py +0 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler/load/__init__.py +0 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler/load/gsp.py +0 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler/load/nwp/__init__.py +0 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler/load/nwp/nwp.py +0 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler/load/satellite.py +0 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler/load/utils.py +0 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler/select/fill_time_periods.py +0 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/setup.cfg +0 -0
- {ocf_data_sampler-0.0.18 → ocf_data_sampler-0.0.42}/tests/__init__.py +0 -0
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: ocf_data_sampler
|
|
3
|
+
Version: 0.0.42
|
|
4
|
+
Summary: Sample from weather data for renewable energy prediction
|
|
5
|
+
Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
|
|
6
|
+
Author-email: info@openclimatefix.org
|
|
7
|
+
Maintainer: Open Climate Fix Ltd
|
|
8
|
+
License: MIT License
|
|
9
|
+
|
|
10
|
+
Copyright (c) 2023 Open Climate Fix
|
|
11
|
+
|
|
12
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
13
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
14
|
+
in the Software without restriction, including without limitation the rights
|
|
15
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
16
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
17
|
+
furnished to do so, subject to the following conditions:
|
|
18
|
+
|
|
19
|
+
The above copyright notice and this permission notice shall be included in all
|
|
20
|
+
copies or substantial portions of the Software.
|
|
21
|
+
|
|
22
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
23
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
24
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
25
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
26
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
27
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
28
|
+
SOFTWARE.
|
|
29
|
+
|
|
30
|
+
Project-URL: homepage, https://github.com/openclimatefix
|
|
31
|
+
Project-URL: repository, https://github.com/openclimatefix/ocf-data-sampler
|
|
32
|
+
Keywords: weather data,renewable energy prediction,sample weather data
|
|
33
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
34
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
35
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
36
|
+
Requires-Python: >=3.8
|
|
37
|
+
Description-Content-Type: text/markdown
|
|
38
|
+
License-File: LICENSE
|
|
39
|
+
Requires-Dist: torch
|
|
40
|
+
Requires-Dist: numpy
|
|
41
|
+
Requires-Dist: pandas
|
|
42
|
+
Requires-Dist: xarray
|
|
43
|
+
Requires-Dist: zarr==2.18.3
|
|
44
|
+
Requires-Dist: dask
|
|
45
|
+
Requires-Dist: ocf_blosc2
|
|
46
|
+
Requires-Dist: pvlib
|
|
47
|
+
Requires-Dist: pydantic
|
|
48
|
+
Requires-Dist: pyproj
|
|
49
|
+
Requires-Dist: pathy
|
|
50
|
+
Requires-Dist: pyaml_env
|
|
51
|
+
Requires-Dist: pyresample
|
|
52
|
+
Provides-Extra: docs
|
|
53
|
+
Requires-Dist: mkdocs>=1.2; extra == "docs"
|
|
54
|
+
Requires-Dist: mkdocs-material>=8.0; extra == "docs"
|
|
55
|
+
|
|
56
|
+
# ocf-data-sampler
|
|
57
|
+
|
|
58
|
+
<!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
|
|
59
|
+
[](#contributors-)
|
|
60
|
+
<!-- ALL-CONTRIBUTORS-BADGE:END -->
|
|
61
|
+
|
|
62
|
+
[](https://github.com/openclimatefix/ocf-data-sampler/tags)
|
|
63
|
+
[](https://github.com/openclimatefix#how-easy-is-it-to-get-involved)
|
|
64
|
+
|
|
65
|
+
**ocf-data-sampler** contains all the tools needed to create samples and feed them to our models, such as [PVNet](https://github.com/openclimatefix/PVNet/). The data we work with—typically energy data, satellite imagery, and numerical weather predictions (NWPs)—is usually too heavy to do this on the fly, so that's where this repo comes in: handling steps like opening the data, selecting the right samples, normalising and reshaping, and saving to and reading from disk.
|
|
66
|
+
|
|
67
|
+
We are currently migrating to this repo from [ocf_datapipes](https://github.com/openclimatefix/ocf_datapipes/), which performs the same functions but is built around `PyTorch DataPipes`, which are quite cumbersome to work with and are no longer maintained by PyTorch. **ocf-data-sampler** uses `PyTorch Datasets`, and we've taken the opportunity to make the code much cleaner and more manageable.
|
|
68
|
+
|
|
69
|
+
> [!Note]
|
|
70
|
+
> This repository is still in development and does not yet have the full
|
|
71
|
+
> functionality of its predecessor, [ocf_datapipes](https://github.com/openclimatefix/ocf_datapipes/).
|
|
72
|
+
> It might not be ready for use out of the box! We would really appreciate any help to let us make the transition faster.
|
|
73
|
+
|
|
74
|
+
## Documentation
|
|
75
|
+
|
|
76
|
+
**ocf-data-sampler** doesn't have external documentation _yet_; you can read a bit about how our torch datasets work in the README [here](ocf_data_sampler/torch_datasets/README.md).
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
## FAQ
|
|
80
|
+
|
|
81
|
+
If you have any questions about this or any other of our repos, don't hesitate to hop to our [Discussions Page](https://github.com/orgs/openclimatefix/discussions)!
|
|
82
|
+
|
|
83
|
+
### How does ocf-data-sampler deal with data sources that use different projections (e.g. some are in latitude-longitude, and some in OSGB)?
|
|
84
|
+
|
|
85
|
+
When creating samples, we make a spatial crop of a preset size centred around a point of interest (POI, usually a solar or wind farm). The size of the crop is set not in miles or kilometres, but in 'pixels', which would be different for different data sources, depending on their spatial resolution, projections they use, and where the POI is. For example, a latitude-longitude source with a 1° resolution will have pixel sizes corresponding to very different 'surface' distances (that you might measure in, e.g., kilometres) from a source with 0.1° resolution. The pixel size will even be different for the same source depending on how close the POI is to the equator!
|
|
86
|
+
|
|
87
|
+
Instead of trying to accommodate all these differences and make all the sources use the same spatial grid, we translate the POI's position into the corresponding coordinate system and select the crop using the source's original grid. This 'snapshot' is then passed to the model with no additional information on what specific coordinates it represents; instead, since the size is always the same and the POI is always in the centre, the model gets consistent information on the measurements at a location near the POI and how they affect the target, without any explicit knowledge of where that location is in coordinate system terms.
|
|
88
|
+
|
|
89
|
+
## Development
|
|
90
|
+
|
|
91
|
+
You can install **ocf-data-sampler** for development as follows:
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
pip install git+https://github.com/openclimatefix/ocf-data-sampler.git
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### Running the test suite
|
|
98
|
+
|
|
99
|
+
The tests in this project use `pytest`. Once you have it installed, you can run it from the project's directory:
|
|
100
|
+
|
|
101
|
+
```
|
|
102
|
+
cd ocf-data-sampler
|
|
103
|
+
pytest
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Contributing and community
|
|
107
|
+
|
|
108
|
+
[](https://github.com/openclimatefix/ocf-data-sampler/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc)
|
|
109
|
+
|
|
110
|
+
- PRs are welcome! See the [Organisation Profile](https://github.com/openclimatefix) for details on contributing
|
|
111
|
+
- Find out about our other projects in the [OCF Meta Repo](https://github.com/openclimatefix/ocf-meta-repo)
|
|
112
|
+
- Check out the [OCF blog](https://openclimatefix.org/blog) for updates
|
|
113
|
+
- Follow OCF on [LinkedIn](https://uk.linkedin.com/company/open-climate-fix)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
## Contributors
|
|
117
|
+
|
|
118
|
+
Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)):
|
|
119
|
+
|
|
120
|
+
<!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section -->
|
|
121
|
+
<!-- prettier-ignore-start -->
|
|
122
|
+
<!-- markdownlint-disable -->
|
|
123
|
+
<table>
|
|
124
|
+
<tbody>
|
|
125
|
+
<tr>
|
|
126
|
+
<td align="center" valign="top" width="14.28%"><a href="https://github.com/dfulu"><img src="https://avatars.githubusercontent.com/u/41546094?v=4?s=100" width="100px;" alt="James Fulton"/><br /><sub><b>James Fulton</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=dfulu" title="Code">💻</a></td>
|
|
127
|
+
<td align="center" valign="top" width="14.28%"><a href="https://github.com/AUdaltsova"><img src="https://avatars.githubusercontent.com/u/43303448?v=4?s=100" width="100px;" alt="Alexandra Udaltsova"/><br /><sub><b>Alexandra Udaltsova</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=AUdaltsova" title="Code">💻</a></td>
|
|
128
|
+
<td align="center" valign="top" width="14.28%"><a href="https://github.com/Sukh-P"><img src="https://avatars.githubusercontent.com/u/42407101?v=4?s=100" width="100px;" alt="Sukhil Patel"/><br /><sub><b>Sukhil Patel</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=Sukh-P" title="Code">💻</a></td>
|
|
129
|
+
<td align="center" valign="top" width="14.28%"><a href="https://github.com/peterdudfield"><img src="https://avatars.githubusercontent.com/u/34686298?v=4?s=100" width="100px;" alt="Peter Dudfield"/><br /><sub><b>Peter Dudfield</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=peterdudfield" title="Code">💻</a></td>
|
|
130
|
+
<td align="center" valign="top" width="14.28%"><a href="https://github.com/VikramsDataScience"><img src="https://avatars.githubusercontent.com/u/45002417?v=4?s=100" width="100px;" alt="Vikram Pande"/><br /><sub><b>Vikram Pande</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=VikramsDataScience" title="Code">💻</a></td>
|
|
131
|
+
<td align="center" valign="top" width="14.28%"><a href="https://github.com/SophiaLi20"><img src="https://avatars.githubusercontent.com/u/163532536?v=4?s=100" width="100px;" alt="Unnati Bhardwaj"/><br /><sub><b>Unnati Bhardwaj</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=SophiaLi20" title="Documentation">📖</a></td>
|
|
132
|
+
<td align="center" valign="top" width="14.28%"><a href="https://github.com/alirashidAR"><img src="https://avatars.githubusercontent.com/u/110668489?v=4?s=100" width="100px;" alt="Ali Rashid"/><br /><sub><b>Ali Rashid</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=alirashidAR" title="Code">💻</a></td>
|
|
133
|
+
</tr>
|
|
134
|
+
<tr>
|
|
135
|
+
<td align="center" valign="top" width="14.28%"><a href="https://github.com/felix-e-h-p"><img src="https://avatars.githubusercontent.com/u/137530077?v=4?s=100" width="100px;" alt="Felix"/><br /><sub><b>Felix</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=felix-e-h-p" title="Code">💻</a></td>
|
|
136
|
+
<td align="center" valign="top" width="14.28%"><a href="https://timothyajaniportfolio-b6v3zq29k-timthegreat.vercel.app/"><img src="https://avatars.githubusercontent.com/u/60073728?v=4?s=100" width="100px;" alt="Ajani Timothy"/><br /><sub><b>Ajani Timothy</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=Tim1119" title="Code">💻</a></td>
|
|
137
|
+
</tr>
|
|
138
|
+
</tbody>
|
|
139
|
+
</table>
|
|
140
|
+
|
|
141
|
+
<!-- markdownlint-restore -->
|
|
142
|
+
<!-- prettier-ignore-end -->
|
|
143
|
+
|
|
144
|
+
<!-- ALL-CONTRIBUTORS-LIST:END -->
|
|
145
|
+
|
|
146
|
+
This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome!
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
*Part of the [Open Climate Fix](https://github.com/orgs/openclimatefix/people) community.*
|
|
151
|
+
|
|
152
|
+
[](https://openclimatefix.org)
|
|
153
|
+
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# ocf-data-sampler
|
|
2
|
+
|
|
3
|
+
<!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
|
|
4
|
+
[](#contributors-)
|
|
5
|
+
<!-- ALL-CONTRIBUTORS-BADGE:END -->
|
|
6
|
+
|
|
7
|
+
[](https://github.com/openclimatefix/ocf-data-sampler/tags)
|
|
8
|
+
[](https://github.com/openclimatefix#how-easy-is-it-to-get-involved)
|
|
9
|
+
|
|
10
|
+
**ocf-data-sampler** contains all the tools needed to create samples and feed them to our models, such as [PVNet](https://github.com/openclimatefix/PVNet/). The data we work with—typically energy data, satellite imagery, and numerical weather predictions (NWPs)—is usually too heavy to do this on the fly, so that's where this repo comes in: handling steps like opening the data, selecting the right samples, normalising and reshaping, and saving to and reading from disk.
|
|
11
|
+
|
|
12
|
+
We are currently migrating to this repo from [ocf_datapipes](https://github.com/openclimatefix/ocf_datapipes/), which performs the same functions but is built around `PyTorch DataPipes`, which are quite cumbersome to work with and are no longer maintained by PyTorch. **ocf-data-sampler** uses `PyTorch Datasets`, and we've taken the opportunity to make the code much cleaner and more manageable.
|
|
13
|
+
|
|
14
|
+
> [!Note]
|
|
15
|
+
> This repository is still in development and does not yet have the full
|
|
16
|
+
> functionality of its predecessor, [ocf_datapipes](https://github.com/openclimatefix/ocf_datapipes/).
|
|
17
|
+
> It might not be ready for use out of the box! We would really appreciate any help to let us make the transition faster.
|
|
18
|
+
|
|
19
|
+
## Documentation
|
|
20
|
+
|
|
21
|
+
**ocf-data-sampler** doesn't have external documentation _yet_; you can read a bit about how our torch datasets work in the README [here](ocf_data_sampler/torch_datasets/README.md).
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
## FAQ
|
|
25
|
+
|
|
26
|
+
If you have any questions about this or any other of our repos, don't hesitate to hop to our [Discussions Page](https://github.com/orgs/openclimatefix/discussions)!
|
|
27
|
+
|
|
28
|
+
### How does ocf-data-sampler deal with data sources that use different projections (e.g. some are in latitude-longitude, and some in OSGB)?
|
|
29
|
+
|
|
30
|
+
When creating samples, we make a spatial crop of a preset size centred around a point of interest (POI, usually a solar or wind farm). The size of the crop is set not in miles or kilometres, but in 'pixels', which would be different for different data sources, depending on their spatial resolution, projections they use, and where the POI is. For example, a latitude-longitude source with a 1° resolution will have pixel sizes corresponding to very different 'surface' distances (that you might measure in, e.g., kilometres) from a source with 0.1° resolution. The pixel size will even be different for the same source depending on how close the POI is to the equator!
|
|
31
|
+
|
|
32
|
+
Instead of trying to accommodate all these differences and make all the sources use the same spatial grid, we translate the POI's position into the corresponding coordinate system and select the crop using the source's original grid. This 'snapshot' is then passed to the model with no additional information on what specific coordinates it represents; instead, since the size is always the same and the POI is always in the centre, the model gets consistent information on the measurements at a location near the POI and how they affect the target, without any explicit knowledge of where that location is in coordinate system terms.
|
|
33
|
+
|
|
34
|
+
## Development
|
|
35
|
+
|
|
36
|
+
You can install **ocf-data-sampler** for development as follows:
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
pip install git+https://github.com/openclimatefix/ocf-data-sampler.git
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### Running the test suite
|
|
43
|
+
|
|
44
|
+
The tests in this project use `pytest`. Once you have it installed, you can run it from the project's directory:
|
|
45
|
+
|
|
46
|
+
```
|
|
47
|
+
cd ocf-data-sampler
|
|
48
|
+
pytest
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Contributing and community
|
|
52
|
+
|
|
53
|
+
[](https://github.com/openclimatefix/ocf-data-sampler/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc)
|
|
54
|
+
|
|
55
|
+
- PRs are welcome! See the [Organisation Profile](https://github.com/openclimatefix) for details on contributing
|
|
56
|
+
- Find out about our other projects in the [OCF Meta Repo](https://github.com/openclimatefix/ocf-meta-repo)
|
|
57
|
+
- Check out the [OCF blog](https://openclimatefix.org/blog) for updates
|
|
58
|
+
- Follow OCF on [LinkedIn](https://uk.linkedin.com/company/open-climate-fix)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
## Contributors
|
|
62
|
+
|
|
63
|
+
Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)):
|
|
64
|
+
|
|
65
|
+
<!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section -->
|
|
66
|
+
<!-- prettier-ignore-start -->
|
|
67
|
+
<!-- markdownlint-disable -->
|
|
68
|
+
<table>
|
|
69
|
+
<tbody>
|
|
70
|
+
<tr>
|
|
71
|
+
<td align="center" valign="top" width="14.28%"><a href="https://github.com/dfulu"><img src="https://avatars.githubusercontent.com/u/41546094?v=4?s=100" width="100px;" alt="James Fulton"/><br /><sub><b>James Fulton</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=dfulu" title="Code">💻</a></td>
|
|
72
|
+
<td align="center" valign="top" width="14.28%"><a href="https://github.com/AUdaltsova"><img src="https://avatars.githubusercontent.com/u/43303448?v=4?s=100" width="100px;" alt="Alexandra Udaltsova"/><br /><sub><b>Alexandra Udaltsova</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=AUdaltsova" title="Code">💻</a></td>
|
|
73
|
+
<td align="center" valign="top" width="14.28%"><a href="https://github.com/Sukh-P"><img src="https://avatars.githubusercontent.com/u/42407101?v=4?s=100" width="100px;" alt="Sukhil Patel"/><br /><sub><b>Sukhil Patel</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=Sukh-P" title="Code">💻</a></td>
|
|
74
|
+
<td align="center" valign="top" width="14.28%"><a href="https://github.com/peterdudfield"><img src="https://avatars.githubusercontent.com/u/34686298?v=4?s=100" width="100px;" alt="Peter Dudfield"/><br /><sub><b>Peter Dudfield</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=peterdudfield" title="Code">💻</a></td>
|
|
75
|
+
<td align="center" valign="top" width="14.28%"><a href="https://github.com/VikramsDataScience"><img src="https://avatars.githubusercontent.com/u/45002417?v=4?s=100" width="100px;" alt="Vikram Pande"/><br /><sub><b>Vikram Pande</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=VikramsDataScience" title="Code">💻</a></td>
|
|
76
|
+
<td align="center" valign="top" width="14.28%"><a href="https://github.com/SophiaLi20"><img src="https://avatars.githubusercontent.com/u/163532536?v=4?s=100" width="100px;" alt="Unnati Bhardwaj"/><br /><sub><b>Unnati Bhardwaj</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=SophiaLi20" title="Documentation">📖</a></td>
|
|
77
|
+
<td align="center" valign="top" width="14.28%"><a href="https://github.com/alirashidAR"><img src="https://avatars.githubusercontent.com/u/110668489?v=4?s=100" width="100px;" alt="Ali Rashid"/><br /><sub><b>Ali Rashid</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=alirashidAR" title="Code">💻</a></td>
|
|
78
|
+
</tr>
|
|
79
|
+
<tr>
|
|
80
|
+
<td align="center" valign="top" width="14.28%"><a href="https://github.com/felix-e-h-p"><img src="https://avatars.githubusercontent.com/u/137530077?v=4?s=100" width="100px;" alt="Felix"/><br /><sub><b>Felix</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=felix-e-h-p" title="Code">💻</a></td>
|
|
81
|
+
<td align="center" valign="top" width="14.28%"><a href="https://timothyajaniportfolio-b6v3zq29k-timthegreat.vercel.app/"><img src="https://avatars.githubusercontent.com/u/60073728?v=4?s=100" width="100px;" alt="Ajani Timothy"/><br /><sub><b>Ajani Timothy</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=Tim1119" title="Code">💻</a></td>
|
|
82
|
+
</tr>
|
|
83
|
+
</tbody>
|
|
84
|
+
</table>
|
|
85
|
+
|
|
86
|
+
<!-- markdownlint-restore -->
|
|
87
|
+
<!-- prettier-ignore-end -->
|
|
88
|
+
|
|
89
|
+
<!-- ALL-CONTRIBUTORS-LIST:END -->
|
|
90
|
+
|
|
91
|
+
This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome!
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
*Part of the [Open Climate Fix](https://github.com/orgs/openclimatefix/people) community.*
|
|
96
|
+
|
|
97
|
+
[](https://openclimatefix.org)
|
|
98
|
+
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Loading configuration functions.
|
|
2
|
+
|
|
3
|
+
Example:
|
|
4
|
+
|
|
5
|
+
from ocf_data_sampler.config import load_yaml_configuration
|
|
6
|
+
configuration = load_yaml_configuration(filename)
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import fsspec
|
|
10
|
+
from pathy import Pathy
|
|
11
|
+
from pyaml_env import parse_config
|
|
12
|
+
|
|
13
|
+
from ocf_data_sampler.config import Configuration
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def load_yaml_configuration(filename: "str | Pathy") -> Configuration:
    """Load a YAML file and turn its contents into a ``Configuration`` object.

    Args:
        filename: The file name that you want to load. The file is opened with
            ``fsspec``, so it will load from local, AWS, or GCP depending on the
            protocol prefix (e.g. 's3://bucket/config.yaml').

    Returns:
        The ``Configuration`` pydantic class built from the file's contents.
    """
    # NOTE: the `filename` annotation is quoted on purpose. An unquoted
    # `str | Pathy` is evaluated when the function is defined and raises a
    # TypeError on Python versions older than 3.10.

    # Load the file to a dictionary. `parse_config` is used so that
    # ENVs can be loaded in the yaml file.
    with fsspec.open(filename, mode="r") as stream:
        config_dict = parse_config(data=stream)

    # Turn the dictionary into the pydantic class.
    return Configuration(**config_dict)
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
"""Configuration model for the dataset.
|
|
2
|
+
|
|
3
|
+
All paths must include the protocol prefix. For local files,
|
|
4
|
+
it's sufficient to just start with a '/'. For AWS, start with 's3://',
|
|
5
|
+
for GCP, start with 'gs://'.
|
|
6
|
+
|
|
7
|
+
Example:
|
|
8
|
+
|
|
9
|
+
from ocf_data_sampler.config import Configuration
|
|
10
|
+
config = Configuration(**config_dict)
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
from typing import Dict, List, Optional
|
|
15
|
+
from typing_extensions import Self
|
|
16
|
+
|
|
17
|
+
from pydantic import BaseModel, Field, RootModel, field_validator, ValidationInfo, model_validator
|
|
18
|
+
|
|
19
|
+
from ocf_data_sampler.constants import NWP_PROVIDERS
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
providers = ["pvoutput.org", "solar_sheffield_passiv"]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class Base(BaseModel):
    """Pydantic Base model where no extras can be added"""

    # Pydantic v2 configuration. This replaces the nested `class Config`, which is
    # the v1 form and is deprecated in v2 (the rest of this module uses v2 APIs).
    # "forbid" rejects the use of extra kwargs that are not declared fields.
    model_config = {"extra": "forbid"}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class General(Base):
    """General section of the configuration: a name and a free-text description."""

    # Both fields are optional and fall back to the example defaults below.
    name: str = Field(
        default="example",
        description="The name of this configuration file",
    )
    description: str = Field(
        default="example configuration",
        description="Description of this configuration file",
    )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class TimeWindowMixin(Base):
    """Mixin class, to add interval start, end and resolution minutes.

    Validation enforces that:
      * `time_resolution_minutes` is strictly positive,
      * `interval_start_minutes` and `interval_end_minutes` are each divisible by
        `time_resolution_minutes`,
      * `interval_start_minutes <= interval_end_minutes`.
    """

    # Declared before the interval fields on purpose: the field validators below
    # read this value from `info.data`, which only contains fields that have
    # already been validated.
    time_resolution_minutes: int = Field(
        ...,
        gt=0,
        description="The temporal resolution of the data in minutes",
    )

    interval_start_minutes: int = Field(
        ...,
        description="Data interval starts at `t0 + interval_start_minutes`",
    )

    interval_end_minutes: int = Field(
        ...,
        description="Data interval ends at `t0 + interval_end_minutes`",
    )

    @model_validator(mode='after')
    def check_interval_range(cls, values):
        """Check that the interval start is not after the interval end.

        Args:
            values: The object being validated; its `interval_start_minutes` and
                `interval_end_minutes` attributes are compared.

        Returns:
            `values`, unchanged.

        Raises:
            ValueError: If `interval_start_minutes > interval_end_minutes`.
        """
        if values.interval_start_minutes > values.interval_end_minutes:
            raise ValueError('interval_start_minutes must be <= interval_end_minutes')
        return values

    @field_validator("interval_start_minutes")
    def interval_start_minutes_divide_by_time_resolution(cls, v: int, info: ValidationInfo) -> int:
        """Check that `interval_start_minutes` is divisible by `time_resolution_minutes`.

        Raises:
            ValueError: If `v` is not a multiple of the time resolution.
        """
        resolution = info.data.get("time_resolution_minutes")
        # The resolution is absent from `info.data` when it was not supplied or failed
        # its own validation. That problem is reported on the resolution field itself,
        # so skip this check rather than letting a bare KeyError escape.
        if resolution is not None and v % resolution != 0:
            raise ValueError("interval_start_minutes must be divisible by time_resolution_minutes")
        return v

    @field_validator("interval_end_minutes")
    def interval_end_minutes_divide_by_time_resolution(cls, v: int, info: ValidationInfo) -> int:
        """Check that `interval_end_minutes` is divisible by `time_resolution_minutes`.

        Raises:
            ValueError: If `v` is not a multiple of the time resolution.
        """
        resolution = info.data.get("time_resolution_minutes")
        # Same guard as for the interval start: a missing/invalid resolution is reported
        # on its own field, so do not raise a KeyError here.
        if resolution is not None and v % resolution != 0:
            raise ValueError("interval_end_minutes must be divisible by time_resolution_minutes")
        return v
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# noinspection PyMethodParameters
|
|
84
|
+
class DropoutMixin(Base):
    """Mixin class, to add dropout minutes.

    ``dropout_timedeltas_minutes`` and ``dropout_fraction`` must be given
    together: a non-zero fraction needs a list of timedeltas, and a list of
    timedeltas needs a non-zero fraction.
    """

    dropout_timedeltas_minutes: Optional[List[int]] = Field(
        default=None,
        description="List of possible minutes before t0 where data availability may start. Must be "
        "negative or zero.",
    )

    dropout_fraction: float = Field(
        default=0,
        description="Chance of dropout being applied to each sample",
        ge=0,
        le=1,
    )

    @field_validator("dropout_timedeltas_minutes")
    @classmethod
    def dropout_timedeltas_minutes_negative(
        cls, v: Optional[List[int]]
    ) -> Optional[List[int]]:
        """Validate 'dropout_timedeltas_minutes': every entry must be <= 0."""
        # Raise explicitly instead of using `assert`, which is stripped when
        # Python runs with -O and would silently disable this check.
        if v is not None and any(m > 0 for m in v):
            raise ValueError("Dropout timedeltas must be negative or zero")
        return v

    @model_validator(mode="after")
    def dropout_instructions_consistent(self) -> Self:
        """Check that the dropout fraction and the dropout timedeltas agree."""
        if self.dropout_fraction == 0:
            if self.dropout_timedeltas_minutes is not None:
                raise ValueError("To use dropout timedeltas dropout fraction should be > 0")
        else:
            if self.dropout_timedeltas_minutes is None:
                raise ValueError("To dropout fraction > 0 requires a list of dropout timedeltas")
        return self
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class SpatialWindowMixin(Base):
    """Mixin class, to add the height and width (in pixels) of the region of interest."""

    # NOTE(review): `ge=0` admits a zero-pixel window - confirm whether that is intended.
    image_size_pixels_height: int = Field(
        ...,
        description="The number of pixels of the height of the region of interest",
        ge=0,
    )

    image_size_pixels_width: int = Field(
        ...,
        description="The number of pixels of the width of the region of interest",
        ge=0,
    )
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class Satellite(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
    """Satellite configuration model.

    Combines the time-window, dropout and spatial-window settings with the
    location of the satellite zarr data and the channels to use.
    """

    # NOTE(review): `tuple[str]` only describes a 1-tuple; confirm whether
    # `tuple[str, ...]` was intended.
    zarr_path: str | tuple[str] | list[str] = Field(
        ...,
        description="The path or list of paths which hold the data zarr",
    )

    channels: list[str] = Field(
        ...,
        description="the satellite channels that are used",
    )
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# noinspection PyMethodParameters
|
|
149
|
+
class NWP(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
    """NWP configuration model.

    Combines the time-window, dropout and spatial-window settings with the
    location of the NWP zarr data, its provider and the channels to use.
    """

    # NOTE(review): `tuple[str]` only describes a 1-tuple; confirm whether
    # `tuple[str, ...]` was intended.
    zarr_path: str | tuple[str] | list[str] = Field(
        ...,
        description="The path or list of paths which hold the data zarr",
    )

    channels: list[str] = Field(
        ..., description="the channels used in the nwp data"
    )

    provider: str = Field(..., description="The provider of the NWP data")

    accum_channels: list[str] = Field([], description="the nwp channels which need to be diffed")

    max_staleness_minutes: Optional[int] = Field(
        None,
        description="Sets a limit on how stale an NWP init time is allowed to be whilst still being"
        " used to construct an example. If set to None, then the max staleness is set according to"
        " the maximum forecast horizon of the NWP and the requested forecast length.",
    )

    @field_validator("provider")
    @classmethod
    def validate_provider(cls, v: str) -> str:
        """Validate 'provider' against NWP_PROVIDERS (case-insensitive).

        The value is returned unchanged; only the membership test is lower-cased.
        """
        if v.lower() not in NWP_PROVIDERS:
            message = f"NWP provider {v} is not in {NWP_PROVIDERS}"
            logger.warning(message)
            # ValueError (not bare Exception) so pydantic reports this as a
            # ValidationError, consistent with the other validators in this module.
            raise ValueError(message)
        return v
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class MultiNWP(RootModel):
    """Configuration for multiple NWPs.

    Wraps a mapping from a user-chosen name to an `NWP` configuration and
    exposes it both dictionary-style (`cfg["name"]`, `len`, iteration, `keys`,
    `items`) and attribute-style (`cfg.name`).
    """

    root: Dict[str, NWP]

    def __getattr__(self, item):
        """Return the NWP configuration stored under `item`.

        Only called when normal attribute lookup fails. Names that are not keys
        of the mapping are delegated to the parent class, so a missing name
        raises AttributeError (keeping `hasattr` / `getattr(..., default)`
        working) rather than KeyError.
        """
        # Read the mapping straight from the instance dict: `self.root` would
        # re-enter __getattr__ and recurse forever if `root` is not set yet.
        nwp_configs = self.__dict__.get("root")
        if nwp_configs is not None and item in nwp_configs:
            return nwp_configs[item]
        return super().__getattr__(item)

    def __getitem__(self, item):
        """Return the NWP configuration stored under key `item`."""
        return self.root[item]

    def __len__(self):
        """Return the number of NWP configurations."""
        return len(self.root)

    def __iter__(self):
        """Iterate over the keys of the underlying mapping."""
        return iter(self.root)

    def keys(self):
        """Returns dictionary-like keys"""
        return self.root.keys()

    def items(self):
        """Returns dictionary-like items"""
        return self.root.items()
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
class GSP(TimeWindowMixin, DropoutMixin):
    """GSP configuration model: time-window and dropout settings plus the zarr location."""

    zarr_path: str = Field(
        ...,
        description="The path which holds the GSP zarr",
    )
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
class Site(TimeWindowMixin, DropoutMixin):
    """Site configuration model: time-window and dropout settings plus the data file locations."""

    file_path: str = Field(
        ..., description="The NetCDF files holding the power timeseries."
    )
    metadata_file_path: str = Field(
        ..., description="The CSV files describing power system"
    )

    # TODO validate the netcdf for sites
    # TODO validate the csv for metadata
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
# noinspection PyPep8Naming
|
|
233
|
+
class InputData(Base):
    """Input data model.

    Every source is optional and defaults to None when it is not configured.
    """

    satellite: Optional[Satellite] = None
    nwp: Optional[MultiNWP] = None
    gsp: Optional[GSP] = None
    site: Optional[Site] = None
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
class Configuration(Base):
    """Top-level configuration model for the dataset.

    Both sections fall back to their default-constructed models when omitted.
    """

    general: General = General()
    input_data: InputData = InputData()
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Save functions for the configuration model.
|
|
2
|
+
|
|
3
|
+
This module provides functionality to save configuration objects to YAML files,
|
|
4
|
+
supporting local and cloud storage locations.
|
|
5
|
+
|
|
6
|
+
Example:
|
|
7
|
+
from ocf_data_sampler.config import save_yaml_configuration
|
|
8
|
+
saved_path = save_yaml_configuration(config, "config.yaml")
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Union
|
|
15
|
+
|
|
16
|
+
import fsspec
|
|
17
|
+
import yaml
|
|
18
|
+
|
|
19
|
+
from ocf_data_sampler.config import Configuration
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def save_yaml_configuration(
    configuration: Configuration,
    filename: Union[str, Path],
) -> Path:
    """Save a configuration object to a YAML file.

    Args:
        configuration: Configuration object containing the settings to save
        filename: Destination for the YAML file. Either a local path or a URL
            of the form '<protocol>://...' (e.g. 'gs://', 's3://'). Local paths
            that do not start with '/' are resolved against the current
            working directory.

    Returns:
        Path: The destination wrapped in a ``Path``. For URLs, ``pathlib``
            collapses the '//' after the protocol, so keep the original string
            if the location has to be reopened.

    Raises:
        ValueError: If filename is None, if the parent directory of a local
            destination does not exist, or if writing to the destination fails
        TypeError: If the configuration cannot be serialized
    """
    if filename is None:
        raise ValueError("filename cannot be None")

    destination = str(filename)

    if "://" in destination:
        # Hand URLs to fsspec untouched: routing them through pathlib turns
        # 'gs://bucket/x' into 'gs:/bucket/x', which is no longer a valid URL.
        open_target = destination
        filepath = Path(destination)
    else:
        filepath = Path(filename)
        if not destination.startswith("/"):
            filepath = Path.cwd() / filepath

        # For local files, check that the directory exists before proceeding
        if filepath.is_absolute() and not filepath.parent.exists():
            raise ValueError("Directory does not exist")
        open_target = str(filepath)

    try:
        # Round-trip through JSON so only plain, YAML-safe types reach the dumper
        config_dict = json.loads(configuration.model_dump_json())

        with fsspec.open(open_target, mode="w") as yaml_file:
            yaml.safe_dump(config_dict, yaml_file, default_flow_style=False)
    except json.JSONDecodeError as e:
        raise TypeError(f"Failed to serialize configuration: {str(e)}") from e
    except PermissionError as e:
        raise ValueError(f"Permission denied when writing to {open_target}") from e
    except OSError as e:
        raise ValueError(f"Failed to write configuration to {open_target}: {str(e)}") from e

    return filepath
|